Added tesseract data · pdfix/pdfix_sdk_example_java@563c1c8 · GitHub
[go: up one dir, main page]

Skip to content

Commit 563c1c8

Browse files
committed
Added tesseract data
1 parent d3f4648 commit 563c1c8

36 files changed

+180
-0
lines changed

resources/tessdata/Makefile.am

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
datadir = @datadir@/tessdata
2+
3+
data_DATA = pdf.ttf
4+
EXTRA_DIST = $(data_DATA)
5+
6+
SUBDIRS = configs tessconfigs
7+
8+
langdata = bul.traineddata mlt.traineddata chr.traineddata \
9+
slk.traineddata dan-frak.traineddata eng.traineddata \
10+
ces.traineddata afr.traineddata swa.traineddata \
11+
kan.traineddata bel.traineddata ind.traineddata \
12+
lit.traineddata nld.traineddata osd.traineddata \
13+
mkd.traineddata est.traineddata fra.traineddata \
14+
hin.traineddata lat_lid.traineddata nor.traineddata \
15+
por.traineddata ron.traineddata swe.traineddata \
16+
pol.traineddata ara.traineddata tel.traineddata \
17+
ell.traineddata mal.traineddata vie.traineddata \
18+
heb.traineddata deu.traineddata eus.traineddata \
19+
ita_old.traineddata rus.traineddata sqi.traineddata \
20+
spa.traineddata glg.traineddata slk-frak.traineddata \
21+
equ.traineddata hrv.traineddata frk.traineddata \
22+
cat.traineddata lav.traineddata ukr.traineddata \
23+
enm.traineddata dan.traineddata fin.traineddata \
24+
ben.traineddata srp.traineddata tha.traineddata \
25+
hun.traineddata tgl.traineddata frm.traineddata \
26+
slv.traineddata chi_sim.traineddata tam.traineddata \
27+
tur.traineddata epo.traineddata msa.traineddata \
28+
kor.traineddata isl.traineddata jpn.traineddata \
29+
chi_tra.traineddata ita.traineddata spa_old.traineddata \
30+
deu-frak.traineddata aze.traineddata
31+
32+
# Remove the language data files listed in $(langdata) from the install
# directory on `make uninstall`.
#
# The `rm` is chained to a successful `cd` with `&&`: with a plain `;` a
# failed `cd` would let `rm -f` run in the current (build) directory and
# delete the source copies of the data files. A missing install directory
# is treated as "nothing to remove" so uninstall stays idempotent.
uninstall-local:
	dir="$(DESTDIR)$(datadir)"; \
	test ! -d "$$dir" || { cd "$$dir" && rm -f $(langdata); }
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Config files go into the `configs` subdirectory of the tessdata directory.
datadir = @datadir@/tessdata/configs
# Files installed into $(datadir).
data_DATA = inter makebox box.train unlv ambigs.train lstm.train api_config kannada box.train.stderr quiet logfile digits hocr tsv linebox pdf rebox strokewidth bigram txt
# Distribute exactly the installed files; reusing the list keeps the two
# sets from drifting apart when a config file is added or removed.
EXTRA_DIST = $(data_DATA)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
tessedit_ambigs_training 1
2+
load_freq_dawg 0
3+
load_punc_dawg 0
4+
load_system_dawg 0
5+
load_number_dawg 0
6+
ambigs_debug_level 3
7+
load_fixed_length_dawgs 0

resources/tessdata/configs/api_config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
tessedit_zero_rejection T

resources/tessdata/configs/bazaar

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
load_system_dawg F
2+
load_freq_dawg F
3+
user_words_suffix user-words
4+
user_patterns_suffix user-patterns

resources/tessdata/configs/bigram

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
load_bigram_dawg True
2+
tessedit_enable_bigram_correction True
3+
tessedit_bigram_debug 3
4+
save_raw_choices True
5+
save_alt_choices True

resources/tessdata/configs/box.train

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
disable_character_fragments T
2+
file_type .bl
3+
textord_fast_pitch_test T
4+
tessedit_single_match 0
5+
tessedit_zero_rejection T
6+
tessedit_minimal_rejection F
7+
tessedit_write_rep_codes F
8+
il1_adaption_test 1
9+
edges_children_fix F
10+
edges_childarea 0.65
11+
edges_boxarea 0.9
12+
tessedit_resegment_from_boxes T
13+
tessedit_train_from_boxes T
14+
textord_no_rejects T
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
file_type .bl
2+
#tessedit_use_nn F
3+
textord_fast_pitch_test T
4+
tessedit_single_match 0
5+
tessedit_zero_rejection T
6+
tessedit_minimal_rejection F
7+
tessedit_write_rep_codes F
8+
il1_adaption_test 1
9+
edges_children_fix F
10+
edges_childarea 0.65
11+
edges_boxarea 0.9
12+
tessedit_resegment_from_boxes T
13+
tessedit_train_from_boxes T
14+
#textord_repeat_extraction F
15+
textord_no_rejects T

resources/tessdata/configs/digits

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
tessedit_char_whitelist 0123456789-.

resources/tessdata/configs/get.image

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
tessedit_write_images T

0 commit comments

Comments
 (0)
0