MuPDFCore 1.8.0
Multiplatform .NET bindings for MuPDF
Loading...
Searching...
No Matches
TesseractLanguage.cs
1using System;
2using System.Collections.Generic;
3using System.IO;
4using System.Net;
5using System.Reflection;
6using System.Text;
7
8namespace MuPDFCore
9{
10 /// <summary>
11 /// Represents a language used by Tesseract OCR.
12 /// </summary>
13 public class TesseractLanguage
14 {
/// <summary>
/// The folder that contains the trained language data file. When the object was created with a <see langword="null" /> prefix, the <c>TESSDATA_PREFIX</c> environment variable is used instead.
/// </summary>
public string Prefix { get; }

/// <summary>
/// The language name, i.e. the name of the trained data file without its <c>.traineddata</c> extension. The Tesseract library expects to find the file at <c>Prefix/Language.traineddata</c>.
/// </summary>
public string Language { get; }
24
/// <summary>
/// Single-language trained models, in their fast (integer) version. The lower-case name of each member is used as the name of the trained data file.
/// </summary>
public enum Fast
{
    /// <summary>Afrikaans.</summary>
    Afr,
    /// <summary>Amharic.</summary>
    Amh,
    /// <summary>Arabic.</summary>
    Ara,
    /// <summary>Assamese.</summary>
    Asm,
    /// <summary>Azerbaijani.</summary>
    Aze,
    /// <summary>Azerbaijani (Cyrillic).</summary>
    Aze_Cyrl,
    /// <summary>Belarusian.</summary>
    Bel,
    /// <summary>Bengali.</summary>
    Ben,
    /// <summary>Tibetan.</summary>
    Bod,
    /// <summary>Bosnian.</summary>
    Bos,
    /// <summary>Breton.</summary>
    Bre,
    /// <summary>Bulgarian.</summary>
    Bul,
    /// <summary>Catalan/Valencian.</summary>
    Cat,
    /// <summary>Cebuano.</summary>
    Ceb,
    /// <summary>Czech.</summary>
    Ces,
    /// <summary>Chinese (Simplified).</summary>
    Chi_Sim,
    /// <summary>Chinese (Simplified), vertical.</summary>
    Chi_Sim_Vert,
    /// <summary>Chinese (Traditional).</summary>
    Chi_Tra,
    /// <summary>Chinese (Traditional), vertical.</summary>
    Chi_Tra_Vert,
    /// <summary>Cherokee.</summary>
    Chr,
    /// <summary>Corsican.</summary>
    Cos,
    /// <summary>Welsh.</summary>
    Cym,
    /// <summary>Danish.</summary>
    Dan,
    /// <summary>German.</summary>
    Deu,
    /// <summary>Divehi/Dhivehi/Maldivian.</summary>
    Div,
    /// <summary>Dzongkha.</summary>
    Dzo,
    /// <summary>Modern Greek (1453-).</summary>
    Ell,
    /// <summary>English.</summary>
    Eng,
    /// <summary>Middle English (1100-1500).</summary>
    Enm,
    /// <summary>Esperanto.</summary>
    Epo,
    /// <summary>A language for equations.</summary>
    Equ,
    /// <summary>Estonian.</summary>
    Est,
    /// <summary>Basque.</summary>
    Eus,
    /// <summary>Faroese.</summary>
    Fao,
    /// <summary>Persian.</summary>
    Fas,
    /// <summary>Filipino/Pilipino.</summary>
    Fil,
    /// <summary>Finnish.</summary>
    Fin,
    /// <summary>French.</summary>
    Fra,
    /// <summary>German (Fraktur).</summary>
    Frk,
    /// <summary>Middle French (ca. 1400-1600).</summary>
    Frm,
    /// <summary>Western Frisian.</summary>
    Fry,
    /// <summary>Gaelic/Scottish Gaelic.</summary>
    Gla,
    /// <summary>Irish.</summary>
    Gle,
    /// <summary>Galician.</summary>
    Glg,
    /// <summary>Ancient Greek (to 1453).</summary>
    Grc,
    /// <summary>Gujarati.</summary>
    Guj,
    /// <summary>Haitian/Haitian Creole.</summary>
    Hat,
    /// <summary>Hebrew.</summary>
    Heb,
    /// <summary>Hindi.</summary>
    Hin,
    /// <summary>Croatian.</summary>
    Hrv,
    /// <summary>Hungarian.</summary>
    Hun,
    /// <summary>Armenian.</summary>
    Hye,
    /// <summary>Inuktitut.</summary>
    Iku,
    /// <summary>Indonesian.</summary>
    Ind,
    /// <summary>Icelandic.</summary>
    Isl,
    /// <summary>Italian.</summary>
    Ita,
    /// <summary>Italian (old).</summary>
    Ita_Old,
    /// <summary>Javanese.</summary>
    Jav,
    /// <summary>Japanese.</summary>
    Jpn,
    /// <summary>Japanese, vertical.</summary>
    Jpn_Vert,
    /// <summary>Kannada.</summary>
    Kan,
    /// <summary>Georgian.</summary>
    Kat,
    /// <summary>Georgian (old).</summary>
    Kat_Old,
    /// <summary>Kazakh.</summary>
    Kaz,
    /// <summary>Central Khmer.</summary>
    Khm,
    /// <summary>Kirghiz/Kyrgyz.</summary>
    Kir,
    /// <summary>Northern Kurdish.</summary>
    Kmr,
    /// <summary>Korean.</summary>
    Kor,
    /// <summary>Korean, vertical.</summary>
    Kor_Vert,
    /// <summary>Lao.</summary>
    Lao,
    /// <summary>Latin.</summary>
    Lat,
    /// <summary>Latvian.</summary>
    Lav,
    /// <summary>Lithuanian.</summary>
    Lit,
    /// <summary>Luxembourgish/Letzeburgesch.</summary>
    Ltz,
    /// <summary>Malayalam.</summary>
    Mal,
    /// <summary>Marathi.</summary>
    Mar,
    /// <summary>Macedonian.</summary>
    Mkd,
    /// <summary>Maltese.</summary>
    Mlt,
    /// <summary>Mongolian.</summary>
    Mon,
    /// <summary>Maori.</summary>
    Mri,
    /// <summary>Malay.</summary>
    Msa,
    /// <summary>Burmese.</summary>
    Mya,
    /// <summary>Nepali.</summary>
    Nep,
    /// <summary>Dutch/Flemish.</summary>
    Nld,
    /// <summary>Norwegian.</summary>
    Nor,
    /// <summary>Occitan (post 1500).</summary>
    Oci,
    /// <summary>Oriya.</summary>
    Ori,
    /// <summary>The orientation and script detection module.</summary>
    Osd,
    /// <summary>Panjabi/Punjabi.</summary>
    Pan,
    /// <summary>Polish.</summary>
    Pol,
    /// <summary>Portuguese.</summary>
    Por,
    /// <summary>Pushto/Pashto.</summary>
    Pus,
    /// <summary>Quechua.</summary>
    Que,
    /// <summary>Romanian/Moldavian/Moldovan.</summary>
    Ron,
    /// <summary>Russian.</summary>
    Rus,
    /// <summary>Sanskrit.</summary>
    San,
    /// <summary>Sinhala/Sinhalese.</summary>
    Sin,
    /// <summary>Slovak.</summary>
    Slk,
    /// <summary>Slovenian.</summary>
    Slv,
    /// <summary>Sindhi.</summary>
    Snd,
    /// <summary>Spanish/Castilian.</summary>
    Spa,
    /// <summary>Spanish/Castilian (old).</summary>
    Spa_Old,
    /// <summary>Albanian.</summary>
    Sqi,
    /// <summary>Serbian.</summary>
    Srp,
    /// <summary>Serbian (Latin).</summary>
    Srp_Latn,
    /// <summary>Sundanese.</summary>
    Sun,
    /// <summary>Swahili.</summary>
    Swa,
    /// <summary>Swedish.</summary>
    Swe,
    /// <summary>Syriac.</summary>
    Syr,
    /// <summary>Tamil.</summary>
    Tam,
    /// <summary>Tatar.</summary>
    Tat,
    /// <summary>Telugu.</summary>
    Tel,
    /// <summary>Tajik.</summary>
    Tgk,
    /// <summary>Thai.</summary>
    Tha,
    /// <summary>Tigrinya.</summary>
    Tir,
    /// <summary>Tonga (Tonga Islands).</summary>
    Ton,
    /// <summary>Turkish.</summary>
    Tur,
    /// <summary>Uighur/Uyghur.</summary>
    Uig,
    /// <summary>Ukrainian.</summary>
    Ukr,
    /// <summary>Urdu.</summary>
    Urd,
    /// <summary>Uzbek.</summary>
    Uzb,
    /// <summary>Uzbek (Cyrillic).</summary>
    Uzb_Cyrl,
    /// <summary>Vietnamese.</summary>
    Vie,
    /// <summary>Yiddish.</summary>
    Yid,
    /// <summary>Yoruba.</summary>
    Yor
}
531
/// <summary>
/// Single-script trained models (each supporting one or more languages), in their fast (integer) version.
/// </summary>
public enum FastScripts
{
    /// <summary>Arabic script.</summary>
    Arabic,
    /// <summary>Armenian script.</summary>
    Armenian,
    /// <summary>Bengali script.</summary>
    Bengali,
    /// <summary>Canadian Aboriginal script.</summary>
    Canadian_Aboriginal,
    /// <summary>Cherokee script.</summary>
    Cherokee,
    /// <summary>Cyrillic script.</summary>
    Cyrillic,
    /// <summary>Devanagari script.</summary>
    Devanagari,
    /// <summary>Ethiopic script.</summary>
    Ethiopic,
    /// <summary>Fraktur script.</summary>
    Fraktur,
    /// <summary>Georgian script.</summary>
    Georgian,
    /// <summary>Greek script.</summary>
    Greek,
    /// <summary>Gujarati script.</summary>
    Gujarati,
    /// <summary>Gurmukhi script.</summary>
    Gurmukhi,
    /// <summary>Han (Simplified) script.</summary>
    HanS,
    /// <summary>Han (Simplified) script, vertical.</summary>
    HanS_Vert,
    /// <summary>Han (Traditional) script.</summary>
    HanT,
    /// <summary>Han (Traditional) script, vertical.</summary>
    HanT_Vert,
    /// <summary>Hangul script.</summary>
    Hangul,
    /// <summary>Hangul script, vertical.</summary>
    Hangul_Vert,
    /// <summary>Hebrew script.</summary>
    Hebrew,
    /// <summary>Japanese script.</summary>
    Japanese,
    /// <summary>Japanese script, vertical.</summary>
    Japanese_Vert,
    /// <summary>Kannada script.</summary>
    Kannada,
    /// <summary>Khmer script.</summary>
    Khmer,
    /// <summary>Lao script.</summary>
    Lao,
    /// <summary>Latin script.</summary>
    Latin,
    /// <summary>Malayalam script.</summary>
    Malayalam,
    /// <summary>Myanmar script.</summary>
    Myanmar,
    /// <summary>Oriya script.</summary>
    Oriya,
    /// <summary>Sinhala script.</summary>
    Sinhala,
    /// <summary>Syriac script.</summary>
    Syriac,
    /// <summary>Tamil script.</summary>
    Tamil,
    /// <summary>Telugu script.</summary>
    Telugu,
    /// <summary>Thaana script.</summary>
    Thaana,
    /// <summary>Thai script.</summary>
    Thai,
    /// <summary>Tibetan script.</summary>
    Tibetan,
    /// <summary>Vietnamese script.</summary>
    Vietnamese
}
686
/// <summary>
/// Single-language trained models, in their best (most accurate) version. The lower-case name of each member is used as the name of the trained data file.
/// </summary>
public enum Best
{
    /// <summary>Afrikaans.</summary>
    Afr,
    /// <summary>Amharic.</summary>
    Amh,
    /// <summary>Arabic.</summary>
    Ara,
    /// <summary>Assamese.</summary>
    Asm,
    /// <summary>Azerbaijani.</summary>
    Aze,
    /// <summary>Azerbaijani (Cyrillic).</summary>
    Aze_Cyrl,
    /// <summary>Belarusian.</summary>
    Bel,
    /// <summary>Bengali.</summary>
    Ben,
    /// <summary>Tibetan.</summary>
    Bod,
    /// <summary>Bosnian.</summary>
    Bos,
    /// <summary>Breton.</summary>
    Bre,
    /// <summary>Bulgarian.</summary>
    Bul,
    /// <summary>Catalan/Valencian.</summary>
    Cat,
    /// <summary>Cebuano.</summary>
    Ceb,
    /// <summary>Czech.</summary>
    Ces,
    /// <summary>Chinese (Simplified).</summary>
    Chi_Sim,
    /// <summary>Chinese (Simplified), vertical.</summary>
    Chi_Sim_Vert,
    /// <summary>Chinese (Traditional).</summary>
    Chi_Tra,
    /// <summary>Chinese (Traditional), vertical.</summary>
    Chi_Tra_Vert,
    /// <summary>Cherokee.</summary>
    Chr,
    /// <summary>Corsican.</summary>
    Cos,
    /// <summary>Welsh.</summary>
    Cym,
    /// <summary>Danish.</summary>
    Dan,
    /// <summary>German.</summary>
    Deu,
    /// <summary>Divehi/Dhivehi/Maldivian.</summary>
    Div,
    /// <summary>Dzongkha.</summary>
    Dzo,
    /// <summary>Modern Greek (1453-).</summary>
    Ell,
    /// <summary>English.</summary>
    Eng,
    /// <summary>Middle English (1100-1500).</summary>
    Enm,
    /// <summary>Esperanto.</summary>
    Epo,
    /// <summary>Estonian.</summary>
    Est,
    /// <summary>Basque.</summary>
    Eus,
    /// <summary>Faroese.</summary>
    Fao,
    /// <summary>Persian.</summary>
    Fas,
    /// <summary>Filipino/Pilipino.</summary>
    Fil,
    /// <summary>Finnish.</summary>
    Fin,
    /// <summary>French.</summary>
    Fra,
    /// <summary>German (Fraktur).</summary>
    Frk,
    /// <summary>Middle French (ca. 1400-1600).</summary>
    Frm,
    /// <summary>Western Frisian.</summary>
    Fry,
    /// <summary>Gaelic/Scottish Gaelic.</summary>
    Gla,
    /// <summary>Irish.</summary>
    Gle,
    /// <summary>Galician.</summary>
    Glg,
    /// <summary>Ancient Greek (to 1453).</summary>
    Grc,
    /// <summary>Gujarati.</summary>
    Guj,
    /// <summary>Haitian/Haitian Creole.</summary>
    Hat,
    /// <summary>Hebrew.</summary>
    Heb,
    /// <summary>Hindi.</summary>
    Hin,
    /// <summary>Croatian.</summary>
    Hrv,
    /// <summary>Hungarian.</summary>
    Hun,
    /// <summary>Armenian.</summary>
    Hye,
    /// <summary>Inuktitut.</summary>
    Iku,
    /// <summary>Indonesian.</summary>
    Ind,
    /// <summary>Icelandic.</summary>
    Isl,
    /// <summary>Italian.</summary>
    Ita,
    /// <summary>Italian (old).</summary>
    Ita_Old,
    /// <summary>Javanese.</summary>
    Jav,
    /// <summary>Japanese.</summary>
    Jpn,
    /// <summary>Japanese, vertical.</summary>
    Jpn_Vert,
    /// <summary>Kannada.</summary>
    Kan,
    /// <summary>Georgian.</summary>
    Kat,
    /// <summary>Georgian (old).</summary>
    Kat_Old,
    /// <summary>Kazakh.</summary>
    Kaz,
    /// <summary>Central Khmer.</summary>
    Khm,
    /// <summary>Kirghiz/Kyrgyz.</summary>
    Kir,
    /// <summary>Northern Kurdish.</summary>
    Kmr,
    /// <summary>Korean.</summary>
    Kor,
    /// <summary>Korean, vertical.</summary>
    Kor_Vert,
    /// <summary>Lao.</summary>
    Lao,
    /// <summary>Latin.</summary>
    Lat,
    /// <summary>Latvian.</summary>
    Lav,
    /// <summary>Lithuanian.</summary>
    Lit,
    /// <summary>Luxembourgish/Letzeburgesch.</summary>
    Ltz,
    /// <summary>Malayalam.</summary>
    Mal,
    /// <summary>Marathi.</summary>
    Mar,
    /// <summary>Macedonian.</summary>
    Mkd,
    /// <summary>Maltese.</summary>
    Mlt,
    /// <summary>Mongolian.</summary>
    Mon,
    /// <summary>Maori.</summary>
    Mri,
    /// <summary>Malay.</summary>
    Msa,
    /// <summary>Burmese.</summary>
    Mya,
    /// <summary>Nepali.</summary>
    Nep,
    /// <summary>Dutch/Flemish.</summary>
    Nld,
    /// <summary>Norwegian.</summary>
    Nor,
    /// <summary>Occitan (post 1500).</summary>
    Oci,
    /// <summary>Oriya.</summary>
    Ori,
    /// <summary>The orientation and script detection module.</summary>
    Osd,
    /// <summary>Panjabi/Punjabi.</summary>
    Pan,
    /// <summary>Polish.</summary>
    Pol,
    /// <summary>Portuguese.</summary>
    Por,
    /// <summary>Pushto/Pashto.</summary>
    Pus,
    /// <summary>Quechua.</summary>
    Que,
    /// <summary>Romanian/Moldavian/Moldovan.</summary>
    Ron,
    /// <summary>Russian.</summary>
    Rus,
    /// <summary>Sanskrit.</summary>
    San,
    /// <summary>Sinhala/Sinhalese.</summary>
    Sin,
    /// <summary>Slovak.</summary>
    Slk,
    /// <summary>Slovenian.</summary>
    Slv,
    /// <summary>Sindhi.</summary>
    Snd,
    /// <summary>Spanish/Castilian.</summary>
    Spa,
    /// <summary>Spanish/Castilian (old).</summary>
    Spa_Old,
    /// <summary>Albanian.</summary>
    Sqi,
    /// <summary>Serbian.</summary>
    Srp,
    /// <summary>Serbian (Latin).</summary>
    Srp_Latn,
    /// <summary>Sundanese.</summary>
    Sun,
    /// <summary>Swahili.</summary>
    Swa,
    /// <summary>Swedish.</summary>
    Swe,
    /// <summary>Syriac.</summary>
    Syr,
    /// <summary>Tamil.</summary>
    Tam,
    /// <summary>Tatar.</summary>
    Tat,
    /// <summary>Telugu.</summary>
    Tel,
    /// <summary>Tajik.</summary>
    Tgk,
    /// <summary>Thai.</summary>
    Tha,
    /// <summary>Tigrinya.</summary>
    Tir,
    /// <summary>Tonga (Tonga Islands).</summary>
    Ton,
    /// <summary>Turkish.</summary>
    Tur,
    /// <summary>Uighur/Uyghur.</summary>
    Uig,
    /// <summary>Ukrainian.</summary>
    Ukr,
    /// <summary>Urdu.</summary>
    Urd,
    /// <summary>Uzbek.</summary>
    Uzb,
    /// <summary>Uzbek (Cyrillic).</summary>
    Uzb_Cyrl,
    /// <summary>Vietnamese.</summary>
    Vie,
    /// <summary>Yiddish.</summary>
    Yid,
    /// <summary>Yoruba.</summary>
    Yor
}
1189
/// <summary>
/// Single-script trained models (each supporting one or more languages), in their best (most accurate) version.
/// </summary>
public enum BestScripts
{
    /// <summary>Arabic script.</summary>
    Arabic,
    /// <summary>Armenian script.</summary>
    Armenian,
    /// <summary>Bengali script.</summary>
    Bengali,
    /// <summary>Canadian Aboriginal script.</summary>
    Canadian_Aboriginal,
    /// <summary>Cherokee script.</summary>
    Cherokee,
    /// <summary>Cyrillic script.</summary>
    Cyrillic,
    /// <summary>Devanagari script.</summary>
    Devanagari,
    /// <summary>Ethiopic script.</summary>
    Ethiopic,
    /// <summary>Fraktur script.</summary>
    Fraktur,
    /// <summary>Georgian script.</summary>
    Georgian,
    /// <summary>Greek script.</summary>
    Greek,
    /// <summary>Gujarati script.</summary>
    Gujarati,
    /// <summary>Gurmukhi script.</summary>
    Gurmukhi,
    /// <summary>Han (Simplified) script.</summary>
    HanS,
    /// <summary>Han (Simplified) script, vertical.</summary>
    HanS_Vert,
    /// <summary>Han (Traditional) script.</summary>
    HanT,
    /// <summary>Han (Traditional) script, vertical.</summary>
    HanT_Vert,
    /// <summary>Hangul script.</summary>
    Hangul,
    /// <summary>Hangul script, vertical.</summary>
    Hangul_Vert,
    /// <summary>Hebrew script.</summary>
    Hebrew,
    /// <summary>Japanese script.</summary>
    Japanese,
    /// <summary>Japanese script, vertical.</summary>
    Japanese_Vert,
    /// <summary>Kannada script.</summary>
    Kannada,
    /// <summary>Khmer script.</summary>
    Khmer,
    /// <summary>Lao script.</summary>
    Lao,
    /// <summary>Latin script.</summary>
    Latin,
    /// <summary>Malayalam script.</summary>
    Malayalam,
    /// <summary>Myanmar script.</summary>
    Myanmar,
    /// <summary>Oriya script.</summary>
    Oriya,
    /// <summary>Sinhala script.</summary>
    Sinhala,
    /// <summary>Syriac script.</summary>
    Syriac,
    /// <summary>Tamil script.</summary>
    Tamil,
    /// <summary>Telugu script.</summary>
    Telugu,
    /// <summary>Thaana script.</summary>
    Thaana,
    /// <summary>Thai script.</summary>
    Thai,
    /// <summary>Tibetan script.</summary>
    Tibetan,
    /// <summary>Vietnamese script.</summary>
    Vietnamese
}
1344
/// <summary>
/// Create a new <see cref="TesseractLanguage"/> from a <paramref name="prefix"/> and a <paramref name="language"/> name that are stored exactly as supplied (no validation or normalisation is performed).
/// </summary>
/// <param name="prefix">The folder containing the language file. Pass <see langword="null" /> to use the value of the <c>TESSDATA_PREFIX</c> environment variable.</param>
/// <param name="language">The language name. The Tesseract library expects the trained data file at <paramref name="prefix"/><c>/</c><paramref name="language"/><c>.traineddata</c>.</param>
public TesseractLanguage(string prefix, string language)
{
    Language = language;
    Prefix = prefix;
}
1355
/// <summary>
/// Create a new <see cref="TesseractLanguage"/> object using the specified trained model data file.
/// </summary>
/// <param name="fileName">The path to the trained model data file. If the file name does not end in <c>.traineddata</c> (ordinal, case-sensitive comparison), the file is copied to the temporary folder under a random name, and the copy is used by the Tesseract library. Note that this copy is not deleted by this class.</param>
/// <exception cref="ArgumentNullException"><paramref name="fileName"/> is <see langword="null" />.</exception>
public TesseractLanguage(string fileName)
{
    if (fileName == null)
    {
        throw new ArgumentNullException(nameof(fileName));
    }

    const string extension = ".traineddata";

    // Ordinal comparison: this is a file name suffix, not linguistic text. A culture-sensitive
    // comparison could match strings that do not literally end with the extension (e.g. because of
    // ignorable characters), which would make the Substring below cut at the wrong position.
    if (fileName.EndsWith(extension, StringComparison.Ordinal))
    {
        string fullPath = Path.GetFullPath(fileName);
        string name = Path.GetFileName(fullPath);

        this.Prefix = Path.GetDirectoryName(fullPath);

        // Tesseract appends the extension itself, so the language is the file name without it.
        this.Language = name.Substring(0, name.Length - extension.Length);
    }
    else
    {
        // Tesseract can only open "<prefix>/<language>.traineddata", so a file with any other name
        // has to be copied to a location that follows this pattern.
        this.Prefix = Path.GetTempPath();
        this.Language = Guid.NewGuid().ToString("N");

        File.Copy(fileName, Path.Combine(this.Prefix, this.Language + extension));
    }
}
1378
// Folder containing the entry assembly, or null when it cannot be determined. Users of this field
// check it with string.IsNullOrEmpty before probing it for trained data files.
private static readonly string ExecutablePath = ResolveExecutablePath();

// Per-user local application data folder, used as the root of the cache for downloaded language files.
private static readonly string LocalCachePath = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData);

/// <summary>
/// Determines the folder of the entry assembly without ever throwing: an exception escaping from a
/// static field initializer would surface as a <see cref="TypeInitializationException"/> and make the
/// whole class unusable.
/// </summary>
/// <returns>The directory of the entry assembly, or <see langword="null" /> if there is no entry assembly or it has no location on disk.</returns>
private static string ResolveExecutablePath()
{
    // GetEntryAssembly returns null e.g. when the managed code is hosted by an unmanaged application;
    // Location is an empty string when the assembly was not loaded from a file.
    string location = Assembly.GetEntryAssembly()?.Location;

    if (string.IsNullOrEmpty(location))
    {
        return null;
    }

    return Path.GetDirectoryName(location);
}
1381
/// <summary>
/// Create a new <see cref="TesseractLanguage"/> object using a fast integer version of a trained model for the specified language. The language file is downloaded from the <c>tesseract-ocr/tessdata_fast</c> GitHub repository. If it has already been downloaded and cached before, the downloaded file is re-used.
/// </summary>
/// <remarks>
/// The trained data file is searched for, in this order, in the <c>tessdata/fast</c> and <c>fast</c> subfolders of the executable folder and in the <c>tessdata/fast</c> subfolder of the local application data folder. If <paramref name="useAnyCached"/> is <see langword="true"/>, the executable folder itself, its <c>tessdata/best</c> and <c>best</c> subfolders, and the <c>tessdata/best</c> subfolder of the local application data folder are searched as well. If the file is not found, it is downloaded into the <c>tessdata/fast</c> subfolder of the local application data folder; any error raised by the download is propagated to the caller.
/// </remarks>
/// <param name="language">The language to use for the OCR process.</param>
/// <param name="useAnyCached">If this is <see langword="true"/>, if a cached trained model file is available for the specified language, it will be used even if it is a "best (most accurate)" model. Otherwise, only cached fast integer trained models will be used.</param>
public TesseractLanguage(Fast language, bool useAnyCached = false)
{
    // The invariant culture must be used here: a culture-sensitive conversion would, for example,
    // turn "Ind" into "ınd" (dotless i) under a Turkish culture, producing a wrong file name and URL.
    string languageName = language.ToString().ToLowerInvariant();
    string fileName = languageName + ".traineddata";

    bool hasExecutablePath = !string.IsNullOrEmpty(ExecutablePath);

    // Folders that may already contain the trained data file, in order of preference.
    List<string> candidates = new List<string>();

    if (hasExecutablePath)
    {
        candidates.Add(Path.Combine(ExecutablePath, "tessdata", "fast"));
        candidates.Add(Path.Combine(ExecutablePath, "fast"));
    }

    candidates.Add(Path.Combine(LocalCachePath, "tessdata", "fast"));

    if (useAnyCached)
    {
        // Fall back to models of any kind before resorting to a download.
        if (hasExecutablePath)
        {
            candidates.Add(ExecutablePath);
            candidates.Add(Path.Combine(ExecutablePath, "tessdata", "best"));
            candidates.Add(Path.Combine(ExecutablePath, "best"));
        }

        candidates.Add(Path.Combine(LocalCachePath, "tessdata", "best"));
    }

    string prefix = null;

    foreach (string candidate in candidates)
    {
        if (File.Exists(Path.Combine(candidate, fileName)))
        {
            prefix = candidate;
            break;
        }
    }

    if (prefix == null)
    {
        string remotePath = "https://github.com/tesseract-ocr/tessdata_fast/raw/main/" + fileName;

        string localDirectory = Path.Combine(LocalCachePath, "tessdata", "fast");

        // This does nothing if the directory already exists.
        Directory.CreateDirectory(localDirectory);

        string destination = Path.Combine(localDirectory, fileName);

        // Download to a temporary name and move the file into place only once the download has
        // completed. Since the cache lookup above only checks whether the file exists, an interrupted
        // download writing directly to the final name could leave behind a truncated file that would
        // be used for every subsequent OCR run.
        string temporary = destination + "." + Guid.NewGuid().ToString("N") + ".tmp";

        try
        {
            using (WebClient client = new WebClient())
            {
                client.DownloadFile(remotePath, temporary);
            }

            // The file may have been cached by someone else while the download was in progress.
            if (!File.Exists(destination))
            {
                File.Move(temporary, destination);
            }
        }
        finally
        {
            // Remove the leftover if the download failed or the temporary file was not needed.
            if (File.Exists(temporary))
            {
                File.Delete(temporary);
            }
        }

        prefix = localDirectory;
    }

    this.Prefix = prefix;
    this.Language = languageName;
}
1447
/// <summary>
/// Create a new <see cref="TesseractLanguage"/> object using the best (most accurate) version of the trained model for the specified language. If a suitable trained model file is already available locally, it is re-used; otherwise, the file is downloaded from the <c>tesseract-ocr/tessdata_best</c> GitHub repository and saved in the local cache.
/// </summary>
/// <param name="language">The language to use for the OCR process.</param>
/// <param name="useAnyCached">If this is <see langword="true"/> and no best (most accurate) model is available locally, any other locally available trained model file for the specified language (e.g. a "fast" model) is used instead of downloading one. If this is <see langword="false"/>, only locally available best (most accurate) models are used.</param>
/// <exception cref="WebException">The trained model file had to be downloaded and the download failed.</exception>
public TesseractLanguage(Best language, bool useAnyCached = false)
{
    // The trained data file is named after the lower-case form of the enum member. The conversion
    // must be culture-invariant: with ToLower(), a current culture such as Turkish turns an
    // upper-case 'I' into a dotless 'ı', producing a file name (and download URL) that does not exist.
    string languageName = language.ToString().ToLowerInvariant();
    string fileName = languageName + ".traineddata";

    string prefix = null;

    // Preferred locations: "best" folders under ExecutablePath (skipped when it is null or empty),
    // then the "best" folder under LocalCachePath.
    if (!string.IsNullOrEmpty(ExecutablePath) && File.Exists(Path.Combine(ExecutablePath, "tessdata", "best", fileName)))
    {
        prefix = Path.Combine(ExecutablePath, "tessdata", "best");
    }
    else if (!string.IsNullOrEmpty(ExecutablePath) && File.Exists(Path.Combine(ExecutablePath, "best", fileName)))
    {
        prefix = Path.Combine(ExecutablePath, "best");
    }
    else if (File.Exists(Path.Combine(LocalCachePath, "tessdata", "best", fileName)))
    {
        prefix = Path.Combine(LocalCachePath, "tessdata", "best");
    }
    else if (useAnyCached)
    {
        // Fallback locations, only probed when the caller accepts a model that is not a "best" one:
        // a file directly under ExecutablePath, then the "fast" folders.
        if (!string.IsNullOrEmpty(ExecutablePath) && File.Exists(Path.Combine(ExecutablePath, fileName)))
        {
            prefix = Path.Combine(ExecutablePath);
        }
        else if (!string.IsNullOrEmpty(ExecutablePath) && File.Exists(Path.Combine(ExecutablePath, "tessdata", "fast", fileName)))
        {
            prefix = Path.Combine(ExecutablePath, "tessdata", "fast");
        }
        else if (!string.IsNullOrEmpty(ExecutablePath) && File.Exists(Path.Combine(ExecutablePath, "fast", fileName)))
        {
            prefix = Path.Combine(ExecutablePath, "fast");
        }
        else if (File.Exists(Path.Combine(LocalCachePath, "tessdata", "fast", fileName)))
        {
            prefix = Path.Combine(LocalCachePath, "tessdata", "fast");
        }
    }

    if (prefix == null)
    {
        // No usable file was found: download the best model into the "best" folder under LocalCachePath.
        string remotePath = "https://github.com/tesseract-ocr/tessdata_best/raw/main/" + fileName;

        string localDirectory = Path.Combine(LocalCachePath, "tessdata", "best");

        // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
        Directory.CreateDirectory(localDirectory);

        using (WebClient client = new WebClient())
        {
            client.DownloadFile(remotePath, Path.Combine(localDirectory, fileName));
        }

        prefix = localDirectory;
    }

    this.Prefix = prefix;
    this.Language = languageName;
}
1513
/// <summary>
/// Create a new <see cref="TesseractLanguage"/> object that uses the fast integer trained model for the specified script. A trained model file that is already available locally is re-used; otherwise, the file is downloaded from the <c>tesseract-ocr/tessdata_fast</c> GitHub repository and saved in the local cache.
/// </summary>
/// <param name="script">The script to use for the OCR process.</param>
/// <param name="useAnyCached">When this is <see langword="true"/> and no fast integer model is available locally, any other locally available trained model file for the specified script (e.g. a "best (most accurate)" model) is used instead of downloading one. When this is <see langword="false"/>, only locally available fast integer models are used.</param>
public TesseractLanguage(FastScripts script, bool useAnyCached = false)
{
    // The enum name is used as the model name, except that a "_Vert" suffix is spelled "_vert".
    string modelName = script.ToString().Replace("_Vert", "_vert");
    string modelFile = modelName + ".traineddata";

    // Folders under ExecutablePath are only probed when that path is known.
    bool hasExecutablePath = !string.IsNullOrEmpty(ExecutablePath);

    string modelFolder = null;

    // Stage 1: fast script models under ExecutablePath.
    if (hasExecutablePath)
    {
        if (File.Exists(Path.Combine(ExecutablePath, "tessdata", "fast", "script", modelFile)))
        {
            modelFolder = Path.Combine(ExecutablePath, "tessdata", "fast", "script");
        }
        else if (File.Exists(Path.Combine(ExecutablePath, "fast", "script", modelFile)))
        {
            modelFolder = Path.Combine(ExecutablePath, "fast", "script");
        }
    }

    // Stage 2: fast script models under LocalCachePath.
    if (modelFolder == null && File.Exists(Path.Combine(LocalCachePath, "tessdata", "fast", "script", modelFile)))
    {
        modelFolder = Path.Combine(LocalCachePath, "tessdata", "fast", "script");
    }

    // Stage 3: if the caller allows it, any other model file for this script.
    if (modelFolder == null && useAnyCached)
    {
        if (hasExecutablePath)
        {
            if (File.Exists(Path.Combine(ExecutablePath, "script", modelFile)))
            {
                modelFolder = Path.Combine(ExecutablePath, "script");
            }
            else if (File.Exists(Path.Combine(ExecutablePath, modelFile)))
            {
                modelFolder = Path.Combine(ExecutablePath);
            }
            else if (File.Exists(Path.Combine(ExecutablePath, "tessdata", "best", "script", modelFile)))
            {
                modelFolder = Path.Combine(ExecutablePath, "tessdata", "best", "script");
            }
            else if (File.Exists(Path.Combine(ExecutablePath, "best", "script", modelFile)))
            {
                modelFolder = Path.Combine(ExecutablePath, "best", "script");
            }
        }

        if (modelFolder == null && File.Exists(Path.Combine(LocalCachePath, "tessdata", "best", "script", modelFile)))
        {
            modelFolder = Path.Combine(LocalCachePath, "tessdata", "best", "script");
        }
    }

    // Stage 4: nothing was found locally, so fetch the fast model into the folder under LocalCachePath.
    if (modelFolder == null)
    {
        string downloadFolder = Path.Combine(LocalCachePath, "tessdata", "fast", "script");
        string downloadUrl = "https://github.com/tesseract-ocr/tessdata_fast/raw/main/script/" + modelFile;

        if (!Directory.Exists(downloadFolder))
        {
            Directory.CreateDirectory(downloadFolder);
        }

        using (WebClient downloader = new WebClient())
        {
            downloader.DownloadFile(downloadUrl, Path.Combine(downloadFolder, modelFile));
        }

        modelFolder = downloadFolder;
    }

    Prefix = modelFolder;
    Language = modelName;
}
1583
/// <summary>
/// Create a new <see cref="TesseractLanguage"/> object that uses the best (most accurate) trained model for the specified script. A trained model file that is already available locally is re-used; otherwise, the file is downloaded from the <c>tesseract-ocr/tessdata_best</c> GitHub repository and saved in the local cache.
/// </summary>
/// <param name="script">The script to use for the OCR process.</param>
/// <param name="useAnyCached">When this is <see langword="true"/> and no best (most accurate) model is available locally, any other locally available trained model file for the specified script (e.g. a "fast" model) is used instead of downloading one. When this is <see langword="false"/>, only locally available best (most accurate) models are used.</param>
public TesseractLanguage(BestScripts script, bool useAnyCached = false)
{
    // The enum name is used as the model name, except that a "_Vert" suffix is spelled "_vert".
    string modelName = script.ToString().Replace("_Vert", "_vert");
    string modelFile = modelName + ".traineddata";

    // Folders under ExecutablePath are only probed when that path is known.
    bool hasExecutablePath = !string.IsNullOrEmpty(ExecutablePath);

    string modelFolder = null;

    // Stage 1: best script models under ExecutablePath.
    if (hasExecutablePath)
    {
        if (File.Exists(Path.Combine(ExecutablePath, "tessdata", "best", "script", modelFile)))
        {
            modelFolder = Path.Combine(ExecutablePath, "tessdata", "best", "script");
        }
        else if (File.Exists(Path.Combine(ExecutablePath, "best", "script", modelFile)))
        {
            modelFolder = Path.Combine(ExecutablePath, "best", "script");
        }
    }

    // Stage 2: best script models under LocalCachePath.
    if (modelFolder == null && File.Exists(Path.Combine(LocalCachePath, "tessdata", "best", "script", modelFile)))
    {
        modelFolder = Path.Combine(LocalCachePath, "tessdata", "best", "script");
    }

    // Stage 3: if the caller allows it, any other model file for this script.
    if (modelFolder == null && useAnyCached)
    {
        if (hasExecutablePath)
        {
            if (File.Exists(Path.Combine(ExecutablePath, "script", modelFile)))
            {
                modelFolder = Path.Combine(ExecutablePath, "script");
            }
            else if (File.Exists(Path.Combine(ExecutablePath, modelFile)))
            {
                modelFolder = Path.Combine(ExecutablePath);
            }
            else if (File.Exists(Path.Combine(ExecutablePath, "tessdata", "fast", "script", modelFile)))
            {
                modelFolder = Path.Combine(ExecutablePath, "tessdata", "fast", "script");
            }
            else if (File.Exists(Path.Combine(ExecutablePath, "fast", "script", modelFile)))
            {
                modelFolder = Path.Combine(ExecutablePath, "fast", "script");
            }
        }

        if (modelFolder == null && File.Exists(Path.Combine(LocalCachePath, "tessdata", "fast", "script", modelFile)))
        {
            modelFolder = Path.Combine(LocalCachePath, "tessdata", "fast", "script");
        }
    }

    // Stage 4: nothing was found locally, so fetch the best model into the folder under LocalCachePath.
    if (modelFolder == null)
    {
        string downloadFolder = Path.Combine(LocalCachePath, "tessdata", "best", "script");
        string downloadUrl = "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/" + modelFile;

        if (!Directory.Exists(downloadFolder))
        {
            Directory.CreateDirectory(downloadFolder);
        }

        using (WebClient downloader = new WebClient())
        {
            downloader.DownloadFile(downloadUrl, Path.Combine(downloadFolder, modelFile));
        }

        modelFolder = downloadFolder;
    }

    Prefix = modelFolder;
    Language = modelName;
}
1653 }
1654}
Represents a language used by Tesseract OCR.
Fast
Fast integer versions of trained models. These are models for a single language.
BestScripts
Best (most accurate) trained models. These are models for a single script supporting one or more languages.
TesseractLanguage(FastScripts script, bool useAnyCached=false)
Create a new TesseractLanguage object using a fast integer version of a trained model for the specified script.
TesseractLanguage(BestScripts script, bool useAnyCached=false)
Create a new TesseractLanguage object using the best (most accurate) version of the trained model for the specified script.
TesseractLanguage(Best language, bool useAnyCached=false)
Create a new TesseractLanguage object using the best (most accurate) version of the trained model for the specified language.
TesseractLanguage(string prefix, string language)
Create a new TesseractLanguage object using the provided prefix and language name,...
Best
Best (most accurate) trained models. These are models for a single language.
string Prefix
The name of the folder where the language file is located.
TesseractLanguage(Fast language, bool useAnyCached=false)
Create a new TesseractLanguage object using a fast integer version of a trained model for the specified language.
TesseractLanguage(string fileName)
Create a new TesseractLanguage object using the specified trained model data file.
string Language
The name of the language. The Tesseract library will assume that the trained language data file can be found at Prefix/Language.traineddata.
FastScripts
Fast integer versions of trained models. These are models for a single script supporting one or more languages.