# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4

# Portfile for the Tesseract OCR engine, its training tools, and one
# language-data subport per entry in the `langs` list further down.

PortSystem          1.0
PortGroup           github 1.0
# NOTE(review): no compiler.blacklist entries are set in this file; confirm
# whether this PortGroup is still required.
PortGroup           compiler_blacklist_versions 1.0

name                tesseract

# Source selection: the engine and training subports use the tesseract
# repository; every other subport uses one of the three tessdata
# repositories, picked by the +best / +fast variants (plain tessdata when
# neither is set).
if {${subport} in [list tesseract tesseract-training]} {
    github.setup    tesseract-ocr tesseract 5.4.1
    revision        2
    checksums       rmd160  d21bc77911627900ad32b084ffbd39d3d339aee3 \
                    sha256  37e38d561ebeadc24f1e0614f5213761e4c281a93e998978edcdefb5e2da5c98 \
                    size    1900262
} elseif {[variant_isset best]} {
    github.setup    tesseract-ocr tessdata_best 4.1.0
    checksums       rmd160  753e227df9393d064a78d3243a5bc6fdce20f73c \
                    sha256  61e07fc0d2e52f0d1a41dc92a1542725f68ed3110f1df3bae715973cd8042851 \
                    size    1387835400
} elseif {[variant_isset fast]} {
    github.setup    tesseract-ocr tessdata_fast 4.1.0
    checksums       rmd160  8163a934780db1b80edb94e5e1df45427aa4ff23 \
                    sha256  54d80714745d79955e7997fa0df6d3c43b1b1989179d5e64a67290a2eea8be64 \
                    size    352218148
} else {
    github.setup    tesseract-ocr tessdata 4.1.0
    checksums       rmd160  f314b4fa97b64c5bc1c8318bbfce0484d63823d2 \
                    sha256  251c890578e37f6d829c706b7e3728c751e2b061ac57b549b14f8769137dcf0e \
                    size    668512655
}

categories          textproc graphics pdf
license             Apache-2
maintainers         {mark @markemer} openmaintainer

description         Open source OCR engine
long_description    This package contains an OCR engine - libtesseract and a \
                    command line program - tesseract. Tesseract 4 adds a new \
                    neural net (LSTM) based OCR engine which is focused on line \
                    recognition, but also still supports the legacy Tesseract \
                    OCR engine of Tesseract 3 which works by recognizing \
                    character patterns.

# Livecheck only runs for the main port; the regex rejects versions that are
# followed by "-rc".
if {${subport} eq ${name}} {
    github.livecheck.regex {(\d\.\d+(\.\d+)?(?!-rc))}
} else {
    livecheck.type  none
}

if {${subport} in [list tesseract tesseract-training]} {
    notes "To use tesseract you must also install one of its language data subports. 
(ex tesseract-eng)"
}

# Training tools: same source as the engine, extra libraries, and the
# training-specific make targets.
subport ${name}-training {
    depends_lib-append  path:lib/pkgconfig/cairo.pc:cairo \
                        path:lib/pkgconfig/icu-uc.pc:icu \
                        path:lib/pkgconfig/pango.pc:pango

    build.target        training
    destroot.target     training-install
}

if {${subport} in [list tesseract tesseract-training]} {
    # Engine and training tools: compiled from source.
    compiler.cxx_standard   2017

    # Run the project's autogen.sh in place of autoreconf, with no arguments.
    use_autoreconf      yes
    autoreconf.cmd      ./autogen.sh
    autoreconf.args

    configure.args      --disable-silent-rules

    depends_build-append \
                        port:pkgconfig \
                        port:autoconf \
                        port:automake \
                        port:libtool \
                        port:asciidoc

    depends_lib-append  port:curl \
                        port:leptonica \
                        port:libarchive \
                        path:include/turbojpeg.h:libjpeg-turbo \
                        port:libpng \
                        port:zlib
} else {
    # Language-data subports: nothing to configure or build; the
    # .traineddata file is copied in the destroot phase declared below.
    supported_archs     noarch
    platforms           any
    use_configure       no
    build               {}

    depends_run-append  port:${name}

    variant best conflicts fast description {Use best training data} {
        notes-append "Legacy OCR Engine mode (--oem 0) is not supported by this variant"
    }

    variant fast conflicts best description {Use fast training data} {
        notes-append "Legacy OCR Engine mode (--oem 0) is not supported by this variant"
    }
}

# Flat list of <traineddata file stem> <language name> pairs consumed by the
# foreach loop at the end of the file. Names containing a space are braced so
# each remains a single list element. Do not put comments inside the braces:
# they would be read as list elements.
set langs {
    afr         Afrikaans
    amh         Amharic
    ara         Arabic
    asm         Assamese
    aze         Azerbaijani
    aze_cyrl    Azerbaijani-cyrillic
    bel         Belarusian
    ben         Bengali
    bod         Tibetan
    bos         Bosnian
    bul         Bulgarian
    cat         Catalan
    ceb         Cebuano
    ces         Czech
    chi_sim     Chinese-simple
    chi_tra     Chinese-traditional
    chr         Cherokee
    cym         Welsh
    dan         Danish
    deu         German
    dzo         Dzongkha
    ell         {Modern Greek}
    eng         English
    enm         {Middle English}
    epo         Esperanto
    est         Estonian
    eus         Basque
    fas         Persian
    fin         Finnish
    fra         French
    frm         {Middle French}
    gle         Irish
    glg         Galician
    grc         {Ancient Greek}
    guj         Gujarati
    hat         Haitian
    heb         Hebrew
    hin         Hindi
    hrv         Croatian
    hun         Hungarian
    iku         Inuktitut
    ind         Indonesian
    isl         Icelandic
    ita         Italian
    jav         Javanese
    jpn         Japanese
    kan         Kannada
    kat         Georgian
    kaz         Kazakh
    khm         {Central Khmer}
    kir         Kirghiz
    kor         Korean
    kmr         Kurdish
    lao         Lao
    lat         Latin
    lav         Latvian
    lit         Lithuanian
    mal         Malayalam
    mar         Marathi
    mkd         Macedonian
    mlt         Maltese
    msa         Malay
    mya         Burmese
    nep         Nepali
    nld         Dutch
    nor         Norwegian
    ori         Odiya
    osd         OSD
    pan         Panjabi
    pol         Polish
    por         Portuguese
    pus         Pushto
    ron         Romanian
    rus         Russian
    san         Sanskrit
    sin         Sinhala
    slk         Slovak
    slv         Slovenian
    spa         Spanish
    sqi         Albanian
    srp         Serbian
    srp_latn    Serbian-latin
    swa         Swahili
    swe         Swedish
    syr         Syriac
    tam         Tamil
    tel         Telugu
    tgk         Tajik
    tgl         Tagalog
    tha         Thai
    tir         Tigrinya
    tur         Turkish
    uig         Uighur
    ukr         Ukrainian
    urd         Urdu
    uzb         Uzbek
    uzb_cyrl    Uzbek-cyrillic
    vie         Vietnamese
    yid         Yiddish
}

# Kurdish kur data changed to kmr
# Obsolete Date: 2021-11-29
subport ${name}-kur {
    PortGroup           obsolete 1.0
    replaced_by         ${name}-kmr
    description         "Kurdish language data for the Tesseract OCR engine"
    long_description    "Kurdish language data for the Tesseract OCR engine"
}

# Declare one subport per language. Underscores in the code become hyphens in
# the subport name (aze_cyrl -> tesseract-aze-cyrl), while the installed file
# keeps the original code. The body is a double-quoted string on purpose:
# Tcl substitutes ${lang_code}, ${lang_name} and the other ${...} references
# while building the string on each iteration, and the inner braces are
# literal characters at that point.
foreach {lang_code lang_name} ${langs} {
    subport ${name}-[strsed ${lang_code} {g/_/-/}] "
        description ${lang_name} language data for the Tesseract OCR engine
        long_description ${lang_name} language data for the Tesseract OCR engine

        destroot {
            xinstall -d -m 0755 ${destroot}${prefix}/share/tessdata/
            xinstall -m 0644 ${worksrcpath}/${lang_code}.traineddata ${destroot}${prefix}/share/tessdata/
        }
    "
}