pax_global_header00006660000000000000000000000064147142040660014516gustar00rootroot0000000000000052 comment=64eab6c457b2337dd690746a5fde5c222b40d5f8 tesseract-5.5.0/000077500000000000000000000000001471420406600135225ustar00rootroot00000000000000tesseract-5.5.0/.clang-format000066400000000000000000000007461471420406600161040ustar00rootroot00000000000000BasedOnStyle: Google # Modifications for Tesseract. # Only merge empty functions. AllowShortFunctionsOnASingleLine: Empty # Do not allow short if statements. AllowShortIfStatementsOnASingleLine: false IndentPPDirectives: AfterHash # Default style for some settings. AccessModifierOffset: -2 AllowShortLoopsOnASingleLine: false # Enforce always the same pointer alignment. DerivePointerAlignment: false IncludeBlocks: Preserve PointerAlignment: Right SpacesBeforeTrailingComments: 1 tesseract-5.5.0/.gitattributes000066400000000000000000000000141471420406600164100ustar00rootroot00000000000000* text=auto tesseract-5.5.0/.github/000077500000000000000000000000001471420406600150625ustar00rootroot00000000000000tesseract-5.5.0/.github/ISSUE_TEMPLATE/000077500000000000000000000000001471420406600172455ustar00rootroot00000000000000tesseract-5.5.0/.github/ISSUE_TEMPLATE/config.yml000066400000000000000000000002361471420406600212360ustar00rootroot00000000000000blank_issues_enabled: false contact_links: - name: Question url: https://groups.google.com/g/tesseract-ocr about: Please ask questions in our forum tesseract-5.5.0/.github/ISSUE_TEMPLATE/issue-bug.yml000066400000000000000000000060071471420406600216760ustar00rootroot00000000000000name: Bug Report description: File a bug report body: - type: markdown attributes: value: | ### Attention Before you submit an issue, please review [the guidelines for this repository](https://github.com/tesseract-ocr/tesseract/blob/main/CONTRIBUTING.md). Have a question? Need help? Please use [our forum](https://groups.google.com/g/tesseract-ocr). 
Please follow these rules: * Check that your Operating Systems is [supported](https://tesseract-ocr.github.io/tessdoc/supported-operating-systems.html). * Don't open an issue for [Tesseract version which was released more than a year ago](https://tesseract-ocr.github.io/tessdoc/ReleaseNotes.html). * Don't open an issue which involves 3rd party tools that use Tesseract as a library. Only report about an issue with the Tesseract command line tool or the C/C++ API. * Please provide the input image. * Also provide output files (txt and/or tsv, hocr, pdf). You can make a zip archive that will contain these files, so GitHub will let you upload them. * Don't attach a screenshot of the command line and output. Instead, copy the text and paste it in your bug report. - type: textarea attributes: label: Current Behavior - type: textarea attributes: label: Expected Behavior - type: textarea attributes: label: Suggested Fix - type: textarea attributes: label: tesseract -v description: Version info, compiled libraries, SIMD, OpenMP placeholder: "Please paste the output of the command: tesseract -v" - type: dropdown id: os-linux attributes: label: Operating System description: Choose the OS where the bug occurs multiple: true options: - Windows 11 - Windows 10 - macOS 14 Sonoma - macOS 13 Ventura - macOS 12 Monterey - macOS 11 Big Sur - Ubuntu 24.04 Noble - Ubuntu 22.04 Jammy - Debian 12 Bookworm - Debian 11 Bullseye - RHEL 9 - RHEL 8 - type: textarea attributes: label: Other Operating System placeholder: Enter the name and version of the OS - type: textarea attributes: label: uname -a placeholder: "Paste the output of the command: umame -a (if available in your system)." 
- type: textarea attributes: label: Compiler placeholder: "Enter compiler name and version (Examples: MSVC 2019 16.11, Clang 13.0.1, GCC 11.2, Xcode 14.1)" - type: textarea attributes: label: CPU placeholder: "Enter your CPU vendor name and model (Examples: Intel Core i7-11700K, AMD Ryzen 7 5800X, Apple Silicon M1)" - type: textarea attributes: label: Virtualization / Containers placeholder: "Enter the name and version of the VM / container which you use (Examples: Oracle VM VirtualBox 7.0.4,VMware Workstation 17.0, Hyper-V, Docker 20.10.22)" - type: textarea attributes: label: Other Information placeholder: Add more details here. tesseract-5.5.0/.github/ISSUE_TEMPLATE/issue-feature-request.yml000066400000000000000000000004421471420406600242370ustar00rootroot00000000000000name: Feature Request description: File a feature request body: - type: textarea attributes: label: Your Feature Request description: Please look first at the [open issues labeled as 'feature request'](https://github.com/tesseract-ocr/tesseract/labels/feature%20request). tesseract-5.5.0/.github/workflows/000077500000000000000000000000001471420406600171175ustar00rootroot00000000000000tesseract-5.5.0/.github/workflows/autotools-macos.yml000066400000000000000000000156421471420406600230030ustar00rootroot00000000000000name: autotools-macos # autotools build of tesseract and training tools on macos homebrew and macports. # run command line tests, basicapitest and unittests. 
'--disable-openmp' on: #push: schedule: - cron: 0 20 * * * workflow_dispatch: jobs: brew: runs-on: ${{ matrix.config.os }} strategy: fail-fast: false matrix: config: - { name: macos-latest-clang-autotools, os: macos-latest, cxx: clang++ } steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Get fonts, tessdata and langdata required for unit tests run: | git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../ - name: Install dependencies run: | brew install autoconf automake cabextract libtool brew install curl icu4c leptonica libarchive pango - name: Setup Tesseract run: | ./autogen.sh - name: Configure Tesseract run: | ./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2' - name: Make and Install Tesseract run: | make -j 8 sudo make install install - name: Make and Install Training Tools run: | make training -j 8 sudo make install training-install - name: Make and run Unit Tests (clang) if: startsWith(matrix.config.cxx, 'clang') run: | make check - name: Make and run Unit Tests (unset LANG needed for g++-8, g++-9, g++-10 on macOS) if: startsWith(matrix.config.cxx, 'g') shell: bash run: | unset LANG LC_ALL LC_CTYPE locale make check - name: Display Version for tesseract, lstmtraining, text2image run: | tesseract -v lstmtraining -v text2image -v if: success() || failure() - name: List languages in different test tessdata-dir run: | tesseract --list-langs --tessdata-dir ../tessdata tesseract --list-langs --tessdata-dir ../tessdata_best tesseract --list-langs --tessdata-dir ../tessdata_fast - name: Run Tesseract on test images in different languages run: | tesseract test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata tesseract 
test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata - name: Run Tesseract basicapitest run: | export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig" cd test ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp $(pkg-config --cflags --libs tesseract lept) -pthread -std=c++17 -framework accelerate ./basicapitest - name: Display Compiler Version run: | ${{ matrix.config.cxx }} --version git log -3 --pretty=format:'%h %ad %s | %an' if: always() - name: Display Unit Tests Report run: | cat test-suite.log if: always() # ============================================================================================ ports: runs-on: ${{ matrix.config.os }} strategy: fail-fast: false matrix: config: - { name: macos-latest-clang-autotools, os: macos-latest, cxx: clang++ } steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Get fonts, tessdata and langdata required for tests run: | git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../ - name: Install Macports run: | curl -sSLO https://raw.githubusercontent.com/GiovanniBussi/macports-ci/master/macports-ci; source ./macports-ci install # --remove-brew does not remove the Homebrew entries in bin, # so remove them now. 
rm -v $(brew --prefix)/bin/* - name: Install Dependencies run: | sudo port install autoconf automake libtool pkgconfig sudo port install leptonica sudo port install cairo pango sudo port install icu +devel sudo port install cabextract libarchive curl - name: Setup Tesseract run: | ./autogen.sh - name: Configure Tesseract run: | ./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2' - name: Make and Install Tesseract run: | make -j 8 sudo make install install - name: Make and Install Training Tools run: | make training -j 8 sudo make install training-install - name: Make and run Unit Tests (clang) if: startsWith(matrix.config.cxx, 'clang') run: | make check - name: Display Version for tesseract, lstmtraining, text2image run: | tesseract -v lstmtraining -v text2image -v if: success() || failure() - name: List languages in different test tessdata-dir run: | tesseract --list-langs --tessdata-dir ../tessdata tesseract --list-langs --tessdata-dir ../tessdata_best tesseract --list-langs --tessdata-dir ../tessdata_fast - name: Run Tesseract on test images in different languages run: | tesseract test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata tesseract test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata - name: Run Tesseract basicapitest run: | export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig" cd test ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/opt/local/include -L/opt/local/lib $(pkg-config --cflags --libs tesseract lept) -pthread -std=c++17 -framework Accelerate ./basicapitest - name: Display Compiler Version run: | ${{ 
matrix.config.cxx }} --version git log -3 --pretty=format:'%h %ad %s | %an' if: always() - name: Display Unit Tests Report run: | cat test-suite.log if: always() tesseract-5.5.0/.github/workflows/autotools-openmp.yml000066400000000000000000000056521471420406600231770ustar00rootroot00000000000000name: autotools-openmp # autotools on Ubuntu - run benchmark test. '--enable-openmp' no training tools on: #push: #schedule: # - cron: 0 20 * * * workflow_dispatch: jobs: linux: runs-on: ${{ matrix.config.os }} strategy: fail-fast: false matrix: config: - { name: 20.04-openmp, os: ubuntu-20.04 } - { name: 22.04-openmp, os: ubuntu-22.04 } steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Download fonts, tessdata and langdata required for tests run: | git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../ - name: Install dependencies run: | sudo apt-get update sudo apt-get install autoconf libleptonica-dev -y sudo apt-get install libpango1.0-dev -y sudo apt-get install cabextract libarchive-dev -y sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y - name: Setup Tesseract run: | ./autogen.sh - name: Configure Tesseract run: | ./configure '--disable-shared' '--enable-openmp' '--disable-doc' 'CXX=g++' 'CXXFLAGS=-g -O2' grep -i OpenMP config.log - name: Make and Install Tesseract run: | make sudo make install - name: Setup for Tesseract benchmark using image from issue 263 fifteen times in a list file run: | wget -O i263_speed.jpg https://cloud.githubusercontent.com/assets/9968625/13674495/ac261db4-e6ab-11e5-9b4a-ad91d5b4ff87.jpg printf 'i263_speed.jpg\n%.0s' {1..15} > benchmarks.list - name: Run Tesseract using image from issue 263 with tessdata_fast run: | lscpu free g++ --version tesseract -v time tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1 echo "tessdata_fast" - name: Run Tesseract using image from issue 263 with tessdata_fast and 
OpenMP Thread Limit run: | for lmt in {1..3}; do time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_fast" done - name: Run Tesseract using image from issue 263 with tessdata_best and OpenMP Thread Limit run: | for lmt in {1..3}; do time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_best > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_best" done - name: Run Tesseract using image from issue 263 with tessdata and OpenMP Thread Limit run: | for lmt in {1..3}; do time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata" done tesseract-5.5.0/.github/workflows/autotools.yml000066400000000000000000000113651471420406600217010ustar00rootroot00000000000000name: autotools # autotools build of tesseract and training tools on Ubuntu. # run command line tests, basicapitest and unittests. 
'--disable-openmp' on: #push: schedule: - cron: 0 20 * * * jobs: linux: runs-on: ${{ matrix.config.os }} strategy: fail-fast: false matrix: config: - { name: ubuntu-22.04-clang-15-autotools, os: ubuntu-22.04, cxx: clang++-15 } #installed - { name: ubuntu-22.04-gcc-12-autotools, os: ubuntu-22.04, cxx: g++-12 } #installed - { name: ubuntu-22.04-gcc-11-autotools, os: ubuntu-22.04, cxx: g++-11 } #installed - { name: ubuntu-20.04-gcc-10-autotools, os: ubuntu-20.04, cxx: g++-10 } #installed - { name: ubuntu-20.04-gcc-9-autotools, os: ubuntu-20.04, cxx: g++-9 } #installed steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Download fonts, tessdata and langdata required for tests run: | git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../ - name: Install Compiler run: | sudo apt-get update sudo apt-get install -y ${{ matrix.config.cxx }} - name: Install dependencies run: | sudo apt-get install autoconf libleptonica-dev -y sudo apt-get install libpango1.0-dev -y sudo apt-get install cabextract libarchive-dev -y sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y - name: Setup Tesseract run: | ./autogen.sh - name: Configure Tesseract run: | ./configure '--disable-shared' '--disable-openmp' '--disable-doc' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2' - name: Make and Install Tesseract run: | make -j 8 sudo make install install - name: Make and Install Training Tools run: | make training -j 8 sudo make install training-install - name: Make and run Unit Tests run: | make check - name: Display Version for tesseract, lstmtraining, text2image run: | tesseract -v lstmtraining -v text2image -v if: success() || failure() - name: List languages in different test tessdata-dir run: | tesseract --list-langs --tessdata-dir ../tessdata tesseract --list-langs --tessdata-dir ../tessdata_best tesseract --list-langs --tessdata-dir ../tessdata_fast - name: Run Tesseract on test 
images in different languages run: | tesseract test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata tesseract test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata - name: Run Tesseract basicapitest run: | export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig" cd test ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/usr/local/include -L/usr/local/lib `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++17 ./basicapitest - name: Setup for Tesseract benchmark using image from issue 263 fifteen times in a list file run: | wget -O i263_speed.jpg https://cloud.githubusercontent.com/assets/9968625/13674495/ac261db4-e6ab-11e5-9b4a-ad91d5b4ff87.jpg printf 'i263_speed.jpg\n%.0s' {1..15} > benchmarks.list lscpu free tesseract -v - name: Run Tesseract using image from issue 263 with tessdata_fast run: | time tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1 echo "tessdata_fast - disable-openmp" - name: Run Tesseract using image from issue 263 with tessdata_best run: | time tesseract benchmarks.list - --tessdata-dir ../tessdata_best > /dev/null 2>&1 echo "tessdata_best - disable-openmp" - name: Run Tesseract using image from issue 263 with tessdata_fast run: | time tesseract benchmarks.list - --tessdata-dir ../tessdata > /dev/null 2>&1 echo "tessdata - disable-openmp" - name: Display Compiler Version run: | ${{ matrix.config.cxx }} --version git log -3 --pretty=format:'%h %ad %s | %an' if: always() - name: Display Unit Tests Report run: | cat test-suite.log if: always() 
tesseract-5.5.0/.github/workflows/cifuzz.yml000066400000000000000000000015201471420406600211520ustar00rootroot00000000000000name: CIFuzz # OSS-Fuzz CI # See https://google.github.io/oss-fuzz/getting-started/continuous-integration/ on: pull_request: branches: - main paths: - '**.cpp' - '**.h' jobs: Fuzzing: runs-on: ubuntu-latest steps: - name: Build Fuzzers id: build uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master with: oss-fuzz-project-name: 'tesseract-ocr' language: c++ dry-run: false - name: Run Fuzzers uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master with: oss-fuzz-project-name: 'tesseract-ocr' fuzz-seconds: 600 dry-run: false - name: Upload Crash uses: actions/upload-artifact@v4 if: failure() && steps.build.outcome == 'success' with: name: artifacts path: ./out/artifacts tesseract-5.5.0/.github/workflows/cmake-win64.yml000066400000000000000000000125011471420406600216660ustar00rootroot00000000000000# Based on https://github.com/zdenop/tesserocr/actions/runs/691257659/workflow # Build Tesseract on Windows using cmake. No Training Tools. 
name: cmake-win64 on: #push: schedule: - cron: 0 23 * * * workflow_dispatch: env: ILOC: d:/a/local png_ver: 1643 jobs: build: name: cmake-win64 runs-on: windows-latest steps: - uses: ilammy/setup-nasm@v1 - uses: microsoft/setup-msbuild@v2 - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: submodules: recursive - run: git fetch --prune --unshallow --tags - name: Get the version id: get_version continue-on-error: true run: | $git_info=$(git describe --tags HEAD) $stamp=$(date +'%Y-%m-%d_%H%M%S') echo "version=${git_info}" >> $env:GITHUB_OUTPUT echo "stamp=${stamp}" >> $env:GITHUB_OUTPUT - name: Setup Installation Location run: | mkdir ${{env.ILOC}} - name: Uninstall Perl run: | choco uninstall strawberryperl - name: Build and Install zlib-ng shell: cmd run: | git clone --depth 1 https://github.com/zlib-ng/zlib-ng.git cd zlib-ng cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF cmake --build build --target install cd .. - name: Build and Install libpng shell: cmd run: | curl -sSL -o lpng${{env.png_ver}}.zip https://download.sourceforge.net/libpng/lpng${{env.png_ver}}.zip unzip.exe -qq lpng${{env.png_ver}}.zip cd lpng${{env.png_ver}} cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DPNG_TESTS=OFF -DPNG_SHARED=OFF cmake --build build --target install cd .. - name: Build and Install libjpeg shell: cmd run: | git clone --depth 1 https://github.com/libjpeg-turbo/libjpeg-turbo.git cd libjpeg-turbo cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWITH_TURBOJPEG=OFF -DENABLE_SHARED=OFF cmake --build build --target install cd .. 
- name: Build and Install jbigkit shell: cmd run: | git clone --depth 1 https://github.com/zdenop/jbigkit.git cd jbigkit cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_PROGRAMS=OFF -DBUILD_TOOLS=OFF -DCMAKE_WARN_DEPRECATED=OFF cmake --build build --target install cd .. - name: Build and Install libtiff shell: cmd run: | git clone -c advice.detachedHead=false -b "v4.6.0" --depth 1 https://gitlab.com/libtiff/libtiff.git cd libtiff cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -Dtiff-tools=OFF -Dtiff-tests=OFF -Dtiff-contrib=OFF -Dtiff-docs=OFF cmake --build build --target install cd .. - name: Build and Install leptonica shell: cmd run: | echo "Building leptonica..." git clone --depth 1 https://github.com/DanBloomberg/leptonica.git cd leptonica cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_PROG=OFF -DBUILD_SHARED_LIBS=ON cmake --build build --target install - name: Remove not needed tools Before building tesseract shell: cmd run: > rm -Rf ${{env.ILOC}}/bin/*.exe - name: Build and Install tesseract shell: cmd run: | cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DENABLE_LTO=ON -DBUILD_TRAINING_TOOLS=OFF -DFAST_FLOAT=ON -DGRAPHICS_DISABLED=ON -DOPENMP_BUILD=OFF cmake --build build --target install - name: Upload Build Results uses: actions/upload-artifact@v4 with: name: tesseract-${{ steps.get_version.outputs.version }}-${{steps.get_version.outputs.stamp}}-VS2019_win64 path: ${{env.ILOC}} retention-days: 5 - name: Display Tesseract Version and Test Command Line Usage shell: cmd run: | curl -sSL https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata --output ${{env.ILOC}}/share/tessdata/eng.traineddata curl -sSL 
https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata --output ${{env.ILOC}}/share/tessdata/osd.traineddata echo "Setting TESSDATA_PREFIX..." set TESSDATA_PREFIX=${{env.ILOC}}/share/tessdata echo "Setting PATH..." set PATH=${{env.ILOC}}/bin;%PATH% echo "Checking installed tesseract version..." tesseract -v echo "Checking installed langs" tesseract --list-langs echo "Checking OCR process" tesseract test/testing/phototest.tif - tesseract-5.5.0/.github/workflows/cmake.yml000066400000000000000000000143601471420406600207260ustar00rootroot00000000000000name: cmake # cmake build of tesseract and training tools on ubuntu and macOS homebrew using Ninja. # test command line version of tesseract. run basicapitest. on: #push: schedule: - cron: 0 21 * * * jobs: basictests: name: ${{ matrix.config.name }} runs-on: ${{ matrix.config.os }} strategy: fail-fast: false matrix: config: - { name: macos-14-clang-15-cmake, os: macos-14, cxx: clang++ } # default - { name: macos-14-gcc-14-cmake, os: macos-14, cxx: g++-14 } #installed - { name: macos-15-clang-cmake, os: macos-15, cxx: clang++ } # default - { name: ubuntu-22.04-clang-15-cmake, os: ubuntu-22.04, cxx: clang++-15 } #installed - { name: ubuntu-22.04-gcc-12-cmake, os: ubuntu-22.04, cxx: g++-12 } #installed - { name: ubuntu-22.04-gcc-11-cmake, os: ubuntu-22.04, cxx: g++-11 } #installed - { name: ubuntu-20.04-gcc-10-cmake, os: ubuntu-20.04, cxx: g++-10 } #installed - { name: ubuntu-20.04-gcc-9-cmake, os: ubuntu-20.04, cxx: g++-9 } #installed steps: - name: Install compilers on Linux run: | sudo apt-get update sudo apt-get install ${{ matrix.config.cxx }} -y if: runner.os == 'Linux' - name: Install dependencies on Linux run: | sudo apt-get install autoconf libleptonica-dev -y sudo apt-get install libarchive-dev libcurl4-openssl-dev -y sudo apt-get install libpango1.0-dev -y sudo apt-get install cabextract -y sudo apt-get install ninja-build -y cmake --version if: runner.os == 'Linux' - name: Install dependencies on 
macOS run: | brew install autoconf automake brew install leptonica # brew install libarchive brew install pango brew install icu4c && brew link icu4c brew install cabextract brew install ninja ninja --version cmake --version clang++ --version g++ --version if: runner.os == 'macOS' - name: Checkout Source uses: actions/checkout@v4 with: submodules: recursive - name: Configure Tesseract (Linux) run: | mkdir build mkdir inst cmake \ -S . \ -B build \ -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DOPENMP_BUILD=OFF \ -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \ -DCMAKE_INSTALL_PREFIX:PATH=inst if: runner.os == 'Linux' - name: Configure Tesseract (macOS) shell: bash run: | set -e export PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig:$(brew --prefix)/opt/libarchive/lib/pkgconfig:/$(brew --prefix)/opt/libffi/lib/pkgconfig:$PKG_CONFIG_PATH export LDFLAGS="-L/usr/local/opt/icu4c/lib" export CPPFLAGS="-I/usr/local/opt/icu4c/include" mkdir build mkdir inst cmake \ -S . \ -B build \ -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DOPENMP_BUILD=OFF \ -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \ -DCMAKE_INSTALL_PREFIX:PATH=inst if: runner.os == 'macOS' - name: Build Tesseract run: | cmake --build build --config Release --target install - name: Display Tesseract Version run: | build/inst/bin/tesseract -v - name: Display Training Tools Version run: | build/inst/bin/lstmtraining -v build/inst/bin/text2image -v - name: Download fonts, tessdata and langdata required for tests run: | git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../ - name: List languages in different tessdata-dir run: | build/inst/bin/tesseract --list-langs --tessdata-dir ../tessdata build/inst/bin/tesseract --list-langs --tessdata-dir ../tessdata_best build/inst/bin/tesseract --list-langs --tessdata-dir ../tessdata_fast - name: Run Tesseract on test images in different languages run: | build/inst/bin/tesseract 
test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata build/inst/bin/tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata build/inst/bin/tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata build/inst/bin/tesseract test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata build/inst/bin/tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best build/inst/bin/tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata - name: Build and run basicapitest (Linux) run: | export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH" cd test ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++17 ./basicapitest if: runner.os == 'Linux' - name: Build and run basicapitest (macOS) run: | export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/Library/Homebrew/os/mac/pkgconfig/11:$PKG_CONFIG_PATH" cd test ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libcurl) -pthread -std=c++17 ./basicapitest if: runner.os == 'macOS' - name: Display Compiler Version run: | ${{ matrix.config.cxx }} --version pwd ls -la # git log -3 --pretty=format:'%h %ad %s | %an' if: always() tesseract-5.5.0/.github/workflows/codeql-analysis.yml000066400000000000000000000050261471420406600227350ustar00rootroot00000000000000# For most projects, this workflow file will not need changing; you simply need # to commit it to your repository. # # You may wish to alter this file to override the set of languages analyzed, # or to provide custom queries or build logic. 
# # ******** NOTE ******** # We have attempted to detect the languages in your repository. Please check # the `language` matrix defined below to confirm you have the correct set of # supported CodeQL languages. # name: "CodeQL" on: push: branches: [ main ] paths: - '**.cpp' - '**.h' - '**/codeql-analysis.yml' - 'm4/*.m4' - 'Makefile.am' - 'autogen.sh' - 'configure.ac' pull_request: # The branches below must be a subset of the branches above branches: [ main ] paths: - '**.cpp' - '**.h' - '**/codeql-analysis.yml' - 'm4/*.m4' - 'Makefile.am' - 'autogen.sh' - 'configure.ac' schedule: - cron: '34 23 * * 2' jobs: analyze: name: Analyze runs-on: ubuntu-latest permissions: actions: read contents: read security-events: write strategy: fail-fast: false matrix: language: [ 'cpp' ] # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] # Learn more: # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed steps: - name: Checkout repository uses: actions/checkout@v4 - name: Install dependencies run: | sudo apt-get update sudo apt-get install autoconf libleptonica-dev -y sudo apt-get install libpango1.0-dev -y sudo apt-get install cabextract libarchive-dev -y sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. 
# queries: ./path/to/local/query, your-org/your-repo/queries@main - name: Build run: | ./autogen.sh ./configure make all training - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v2 tesseract-5.5.0/.github/workflows/installer-for-windows.yml000066400000000000000000000012331471420406600241120ustar00rootroot00000000000000# GitHub actions - Create Tesseract installer for Windows name: Cross build for Windows on: # Trigger workflow in GitHub web frontend or from API. workflow_dispatch: inputs: targets: description: 'Target operating system' required: true default: 'Windows (64 bit)' type: choice options: - 'Windows (64 bit)' jobs: build64: runs-on: [ubuntu-24.04] steps: - uses: actions/checkout@v4 - name: Build Tesseract installer (64 bit) run: nsis/build.sh x86_64 - uses: actions/upload-artifact@v4 with: name: Tesseract Installer for Windows (64 bit) path: dist tesseract-5.5.0/.github/workflows/msys2.yml000066400000000000000000000046501471420406600207240ustar00rootroot00000000000000name: msys2 # msys2 build for tesseract -head from main branch. 
on: #push: schedule: - cron: 0 17 * * * jobs: windows: runs-on: windows-2019 strategy: fail-fast: false matrix: include: - msystem: MINGW64 mingw_package_prefix: mingw-w64-x86_64 defaults: run: shell: msys2 {0} steps: - uses: actions/checkout@v4 with: submodules: recursive - uses: msys2/setup-msys2@v2 with: msystem: ${{ matrix.msystem }} install: autoconf automake automake-wrapper git libtool make - run: pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-gcc - run: gcc --version - name: Install dependencies run: | pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-cairo pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-curl pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-gcc-libs pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-icu pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-leptonica pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-libarchive pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-pango pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-pkg-config pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-zlib - name: Setup Tesseract run: | ./autogen.sh - name: Configure Tesseract run: | ./configure '--disable-shared' '--disable-openmp' '--disable-doc' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2' - name: Build and install Tesseract run: | make make install - name: Make and install training tools run: | make training make training-install - name: Display version run: | tesseract -v text2image -v lstmtraining -v - name: Download fonts, tessdata and langdata required for tests run: | git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../ - name: Run Tesseract on phototest.tif and devatest.png run: | tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata 
tesseract-5.5.0/.github/workflows/sw.yml000066400000000000000000000057251471420406600203040ustar00rootroot00000000000000name: sw on: schedule: # every 3rd day - cron: 0 0 */3 * * jobs: build: runs-on: ${{ matrix.os }} container: ${{ matrix.container }} strategy: fail-fast: false matrix: os: [windows-2022, macos-latest] include: - os: ubuntu-22.04 container: fedora:latest steps: - name: packages if: matrix.os == 'ubuntu-22.04' run: sudo dnf -y install cmake gcc lld which flex bison clang clang-tools-extra git - uses: actions/checkout@v4 with: submodules: recursive - uses: egorpugin/sw-action@master - name: build if: github.event_name != 'pull_request' && (matrix.os == 'windows-2022') run: ./sw -static -shared -platform x86,x64 -config d,r build - name: build-pr if: github.event_name == 'pull_request' && (matrix.os == 'windows-2022') run: ./sw build - name: build if: github.event_name != 'pull_request' && (matrix.os != 'windows-2022') run: ./sw -static -shared -config d,r build -Dwith-tests=1 - name: build-pr if: github.event_name == 'pull_request' && (matrix.os != 'windows-2022') run: ./sw build -Dwith-tests=1 - name: download test data run: git clone https://github.com/egorpugin/tessdata tessdata_unittest - name: copy fonts if: matrix.os != 'windows-2022' run: cp tessdata_unittest/fonts/* test/testing/ - name: copy fonts if: matrix.os == 'windows-2022' run: Copy-Item -Path "tessdata_unittest\fonts\*" -Destination "test\testing" -Recurse shell: pwsh - name: test if: github.event_name != 'pull_request' && (matrix.os != 'windows-2022' && matrix.os != 'macos-latest') run: ./sw -static -shared -config "d,r" test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode" continue-on-error: true - name: test if: github.event_name == 'pull_request' && (matrix.os != 'windows-2022') run: ./sw test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode" continue-on-error: true - name: test-nightly if: matrix.os != 'windows-2022' && matrix.os != 'macos-latest' && github.event.schedule=='0 0 * * *' 
run: ./sw -static -shared -config "d,r" test -Dwith-tests=1 continue-on-error: true # windows and macos-latest tests hang here for some reason, investigate #- name: test #if: matrix.os == 'windows-2022' || matrix.os == 'macos-latest' #run: ./sw test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode" #continue-on-error: true - name: Upload Unit Test Results if: always() && matrix.os != 'windows-2022' uses: actions/upload-artifact@v4 with: name: Test Results (${{ matrix.os }}) path: .sw/test/results.xml - name: Publish Test Report if: always() && matrix.os != 'windows-2022' uses: mikepenz/action-junit-report@v4 with: check_name: test (${{ matrix.os }}) report_paths: .sw/test/results.xml github_token: ${{ secrets.GITHUB_TOKEN }} tesseract-5.5.0/.github/workflows/unittest-disablelegacy.yml000066400000000000000000000040621471420406600243110ustar00rootroot00000000000000name: unittest-disablelegacy # autotools build on ubuntu, unittests with disabled legacy engine. # currently some unittests are failing with disabled legacy engine. 
on: #push: schedule: - cron: 0 10 * * * jobs: linux: runs-on: ${{ matrix.os }} timeout-minutes: 150 strategy: fail-fast: false matrix: compiler: [ g++, clang++-15 ] os: [ ubuntu-22.04 ] steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Install dependencies run: | sudo apt-get update sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y sudo apt-get install cabextract -y #sudo apt-get install libc++-7-dev libc++abi-7-dev -y - name: Setup run: | ./autogen.sh - name: Configure run: | ./configure '--disable-shared' '--disable-legacy' 'CXX=${{ matrix.compiler }}' - name: Make and Install Tesseract run: | make sudo make install install - name: Make and Install Training Tools run: | make training sudo make install training-install - name: Display Version run: | ${{ matrix.compiler }} --version tesseract -v lstmtraining -v text2image -v if: success() || failure() - name: Download fonts, tessdata and langdata required for tests run: | git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../ - name: Run Tesseract on phototest.tif and devatest.png run: | tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata - name: Make and run Unit Tests run: | make check - name: Display Unit Tests Report run: | git log -3 ${{ matrix.compiler }} --version cat test-suite.log if: always() tesseract-5.5.0/.github/workflows/unittest-macos.yml000066400000000000000000000043711471420406600226260ustar00rootroot00000000000000name: unittest-macos # autotools build on homebrew. unittests with address sanitizers. with openmp. 
on: #push: schedule: - cron: 0 0 * * * jobs: sanitizers: name: ${{ matrix.config.name }} runs-on: ${{ matrix.config.os }} strategy: fail-fast: false matrix: config: - { name: macos-arm-14-clang-unittest, os: macos-14, cxx: clang++ } # Apple silicon - { name: macos-latest-clang-unittest, os: macos-latest, cxx: clang++ } - { name: macos-latest-gcc-unittest, os: macos-latest, cxx: g++ } steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Install dependencies (macOS Homebrew) run: | brew install autoconf automake cabextract libtool brew install curl icu4c leptonica libarchive pango - name: Setup run: | ./autogen.sh - name: Configure (macOS Homebrew) run: | ./configure '--disable-shared' '--with-pic' \ 'CXX=${{ matrix.config.cxx }}' \ 'CXXFLAGS=-g -O2 -fsanitize=address,undefined' - name: Make and Install Tesseract run: | make sudo make install - name: Make and Install Training Tools run: | make training sudo make training-install - name: Display Tesseract and Training Tools Version run: | tesseract -v lstmtraining -v text2image -v if: success() || failure() - name: Download fonts, tessdata and langdata required for tests run: | git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../ - name: Run Tesseract on phototest.tif and devatest.png run: | tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata - name: Make and run Unit Tests run: | make check - name: Display Unit Tests Report and compiler version run: | cat test-suite.log ${{ matrix.config.cxx }} --version git log -3 --pretty=format:'%h %ad %s | %an' if: always() tesseract-5.5.0/.github/workflows/unittest.yml000066400000000000000000000054311471420406600215240ustar00rootroot00000000000000name: unittest # autotools build on ubuntu. unittests with address sanitizers. with openmp. # ubuntu-20.04-gcc-unittest - CI runs out of diskspace. 
on: #push: schedule: - cron: 0 0 * * * workflow_dispatch: jobs: sanitizers: name: ${{ matrix.config.name }} runs-on: ${{ matrix.config.os }} strategy: fail-fast: false matrix: config: - { name: ubuntu-20.04-gcc-unittest, os: ubuntu-20.04, cxx: g++, cxxflags: '-g -O2 -fsanitize=address,undefined' } - { name: ubuntu-22.04-clang-unittest, os: ubuntu-22.04, cxx: clang++, cxxflags: '-g -O2 -fsanitize=address,undefined -stdlib=libc++' } steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Remove Homebrew, Android and .NET to provide more disk space run: | # https://github.com/actions/virtual-environments/issues/2606#issuecomment-772683150 sudo rm -rf /home/linuxbrew # will release Homebrew sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET - name: Install dependencies (Linux) run: | sudo apt-get update sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y sudo apt-get install cabextract -y - name: Setup run: | ./autogen.sh - name: Configure (Linux) run: | ./configure '--disable-shared' 'CXX=${{ matrix.config.cxx }}' \ 'CXXFLAGS=${{ matrix.config.cxxflags }}' - name: Make and Install Tesseract run: | ${{ matrix.config.cxx }} --version make sudo make install - name: Make and Install Training Tools run: | make training sudo make training-install - name: Display Tesseract and Training Tools Version run: | tesseract -v lstmtraining -v text2image -v if: success() || failure() - name: Download fonts, tessdata and langdata required for tests run: | git clone https://github.com/egorpugin/tessdata tessdata_unittest cp tessdata_unittest/fonts/* test/testing/ mv tessdata_unittest/* ../ - name: Run Tesseract on phototest.tif and devatest.png run: | tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata - name: Make and run Unit Tests run: | make check - 
name: Display Unit Tests Report and Compiler Version run: | cat test-suite.log ${{ matrix.config.cxx }} --version git log -3 --pretty=format:'%h %ad %s | %an' if: always() tesseract-5.5.0/.github/workflows/vcpkg.yml000066400000000000000000000105751471420406600207640ustar00rootroot00000000000000name: vcpkg # build and test of tesseract on windows using vcpkg and cmake. # vcpkg with -head does not work. https://github.com/microsoft/vcpkg/issues/16019 on: #push: schedule: - cron: 0 23 * * * jobs: build: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [windows-2019] steps: - name: Checkout Tesseract Source (--head from main branch) uses: actions/checkout@v4 with: submodules: recursive - name: Visual Studio Setup shell: cmd run: | call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" - name: Install vcpkg run: | git clone https://github.com/microsoft/vcpkg vcpkg/bootstrap-vcpkg.bat vcpkg/vcpkg integrate install - name: Build and Install Leptonica and image libraries using vcpkg run: | vcpkg/vcpkg install leptonica:x64-windows - name: Configure and Build Tesseract (--head from main branch) with cmake run: | cmake . 
-B build -DCMAKE_BUILD_TYPE=Release -DSW_BUILD=OFF -DOPENMP_BUILD=OFF -DBUILD_TRAINING_TOOLS=OFF "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake" cmake --build build --config Release --target install - name: Display Tesseract Version run: | D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe --version - name: Create CMakeLists.txt file for basicapitest shell: bash run: | cd test cat << "EOF" > CMakeLists.txt cmake_minimum_required(VERSION 3.19) project( basicapitest ) find_package( Tesseract REQUIRED ) find_package( Leptonica REQUIRED ) include_directories(${Tesseract_INCLUDE_DIRS}) include_directories(${Leptonica_INCLUDE_DIRS}) add_executable( basicapitest testing/basicapitest.cpp ) target_link_libraries(basicapitest ${Leptonica_LIBRARIES}) target_link_libraries(basicapitest Tesseract::libtesseract) add_library(libtesseract UNKNOWN IMPORTED) set_property(TARGET libtesseract PROPERTY IMPORTED_LOCATION D:/a/tesseract/tesseract/build/Release/tesseract50.lib) target_link_libraries(basicapitest Tesseract::libtesseract) EOF cat CMakeLists.txt - name: Configure basicapitest run: | cd test cmake . "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake" - name: Build basicapitest run: | cd test cmake --build . 
--config Release - name: Download tessdata and image files used for tests run: | git clone https://github.com/egorpugin/tessdata tessdata_unittest mv tessdata_unittest/* ../ - name: Run basicapitest run: | cd test D:\a\tesseract\tesseract\test\Release\basicapitest.exe - name: Run Tesseract CLI on test images in different languages run: | D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe test\testing\phototest.tif - --oem 1 --tessdata-dir ..\tessdata D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe test\testing\raaj.tif - -l hin --oem 1 --tessdata-dir ..\tessdata D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe test\testing\viet.tif - -l vie --oem 1 --tessdata-dir ..\tessdata D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe test\testing\hebrew.png - -l heb --oem 1 --tessdata-dir ..\tessdata D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe test\testing\eurotext.tif - -l fra --oem 1 --tessdata-dir ..\tessdata_best D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe test\testing\arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ..\tessdata - name: List languages in different test tessdata-dir run: | D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe --list-langs --tessdata-dir ..\tessdata D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe --list-langs --tessdata-dir ..\tessdata_best D:\a\tesseract\tesseract\build\bin\Release\tesseract.exe --list-langs --tessdata-dir ..\tessdata_fast tesseract-5.5.0/.gitignore000066400000000000000000000027471471420406600155240ustar00rootroot00000000000000*~ # Windows *.user.* *.idea* *.log *.tlog *.cache *.obj *.sdf *.opensdf *.lastbuildstate *.unsuccessfulbuild *.suo *.res *.ipch *.manifest # Linux # ignore local configuration config.* config/* Makefile Makefile.in *.m4 # ignore help scripts/files configure libtool stamp-h1 tesseract.pc config_auto.h /doc/html/* /doc/*.1 /doc/*.5 /doc/*.html /doc/*.xml # generated version file /include/tesseract/version.h # executables /tesseract 
/src/training/ambiguous_words /src/training/classifier_tester /src/training/cntraining /src/training/combine_tessdata /src/training/dawg2wordlist /src/training/merge_unicharsets /src/training/mftraining /src/training/set_unicharset_properties /src/training/shapeclustering /src/training/text2image /src/training/unicharset_extractor /src/training/wordlist2dawg *.patch # files generated by libtool /src/training/combine_lang_model /src/training/lstmeval /src/training/lstmtraining # ignore compilation files build/* /bin /cmake-* .deps .dirstamp /.libs */.libs/* */*/.deps/* */*/.libs/* *.lo *.la *.o *.Plo *.a *.class *.jar __pycache__ # tessdata *.traineddata tessdata_* # build dirs /build* /*.dll /*.lib /*.exe /*.lnk /win* .vs* .s* # files generated by "make check" /tests/.dirstamp /unittest/*.trs /unittest/tmp/* # test programs /unittest/*_test /unittest/primesbitvector /unittest/primesmap # generated files from unlvtests times.txt /unlvtests/results* # snap packaging specific rules /parts/ /stage/ /prime/ /snap/.snapcraft/ /*.snap /*_source.tar.bz2 tesseract-5.5.0/.gitmodules000066400000000000000000000003021471420406600156720ustar00rootroot00000000000000[submodule "googletest"] path = unittest/third_party/googletest url = https://github.com/google/googletest.git [submodule "test"] path = test url = https://github.com/tesseract-ocr/test.git tesseract-5.5.0/.mailmap000066400000000000000000000030561471420406600151470ustar00rootroot00000000000000Amit Dovev Egor Pugin Jeff Breidenbach Jeff Breidenbach Jim O'Regan Jim O'Regan Jim O'Regan Ray Smith Ray Smith Ray Smith Ray Smith Ray Smith Ray Smith Shree Devi Kumar <5095331+Shreeshrii@users.noreply.github.com> Shree Devi Kumar <5095331+Shreeshrii@users.noreply.github.com> <5095331+Shreeshrii@users.noreply.github.com5095331+Shreeshrii@users.noreply.github.com> Stefan Weil Stefan Weil Stefan Weil Stefan Weil Stefan Weil Stefan Weil Zdenko Podobný Zdenko Podobný Zdenko Podobný Zdenko Podobný 
tesseract-5.5.0/AUTHORS000066400000000000000000000014231471420406600145720ustar00rootroot00000000000000Ray Smith (lead developer) Ahmad Abdulkader Rika Antonova Nicholas Beato Jeff Breidenbach Samuel Charron Phil Cheatle Simon Crouch David Eger Sheelagh Huddleston Dan Johnson Rajesh Katikam Thomas Kielbus Dar-Shyang Lee Zongyi (Joe) Liu Robert Moss Chris Newton Michael Reimer Marius Renn Raquel Romano Christy Russon Shobhit Saxena Mark Seaman Faisal Shafait Hiroshi Takenaka Ranjith Unnikrishnan Joern Wanke Ping Ping Xiu Andrew Ziem Oscar Zuniga Community Contributors: Zdenko Podobný (Maintainer) Jim Regan (Maintainer) James R Barlow Stefan Brechtken Thomas Breuel Amit Dovev Martin Ettl Shree Devi Kumar Noah Metzger Tom Morris Tobias Müller Egor Pugin Robert Sachunsky Raf Schietekat Sundar M. Vaidya Robin Watts Stefan Weil Nick White Alexander Zaitsev tesseract-5.5.0/CITATIONS.bib000066400000000000000000000054431471420406600155430ustar00rootroot00000000000000@inproceedings{TableDetect, author = {Faisal Shafait and Ray Smith}, booktitle = {Document Analysis Systems}, editor = {David S. Doermann and Venu Govindaraju and Daniel P. Lopresti and Premkumar Natarajan}, pages = {65--72}, publisher = {ACM}, series = {ACM International Conference Proceeding Series}, title = {Table detection in heterogeneous documents.}, url = {http://dblp.uni-trier.de/db/conf/das/das2010.html#ShafaitS10}, year = 2010, isbn = {978-1-60558-773-8}, date = {2010-07-07} } @inproceedings{Multilingual, author = {Ray Smith and Daria Antonova and Dar-Shyang Lee}, booktitle = {MOCR '09: Proceedings of the International Workshop on Multilingual OCR}, editor = {Venu Govindaraju and Premkumar Natarajan and Santanu Chaudhury and Daniel P. 
Lopresti}, pages = {1--8}, publisher = {ACM}, series = {ACM International Conference Proceeding Series}, title = {Adapting the Tesseract Open Source OCR Engine for Multilingual OCR.}, url = {https://storage.googleapis.com/pub-tools-public-publication-data/pdf/35248.pdf}, year = 2009, isbn = {978-1-60558-698-4}, date = {2009-07-25}, doi = {http://doi.acm.org/10/1145/1577802.1577804}, location = {Barcelona, Spain}, } @inproceedings{ScriptDetect, author = {Ranjith Unnikrishnan and Ray Smith}, title = {Combined Orientation and Script Detection using the Tesseract OCR Engine}, booktitle = {MOCR '09: Proceedings of the International Workshop on Multilingual OCR}, editor = {Venu Govindaraju and Premkumar Natarajan and Santanu Chaudhury and Daniel P. Lopresti}, url = {https://storage.googleapis.com/pub-tools-public-publication-data/pdf/35506.pdf}, year = {2009}, isbn = {978-1-60558-698-4}, pages = {1--7}, location = {Barcelona, Spain}, doi = {http://doi.acm.org/10.1145/1577802.1577809}, publisher = {ACM}, address = {New York, NY, USA}, } @inproceedings{PageLayout, author = {Ray Smith}, title = {Hybrid Page Layout Analysis via Tab-Stop Detection}, booktitle = {ICDAR '09: Proceedings of the 2009 10th International Conference on Document Analysis and Recognition}, url = {https://storage.googleapis.com/pub-tools-public-publication-data/pdf/35094.pdf}, year = {2009}, isbn = {978-0-7695-3725-2}, pages = {241--245}, doi = {http://dx.doi.org/10.1109/ICDAR.2009.257}, publisher = {IEEE Computer Society}, address = {Washington, DC, USA}, } @inproceedings{TessOverview, author = {Ray Smith}, title = {An Overview of the Tesseract OCR Engine}, booktitle = {ICDAR '07: Proceedings of the Ninth International Conference on Document Analysis and Recognition}, url = {https://storage.googleapis.com/pub-tools-public-publication-data/pdf/33418.pdf}, year = {2007}, isbn = {0-7695-2822-8}, pages = {629--633}, publisher = {IEEE Computer Society}, address = {Washington, DC, USA}, } 
tesseract-5.5.0/CMakeLists.txt000066400000000000000000000777031471420406600163000ustar00rootroot00000000000000# # tesseract # # ############################################################################## # # cmake settings # # ############################################################################## cmake_minimum_required(VERSION 3.10 FATAL_ERROR) # In-source builds are disabled. if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}") message( FATAL_ERROR "CMake generation is not possible within the source directory!" "\n Remove the CMakeCache.txt file and try again from another folder, " "e.g.:\n " "\n rm CMakeCache.txt" "\n mkdir build" "\n cd build" "\n cmake ..") endif() set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/cmake") set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}/bin") set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}") # Use solution folders. set_property(GLOBAL PROPERTY USE_FOLDERS ON) set_property(GLOBAL PROPERTY PREDEFINED_TARGETS_FOLDER "CMake Targets") if(NOT ${CMAKE_VERSION} VERSION_LESS "3.15.0") if(WIN32) cmake_policy(SET CMP0091 NEW) message(STATUS "Setting policy CMP0091 to NEW") endif() endif() # ############################################################################## # # project settings # # ############################################################################## project(tesseract C CXX) # Get version with components from VERSION file. 
file(STRINGS "VERSION" VERSION_PLAIN) string(REGEX REPLACE "^([^.]*)\\..*" "\\1" VERSION_MAJOR ${VERSION_PLAIN}) string(REGEX REPLACE "^[^.]*\\.([^.]*)\\..*" "\\1" VERSION_MINOR ${VERSION_PLAIN}) string(REGEX REPLACE "^[^.]*\\.[^.]*\\.([0-9]*).*" "\\1" VERSION_PATCH ${VERSION_PLAIN}) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git) execute_process(COMMAND git --git-dir ${CMAKE_CURRENT_SOURCE_DIR}/.git describe --abbrev=4 OUTPUT_VARIABLE GIT_REV) string(REGEX REPLACE "\n$" "" PACKAGE_VERSION "${GIT_REV}") endif() if(NOT PACKAGE_VERSION) set(PACKAGE_VERSION ${VERSION_PLAIN}) endif() # Provide also same macro names as autoconf (see configure.ac). set(GENERIC_MAJOR_VERSION ${VERSION_MAJOR}) set(GENERIC_MINOR_VERSION ${VERSION_MINOR}) set(GENERIC_MICRO_VERSION ${VERSION_PATCH}) set(MINIMUM_LEPTONICA_VERSION 1.74) # ############################################################################## # # options # # ############################################################################## message(STATUS "Configuring tesseract version ${PACKAGE_VERSION}...") if(WIN32) option(SW_BUILD "Build with sw" ON) else() option(SW_BUILD "Build with sw" OFF) endif() option(OPENMP_BUILD "Build with openmp support" OFF) # see issue #1662 option(GRAPHICS_DISABLED "Disable disable graphics (ScrollView)" OFF) option(DISABLED_LEGACY_ENGINE "Disable the legacy OCR engine" OFF) option(ENABLE_LTO "Enable link-time optimization" OFF) option(FAST_FLOAT "Enable float for LSTM" ON) option(ENABLE_NATIVE "Enable optimization for host CPU (could break HW compatibility)" OFF) # see # https://stackoverflow.com/questions/52653025/why-is-march-native-used-so-rarely option(BUILD_TRAINING_TOOLS "Build training tools" ON) option(BUILD_TESTS "Build tests" OFF) option(USE_SYSTEM_ICU "Use system ICU" OFF) option(DISABLE_TIFF "Disable build with libtiff (if available)" OFF) option(DISABLE_ARCHIVE "Disable build with libarchive (if available)" OFF) option(DISABLE_CURL "Disable build with libcurl (if available)" OFF) 
option(INSTALL_CONFIGS "Install tesseract configs" ON) if(NOT ${CMAKE_VERSION} VERSION_LESS "3.15.0") if(WIN32 AND MSVC) option(WIN32_MT_BUILD "Build with MT flag for MSVC" OFF) endif() endif() # ############################################################################## # # compiler and linker # # ############################################################################## if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CLANG 1) endif() if(NOT CMAKE_BUILD_TYPE) message(STATUS "Setting build type to 'Release' as none was specified.") set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release") endif() include(CheckCXXCompilerFlag) set(CMAKE_CXX_STANDARD 17) if("cxx_std_20" IN_LIST CMAKE_CXX_COMPILE_FEATURES) set(CMAKE_CXX_STANDARD 20) endif() set(CMAKE_CXX_STANDARD_REQUIRED ON) if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # cygwin gnu c++ needs to use -std=gnu++17 instead of -std=c++17 set(CMAKE_CXX_EXTENSIONS OFF) endif() if(BUILD_SHARED_LIBS) set(CMAKE_CXX_VISIBILITY_PRESET hidden) endif() # LTO cmake_policy(SET CMP0069 NEW) include(CheckIPOSupported) check_ipo_supported(RESULT LTO_SUPPORTED OUTPUT error) if(LTO_SUPPORTED) message(STATUS "IPO / LTO supported") else() message(STATUS "IPO / LTO not supported: <${error}>") endif() set(MARCH_NATIVE_OPT OFF) if(ENABLE_NATIVE) check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE) if(COMPILER_SUPPORTS_MARCH_NATIVE) set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -march=native") if(NOT CLANG AND MSVC) # clang-cl does not know this argument set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -mtune=native") endif() set(MARCH_NATIVE_OPT ON) endif(COMPILER_SUPPORTS_MARCH_NATIVE) endif(ENABLE_NATIVE) message(STATUS "CMAKE_SYSTEM_PROCESSOR=<${CMAKE_SYSTEM_PROCESSOR}>") if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86|x86_64|AMD64|amd64|i386|i686") set(HAVE_NEON FALSE) if(MSVC) set(HAVE_AVX ON) set(AVX_COMPILE_FLAGS "/arch:AVX") 
add_definitions("-DHAVE_AVX") set(HAVE_AVX2 ON) set(AVX2_COMPILE_FLAGS "/arch:AVX2") add_definitions("-DHAVE_AVX2") set(HAVE_AVX512F ON) set(AVX512F_COMPILE_FLAGS "/arch:AVX512") add_definitions("-DHAVE_AVX512F") set(HAVE_FMA ON) set(FMA_COMPILE_FLAGS "-D__FMA__") add_definitions("-DHAVE_FMA") set(HAVE_SSE4_1 ON) set(SSE4_1_COMPILE_FLAGS "-D__SSE4_1__") add_definitions("-DHAVE_SSE4_1") set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -openmp:experimental") add_definitions("-DOPENMP_SIMD") # clang with MSVC compatibility if(CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-microsoft-unqualified-friend") if(HAVE_FMA) set(FMA_COMPILE_FLAGS "-mfma ${FMA_COMPILE_FLAGS}") endif(HAVE_FMA) if(HAVE_SSE4_1) set(SSE4_1_COMPILE_FLAGS "-msse4.1 ${SSE4_1_COMPILE_FLAGS}") endif(HAVE_SSE4_1) endif(CLANG) else() # if not MSVC check_cxx_compiler_flag("-mavx" HAVE_AVX) if(HAVE_AVX) set(AVX_COMPILE_FLAGS "-mavx") add_definitions("-DHAVE_AVX") endif(HAVE_AVX) check_cxx_compiler_flag("-mavx2" HAVE_AVX2) if(HAVE_AVX2) set(AVX2_COMPILE_FLAGS "-mavx2") add_definitions("-DHAVE_AVX2") endif() check_cxx_compiler_flag("-mavx512f" HAVE_AVX512F) if(HAVE_AVX512F) set(AVX512F_COMPILE_FLAGS "-mavx512f") add_definitions("-DHAVE_AVX512F") endif() check_cxx_compiler_flag("-mfma" HAVE_FMA) if(HAVE_FMA) set(FMA_COMPILE_FLAGS "-mfma") add_definitions("-DHAVE_FMA") endif() check_cxx_compiler_flag("-msse4.1" HAVE_SSE4_1) if(HAVE_SSE4_1) set(SSE4_1_COMPILE_FLAGS "-msse4.1") add_definitions("-DHAVE_SSE4_1") endif() check_cxx_compiler_flag("-fopenmp-simd" OPENMP_SIMD) if(OPENMP_SIMD) set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -fopenmp-simd") add_definitions("-DOPENMP_SIMD") endif(OPENMP_SIMD) endif(MSVC) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64.*|AARCH64.*") set(HAVE_AVX FALSE) set(HAVE_AVX2 FALSE) set(HAVE_AVX512F FALSE) set(HAVE_FMA FALSE) set(HAVE_SSE4_1 FALSE) set(HAVE_NEON TRUE) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm.*") set(HAVE_AVX FALSE) set(HAVE_AVX2 FALSE) set(HAVE_AVX512F FALSE) 
set(HAVE_FMA FALSE) set(HAVE_SSE4_1 FALSE) check_cxx_compiler_flag("-mfpu=neon" HAVE_NEON) if(HAVE_NEON) set(NEON_COMPILE_FLAGS "-mfpu=neon") endif(HAVE_NEON) else() set(HAVE_AVX FALSE) set(HAVE_AVX2 FALSE) set(HAVE_AVX512F FALSE) set(HAVE_FMA FALSE) set(HAVE_NEON FALSE) set(HAVE_SSE4_1 FALSE) endif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86|x86_64|AMD64|amd64|i386|i686") if(HAVE_NEON) message(STATUS "LTO build is not supported on arm/RBPi.") set(ENABLE_LTO FALSE) # enable LTO cause fatal error on arm/RBPi endif() # Compiler specific environment if(CMAKE_COMPILER_IS_GNUCXX OR MINGW) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -DDEBUG -pedantic -Og") elseif(MSVC) add_definitions(-D_CRT_SECURE_NO_WARNINGS) add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE) # strdup set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8") if(NOT CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") endif() # Hide some warnings for release target wd4244 'argument': conversion from # 'uint64_t' to 'unsigned int', possible loss of data wd4251 needs to have # dll-interface wd4267 return': conversion from 'size_t' to 'int', possible # loss of data wd4275 non dll-interface class wd4305 ...truncation from # 'double' to 'float' set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /wd4244 /wd4305 /wd4267 /wd4251 /wd4275 /wd4005" ) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /wd4068") # Don't use /Wall because it generates too many warnings. 
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /W0 /bigobj") # MT flag if(WIN32_MT_BUILD) set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") message(STATUS "Building with static CRT.") endif() endif() if(CLANG) # clang all platforms set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wno-unused-command-line-argument") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -DDEBUG -pedantic -O0") endif() if(OPENMP_BUILD AND MSVC AND "${MSVC_VERSION}" LESS 1929) set(OPENMP_BUILD OFF) endif() if(OPENMP_BUILD) find_package(OpenMP QUIET) # https://stackoverflow.com/questions/12399422 # how-to-set-linker-flags-for-openmp-in-cmakes-try-compile-function if(NOT OpenMP_FOUND AND CLANG AND WIN32) # workaround because find_package(OpenMP) does not work for clang-cl # https://gitlab.kitware.com/cmake/cmake/issues/19404 check_include_file_cxx(omp.h HAVE_OMP_H_INCLUDE) find_library(OpenMP_LIBRARY NAMES omp libomp.lib) message(">> OpenMP_LIBRARY: ${OpenMP_LIBRARY}") if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp") else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") endif() set(OpenMP_FOUND 1) # OpenMP 3.1 is fully supported from Clang 3.8.0 add_definitions(-D_OPENMP=201107) endif() if(MSVC) # Note: -openmp:llvm is available for X64 from MSVC 16.9 from MSVC 16.10 # Preview 2 there is support also for x86 and arm64 # https://devblogs.microsoft.com/cppblog/openmp-updates-and-fixes-for-cpp-in-visual-studio-2019-16-10/ if("${OpenMP_CXX_FLAGS}" STREQUAL "-openmp") set(OpenMP_CXX_FLAGS "-openmp:llvm") endif() endif() if(OpenMP_FOUND) message(">> OpenMP_FOUND ${OpenMP_FOUND} version: ${OpenMP_CXX_VERSION}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") if(NOT TARGET OpenMP::OpenMP_CXX) add_library(OpenMP::OpenMP_CXX IMPORTED INTERFACE) endif() endif() endif() if(CYGWIN) add_definitions(-D__CYGWIN__) elseif(UNIX) if(NOT ANDROID) set(LIB_pthread pthread) endif() elseif(WIN32) set(LIB_Ws2_32 Ws2_32) endif() add_definitions("-DCMAKE_BUILD") 
# ############################################################################## # # packages # # ############################################################################## include(CheckFunctions) if(SW_BUILD) find_package(SW REQUIRED) if(BUILD_SHARED_LIBS) set(SW_BUILD_SHARED_LIBS 1) else() set(SW_BUILD_SHARED_LIBS 0) endif() sw_add_package(org.sw.demo.danbloomberg.leptonica org.sw.demo.libarchive.libarchive) if(BUILD_TRAINING_TOOLS) sw_add_package(org.sw.demo.gnome.pango.pangocairo org.sw.demo.unicode.icu.i18n) endif() sw_execute() else() find_package(PkgConfig) # Check for required library. option -DLeptonica_DIR=path => cmake hint where # to find leptonica find_package(Leptonica ${MINIMUM_LEPTONICA_VERSION} CONFIG) if(NOT Leptonica_FOUND AND PKG_CONFIG_EXECUTABLE) pkg_check_modules(Leptonica lept>=${MINIMUM_LEPTONICA_VERSION}) link_directories(${Leptonica_LIBRARY_DIRS}) endif() if(NOT Leptonica_FOUND) message(FATAL_ERROR "Cannot find required library Leptonica. Quitting!") else() message(STATUS "Found leptonica version: ${Leptonica_VERSION}") endif(NOT Leptonica_FOUND) include_directories(${Leptonica_INCLUDE_DIRS}) check_leptonica_tiff_support() if ((NOT LEPT_TIFF_RESULT EQUAL 0) AND LEPT_TIFF_COMPILE_SUCCESS) message(NOTICE "Leptonica was build without TIFF support! Disabling TIFF support...") set(DISABLE_TIFF ON) elseif(NOT ${CMAKE_VERSION} VERSION_LESS "3.25") message(STATUS "Leptonica was build with TIFF support.") endif() # Check for optional libraries. 
if(DISABLE_TIFF) set(HAVE_TIFFIO_H OFF) message(STATUS "TIFF support disabled.") else(DISABLE_TIFF) find_package(TIFF) # for tesseract if(NOT TIFF_FOUND AND PKG_CONFIG_EXECUTABLE) # try PKG_CONFIG to find libtiff if cmake failed pkg_check_modules(TIFF libtiff-4) endif() if(TIFF_FOUND) set(HAVE_TIFFIO_H ON) include_directories(${TIFF_INCLUDE_DIRS}) endif(TIFF_FOUND) endif(DISABLE_TIFF) if(DISABLE_ARCHIVE) set(HAVE_LIBARCHIVE OFF) message(STATUS "LibArchive support disabled.") else(DISABLE_ARCHIVE) find_package(LibArchive) if(NOT LibArchive_FOUND AND PKG_CONFIG_EXECUTABLE) # try PKG_CONFIG to find libarchive if cmake failed pkg_check_modules(LibArchive libarchive) endif() if(LibArchive_FOUND) set(HAVE_LIBARCHIVE ON) include_directories(${LibArchive_INCLUDE_DIRS}) endif(LibArchive_FOUND) endif(DISABLE_ARCHIVE) if(DISABLE_CURL) set(HAVE_LIBCURL OFF) message(STATUS "CURL support disabled.") else(DISABLE_CURL) find_package(CURL) if(NOT CURL_FOUND AND PKG_CONFIG_EXECUTABLE) # try PKG_CONFIG to find libcurl if cmake failed pkg_check_modules(CURL libcurl) endif() if(CURL_FOUND) set(HAVE_LIBCURL ON) include_directories(${CURL_INCLUDE_DIRS}) endif(CURL_FOUND) endif(DISABLE_CURL) endif() # ############################################################################## # # configure # # ############################################################################## if(MSVC) set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} /fp:fast") else() set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -O3 -ffast-math") endif() include (GNUInstallDirs) set(AUTOCONFIG_SRC ${CMAKE_CURRENT_BINARY_DIR}/config_auto.h.in) set(AUTOCONFIG ${CMAKE_CURRENT_BINARY_DIR}/config_auto.h) add_definitions(-DHAVE_CONFIG_H) if(GRAPHICS_DISABLED) message("ScrollView debugging disabled.") endif() set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} "${CMAKE_PREFIX_PATH}/include" ${CMAKE_INSTALL_INCLUDEDIR}) include(Configure) configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY) set(INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}) 
set(LIBRARY_DIRS ${CMAKE_INSTALL_LIBDIR})

# Generate the public version header in the build tree.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/tesseract/version.h.in
               ${CMAKE_CURRENT_BINARY_DIR}/include/tesseract/version.h @ONLY)

include(CMakePackageConfigHelpers)
include(GenerateExportHeader)

# show summary of configuration
#
# The variable *name* is passed to if() instead of "${CMAKE_BUILD_TYPE}": with
# an empty build type the expanded form collapses to `if(MATCHES Debug)`, which
# CMake rejects as a malformed condition. With the name form an empty build
# type simply matches neither branch.
if(CMAKE_BUILD_TYPE MATCHES Debug)
  set(COMPILER_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}")
elseif(CMAKE_BUILD_TYPE MATCHES Release)
  set(COMPILER_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}")
  # Link-time optimization is only decided for Release builds, and only
  # enabled when it is both supported and requested.
  if(LTO_SUPPORTED AND ENABLE_LTO)
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
  else()
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION FALSE)
  endif() # LTO_SUPPORTED
endif()

# Human-readable pointer width for the summary; left unset for other sizes.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(BUILD_ARCH "64 bits")
elseif(CMAKE_SIZEOF_VOID_P EQUAL 4)
  set(BUILD_ARCH "32 bits")
endif()

message(STATUS)
message(STATUS "General configuration for Tesseract ${PACKAGE_VERSION}")
message(STATUS "--------------------------------------------------------")
message(STATUS "Build type: ${CMAKE_BUILD_TYPE} ${BUILD_ARCH}")
message(STATUS "Compiler: ${CMAKE_CXX_COMPILER_ID}")
message(STATUS "Used standard: C++${CMAKE_CXX_STANDARD}")
message(STATUS "CXX compiler options: ${COMPILER_FLAGS}")
get_directory_property(DirCompDefs COMPILE_DEFINITIONS)
message(STATUS "Compile definitions = ${DirCompDefs}")
message(STATUS "Linker options: ${CMAKE_EXE_LINKER_FLAGS} "
               "${CMAKE_EXE_LINKER_FLAGS_${CMAKE_BUILD_TYPE_UP}}")
message(STATUS "Install directory: ${CMAKE_INSTALL_PREFIX}")
message(STATUS "HAVE_AVX: ${HAVE_AVX}")
message(STATUS "HAVE_AVX2: ${HAVE_AVX2}")
message(STATUS "HAVE_AVX512F: ${HAVE_AVX512F}")
message(STATUS "HAVE_FMA: ${HAVE_FMA}")
message(STATUS "HAVE_SSE4_1: ${HAVE_SSE4_1}")
message(STATUS "MARCH_NATIVE_OPT: ${MARCH_NATIVE_OPT}")
message(STATUS "HAVE_NEON: ${HAVE_NEON}")
message(STATUS "Link-time optimization: ${CMAKE_INTERPROCEDURAL_OPTIMIZATION}")
message(STATUS "--------------------------------------------------------")
message(STATUS "Build with sw [SW_BUILD]: ${SW_BUILD}")
message(STATUS "Build with openmp support [OPENMP_BUILD]: ${OPENMP_BUILD}")
message(STATUS "Build with libarchive support [HAVE_LIBARCHIVE]: "
               "${HAVE_LIBARCHIVE}")
message(STATUS "Build with libcurl support [HAVE_LIBCURL]: ${HAVE_LIBCURL}")
message(STATUS "Enable float for LSTM [FAST_FLOAT]: ${FAST_FLOAT}")
message(STATUS "Enable optimization for host CPU (could break HW compatibility)"
               " [ENABLE_NATIVE]: ${ENABLE_NATIVE}")
message(STATUS "Disable graphics (ScrollView) [GRAPHICS_DISABLED]: "
               "${GRAPHICS_DISABLED}")
message(STATUS "Disable the legacy OCR engine [DISABLED_LEGACY_ENGINE]: "
               "${DISABLED_LEGACY_ENGINE}")
message(STATUS "Build training tools [BUILD_TRAINING_TOOLS]: "
               "${BUILD_TRAINING_TOOLS}")
message(STATUS "Build tests [BUILD_TESTS]: ${BUILD_TESTS}")
message(STATUS "Use system ICU Library [USE_SYSTEM_ICU]: ${USE_SYSTEM_ICU}")
message(
  STATUS "Install tesseract configs [INSTALL_CONFIGS]: ${INSTALL_CONFIGS}")
message(STATUS "--------------------------------------------------------")
message(STATUS)

# ##############################################################################
#
# build
#
# ##############################################################################

include(BuildFunctions)
include(SourceGroups)

add_definitions(-D_SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS=1)

# Generated headers (config_auto.h, include/tesseract/version.h) live in the
# build tree.
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
if(ANDROID_TOOLCHAIN)
  include_directories(${ANDROID_TOOLCHAIN}/sysroot/usr/include)
  add_compile_definitions(__ANDROID_API_FUTURE__)
endif()

# ##############################################################################
# LIBRARY tesseract
# ##############################################################################

file(
  GLOB
  TESSERACT_SRC
  src/ccmain/*.cpp
  src/ccstruct/*.cpp
  src/ccutil/*.cpp
  src/classify/*.cpp
  src/cutil/*.cpp
  src/dict/*.cpp
  src/lstm/*.cpp
  src/textord/*.cpp
  src/viewer/*.cpp
  src/wordrec/*.cpp)

if(DISABLED_LEGACY_ENGINE)
  # Prepend `path` to every entry of the list variable named by `srcs` and
  # write the result back to that variable in the caller's scope.
  #
  # srcs: NAME of the list variable to rewrite (not its value).
  # path: prefix joined to each entry with a "/".
  function(prepend_path srcs path)
    # Start from an empty accumulator. A function scope begins with a copy of
    # the caller's variables, so without this reset a `tmp` defined by the
    # caller would end up in the result. (The previous `set(tmp, "")` set a
    # variable literally named "tmp," and left `tmp` untouched.)
    set(tmp "")
    foreach(src IN LISTS ${srcs})
      list(APPEND tmp ${path}/${src})
    endforeach()
    set(${srcs} ${tmp} PARENT_SCOPE)
  endfunction()

  # Sources that belong to the legacy OCR engine only.
  set(TESSERACT_SRC_LEGACY
      src/ccmain/adaptions.cpp
      src/ccmain/docqual.cpp
      src/ccmain/equationdetect.cpp
      src/ccmain/fixspace.cpp
      src/ccmain/fixxht.cpp
      src/ccmain/osdetect.cpp
      src/ccmain/par_control.cpp
      src/ccmain/recogtraining.cpp
      src/ccmain/superscript.cpp
      src/ccmain/tessbox.cpp
      src/ccmain/tfacepp.cpp
      src/ccstruct/fontinfo.cpp
      src/ccstruct/params_training_featdef.cpp
      src/ccutil/ambigs.cpp
      src/ccutil/bitvector.cpp
      src/ccutil/indexmapbidi.cpp
      src/classify/adaptive.cpp
      src/classify/adaptmatch.cpp
      src/classify/blobclass.cpp
      src/classify/cluster.cpp
      src/classify/clusttool.cpp
      src/classify/cutoffs.cpp
      src/classify/featdefs.cpp
      src/classify/float2int.cpp
      src/classify/fpoint.cpp
      src/classify/intfeaturespace.cpp
      src/classify/intfx.cpp
      src/classify/intmatcher.cpp
      src/classify/intproto.cpp
      src/classify/kdtree.cpp
      src/classify/mf.cpp
      src/classify/mfoutline.cpp
      src/classify/mfx.cpp
      src/classify/normfeat.cpp
      src/classify/normmatch.cpp
      src/classify/ocrfeatures.cpp
      src/classify/outfeat.cpp
      src/classify/picofeat.cpp
      src/classify/protos.cpp
      src/classify/shapeclassifier.cpp
      src/classify/shapetable.cpp
      src/classify/tessclassifier.cpp
      src/classify/trainingsample.cpp
      src/dict/permdawg.cpp
      src/dict/hyphen.cpp
      src/wordrec/associate.cpp
      src/wordrec/chop.cpp
      src/wordrec/chopper.cpp
      src/wordrec/drawfx.cpp
      src/wordrec/findseam.cpp
      src/wordrec/gradechop.cpp
      src/wordrec/language_model.cpp
      src/wordrec/lm_consistency.cpp
      src/wordrec/lm_pain_points.cpp
      src/wordrec/lm_state.cpp
      src/wordrec/outlines.cpp
      src/wordrec/params_model.cpp
      src/wordrec/pieces.cpp
      src/wordrec/plotedges.cpp
      src/wordrec/render.cpp
      src/wordrec/segsearch.cpp
      src/wordrec/wordclass.cpp)
  # The legacy entries are prefixed with the source directory before being
  # removed from the globbed list.
  prepend_path(TESSERACT_SRC_LEGACY "${CMAKE_CURRENT_SOURCE_DIR}")
  list(REMOVE_ITEM TESSERACT_SRC ${TESSERACT_SRC_LEGACY})
endif(DISABLED_LEGACY_ENGINE)
# Architecture-specific sources that are always compiled.
list(APPEND arch_files src/arch/dotproduct.cpp src/arch/simddetect.cpp
     src/arch/intsimdmatrix.cpp)

# Apply DOTPRODUCT_FLAGS, when set, to the generic dot product source only.
if(DOTPRODUCT_FLAGS)
  set_source_files_properties(src/arch/dotproduct.cpp
                              PROPERTIES COMPILE_FLAGS ${DOTPRODUCT_FLAGS})
endif(DOTPRODUCT_FLAGS)

# Optional SIMD sources: each HAVE_* switch adds its source file(s) to
# arch_files_opt and attaches the matching *_COMPILE_FLAGS to them.
if(HAVE_AVX)
  list(APPEND arch_files_opt src/arch/dotproductavx.cpp)
  set_source_files_properties(src/arch/dotproductavx.cpp
                              PROPERTIES COMPILE_FLAGS ${AVX_COMPILE_FLAGS})
endif(HAVE_AVX)
if(HAVE_AVX2)
  # NOTE(review): dotproductavx.cpp is appended here as well as in the
  # HAVE_AVX branch, so it appears twice in arch_files_opt when both are set
  # -- confirm this is intended.
  list(APPEND arch_files_opt src/arch/intsimdmatrixavx2.cpp
       src/arch/dotproductavx.cpp)
  set_source_files_properties(src/arch/intsimdmatrixavx2.cpp
                              PROPERTIES COMPILE_FLAGS ${AVX2_COMPILE_FLAGS})
endif(HAVE_AVX2)
if(HAVE_AVX512F)
  list(APPEND arch_files_opt src/arch/dotproductavx512.cpp)
  set_source_files_properties(src/arch/dotproductavx512.cpp
                              PROPERTIES COMPILE_FLAGS ${AVX512F_COMPILE_FLAGS})
endif(HAVE_AVX512F)
if(HAVE_FMA)
  list(APPEND arch_files_opt src/arch/dotproductfma.cpp)
  set_source_files_properties(src/arch/dotproductfma.cpp
                              PROPERTIES COMPILE_FLAGS ${FMA_COMPILE_FLAGS})
endif(HAVE_FMA)
if(HAVE_SSE4_1)
  list(APPEND arch_files_opt src/arch/dotproductsse.cpp
       src/arch/intsimdmatrixsse.cpp)
  set_source_files_properties(
    src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp
    PROPERTIES COMPILE_FLAGS ${SSE4_1_COMPILE_FLAGS})
endif(HAVE_SSE4_1)
if(HAVE_NEON)
  list(APPEND arch_files_opt src/arch/dotproductneon.cpp
       src/arch/intsimdmatrixneon.cpp)
  # Unlike the other branches, flags are only attached when
  # NEON_COMPILE_FLAGS is non-empty.
  if(NEON_COMPILE_FLAGS)
    set_source_files_properties(
      src/arch/dotproductneon.cpp src/arch/intsimdmatrixneon.cpp
      PROPERTIES COMPILE_FLAGS ${NEON_COMPILE_FLAGS})
  endif()
endif(HAVE_NEON)

# Headers are collected so they can be listed as part of the library target.
file(
  GLOB_RECURSE
  TESSERACT_HDR
  include/*
  src/arch/*.h
  src/ccmain/*.h
  src/ccstruct/*.h
  src/ccutil/*.h
  src/classify/*.h
  src/cutil/*.h
  src/dict/*.h
  src/lstm/*.h
  src/textord/*.h
  src/viewer/*.h
  src/wordrec/*.h)

# API sources are listed explicitly and appended to the globbed sources.
set(TESSERACT_SRC
    ${TESSERACT_SRC}
    src/api/baseapi.cpp
    src/api/capi.cpp
    src/api/renderer.cpp
    src/api/altorenderer.cpp
    src/api/pagerenderer.cpp
    src/api/hocrrenderer.cpp
    src/api/lstmboxrenderer.cpp
    src/api/pdfrenderer.cpp
    src/api/wordstrboxrenderer.cpp)

# Config files; these two lists are used by the install rules when
# INSTALL_CONFIGS is set.
set(TESSERACT_CONFIGS
    tessdata/configs/alto
    tessdata/configs/ambigs.train
    tessdata/configs/api_config
    tessdata/configs/bazaar
    tessdata/configs/bigram
    tessdata/configs/box.train
    tessdata/configs/box.train.stderr
    tessdata/configs/digits
    tessdata/configs/get.images
    tessdata/configs/hocr
    tessdata/configs/inter
    tessdata/configs/kannada
    tessdata/configs/linebox
    tessdata/configs/logfile
    tessdata/configs/lstm.train
    tessdata/configs/lstmbox
    tessdata/configs/lstmdebug
    tessdata/configs/makebox
    tessdata/configs/page
    tessdata/configs/pdf
    tessdata/configs/quiet
    tessdata/configs/rebox
    tessdata/configs/strokewidth
    tessdata/configs/tsv
    tessdata/configs/txt
    tessdata/configs/unlv
    tessdata/configs/wordstrbox)

set(TESSERACT_TESSCONFIGS
    tessdata/tessconfigs/batch tessdata/tessconfigs/batch.nochop
    tessdata/tessconfigs/matdemo tessdata/tessconfigs/msdemo
    tessdata/tessconfigs/nobatch tessdata/tessconfigs/segdemo)

# Everything that makes up the library: sources, arch files and headers.
set(LIBTESSFILES ${TESSERACT_SRC} ${arch_files} ${arch_files_opt}
                 ${TESSERACT_HDR})

source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${LIBTESSFILES})

# No explicit library type is given, so BUILD_SHARED_LIBS decides whether
# this is a shared or a static library.
add_library(libtesseract ${LIBTESSFILES})
# NOTE(review): the bare `$` entries below look like generator expressions
# whose contents were lost when this copy of the file was extracted; restore
# them from the original CMakeLists.txt before using this file.
target_include_directories(
  libtesseract BEFORE
  PRIVATE src
  PUBLIC $
         $
         $
         $
         $
         $
         $
         $
         $
         $
         $
         $
         $)
# Shared builds: TESS_EXPORTS while compiling the library itself,
# TESS_IMPORTS for targets that link against it.
if(BUILD_SHARED_LIBS)
  target_compile_definitions(
    libtesseract
    PRIVATE -DTESS_EXPORTS
    INTERFACE -DTESS_IMPORTS)
  # generate_export_header (libtesseract EXPORT_MACRO_NAME TESS_API)
endif()
target_link_libraries(libtesseract PRIVATE ${LIB_Ws2_32} ${LIB_pthread})
if(OpenMP_CXX_FOUND)
  target_link_libraries(libtesseract PUBLIC OpenMP::OpenMP_CXX)
endif()
if(LibArchive_FOUND)
  target_link_libraries(libtesseract PUBLIC ${LibArchive_LIBRARIES})
endif(LibArchive_FOUND)
if(CURL_FOUND)
  # Link the imported target when CURL_LIBRARIES is empty, otherwise the
  # library list itself.
  if(NOT CURL_LIBRARIES)
    target_link_libraries(libtesseract PUBLIC CURL::libcurl)
  else()
    target_link_libraries(libtesseract PUBLIC ${CURL_LIBRARIES})
  endif()
endif(CURL_FOUND)

# VERSION is major.minor.patch, SOVERSION is major.minor.
set_target_properties(
  libtesseract PROPERTIES VERSION
                          ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
set_target_properties(
  libtesseract PROPERTIES SOVERSION
                          ${VERSION_MAJOR}.${VERSION_MINOR})

# NOTE(review): the conditions inside `$<$:...>` appear to have been stripped
# during extraction as well; restore them from the original file.
set_target_properties(
  libtesseract
  PROPERTIES
    OUTPUT_NAME
    tesseract$<$:${VERSION_MAJOR}${VERSION_MINOR}$<$:d>>
)

if(SW_BUILD)
  target_link_libraries(libtesseract PUBLIC org.sw.demo.danbloomberg.leptonica
                                            org.sw.demo.libarchive.libarchive)
  # The targets file starts with an include of cppan.cmake; export() then
  # appends the target definitions to it.
  file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/TesseractTargets.cmake
       "include(${CMAKE_CURRENT_BINARY_DIR}/cppan.cmake)\n")
  export(
    TARGETS libtesseract
    APPEND
    FILE ${CMAKE_CURRENT_BINARY_DIR}/TesseractTargets.cmake
    NAMESPACE Tesseract::)
else()
  target_link_libraries(libtesseract PUBLIC ${Leptonica_LIBRARIES})
  export(
    TARGETS libtesseract
    FILE ${CMAKE_CURRENT_BINARY_DIR}/TesseractTargets.cmake
    NAMESPACE Tesseract::)
endif()

if(WIN32
   AND CLANG
   AND OPENMP_BUILD)
  # Workaround for "libomp.lib is not automatically added on Windows" see:
  # http://lists.llvm.org/pipermail/openmp-dev/2015-August/000857.html
  target_link_libraries(libtesseract PRIVATE ${OpenMP_LIBRARY})
endif()

if(ANDROID)
  add_definitions(-DANDROID)
  find_package(CpuFeaturesNdkCompat REQUIRED)
  # The ndk_compat headers are addressed relative to the directory of the
  # package configuration that was found.
  target_include_directories(
    libtesseract
    PRIVATE "${CpuFeaturesNdkCompat_DIR}/../../../include/ndk_compat")
  target_link_libraries(libtesseract PRIVATE CpuFeatures::ndk_compat)
endif()

# ##############################################################################
# EXECUTABLE tesseract
# ##############################################################################

add_executable(tesseract src/tesseract.cpp)
target_link_libraries(tesseract libtesseract)
if(HAVE_TIFFIO_H AND WIN32)
  target_link_libraries(tesseract ${TIFF_LIBRARIES})
endif()

if(OPENMP_BUILD AND UNIX)
  target_link_libraries(tesseract pthread)
endif()

# ##############################################################################

# googletest is only added when tests are requested and its CMakeLists.txt
# exists at this path in the source tree.
if(BUILD_TESTS
   AND EXISTS
       ${CMAKE_CURRENT_SOURCE_DIR}/unittest/third_party/googletest/CMakeLists.txt
)
  add_subdirectory(unittest/third_party/googletest)
endif()
if(BUILD_TRAINING_TOOLS) add_subdirectory(src/training) endif() get_target_property(tesseract_NAME libtesseract NAME) get_target_property(tesseract_VERSION libtesseract VERSION) get_target_property(tesseract_OUTPUT_NAME libtesseract OUTPUT_NAME) configure_file(tesseract.pc.cmake ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc.in @ONLY) # to resolve generator expression in OUTPUT_NAME file( GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tesseract_$.pc INPUT ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc.in) configure_package_config_file( cmake/templates/TesseractConfig.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/cmake/tesseract/TesseractConfig.cmake INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/tesseract PATH_VARS INCLUDE_DIR LIBRARY_DIRS) write_basic_package_version_file( ${CMAKE_CURRENT_BINARY_DIR}/cmake/tesseract/TesseractConfigVersion.cmake VERSION ${PACKAGE_VERSION} COMPATIBILITY SameMajorVersion) install( FILES ${CMAKE_CURRENT_BINARY_DIR}/tesseract_$.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig RENAME tesseract.pc) install(TARGETS tesseract DESTINATION bin) if (MSVC) install(FILES $ DESTINATION bin OPTIONAL) endif() install( TARGETS libtesseract EXPORT TesseractTargets RUNTIME DESTINATION bin RUNTIME DESTINATION bin LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) if (MSVC AND BUILD_SHARED_LIBS) install(FILES $ DESTINATION bin OPTIONAL) endif() install( EXPORT TesseractTargets NAMESPACE Tesseract:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/tesseract) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}) install( FILES include/tesseract/baseapi.h include/tesseract/capi.h include/tesseract/renderer.h ${CMAKE_CURRENT_BINARY_DIR}/include/tesseract/version.h include/tesseract/ltrresultiterator.h include/tesseract/pageiterator.h include/tesseract/resultiterator.h include/tesseract/osdetect.h include/tesseract/publictypes.h include/tesseract/ocrclass.h include/tesseract/export.h include/tesseract/unichar.h # 
${CMAKE_CURRENT_BINARY_DIR}/src/endianness.h DESTINATION include/tesseract) if(INSTALL_CONFIGS) install(FILES ${TESSERACT_CONFIGS} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/tessdata/configs) install(FILES ${TESSERACT_TESSCONFIGS} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/tessdata/tessconfigs) endif() # ############################################################################## # uninstall target # ############################################################################## if(NOT TARGET uninstall) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/templates/cmake_uninstall.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" IMMEDIATE @ONLY) add_custom_target( uninstall COMMENT "Uninstall installed files" COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) endif() # ############################################################################## tesseract-5.5.0/CONTRIBUTING.md000066400000000000000000000137401471420406600157600ustar00rootroot00000000000000# Contributing **Please follow these rules and advice**. ## Creating an Issue or Using the Forum If you think you found a bug in Tesseract, please create an issue. Use the [user forum](https://groups.google.com/g/tesseract-ocr) instead of creating an issue if ... * You have problems using Tesseract and need some help. * You have problems installing the software. * You are not satisfied with the accuracy of the OCR, and want to ask how you can improve it. Note: You should first read the [ImproveQuality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) documentation. * You are trying to train Tesseract and you have a problem and/or want to ask a question about the training process. Note: You should first read the **official** guides [[1]](https://tesseract-ocr.github.io/tessdoc/) or [[2]](https://tesseract-ocr.github.io/tessdoc/tess5/TrainingTesseract-5.html) found in the project documentation. * You have a general question. 
An issue should only be reported if the platform you are using is one of these: * Linux (but not a version that is more than 4 years old) * Windows (Windows 7 or newer version) * macOS (last 3 releases) For older versions or other operating systems, use the Tesseract forum. When creating an issue, please report your operating system, including its specific version: "Ubuntu 16.04", "Windows 10", "Mac OS X 10.11" etc. Search through open and closed issues to see if similar issue has been reported already (and sometimes also has been solved). Similarly, before you post your question in the forum, search through past threads to see if similar question has been asked already. Read the [documentation](https://tesseract-ocr.github.io/tessdoc/) before you report your issue or ask a question in the forum. Only report an issue in the latest official release. Optionally, try to check if the issue is not already solved in the latest snapshot in the git repository. Make sure you are able to replicate the problem with Tesseract command line program. For external programs that use Tesseract (including wrappers and your own program, if you are developer), report the issue to the developers of that software if it's possible. You can also try to find help in the Tesseract forum. Each version of Tesseract has its own language data you need to obtain. You **must** obtain and install trained data for English (eng) and osd. Verify that Tesseract knows about these two files (and other trained data you installed) with this command: `tesseract --list-langs`. Post example files to demonstrate the problem. BUT don't post files with private info (about yourself or others). When attaching a file to the issue report / forum ... * Do not post a file larger than 20 MB. * GitHub supports only few file name extensions like `.png` or `.txt`. If GitHub rejects your files, you can compress them using a program that can produce a zip archive and then load this zip file to GitHub. 
Do not attach programs or libraries to your issues/posts. For large files or for programs, add a link to a location where they can be downloaded (your site, Git repo, Google Drive, Dropbox etc.) Attaching a multi-page TIFF image is useful only if you have a problem with multi-page functionality, otherwise attach only one or a few single page images. Copy the error message from the console instead of sending a screenshot of it. Use the toolbar above the comment edit area to format your comment. Add three backticks before and after a code sample or output of a command to format it (The `Insert code` button can help you do it). If your comment includes a code sample or output of a command that exceeds ~25 lines, post it as an attached text file (`filename.txt`). Use `Preview` before you send your issue. Read it again before sending. Note that most of the people that respond to issues and answer questions are either other 'regular' users or **volunteer** developers. Please be nice to them :-) The [tesseract developers](https://groups.google.com/g/tesseract-dev) forum should be used to discuss Tesseract development: bug fixes, enhancements, add-ons for Tesseract. Sometimes you will not get a response to your issue or question. We apologize in advance! Please don't take it personally. There can be many reasons for this, including: time limits, no one knows the answer (at least not the ones that are available at that time) or just that your question has been asked (and has been answered) many times before... ## For Developers: Creating a Pull Request You should always make sure your changes build and run successfully. For that, your clone needs to have all submodules (`googletest`, `test`) included. To do so, either specify `--recurse-submodules` during the initial clone, or run `git submodule update --init --recursive NAME` for each `NAME` later.
If `configure` already created those directories (blocking the clone), remove them first (or `make distclean`), then clone and reconfigure. Have a look at [the README](./README.md) and [testing README](https://github.com/tesseract-ocr/test/blob/main/README.md) and the [documentation](https://tesseract-ocr.github.io/tessdoc/Compiling-%E2%80%93-GitInstallation.html#unit-test-builds) on installation. In short, after running `configure` from the build directory of your choice, to build the library and CLI, run `make`. To test it, run `make check`. To build the training tools, run `make training`. As soon as your changes are building and tests are succeeding, you can publish them. If you have not already, please [fork](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) tesseract (somewhere) on GitHub, and push your changes to that fork (in a new branch). Then [submit as PR](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). Please also keep track of reports from CI (automated build status) and Coverity/CodeQL (quality scan). When the indicators show deterioration after your changes, further action may be required to improve them. tesseract-5.5.0/ChangeLog000066400000000000000000000550041471420406600153000ustar00rootroot000000000000002024-11-10 - V5.5.0 * Set hOCR capabilities ocrp_dir and ocrp_lang unconditionally. * Calculate row bounding box in single-word mode per (issue #4304). * Reduce clock syscalls (#4303). * Several small performance and other code fixes. * Modernized code. * Print time for tessedit_timing_debug in milliseconds. * Print time for ErrorCounter::ComputeErrorRate in milliseconds. * cmake: Correctly set the soversion based on SemVer properties. * Do not export PDBs for static libraries (issue #4279). * Several other small fixes and improvements for builds and CI. 
* Modernize code for renderers and remove filename conversion for Windows (#4330). * Add build rule for Windows installer. * Support symbolic values for --oem and --psm options. * Remove Tensorflow support. * Add RISC-V V support (#4346). * Remove broken GitHub action msys2-4.1.1. 2024-06-11 - V5.4.1 * Avoid FP overflow in NormEvidenceOf (fixes issue #4257) (#4259) * Small build fixes and code improvements (#4262, #4263, #4266, #4267) 2024-06-06 - V5.4.0 * Small build fixes and code improvements (#4241, #4243, #4244, #4245, #4246, #4248, #4249, #4250, #4253) 2024-05-19 - V5.4.0-rc2 * Fix setup of datadir on installations with Conda (issue #4230) (#4240) * Fix FP exception in Wordrec::angle_change (issue #4242) (#4243) 2024-05-12 - V5.4.0-rc1 * Build fixes, code refactoring and other smaller changes. * Fix grey result of indexed PNG in pdfrenderer. * Rename frk -> deu_latf (ISO 639-3, ISO 15924). * Remove broken Dockerfile. * Fixes for several issues reported by Coverity Scan. * Remove unsupported OpenCL code and related API functions (#4220). * Facilitate vectorization for generic build (#4223). * Add PAGE XML renderer / export (#4214). * Support training without lstmf files. * Improve CCUtil::main_setup (fixes issue #4230 related to Conda). * Allow for text angle/gradient to be retrieved (#4070). 2024-01-18 - V5.3.4 * Fixes for scrollview * Fixes for autoconf, clang and sw builds * Improve OCR for an image URL * Fail on curl download errors * New parameter curl_cookiefile * Set User-Agent: header field in HTTP request for curl downloads * Output directory list from "combine_tessdata -d" to stdout * Other small improvements for code and documentation. 2023-10-05 - V5.3.3 * Small code fixes and improvements to fix Coverity Scan issues. * Disable -mfpu=neon for aarch64. * Fix build without git clone in cloned directory (required for FreeBSD). * Other build fixes for autotools, cmake and sw. * Fix regression in layout detection which was introduced in release 5.0.0.
* Fix regression which prevented loading of submodels, introduced in release 5.0.0-rc2. * Other small improvements for code and documentation. 2023-07-11 - V5.3.2 * Updates for snap package building. * Support for Sgaw and W Pwo Karen languages in the Myanmar validator (#4065). * Improve format of logging from lstmtraining. * Use less digits in filenames of checkpoints written by lstmtraining. * Replace deprecated sprintf. * Remove unused code in function fix_rep_char. * Avoid 32 bit overflow in multiplication (fixes 3 CodeQL CI alerts). * Avoid conversions from std::string to char* to std::string. * Abort with error message if OSD is requested with LSTM-only model. * cmake: allow to disable tiff (-DDISABLE_TIFF=ON). * cmake: provide info about disabled LibArchive and CURL. * cmake: check if leptonica was build with tiff support. * Remove old broken GitHub action vcpkg-4.1.1 (fixes issue #4078). * Create config.yml. * Fix typos. 2023-04-01 - V5.3.1 * Bug fixes for some special scenarios: * Fix issue #4010. * textord: Catch empty rows in block iterator (fixes #4039). * Fix FP division by zero (issue #3995). * Improve documentation and log messages. * Build fixes and improvements (mainly for cmake). 2022-12-22 - V5.3.0 * Minor updates for documentation and cmake builds. 2022-12-13 - V5.3.0-rc1 * Fix the training tools for the legacy OCR engine (fix issue #3925). * PDF renderer: Ignore non-text blocks (fix issue #3957). * Remove colormap before thresholding (fix issue #3940). * Fix a number of performance issues reported by Coverity Scan. * Training tools: Replace call of exit function by return statement in main function. * Fix double free in function vigorous_noise_removal (fix issue #3876). * Create to_win if needed in Textord::make_spline_rows (fix issue #3875). * Bug fixes for ScrollView viewer: * Fix memory issues in ScrollView::MessageReceiver. * Catch potential nullptr in SVNetwork::SVNetwork. * Move svpaint.cpp from src/viewer to src/. 
* Add rule for svpaint executable in Autotools. * Bug fixes and improvements for build tools: * Fix AMD64 detection with autobuild on FreeBSD (fix issue #3964). * Fix tesseract.pc generated from CMake to match Autotools. * Detect availability of AVX512-VNNI. * configure.ac: fix build on aarch64_be. * Drop CI for old versions of macOS and Ubuntu. 2022-07-06 - V5.2.0 * Improvements and fixes for continuous integration, autoconf and cmake builds. * Set /Os for some 32 bit MS compilers (fixes #3769). * Improve comments and other documentation. * Add initial support for Intel AVX512F. * Fix for very large PDF files on 32 bit hosts (fixes #3805). * Fix NEON detection on FreeBSD. * Fix regression with UZN files (fixes #3837). * Fix calling delete[] for memory allocated by malloc in C API. * Add an API function to init tesseract with traineddata from memory (fixes #3691). * Replace direct access to Leptonica internal data structures by function calls and support latest releases of Leptonica. * Replace std::regex by std::string functions (fixes issue #3830). * Use compiled-in TESSDATA_PREFIX also on Windows (fixes #3767). * Add new parameter 'invert_threshold', change the default threshold from 0.5 to 0.7 and mark parameter 'tessedit_do_invert' as deprecated. 2022-03-01 - V5.1.0 * Handle image and line regions in output formats ALTO, hOCR and text. * New parameter curl_timeout for curl_easy_setop. * Build fixes and improvements. * Catch nullptr in PageIterator::Orientation to improve robustness. * Remove unused code. 2022-01-06 - V5.0.1 * Add SPDX-License-Identifier to public include files. * Support redirections when running OCR on a URL. * Lots of fixes and improvements for cmake builds. Distributions should use the autoconf build. * Fix broken msys2 build with gcc 11. * Fix parameter certainty_scale (was duplicated). * Fix some compiler warnings and clean code. * Correctly detect amd64 and i386 on FreeBSD. * Add libarchive and libcurl in continuous integration actions. 
* Update submodule googletest to release v1.11.0. 2021-11-22 - V5.0.0 * Faster training and recognition by default (float instead of double calculations) * More options for binarization * Improved support for ARM NEON * Modernized code * Removed proprietary data types like GenericVector and STRING from public API * pdf.ttf no longer needed, now integrated into the code * Faster flat build with automake * New options for combine_tessdata to show details of traineddata files * Improved training messages * Improved unit tests and fuzzing tests * Lots of bug fixes 2021-11-15 - V4.1.3 * Fix build regression for autoconf build 2021-11-14 - V4.1.2 * Add RowAttributes getter to PageIterator * Allow line images with larger width for training * Fix memory leaks * Improve build process * Don't output empty ALTO sourceImageInformation (issue #2700) * Extend URI support for Tesseract with libcurl * Abort LSTM training with integer model (fixes issue #1573) * Update documentation * Make automake builds less noisy by default * Don't use -march=native in automake builds 2019-12-26 - V4.1.1 * Implemented sw build (cppan is deprecated) * Improved cmake build * Code cleanup and optimization * A lot of bug fixes... 2019-07-07 - V4.1.0 * Added new renderers Alto, LSTMBox, WordStrBox. * Added character boxes in hOCR output. * Added python training scripts (experimental) as an alternative to shell scripts. * Better support AVX / AVX2 / SSE. * Disable OpenMP support by default (see e.g. #1171, #1081). * Fix for bounding box problem. * Implemented support for whitelist/blacklist in LSTM engine. * Improved cmake configuration. * Code modernization and improvements. * A lot of bug fixes... 2018-10-29 - V4.0.0 * Added new neural network system based on LSTMs, with major accuracy gains. * Improvements to PDF rendering. * Fixes to trainingdata rendering. * Added LSTM models+lang models to 101 languages. (tessdata repository) * Improved multi-page TIFF handling.
* Fixed damage to binary images when processing PDFs. * Fixes to training process to allow incremental training from a recognition model. * Made LSTM the default engine, pushed cube out. * Deleted cube code. * Changed OEModes --oem 0 for legacy tesseract engine, --oem 1 for LSTM, --oem 2 for both, --oem 3 for default. * Avoid use of Leptonica debug parameters or functions. * Fixed multi-language mode. * Removed support for VS2010. * Added Support for VS2015 and VS2017 with CPPAN. * Implemented invisible text only for PDF. * Added AVX / SSE support for Windows. * Enabled OpenMP support. * Parameter unlv_tilde_crunching change to false. * Miscellaneous Fixes. * Detailed Changelog can be found at https://tesseract-ocr.github.io/tessdoc/4.0x-Changelog.html and https://tesseract-ocr.github.io/tessdoc/ReleaseNotes.html#tesseract-release-notes-oct-29-2018---v400 2017-02-16 - V3.05.00 * Made some fine tuning to the hOCR output. * Added TSV as another optional output format. * Fixed ABI break introduced in 3.04.00 with the AnalyseLayout() method. * text2image tool - Enable all OpenType ligatures available in a font. This feature requires Pango 1.38 or newer. * Training tools - Replaced asserts with tprintf() and exit(1). * Fixed Cygwin compatibility. * Improved multipage tiff processing. * Improved the embedded pdf font (pdf.ttf). * Enable selection of OCR engine mode from command line. * Changed tesseract command line parameter '-psm' to '--psm'. * Write output of tesseract --help, --version and --list-langs to stdout instead of stderr. * Added new C API for orientation and script detection, removed the old one. * Increased minimum autoconf version to 2.59. * Removed dead code. * Require Leptonica 1.74 or higher. * Fixed many compiler warning. * Fixed memory and resource leaks. * Fixed some issues with the 'Cube' OCR engine. * Fixed some openCL issues. * Added option to build Tesseract with CMake build system. * Implemented CPPAN support for easy Windows building. 
2016-02-17 - V3.04.01 * Added OSD renderer for psm 0. Works for single page and multi-page images. * Improve tesstrain.sh script. * Simplify build and run of ScrollView. * Improved PDF output for OS X Preview utility. * INCOMPATIBLE fix to hOCR line height information - commit 134ebc3. * Added option to build Tesseract without Cube OCR engine (-DNO_CUBE_BUILD). * Enable OpenMP support. * Many bug fixes. 2015-07-11 - V3.04.00 * Tesseract development is now done with Git and hosted at github.com (Previously we used Subversion as a VCS and code.google.com for hosting). * Tesseract now requires leptonica 1.71 or a higher version. * Removed official support for VS 2008. * Added support for 39 additional scripts/languages, including: amh, asm, aze_cyrl, bod, bos, ceb, cym, dzo, fas, gle, guj, hat, iku, jav, kat, kat_old, kaz, khm, kir, kur, lao, lat, mar, mya, nep, ori, pan, pus, san, sin, srp_latn, syr, tgk, tir, uig, urd, uzb, uzb_cyrl, yid * Major updates to training system as a result of extensive testing on 100 languages. * New training data for over 100 languages * Improved performance with PIC compilation option. * Significant change to invisible font system in pdf output to improve correctness and compatibility with external programs, particularly ghostscript. * Improved font identification. * Major change to improve layout analysis for heavily diacritic languages: Thai, Vietnamese, Kannada, Telugu etc. * Fixed problems with shifted baselines so recognition can recover from layout analysis errors. * Major refactor to improve speed on difficult images, especially when running a heap checker. * Moved params from global in page layout to tesseractclass. * Improved single column layout analysis. * Allow ocr output to multiple formats using tesseract command line executable. * Fixed issues with mixed eng+ara scripts. * Improved script consistency in numbers. * Major refactor of control.cpp to enable line recognition. * Added tesstrain.sh - a master training script. 
* Added ability to text2image training tool to just list available fonts. * Added ability to text2image to underline words. * Improved efficiency of image processing for PDF output. * Added parameter description for each parameter listed with 'print-parameters' command line option. * Added font info to hOCR output. * Enabled streaming input and output of multi-page documents. * Many bug fixes. 2014-02-04 - V3.03(rc1) * Added new training tool text2image to generate box/tif file pairs from text and truetype fonts. * Added support for PDF output with searchable text. * Removed entire IMAGE class and all code in image directory. * Tesseract executable: support for output to stdout; limited support for one page images from stdin (especially on Windows) * Added Renderer to API to allow document-level processing and output of document formats, like hOCR, PDF. * Major refactor of word-level recognition, beam search, eliminating dead code. * Refactored classifier to make it easier to add new ones. * Generalized feature extractor to allow feature extraction from greyscale. * Improved sub/superscript treatment. * Improved baseline fit. * Added set_unicharset_properties to training tools. * Many bug fixes. * More training source data included. 2012-02-01 - V3.02 * Moved ResultIterator/PageIterator to ccmain. * Added Right-to-left/Bidi capability in the output iterators for Hebrew/Arabic. * Added paragraph detection in layout analysis/post OCR. * Fixed inconsistent xheight during training and over-chopping. * Added simultaneous multi-language capability. * Refactored top-level word recognition module. * Added experimental equation detector. * Improved handling of resolution from input images. * Blamer module added for error analysis. * Cleaned up externally used namespace by removing includes from baseapi.h. * Removed dead memory management code. * Tidied up constraints on control parameters. * Added support for ShapeTable in classifier and training. 
* Refactored class pruner. * Fixed training leaks and randomness. * Major improvements to layout analysis for better image detection, diacritic detection, better textline finding, better tabstop finding. * Improved line detection and removal. * Added fixed pitch chopper for CJK. * Added UNICHARSET to WERD_CHOICE to make multi-language handling easier. * Fixed problems with internally scaled images. * Added page and bbox to string in tr files to identify source of training data better. * Fixes to Hindi Shirorekha splitter. * Added word bigram correction. * Reduced stack memory consumption and eliminated some ugly typedefs. * Added new uniform classifier API. * Added new training error counter. * Fixed endian bug in dawg reader. * Many other fixes, including the way in which the chopper finds chops and messes with the outline while it does so. 2010-11-29 - V3.01 * Removed old/dead serialise/deserialize methods on *LISTIZED classes. * Total rewrite of DENORM to better encapsulate operation and make for potential to extract features from images. * Thread-safety! Moved all critical global and static variables to members of the appropriate class. Tesseract is now thread-safe (multiple instances can be used in parallel in multiple threads.) with the minor exception that some control parameters are still global and affect all threads. * Added Cube, a new recognizer for Arabic. Cube can also be used in combination with normal Tesseract for other languages with an improvement in accuracy at the cost of (much) lower speed. *There is no training module for Cube yet.* * `OcrEngineMode` in `Init` replaces `AccuracyVSpeed` to control cube. * Greatly improved segmentation search with consequent accuracy and speed improvements, especially for Chinese. * Added `PageIterator` and `ResultIterator` as cleaner ways to get the full results out of Tesseract, that are not currently provided by any of the `TessBaseAPI::Get*` methods. 
All other methods, such as the `ETEXT_STRUCT` in particular are deprecated and will be deleted in the future. * ApplyBoxes totally rewritten to make training easier. It can now cope with touching/overlapping training characters, and a new boxfile format allows word boxes instead of character boxes, BUT to use that you have to have already bootstrapped the language with character boxes. "Cyclic dependency" on traineddata. * Auto orientation and script detection added to page layout analysis. * Deleted *lots* of dead code. * Fixxht module replaced with scalable data-driven module. * Output font characteristics accuracy improved. * Removed the double conversion at each classification. * Upgraded oldest structs to be classes and deprecated PBLOB. * Removed non-deterministic baseline fit. * Added fixed length dawgs for Chinese. * Handling of vertical text improved. * Handling of leader dots improved. * Table detection greatly improved. * Fixed a couple of memory leaks. * Fixed font labels on output text. (Not perfect, but a lot better than before.) * Cleanup and more bug fixes * Special treatments for Hindi. * Support for build in VS2010 with Microsoft Windows SDK for Windows 7 (thanks to Michael Lutz) 2010-09-21 - V3.00 * Preparations for thread safety: * Changed TessBaseAPI methods to be non-static * Created a class hierarchy for the directories to hold instance data, and began moving code into the classes. * Moved thresholding code to a separate class. * Added major new page layout analysis module. * Added HOCR output (issues 221, 263: thanks to amkryukov). * Added Leptonica as main image I/O and handling. Currently optional, but in future releases linking with Leptonica will be mandatory. * Ambiguity table rewritten to allow definite replacements in place of fix_quotes. * Added TessdataManager to combine data files into a single file. * Some dead code deleted. * VC++6 no longer supported. It can't cope with the use of templates. * Many more languages added. 
* Doxygenation of most of the function header comments. * Added man pages. * Added bash completion script (issue 247: thanks to neskiem) * Fix integer overflow in thresholding (issue 366: thanks to Cyanide.Drake) * Add Danish Fraktur support (issues 300, 360: thanks to dsl602230@vip.cybercity.dk) * Fix file pointer leak (issue 359, thanks to yukihiro.nakadaira) * Fix an error using user-words (Issue 345: thanks to max.markin) * Fix a memory leak in tablefind.cpp (Issue 342, thanks to zdravco) * Fix a segfault due to double fclose (Issue 320, thanks to souther) * Fix an automake error (Issue 318, thanks to ichanjz) * Fix a Win32 crash on fileFormatIsTiff() (Issues 304, 316, 317, 330, 347, 349, 352: thanks to nguyenq87, max.markin, zdenop) * Fixed a number of errors in newer (stricter) versions of VC++ (Issues 301, among others) 2009-06-30 - V2.04 * Integrated bug fixes and patches and misc changes for portability. * Integrated a patch to remove some of the "access" macros. * Removed dependence on lua from the viewer, speeding it up dramatically. * Fixed the viewer so it compiles and runs properly! * Specifically fixing issues: 1, 63, 67, 71, 76, 81, 82, 106, 111, 112, 128, 129, 130, 133, 135, 142, 143, 145, 147, 153, 154, 160, 165, 170, 175, 177, 187, 192, 195, 199, 201, 205, 209, 108, 169 2008-04-22 - V2.03 * Fixed crash introduced in 2.02. * Fixed lack of tessembedded.cpp in distribution. * Added test for leptonica header files and conditional test for lib. 2008-04-21 - V2.02 (again) * Fixed namespace collisions with jpeg library (INT32). * Portability fixes for Windows for new code. * Updates to autoconf system for new code. 2008-01-23 - V2.02 * Improvements to clustering, training and classifier. * Major internationalization improvements for large-character-set languages, e.g. Kannada. * Removed some compiler warnings. * Added multipage tiff support for training and running. * Updated graphics output to talk to new java-based viewer. 
* Added ability to save n-best lists. * Added leptonica support for more file types. * Improved Init/End to make them safe. * Reduced memory use of dictionaries. * Added some new APIs to TessBaseAPI. 2007-08-27 - V2.01 * Fixed UTF8 input problems with box file reader. * Fixed various infinite loops and crashes in dawg code. * Removed include of config_auto.h from host.h. * Added automatic wctype encoding to unicharset_extractor. * Fixed dawg table too full error. * Removed svn files from tarball. * Added new functions to tessdll. * Increased maximum utf8 string in a classification result to 8. 2007-07-02 - V2.00 * Converted internal character handling to UTF8. * Trained with 6 languages. * Added unicharset_extractor, wordlist2dawg. * Added boxfile creation mode. * Added UNLV regression test capability. * Fixed problems with copyright and registered symbols. * Fixed extern "C" declarations problem. 2007-05-15 - V1.04 * Added dll exports for Windows. * Fixed name collisions with stl etc. * Made some preliminary changes ready for unicodeization. * Several bug fixes discovered during unicodeization. 2007-02-02 - V1.03 * Added mftraining and cntraining. * Added baseapi with adaptive thresholding for grey and color. * Fixed many memory leaks. * Fixed several bugs including lack of use of adaptive classifier. * Added ifdefs to eliminate graphics code and add embedded platform support. * Incorporated several patches, including 64-bit builds, Mac builds. * Minor accuracy improvements. 2006-10-04 - V1.02 * Removed dependency on Aspirin. * Fixed a few missing Apache license headers. * Removed $log. 2006-09-07 - V1.01. * Added mfcpch.cpp and getopt.cpp for VC++. * Fixed problem with greyscale images and no libtiff. * Stopped debug window from being used for the usage output. * Fixed load of inttemp for big-endian architectures. * Fixed some Mac compilation issues. 2006-06-16 - V1.0 of open source Tesseract checked-in. 
tesseract-5.5.0/INSTALL000066400000000000000000000221331471420406600145540ustar00rootroot00000000000000Copyright 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. This file is free documentation; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. Basic Installation ================== These are generic installation instructions. First you need to run `./autogen.sh', that creates `configure' script. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. (Caching is disabled by default to prevent problems with accidental use of stale cache files.) If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You only need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: 1. 
`cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. If you're using `csh' on an old version of System V, you might need to type `sh ./configure' instead to prevent `csh' from trying to execute `configure' itself. Running `configure' takes a while. While running, it prints some messages telling which features it is checking for. 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package. 4. Type `make install' to install the programs and any data files and documentation. 5. You can remove the program binaries and object files from the source code directory by typing `make clean'. To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix *Note Defining Variables::, for more details. Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you must use a version of `make' that supports the `VPATH' variable, such as GNU `make'. 
`cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. If you have to use a `make' that does not support the `VPATH' variable, you have to compile the package for one architecture at a time in the source code directory. After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. Installation Names ================== By default, `make install' will install the package's files in `/usr/local/bin', `/usr/local/man', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PATH'. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you give `configure' the option `--exec-prefix=PATH', the package will use PATH as the prefix for installing programs and libraries. Documentation and other data files will still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=PATH' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Optional Features ================= Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README' should mention any `--enable-' and `--with-' options that the package recognizes. 
For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the `--target=TYPE' option to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. 
Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc will cause the specified gcc to be used as the C compiler (unless it is overridden in the site shell script). `configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of the options to `configure', and exit. `--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). `--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. tesseract-5.5.0/INSTALL.GIT.md000066400000000000000000000042311471420406600155740ustar00rootroot00000000000000## autotools (LINUX/UNIX , msys...) If you have cloned Tesseract from GitHub, you must generate the configure script. If you have tesseract 4.0x installation in your system, please remove it before new build. You need Leptonica 1.74.2 (minimum) for Tesseract 4.0x. 
Known dependencies for training tools (excluding leptonica): * compiler with c++17 support * automake * pkg-config * pango-devel * cairo-devel * icu-devel So, the steps for making Tesseract are: ./autogen.sh ./configure make sudo make install sudo ldconfig make training sudo make training-install You need to install at least the English language and OSD traineddata files in the `TESSDATA_PREFIX` directory. You can retrieve a single file with tools like [wget](https://www.gnu.org/software/wget/), [curl](https://curl.haxx.se/), [GithubDownloader](https://github.com/intezer/GithubDownloader) or a browser. All language data files can be retrieved from the git repository (useful only for packagers!). (The repository is huge - more than 1.2 GB. You do NOT need to download traineddata files for all languages). git clone https://github.com/tesseract-ocr/tessdata.git tesseract-ocr.tessdata You need an Internet connection and [curl](https://curl.haxx.se/) to compile `ScrollView.jar` because the build will automatically download [piccolo2d-core-3.0.1.jar](https://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-core/3.0.1/piccolo2d-core-3.0.1.jar) and [piccolo2d-extras-3.0.1.jar](https://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-extras/3.0.1/piccolo2d-extras-3.0.1.jar) and [jaxb-api-2.3.1.jar](http://search.maven.org/remotecontent?filepath=javax/xml/bind/jaxb-api/2.3.1/jaxb-api-2.3.1.jar) and place them in `tesseract/java`. Just run: make ScrollView.jar and follow the instructions on [Viewer Debugging](https://tesseract-ocr.github.io/tessdoc/ViewerDebugging.html). ## cmake There is an alternative build system based on the multiplatform [cmake](https://cmake.org/) ### LINUX mkdir build cd build && cmake .. && make sudo make install ### WINDOWS See the [documentation](https://tesseract-ocr.github.io/tessdoc/) for more information on this. 
tesseract-5.5.0/LICENSE000066400000000000000000000261361471420406600145370ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. tesseract-5.5.0/Makefile.am000066400000000000000000001720721471420406600155670ustar00rootroot00000000000000
## run autogen.sh to create Makefile.in from this file
ACLOCAL_AMFLAGS = -I m4

# Targets that name an action instead of a file. Declaring them phony keeps
# them runnable even when a file or directory of the same name exists
# (the source tree is distributed with a "doc" entry, see EXTRA_DIST).
.PHONY: doc html install-langs ScrollView.jar install-jars pdf training
.PHONY: doc-pack doc-clean winsetup training-install training-uninstall

# Starts empty; later sections append to it.
CLEANFILES =

# "." is listed first so that the top-level directory is processed
# before tessdata.
SUBDIRS = . tessdata
if MINGW
SUBDIRS += nsis
endif

# Additional files and directories which are added to the distribution.
EXTRA_DIST = README.md LICENSE
EXTRA_DIST += aclocal.m4 config configure.ac autogen.sh
EXTRA_DIST += tesseract.pc.in doc
if !GRAPHICS_DISABLED
EXTRA_DIST += java
endif
EXTRA_DIST += CMakeLists.txt tesseract.pc.cmake cmake VERSION

DIST_SUBDIRS = $(SUBDIRS)

# Starts empty; optional programs are appended by later sections.
EXTRA_PROGRAMS =

# Remove the complete installed package include directory on uninstall.
uninstall-hook:
	rm -rf $(DESTDIR)$(pkgincludedir)

# Clean up the distribution directory: delete .deps directories,
# Makefiles in first and second level subdirectories, editor backup
# files (*~) and generated documentation (doc/html/*, doc/*.log).
dist-hook:
	rm -rf `find $(distdir) -name .deps -type d`
	-rm -f $(distdir)/*/Makefile $(distdir)/*/*/Makefile
	rm -f `find $(distdir) -name '*~'`
	rm -rf $(distdir)/doc/html/* $(distdir)/doc/*.log

# Both Java targets are delegated to the Makefile in the java directory.
if !GRAPHICS_DISABLED
ScrollView.jar:
	@cd "$(top_builddir)/java" && $(MAKE) $@

install-jars:
	@cd "$(top_builddir)/java" && $(MAKE) $@
endif

# Run doxygen with srcdir, builddir, version and name set in its
# environment. The leading "-" lets make continue if doxygen fails.
doc:
	-srcdir="$(top_srcdir)" builddir="$(top_builddir)" \
	version="@PACKAGE_VERSION@" name="@PACKAGE_NAME@" \
	doxygen $(top_srcdir)/doc/Doxyfile

# Pack the generated HTML documentation into
# $(top_builddir)/@PACKAGE_NAME@-@PACKAGE_VERSION@-doc-html.tar.gz.
doc-pack: doc
	-chmod a+r $(top_builddir)/doc/html/*
	@tar --create --directory=$(top_builddir)/doc/html --verbose --file=- . | gzip -c -9 > $(top_builddir)/@PACKAGE_NAME@-@PACKAGE_VERSION@-doc-html.tar.gz;

doc-clean:
	rm -rf $(top_builddir)/doc/html/*

# The Windows installer is built by the Makefile in the nsis directory.
if MINGW
winsetup: training ScrollView.jar
	@cd "$(top_builddir)/nsis" && $(MAKE) winsetup
endif

pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = tesseract.pc

# Public headers. version.h is the only one taken from the build tree.
pkginclude_HEADERS = $(top_builddir)/include/tesseract/version.h
pkginclude_HEADERS += include/tesseract/baseapi.h
pkginclude_HEADERS += include/tesseract/capi.h
pkginclude_HEADERS += include/tesseract/export.h
pkginclude_HEADERS += include/tesseract/ltrresultiterator.h
pkginclude_HEADERS += include/tesseract/ocrclass.h
pkginclude_HEADERS += include/tesseract/osdetect.h
pkginclude_HEADERS += include/tesseract/pageiterator.h
pkginclude_HEADERS += include/tesseract/publictypes.h
pkginclude_HEADERS += include/tesseract/renderer.h
pkginclude_HEADERS += include/tesseract/resultiterator.h
pkginclude_HEADERS += include/tesseract/unichar.h

# Rules for all subdirectories.

noinst_HEADERS =
noinst_LTLIBRARIES =

# AM_CPPFLAGS is only appended to in this file.
# NOTE(review): presumably its initial value is substituted by configure -
# confirm in configure.ac.
AM_CPPFLAGS += -I$(top_srcdir)/include
AM_CPPFLAGS += -I$(top_builddir)/include
if VISIBILITY
AM_CPPFLAGS += -DTESS_EXPORTS
AM_CPPFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden -fPIC
endif

AM_CXXFLAGS = $(OPENMP_CXXFLAGS)

# Rules for src/api.

# Preprocessor flags for the installed library: the global flags, an
# empty TESS_COMMON_TRAINING_API macro, the include path of every
# source directory used by the library, and the libcurl flags.
libtesseract_la_CPPFLAGS = \
    $(AM_CPPFLAGS) \
    -DTESS_COMMON_TRAINING_API= \
    -I$(top_srcdir)/src/arch \
    -I$(top_srcdir)/src/ccmain \
    -I$(top_srcdir)/src/ccstruct \
    -I$(top_srcdir)/src/ccutil \
    -I$(top_srcdir)/src/classify \
    -I$(top_srcdir)/src/cutil \
    -I$(top_srcdir)/src/dict \
    -I$(top_srcdir)/src/lstm \
    -I$(top_srcdir)/src/textord \
    -I$(top_srcdir)/src/training/common \
    -I$(top_srcdir)/src/viewer \
    -I$(top_srcdir)/src/wordrec \
    $(libcurl_CFLAGS)

lib_LTLIBRARIES = libtesseract.la

# Linker flags: external libraries first, then the platform specific
# "no undefined symbols" option, then the libtool version information.
libtesseract_la_LDFLAGS = \
    $(LEPTONICA_LIBS) \
    $(libarchive_LIBS) \
    $(libcurl_LIBS)
if T_WIN
libtesseract_la_LDFLAGS += -no-undefined -lws2_32
else
libtesseract_la_LDFLAGS += $(NOUNDEFINED)
endif
libtesseract_la_LDFLAGS += -version-info $(GENERIC_LIBRARY_VERSION)

# API sources. The sections for the other source directories append
# their own files to libtesseract_la_SOURCES.
libtesseract_la_SOURCES = \
    src/api/baseapi.cpp \
    src/api/altorenderer.cpp \
    src/api/pagerenderer.cpp \
    src/api/capi.cpp \
    src/api/hocrrenderer.cpp \
    src/api/lstmboxrenderer.cpp \
    src/api/pdfrenderer.cpp \
    src/api/renderer.cpp \
    src/api/wordstrboxrenderer.cpp

# Convenience libraries which are always part of libtesseract.
libtesseract_la_LIBADD = \
    libtesseract_ccutil.la \
    libtesseract_lstm.la \
    libtesseract_native.la

# Rules for src/arch.

noinst_HEADERS += \
    src/arch/dotproduct.h \
    src/arch/intsimdmatrix.h \
    src/arch/simddetect.h

# Each helper library below is compiled with its own compiler flags
# and, except for the unconditional "native" one, is only built and
# added to libtesseract when the matching conditional is true.
noinst_LTLIBRARIES += libtesseract_native.la

libtesseract_native_la_CXXFLAGS = -O3 -ffast-math
if OPENMP_SIMD
libtesseract_native_la_CXXFLAGS += -fopenmp-simd -DOPENMP_SIMD
endif
libtesseract_native_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_native_la_SOURCES = src/arch/dotproduct.cpp

if HAVE_AVX
libtesseract_avx_la_CXXFLAGS = -mavx -I$(top_srcdir)/src/ccutil
libtesseract_avx_la_SOURCES = src/arch/dotproductavx.cpp
libtesseract_la_LIBADD += libtesseract_avx.la
noinst_LTLIBRARIES += libtesseract_avx.la
endif

if HAVE_AVX2
libtesseract_avx2_la_CXXFLAGS = -mavx2 -I$(top_srcdir)/src/ccutil
libtesseract_avx2_la_SOURCES = src/arch/intsimdmatrixavx2.cpp
libtesseract_la_LIBADD += libtesseract_avx2.la
noinst_LTLIBRARIES += libtesseract_avx2.la
endif

if HAVE_AVX512F
libtesseract_avx512_la_CXXFLAGS = -mavx512f -I$(top_srcdir)/src/ccutil
libtesseract_avx512_la_SOURCES = src/arch/dotproductavx512.cpp
libtesseract_la_LIBADD += libtesseract_avx512.la
noinst_LTLIBRARIES += libtesseract_avx512.la
endif

if HAVE_FMA
libtesseract_fma_la_CXXFLAGS = -mfma -I$(top_srcdir)/src/ccutil
libtesseract_fma_la_SOURCES = src/arch/dotproductfma.cpp
libtesseract_la_LIBADD += libtesseract_fma.la
noinst_LTLIBRARIES += libtesseract_fma.la
endif

if HAVE_SSE4_1
libtesseract_sse_la_CXXFLAGS = -msse4.1 -I$(top_srcdir)/src/ccutil
libtesseract_sse_la_SOURCES = \
    src/arch/dotproductsse.cpp \
    src/arch/intsimdmatrixsse.cpp
libtesseract_la_LIBADD += libtesseract_sse.la
noinst_LTLIBRARIES += libtesseract_sse.la
endif

if HAVE_NEON
libtesseract_neon_la_CXXFLAGS = $(NEON_CXXFLAGS) -O3
if OPENMP_SIMD
libtesseract_neon_la_CXXFLAGS += -fopenmp-simd -DOPENMP_SIMD
endif
libtesseract_neon_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_neon_la_SOURCES = \
    src/arch/intsimdmatrixneon.cpp \
    src/arch/dotproductneon.cpp
libtesseract_la_LIBADD += libtesseract_neon.la
noinst_LTLIBRARIES += libtesseract_neon.la
endif

if HAVE_RVV
libtesseract_rvv_la_CXXFLAGS = $(RVV_CXXFLAGS) -O3 -I$(top_srcdir)/src/ccutil
libtesseract_rvv_la_SOURCES = src/arch/intsimdmatrixrvv.cpp
libtesseract_la_LIBADD += libtesseract_rvv.la
noinst_LTLIBRARIES += libtesseract_rvv.la
endif

# These two files are compiled as part of libtesseract itself.
libtesseract_la_SOURCES += \
    src/arch/intsimdmatrix.cpp \
    src/arch/simddetect.cpp

# Rules for src/ccmain.

noinst_HEADERS += \
    src/ccmain/control.h \
    src/ccmain/mutableiterator.h \
    src/ccmain/output.h \
    src/ccmain/paragraphs.h \
    src/ccmain/paragraphs_internal.h \
    src/ccmain/paramsd.h \
    src/ccmain/pgedit.h \
    src/ccmain/tesseractclass.h \
    src/ccmain/tessvars.h \
    src/ccmain/thresholder.h \
    src/ccmain/werdit.h
if !DISABLED_LEGACY_ENGINE
noinst_HEADERS += \
    src/ccmain/docqual.h \
    src/ccmain/equationdetect.h \
    src/ccmain/fixspace.h \
    src/ccmain/reject.h
endif

libtesseract_la_SOURCES += \
    src/ccmain/applybox.cpp \
    src/ccmain/control.cpp \
    src/ccmain/linerec.cpp \
    src/ccmain/ltrresultiterator.cpp \
    src/ccmain/mutableiterator.cpp \
    src/ccmain/output.cpp \
    src/ccmain/pageiterator.cpp \
    src/ccmain/pagesegmain.cpp \
    src/ccmain/pagewalk.cpp \
    src/ccmain/paragraphs.cpp
if !GRAPHICS_DISABLED
libtesseract_la_SOURCES += \
    src/ccmain/paramsd.cpp \
    src/ccmain/pgedit.cpp
endif
# Remaining src/ccmain sources; the second group is only compiled
# when the legacy engine is enabled.
libtesseract_la_SOURCES += \
    src/ccmain/reject.cpp \
    src/ccmain/resultiterator.cpp \
    src/ccmain/tessedit.cpp \
    src/ccmain/tesseractclass.cpp \
    src/ccmain/tessvars.cpp \
    src/ccmain/thresholder.cpp \
    src/ccmain/werdit.cpp
if !DISABLED_LEGACY_ENGINE
libtesseract_la_SOURCES += \
    src/ccmain/adaptions.cpp \
    src/ccmain/docqual.cpp \
    src/ccmain/equationdetect.cpp \
    src/ccmain/fixspace.cpp \
    src/ccmain/fixxht.cpp \
    src/ccmain/osdetect.cpp \
    src/ccmain/par_control.cpp \
    src/ccmain/recogtraining.cpp \
    src/ccmain/superscript.cpp \
    src/ccmain/tessbox.cpp \
    src/ccmain/tfacepp.cpp
endif

# Rules for src/ccstruct.

noinst_HEADERS += \
    src/ccstruct/blamer.h \
    src/ccstruct/blobbox.h \
    src/ccstruct/blobs.h \
    src/ccstruct/blread.h \
    src/ccstruct/boxread.h \
    src/ccstruct/boxword.h \
    src/ccstruct/ccstruct.h \
    src/ccstruct/coutln.h \
    src/ccstruct/crakedge.h \
    src/ccstruct/debugpixa.h \
    src/ccstruct/detlinefit.h \
    src/ccstruct/dppoint.h \
    src/ccstruct/image.h \
    src/ccstruct/imagedata.h \
    src/ccstruct/linlsq.h \
    src/ccstruct/matrix.h \
    src/ccstruct/mod128.h \
    src/ccstruct/normalis.h \
    src/ccstruct/ocrblock.h \
    src/ccstruct/ocrpara.h \
    src/ccstruct/ocrrow.h \
    src/ccstruct/otsuthr.h \
    src/ccstruct/pageres.h \
    src/ccstruct/pdblock.h \
    src/ccstruct/points.h \
    src/ccstruct/polyaprx.h \
    src/ccstruct/polyblk.h \
    src/ccstruct/quadlsq.h \
    src/ccstruct/quadratc.h \
    src/ccstruct/quspline.h \
    src/ccstruct/ratngs.h \
    src/ccstruct/rect.h \
    src/ccstruct/rejctmap.h \
    src/ccstruct/seam.h \
    src/ccstruct/split.h \
    src/ccstruct/statistc.h \
    src/ccstruct/stepblob.h \
    src/ccstruct/werd.h
if !DISABLED_LEGACY_ENGINE
noinst_HEADERS += \
    src/ccstruct/fontinfo.h \
    src/ccstruct/params_training_featdef.h
endif

libtesseract_la_SOURCES += \
    src/ccstruct/blamer.cpp \
    src/ccstruct/blobbox.cpp \
    src/ccstruct/blobs.cpp \
    src/ccstruct/blread.cpp \
    src/ccstruct/boxread.cpp \
    src/ccstruct/boxword.cpp \
    src/ccstruct/ccstruct.cpp \
    src/ccstruct/coutln.cpp \
    src/ccstruct/detlinefit.cpp \
    src/ccstruct/dppoint.cpp \
    src/ccstruct/image.cpp \
    src/ccstruct/imagedata.cpp \
    src/ccstruct/linlsq.cpp \
    src/ccstruct/matrix.cpp \
    src/ccstruct/mod128.cpp \
    src/ccstruct/normalis.cpp \
    src/ccstruct/ocrblock.cpp \
    src/ccstruct/ocrpara.cpp \
    src/ccstruct/ocrrow.cpp \
    src/ccstruct/otsuthr.cpp \
    src/ccstruct/pageres.cpp \
    src/ccstruct/pdblock.cpp \
    src/ccstruct/points.cpp \
    src/ccstruct/polyaprx.cpp \
    src/ccstruct/polyblk.cpp
# Remaining src/ccstruct sources; the second group is only compiled
# when the legacy engine is enabled.
libtesseract_la_SOURCES += \
    src/ccstruct/quadlsq.cpp \
    src/ccstruct/quspline.cpp \
    src/ccstruct/ratngs.cpp \
    src/ccstruct/rect.cpp \
    src/ccstruct/rejctmap.cpp \
    src/ccstruct/seam.cpp \
    src/ccstruct/split.cpp \
    src/ccstruct/statistc.cpp \
    src/ccstruct/stepblob.cpp \
    src/ccstruct/werd.cpp
if !DISABLED_LEGACY_ENGINE
libtesseract_la_SOURCES += \
    src/ccstruct/fontinfo.cpp \
    src/ccstruct/params_training_featdef.cpp
endif

# Rules for src/ccutil

# ccutil is built as a separate convenience library with its own
# preprocessor flags (libarchive flags, optional TESSDATA_PREFIX).
libtesseract_ccutil_la_CPPFLAGS = $(AM_CPPFLAGS) $(libarchive_CFLAGS)
if !NO_TESSDATA_PREFIX
libtesseract_ccutil_la_CPPFLAGS += -DTESSDATA_PREFIX='"@datadir@"'
endif

noinst_HEADERS += \
    src/ccutil/ccutil.h \
    src/ccutil/clst.h \
    src/ccutil/elst2.h \
    src/ccutil/elst.h \
    src/ccutil/errcode.h \
    src/ccutil/fileerr.h \
    src/ccutil/genericheap.h \
    src/ccutil/genericvector.h \
    src/ccutil/helpers.h \
    src/ccutil/host.h \
    src/ccutil/kdpair.h \
    src/ccutil/lsterr.h \
    src/ccutil/object_cache.h \
    src/ccutil/params.h \
    src/ccutil/qrsequence.h \
    src/ccutil/sorthelper.h \
    src/ccutil/scanutils.h \
    src/ccutil/serialis.h \
    src/ccutil/tessdatamanager.h \
    src/ccutil/tprintf.h \
    src/ccutil/unicharcompress.h \
    src/ccutil/unicharmap.h \
    src/ccutil/unicharset.h \
    src/ccutil/unicity_table.h
if !DISABLED_LEGACY_ENGINE
noinst_HEADERS += \
    src/ccutil/ambigs.h \
    src/ccutil/bitvector.h \
    src/ccutil/indexmapbidi.h \
    src/ccutil/universalambigs.h
endif

noinst_LTLIBRARIES += libtesseract_ccutil.la

libtesseract_ccutil_la_SOURCES = \
    src/ccutil/ccutil.cpp \
    src/ccutil/clst.cpp \
    src/ccutil/elst2.cpp \
    src/ccutil/elst.cpp \
    src/ccutil/errcode.cpp \
    src/ccutil/serialis.cpp \
    src/ccutil/scanutils.cpp \
    src/ccutil/tessdatamanager.cpp \
    src/ccutil/tprintf.cpp \
    src/ccutil/unichar.cpp \
    src/ccutil/unicharcompress.cpp \
    src/ccutil/unicharmap.cpp \
    src/ccutil/unicharset.cpp \
    src/ccutil/params.cpp
if !DISABLED_LEGACY_ENGINE
libtesseract_ccutil_la_SOURCES += \
    src/ccutil/ambigs.cpp \
    src/ccutil/bitvector.cpp \
    src/ccutil/indexmapbidi.cpp
endif

# Rules for src/classify.

# Only classify.h / classify.cpp are used unconditionally; all other
# files of this directory belong to the legacy engine.
noinst_HEADERS += src/classify/classify.h
if !DISABLED_LEGACY_ENGINE
noinst_HEADERS += \
    src/classify/adaptive.h \
    src/classify/cluster.h \
    src/classify/clusttool.h \
    src/classify/featdefs.h \
    src/classify/float2int.h \
    src/classify/fpoint.h \
    src/classify/intfeaturespace.h \
    src/classify/intfx.h \
    src/classify/intmatcher.h \
    src/classify/intproto.h \
    src/classify/kdtree.h \
    src/classify/mf.h \
    src/classify/mfdefs.h \
    src/classify/mfoutline.h \
    src/classify/mfx.h \
    src/classify/normfeat.h \
    src/classify/normmatch.h \
    src/classify/ocrfeatures.h \
    src/classify/outfeat.h \
    src/classify/picofeat.h \
    src/classify/protos.h \
    src/classify/shapeclassifier.h \
    src/classify/shapetable.h \
    src/classify/tessclassifier.h \
    src/classify/trainingsample.h
endif

libtesseract_la_SOURCES += src/classify/classify.cpp
if !DISABLED_LEGACY_ENGINE
libtesseract_la_SOURCES += \
    src/classify/adaptive.cpp \
    src/classify/adaptmatch.cpp \
    src/classify/blobclass.cpp \
    src/classify/cluster.cpp \
    src/classify/clusttool.cpp \
    src/classify/cutoffs.cpp \
    src/classify/featdefs.cpp \
    src/classify/float2int.cpp \
    src/classify/fpoint.cpp \
    src/classify/intfeaturespace.cpp \
    src/classify/intfx.cpp \
    src/classify/intmatcher.cpp \
    src/classify/intproto.cpp \
    src/classify/kdtree.cpp \
    src/classify/mf.cpp \
    src/classify/mfoutline.cpp \
    src/classify/mfx.cpp \
    src/classify/normfeat.cpp \
    src/classify/normmatch.cpp \
    src/classify/ocrfeatures.cpp \
    src/classify/outfeat.cpp \
    src/classify/picofeat.cpp \
    src/classify/protos.cpp \
    src/classify/shapeclassifier.cpp \
    src/classify/shapetable.cpp \
    src/classify/tessclassifier.cpp \
    src/classify/trainingsample.cpp
endif

# Rules for src/cutil.

# Everything in this directory belongs to the legacy engine.
if !DISABLED_LEGACY_ENGINE
noinst_HEADERS += \
    src/cutil/bitvec.h \
    src/cutil/oldlist.h
libtesseract_la_SOURCES += src/cutil/oldlist.cpp
endif

# Rules for src/dict.

noinst_HEADERS += \
    src/dict/dawg.h \
    src/dict/dawg_cache.h \
    src/dict/dict.h \
    src/dict/matchdefs.h \
    src/dict/stopper.h \
    src/dict/trie.h

libtesseract_la_SOURCES += \
    src/dict/context.cpp \
    src/dict/dawg.cpp \
    src/dict/dawg_cache.cpp \
    src/dict/dict.cpp \
    src/dict/stopper.cpp \
    src/dict/trie.cpp
if !DISABLED_LEGACY_ENGINE
libtesseract_la_SOURCES += \
    src/dict/hyphen.cpp \
    src/dict/permdawg.cpp
endif

# Rules for src/lstm.

# lstm is built as a separate convenience library with its own
# preprocessor flags.
libtesseract_lstm_la_CPPFLAGS = $(AM_CPPFLAGS)
libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/arch
libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/ccstruct
libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/classify
libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/cutil
libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/dict
libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/lstm
libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/viewer
if !NO_TESSDATA_PREFIX
libtesseract_lstm_la_CPPFLAGS += -DTESSDATA_PREFIX='"@datadir@"'
endif

noinst_HEADERS += \
    src/lstm/convolve.h \
    src/lstm/fullyconnected.h \
    src/lstm/functions.h \
    src/lstm/input.h \
    src/lstm/lstm.h \
    src/lstm/lstmrecognizer.h \
    src/lstm/maxpool.h \
    src/lstm/network.h \
    src/lstm/networkio.h \
    src/lstm/networkscratch.h \
    src/lstm/parallel.h \
    src/lstm/plumbing.h \
    src/lstm/recodebeam.h \
    src/lstm/reconfig.h \
    src/lstm/reversed.h \
    src/lstm/series.h \
    src/lstm/static_shape.h \
    src/lstm/stridemap.h \
    src/lstm/weightmatrix.h

noinst_LTLIBRARIES += libtesseract_lstm.la

libtesseract_lstm_la_SOURCES = \
    src/lstm/convolve.cpp \
    src/lstm/fullyconnected.cpp \
    src/lstm/functions.cpp \
    src/lstm/input.cpp \
    src/lstm/lstm.cpp \
    src/lstm/lstmrecognizer.cpp \
    src/lstm/maxpool.cpp \
    src/lstm/network.cpp \
    src/lstm/networkio.cpp \
    src/lstm/parallel.cpp \
    src/lstm/plumbing.cpp \
    src/lstm/recodebeam.cpp \
    src/lstm/reconfig.cpp \
    src/lstm/reversed.cpp \
    src/lstm/series.cpp \
    src/lstm/stridemap.cpp \
    src/lstm/weightmatrix.cpp

# Rules for src/textord.

noinst_HEADERS += \
    src/textord/alignedblob.h \
    src/textord/baselinedetect.h \
    src/textord/bbgrid.h \
    src/textord/blkocc.h \
    src/textord/blobgrid.h \
    src/textord/ccnontextdetect.h \
    src/textord/cjkpitch.h \
    src/textord/colfind.h \
    src/textord/colpartition.h \
    src/textord/colpartitionset.h \
    src/textord/colpartitiongrid.h \
    src/textord/devanagari_processing.h \
    src/textord/drawtord.h \
    src/textord/edgblob.h \
    src/textord/edgloop.h \
    src/textord/fpchop.h \
    src/textord/gap_map.h \
    src/textord/imagefind.h \
    src/textord/linefind.h \
    src/textord/makerow.h \
    src/textord/oldbasel.h \
    src/textord/pithsync.h \
    src/textord/pitsync1.h \
    src/textord/scanedg.h \
    src/textord/sortflts.h \
    src/textord/strokewidth.h \
    src/textord/tabfind.h \
    src/textord/tablefind.h \
    src/textord/tabvector.h \
    src/textord/tablerecog.h \
    src/textord/textlineprojection.h \
    src/textord/textord.h \
    src/textord/topitch.h \
    src/textord/tordmain.h \
    src/textord/tovars.h \
    src/textord/underlin.h \
    src/textord/wordseg.h \
    src/textord/workingpartset.h
if !DISABLED_LEGACY_ENGINE
noinst_HEADERS += src/textord/equationdetectbase.h
endif

libtesseract_la_SOURCES += \
    src/textord/alignedblob.cpp \
    src/textord/baselinedetect.cpp \
    src/textord/bbgrid.cpp \
    src/textord/blkocc.cpp \
    src/textord/blobgrid.cpp \
    src/textord/ccnontextdetect.cpp \
    src/textord/cjkpitch.cpp \
    src/textord/colfind.cpp \
    src/textord/colpartition.cpp \
    src/textord/colpartitionset.cpp \
    src/textord/colpartitiongrid.cpp \
    src/textord/devanagari_processing.cpp \
    src/textord/drawtord.cpp \
    src/textord/edgblob.cpp \
    src/textord/edgloop.cpp \
    src/textord/fpchop.cpp \
    src/textord/gap_map.cpp \
    src/textord/imagefind.cpp \
    src/textord/linefind.cpp \
    src/textord/makerow.cpp \
    src/textord/oldbasel.cpp \
    src/textord/pithsync.cpp \
    src/textord/pitsync1.cpp \
    src/textord/scanedg.cpp \
    src/textord/sortflts.cpp \
    src/textord/strokewidth.cpp \
    src/textord/tabfind.cpp \
    src/textord/tablefind.cpp \
    src/textord/tabvector.cpp \
    src/textord/tablerecog.cpp \
    src/textord/textlineprojection.cpp \
    src/textord/textord.cpp \
    src/textord/topitch.cpp \
    src/textord/tordmain.cpp \
    src/textord/tospace.cpp \
    src/textord/tovars.cpp \
    src/textord/underlin.cpp \
    src/textord/wordseg.cpp \
    src/textord/workingpartset.cpp
if !DISABLED_LEGACY_ENGINE
libtesseract_la_SOURCES += src/textord/equationdetectbase.cpp
endif

# Rules for src/viewer.

# The viewer and the optional svpaint program are only available
# when graphics support is not disabled.
if !GRAPHICS_DISABLED
noinst_HEADERS += \
    src/viewer/scrollview.h \
    src/viewer/svmnode.h \
    src/viewer/svutil.h

libtesseract_la_SOURCES += \
    src/viewer/scrollview.cpp \
    src/viewer/svmnode.cpp \
    src/viewer/svutil.cpp

EXTRA_PROGRAMS += svpaint
svpaint_CPPFLAGS = $(AM_CPPFLAGS)
svpaint_CPPFLAGS += -I$(top_srcdir)/src/ccstruct
svpaint_CPPFLAGS += -I$(top_srcdir)/src/viewer
svpaint_SOURCES = src/svpaint.cpp
svpaint_LDADD = libtesseract.la
endif

# Rules for src/wordrec.

noinst_HEADERS += src/wordrec/wordrec.h
if !DISABLED_LEGACY_ENGINE
noinst_HEADERS += \
    src/wordrec/associate.h \
    src/wordrec/chop.h \
    src/wordrec/drawfx.h \
    src/wordrec/findseam.h \
    src/wordrec/language_model.h \
    src/wordrec/lm_consistency.h \
    src/wordrec/lm_pain_points.h \
    src/wordrec/lm_state.h \
    src/wordrec/outlines.h \
    src/wordrec/params_model.h \
    src/wordrec/plotedges.h \
    src/wordrec/render.h
endif

libtesseract_la_SOURCES += \
    src/wordrec/tface.cpp \
    src/wordrec/wordrec.cpp
if !DISABLED_LEGACY_ENGINE
libtesseract_la_SOURCES += \
    src/wordrec/associate.cpp \
    src/wordrec/chop.cpp \
    src/wordrec/chopper.cpp \
    src/wordrec/drawfx.cpp \
    src/wordrec/findseam.cpp \
    src/wordrec/gradechop.cpp \
    src/wordrec/language_model.cpp \
    src/wordrec/lm_consistency.cpp \
    src/wordrec/lm_pain_points.cpp \
    src/wordrec/lm_state.cpp \
    src/wordrec/outlines.cpp \
    src/wordrec/params_model.cpp \
    src/wordrec/pieces.cpp
# plotedges.cpp needs both the legacy engine and graphics support.
if !GRAPHICS_DISABLED
libtesseract_la_SOURCES += src/wordrec/plotedges.cpp
endif
libtesseract_la_SOURCES += \
    src/wordrec/render.cpp \
    src/wordrec/segsearch.cpp \
    src/wordrec/wordclass.cpp
endif

# Rules for tesseract executable.

bin_PROGRAMS = tesseract
tesseract_SOURCES = src/tesseract.cpp
tesseract_CPPFLAGS = $(AM_CPPFLAGS)
tesseract_CPPFLAGS += -I$(top_srcdir)/src/arch
tesseract_CPPFLAGS += -I$(top_srcdir)/src/ccmain
tesseract_CPPFLAGS += -I$(top_srcdir)/src/ccstruct
tesseract_CPPFLAGS += -I$(top_srcdir)/src/ccutil
tesseract_CPPFLAGS += -I$(top_srcdir)/src/classify
tesseract_CPPFLAGS += -I$(top_srcdir)/src/cutil
tesseract_CPPFLAGS += -I$(top_srcdir)/src/dict
tesseract_CPPFLAGS += -I$(top_srcdir)/src/textord
tesseract_CPPFLAGS += -I$(top_srcdir)/src/viewer
tesseract_CPPFLAGS += -I$(top_srcdir)/src/wordrec

# The OpenMP compiler flags are also passed when linking the program.
tesseract_LDFLAGS = $(OPENMP_CXXFLAGS)

tesseract_LDADD = libtesseract.la
tesseract_LDADD += $(LEPTONICA_LIBS)
tesseract_LDADD += $(libarchive_LIBS)
tesseract_LDADD += $(libcurl_LIBS)
if T_WIN
tesseract_LDADD += -ltiff
tesseract_LDADD += -lws2_32
endif
if ADD_RT
tesseract_LDADD += -lrt
endif

# Rules for training tools.

if ENABLE_TRAINING
# $(PROGRAMS) is an order-only prerequisite (listed after "|").
training: $(trainingtools) | $(PROGRAMS)

# Install the training tools into $(DESTDIR)$(bindir) using libtool.
# $(MKDIR_P) is the configure-checked replacement for "mkdir -p".
training-install: $(trainingtools)
	$(MKDIR_P) $(DESTDIR)$(bindir)
	$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install \
	$(INSTALL) $(INSTALL_STRIP_FLAG) $(trainingtools) $(DESTDIR)$(bindir)

# NOTE(review): this target has no recipe, so installed training tools
# are not removed by it - confirm whether that is intended.
training-uninstall:

# Some unit tests use code from training.
check: libtesseract_training.la

# dawg_test runs dawg2wordlist and wordlist2dawg.
check: dawg2wordlist wordlist2dawg

else
# Without ENABLE_TRAINING the target only prints a hint.
training:
	@echo "Need to reconfigure project, so there are no errors"
endif

CLEANFILES += $(EXTRA_PROGRAMS)

# Preprocessor flags shared by the training library and all training
# tools.
training_CPPFLAGS = $(AM_CPPFLAGS)
training_CPPFLAGS += -DPANGO_ENABLE_ENGINE
training_CPPFLAGS += -DTESS_COMMON_TRAINING_API=
training_CPPFLAGS += -DTESS_PANGO_TRAINING_API=
training_CPPFLAGS += -DTESS_UNICHARSET_TRAINING_API=
training_CPPFLAGS += -I$(top_srcdir)/src/training
training_CPPFLAGS += -I$(top_srcdir)/src/training/common
training_CPPFLAGS += -I$(top_srcdir)/src/training/pango
training_CPPFLAGS += -I$(top_srcdir)/src/training/unicharset
training_CPPFLAGS += -I$(top_srcdir)/src/api
training_CPPFLAGS += -I$(top_srcdir)/src/ccmain
training_CPPFLAGS += -I$(top_srcdir)/src/ccutil
training_CPPFLAGS += -I$(top_srcdir)/src/ccstruct
training_CPPFLAGS += -I$(top_srcdir)/src/lstm
training_CPPFLAGS += -I$(top_srcdir)/src/arch
training_CPPFLAGS += -I$(top_srcdir)/src/viewer
training_CPPFLAGS += -I$(top_srcdir)/src/textord
training_CPPFLAGS += -I$(top_srcdir)/src/dict
training_CPPFLAGS += -I$(top_srcdir)/src/classify
training_CPPFLAGS += -I$(top_srcdir)/src/wordrec
training_CPPFLAGS += -I$(top_srcdir)/src/cutil
training_CPPFLAGS += $(ICU_UC_CFLAGS) $(ICU_I18N_CFLAGS)
training_CPPFLAGS += $(pango_CFLAGS)
training_CPPFLAGS += $(cairo_CFLAGS)
if DISABLED_LEGACY_ENGINE
training_CPPFLAGS += -DDISABLED_LEGACY_ENGINE
endif

# TODO: training programs cannot be linked to shared library created
# with -fvisibility
if VISIBILITY
AM_LDFLAGS += -all-static
endif

noinst_HEADERS += \
    src/training/pango/boxchar.h \
    src/training/common/commandlineflags.h \
    src/training/common/commontraining.h \
    src/training/common/ctc.h \
    src/training/common/networkbuilder.h \
    src/training/degradeimage.h \
    src/training/pango/ligature_table.h \
    src/training/pango/pango_font_info.h \
    src/training/pango/stringrenderer.h \
    src/training/pango/tlog.h \
    src/training/unicharset/icuerrorcode.h \
    src/training/unicharset/fileio.h \
    src/training/unicharset/lang_model_helpers.h \
    src/training/unicharset/lstmtester.h \
    src/training/unicharset/lstmtrainer.h \
    src/training/unicharset/normstrngs.h \
    src/training/unicharset/unicharset_training_utils.h \
    src/training/unicharset/validate_grapheme.h \
    src/training/unicharset/validate_indic.h \
    src/training/unicharset/validate_javanese.h \
    src/training/unicharset/validate_khmer.h \
    src/training/unicharset/validate_myanmar.h \
    src/training/unicharset/validator.h
if !DISABLED_LEGACY_ENGINE
noinst_HEADERS += \
    src/training/common/errorcounter.h \
    src/training/common/intfeaturedist.h \
    src/training/common/intfeaturemap.h \
    src/training/common/mastertrainer.h \
    src/training/common/sampleiterator.h \
    src/training/common/trainingsampleset.h \
    src/training/mergenf.h
endif

# The training library is declared in EXTRA_LTLIBRARIES, so it is only
# built on demand (as a prerequisite of the training tools or of
# "check") and has to be added to CLEANFILES explicitly.
CLEANFILES += libtesseract_training.la

EXTRA_LTLIBRARIES = libtesseract_training.la

libtesseract_training_la_CPPFLAGS = $(training_CPPFLAGS)
libtesseract_training_la_SOURCES = \
    src/training/pango/boxchar.cpp \
    src/training/common/commandlineflags.cpp \
    src/training/common/commontraining.cpp \
    src/training/common/ctc.cpp \
    src/training/common/networkbuilder.cpp \
    src/training/degradeimage.cpp \
    src/training/pango/ligature_table.cpp \
    src/training/pango/pango_font_info.cpp \
    src/training/pango/stringrenderer.cpp
# Remaining sources of libtesseract_training.la.
libtesseract_training_la_SOURCES += \
  src/training/pango/tlog.cpp \
  src/training/unicharset/icuerrorcode.cpp \
  src/training/unicharset/fileio.cpp \
  src/training/unicharset/lang_model_helpers.cpp \
  src/training/unicharset/lstmtester.cpp \
  src/training/unicharset/lstmtrainer.cpp \
  src/training/unicharset/normstrngs.cpp \
  src/training/unicharset/unicharset_training_utils.cpp \
  src/training/unicharset/validate_grapheme.cpp \
  src/training/unicharset/validate_indic.cpp \
  src/training/unicharset/validate_javanese.cpp \
  src/training/unicharset/validate_khmer.cpp \
  src/training/unicharset/validate_myanmar.cpp \
  src/training/unicharset/validator.cpp
# Sources which are only compiled when the legacy engine is enabled.
if !DISABLED_LEGACY_ENGINE
libtesseract_training_la_SOURCES += \
  src/training/common/errorcounter.cpp \
  src/training/common/intfeaturedist.cpp \
  src/training/common/intfeaturemap.cpp \
  src/training/common/mastertrainer.cpp \
  src/training/common/sampleiterator.cpp \
  src/training/common/trainingsampleset.cpp
endif

# Executables which make up the training tools.
trainingtools = \
  combine_lang_model$(EXEEXT) \
  combine_tessdata$(EXEEXT) \
  dawg2wordlist$(EXEEXT) \
  lstmeval$(EXEEXT) \
  lstmtraining$(EXEEXT) \
  merge_unicharsets$(EXEEXT) \
  set_unicharset_properties$(EXEEXT) \
  text2image$(EXEEXT) \
  unicharset_extractor$(EXEEXT) \
  wordlist2dawg$(EXEEXT)
# Additional tools for the legacy engine.
if !DISABLED_LEGACY_ENGINE
trainingtools += \
  ambiguous_words$(EXEEXT) \
  classifier_tester$(EXEEXT) \
  cntraining$(EXEEXT) \
  mftraining$(EXEEXT) \
  shapeclustering$(EXEEXT)
endif

# Every training tool needs the main library to be built first.
$(trainingtools): libtesseract.la

EXTRA_PROGRAMS += $(trainingtools)

# Libraries which are linked to every training tool.
extralib = libtesseract.la $(libarchive_LIBS) $(LEPTONICA_LIBS)
if T_WIN
extralib += -lws2_32
endif

# Tools for the legacy engine.
if !DISABLED_LEGACY_ENGINE
ambiguous_words_CPPFLAGS = $(training_CPPFLAGS)
ambiguous_words_SOURCES = src/training/ambiguous_words.cpp
ambiguous_words_LDADD = libtesseract_training.la $(extralib)

classifier_tester_CPPFLAGS = $(training_CPPFLAGS)
classifier_tester_SOURCES = src/training/classifier_tester.cpp
classifier_tester_LDADD = libtesseract_training.la $(extralib)

cntraining_CPPFLAGS = $(training_CPPFLAGS)
cntraining_SOURCES = src/training/cntraining.cpp
cntraining_LDADD = libtesseract_training.la $(extralib)

mftraining_CPPFLAGS = $(training_CPPFLAGS)
mftraining_SOURCES = src/training/mftraining.cpp src/training/mergenf.cpp
mftraining_LDADD = libtesseract_training.la $(ICU_UC_LIBS) $(extralib)

shapeclustering_CPPFLAGS = $(training_CPPFLAGS)
shapeclustering_SOURCES = src/training/shapeclustering.cpp
shapeclustering_LDADD = libtesseract_training.la $(extralib)
endif

# Tools which are built independently of the legacy engine.
combine_lang_model_CPPFLAGS = $(training_CPPFLAGS)
combine_lang_model_SOURCES = src/training/combine_lang_model.cpp
combine_lang_model_LDADD = \
  libtesseract_training.la \
  $(ICU_I18N_LIBS) $(ICU_UC_LIBS) \
  $(extralib)

# combine_tessdata and dawg2wordlist link only against $(extralib).
combine_tessdata_CPPFLAGS = $(training_CPPFLAGS)
combine_tessdata_SOURCES = src/training/combine_tessdata.cpp
combine_tessdata_LDADD = $(extralib)

dawg2wordlist_CPPFLAGS = $(training_CPPFLAGS)
dawg2wordlist_SOURCES = src/training/dawg2wordlist.cpp
dawg2wordlist_LDADD = $(extralib)
# Per-tool build settings: preprocessor flags, sources, link libraries.
lstmeval_CPPFLAGS = $(training_CPPFLAGS)
lstmeval_SOURCES = src/training/lstmeval.cpp
lstmeval_LDADD = libtesseract_training.la $(ICU_UC_LIBS) $(extralib)

lstmtraining_CPPFLAGS = $(training_CPPFLAGS)
lstmtraining_SOURCES = src/training/lstmtraining.cpp
lstmtraining_LDADD = \
  libtesseract_training.la \
  $(ICU_I18N_LIBS) $(ICU_UC_LIBS) \
  $(extralib)

# merge_unicharsets links only against $(extralib).
merge_unicharsets_CPPFLAGS = $(training_CPPFLAGS)
merge_unicharsets_SOURCES = src/training/merge_unicharsets.cpp
merge_unicharsets_LDADD = $(extralib)

set_unicharset_properties_CPPFLAGS = $(training_CPPFLAGS)
set_unicharset_properties_SOURCES = src/training/set_unicharset_properties.cpp
set_unicharset_properties_LDADD = \
  libtesseract_training.la \
  $(ICU_I18N_LIBS) $(ICU_UC_LIBS) \
  $(extralib)

# text2image additionally links the cairo and pango libraries.
text2image_CPPFLAGS = $(training_CPPFLAGS)
text2image_SOURCES = src/training/text2image.cpp
text2image_LDADD = \
  libtesseract_training.la \
  $(ICU_I18N_LIBS) $(ICU_UC_LIBS) \
  $(extralib) \
  $(ICU_UC_LIBS) $(cairo_LIBS) \
  $(pango_LIBS) $(pangocairo_LIBS) $(pangoft2_LIBS)

unicharset_extractor_CPPFLAGS = $(training_CPPFLAGS)
unicharset_extractor_SOURCES = src/training/unicharset_extractor.cpp
unicharset_extractor_LDADD = \
  libtesseract_training.la \
  $(ICU_I18N_LIBS) $(ICU_UC_LIBS) \
  $(extralib)

# wordlist2dawg links only against $(extralib).
wordlist2dawg_CPPFLAGS = $(training_CPPFLAGS)
wordlist2dawg_SOURCES = src/training/wordlist2dawg.cpp
wordlist2dawg_LDADD = $(extralib)

# fuzzer-api is used for fuzzing tests.
# They are run by OSS-Fuzz https://oss-fuzz.com/, but can also be run locally.
# Note: -fsanitize=fuzzer currently requires the clang++ compiler.

# LIB_FUZZING_ENGINE can be overridden by the caller.
# This is used by OSS-Fuzz.
# Default fuzzing engine flag; '?=' keeps a value which was already set.
LIB_FUZZING_ENGINE ?= -fsanitize=fuzzer

# The fuzzer is compiled directly with $(CXX) and linked with the static
# library $(builddir)/.libs/libtesseract.a.
fuzzer-api: libtesseract.la
fuzzer-api: unittest/fuzzers/fuzzer-api.cpp
	$(CXX) $(CXXFLAGS) -g $(LIB_FUZZING_ENGINE) \
	  -I $(top_srcdir)/include \
	  -I $(builddir)/include \
	  -I $(top_srcdir)/src/ccmain \
	  -I $(top_srcdir)/src/ccstruct \
	  -I $(top_srcdir)/src/ccutil \
	  $(LEPTONICA_CFLAGS) \
	  $(OPENMP_CXXFLAGS) \
	  $< \
	  $(builddir)/.libs/libtesseract.a \
	  $(LEPTONICA_LIBS) \
	  $(libarchive_LIBS) \
	  $(libcurl_LIBS) \
	  -o $@

# Same command as for fuzzer-api, with the additional macros
# TESSERACT_FUZZER_WIDTH=512 and TESSERACT_FUZZER_HEIGHT=256.
fuzzer-api-512x256: libtesseract.la
fuzzer-api-512x256: unittest/fuzzers/fuzzer-api.cpp
	$(CXX) $(CXXFLAGS) -g $(LIB_FUZZING_ENGINE) \
	  -DTESSERACT_FUZZER_WIDTH=512 \
	  -DTESSERACT_FUZZER_HEIGHT=256 \
	  -I $(top_srcdir)/include \
	  -I $(builddir)/include \
	  -I $(top_srcdir)/src/ccmain \
	  -I $(top_srcdir)/src/ccstruct \
	  -I $(top_srcdir)/src/ccutil \
	  $(LEPTONICA_CFLAGS) \
	  $(OPENMP_CXXFLAGS) \
	  $< \
	  $(builddir)/.libs/libtesseract.a \
	  $(LEPTONICA_LIBS) \
	  $(libarchive_LIBS) \
	  $(libcurl_LIBS) \
	  -o $@

CLEANFILES += fuzzer-api fuzzer-api-512x256

# Manual pages and their conversion rules exist only with ASCIIDOC.
if ASCIIDOC

man_MANS = doc/combine_lang_model.1
man_MANS += doc/combine_tessdata.1
man_MANS += doc/dawg2wordlist.1
man_MANS += doc/lstmeval.1
man_MANS += doc/lstmtraining.1
man_MANS += doc/merge_unicharsets.1
man_MANS += doc/set_unicharset_properties.1
man_MANS += doc/tesseract.1
man_MANS += doc/text2image.1
man_MANS += doc/unicharset.5
man_MANS += doc/unicharset_extractor.1
man_MANS += doc/wordlist2dawg.1

# Manual pages which are only listed when the legacy engine is enabled.
if !DISABLED_LEGACY_ENGINE
man_MANS += doc/ambiguous_words.1
man_MANS += doc/classifier_tester.1
man_MANS += doc/cntraining.1
man_MANS += doc/mftraining.1
man_MANS += doc/shapeclustering.1
man_MANS += doc/unicharambigs.5
endif

# Stylesheet passed to xsltproc, which is called with --nonet below.
man_xslt = http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl

EXTRA_DIST += $(man_MANS) doc/Doxyfile

# 'make html' and 'make pdf' convert every manual page.
html: ${man_MANS:%=%.html}
pdf: ${man_MANS:%=%.pdf}

SUFFIXES = .asc .html .pdf

# Suffix rule: AsciiDoc source -> DocBook (on stdout) -> xsltproc output.
# With HAVE_XML_CATALOG_FILES the catalog list is passed in the environment.
.asc:
if HAVE_XML_CATALOG_FILES
	asciidoc -b docbook -d manpage -o - $< | \
	XML_CATALOG_FILES=$(XML_CATALOG_FILES) xsltproc --nonet -o $@ $(man_xslt) -
else
	asciidoc -b docbook -d manpage -o - $< | \
	xsltproc --nonet -o $@ $(man_xslt) -
endif

.asc.html:
	asciidoc -b html5 -o $@ $<

# The PDF is made from an intermediate DocBook file $*.dbk.
.asc.pdf:
	asciidoc -b docbook -d manpage -o $*.dbk $<
	docbook2pdf -o doc $*.dbk

MAINTAINERCLEANFILES = $(man_MANS) Doxyfile

endif

# Absolute path of directory 'langdata'.
LANGDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/langdata_lstm

# Absolute path of directory 'tessdata' with traineddata files
# (must be on same level as top source directory).
TESSDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata

# Absolute path of directory 'testing' with test images and ground truth texts
# (using submodule test).
TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/test/testing

# Absolute path of directory 'testdata' with test unicharset etc.
# (using submodule test).
TESTDATA_DIR=$(shell cd $(top_srcdir) && pwd)/test/testdata

# Suppress some memory leaks reported by LeakSanitizer.
export LSAN_OPTIONS=suppressions=$(top_srcdir)/unittest/tesseract_leaksanitizer.supp

# Preprocessor flags used by all unit tests.
# The directory paths are passed as quoted string macros.
unittest_CPPFLAGS = $(AM_CPPFLAGS)
unittest_CPPFLAGS += -DTESSBIN_DIR="\"$(abs_top_builddir)\""
unittest_CPPFLAGS += -DLANGDATA_DIR="\"$(LANGDATA_DIR)\""
unittest_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\""
unittest_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\""
unittest_CPPFLAGS += -DTESTDATA_DIR="\"$(TESTDATA_DIR)\""
unittest_CPPFLAGS += -DPANGO_ENABLE_ENGINE
if DISABLED_LEGACY_ENGINE
unittest_CPPFLAGS += -DDISABLED_LEGACY_ENGINE
endif # DISABLED_LEGACY_ENGINE
unittest_CPPFLAGS += -DTESS_COMMON_TRAINING_API=
unittest_CPPFLAGS += -DTESS_PANGO_TRAINING_API=
unittest_CPPFLAGS += -DTESS_UNICHARSET_TRAINING_API=
unittest_CPPFLAGS += -I$(top_srcdir)/src/arch
unittest_CPPFLAGS += -I$(top_srcdir)/src/ccmain
unittest_CPPFLAGS += -I$(top_srcdir)/src/ccstruct
unittest_CPPFLAGS += -I$(top_srcdir)/src/ccutil
unittest_CPPFLAGS += -I$(top_srcdir)/src/classify
unittest_CPPFLAGS += -I$(top_srcdir)/src/cutil
unittest_CPPFLAGS += -I$(top_srcdir)/src/dict
unittest_CPPFLAGS += -I$(top_srcdir)/src/display
unittest_CPPFLAGS += -I$(top_srcdir)/src/lstm
unittest_CPPFLAGS += -I$(top_srcdir)/src/textord
unittest_CPPFLAGS += -I$(top_srcdir)/unittest/base
unittest_CPPFLAGS += -I$(top_srcdir)/unittest/util
unittest_CPPFLAGS += $(LEPTONICA_CFLAGS)
# The training include directories are only added with ENABLE_TRAINING.
if ENABLE_TRAINING
unittest_CPPFLAGS += -I$(top_srcdir)/src/training
unittest_CPPFLAGS += -I$(top_srcdir)/src/training/common
unittest_CPPFLAGS += -I$(top_srcdir)/src/training/pango
unittest_CPPFLAGS += -I$(top_srcdir)/src/training/unicharset
unittest_CPPFLAGS += $(pangocairo_CFLAGS)
endif # ENABLE_TRAINING
unittest_CPPFLAGS += -I$(top_srcdir)/src/viewer
unittest_CPPFLAGS += -I$(top_srcdir)/src/wordrec
unittest_CPPFLAGS += -I$(top_srcdir)/unittest

# Build googletest:
check_LTLIBRARIES = libgtest.la libgtest_main.la libgmock.la libgmock_main.la
libgtest_la_SOURCES = unittest/third_party/googletest/googletest/src/gtest-all.cc
libgtest_la_CPPFLAGS = -I$(top_srcdir)/unittest/third_party/googletest/googletest/include
libgtest_la_CPPFLAGS += -I$(top_srcdir)/unittest/third_party/googletest/googletest
libgtest_la_CPPFLAGS += -pthread
libgtest_main_la_SOURCES = unittest/third_party/googletest/googletest/src/gtest_main.cc
libgtest_main_la_CPPFLAGS = $(libgtest_la_CPPFLAGS)

# Include directories shared by both googlemock libraries.
GMOCK_INCLUDES = -I$(top_srcdir)/unittest/third_party/googletest/googlemock/include \
  -I$(top_srcdir)/unittest/third_party/googletest/googlemock \
  -I$(top_srcdir)/unittest/third_party/googletest/googletest/include \
  -I$(top_srcdir)/unittest/third_party/googletest/googletest

libgmock_la_SOURCES = unittest/third_party/googletest/googlemock/src/gmock-all.cc
libgmock_la_CPPFLAGS = $(GMOCK_INCLUDES) \
  -pthread
libgmock_main_la_SOURCES = unittest/third_party/googletest/googlemock/src/gmock_main.cc
libgmock_main_la_CPPFLAGS = $(GMOCK_INCLUDES) \
  -pthread

# Build unittests
# TESS_LIBS is googletest plus libtesseract; TRAINING_LIBS puts
# libtesseract_training.la in front of TESS_LIBS.
GTEST_LIBS = libgtest.la libgtest_main.la -lpthread
GMOCK_LIBS = libgmock.la libgmock_main.la
TESS_LIBS = $(GTEST_LIBS)
TESS_LIBS += libtesseract.la $(libarchive_LIBS)
TRAINING_LIBS = libtesseract_training.la
TRAINING_LIBS += $(TESS_LIBS)
# The googletest and googlemock headers are added with -isystem.
unittest_CPPFLAGS += -isystem $(top_srcdir)/unittest/third_party/googletest/googletest/include
unittest_CPPFLAGS += -isystem $(top_srcdir)/unittest/third_party/googletest/googlemock/include

# List of test programs. Entries inside a conditional are only added
# when that conditional is true.
check_PROGRAMS = apiexample_test
if ENABLE_TRAINING
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += applybox_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += baseapi_test
check_PROGRAMS += baseapi_thread_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += bitvector_test
endif # !DISABLED_LEGACY_ENGINE
endif # ENABLE_TRAINING
check_PROGRAMS += cleanapi_test
check_PROGRAMS += colpartition_test
if ENABLE_TRAINING
check_PROGRAMS += commandlineflags_test
check_PROGRAMS += dawg_test
endif # ENABLE_TRAINING
check_PROGRAMS += denorm_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += equationdetect_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += fileio_test
check_PROGRAMS += heap_test
check_PROGRAMS += imagedata_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += indexmapbidi_test
check_PROGRAMS += intfeaturemap_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += intsimdmatrix_test
check_PROGRAMS += lang_model_test
check_PROGRAMS += layout_test
check_PROGRAMS += ligature_table_test
check_PROGRAMS += linlsq_test
check_PROGRAMS += list_test
if ENABLE_TRAINING
check_PROGRAMS += lstm_recode_test
check_PROGRAMS += lstm_squashed_test
check_PROGRAMS += lstm_test
check_PROGRAMS += lstmtrainer_test
endif # ENABLE_TRAINING
check_PROGRAMS += loadlang_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += mastertrainer_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += matrix_test
check_PROGRAMS += networkio_test
if ENABLE_TRAINING
check_PROGRAMS += normstrngs_test
endif # ENABLE_TRAINING
check_PROGRAMS += nthitem_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += osd_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += pagesegmode_test
if ENABLE_TRAINING
check_PROGRAMS += pango_font_info_test
endif # ENABLE_TRAINING
check_PROGRAMS += paragraphs_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += params_model_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += progress_test
check_PROGRAMS += qrsequence_test
check_PROGRAMS += recodebeam_test
check_PROGRAMS += rect_test
check_PROGRAMS += resultiterator_test
check_PROGRAMS += scanutils_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += shapetable_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += stats_test
check_PROGRAMS += stridemap_test
check_PROGRAMS += stringrenderer_test
check_PROGRAMS += tablefind_test
check_PROGRAMS += tablerecog_test
check_PROGRAMS += tabvector_test
check_PROGRAMS += tatweel_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += textlineprojection_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += tfile_test
if ENABLE_TRAINING
check_PROGRAMS += unichar_test
check_PROGRAMS += unicharcompress_test
check_PROGRAMS += unicharset_test
check_PROGRAMS += validate_grapheme_test
check_PROGRAMS += validate_indic_test
check_PROGRAMS += validate_khmer_test
check_PROGRAMS += validate_myanmar_test
check_PROGRAMS += validator_test
endif # ENABLE_TRAINING

# NOTE(review): this rule names the literal target 'check_PROGRAMS', not
# $(check_PROGRAMS) - confirm whether that is intended.
check_PROGRAMS: libtesseract.la libtesseract_training.la

# Every test program is run by 'make check'.
TESTS = $(check_PROGRAMS)

# List of source files needed to build the executable:

apiexample_test_SOURCES = unittest/apiexample_test.cc
apiexample_test_CPPFLAGS = $(unittest_CPPFLAGS)
apiexample_test_LDFLAGS = $(LEPTONICA_LIBS)
apiexample_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)

if !DISABLED_LEGACY_ENGINE
applybox_test_SOURCES = unittest/applybox_test.cc
applybox_test_CPPFLAGS = $(unittest_CPPFLAGS)
applybox_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
endif # !DISABLED_LEGACY_ENGINE

baseapi_test_SOURCES = unittest/baseapi_test.cc
baseapi_test_CPPFLAGS = $(unittest_CPPFLAGS)
baseapi_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)

baseapi_thread_test_SOURCES = unittest/baseapi_thread_test.cc
baseapi_thread_test_CPPFLAGS = $(unittest_CPPFLAGS)
baseapi_thread_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)

if !DISABLED_LEGACY_ENGINE
bitvector_test_SOURCES = unittest/bitvector_test.cc
bitvector_test_CPPFLAGS = $(unittest_CPPFLAGS)
bitvector_test_LDADD = $(TRAINING_LIBS)
endif # !DISABLED_LEGACY_ENGINE

cleanapi_test_SOURCES = unittest/cleanapi_test.cc
cleanapi_test_CPPFLAGS = $(unittest_CPPFLAGS)
cleanapi_test_LDADD = $(TESS_LIBS)

colpartition_test_SOURCES = unittest/colpartition_test.cc
colpartition_test_CPPFLAGS = $(unittest_CPPFLAGS)
colpartition_test_LDADD = $(TESS_LIBS)

commandlineflags_test_SOURCES = unittest/commandlineflags_test.cc
commandlineflags_test_CPPFLAGS = $(unittest_CPPFLAGS)
commandlineflags_test_LDADD = $(TRAINING_LIBS) $(ICU_UC_LIBS)

dawg_test_SOURCES = unittest/dawg_test.cc
dawg_test_CPPFLAGS = $(unittest_CPPFLAGS)
dawg_test_LDADD = $(TRAINING_LIBS)

denorm_test_SOURCES = unittest/denorm_test.cc
denorm_test_CPPFLAGS = $(unittest_CPPFLAGS)
denorm_test_LDADD = $(TESS_LIBS)

if !DISABLED_LEGACY_ENGINE
equationdetect_test_SOURCES = unittest/equationdetect_test.cc
equationdetect_test_CPPFLAGS = $(unittest_CPPFLAGS)
equationdetect_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)
endif # !DISABLED_LEGACY_ENGINE

fileio_test_SOURCES = unittest/fileio_test.cc
fileio_test_CPPFLAGS = $(unittest_CPPFLAGS)
fileio_test_LDADD = $(TRAINING_LIBS)

heap_test_SOURCES = unittest/heap_test.cc
heap_test_CPPFLAGS = $(unittest_CPPFLAGS)
heap_test_LDADD = $(TESS_LIBS)

imagedata_test_SOURCES = unittest/imagedata_test.cc
imagedata_test_CPPFLAGS = $(unittest_CPPFLAGS)
imagedata_test_LDADD = $(TRAINING_LIBS)

if !DISABLED_LEGACY_ENGINE
indexmapbidi_test_SOURCES = unittest/indexmapbidi_test.cc
indexmapbidi_test_CPPFLAGS = $(unittest_CPPFLAGS)
indexmapbidi_test_LDADD = $(TRAINING_LIBS)
endif # !DISABLED_LEGACY_ENGINE

if !DISABLED_LEGACY_ENGINE
intfeaturemap_test_SOURCES = unittest/intfeaturemap_test.cc
intfeaturemap_test_CPPFLAGS = $(unittest_CPPFLAGS)
intfeaturemap_test_LDADD = $(TRAINING_LIBS)
endif # !DISABLED_LEGACY_ENGINE

# intsimdmatrix_test gets the HAVE_AVX2 / HAVE_SSE4_1 conditionals as macros.
intsimdmatrix_test_SOURCES = unittest/intsimdmatrix_test.cc
intsimdmatrix_test_CPPFLAGS = $(unittest_CPPFLAGS)
if HAVE_AVX2
intsimdmatrix_test_CPPFLAGS += -DHAVE_AVX2
endif
if HAVE_SSE4_1
intsimdmatrix_test_CPPFLAGS += -DHAVE_SSE4_1
endif
intsimdmatrix_test_LDADD = $(TESS_LIBS)

lang_model_test_SOURCES = unittest/lang_model_test.cc
lang_model_test_CPPFLAGS = $(unittest_CPPFLAGS)
lang_model_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)

layout_test_SOURCES = unittest/layout_test.cc
layout_test_CPPFLAGS = $(unittest_CPPFLAGS)
layout_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)

ligature_table_test_SOURCES = unittest/ligature_table_test.cc
ligature_table_test_CPPFLAGS = $(unittest_CPPFLAGS)
ligature_table_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
ligature_table_test_LDADD += $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
ligature_table_test_LDADD += $(pangocairo_LIBS) $(pangoft2_LIBS)
ligature_table_test_LDADD += $(cairo_LIBS) $(pango_LIBS)

linlsq_test_SOURCES = unittest/linlsq_test.cc
linlsq_test_CPPFLAGS = $(unittest_CPPFLAGS)
linlsq_test_LDADD = $(TESS_LIBS)

list_test_SOURCES = unittest/list_test.cc
list_test_CPPFLAGS = $(unittest_CPPFLAGS)
list_test_LDADD = $(TESS_LIBS)

loadlang_test_SOURCES = unittest/loadlang_test.cc
loadlang_test_CPPFLAGS = $(unittest_CPPFLAGS)
loadlang_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)

lstm_recode_test_SOURCES = unittest/lstm_recode_test.cc
lstm_recode_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstm_recode_test_LDADD = $(TRAINING_LIBS)

lstm_squashed_test_SOURCES = unittest/lstm_squashed_test.cc
lstm_squashed_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstm_squashed_test_LDADD = $(TRAINING_LIBS)

lstm_test_SOURCES = unittest/lstm_test.cc
lstm_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstm_test_LDADD = $(TRAINING_LIBS)

lstmtrainer_test_SOURCES = unittest/lstmtrainer_test.cc
lstmtrainer_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstmtrainer_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)

if !DISABLED_LEGACY_ENGINE
mastertrainer_test_SOURCES = unittest/mastertrainer_test.cc
mastertrainer_test_CPPFLAGS = $(unittest_CPPFLAGS)
mastertrainer_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
endif # !DISABLED_LEGACY_ENGINE

matrix_test_SOURCES = unittest/matrix_test.cc
matrix_test_CPPFLAGS = $(unittest_CPPFLAGS)
matrix_test_LDADD = $(TESS_LIBS)

networkio_test_SOURCES = unittest/networkio_test.cc
networkio_test_CPPFLAGS = $(unittest_CPPFLAGS)
networkio_test_LDADD = $(TESS_LIBS)

normstrngs_test_SOURCES = unittest/normstrngs_test.cc
normstrngs_test_CPPFLAGS = $(unittest_CPPFLAGS)
normstrngs_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)

nthitem_test_SOURCES = unittest/nthitem_test.cc
nthitem_test_CPPFLAGS = $(unittest_CPPFLAGS)
nthitem_test_LDADD = $(TESS_LIBS)

if !DISABLED_LEGACY_ENGINE
osd_test_SOURCES = unittest/osd_test.cc
osd_test_CPPFLAGS = $(unittest_CPPFLAGS)
osd_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)
endif # !DISABLED_LEGACY_ENGINE

pagesegmode_test_SOURCES = unittest/pagesegmode_test.cc
pagesegmode_test_CPPFLAGS = $(unittest_CPPFLAGS)
pagesegmode_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)

pango_font_info_test_SOURCES = unittest/pango_font_info_test.cc
pango_font_info_test_CPPFLAGS = $(unittest_CPPFLAGS)
pango_font_info_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
pango_font_info_test_LDADD += $(ICU_I18N_LIBS)
pango_font_info_test_LDADD += $(pangocairo_LIBS)
pango_font_info_test_LDADD += $(pangoft2_LIBS)

paragraphs_test_SOURCES = unittest/paragraphs_test.cc
paragraphs_test_CPPFLAGS = $(unittest_CPPFLAGS)
paragraphs_test_LDADD = $(TESS_LIBS)

if !DISABLED_LEGACY_ENGINE
params_model_test_SOURCES = unittest/params_model_test.cc
params_model_test_CPPFLAGS = $(unittest_CPPFLAGS)
params_model_test_LDADD = $(TRAINING_LIBS)
endif # !DISABLED_LEGACY_ENGINE

# progress_test is the only test here which also links $(GMOCK_LIBS).
progress_test_SOURCES = unittest/progress_test.cc
progress_test_CPPFLAGS = $(unittest_CPPFLAGS)
progress_test_LDFLAGS = $(LEPTONICA_LIBS)
progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)

qrsequence_test_SOURCES = unittest/qrsequence_test.cc
qrsequence_test_CPPFLAGS = $(unittest_CPPFLAGS)
qrsequence_test_LDADD = $(TESS_LIBS)
# Build settings of further unit tests. Each stanza lists the preprocessor
# flags, the sources and the link libraries of one test program.
recodebeam_test_CPPFLAGS = $(unittest_CPPFLAGS)
recodebeam_test_SOURCES = unittest/recodebeam_test.cc
recodebeam_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)

rect_test_CPPFLAGS = $(unittest_CPPFLAGS)
rect_test_SOURCES = unittest/rect_test.cc
rect_test_LDADD = $(TESS_LIBS)

resultiterator_test_CPPFLAGS = $(unittest_CPPFLAGS)
resultiterator_test_SOURCES = unittest/resultiterator_test.cc
resultiterator_test_LDADD = \
  $(TRAINING_LIBS) \
  $(LEPTONICA_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)

scanutils_test_CPPFLAGS = $(unittest_CPPFLAGS)
scanutils_test_SOURCES = unittest/scanutils_test.cc
scanutils_test_LDADD = $(TRAINING_LIBS)

# shapetable_test is only defined when the legacy engine is enabled.
if !DISABLED_LEGACY_ENGINE
shapetable_test_CPPFLAGS = $(unittest_CPPFLAGS)
shapetable_test_SOURCES = unittest/shapetable_test.cc
shapetable_test_LDADD = $(TRAINING_LIBS)
endif # !DISABLED_LEGACY_ENGINE

stats_test_CPPFLAGS = $(unittest_CPPFLAGS)
stats_test_SOURCES = unittest/stats_test.cc
stats_test_LDADD = $(TESS_LIBS)

stridemap_test_CPPFLAGS = $(unittest_CPPFLAGS)
stridemap_test_SOURCES = unittest/stridemap_test.cc
stridemap_test_LDADD = $(TESS_LIBS)

# stringrenderer_test additionally links the ICU, pango and cairo libraries.
stringrenderer_test_CPPFLAGS = $(unittest_CPPFLAGS)
stringrenderer_test_SOURCES = unittest/stringrenderer_test.cc
stringrenderer_test_LDADD = \
  $(TRAINING_LIBS) $(LEPTONICA_LIBS) \
  $(ICU_I18N_LIBS) $(ICU_UC_LIBS) \
  $(pangocairo_LIBS) $(pangoft2_LIBS) \
  $(cairo_LIBS) $(pango_LIBS)

tablefind_test_CPPFLAGS = $(unittest_CPPFLAGS)
tablefind_test_SOURCES = unittest/tablefind_test.cc
tablefind_test_LDADD = $(TESS_LIBS)

tablerecog_test_CPPFLAGS = $(unittest_CPPFLAGS)
tablerecog_test_SOURCES = unittest/tablerecog_test.cc
tablerecog_test_LDADD = $(TESS_LIBS)

tabvector_test_CPPFLAGS = $(unittest_CPPFLAGS)
tabvector_test_SOURCES = unittest/tabvector_test.cc
tabvector_test_LDADD = $(TESS_LIBS)

# First sources of tatweel_test (more are appended below).
tatweel_test_SOURCES = \
  unittest/tatweel_test.cc \
  unittest/third_party/utf/rune.c
tatweel_test_SOURCES += unittest/util/utf8/unicodetext.cc
tatweel_test_SOURCES += unittest/util/utf8/unilib.cc
tatweel_test_CPPFLAGS = $(unittest_CPPFLAGS)
tatweel_test_LDADD = $(TRAINING_LIBS)

textlineprojection_test_SOURCES = unittest/textlineprojection_test.cc
textlineprojection_test_CPPFLAGS = $(unittest_CPPFLAGS)
textlineprojection_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)

tfile_test_SOURCES = unittest/tfile_test.cc
tfile_test_CPPFLAGS = $(unittest_CPPFLAGS)
tfile_test_LDADD = $(TESS_LIBS)

unichar_test_SOURCES = unittest/unichar_test.cc
unichar_test_CPPFLAGS = $(unittest_CPPFLAGS)
unichar_test_LDADD = $(TRAINING_LIBS) $(ICU_UC_LIBS)

unicharcompress_test_SOURCES = unittest/unicharcompress_test.cc
unicharcompress_test_CPPFLAGS = $(unittest_CPPFLAGS)
unicharcompress_test_LDADD = $(TRAINING_LIBS) $(ICU_UC_LIBS)

unicharset_test_SOURCES = unittest/unicharset_test.cc
unicharset_test_CPPFLAGS = $(unittest_CPPFLAGS)
unicharset_test_LDADD = $(TRAINING_LIBS) $(ICU_UC_LIBS)

validate_grapheme_test_SOURCES = unittest/validate_grapheme_test.cc
validate_grapheme_test_CPPFLAGS = $(unittest_CPPFLAGS)
validate_grapheme_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)

validate_indic_test_SOURCES = unittest/validate_indic_test.cc
validate_indic_test_CPPFLAGS = $(unittest_CPPFLAGS)
validate_indic_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)

validate_khmer_test_SOURCES = unittest/validate_khmer_test.cc
validate_khmer_test_CPPFLAGS = $(unittest_CPPFLAGS)
validate_khmer_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)

validate_myanmar_test_SOURCES = unittest/validate_myanmar_test.cc
validate_myanmar_test_CPPFLAGS = $(unittest_CPPFLAGS)
validate_myanmar_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)

validator_test_SOURCES = unittest/validator_test.cc
validator_test_CPPFLAGS = $(unittest_CPPFLAGS)
validator_test_LDADD = $(TRAINING_LIBS) $(ICU_UC_LIBS)

# for windows
if T_WIN
apiexample_test_LDADD += -lws2_32
intsimdmatrix_test_LDADD += -lws2_32
matrix_test_LDADD += -lws2_32
if !DISABLED_LEGACY_ENGINE
osd_test_LDADD += -lws2_32
endif # !DISABLED_LEGACY_ENGINE
loadlang_test_LDADD += -lws2_32
endif

# apiexample_test needs the phototest files in the build tree.
EXTRA_apiexample_test_DEPENDENCIES = $(abs_top_builddir)/test/testing/phototest.tif
EXTRA_apiexample_test_DEPENDENCIES += $(abs_top_builddir)/test/testing/phototest.txt

# The files are linked from $(TESTING_DIR). 'ln -sf' replaces a stale or
# dangling link instead of failing with "File exists" when the rule reruns.
$(abs_top_builddir)/test/testing/phototest.tif:
	mkdir -p $(top_builddir)/test/testing
	ln -sf $(TESTING_DIR)/phototest.tif $(top_builddir)/test/testing/phototest.tif

$(abs_top_builddir)/test/testing/phototest.txt:
	mkdir -p $(top_builddir)/test/testing
	ln -sf $(TESTING_DIR)/phototest.txt $(top_builddir)/test/testing/phototest.txt

# Some tests require a local tmp directory.

$(check_PROGRAMS): | tmp

tmp:
	mkdir -p tmp

# Some tests require a well defined set of the following font files.

fonts = ae_Arab.ttf
fonts += Arial_Bold_Italic.ttf
fonts += DejaVuSans-ExtraLight.ttf
fonts += Lohit-Hindi.ttf
fonts += Times_New_Roman.ttf
fonts += UnBatang.ttf
fonts += Verdana.ttf

# These tests depend on installed model files and fonts:
#
# apiexample_test baseapi_test lang_model_test layout_test
# ligature_table_test loadlang_test lstm_recode_test lstm_squashed_test
# lstm_test lstmtrainer_test mastertrainer_test osd_test
# pagesegmode_test pango_font_info_test progress_test
# recodebeam_test resultiterator_test stringrenderer_test
# textlineprojection_test unicharcompress_test
#
# Instead of fine-tuned dependencies the following lines
# simply require those dependencies for all tests.
# That can be improved if necessary.
#
# NOTE(review): TESSDATA_BEST_DIR and TESSDATA_FAST_DIR are not assigned in
# the visible part of this file - confirm that they are defined elsewhere.

$(check_PROGRAMS): | $(LANGDATA_DIR)
$(check_PROGRAMS): | $(TESSDATA_DIR)
$(check_PROGRAMS): | $(TESSDATA_BEST_DIR)
$(check_PROGRAMS): | $(TESSDATA_FAST_DIR)
$(check_PROGRAMS): | $(fonts:%=$(TESTING_DIR)/%)

# Missing data directories are not fetched automatically: print the
# command which installs them and fail.
$(LANGDATA_DIR) $(TESSDATA_DIR) $(TESSDATA_BEST_DIR) $(TESSDATA_FAST_DIR):
	@echo "Some unit tests require $@."
	@echo "It can be installed manually by running this command:"
	@echo " git clone https://github.com/tesseract-ocr/$$(basename $@).git $@"
	@exit 1

# Font download rules.
#
# The fonts are order-only prerequisites, so only their existence is
# checked. A failed download must therefore never leave a file at the
# target path:
# - curl runs with -f, so an HTTP error fails the recipe instead of
#   storing the error page of the server as font file.
# - Data which is written directly by curl or tar goes to $@.tmp and is
#   renamed to $@ only after the command succeeded.

$(TESTING_DIR)/Arial_Bold_Italic.ttf:
	curl -fsSL -o Arial.exe https://sourceforge.net/projects/corefonts/files/the%20fonts/final/arial32.exe/download
	cabextract -F Arialbi.TTF -q Arial.exe
	rm Arial.exe
	mv Arialbi.TTF $@

$(TESTING_DIR)/DejaVuSans-ExtraLight.ttf:
	curl -fsSL https://sourceforge.net/projects/dejavu/files/dejavu/2.37/dejavu-fonts-ttf-2.37.tar.bz2 | \
	  tar -xjO dejavu-fonts-ttf-2.37/ttf/DejaVuSans-ExtraLight.ttf >$@.tmp
	mv $@.tmp $@

$(TESTING_DIR)/Lohit-Hindi.ttf:
	curl -fsSL https://releases.pagure.org/lohit/lohit-hindi-ttf-2.4.3.tar.gz | \
	  tar -xzO lohit-hindi-ttf-2.4.3/Lohit-Hindi.ttf >$@.tmp
	mv $@.tmp $@

$(TESTING_DIR)/Times_New_Roman.ttf:
	curl -fsSL -o Times.exe https://sourceforge.net/projects/corefonts/files/the%20fonts/final/times32.exe/download
	cabextract -F Times.TTF -q Times.exe
	rm Times.exe
	mv Times.TTF $@

$(TESTING_DIR)/UnBatang.ttf:
	curl -fsSL -o $@.tmp https://salsa.debian.org/fonts-team/fonts-unfonts-core/-/raw/master/UnBatang.ttf
	mv $@.tmp $@

$(TESTING_DIR)/Verdana.ttf:
	curl -fsSL -o Verdana.exe https://sourceforge.net/projects/corefonts/files/the%20fonts/final/verdan32.exe/download
	cabextract -F Verdana.TTF -q Verdana.exe
	rm Verdana.exe
	mv Verdana.TTF $@

$(TESTING_DIR)/ae_Arab.ttf:
	curl -fsSL -o $@.tmp https://salsa.debian.org/fonts-team/fonts-arabeyes/-/raw/master/ae_Arab.ttf
	mv $@.tmp $@
tesseract-5.5.0/README.md000066400000000000000000000177161471420406600150150ustar00rootroot00000000000000# Tesseract OCR [![Coverity Scan Build Status](https://scan.coverity.com/projects/tesseract-ocr/badge.svg)](https://scan.coverity.com/projects/tesseract-ocr) [![CodeQL](https://github.com/tesseract-ocr/tesseract/workflows/CodeQL/badge.svg)](https://github.com/tesseract-ocr/tesseract/security/code-scanning) [![OSS-Fuzz](https://img.shields.io/badge/oss--fuzz-fuzzing-brightgreen)](https://issues.oss-fuzz.com/issues?q=is:open%20title:tesseract-ocr) \ [![GitHub
license](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](https://raw.githubusercontent.com/tesseract-ocr/tesseract/main/LICENSE) [![Downloads](https://img.shields.io/badge/download-all%20releases-brightgreen.svg)](https://github.com/tesseract-ocr/tesseract/releases/) ## Table of Contents * [Tesseract OCR](#tesseract-ocr) * [About](#about) * [Brief history](#brief-history) * [Installing Tesseract](#installing-tesseract) * [Running Tesseract](#running-tesseract) * [For developers](#for-developers) * [Support](#support) * [License](#license) * [Dependencies](#dependencies) * [Latest Version of README](#latest-version-of-readme) ## About This package contains an **OCR engine** - `libtesseract` and a **command line program** - `tesseract`. Tesseract 4 adds a new neural net (LSTM) based [OCR engine](https://en.wikipedia.org/wiki/Optical_character_recognition) which is focused on line recognition, but also still supports the legacy Tesseract OCR engine of Tesseract 3 which works by recognizing character patterns. Compatibility with Tesseract 3 is enabled by using the Legacy OCR Engine mode (--oem 0). It also needs [traineddata](https://tesseract-ocr.github.io/tessdoc/Data-Files.html) files which support the legacy engine, for example those from the [tessdata](https://github.com/tesseract-ocr/tessdata) repository. Stefan Weil is the current lead developer. Ray Smith was the lead developer until 2018. The maintainer is Zdenko Podobny. For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/main/AUTHORS) and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors). Tesseract has **unicode (UTF-8) support**, and can **recognize [more than 100 languages](https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html)** "out of the box". Tesseract supports **[various image formats](https://tesseract-ocr.github.io/tessdoc/InputFormats)** including PNG, JPEG and TIFF. 
Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV, ALTO and PAGE. You should note that in many cases, in order to get better OCR results, you'll need to **[improve the quality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) of the image** you are giving Tesseract. This project **does not include a GUI application**. If you need one, please see the [3rdParty](https://tesseract-ocr.github.io/tessdoc/User-Projects-%E2%80%93-3rdParty.html) documentation. Tesseract **can be trained to recognize other languages**. See [Tesseract Training](https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html) for more information. ## Brief history Tesseract was originally developed at Hewlett-Packard Laboratories Bristol UK and at Hewlett-Packard Co, Greeley Colorado USA between 1985 and 1994, with some more changes made in 1996 to port to Windows, and some C++izing in 1998. In 2005 Tesseract was open sourced by HP. From 2006 until November 2018 it was developed by Google. Major version 5 is the current stable version and started with release [5.0.0](https://github.com/tesseract-ocr/tesseract/releases/tag/5.0.0) on November 30, 2021. Newer minor versions and bugfix versions are available from [GitHub](https://github.com/tesseract-ocr/tesseract/releases/). Latest source code is available from [main branch on GitHub](https://github.com/tesseract-ocr/tesseract/tree/main). Open issues can be found in [issue tracker](https://github.com/tesseract-ocr/tesseract/issues), and [planning documentation](https://tesseract-ocr.github.io/tessdoc/Planning.html). See **[Release Notes](https://tesseract-ocr.github.io/tessdoc/ReleaseNotes.html)** and **[Change Log](https://github.com/tesseract-ocr/tesseract/blob/main/ChangeLog)** for more details of the releases. 
## Installing Tesseract You can either [install Tesseract via a pre-built binary package](https://tesseract-ocr.github.io/tessdoc/Installation.html) or [build it from source](https://tesseract-ocr.github.io/tessdoc/Compiling.html). Before building Tesseract from source, please check that your system has one of the [supported compilers](https://tesseract-ocr.github.io/tessdoc/supported-compilers.html). ## Running Tesseract Basic **[command line usage](https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html)**: tesseract imagename outputbase [-l lang] [--oem ocrenginemode] [--psm pagesegmode] [configfiles...] For more information about the various command line options use `tesseract --help` or `man tesseract`. Examples can be found in the [documentation](https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html#simplest-invocation-to-ocr-an-image). ## For developers Developers can use the `libtesseract` [C](https://github.com/tesseract-ocr/tesseract/blob/main/include/tesseract/capi.h) or [C++](https://github.com/tesseract-ocr/tesseract/blob/main/include/tesseract/baseapi.h) API to build their own applications. If you need bindings to `libtesseract` for other programming languages, please see the [wrapper](https://tesseract-ocr.github.io/tessdoc/AddOns.html#tesseract-wrappers) section in the AddOns documentation. Documentation of Tesseract generated from source code by Doxygen can be found on [tesseract-ocr.github.io](https://tesseract-ocr.github.io/). ## Support Before you submit an issue, please review **[the guidelines for this repository](https://github.com/tesseract-ocr/tesseract/blob/main/CONTRIBUTING.md)**. For support, first read the [documentation](https://tesseract-ocr.github.io/tessdoc/), particularly the [FAQ](https://tesseract-ocr.github.io/tessdoc/FAQ.html) to see if your problem is addressed there.
If not, search the [Tesseract user forum](https://groups.google.com/g/tesseract-ocr), the [Tesseract developer forum](https://groups.google.com/g/tesseract-dev) and [past issues](https://github.com/tesseract-ocr/tesseract/issues), and if you still can't find what you need, ask for support in the mailing-lists. Mailing-lists: * [tesseract-ocr](https://groups.google.com/g/tesseract-ocr) - For tesseract users. * [tesseract-dev](https://groups.google.com/g/tesseract-dev) - For tesseract developers. Please report an issue only for a **bug**, not for asking questions. ## License The code in this repository is licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. **NOTE**: This software depends on other packages that may be licensed under different open source licenses. Tesseract uses the [Leptonica library](http://leptonica.com/) which essentially uses a [BSD 2-clause license](http://leptonica.com/about-the-license.html). ## Dependencies Tesseract uses the [Leptonica library](https://github.com/DanBloomberg/leptonica) for opening input images (i.e. image files, not documents such as PDF). It is suggested to use leptonica with built-in support for [zlib](https://zlib.net), [png](https://sourceforge.net/projects/libpng) and [tiff](http://www.simplesystems.org/libtiff) (for multipage tiff).
## Latest Version of README For the latest online version of the README.md see: tesseract-5.5.0/VERSION000066400000000000000000000000061471420406600145660ustar00rootroot000000000000005.5.0 tesseract-5.5.0/appveyor.yml000066400000000000000000000027461471420406600161230ustar00rootroot00000000000000environment: matrix: - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2022 platform: Win64 configuration: - Release cache: - c:/Users/appveyor/.sw -> appveyor.yml only_commits: files: - appveyor.yml - '**.cpp' - '**.h' - 'unittest/**.c' - 'unittest/**.cc' before_build: - git submodule update --init --recursive - curl -fsS -L -o dl.zip https://software-network.org/client/sw-master-windows_x86_64-client.zip - 7z x dl.zip - set PATH=%PATH%;%cd% build_script: - sw -version # -show-output - show command output # debug build causes long builds (> 1h), appveyor drops them - sw -platform %platform% -config r build -Dwith-tests=1 # test - git clone https://github.com/egorpugin/tessdata tessdata_unittest - ps: Copy-Item -Path "tessdata_unittest\fonts\*" -Destination "test\testing" -Recurse - sw -platform %platform% -config r test -Dwith-tests=1 -Dskip-tests=lstm,lstm_recode after_build: - 7z a tesseract.zip %APPVEYOR_BUILD_FOLDER%\.sw\out\**\*.exe %APPVEYOR_BUILD_FOLDER%\.sw\out\**\*.dll #- 7z a tesseract.zip %APPVEYOR_BUILD_FOLDER%\.sw\Windows_*_Shared_Release_MSVC_*\*.exe %APPVEYOR_BUILD_FOLDER%\.sw\Windows_*_Shared_Release_MSVC_*\*.dll on_finish: # gather tests - ps: $wc = New-Object 'System.Net.WebClient' - ps: $wc.UploadFile("https://ci.appveyor.com/api/testresults/junit/$($env:APPVEYOR_JOB_ID)", (Resolve-Path .\.sw\test\results.xml)) artifacts: - path: tesseract.zip name: tesseract-$(APPVEYOR_BUILD_VERSION) tesseract-5.5.0/autogen.sh000077500000000000000000000076651471420406600155410ustar00rootroot00000000000000#!/bin/sh # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This is a simple script which is meant to help developers
# better deal with the GNU autotools, specifically:
#
#   aclocal
#   libtoolize
#   autoconf
#   autoheader
#   automake
#
# The whole thing is quite complex...
#
# The idea is to run this collection of tools on a single platform,
# typically the main development platform, running a recent version of
# autoconf. In theory, if we had these tools on each platform where we
# ever expected to port the software, we would never need to check in
# more than a few autotools configuration files. However, the whole
# idea is to generate a configure script and associated files in a way
# that is portable across platforms, so we *have* to check in a whole
# bunch of files generated by all these tools.

# The real source files are:
#
# acinclude.m4 (used by aclocal)
# configure.ac (main autoconf file)
# Makefile.am, */Makefile.am (automake config files)
#
# All the rest is auto-generated.

# "autogen.sh clean" first removes previously generated files.
# Note that the script does not stop afterwards: it falls through and
# regenerates everything from scratch.
if [ "$1" = "clean" ]; then
    echo "Cleaning..."
    rm configure aclocal.m4
    rm m4/l*
    rm config/*
    rmdir config
    find . -iname "Makefile.in" -type f -exec rm '{}' +
fi

# Print a short failure notice and abort the script with exit status 1.
# Used as the "|| bail_out" handler of every generation step below.
bail_out()
{
    echo
    echo " Something went wrong, bailing out!"
    echo
    exit 1
}

# Prevent any errors that might result from failing to properly invoke
# `libtoolize` or `glibtoolize,` whichever is present on your system,
# from occurring by testing for its existence and capturing the absolute path to
# its location for caching purposes prior to using it later on in 'Step 2:'
if command -v libtoolize >/dev/null 2>&1; then
    LIBTOOLIZE="$(command -v libtoolize)"
elif command -v glibtoolize >/dev/null 2>&1; then
    LIBTOOLIZE="$(command -v glibtoolize)"
else
    echo "Unable to find a valid copy of libtoolize or glibtoolize in your PATH!"
    bail_out
fi

# --- Step 1: Generate aclocal.m4 from:
#             . acinclude.m4
#             . config/*.m4 (these files are referenced in acinclude.m4)

mkdir -p config

echo "Running aclocal"
aclocal -I config || bail_out

# --- Step 2:

echo "Running $LIBTOOLIZE"
# LIBTOOLIZE holds an absolute path, so it is quoted to survive
# whitespace in the installation directory.
"$LIBTOOLIZE" -f -c || bail_out
"$LIBTOOLIZE" --automake || bail_out

# Run aclocal a 2nd time because glibtoolize created additional m4 files.
echo "Running aclocal"
aclocal -I config || bail_out

# --- Step 3: Generate configure from:
#             . configure.ac
#

echo "Running autoconf"
autoconf || bail_out

# An unexpanded PKG_CHECK_MODULES in the output means the pkg-config m4
# macros were not available when autoconf ran.
if grep -q PKG_CHECK_MODULES configure; then
    # The generated configure is invalid because pkg-config is unavailable.
    rm configure
    echo "Missing pkg-config. Check the build requirements."
    bail_out
fi

# --- Step 4: Generate config/config.h.in from:
#             . configure.ac (look for AM_CONFIG_HEADER tag or AC_CONFIG_HEADER tag)

echo "Running autoheader"
autoheader -f || bail_out

# --- Step 5: Generate Makefile.in, src/Makefile.in, and a whole bunch of
#             files in config (config.guess, config.sub, depcomp,
#             install-sh, missing, mkinstalldirs) plus COPYING and
#             INSTALL from:
#             . Makefile.am
#             . src/Makefile.am
#
# Using --add-missing --copy makes sure that, if these files are missing,
# they are copied from the system so they can be used in a distribution.
echo "Running automake --add-missing --copy" automake --add-missing --copy --warnings=all || bail_out echo "" echo "All done." echo "To build the software now, do something like:" echo "" echo "$ ./configure [--enable-debug] [...other options]" tesseract-5.5.0/cmake/000077500000000000000000000000001471420406600146025ustar00rootroot00000000000000tesseract-5.5.0/cmake/BuildFunctions.cmake000066400000000000000000000020211471420406600205270ustar00rootroot00000000000000# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ################################################################################ # # macros and functions # ################################################################################ ######################################## # FUNCTION project_group ######################################## function(project_group target name) set_target_properties(${target} PROPERTIES FOLDER ${name}) endfunction(project_group) ################################################################################ tesseract-5.5.0/cmake/CheckFunctions.cmake000066400000000000000000000046171471420406600205220ustar00rootroot00000000000000# Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. 
You may obtain a copy of # the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by # applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. # ############################################################################## # # macros and functions # # ############################################################################## # ############################################################################## # FUNCTION check_leptonica_tiff_support # ############################################################################## function(check_leptonica_tiff_support) # check if leptonica was build with tiff support set result to # LEPT_TIFF_RESULT set(TIFF_TEST "#include \"leptonica/allheaders.h\"\n" "int main() {\n" " l_uint8 *data = NULL;\n" " size_t size = 0;\n" " PIX* pix = pixCreate(3, 3, 4);\n" " l_int32 ret_val = pixWriteMemTiff(&data, &size, pix, IFF_TIFF_G3);\n" " pixDestroy(&pix);\n" " lept_free(data);\n" " return ret_val;}\n") if(${CMAKE_VERSION} VERSION_LESS "3.25") message(STATUS "Testing TIFF support in Leptonica is available with CMake >= 3.25 (you have ${CMAKE_VERSION}))") else() set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE}) try_run( LEPT_TIFF_RESULT LEPT_TIFF_COMPILE_SUCCESS SOURCE_FROM_CONTENT tiff_test.cpp "${TIFF_TEST}" CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${Leptonica_INCLUDE_DIRS}" LINK_LIBRARIES ${Leptonica_LIBRARIES} COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT) if(NOT LEPT_TIFF_COMPILE_SUCCESS) message(STATUS "COMPILE_OUTPUT: ${COMPILE_OUTPUT}") message(STATUS "Leptonica_INCLUDE_DIRS: ${Leptonica_INCLUDE_DIRS}") message(STATUS "Leptonica_LIBRARIES: ${Leptonica_LIBRARIES}") message(STATUS "LEPT_TIFF_RESULT: ${LEPT_TIFF_RESULT}") message(STATUS "LEPT_TIFF_COMPILE: ${LEPT_TIFF_COMPILE}") 
message(WARNING "Failed to compile test") endif() endif() endfunction(check_leptonica_tiff_support) # ############################################################################## tesseract-5.5.0/cmake/Configure.cmake000066400000000000000000000101701471420406600175240ustar00rootroot00000000000000# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ################################################################################ # # configure # ################################################################################ ######################################## # FUNCTION check_includes ######################################## function(check_includes files) foreach(F ${${files}}) set(name ${F}) string(REPLACE "-" "_" name ${name}) string(REPLACE "." "_" name ${name}) string(REPLACE "/" "_" name ${name}) string(TOUPPER ${name} name) check_include_files(${F} HAVE_${name}) file(APPEND ${AUTOCONFIG_SRC} "/* Define to 1 if you have the <${F}> header file. */\n") file(APPEND ${AUTOCONFIG_SRC} "#cmakedefine HAVE_${name} 1\n") file(APPEND ${AUTOCONFIG_SRC} "\n") endforeach() endfunction(check_includes) ######################################## # FUNCTION check_functions ######################################## function(check_functions functions) foreach(F ${${functions}}) set(name ${F}) string(TOUPPER ${name} name) check_function_exists(${F} HAVE_${name}) file(APPEND ${AUTOCONFIG_SRC} "/* Define to 1 if you have the `${F}' function. 
*/\n") file(APPEND ${AUTOCONFIG_SRC} "#cmakedefine HAVE_${name} 1\n") file(APPEND ${AUTOCONFIG_SRC} "\n") endforeach() endfunction(check_functions) ######################################## # FUNCTION check_types ######################################## function(check_types types) foreach(T ${${types}}) set(name ${T}) string(REPLACE " " "_" name ${name}) string(REPLACE "-" "_" name ${name}) string(REPLACE "." "_" name ${name}) string(REPLACE "/" "_" name ${name}) string(TOUPPER ${name} name) check_type_size(${T} HAVE_${name}) file(APPEND ${AUTOCONFIG_SRC} "/* Define to 1 if the system has the type `${T}'. */\n") file(APPEND ${AUTOCONFIG_SRC} "#cmakedefine HAVE_${name} 1\n") file(APPEND ${AUTOCONFIG_SRC} "\n") endforeach() endfunction(check_types) ######################################## file(WRITE ${AUTOCONFIG_SRC}) include(CheckCSourceCompiles) include(CheckCSourceRuns) include(CheckCXXSourceCompiles) include(CheckCXXSourceRuns) include(CheckFunctionExists) include(CheckIncludeFiles) include(CheckLibraryExists) include(CheckPrototypeDefinition) include(CheckStructHasMember) include(CheckSymbolExists) include(CheckTypeSize) include(TestBigEndian) set(include_files_list dlfcn.h inttypes.h memory.h stdint.h stdlib.h string.h sys/stat.h sys/types.h unistd.h cairo/cairo-version.h pango-1.0/pango/pango-features.h unicode/uchar.h ) # check_includes(include_files_list) set(types_list "long long int" wchar_t ) # check_types(types_list) list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) list(APPEND CMAKE_REQUIRED_LIBRARIES -lm) set(functions_list feenableexcept ) check_functions(functions_list) file(APPEND ${AUTOCONFIG_SRC} " /* Version number */ #cmakedefine PACKAGE_VERSION \"${PACKAGE_VERSION}\" #cmakedefine GRAPHICS_DISABLED ${GRAPHICS_DISABLED} #cmakedefine FAST_FLOAT ${FAST_FLOAT} #cmakedefine DISABLED_LEGACY_ENGINE ${DISABLED_LEGACY_ENGINE} #cmakedefine HAVE_TIFFIO_H ${HAVE_TIFFIO_H} #cmakedefine HAVE_NEON ${HAVE_NEON} #cmakedefine HAVE_LIBARCHIVE ${HAVE_LIBARCHIVE} 
#cmakedefine HAVE_LIBCURL ${HAVE_LIBCURL} ") if(TESSDATA_PREFIX) file(APPEND ${AUTOCONFIG_SRC} " #cmakedefine TESSDATA_PREFIX \"${TESSDATA_PREFIX}\" ") endif() ######################################## ################################################################################ tesseract-5.5.0/cmake/SourceGroups.cmake000066400000000000000000000026251471420406600202510ustar00rootroot00000000000000# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. #include(SourceGroups) set(SSRC ${CMAKE_SOURCE_DIR}) set(BSRC ${CMAKE_BINARY_DIR}) set(_CPP ".*\\.cpp") set(CPP "${_CPP}$") set(_H ".*\\.h") set(H "${_H}$") set(H_CPP "(${H}|${CPP})") source_group("Resource files" ".*\\.(rc|ico)") source_group("api" "${SSRC}/api/${H_CPP}") source_group("arch" "${SSRC}/arch/${H_CPP}") source_group("ccmain" "${SSRC}/ccmain/${H_CPP}") source_group("ccstruct" "${SSRC}/ccstruct/${H_CPP}") source_group("ccutil" "${SSRC}/ccutil/${H_CPP}") source_group("classify" "${SSRC}/classify/${H_CPP}") source_group("cutil" "${SSRC}/cutil/${H_CPP}") source_group("dict" "${SSRC}/dict/${H_CPP}") source_group("lstm" "${SSRC}/lstm/${H_CPP}") source_group("textord" "${SSRC}/textord/${H_CPP}") source_group("viewer" "${SSRC}/viewer/${H_CPP}") source_group("wordrec" "${SSRC}/wordrec/${H_CPP}") tesseract-5.5.0/cmake/templates/000077500000000000000000000000001471420406600166005ustar00rootroot00000000000000tesseract-5.5.0/cmake/templates/TesseractConfig.cmake.in000066400000000000000000000032561471420406600233000ustar00rootroot00000000000000# 
=================================================================================== # The Tesseract CMake configuration file # # ** File generated automatically, do not modify ** # # Usage from an external project: # In your CMakeLists.txt, add these lines: # # find_package(Tesseract REQUIRED) # target_link_libraries(MY_TARGET_NAME Tesseract::libtesseract) # # This file will define the following variables: # - Tesseract_LIBRARIES : The list of all imported targets. # - Tesseract_INCLUDE_DIRS : The Tesseract include directories. # - Tesseract_LIBRARY_DIRS : The Tesseract library directories. # - Tesseract_VERSION : The version of this Tesseract build: "@VERSION_PLAIN@" # - Tesseract_VERSION_MAJOR : Major version part of Tesseract_VERSION: "@VERSION_MAJOR@" # - Tesseract_VERSION_MINOR : Minor version part of Tesseract_VERSION: "@VERSION_MINOR@" # - Tesseract_VERSION_PATCH : Patch version part of Tesseract_VERSION: "@VERSION_PATCH@" # # =================================================================================== include(CMakeFindDependencyMacro) find_dependency(Leptonica) include(${CMAKE_CURRENT_LIST_DIR}/TesseractTargets.cmake) @PACKAGE_INIT@ SET(Tesseract_VERSION @VERSION_PLAIN@) SET(Tesseract_VERSION_MAJOR @VERSION_MAJOR@) SET(Tesseract_VERSION_MINOR @VERSION_MINOR@) SET(Tesseract_VERSION_PATCH @VERSION_PATCH@) set_and_check(Tesseract_INCLUDE_DIRS "@PACKAGE_INCLUDE_DIR@") set_and_check(Tesseract_LIBRARY_DIRS "@PACKAGE_LIBRARY_DIRS@") set(Tesseract_LIBRARIES @tesseract_OUTPUT_NAME@) check_required_components(Tesseract) tesseract-5.5.0/cmake/templates/cmake_uninstall.cmake.in000066400000000000000000000021051471420406600233560ustar00rootroot00000000000000# https://gitlab.kitware.com/cmake/community/wikis/FAQ#can-i-do-make-uninstall-with-cmake if(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt") message(FATAL_ERROR "Cannot find install manifest: @CMAKE_BINARY_DIR@/install_manifest.txt") endif(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt") file(READ 
"@CMAKE_BINARY_DIR@/install_manifest.txt" files) string(REGEX REPLACE "\n" ";" files "${files}") foreach(file ${files}) message(STATUS "Uninstalling $ENV{DESTDIR}${file}") if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") exec_program( "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" OUTPUT_VARIABLE rm_out RETURN_VALUE rm_retval ) if(NOT "${rm_retval}" STREQUAL 0) message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") endif(NOT "${rm_retval}" STREQUAL 0) else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") message(STATUS "File $ENV{DESTDIR}${file} does not exist.") endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") endforeach(file) tesseract-5.5.0/configure.ac000066400000000000000000000470301471420406600160140ustar00rootroot00000000000000# -*-Shell-script-*- # # Copyright (c) Luc Vincent # ---------------------------------------- # Initialization # ---------------------------------------- AC_PREREQ([2.69]) AC_INIT([tesseract], [m4_esyscmd_s([test -d .git && git describe --abbrev=4 2>/dev/null || cat VERSION])], [https://github.com/tesseract-ocr/tesseract/issues],, [https://github.com/tesseract-ocr/tesseract/]) # Store command like options for CXXFLAGS OLD_CXXFLAGS=$CXXFLAGS AC_PROG_CXX([g++ clang++]) # reset compiler flags to initial flags AC_LANG([C++]) AC_LANG_COMPILER_REQUIRE CXXFLAGS=${CXXFLAGS:-""} AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([config]) AC_CONFIG_SRCDIR([src/tesseract.cpp]) AC_PREFIX_DEFAULT([/usr/local]) # Automake configuration. Do not require README file (we use README.md). AM_INIT_AUTOMAKE([foreign subdir-objects nostdinc]) # Define date of package, etc. Could be useful in auto-generated # documentation. 
PACKAGE_YEAR=2024
PACKAGE_DATE="11/10"

abs_top_srcdir=`AS_DIRNAME([$0])`

# Export the package identification both as preprocessor macros in the
# generated config header and as substitution variables.
AC_DEFINE_UNQUOTED([PACKAGE_NAME], ["${PACKAGE_NAME}"], [Name of package])
AC_DEFINE_UNQUOTED([PACKAGE_VERSION], ["${PACKAGE_VERSION}"], [Version number])
AC_DEFINE_UNQUOTED([PACKAGE_YEAR], ["$PACKAGE_YEAR"], [Official year for this release])
AC_DEFINE_UNQUOTED([PACKAGE_DATE], ["$PACKAGE_DATE"], [Official date of release])

AC_SUBST([PACKAGE_NAME])
AC_SUBST([PACKAGE_VERSION])
AC_SUBST([PACKAGE_YEAR])
AC_SUBST([PACKAGE_DATE])

GENERIC_LIBRARY_NAME=tesseract
PACKAGE=$GENERIC_LIBRARY_NAME

# Release versioning. Get versions from PACKAGE_VERSION.
# The AX_*_VERSION variables read below are expected to be set by
# AX_SPLIT_VERSION.
AX_SPLIT_VERSION
# Major: drop any leading non-digit characters.
GENERIC_MAJOR_VERSION=$(echo "$AX_MAJOR_VERSION" | $SED 's/^[[^0-9]]*//')
GENERIC_MINOR_VERSION=$AX_MINOR_VERSION
# Micro: keep only the leading run of digits.
GENERIC_MICRO_VERSION=$(echo "$AX_POINT_VERSION" | $SED 's/^\([[0-9]][[0-9]]*\).*/\1/')

# API version (often = GENERIC_MAJOR_VERSION.GENERIC_MINOR_VERSION)
GENERIC_API_VERSION=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION
GENERIC_LIBRARY_VERSION=$GENERIC_MAJOR_VERSION:$GENERIC_MINOR_VERSION
GENERIC_VERSION=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION.$GENERIC_MICRO_VERSION
GENERIC_RELEASE=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION

AC_SUBST([GENERIC_API_VERSION])
AC_SUBST([GENERIC_MAJOR_VERSION])
AC_SUBST([GENERIC_MINOR_VERSION])
AC_SUBST([GENERIC_MICRO_VERSION])
AC_SUBST([GENERIC_LIBRARY_VERSION])
AC_SUBST([GENERIC_LIBRARY_NAME])
AC_SUBST([GENERIC_RELEASE])
AC_SUBST([GENERIC_VERSION])

# The config header include/config_auto.h is generated from the template
# config/config.h.in.
AC_CONFIG_HEADERS([include/config_auto.h:config/config.h.in])

# default conditional
AM_CONDITIONAL([T_WIN], false)
AM_CONDITIONAL([MINGW], false)
AM_CONDITIONAL([GRAPHICS_DISABLED], false)
AC_SUBST([AM_CPPFLAGS])

# Be less noisy by default.
# Can be overridden with `configure --disable-silent-rules` or with `make V=1`.
AM_SILENT_RULES([yes])

#############################
#
# Platform specific setup
#
#############################
AC_CANONICAL_HOST

# add_rt records whether the ADD_RT Automake conditional is enabled for
# this host. The case statement only sets the shell variable; the
# conditional itself is declared exactly once after the case statement, so
# AM_CONDITIONAL([ADD_RT]) is executed on every path through configure.
add_rt=true
case "${host_os}" in
  mingw*)
    AC_DEFINE_UNQUOTED([MINGW], 1, [This is a MinGW system])
    AM_CONDITIONAL([T_WIN], true)
    AM_CONDITIONAL([MINGW], true)
    add_rt=false
    AC_SUBST([AM_LDFLAGS], ['-no-undefined'])
    ;;
  cygwin*)
    add_rt=false
    AC_SUBST([NOUNDEFINED], ['-no-undefined'])
    ;;
  solaris*)
    LIBS="$LIBS -lsocket -lnsl -lrt -lxnet"
    ;;
  *darwin* | *android* | openbsd*)
    # A former "powerpc-*-darwin*" arm was removed: it could never match
    # because "*darwin*" is tested first.
    add_rt=false
    ;;
  *)
    # default: keep add_rt=true
    ;;
esac
AM_CONDITIONAL([ADD_RT], [test "$add_rt" = true])

WERROR=-Werror
# The test code used by AX_CHECK_COMPILE_FLAG uses an empty statement
# and unused macros which must not raise a compiler error, but it must
# be an error if flags like -avx are ignored on ARM and other
# architectures because they are unsupported.
AX_CHECK_COMPILE_FLAG([-Werror=unused-command-line-argument], [WERROR=-Werror=unused-command-line-argument])

## Checks for supported compiler options.
# All SIMD conditionals start out disabled; the host_cpu specific arm
# below re-declares those whose compiler flag is accepted.
AM_CONDITIONAL([HAVE_AVX], false)
AM_CONDITIONAL([HAVE_AVX2], false)
AM_CONDITIONAL([HAVE_AVX512F], false)
AM_CONDITIONAL([HAVE_FMA], false)
AM_CONDITIONAL([HAVE_SSE4_1], false)
AM_CONDITIONAL([HAVE_NEON], false)
AM_CONDITIONAL([HAVE_RVV], false)

# Each probe passes $WERROR as extra flags to AX_CHECK_COMPILE_FLAG and
# stores its outcome as the shell command name "true" or "false", which is
# then used directly as the conditional and as the "if" command.
case "${host_cpu}" in

  amd64|*86*)

    AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false], [$WERROR])
    AM_CONDITIONAL([HAVE_AVX], $avx)
    if $avx; then
      AC_DEFINE([HAVE_AVX], [1], [Enable AVX instructions])
    fi

    AX_CHECK_COMPILE_FLAG([-mavx2], [avx2=true], [avx2=false], [$WERROR])
    AM_CONDITIONAL([HAVE_AVX2], $avx2)
    if $avx2; then
      AC_DEFINE([HAVE_AVX2], [1], [Enable AVX2 instructions])
    fi

    AX_CHECK_COMPILE_FLAG([-mavx512f], [avx512f=true], [avx512f=false], [$WERROR])
    AM_CONDITIONAL([HAVE_AVX512F], $avx512f)
    if $avx512f; then
      AC_DEFINE([HAVE_AVX512F], [1], [Enable AVX512F instructions])
    fi

    AX_CHECK_COMPILE_FLAG([-mfma], [fma=true], [fma=false], [$WERROR])
    AM_CONDITIONAL([HAVE_FMA], $fma)
    if $fma; then
      AC_DEFINE([HAVE_FMA], [1], [Enable FMA instructions])
    fi

    AX_CHECK_COMPILE_FLAG([-msse4.1], [sse41=true], [sse41=false], [$WERROR])
    AM_CONDITIONAL([HAVE_SSE4_1], $sse41)
    if $sse41; then
      AC_DEFINE([HAVE_SSE4_1], [1], [Enable SSE 4.1 instructions])
    fi

    ;;

  aarch64*|arm64)

    # ARMv8 always has NEON and does not need special compiler flags.
    AM_CONDITIONAL([HAVE_NEON], true)
    AC_DEFINE([HAVE_NEON], [1], [Enable NEON instructions])

    ;;

  arm*)

    AX_CHECK_COMPILE_FLAG([-mfpu=neon], [neon=true], [neon=false], [$WERROR])
    AM_CONDITIONAL([HAVE_NEON], $neon)
    if $neon; then
      AC_DEFINE([HAVE_NEON], [1], [Enable NEON instructions])
      NEON_CXXFLAGS="-mfpu=neon"
      AC_SUBST([NEON_CXXFLAGS])
      # Read by the NEON availability checks later in this file.
      check_for_neon=1
    fi

    ;;

  riscv*)

    AX_CHECK_COMPILE_FLAG([-march=rv64gcv], [rvv=true], [rvv=false], [$WERROR])
    AM_CONDITIONAL([HAVE_RVV], $rvv)
    if $rvv; then
      AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions])
      # Read by the RVV availability checks later in this file.
      check_for_rvv=1
    fi

    ;;

  *)

    AC_MSG_WARN([No compiler options for $host_cpu])

    ;;

esac

# check whether feenableexcept is supported. some C libraries (e.g. uclibc) don't.
AC_CHECK_FUNCS([feenableexcept])

# additional checks for NEON targets
# check_for_neon is only assigned by the 32-bit ARM compiler probe, so the
# operand is quoted to keep the test well-formed when it is unset.
if test "x$check_for_neon" = x1; then
  AC_MSG_NOTICE([checking how to detect NEON availability])
  AC_CHECK_FUNCS([getauxval elf_aux_info android_getCpuFamily])

  if test "$ac_cv_func_getauxval" = no && test "$ac_cv_func_elf_aux_info" = no && test "$ac_cv_func_android_getCpuFamily" = no; then
    AC_MSG_WARN([NEON is available, but we don't know how to check for it. Will not be able to use NEON.])
  fi
fi

# additional checks for RVV targets
if test "x$check_for_rvv" = x1; then
  AC_MSG_NOTICE([checking how to detect RVV availability])
  AC_CHECK_FUNCS([getauxval])

  if test "$ac_cv_func_getauxval" = no; then
    AC_MSG_WARN([RVV is available, but we don't know how to check for it. Will not be able to use RVV.])
  fi
fi

AX_CHECK_COMPILE_FLAG([-fopenmp-simd], [openmp_simd=true], [openmp_simd=false], [$WERROR])
AM_CONDITIONAL([OPENMP_SIMD], $openmp_simd)

# --with-extra-includes=DIR: add DIR to the header search path.
# The -I option goes into CPPFLAGS because that variable is passed to the
# preprocessor for both C and C++ compilations, whereas CFLAGS is only used
# when compiling C sources.
AC_ARG_WITH([extra-includes],
            [AS_HELP_STRING([--with-extra-includes=DIR],
                            [Define an additional directory for include files])],
            [if test -d "$withval" ; then
               CPPFLAGS="$CPPFLAGS -I$withval"
             else
               AC_MSG_ERROR([Cannot stat directory $withval])
             fi])

# --with-extra-libraries=DIR: add DIR to the linker search path.
AC_ARG_WITH([extra-libraries],
            [AS_HELP_STRING([--with-extra-libraries=DIR],
                            [Define an additional directory for library files])],
            [if test -d "$withval" ; then
               LDFLAGS="$LDFLAGS -L$withval"
             else
               AC_MSG_ERROR([Cannot stat directory $withval])
             fi])

# FAST_FLOAT is defined unless --disable-float32 was given.
AC_MSG_CHECKING([--enable-float32 argument])
AC_ARG_ENABLE([float32],
              AS_HELP_STRING([--disable-float32], [disable float and enable double for LSTM]))
AC_MSG_RESULT([$enable_float32])
if test "$enable_float32" != "no"; then
  AC_DEFINE([FAST_FLOAT], [1], [Enable float for LSTM])
fi

# GRAPHICS_DISABLED is only switched on here.
# NOTE(review): its false default is presumably set earlier in this file,
# outside the reviewed section -- confirm.
AC_MSG_CHECKING([--enable-graphics argument])
AC_ARG_ENABLE([graphics],
              AS_HELP_STRING([--disable-graphics], [disable graphics (ScrollView)]))
AC_MSG_RESULT([$enable_graphics])
if test "$enable_graphics" = "no"; then
  AC_DEFINE([GRAPHICS_DISABLED], [], [Disable graphics])
  AM_CONDITIONAL([GRAPHICS_DISABLED], true)
fi
# DISABLED_LEGACY_ENGINE (conditional and define) is set only for
# --disable-legacy.
AC_MSG_CHECKING([--enable-legacy argument])
AC_ARG_ENABLE([legacy],
              AS_HELP_STRING([--disable-legacy], [disable the legacy OCR engine]))
AC_MSG_RESULT([$enable_legacy])
AM_CONDITIONAL([DISABLED_LEGACY_ENGINE], test "$enable_legacy" = "no")
if test "$enable_legacy" = "no"; then
  AC_DEFINE([DISABLED_LEGACY_ENGINE], [1], [Disable legacy OCR engine])
fi

# check whether to build OpenMP support
AC_OPENMP

have_tiff=false
# Note that the first usage of AC_CHECK_HEADERS must be unconditional.
AC_CHECK_HEADERS([tiffio.h], [have_tiff=true], [have_tiff=false])

# Configure arguments which allow disabling some optional libraries.
# Both default to "check": use the library when it is found.
AC_ARG_WITH([archive],
            AS_HELP_STRING([--with-archive],
                           [Build with libarchive which supports compressed model files @<:@default=check@:>@]),
            [], [with_archive=check])
AC_ARG_WITH([curl],
            AS_HELP_STRING([--with-curl],
                           [Build with libcurl which supports processing an image URL @<:@default=check@:>@]),
            [], [with_curl=check])

# MY_CHECK_FRAMEWORK(NAME)
# Test-links an empty program with "-framework NAME" and caches the result in
# my_cv_framework_NAME.  Only when the link succeeded, HAVE_FRAMEWORK_<NAME>
# is defined and FRAMEWORK_<NAME> is set to the linker option.
# The comparison below needs blanks around "=": without them the shell sees
# a single non-empty word, which "test" always treats as true.
# https://lists.apple.com/archives/unix-porting/2009/Jan/msg00026.html
m4_define([MY_CHECK_FRAMEWORK],
  [AC_CACHE_CHECK([if -framework $1 works],[my_cv_framework_$1],
     [save_LIBS="$LIBS"
     LIBS="$LIBS -framework $1"
     AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])],
             [my_cv_framework_$1=yes],
            [my_cv_framework_$1=no])
     LIBS="$save_LIBS"
    ])
   if test "$my_cv_framework_$1" = "yes"; then
       AC_DEFINE(AS_TR_CPP([HAVE_FRAMEWORK_$1]), 1,
            [Define if you have the $1 framework])
   AS_TR_CPP([FRAMEWORK_$1])="-framework $1"
   AC_SUBST(AS_TR_CPP([FRAMEWORK_$1]))
   fi]
)

# On Darwin hosts, use the Accelerate framework when it can be linked.
case "${host_os}" in
  *darwin* | *-macos10*)
    MY_CHECK_FRAMEWORK([Accelerate])
    if test $my_cv_framework_Accelerate = yes; then
      AM_CPPFLAGS="-DHAVE_FRAMEWORK_ACCELERATE $AM_CPPFLAGS"
      AM_LDFLAGS="$AM_LDFLAGS -framework Accelerate"
    fi
    ;;
  *)
    # default
    ;;
esac

# check whether to build tesseract with -fvisibility=hidden -fvisibility-inlines-hidden
# http://gcc.gnu.org/wiki/Visibility
# https://groups.google.com/g/tesseract-dev/c/l2ZFrpgYkSc/m/_cdYSRDSXuUJ
AC_MSG_CHECKING([--enable-visibility argument])
AC_ARG_ENABLE([visibility],
              AS_HELP_STRING([--enable-visibility],
                             [enable experimental build with -fvisibility @<:@default=no@:>@]))
AC_MSG_RESULT([$enable_visibility])
AM_CONDITIONAL([VISIBILITY], [test "$enable_visibility" = "yes"])

# Check if tessdata-prefix is disabled
# The action-if-given argument runs for --enable-tessdata-prefix as well as
# for --disable-tessdata-prefix, so it has to look at $enableval instead of
# unconditionally switching the prefix off.
AC_MSG_CHECKING([whether to use tessdata-prefix])
AC_ARG_ENABLE([tessdata-prefix],
              [AS_HELP_STRING([--disable-tessdata-prefix],
                              [don't set TESSDATA-PREFIX during compile])],
              [AS_IF([test "x$enableval" = xno],
                     [tessdata_prefix="no"],
                     [tessdata_prefix="yes"])],
              [tessdata_prefix="yes"])
AC_MSG_RESULT([$tessdata_prefix])
AM_CONDITIONAL([NO_TESSDATA_PREFIX], [test "$tessdata_prefix" = "no"])

# Detect Clang compiler
# The test program only compiles when __clang__ is defined.
AC_MSG_CHECKING([if compiling with clang])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([], [[
#ifndef __clang__
       not clang
#endif
]])],
[CLANG=yes], [CLANG=no])
AC_MSG_RESULT([$CLANG])

# Check whether to enable debugging
AC_MSG_CHECKING([whether to enable debugging])
AC_ARG_ENABLE([debug],
              AS_HELP_STRING([--enable-debug], [turn on debugging @<:@default=no@:>@]))
AC_MSG_RESULT([$enable_debug])
if test x"$enable_debug" = x"yes"; then
  CXXFLAGS=${CXXFLAGS:-"-O2"}
  AM_CPPFLAGS="$AM_CPPFLAGS -g -Wall -DDEBUG -pedantic"
  AM_CXXFLAGS="$AM_CXXFLAGS -g -Wall -DDEBUG -pedantic"
  if test "x$CLANG" = "xyes"; then
    # https://clang.llvm.org/docs/CommandGuide/clang.html
    # clang treats -Og as -O1
    AM_CPPFLAGS="$AM_CPPFLAGS -O0"
    AM_CXXFLAGS="$AM_CXXFLAGS -O0"
  else
    AM_CPPFLAGS="$AM_CPPFLAGS -Og"
    AM_CXXFLAGS="$AM_CXXFLAGS -Og"
  fi
else
  AM_CXXFLAGS="$AM_CXXFLAGS -O2 -DNDEBUG"
  AM_CPPFLAGS="$AM_CPPFLAGS -O2 -DNDEBUG"
fi

# ----------------------------------------
# Init libtool
# ----------------------------------------

LT_INIT

# ----------------------------------------
# C++ related options
# ----------------------------------------

dnl **********************
dnl Turn on C++17 or newer
dnl **********************

# The second probe overrides the first one, so the newest accepted
# standard wins.
CPLUSPLUS=
AX_CHECK_COMPILE_FLAG([-std=c++17], [CPLUSPLUS=17], [], [$WERROR])
AX_CHECK_COMPILE_FLAG([-std=c++20], [CPLUSPLUS=20], [], [$WERROR])

if test -z "$CPLUSPLUS"; then
  AC_MSG_ERROR([Your compiler does not have the necessary C++17 support! Cannot proceed.])
fi

# Set C++17 or newer support based on platform/compiler
# NOTE(review): the other host_os case statements in this file match Darwin
# with "*darwin*".  The "*-darwin*" pattern below requires a hyphen in front
# of "darwin", so it looks like it cannot match a host_os value such as
# darwin23.1.0 and the libc++ branch is never taken -- confirm whether the
# branch should be enabled or dropped.
case "${host_os}" in
  cygwin*)
    CXXFLAGS="$CXXFLAGS -std=gnu++$CPLUSPLUS"
    ;;
  *-darwin* | *-macos10*)
    CXXFLAGS="$CXXFLAGS -std=c++$CPLUSPLUS"
    if test "x$CLANG" = "xyes"; then
      LDFLAGS="$LDFLAGS -stdlib=libc++"
    fi
    ;;
  *)
    # default
    CXXFLAGS="$CXXFLAGS -std=c++$CPLUSPLUS"
    ;;
esac

# ----------------------------------------
# Check for libraries
# ----------------------------------------

AC_SEARCH_LIBS([pthread_create], [pthread])

# Set PKG_CONFIG_PATH for macOS with Homebrew unless it is already set.
AC_CHECK_PROG([have_brew], brew, true, false)
if $have_brew; then
  brew_prefix=$(brew --prefix)
  if test -z "$PKG_CONFIG_PATH"; then
    PKG_CONFIG_PATH=$brew_prefix/opt/icu4c/lib/pkgconfig:$brew_prefix/opt/libarchive/lib/pkgconfig
    export PKG_CONFIG_PATH
  fi
fi

# ----------------------------------------
# Check for programs needed to build documentation.
# ----------------------------------------

# enable_doc defaults to "check": build the documentation when the tools
# are found.  The default is placed in the DEFAULTS diversion from the
# action-if-not-given argument; action-if-given is intentionally empty.
AM_CONDITIONAL([ASCIIDOC], false)
AM_CONDITIONAL([HAVE_XML_CATALOG_FILES], false)
AC_ARG_ENABLE([doc],
              AS_HELP_STRING([--disable-doc], [disable build of documentation]),
              [],
              [: m4_divert_text([DEFAULTS], [enable_doc=check])])
AS_IF([test "$enable_doc" != "no"], [
  AC_CHECK_PROG([have_asciidoc], asciidoc, true, false)
  AC_CHECK_PROG([have_xsltproc], xsltproc, true, false)
  # macOS with Homebrew requires the environment variable
  # XML_CATALOG_FILES for xsltproc.
if $have_asciidoc && $have_xsltproc; then
    AM_CONDITIONAL([ASCIIDOC], true)
    XML_CATALOG_FILES=
    if $have_brew; then
      catalog_file=$brew_prefix/etc/xml/catalog
      if test -f $catalog_file; then
        AM_CONDITIONAL([HAVE_XML_CATALOG_FILES], true)
        XML_CATALOG_FILES=file:$catalog_file
      else
        AC_MSG_WARN([Missing file $catalog_file.])
      fi
    fi
    AC_SUBST([XML_CATALOG_FILES])
  else
    AS_IF([test "x$enable_doc" != xcheck], [
      AC_MSG_FAILURE(
        [--enable-doc was given, but test for asciidoc and xsltproc failed])
    ])
  fi
])

# ----------------------------------------
# Checks for typedefs, structures, and compiler characteristics.
# ----------------------------------------

AC_CHECK_TYPES([wchar_t],,, [#include "wchar.h"])
AC_CHECK_TYPES([long long int])

# ----------------------------------------
# Test auxiliary packages
# ----------------------------------------

# libcurl is optional: it is only a hard failure when --with-curl was given
# explicitly and the package cannot be found.
AM_CONDITIONAL([HAVE_LIBCURL], false)
AS_IF([test "x$with_curl" != xno], [
  PKG_CHECK_MODULES([libcurl], [libcurl], [have_libcurl=true], [have_libcurl=false])
  AM_CONDITIONAL([HAVE_LIBCURL], $have_libcurl)
  if $have_libcurl; then
    AC_DEFINE([HAVE_LIBCURL], [1], [Enable libcurl])
  else
    AS_IF([test "x$with_curl" != xcheck], [
      AC_MSG_FAILURE(
        [--with-curl was given, but test for libcurl failed])
    ])
  fi
])

# Leptonica is mandatory.
PKG_CHECK_MODULES([LEPTONICA], [lept >= 1.74], [have_lept=true], [have_lept=false])
if $have_lept; then
  CPPFLAGS="$CPPFLAGS $LEPTONICA_CFLAGS"
else
  AC_MSG_ERROR([Leptonica 1.74 or higher is required. Try to install libleptonica-dev package.])
fi

# libarchive is optional, handled the same way as libcurl above.
AM_CONDITIONAL([HAVE_LIBARCHIVE], false)
AS_IF([test "x$with_archive" != xno], [
  PKG_CHECK_MODULES([libarchive], [libarchive], [have_libarchive=true], [have_libarchive=false])
  AM_CONDITIONAL([HAVE_LIBARCHIVE], [$have_libarchive])
  if $have_libarchive; then
    AC_DEFINE([HAVE_LIBARCHIVE], [1], [Enable libarchive])
    CPPFLAGS="$CPPFLAGS $libarchive_CFLAGS"
  else
    AS_IF([test "x$with_archive" != xcheck], [
      AC_MSG_FAILURE(
        [--with-archive was given, but test for libarchive failed])
    ])
  fi
])

# The training tools are enabled by default and switched off below as soon
# as one of ICU, pango or cairo is missing.
# The negations are written "! command" with a blank after the "!" because
# some shells read "!(" as the start of an extended glob pattern.
AM_CONDITIONAL([ENABLE_TRAINING], true)

# Check availability of ICU packages.
PKG_CHECK_MODULES([ICU_UC], [icu-uc >= 52.1], [have_icu_uc=true], [have_icu_uc=false])
PKG_CHECK_MODULES([ICU_I18N], [icu-i18n >= 52.1], [have_icu_i18n=true], [have_icu_i18n=false])
if ! ($have_icu_uc && $have_icu_i18n); then
  AC_MSG_WARN([icu 52.1 or higher is required, but was not found.])
  AC_MSG_WARN([Training tools WILL NOT be built.])
  AC_MSG_WARN([Try to install libicu-dev package.])
  AM_CONDITIONAL([ENABLE_TRAINING], false)
fi

# Check location of pango headers
PKG_CHECK_MODULES([pango], [pango >= 1.38.0], [have_pango=true], [have_pango=false])
if ! $have_pango; then
  AC_MSG_WARN([pango 1.38.0 or higher is required, but was not found.])
  AC_MSG_WARN([Training tools WILL NOT be built.])
  AC_MSG_WARN([Try to install libpango1.0-dev package.])
  AM_CONDITIONAL([ENABLE_TRAINING], false)
fi

# Check location of cairo headers
PKG_CHECK_MODULES([cairo], [cairo], [have_cairo=true], [have_cairo=false])
if ! $have_cairo; then
  AC_MSG_WARN([Training tools WILL NOT be built because of missing cairo library.])
  AC_MSG_WARN([Try to install libcairo2-dev package.])
  AM_CONDITIONAL([ENABLE_TRAINING], false)
fi

# A missing pangocairo or pangoft2 only runs "false"; it does not change
# ENABLE_TRAINING.
PKG_CHECK_MODULES([pangocairo], [pangocairo], [], [false])
PKG_CHECK_MODULES([pangoft2], [pangoft2], [], [false])

# ----------------------------------------
# Final Tasks and Output
# ----------------------------------------

# Output files
AC_CONFIG_FILES([include/tesseract/version.h])
AC_CONFIG_FILES([Makefile tesseract.pc])
AC_CONFIG_FILES([tessdata/Makefile])
AC_CONFIG_FILES([tessdata/configs/Makefile])
AC_CONFIG_FILES([tessdata/tessconfigs/Makefile])
AC_CONFIG_FILES([java/Makefile])
AC_CONFIG_FILES([java/com/Makefile])
AC_CONFIG_FILES([java/com/google/Makefile])
AC_CONFIG_FILES([java/com/google/scrollview/Makefile])
AC_CONFIG_FILES([java/com/google/scrollview/events/Makefile])
AC_CONFIG_FILES([java/com/google/scrollview/ui/Makefile])
AC_CONFIG_FILES([nsis/Makefile])
AC_OUTPUT

# Final message
echo ""
echo "Configuration is done."
echo "You can now build and install $PACKAGE_NAME by running:"
echo ""
echo "$ make"
echo "$ sudo make install"
echo "$ sudo ldconfig"
echo ""

AM_COND_IF([ASCIIDOC], [
  echo "This will also build the documentation."
], [
  AS_IF([test "$enable_doc" = "no"], [
    echo "Documentation will not be built because it was disabled."
  ], [
    echo "Documentation will not be built because asciidoc or xsltproc is missing."
  ])
])

# echo "$ sudo make install LANGS=\"eng ara deu\""
# echo " Or:"
# echo "$ sudo make install-langs"
echo ""

AM_COND_IF([ENABLE_TRAINING], [
  echo "Training tools can be built and installed with:"
  echo ""
  echo "$ make training"
  echo "$ sudo make training-install"
  echo ""],
[
  echo "You cannot build training tools because of missing dependency."
  echo "Check configure output for details."
echo ""] ) # ---------------------------------------- # CONFIG Template # ---------------------------------------- # Fence added in configuration file AH_TOP([ #ifndef CONFIG_AUTO_H #define CONFIG_AUTO_H /* config_auto.h: begin */ ]) # Stuff added at bottom of file AH_BOTTOM([ /* Miscellaneous defines */ #define AUTOCONF 1 /* Not used yet #ifndef NO_GETTEXT #define USING_GETTEXT #endif */ /* config_auto.h: end */ #endif ]) tesseract-5.5.0/doc/000077500000000000000000000000001471420406600142675ustar00rootroot00000000000000tesseract-5.5.0/doc/Doxyfile000066400000000000000000003334321471420406600160050ustar00rootroot00000000000000# Doxyfile 1.8.16 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the configuration # file that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. 
This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = $(name) # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = $(version) # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = doc/ # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. 
ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all generated output in the proper direction. # Possible values are: None, LTR, RTL and Context. # The default value is: None. OUTPUT_TEXT_DIRECTION = None # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. 
Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. 
STRIP_FROM_PATH        = $(builddir)

# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
# path mentioned in the documentation of a class, which tells the reader which
# header file to include in order to use a class. If left blank only the name of
# the header file containing the class definition is used. Otherwise one should
# specify the list of include paths that are normally passed to the compiler
# using the -I flag.

STRIP_FROM_INC_PATH    =

# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
# less readable) file names. This can be useful if your file system doesn't
# support long names like on DOS, Mac, or CD-ROM.
# The default value is: NO.

SHORT_NAMES            = YES

# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
# first line (until the first dot) of a Javadoc-style comment as the brief
# description. If set to NO, the Javadoc-style will behave just like regular Qt-
# style comments (thus requiring an explicit @brief command for a brief
# description.)
# The default value is: NO.

JAVADOC_AUTOBRIEF      = NO

# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
# such as
# /***************
# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
# Javadoc-style will behave just like regular comments and it will not be
# interpreted by doxygen.
# The default value is: NO.

JAVADOC_BANNER         = NO

# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
# line (until the first dot) of a Qt-style comment as the brief description. If
# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
# requiring an explicit \brief command for a brief description.)
# The default value is: NO.

QT_AUTOBRIEF           = NO

# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
# a brief description. This used to be the default behavior.
The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 8 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines (in the resulting output). You can put ^^ in the value part of an # alias to insert a newline as if a physical newline was in the original file. # When you need a literal { or } or , in the value part of an alias you have to # escape them by means of a backslash (\), this can lead to conflicts with the # commands \{ and \} for these it is advised to use the version @{ and @} or use # a double escape (\\{ and \\}) ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". 
For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice # sources only. Doxygen will then generate output that is more tailored for that # language. For instance, namespaces will be presented as modules, types will be # separated into more groups, etc. # The default value is: NO. OPTIMIZE_OUTPUT_SLICE = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. 
The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, # Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files), VHDL, tcl. For instance to make doxygen treat # .inc files as Fortran files (default is PHP), and .f files as C (default is # Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See https://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. # Minimum value: 0, maximum value: 99, default value: 5. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES. TOC_INCLUDE_HEADINGS = 5 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. 
Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = YES # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. 
DISTRIBUTE_GROUP_DOC = NO # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. 
This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual # methods of a class will be included in the documentation. # The default value is: NO. 
EXTRACT_PRIV_VIRTUAL = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. 
# The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO, these declarations will be # included in the documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # (including Cygwin) and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES.
SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. 
If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. 
GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. 
Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. 
# The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. If # EXTRACT_ALL is set to YES then this flag will automatically be disabled. # The default value is: NO. WARN_NO_PARAMDOC = YES # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. # The default value is: NO. WARN_AS_ERROR = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). 
WARN_LOGFILE = $(builddir)/doc/DoxyWarn.log #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = $(srcdir)/include $(srcdir)/src $(srcdir)/unittest # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: https://www.gnu.org/software/libiconv/) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, # *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, *.qsf and *.ice. 
FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.d \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.idl \ *.odl \ *.cs \ *.php \ *.php3 \ *.inc \ *.m \ *.mm \ *.dox \ *.py \ *.f90 \ *.f \ *.vhd \ *.vhdl # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = */.svn/* # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # ANamespace::AClass, ANamespace::*Test # # Note that the wildcards are matched against symbol names, not file paths; to # exclude files or directories use EXCLUDE_PATTERNS instead. EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command).
EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used.
If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. 
INLINE_SOURCES = YES # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # entity all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. 
# # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the # clang parser (see: http://clang.llvm.org/) for more accurate parsing at the # cost of reduced performance. This can be particularly helpful with template # rich C++ code for which doxygen's built-in parser lacks the necessary type # information. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. # The default value is: NO. CLANG_ASSISTED_PARSING = NO # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that # the include paths will already be set by doxygen for the files and directories # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_OPTIONS = # If clang assisted parsing is enabled you can provide the clang parser with the # path to the compilation database (see: # http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files # were built.
This is equivalent to specifying the "-p" option to a clang tool, # such as clang-check. These options will then be passed to the parser. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. CLANG_DATABASE_PATH = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = NO # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file that includes any scripts and style sheets # that doxygen needs, which is dependent on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. 
If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # https://en.wikipedia.org/wiki/Hue for more information. 
For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = YES # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via Javascript. If disabled, the navigation index will # consist of multiple levels of tabs that are statically embedded in every HTML # page. Disable this option to support browsers that do not have Javascript, # like the Qt help browser.
# The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_MENUS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries to 1 will produce a fully collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a fully expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: https://developer.apple.com/xcode/), introduced with OSX # 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See # https://developer.apple.com/library/archive/featuredarticles/DoxygenXcode/_index.html # for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed.
A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. 
The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace # (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. 
Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = YES # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. 
TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. # Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes have effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # https://www.mathjax.org) which uses client side Javascript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want the formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details.
# Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from https://www.mathjax.org before deployment. # The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://www.mathjax.org/mathjax # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. 
Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use <access key> + S # (what the <access key> is depends on the OS and browser, but it is typically # <CTRL>, <ALT>/