pax_global_header00006660000000000000000000000064150667320370014523gustar00rootroot0000000000000052 comment=72632094336524a9c809e129e8b1c52154543a5a ggml-org-ggml-7ec8045/000077500000000000000000000000001506673203700145235ustar00rootroot00000000000000ggml-org-ggml-7ec8045/.editorconfig000066400000000000000000000005671506673203700172100ustar00rootroot00000000000000# https://EditorConfig.org # Top-most EditorConfig file root = true # Unix-style newlines with a newline ending every file, utf-8 charset [*] end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true charset = utf-8 indent_style = space indent_size = 4 [*.md] indent_size = 2 [Makefile] indent_style = tab [prompts/*.txt] insert_final_newline = unset ggml-org-ggml-7ec8045/.github/000077500000000000000000000000001506673203700160635ustar00rootroot00000000000000ggml-org-ggml-7ec8045/.github/pull_request_template.md000066400000000000000000000003421506673203700230230ustar00rootroot00000000000000*For changes to the core `ggml` library (including to the CMake build system), please open a PR in https://github.com/ggml-org/llama.cpp. Doing so will make your PR more visible, better tested and more likely to be reviewed.* ggml-org-ggml-7ec8045/.github/workflows/000077500000000000000000000000001506673203700201205ustar00rootroot00000000000000ggml-org-ggml-7ec8045/.github/workflows/ci.yml000066400000000000000000000156221506673203700212440ustar00rootroot00000000000000name: CI on: push: branches: [ master ] pull_request: branches: [ master ] concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} cancel-in-progress: true jobs: build: strategy: matrix: os: [ubuntu-latest, macos-latest, macos-13, windows-latest] libraries: [shared, static] runs-on: ${{ matrix.os }} steps: - name: Clone uses: actions/checkout@v4 - name: Dependencies for Ubuntu if: matrix.os == 'ubuntu-latest' run: | sudo apt-get update sudo apt-get install llvm - name: Add msbuild to PATH if: matrix.os == 'windows-latest' uses: microsoft/setup-msbuild@v2 - name: Create Build Environment run: mkdir build - name: Configure CMake working-directory: ./build run: cmake .. ${{ contains(matrix.os, 'windows') && '-A x64' || '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++' }} ${{ matrix.libraries == 'static' && '-DBUILD_SHARED_LIBS=OFF' || '-DBUILD_SHARED_LIBS=ON' }} -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/installed -DGGML_METAL=OFF - name: Build working-directory: ./build run: cmake --build . ${{ contains(matrix.os, 'windows') && '--config Release' || '' }} - name: Test working-directory: ./build run: ctest --verbose --timeout 900 ${{ contains(matrix.os, 'windows') && '--build-config Release' || '' }} - name: Install working-directory: ./build run: cmake --build . 
--target install ${{ contains(matrix.os, 'windows') && '--config Release' || '' }} - name: Test CMake config run: | mkdir test-cmake cmake -S examples/test-cmake -B test-cmake -DCMAKE_PREFIX_PATH=${{ github.workspace }}/installed ${{ contains(matrix.os, 'windows') && '-A x64' || '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++' }} cmake --build test-cmake ${{ contains(matrix.os, 'windows') && '--config Release' || '' }} # TODO: simplify the following workflows using a matrix ggml-ci-x64-cpu-low-perf: runs-on: ubuntu-22.04 steps: - name: Clone id: checkout uses: actions/checkout@v4 - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: key: ggml-ci-x64-cpu-low-perf evict-old-files: 1d - name: Dependencies id: depends run: | sudo apt-get update sudo apt-get install build-essential libcurl4-openssl-dev - name: Test id: ggml-ci run: | LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt ggml-ci-arm64-cpu-low-perf: runs-on: ubuntu-22.04-arm steps: - name: Clone id: checkout uses: actions/checkout@v4 - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: key: ggml-ci-arm64-cpu-low-perf evict-old-files: 1d - name: Dependencies id: depends run: | sudo apt-get update sudo apt-get install build-essential libcurl4-openssl-dev - name: Test id: ggml-ci run: | LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt ggml-ci-x64-cpu-high-perf: runs-on: ubuntu-22.04 steps: - name: Clone id: checkout uses: actions/checkout@v4 - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: key: ggml-ci-x64-cpu-high-perf evict-old-files: 1d - name: Dependencies id: depends run: | sudo apt-get update sudo apt-get install build-essential libcurl4-openssl-dev - name: Test id: ggml-ci run: | LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt ggml-ci-arm64-cpu-high-perf: runs-on: ubuntu-22.04-arm steps: - name: Clone id: checkout uses: actions/checkout@v4 - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: key: ggml-ci-arm64-cpu-high-perf evict-old-files: 1d - name: Dependencies id: depends run: | sudo apt-get update sudo apt-get install build-essential libcurl4-openssl-dev - name: Test id: ggml-ci run: | LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt ggml-ci-arm64-cpu-high-perf-sve: runs-on: ubuntu-22.04-arm steps: - name: Clone id: checkout uses: actions/checkout@v4 - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: key: ggml-ci-arm64-cpu-high-perf-sve evict-old-files: 1d - name: Dependencies id: depends run: | sudo apt-get update sudo apt-get install build-essential libcurl4-openssl-dev - name: Test id: ggml-ci run: | LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt ggml-ci-x64-nvidia-cuda: runs-on: [self-hosted, Linux, X64, NVIDIA] steps: - name: Clone id: checkout uses: actions/checkout@v4 - name: Test id: ggml-ci run: | nvidia-smi GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/ggml /mnt/ggml ggml-ci-x64-nvidia-vulkan-cm: runs-on: [self-hosted, Linux, X64, NVIDIA] steps: - name: Clone id: checkout uses: actions/checkout@v4 - name: Test id: ggml-ci run: | vulkaninfo --summary GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/ggml /mnt/ggml ggml-ci-x64-nvidia-vulkan-cm2: runs-on: [self-hosted, Linux, X64, NVIDIA, COOPMAT2] steps: - name: Clone id: checkout uses: actions/checkout@v4 - name: Test id: ggml-ci run: | vulkaninfo --summary 
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/ggml /mnt/ggml ggml-ci-x64-cpu-amx: runs-on: [self-hosted, Linux, X64, CPU, AMX] steps: - name: Clone id: checkout uses: actions/checkout@v4 - name: Test id: ggml-ci run: | bash ./ci/run.sh ~/results/ggml /mnt/ggml ggml-ci-mac-metal: runs-on: [self-hosted, macOS, ARM64] steps: - name: Clone id: checkout uses: actions/checkout@v4 - name: Test id: ggml-ci run: | GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/ggml ~/mnt/ggml ggml-ci-mac-vulkan: runs-on: [self-hosted, macOS, ARM64] steps: - name: Clone id: checkout uses: actions/checkout@v4 - name: Test id: ggml-ci run: | vulkaninfo --summary GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/ggml ~/mnt/ggml ggml-org-ggml-7ec8045/.github/workflows/release.yml000066400000000000000000000010101506673203700222530ustar00rootroot00000000000000name: Release on: push: tags: - 'v*' jobs: release: runs-on: ubuntu-latest permissions: contents: write steps: - name: Checkout code uses: actions/checkout@v4 - name: Create Release id: create_release uses: ggml-org/action-create-release@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: tag_name: ${{ github.ref_name }} release_name: Release ${{ github.ref }} draft: false prerelease: false ggml-org-ggml-7ec8045/.gitignore000066400000000000000000000004601506673203700165130ustar00rootroot00000000000000build/ build-*/ out/ tmp/ models/ models-mnt compile_commands.json CMakeSettings.json .vs/ .vscode/ .idea/ .clangd .venv/ ggml_env/ .exrc .cache .DS_Store .stablelm .gpt-2 src/arm_neon.h tests/arm_neon.h zig-out/ zig-cache/ *.o *.d *.dot *.sw? __pycache__/ # Model files ggml-model-f16.bin *.bat ggml-org-ggml-7ec8045/.gitmodules000066400000000000000000000000001506673203700166660ustar00rootroot00000000000000ggml-org-ggml-7ec8045/AUTHORS000066400000000000000000000340271506673203700156010ustar00rootroot00000000000000# date: Tue Feb 4 13:03:51 EET 2025 # this file is auto-generated by scripts/gen-authors.sh 0cc4m 65a <10104049+65a@users.noreply.github.com> AT Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Adam Tazi <52357206+ad1tazi@users.noreply.github.com> Adrien Gallouët Adrien Gallouët Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com> AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> AidanBeltonS Akarshan Biswas Akarshan Biswas Albert Jin Alberto Cabrera Pérez Alberto Cabrera Pérez Alex Azarov Alex O'Connell <35843486+acon96@users.noreply.github.com> Alex von Gluck IV AmbientL <107641468+AmbientL@users.noreply.github.com> AmirAli Mirian <37371367+amiralimi@users.noreply.github.com> Ananta Bastola Andreas (Andi) Kunar Andreas Kieslinger <47689530+aendk@users.noreply.github.com> Andrei Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com> Andrii Ryzhkov Arjun Ashraful Islam Astariul <43774355+astariul@users.noreply.github.com> AsukaMinato Avi Lumelsky Bart Pelle <3662930+Velocity-@users.noreply.github.com> Ben Ashbaugh Bernhard M. 
Wiedemann Borislav Stanimirov Brad Ito Brad Murray <59848399+bradmurray-dt@users.noreply.github.com> Brian Bryan Lozano Carolinabanana <140120812+Carolinabanana@users.noreply.github.com> CarterLi999 <664681047@qq.com> Cebtenzzre Changyeon Kim Charles Xu <63788048+chaxu01@users.noreply.github.com> Charles Xu Chen Xi Chen Xi Chenguang Li <87689256+noemotiovon@users.noreply.github.com> Chris Elrod Christian Kastner Clint Herron Conrad Kramer Cordeiro <1471463+ocordeiro@users.noreply.github.com> Cristiano Calcagno DAN™ Dan Forbes Dan Johansson <164997844+eddnjjn@users.noreply.github.com> Dan Johansson Daniel Bevenius Daniel Ziegenberg Daniele <57776841+daniandtheweb@users.noreply.github.com> Daulet Zhanguzin Dave Dave Airlie Dave Airlie David Miller DavidKorczynski Davidson Francis Dibakar Gope Didzis Gosko Diego Devesa Diogo Djip007 <3705339+Djip007@users.noreply.github.com> Djip007 Dou Xinpeng <15529241576@163.com> Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com> Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com> Ebey Abraham Eldar Yusupov Emmanuel Durand Engininja2 <139037756+Engininja2@users.noreply.github.com> Eric Zhang <34133756+EZForever@users.noreply.github.com> Erik Scholz Ettore Di Giacinto Eve <139727413+netrunnereve@users.noreply.github.com> F1L1P <78918286+F1L1Pv2@users.noreply.github.com> Faisal Zaghloul FantasyGmm <16450052+FantasyGmm@users.noreply.github.com> Felix Finn Voorhees FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com> Frankie Robertson GainLee George Hindle Georgi Gerganov Gilad S <7817232+giladgd@users.noreply.github.com> Gilad S Gilad S. <7817232+giladgd@users.noreply.github.com> Guillaume Wenzek Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com> Haus1 Herman Semenov HimariO Hirochika Matsumoto Hong Bo PENG Hugo Rosenkranz-Costa Hyunsung Lee IGUILIZ Salah-Eddine <76955987+salahiguiliz@users.noreply.github.com> Ian Bull Ihar Hrachyshka Ikko Eltociear Ashimine Ivan Ivan Filipov <159561759+vanaka11@users.noreply.github.com> Ivan Stepanov Ivan Zdane Jack Mousseau Jack Vial JacobLinCool Jakob Frick Jan Ploski Jared Van Bortel Jeff Bolz Jeffrey Quesnelle Jeroen Mostert Jiahao Li JidongZhang-THU <1119708529@qq.com> Jiří Podivín <66251151+jpodivin@users.noreply.github.com> Jo Liss Joe Todd Johannes Gäßler John Balis Josh Bleecher Snyder Judd Jun Hee Yoo Junil Kim Justina Cho Justine Tunney Justine Tunney Karol Kontny <82021046+kkontny@users.noreply.github.com> Kawrakow <48489457+ikawrakow@users.noreply.github.com> Kevin Gibbons Konstantin Zhuravlyov Kylin <56434533+KyL0N@users.noreply.github.com> LoganDark LoganDark LostRuins <39025047+LostRuins@users.noreply.github.com> Lukas Möller M Refi D.A <24388107+refinism@users.noreply.github.com> M. 
Yusuf Sarıgöz Ma Mingfei Mahesh Madhav <67384846+heshpdx@users.noreply.github.com> MaiHD Mark Zhuang Markus Tavenrath Masaya, Kato <62578291+msy-kato@users.noreply.github.com> Mathieu Baudier Mathijs de Bruin Matt Stephenson Max Krasnyansky Max Krasnyansky Mayank Kumar Pal Meng, Hengyu Mengqing Cao Metal Whale <45712559+metalwhale@users.noreply.github.com> Michael Klimenko Michael Podvitskiy Michael Verrilli Molly Sophia Natsu Neo Zhang <14088817+arthw@users.noreply.github.com> Neo Zhang Jianyu Neuman Vong Nevin Nicholai Tukanov Nico Bosshard Nicolò Scipione Nikita Sarychev <42014488+sARY77@users.noreply.github.com> Nouamane Tazi Olivier Chafik Olivier Chafik Ondřej Čertík Ouadie EL FAROUKI PAB Paul Tsochantaris Peter Philpax Pierre Alexandre SCHEMBRI Plamen Minev Playdev Prashant Vithule <119530321+Vithulep@users.noreply.github.com> Przemysław Pawełczyk R0CKSTAR R0CKSTAR Radoslav Gerganov Radosław Gryta Ravindra Marella Ray Cromwell Reinforce-II Rémy Oudompheng Reza Rezvan Rick G <26732651+TheFlipbook@users.noreply.github.com> RiverZhou Robert Ormandi <52251610+ormandi@users.noreply.github.com> Romain Biessy Ronsor Rotem Dan Ryan Hitchman SRHMorris <69468379+SRHMorris@users.noreply.github.com> SXX Salvatore Mesoraca Sam Spilsbury Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> Santtu Keskinen Sergio López Sergio López Shanshan Shen <467638484@qq.com> Shijie <821898965@qq.com> Shupei Fan Siddharth Ramakrishnan Sigbjørn Skjæret Skyler Celestinian-Sterling <80314197+Celestinian@users.noreply.github.com> Slava Primenko Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com> Steward Garcia <57494570+FSSRepo@users.noreply.github.com> Supreet Sethi Takuya Takeuchi Tamotsu Takahashi Tanmay Tanmay Sachan Timothy Cronin <40186632+4imothy@users.noreply.github.com> Tom Bailey Tom Jobbins <784313+TheBloke@users.noreply.github.com> Tony Wasserka <4840017+neobrain@users.noreply.github.com> Tristan Druyen Tyé singwa <92231658+tye-singwa@users.noreply.github.com> UEXTM.com <84163508+uextm@users.noreply.github.com> WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com> William Tambellini William Tambellini XiaotaoChen Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com> Xuan Son Nguyen Yavor Ivanov YavorGIvanov Yilong Guo Yilong Guo Yuri Khrustalev Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com> Zhiyuan Li Zhiyuan Li a3sh <38979186+A3shTnT@users.noreply.github.com> ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com> agray3 amd-dwang amritahs-ibm apcameron <37645737+apcameron@users.noreply.github.com> appvoid <78444142+appvoid@users.noreply.github.com> ariez-xyz <41232910+ariez-xyz@users.noreply.github.com> automaticcat bandoti <141645996+bandoti@users.noreply.github.com> bmwl bobqianic <129547291+bobqianic@users.noreply.github.com> bssrdf chengchi compilade <113953597+compilade@users.noreply.github.com> compilade ddpasa <112642920+ddpasa@users.noreply.github.com> denersc dscripka fitzsim fj-y-saito <85871716+fj-y-saito@users.noreply.github.com> fraxy-v <65565042+fraxy-v@users.noreply.github.com> gn64 goerch goldwaving <77494627+goldwaving@users.noreply.github.com> haopeng <657407891@qq.com> hidenorly hipudding hydai issixx <46835150+issixx@users.noreply.github.com> jaeminSon jdomke <28772296+jdomke@users.noreply.github.com> jiez <373447296@qq.com> johnson442 <56517414+johnson442@users.noreply.github.com> junchao-loongson <68935141+junchao-loongson@users.noreply.github.com> k.h.lai katsu560 <118887472+katsu560@users.noreply.github.com> klosax 
<131523366+klosax@users.noreply.github.com> kunnis l3utterfly le.chang leejet <31925346+leejet@users.noreply.github.com> leejet leo-pony lhez liuwei-git <14815172+liuwei-git@users.noreply.github.com> luoyu-intel magicse mahorozte <41834471+mahorozte@users.noreply.github.com> mashizora <30516315+mashizora@users.noreply.github.com> matt23654 matteo ochafik otaGran pengxin99 pikalover6 <49179590+pikalover6@users.noreply.github.com> postmasters sjinzh skirodev <57715494+skirodev@users.noreply.github.com> slaren snadampal <87143774+snadampal@users.noreply.github.com> someone13574 <81528246+someone13574@users.noreply.github.com> stduhpf taher <8665427+nullhook@users.noreply.github.com> texmex76 <40733439+texmex76@users.noreply.github.com> the-crypt-keeper <84680712+the-crypt-keeper@users.noreply.github.com> thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> ucag.li ulatekh uvos uvos wangshuai09 <391746016@qq.com> woachk <24752637+woachk@users.noreply.github.com> xctan yangyaofei yuri@FreeBSD zhentaoyu zhouwg <6889919+zhouwg@users.noreply.github.com> zhouwg 谢乃闻 布客飞龙 <562826179@qq.com> 旺旺碎冰冰 <38837039+Cyberhan123@users.noreply.github.com> ggml-org-ggml-7ec8045/CMakeLists.txt000066400000000000000000000446331506673203700172750ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. project("ggml" C CXX ASM) ### GGML Version set(GGML_VERSION_MAJOR 0) set(GGML_VERSION_MINOR 9) set(GGML_VERSION_PATCH 4) set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}") find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH) if(GIT_EXE) # Get current git commit hash execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} OUTPUT_VARIABLE GGML_BUILD_COMMIT OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET ) # Check if the working directory is dirty (i.e., has uncommitted changes) execute_process(COMMAND ${GIT_EXE} diff-index --quiet HEAD -- . 
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} RESULT_VARIABLE GGML_GIT_DIRTY ERROR_QUIET ) endif() # Build the version string with optional dirty flag set(GGML_VERSION "${GGML_VERSION_BASE}") if(GGML_GIT_DIRTY AND NOT GGML_GIT_DIRTY EQUAL 0) set(GGML_VERSION "${GGML_VERSION}-dirty") endif() if(NOT GGML_BUILD_COMMIT) set(GGML_BUILD_COMMIT "unknown") endif() include(CheckIncludeFileCXX) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set(GGML_STANDALONE ON) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # configure project version # TODO else() set(GGML_STANDALONE OFF) endif() if (EMSCRIPTEN) set(BUILD_SHARED_LIBS_DEFAULT OFF) option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON) else() if (MINGW) set(BUILD_SHARED_LIBS_DEFAULT OFF) else() set(BUILD_SHARED_LIBS_DEFAULT ON) endif() endif() # remove the lib prefix on win32 mingw if (WIN32) set(CMAKE_STATIC_LIBRARY_PREFIX "") set(CMAKE_SHARED_LIBRARY_PREFIX "") set(CMAKE_SHARED_MODULE_PREFIX "") endif() option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF) set(GGML_BACKEND_DIR "" CACHE PATH "ggml: directory to load dynamic backends from (requires GGML_BACKEND_DL") # # option list # # TODO: mark all options as advanced when not GGML_STANDALONE if (APPLE) set(GGML_METAL_DEFAULT ON) set(GGML_BLAS_DEFAULT ON) set(GGML_BLAS_VENDOR_DEFAULT "Apple") else() set(GGML_METAL_DEFAULT OFF) set(GGML_BLAS_DEFAULT OFF) set(GGML_BLAS_VENDOR_DEFAULT "Generic") endif() if (CMAKE_CROSSCOMPILING OR DEFINED ENV{SOURCE_DATE_EPOCH}) message(STATUS "Setting GGML_NATIVE_DEFAULT to OFF") set(GGML_NATIVE_DEFAULT OFF) else() set(GGML_NATIVE_DEFAULT ON) endif() # defaults if (NOT GGML_LLAMAFILE_DEFAULT) set(GGML_LLAMAFILE_DEFAULT OFF) endif() if (NOT GGML_CUDA_GRAPHS_DEFAULT) set(GGML_CUDA_GRAPHS_DEFAULT OFF) endif() # general option(GGML_STATIC "ggml: static link libraries" OFF) option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT}) option(GGML_LTO "ggml: enable link time optimization" OFF) option(GGML_CCACHE "ggml: use ccache if available" ON) # debug option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON) option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF) option(GGML_GPROF "ggml: enable gprof" OFF) # build option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF) # sanitizers option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF) option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF) option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF) # instruction set specific if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT) set(INS_ENB OFF) else() set(INS_ENB ON) endif() message(DEBUG "GGML_NATIVE : ${GGML_NATIVE}") message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}") message(DEBUG "INS_ENB : ${INS_ENB}") option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF) option(GGML_CPU_REPACK "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON) option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF) option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB}) option(GGML_AVX "ggml: enable AVX" 
${INS_ENB}) option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF) option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB}) option(GGML_BMI2 "ggml: enable BMI2" ${INS_ENB}) option(GGML_AVX512 "ggml: enable AVX512F" OFF) option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF) option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF) option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF) if (NOT MSVC) # in MSVC F16C and FMA is implied with AVX2/AVX512 option(GGML_FMA "ggml: enable FMA" ${INS_ENB}) option(GGML_F16C "ggml: enable F16C" ${INS_ENB}) # MSVC does not seem to support AMX option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF) option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF) option(GGML_AMX_BF16 "ggml: enable AMX-BF16" OFF) endif() option(GGML_LASX "ggml: enable lasx" ON) option(GGML_LSX "ggml: enable lsx" ON) option(GGML_RVV "ggml: enable rvv" ON) option(GGML_RV_ZFH "ggml: enable riscv zfh" ON) option(GGML_RV_ZVFH "ggml: enable riscv zvfh" ON) option(GGML_RV_ZICBOP "ggml: enable riscv zicbop" ON) option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF) option(GGML_VXE "ggml: enable vxe" ON) option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF) set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM") set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC") if (MINGW) set(GGML_WIN_VER "0xA00" CACHE STRING "ggml: Windows version") endif() # ggml core set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism") option(GGML_CPU "ggml: enable CPU backend" ON) # 3rd party libs / backends option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON) option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT}) set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING "ggml: BLAS library vendor") option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT}) option(GGML_CUDA "ggml: use CUDA" OFF) option(GGML_MUSA "ggml: use MUSA" OFF) option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF) option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF) set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING "ggml: max. 
batch size for using peer access") option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF) option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF) option(GGML_CUDA_FA "ggml: compile ggml FlashAttention CUDA kernels" ON) option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF) option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT}) set (GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING "ggml: cuda link binary compression mode; requires cuda 12.8+") set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS "none;speed;balance;size") option(GGML_HIP "ggml: use HIP" OFF) option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF) option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON) option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF) option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF) option(GGML_HIP_MMQ_MFMA "ggml: enable MFMA MMA for CDNA in MMQ" ON) option(GGML_HIP_EXPORT_METRICS "ggml: enable kernel perf metrics output" OFF) option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF) option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF) option(GGML_VULKAN "ggml: use Vulkan" OFF) option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF) option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF) option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF) option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF) option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF) option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF) option(GGML_WEBGPU "ggml: use WebGPU" OFF) option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF) option(GGML_ZDNN "ggml: use zDNN" OFF) option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT}) option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF) option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF) option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL}) set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING "ggml: metal minimum macOS version") set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)") option(GGML_OPENMP "ggml: use OpenMP" ON) option(GGML_RPC "ggml: use RPC" OFF) option(GGML_SYCL "ggml: use SYCL" OFF) option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF) option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend" ON) option(GGML_SYCL_DNN "ggml: enable oneDNN in the SYCL backend" ON) set (GGML_SYCL_TARGET "INTEL" CACHE STRING "ggml: sycl target device") set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING "ggml: sycl device architecture") option(GGML_OPENCL "ggml: use OpenCL" OFF) option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF) option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON) option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON) set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING "gmml: OpenCL API version to target") # toolchain for vulkan-shaders-gen set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen") # extra artifacts option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE}) option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) # # dependencies # set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD_REQUIRED true) set(CMAKE_CXX_STANDARD 17) 
set(CMAKE_CXX_STANDARD_REQUIRED true) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) include(GNUInstallDirs) # # build the library # add_subdirectory(src) # # tests and examples # if (GGML_BUILD_TESTS) enable_testing() add_subdirectory(tests) endif () if (GGML_BUILD_EXAMPLES) add_subdirectory(examples) endif () # # install # include(CMakePackageConfigHelpers) # all public headers set(GGML_PUBLIC_HEADERS include/ggml.h include/ggml-cpu.h include/ggml-alloc.h include/ggml-backend.h include/ggml-blas.h include/ggml-cann.h include/ggml-cpp.h include/ggml-cuda.h include/ggml-opt.h include/ggml-metal.h include/ggml-rpc.h include/ggml-sycl.h include/ggml-vulkan.h include/ggml-webgpu.h include/gguf.h) set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") #if (GGML_METAL) # set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal") #endif() install(TARGETS ggml LIBRARY PUBLIC_HEADER) install(TARGETS ggml-base LIBRARY) if (GGML_STANDALONE) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc DESTINATION share/pkgconfig) endif() # # Create CMake package # # Capture variables prefixed with GGML_. set(variable_set_statements " ####### Expanded from @GGML_VARIABLES_EXPANED@ by configure_package_config_file() ####### ####### Any changes to this file will be overwritten by the next CMake run ####### ") set(GGML_SHARED_LIB ${BUILD_SHARED_LIBS}) get_cmake_property(all_variables VARIABLES) foreach(variable_name IN LISTS all_variables) if(variable_name MATCHES "^GGML_") string(REPLACE ";" "\\;" variable_value "${${variable_name}}") set(variable_set_statements "${variable_set_statements}set(${variable_name} \"${variable_value}\")\n") endif() endforeach() set(GGML_VARIABLES_EXPANDED ${variable_set_statements}) # Create the CMake package and set install location. 
set(GGML_INSTALL_VERSION ${GGML_VERSION}) set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files") set(GGML_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files") set(GGML_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files") configure_package_config_file( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/ggml-config.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml PATH_VARS GGML_INCLUDE_INSTALL_DIR GGML_LIB_INSTALL_DIR GGML_BIN_INSTALL_DIR) write_basic_package_version_file( ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake VERSION ${GGML_INSTALL_VERSION} COMPATIBILITY SameMajorVersion) target_compile_definitions(ggml-base PRIVATE GGML_VERSION="${GGML_INSTALL_VERSION}" GGML_COMMIT="${GGML_BUILD_COMMIT}" ) message(STATUS "ggml version: ${GGML_INSTALL_VERSION}") message(STATUS "ggml commit: ${GGML_BUILD_COMMIT}") install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml) if (MSVC) set(MSVC_WARNING_FLAGS /wd4005 # Macro redefinition /wd4244 # Conversion from one type to another type, possible loss of data /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data /wd4305 # Conversion from 'type1' to 'type2', possible loss of data /wd4566 # Conversion from 'char' to 'wchar_t', possible loss of data /wd4996 # Disable POSIX deprecation warnings /wd4702 # Unreachable code warnings ) function(disable_msvc_warnings target_name) if(TARGET ${target_name}) target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) endif() endfunction() disable_msvc_warnings(ggml-base) disable_msvc_warnings(ggml) disable_msvc_warnings(ggml-cpu) disable_msvc_warnings(ggml-cpu-x64) disable_msvc_warnings(ggml-cpu-sse42) disable_msvc_warnings(ggml-cpu-sandybridge) disable_msvc_warnings(ggml-cpu-haswell) disable_msvc_warnings(ggml-cpu-skylakex) disable_msvc_warnings(ggml-cpu-icelake) disable_msvc_warnings(ggml-cpu-alderlake) if (GGML_BUILD_EXAMPLES) disable_msvc_warnings(common-ggml) disable_msvc_warnings(common) disable_msvc_warnings(mnist-common) disable_msvc_warnings(mnist-eval) disable_msvc_warnings(mnist-train) disable_msvc_warnings(gpt-2-ctx) disable_msvc_warnings(gpt-2-alloc) disable_msvc_warnings(gpt-2-backend) disable_msvc_warnings(gpt-2-sched) disable_msvc_warnings(gpt-2-quantize) disable_msvc_warnings(gpt-2-batched) disable_msvc_warnings(gpt-j) disable_msvc_warnings(gpt-j-quantize) disable_msvc_warnings(magika) disable_msvc_warnings(yolov3-tiny) disable_msvc_warnings(sam) disable_msvc_warnings(simple-ctx) disable_msvc_warnings(simple-backend) endif() if (GGML_BUILD_TESTS) disable_msvc_warnings(test-mul-mat) disable_msvc_warnings(test-arange) disable_msvc_warnings(test-backend-ops) disable_msvc_warnings(test-cont) disable_msvc_warnings(test-conv-transpose) disable_msvc_warnings(test-conv-transpose-1d) disable_msvc_warnings(test-conv1d) disable_msvc_warnings(test-conv2d) disable_msvc_warnings(test-conv2d-dw) disable_msvc_warnings(test-customop) disable_msvc_warnings(test-dup) disable_msvc_warnings(test-opt) disable_msvc_warnings(test-pool) endif () endif() ggml-org-ggml-7ec8045/CONTRIBUTING.md000066400000000000000000000005511506673203700167550ustar00rootroot00000000000000Please use [llama.cpp's contribution guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) for this project. 
*For changes to the core `ggml` library (including to the CMake build system), please open a PR in https://github.com/ggml-org/llama.cpp. Doing so will make your PR more visible, better tested and more likely to be reviewed.*

ggml-org-ggml-7ec8045/LICENSE

MIT License

Copyright (c) 2023-2024 The ggml authors

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

ggml-org-ggml-7ec8045/README.md

# ggml

[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205)

Tensor library for machine learning

***Note that this project is under active development. \
Some of the development is currently happening in the [llama.cpp](https://github.com/ggerganov/llama.cpp) and [whisper.cpp](https://github.com/ggerganov/whisper.cpp) repos***

## Features

- Low-level cross-platform implementation
- Integer quantization support
- Broad hardware support
- Automatic differentiation
- ADAM and L-BFGS optimizers
- No third-party dependencies
- Zero memory allocations during runtime

## Build

```bash
git clone https://github.com/ggml-org/ggml
cd ggml

# install python dependencies in a virtual environment
python3.10 -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt

# build the examples
mkdir build && cd build
cmake ..
cmake --build . --config Release -j 8
```

## GPT inference (example)

```bash
# run the GPT-2 small 117M model
../examples/gpt-2/download-ggml-model.sh 117M
./bin/gpt-2-backend -m models/gpt-2-117M/ggml-model.bin -p "This is an example"
```

For more information, check out the corresponding programs in the [examples](examples) folder.

## Using CUDA

```bash
# fix the path to point to your CUDA compiler
cmake -DGGML_CUDA=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.1/bin/nvcc ..
```

## Using hipBLAS

```bash
cmake -DCMAKE_C_COMPILER="$(hipconfig -l)/clang" -DCMAKE_CXX_COMPILER="$(hipconfig -l)/clang++" -DGGML_HIP=ON
```

## Using SYCL

```bash
# linux
source /opt/intel/oneapi/setvars.sh
cmake -G "Ninja" -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON ..

# windows
"C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
cmake -G "Ninja" -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DGGML_SYCL=ON ..
```

## Compiling for Android

Download and unzip the NDK from this download [page](https://developer.android.com/ndk/downloads).
Set the NDK_ROOT_PATH environment variable or provide the absolute path to the CMAKE_ANDROID_NDK in the command below.

```bash
cmake .. \
    -DCMAKE_SYSTEM_NAME=Android \
    -DCMAKE_SYSTEM_VERSION=33 \
    -DCMAKE_ANDROID_ARCH_ABI=arm64-v8a \
    -DCMAKE_ANDROID_NDK=$NDK_ROOT_PATH -DCMAKE_ANDROID_STL_TYPE=c++_shared
```

```bash
# create directories
adb shell 'mkdir /data/local/tmp/bin'
adb shell 'mkdir /data/local/tmp/models'

# push the compiled binaries to the folder
adb push bin/* /data/local/tmp/bin/

# push the ggml library
adb push src/libggml.so /data/local/tmp/

# push model files
adb push models/gpt-2-117M/ggml-model.bin /data/local/tmp/models/

adb shell
cd /data/local/tmp
export LD_LIBRARY_PATH=/data/local/tmp
./bin/gpt-2-backend -m models/ggml-model.bin -p "this is an example"
```

## Resources

- [Introduction to ggml](https://huggingface.co/blog/introduction-to-ggml)
- [The GGUF file format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md)

ggml-org-ggml-7ec8045/ci/run.sh

#!/bin/bash
#
# sample usage:
#
# mkdir tmp
#
# # CPU-only build
# bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # with CUDA support
# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # With SYCL support
# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#

if [ -z "$2" ]; then
    echo "usage: $0 <output-dir> <mnt-dir>"
    exit 1
fi

mkdir -p "$1"
mkdir -p "$2"

OUT=$(realpath "$1")
MNT=$(realpath "$2")

rm -v $OUT/*.log
rm -v $OUT/*.exit
rm -v $OUT/*.md

sd=`dirname $0`
cd $sd/../
SRC=`pwd`

CMAKE_EXTRA=""
CTEST_EXTRA=""

if [ ! -z ${GG_BUILD_METAL} ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
fi

if [ ! -z ${GG_BUILD_CUDA} ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON"

    if command -v nvidia-smi >/dev/null 2>&1; then
        CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d '.')
        if [[ -n "$CUDA_ARCH" && "$CUDA_ARCH" =~ ^[0-9]+$ ]]; then
            CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH}"
        else
            echo "Warning: Using fallback CUDA architectures"
            CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=61;70;75;80;86;89"
        fi
    else
        echo "Error: nvidia-smi not found, cannot build with CUDA"
        exit 1
    fi
fi

if [ ! -z ${GG_BUILD_ROCM} ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_HIP=ON"
    if [ -z ${GG_BUILD_AMDGPU_TARGETS} ]; then
        echo "Missing GG_BUILD_AMDGPU_TARGETS, please set it to your GPU architecture (e.g. gfx90a, gfx1100, etc.)"
        exit 1
    fi
    CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
fi

if [ ! -z ${GG_BUILD_SYCL} ]; then
    if [ -z ${ONEAPI_ROOT} ]; then
        echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:"
        echo "source /opt/intel/oneapi/setvars.sh"
        exit 1
    fi
    # Use only main GPU
    export ONEAPI_DEVICE_SELECTOR="level_zero:0"
    # Enable sysman for correct memory reporting
    export ZES_ENABLE_SYSMAN=1
    # to circumvent precision issues on CPY operations
    export SYCL_PROGRAM_COMPILE_OPTIONS="-cl-fp32-correctly-rounded-divide-sqrt"
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
fi

if [ ! -z ${GG_BUILD_VULKAN} ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"

    # if on Mac, disable METAL
    if [[ "$OSTYPE" == "darwin"* ]]; then
        CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=OFF -DGGML_BLAS=OFF"
    fi
fi

if [ ! -z ${GG_BUILD_WEBGPU} ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_WEBGPU=1"
fi

if [ !
-z ${GG_BUILD_MUSA} ]; then # Use qy1 by default (MTT S80) MUSA_ARCH=${MUSA_ARCH:-21} CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}" fi if [ ! -z ${GG_BUILD_NO_SVE} ]; then # arm 9 and newer enables sve by default, adjust these flags depending on the cpu used CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm" fi ## helpers # download a file if it does not exist or if it is outdated function gg_wget { local out=$1 local url=$2 local cwd=`pwd` mkdir -p $out cd $out # should not re-download if file is the same wget -nv -N $url cd $cwd } function gg_printf { printf -- "$@" >> $OUT/README.md } function gg_run { ci=$1 set -o pipefail set -x gg_run_$ci | tee $OUT/$ci.log cur=$? echo "$cur" > $OUT/$ci.exit set +x set +o pipefail gg_sum_$ci ret=$((ret | cur)) } ## ci # ctest_debug function gg_run_ctest_debug { cd ${SRC} rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug set -e (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log (time ctest ${CTEST_EXTRA} --output-on-failure -E "test-opt|test-backend-ops" ) 2>&1 | tee -a $OUT/${ci}-ctest.log set +e } function gg_sum_ctest_debug { gg_printf '### %s\n\n' "${ci}" gg_printf 'Runs ctest in debug mode\n' gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" gg_printf '```\n' gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" gg_printf '```\n' gg_printf '\n' } # ctest_release function gg_run_ctest_release { cd ${SRC} rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release set -e (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log if [ -z $GG_BUILD_LOW_PERF ]; then (time ctest ${CTEST_EXTRA} --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log else (time ctest ${CTEST_EXTRA} --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log fi set +e } function gg_sum_ctest_release { gg_printf '### %s\n\n' "${ci}" gg_printf 'Runs ctest in release mode\n' gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" gg_printf '```\n' gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" gg_printf '```\n' } # gpt_2 function gg_run_gpt_2 { cd ${SRC} gg_wget models-mnt/gpt-2 https://huggingface.co/ggerganov/ggml/resolve/main/ggml-model-gpt-2-117M.bin cd build-ci-release set -e model="../models-mnt/gpt-2/ggml-model-gpt-2-117M.bin" prompts="../examples/prompts/gpt-2.txt" (time ./bin/gpt-2-backend --model ${model} -s 1234 -n 64 -tt ${prompts} ) 2>&1 | tee -a $OUT/${ci}-tg.log (time ./bin/gpt-2-backend --model ${model} -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log (time ./bin/gpt-2-sched --model ${model} -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log (time ./bin/gpt-2-batched --model ${model} -s 1234 -n 64 -np 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log set +e } function gg_sum_gpt_2 { gg_printf '### %s\n\n' "${ci}" gg_printf 'Runs short GPT-2 text generation\n' gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" gg_printf '```\n' gg_printf '%s\n' "$(cat $OUT/${ci}-tg.log)" gg_printf '```\n' } # TODO: update ## mnist # #function gg_run_mnist { # cd ${SRC} # # cd build-ci-release # # set -e # # mkdir -p models/mnist # python3 ../examples/mnist/convert-h5-to-ggml.py ../examples/mnist/models/mnist/mnist_model.state_dict # # model_f32="./models/mnist/ggml-model-f32.bin" # 
samples="../examples/mnist/models/mnist/t10k-images.idx3-ubyte" # # # first command runs and exports "mnist.ggml", the second command runs the exported model # # (time ./bin/mnist ${model_f32} ${samples} ) 2>&1 | tee -a $OUT/${ci}-mnist.log # (time ./bin/mnist-cpu ./mnist.ggml ${samples} ) 2>&1 | tee -a $OUT/${ci}-mnist.log # # set +e #} # #function gg_sum_mnist { # gg_printf '### %s\n\n' "${ci}" # # gg_printf 'MNIST\n' # gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" # gg_printf '```\n' # gg_printf '%s\n' "$(cat $OUT/${ci}-mnist.log)" # gg_printf '```\n' #} # sam function gg_run_sam { cd ${SRC} gg_wget models-mnt/sam/ https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth gg_wget models-mnt/sam/ https://raw.githubusercontent.com/YavorGIvanov/sam.cpp/ceafb7467bff7ec98e0c4f952e58a9eb8fd0238b/img.jpg cd build-ci-release set -e path_models="../models-mnt/sam/" model_f16="${path_models}/ggml-model-f16.bin" img_0="${path_models}/img.jpg" python3 ../examples/sam/convert-pth-to-ggml.py ${path_models}/sam_vit_b_01ec64.pth ${path_models}/ 1 # Test default parameters (time ./bin/sam -m ${model_f16} -i ${img_0} ) 2>&1 | tee -a $OUT/${ci}-main.log grep -q "point prompt" $OUT/${ci}-main.log grep -q "bbox (371, 436), (144, 168)" $OUT/${ci}-main.log # Test box prompt and single mask output (time ./bin/sam -m ${model_f16} -i ${img_0} -b 368,144,441,173 -sm) 2>&1 | tee -a $OUT/${ci}-main.log grep -q "box prompt" $OUT/${ci}-main.log grep -q "bbox (370, 439), (144, 169)" $OUT/${ci}-main.log set +e } function gg_sum_sam { gg_printf '### %s\n\n' "${ci}" gg_printf 'Run SAM\n' gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" gg_printf '```\n' gg_printf '%s\n' "$(cat $OUT/${ci}-main.log)" gg_printf '```\n' } # yolo function gg_run_yolo { cd ${SRC} gg_wget models-mnt/yolo/ https://huggingface.co/ggml-org/models/resolve/main/yolo/yolov3-tiny.weights gg_wget models-mnt/yolo/ https://huggingface.co/ggml-org/models/resolve/main/yolo/dog.jpg cd build-ci-release cp -r ../examples/yolo/data . set -e path_models="../models-mnt/yolo/" python3 ../examples/yolo/convert-yolov3-tiny.py ${path_models}/yolov3-tiny.weights (time ./bin/yolov3-tiny -m yolov3-tiny.gguf -i ${path_models}/dog.jpg ) 2>&1 | tee -a $OUT/${ci}-main.log grep -qE "dog: (55|56|57|58|59)%" $OUT/${ci}-main.log grep -qE "car: (50|51|52|53|54)%" $OUT/${ci}-main.log grep -qE "truck: (54|55|56|57|58)%" $OUT/${ci}-main.log grep -qE "bicycle: (57|58|59|60|61)%" $OUT/${ci}-main.log set +e } function gg_sum_yolo { gg_printf '### %s\n\n' "${ci}" gg_printf 'Run YOLO\n' gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" gg_printf '```\n' gg_printf '%s\n' "$(cat $OUT/${ci}-main.log)" gg_printf '```\n' } ## main if true ; then # Create symlink: ./ggml/models-mnt -> $MNT/models/models-mnt rm -rf ${SRC}/models-mnt mnt_models=${MNT}/models mkdir -p ${mnt_models} ln -sfn ${mnt_models} ${SRC}/models-mnt # Create a fresh python3 venv and enter it if ! python3 -m venv "$MNT/venv"; then echo "Error: Failed to create Python virtual environment at $MNT/venv." 
exit 1 fi source "$MNT/venv/bin/activate" pip install -r ${SRC}/requirements.txt --disable-pip-version-check fi ret=0 test $ret -eq 0 && gg_run ctest_debug test $ret -eq 0 && gg_run ctest_release test $ret -eq 0 && gg_run gpt_2 #test $ret -eq 0 && gg_run mnist test $ret -eq 0 && gg_run sam test $ret -eq 0 && gg_run yolo if [ -z $GG_BUILD_LOW_PERF ]; then # run tests meant for low-perf runners date fi cat $OUT/README.md exit $ret ggml-org-ggml-7ec8045/cmake/000077500000000000000000000000001506673203700156035ustar00rootroot00000000000000ggml-org-ggml-7ec8045/cmake/BuildTypes.cmake000066400000000000000000000037651506673203700207040ustar00rootroot00000000000000# Add new build types # ReleaseGG - Release with enabled asserts SET(CMAKE_CXX_FLAGS_RELEASEGG "-O3" CACHE STRING "Flags used by the c++ compiler during release builds with enabled asserts." FORCE ) SET(CMAKE_C_FLAGS_RELEASEGG "-O3" CACHE STRING "Flags used by the compiler during release builds with enabled asserts." FORCE ) SET(CMAKE_EXE_LINKER_FLAGS_RELEASEGG "" CACHE STRING "Flags used for linking binaries during release builds with enabled asserts." FORCE ) SET(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG "" CACHE STRING "Flags used by the shared libraries linker during release builds with enabled asserts." FORCE ) MARK_AS_ADVANCED( CMAKE_CXX_FLAGS_RELEASEGG CMAKE_C_FLAGS_RELEASEGG CMAKE_EXE_LINKER_FLAGS_RELEASEGG CMAKE_SHARED_LINKER_FLAGS_RELEASEGG ) # RelWithDebInfoGG - RelWithDebInfo with enabled asserts SET(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG "-O2 -g" CACHE STRING "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts." FORCE ) SET(CMAKE_C_FLAGS_RELWITHDEBINFOGG "-O2 -g" CACHE STRING "Flags used by the compiler during release builds with debug symbols and enabled asserts." FORCE ) SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG "" CACHE STRING "Flags used for linking binaries during release builds with debug symbols and enabled asserts." FORCE ) SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG "" CACHE STRING "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts." 
    FORCE )

MARK_AS_ADVANCED(
    CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
    CMAKE_C_FLAGS_RELWITHDEBINFOGG
    CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
    CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG )

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG")
endif()

ggml-org-ggml-7ec8045/cmake/GitVars.cmake

find_package(Git)

# the commit's SHA1
execute_process(COMMAND
    "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_SHA1
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

# the date of the commit
execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_DATE
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

# the subject of the commit
execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%s
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

ggml-org-ggml-7ec8045/cmake/common.cmake

function(ggml_get_flags CCID CCVER)
    set(C_FLAGS "")
    set(CXX_FLAGS "")

    if (CCID MATCHES "Clang")
        set(C_FLAGS   -Wunreachable-code-break -Wunreachable-code-return)
        set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)

        if (
            (CCID STREQUAL "Clang"      AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
            (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
        )
            list(APPEND C_FLAGS -Wdouble-promotion)
        endif()
    elseif (CCID STREQUAL "GNU")
        set(C_FLAGS   -Wdouble-promotion)
        set(CXX_FLAGS -Wno-array-bounds)

        if (CCVER VERSION_GREATER_EQUAL 8.1.0)
            list(APPEND CXX_FLAGS -Wextra-semi)
        endif()
    endif()

    set(GF_C_FLAGS   ${C_FLAGS}   PARENT_SCOPE)
    set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
endfunction()

function(ggml_get_system_arch)
    if (CMAKE_OSX_ARCHITECTURES      STREQUAL "arm64" OR
        CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
        (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
             CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
        set(GGML_SYSTEM_ARCH "ARM" PARENT_SCOPE)
    elseif (CMAKE_OSX_ARCHITECTURES      STREQUAL "x86_64" OR
            CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
            (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
                 CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
        set(GGML_SYSTEM_ARCH "x86" PARENT_SCOPE)
    elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc|power")
        set(GGML_SYSTEM_ARCH "PowerPC" PARENT_SCOPE)
    elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
        set(GGML_SYSTEM_ARCH "loongarch64" PARENT_SCOPE)
    elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
        set(GGML_SYSTEM_ARCH "riscv64" PARENT_SCOPE)
    elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
        set(GGML_SYSTEM_ARCH "s390x" PARENT_SCOPE)
    else()
        set(GGML_SYSTEM_ARCH "UNKNOWN" PARENT_SCOPE)
    endif()
endfunction()

ggml-org-ggml-7ec8045/cmake/ggml-config.cmake.in

@PACKAGE_INIT@

@GGML_VARIABLES_EXPANDED@

# Find all dependencies before creating any target.
include(CMakeFindDependencyMacro) find_dependency(Threads) if (NOT GGML_SHARED_LIB) set(GGML_CPU_INTERFACE_LINK_LIBRARIES "") set(GGML_CPU_INTERFACE_LINK_OPTIONS "") if (APPLE AND GGML_ACCELERATE) find_library(ACCELERATE_FRAMEWORK Accelerate) if(NOT ACCELERATE_FRAMEWORK) set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0) return() endif() list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${ACCELERATE_FRAMEWORK}) endif() if (GGML_OPENMP_ENABLED) find_dependency(OpenMP) list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES OpenMP::OpenMP_C OpenMP::OpenMP_CXX) endif() if (GGML_CPU_HBM) find_library(memkind memkind) if(NOT memkind) set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0) return() endif() list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES memkind) endif() if (GGML_BLAS) find_dependency(BLAS) list(APPEND GGML_BLAS_INTERFACE_LINK_LIBRARIES ${BLAS_LIBRARIES}) list(APPEND GGML_BLAS_INTERFACE_LINK_OPTIONS ${BLAS_LINKER_FLAGS}) endif() if (GGML_CUDA) set(GGML_CUDA_INTERFACE_LINK_LIBRARIES "") find_dependency(CUDAToolkit) if (GGML_STATIC) list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $) if (WIN32) list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $ $) else() list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $ $) endif() endif() if (NOT GGML_CUDA_NO_VMM) list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $) endif() endif() if (GGML_METAL) find_library(FOUNDATION_LIBRARY Foundation) find_library(METAL_FRAMEWORK Metal) find_library(METALKIT_FRAMEWORK MetalKit) if(NOT FOUNDATION_LIBRARY OR NOT METAL_FRAMEWORK OR NOT METALKIT_FRAMEWORK) set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0) return() endif() set(GGML_METAL_INTERFACE_LINK_LIBRARIES ${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK}) endif() if (GGML_OPENCL) find_dependency(OpenCL) set(GGML_OPENCL_INTERFACE_LINK_LIBRARIES $) endif() if (GGML_VULKAN) find_dependency(Vulkan) set(GGML_VULKAN_INTERFACE_LINK_LIBRARIES $) endif() if (GGML_HIP) find_dependency(hip) find_dependency(hipblas) find_dependency(rocblas) set(GGML_HIP_INTERFACE_LINK_LIBRARIES hip::host roc::rocblas roc::hipblas) endif() if (GGML_SYCL) set(GGML_SYCL_INTERFACE_LINK_LIBRARIES "") find_package(DNNL) if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL") list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES DNNL::dnnl) endif() if (WIN32) find_dependency(IntelSYCL) find_dependency(MKL) list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL) endif() endif() endif() set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@") set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@") #set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@") if(NOT TARGET ggml::ggml) find_package(Threads REQUIRED) find_library(GGML_LIBRARY ggml REQUIRED HINTS ${GGML_LIB_DIR} NO_CMAKE_FIND_ROOT_PATH) add_library(ggml::ggml UNKNOWN IMPORTED) set_target_properties(ggml::ggml PROPERTIES IMPORTED_LOCATION "${GGML_LIBRARY}") find_library(GGML_BASE_LIBRARY ggml-base REQUIRED HINTS ${GGML_LIB_DIR} NO_CMAKE_FIND_ROOT_PATH) add_library(ggml::ggml-base UNKNOWN IMPORTED) set_target_properties(ggml::ggml-base PROPERTIES IMPORTED_LOCATION "${GGML_BASE_LIBRARY}") set(_ggml_all_targets "") if (NOT GGML_BACKEND_DL) foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS}) string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}") string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx) find_library(${_ggml_backend_pfx}_LIBRARY ${_ggml_backend} REQUIRED HINTS ${GGML_LIB_DIR} NO_CMAKE_FIND_ROOT_PATH) message(STATUS "Found ${${_ggml_backend_pfx}_LIBRARY}") add_library(ggml::${_ggml_backend} UNKNOWN IMPORTED) 
    set_target_properties(ggml::${_ggml_backend}
        PROPERTIES
            INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}"
            IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
            IMPORTED_LOCATION "${${_ggml_backend_pfx}_LIBRARY}"
            INTERFACE_COMPILE_FEATURES c_std_90
            POSITION_INDEPENDENT_CODE ON)

    string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
    if(is_cpu_variant)
        list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
        set_target_properties(ggml::${_ggml_backend}
            PROPERTIES
                INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}")

        if(GGML_CPU_INTERFACE_LINK_OPTIONS)
            set_target_properties(ggml::${_ggml_backend}
                PROPERTIES
                    INTERFACE_LINK_OPTIONS "${GGML_CPU_INTERFACE_LINK_OPTIONS}")
        endif()
    else()
        list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
        set_target_properties(ggml::${_ggml_backend}
            PROPERTIES
                INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}")

        if(${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS)
            set_target_properties(ggml::${_ggml_backend}
                PROPERTIES
                    INTERFACE_LINK_OPTIONS "${${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS}")
        endif()
    endif()

    list(APPEND _ggml_all_targets ggml::${_ggml_backend})
endforeach()
endif()

list(APPEND GGML_INTERFACE_LINK_LIBRARIES ggml::ggml-base "${_ggml_all_targets}")
set_target_properties(ggml::ggml
    PROPERTIES
        INTERFACE_LINK_LIBRARIES "${GGML_INTERFACE_LINK_LIBRARIES}")

add_library(ggml::all INTERFACE IMPORTED)
set_target_properties(ggml::all
    PROPERTIES
        INTERFACE_LINK_LIBRARIES "${_ggml_all_targets}")

endif()

check_required_components(ggml)

ggml-org-ggml-7ec8045/docs/gguf.md

# GGUF

GGUF is a file format for storing models for inference with GGML and executors based on GGML. GGUF is a binary format that is designed for fast loading and saving of models, and for ease of reading. Models are traditionally developed using PyTorch or another framework, and then converted to GGUF for use in GGML.

It is a successor file format to GGML, GGMF and GGJT, and is designed to be unambiguous by containing all the information needed to load a model. It is also designed to be extensible, so that new information can be added to models without breaking compatibility.

For more information about the motivation behind GGUF, see [Historical State of Affairs](#historical-state-of-affairs).

## Specification

GGUF is a format based on the existing GGJT, but makes a few changes to the format to make it more extensible and easier to use. The following features are desired:

- Single-file deployment: they can be easily distributed and loaded, and do not require any external files for additional information.
- Extensible: new features can be added to GGML-based executors/new information can be added to GGUF models without breaking compatibility with existing models.
- `mmap` compatibility: models can be loaded using `mmap` for fast loading and saving.
- Easy to use: models can be easily loaded and saved using a small amount of code, with no need for external libraries, regardless of the language used.
- Full information: all information needed to load a model is contained in the model file, and no additional information needs to be provided by the user.

The key difference between GGJT and GGUF is the use of a key-value structure for the hyperparameters (now referred to as metadata), rather than a list of untyped values. This allows for new metadata to be added without breaking compatibility with existing models, and to annotate the model with additional information that may be useful for inference or for identifying the model.
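As an illustration of the "small amount of code" goal, the sketch below reads just the fixed-size GGUF header fields using only the Python standard library. It assumes the little-endian layout with `uint64` tensor and key-value counts used by recent GGUF versions, and `ggml-model.gguf` is a placeholder path, not a file shipped with this repository.

```python
import struct

def read_gguf_header(path):
    # GGUF files begin with the 4-byte magic "GGUF", a uint32 format
    # version, then two little-endian uint64 counts: the number of
    # tensors and the number of metadata key-value pairs.
    with open(path, "rb") as f:
        magic = f.read(4)
        if magic != b"GGUF":
            raise ValueError(f"not a GGUF file (magic={magic!r})")
        (version,) = struct.unpack("<I", f.read(4))
        n_tensors, n_kv = struct.unpack("<QQ", f.read(16))
    return version, n_tensors, n_kv

# "ggml-model.gguf" is a hypothetical file path used for illustration
version, n_tensors, n_kv = read_gguf_header("ggml-model.gguf")
print(f"GGUF v{version}: {n_tensors} tensors, {n_kv} metadata key-value pairs")
```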
ggml-org-ggml-7ec8045/docs/000077500000000000000000000000001506673203700154535ustar00rootroot00000000000000ggml-org-ggml-7ec8045/docs/gguf.md000066400000000000000000001205451506673203700167340ustar00rootroot00000000000000# GGUF

GGUF is a file format for storing models for inference with GGML and executors based on GGML. GGUF is a binary format that is designed for fast loading and saving of models, and for ease of reading. Models are traditionally developed using PyTorch or another framework, and then converted to GGUF for use in GGML.

It is a successor file format to GGML, GGMF and GGJT, and is designed to be unambiguous by containing all the information needed to load a model. It is also designed to be extensible, so that new information can be added to models without breaking compatibility.

For more information about the motivation behind GGUF, see [Historical State of Affairs](#historical-state-of-affairs).

## Specification

GGUF is a format based on the existing GGJT, but makes a few changes to the format to make it more extensible and easier to use. The following features are desired:

- Single-file deployment: they can be easily distributed and loaded, and do not require any external files for additional information.
- Extensible: new features can be added to GGML-based executors/new information can be added to GGUF models without breaking compatibility with existing models.
- `mmap` compatibility: models can be loaded using `mmap` for fast loading and saving.
- Easy to use: models can be easily loaded and saved using a small amount of code, with no need for external libraries, regardless of the language used.
- Full information: all information needed to load a model is contained in the model file, and no additional information needs to be provided by the user.

The key difference between GGJT and GGUF is the use of a key-value structure for the hyperparameters (now referred to as metadata), rather than a list of untyped values. This allows for new metadata to be added without breaking compatibility with existing models, and to annotate the model with additional information that may be useful for inference or for identifying the model.

### GGUF Naming Convention

GGUF follows a naming convention of `<BaseName><SizeLabel><FineTune><Version><Encoding><Type><Shard>.gguf`, where each component is delimited by a `-` if present. Ultimately this is intended to make it easy for humans to get the most important details of a model at a glance. It is not intended to be perfectly parsable in the field due to the diversity of existing gguf filenames.

The components are:
1. **BaseName**: A descriptive name for the model base type or architecture.
    - This can be derived from gguf metadata `general.basename` substituting spaces for dashes.
1. **SizeLabel**: Parameter weight class (useful for leader boards) represented as `<expertCount>x<count><scale-prefix>`
    - This can be derived from gguf metadata `general.size_label` if available or calculated if missing.
    - A rounded decimal point is supported in the count, with a single-letter scale prefix standing in for the floating-point exponent, as shown below:
      - `Q`: Quadrillion parameters.
      - `T`: Trillion parameters.
      - `B`: Billion parameters.
      - `M`: Million parameters.
      - `K`: Thousand parameters.
    - Additional `-<attribute><count><scale-prefix>` can be appended as needed to indicate other attributes of interest
1. **FineTune**: A descriptive name for the model fine tuning goal (e.g. Chat, Instruct, etc...)
    - This can be derived from gguf metadata `general.finetune` substituting spaces for dashes.
1. **Version**: (Optional) Denotes the model version number, formatted as `v<Major>.<Minor>`
    - If the model is missing a version number then assume `v1.0` (First Public Release)
    - This can be derived from gguf metadata `general.version`
1. **Encoding**: Indicates the weights encoding scheme that was applied to the model. Content, type mixture and arrangement however are determined by user code and can vary depending on project needs.
1. **Type**: Indicates the kind of gguf file and the intended purpose for it
    - If missing, then the file is by default a typical gguf tensor model file
    - `LoRA` : GGUF file is a LoRA adapter
    - `vocab` : GGUF file with only vocab data and metadata
1. **Shard**: (Optional) Indicates and denotes that the model has been split into multiple shards, formatted as `<ShardNum>-of-<ShardTotal>`.
    - *ShardNum* : Shard position in this model. Must be 5 digits padded by zeros.
      - Shard number always starts from `00001` onwards (e.g. the first shard always starts at `00001-of-XXXXX` rather than `00000-of-XXXXX`).
    - *ShardTotal* : Total number of shards in this model. Must be 5 digits padded by zeros.

#### Validating Above Naming Convention

At a minimum all model files should have at least BaseName, SizeLabel and Version, in order to be easily validated as a file that is keeping with the GGUF Naming Convention. An example of this issue is that it is easy for Encoding to be mistaken as a FineTune if Version is omitted.

To validate you can use this regular expression `^(?<BaseName>[A-Za-z0-9\s]*(?:(?:-(?:(?:[A-Za-z\s][A-Za-z0-9\s]*)|(?:[0-9\s]*)))*))-(?:(?<SizeLabel>(?:\d+x)?(?:\d+\.)?\d+[A-Za-z](?:-[A-Za-z]+(\d+\.)?\d+[A-Za-z]+)?)(?:-(?<FineTune>[A-Za-z0-9\s-]+))?)?-(?:(?<Version>v\d+(?:\.\d+)*))(?:-(?<Encoding>(?!LoRA|vocab)[\w_]+))?(?:-(?<Type>LoRA|vocab))?(?:-(?<Shard>\d{5}-of-\d{5}))?\.gguf$` which will check that you got the minimum BaseName, SizeLabel and Version present in the correct order.
For example:

* `Mixtral-8x7B-v0.1-KQ2.gguf`:
  - Model Name: Mixtral
  - Expert Count: 8
  - Parameter Count: 7B
  - Version Number: v0.1
  - Weight Encoding Scheme: KQ2

* `Hermes-2-Pro-Llama-3-8B-F16.gguf`:
  - Model Name: Hermes 2 Pro Llama 3
  - Expert Count: 0
  - Parameter Count: 8B
  - Version Number: v1.0
  - Weight Encoding Scheme: F16
  - Shard: N/A

* `Grok-100B-v1.0-Q4_0-00003-of-00009.gguf`
  - Model Name: Grok
  - Expert Count: 0
  - Parameter Count: 100B
  - Version Number: v1.0
  - Weight Encoding Scheme: Q4_0
  - Shard: 3 out of 9 total shards
<details><summary>Example Node.js Regex Function</summary>

```js
#!/usr/bin/env node
const ggufRegex = /^(?<BaseName>[A-Za-z0-9\s]*(?:(?:-(?:(?:[A-Za-z\s][A-Za-z0-9\s]*)|(?:[0-9\s]*)))*))-(?:(?<SizeLabel>(?:\d+x)?(?:\d+\.)?\d+[A-Za-z](?:-[A-Za-z]+(\d+\.)?\d+[A-Za-z]+)?)(?:-(?<FineTune>[A-Za-z0-9\s-]+))?)?-(?:(?<Version>v\d+(?:\.\d+)*))(?:-(?<Encoding>(?!LoRA|vocab)[\w_]+))?(?:-(?<Type>LoRA|vocab))?(?:-(?<Shard>\d{5}-of-\d{5}))?\.gguf$/;

function parseGGUFFilename(filename) {
  const match = ggufRegex.exec(filename);
  if (!match)
    return null;
  const {BaseName = null, SizeLabel = null, FineTune = null, Version = "v1.0", Encoding = null, Type = null, Shard = null} = match.groups;
  return {BaseName: BaseName, SizeLabel: SizeLabel, FineTune: FineTune, Version: Version, Encoding: Encoding, Type: Type, Shard: Shard};
}

const testCases = [
  {filename: 'Mixtral-8x7B-v0.1-KQ2.gguf',                         expected: { BaseName: 'Mixtral',              SizeLabel: '8x7B',                 FineTune: null,       Version: 'v0.1', Encoding: 'KQ2',  Type: null, Shard: null}},
  {filename: 'Grok-100B-v1.0-Q4_0-00003-of-00009.gguf',            expected: { BaseName: 'Grok',                 SizeLabel: '100B',                 FineTune: null,       Version: 'v1.0', Encoding: 'Q4_0', Type: null, Shard: "00003-of-00009"}},
  {filename: 'Hermes-2-Pro-Llama-3-8B-v1.0-F16.gguf',              expected: { BaseName: 'Hermes-2-Pro-Llama-3', SizeLabel: '8B',                   FineTune: null,       Version: 'v1.0', Encoding: 'F16',  Type: null, Shard: null}},
  {filename: 'Phi-3-mini-3.8B-ContextLength4k-instruct-v1.0.gguf', expected: { BaseName: 'Phi-3-mini',           SizeLabel: '3.8B-ContextLength4k', FineTune: 'instruct', Version: 'v1.0', Encoding: null,   Type: null, Shard: null}},
  {filename: 'not-a-known-arrangement.gguf',                       expected: null},
];

testCases.forEach(({ filename, expected }) => {
  const result = parseGGUFFilename(filename);
  const passed = JSON.stringify(result) === JSON.stringify(expected);
  console.log(`${filename}: ${passed ? "PASS" : "FAIL"}`);
  if (!passed) {
    console.log(result);
    console.log(expected);
  }
});
```

</details>
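Going the other way is simpler. The sketch below composes a conforming filename from already-extracted metadata fields; the helper name is hypothetical, and it assumes all five components are present (optional components would need to be skipped along with their leading dash).

```c
#include <stdio.h>

// Hypothetical helper: build "<BaseName>-<SizeLabel>-<FineTune>-<Version>-<Encoding>.gguf"
// from fields read out of the corresponding general.* metadata keys.
static void gguf_compose_filename(char * out, size_t n,
                                  const char * base_name,  // general.basename, spaces -> dashes
                                  const char * size_label, // general.size_label
                                  const char * fine_tune,  // general.finetune, spaces -> dashes
                                  const char * version,    // general.version, e.g. "v1.0"
                                  const char * encoding) { // e.g. "Q4_0"
    snprintf(out, n, "%s-%s-%s-%s-%s.gguf",
             base_name, size_label, fine_tune, version, encoding);
}

// gguf_compose_filename(buf, sizeof(buf), "Mixtral", "8x7B", "Instruct", "v0.1", "Q4_0")
// yields "Mixtral-8x7B-Instruct-v0.1-Q4_0.gguf".
```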
### File Structure

![image](https://github.com/ggerganov/ggml/assets/1991296/c3623641-3a1d-408e-bfaf-1b7c4e16aa63)
*diagram by [@mishig25](https://github.com/mishig25) (GGUF v3)*

GGUF files are structured as follows. They use a global alignment specified in the `general.alignment` metadata field, referred to as `ALIGNMENT` below. Where required, the file is padded with `0x00` bytes to the next multiple of `general.alignment`.

Fields, including arrays, are written sequentially without alignment unless otherwise specified.

Models are little-endian by default. They can also come in big-endian for use with big-endian computers; in this case, all values (including metadata values and tensors) will also be big-endian. At the time of writing, there is no way to determine if a model is big-endian; this may be rectified in future versions. If no additional information is provided, assume the model is little-endian.

```c
enum ggml_type: uint32_t {
    GGML_TYPE_F32     = 0,
    GGML_TYPE_F16     = 1,
    GGML_TYPE_Q4_0    = 2,
    GGML_TYPE_Q4_1    = 3,
    // GGML_TYPE_Q4_2 = 4, support has been removed
    // GGML_TYPE_Q4_3 = 5, support has been removed
    GGML_TYPE_Q5_0    = 6,
    GGML_TYPE_Q5_1    = 7,
    GGML_TYPE_Q8_0    = 8,
    GGML_TYPE_Q8_1    = 9,
    GGML_TYPE_Q2_K    = 10,
    GGML_TYPE_Q3_K    = 11,
    GGML_TYPE_Q4_K    = 12,
    GGML_TYPE_Q5_K    = 13,
    GGML_TYPE_Q6_K    = 14,
    GGML_TYPE_Q8_K    = 15,
    GGML_TYPE_IQ2_XXS = 16,
    GGML_TYPE_IQ2_XS  = 17,
    GGML_TYPE_IQ3_XXS = 18,
    GGML_TYPE_IQ1_S   = 19,
    GGML_TYPE_IQ4_NL  = 20,
    GGML_TYPE_IQ3_S   = 21,
    GGML_TYPE_IQ2_S   = 22,
    GGML_TYPE_IQ4_XS  = 23,
    GGML_TYPE_I8      = 24,
    GGML_TYPE_I16     = 25,
    GGML_TYPE_I32     = 26,
    GGML_TYPE_I64     = 27,
    GGML_TYPE_F64     = 28,
    GGML_TYPE_IQ1_M   = 29,
    GGML_TYPE_BF16    = 30,
    // GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
    // GGML_TYPE_Q4_0_4_8 = 32,
    // GGML_TYPE_Q4_0_8_8 = 33,
    GGML_TYPE_TQ1_0   = 34,
    GGML_TYPE_TQ2_0   = 35,
    // GGML_TYPE_IQ4_NL_4_4 = 36,
    // GGML_TYPE_IQ4_NL_4_8 = 37,
    // GGML_TYPE_IQ4_NL_8_8 = 38,
    GGML_TYPE_MXFP4   = 39, // MXFP4 (1 block)
    GGML_TYPE_COUNT   = 40,
};

enum gguf_metadata_value_type: uint32_t {
    // The value is a 8-bit unsigned integer.
    GGUF_METADATA_VALUE_TYPE_UINT8 = 0,
    // The value is a 8-bit signed integer.
    GGUF_METADATA_VALUE_TYPE_INT8 = 1,
    // The value is a 16-bit unsigned little-endian integer.
    GGUF_METADATA_VALUE_TYPE_UINT16 = 2,
    // The value is a 16-bit signed little-endian integer.
    GGUF_METADATA_VALUE_TYPE_INT16 = 3,
    // The value is a 32-bit unsigned little-endian integer.
    GGUF_METADATA_VALUE_TYPE_UINT32 = 4,
    // The value is a 32-bit signed little-endian integer.
    GGUF_METADATA_VALUE_TYPE_INT32 = 5,
    // The value is a 32-bit IEEE754 floating point number.
    GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6,
    // The value is a boolean.
    // 1-byte value where 0 is false and 1 is true.
    // Anything else is invalid, and should be treated as either the model being invalid or the reader being buggy.
    GGUF_METADATA_VALUE_TYPE_BOOL = 7,
    // The value is a UTF-8 non-null-terminated string, with length prepended.
    GGUF_METADATA_VALUE_TYPE_STRING = 8,
    // The value is an array of other values, with the length and type prepended.
    //
    // Arrays can be nested, and the length of the array is the number of elements in the array, not the number of bytes.
    GGUF_METADATA_VALUE_TYPE_ARRAY = 9,
    // The value is a 64-bit unsigned little-endian integer.
    GGUF_METADATA_VALUE_TYPE_UINT64 = 10,
    // The value is a 64-bit signed little-endian integer.
    GGUF_METADATA_VALUE_TYPE_INT64 = 11,
    // The value is a 64-bit IEEE754 floating point number.
    GGUF_METADATA_VALUE_TYPE_FLOAT64 = 12,
};
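
// As a worked example of the encoding described by the structs below
// (little-endian), the metadata pair `general.architecture` = "llama"
// is serialized as:
//
//   uint64 key length   = 20           14 00 00 00 00 00 00 00
//   char[] key bytes    = "general.architecture"
//   uint32 value type   = 8 (STRING)   08 00 00 00
//   uint64 value length = 5            05 00 00 00 00 00 00 00
//   char[] value bytes  = "llama"
//
// (The layout follows gguf_string_t and gguf_metadata_kv_t, defined next.)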

// A string in GGUF.
struct gguf_string_t {
    // The length of the string, in bytes.
    uint64_t len;
    // The string as a UTF-8 non-null-terminated string.
    char string[len];
};

union gguf_metadata_value_t {
    uint8_t uint8;
    int8_t int8;
    uint16_t uint16;
    int16_t int16;
    uint32_t uint32;
    int32_t int32;
    float float32;
    uint64_t uint64;
    int64_t int64;
    double float64;
    bool bool_;
    gguf_string_t string;
    struct {
        // Any value type is valid, including arrays.
        gguf_metadata_value_type type;
        // Number of elements, not bytes
        uint64_t len;
        // The array of values.
        gguf_metadata_value_t array[len];
    } array;
};

struct gguf_metadata_kv_t {
    // The key of the metadata. It is a standard GGUF string, with the following caveats:
    // - It must be a valid ASCII string.
    // - It must be a hierarchical key, where each segment is `lower_snake_case` and separated by a `.`.
    // - It must be at most 65535 (2^16 - 1) bytes long.
    // Any keys that do not follow these rules are invalid.
    gguf_string_t key;

    // The type of the value.
    // Must be one of the `gguf_metadata_value_type` values.
    gguf_metadata_value_type value_type;
    // The value.
    gguf_metadata_value_t value;
};

struct gguf_header_t {
    // Magic number to announce that this is a GGUF file.
    // Must be `GGUF` at the byte level: `0x47` `0x47` `0x55` `0x46`.
    // An executor that reads it as a little-endian uint32 can instead
    // check for 0x46554747 and let the endianness cancel out.
    // Consider being *very* explicit about the byte order here.
    uint32_t magic;
    // The version of the format implemented.
    // Must be `3` for the version described in this spec, which introduces big-endian support.
    //
    // This version should only be increased for structural changes to the format.
    // Changes that do not affect the structure of the file should instead update the metadata
    // to signify the change.
    uint32_t version;
    // The number of tensors in the file.
    // This is explicit, instead of being included in the metadata, to ensure it is always present
    // for loading the tensors.
    uint64_t tensor_count;
    // The number of metadata key-value pairs.
    uint64_t metadata_kv_count;
    // The metadata key-value pairs.
    gguf_metadata_kv_t metadata_kv[metadata_kv_count];
};

uint64_t align_offset(uint64_t offset) {
    return offset + (ALIGNMENT - (offset % ALIGNMENT)) % ALIGNMENT;
}

struct gguf_tensor_info_t {
    // The name of the tensor. It is a standard GGUF string, with the caveat that
    // it must be at most 64 bytes long.
    gguf_string_t name;
    // The number of dimensions in the tensor.
    // Currently at most 4, but this may change in the future.
    uint32_t n_dimensions;
    // The dimensions of the tensor.
    uint64_t dimensions[n_dimensions];
    // The type of the tensor.
    ggml_type type;
    // The offset of the tensor's data in this file in bytes.
    //
    // This offset is relative to `tensor_data`, not to the start
    // of the file, to make it easier for writers to write the file.
    // Readers should consider exposing this offset relative to the
    // file to make it easier to read the data.
    //
    // Must be a multiple of `ALIGNMENT`. That is, `align_offset(offset) == offset`.
    uint64_t offset;
};

struct gguf_file_t {
    // The header of the file.
    gguf_header_t header;

    // Tensor infos, which can be used to locate the tensor data.
    gguf_tensor_info_t tensor_infos[header.tensor_count];

    // Padding to the nearest multiple of `ALIGNMENT`.
    //
    // That is, if `sizeof(header) + sizeof(tensor_infos)` is not a multiple of `ALIGNMENT`,
    // this padding is added to make it so.
    //
    // This can be calculated as `align_offset(position) - position`, where `position` is
    // the position of the end of `tensor_infos` (i.e. `sizeof(header) + sizeof(tensor_infos)`).
    uint8_t _padding[];

    // Tensor data.
    //
    // This is arbitrary binary data corresponding to the weights of the model. This data should be close
    // or identical to the data in the original model file, but may be different due to quantization or
    // other optimizations for inference. Any such deviations should be recorded in the metadata or as
    // part of the architecture definition.
    //
    // Each tensor's data must be stored within this array, and located through its `tensor_infos` entry.
    // The offset of each tensor's data must be a multiple of `ALIGNMENT`, and the space between tensors
    // should be padded to `ALIGNMENT` bytes.
    uint8_t tensor_data[];
};
```
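To make the layout concrete, here is a minimal sketch of reading the fixed-size header fields. It assumes a little-endian host and a well-formed file, and omits most error handling; it is illustrative, not a reference reader.

```c
#include <stdint.h>
#include <stdio.h>

// Minimal sketch: read the fixed-size part of gguf_header_t.
// Assumes a little-endian host, so the fields can be read directly
// into native integers.
int read_gguf_header(const char * fname) {
    FILE * f = fopen(fname, "rb");
    if (!f) {
        return 1;
    }

    uint32_t magic   = 0;
    uint32_t version = 0;
    uint64_t tensor_count      = 0;
    uint64_t metadata_kv_count = 0;

    fread(&magic,             sizeof(magic),             1, f); // "GGUF" == 0x46554747 on LE hosts
    fread(&version,           sizeof(version),           1, f); // 3 for the version in this spec
    fread(&tensor_count,      sizeof(tensor_count),      1, f);
    fread(&metadata_kv_count, sizeof(metadata_kv_count), 1, f);

    if (magic != 0x46554747) {
        fclose(f);
        return 1; // not a GGUF file
    }

    printf("version: %u, tensors: %llu, metadata kv pairs: %llu\n",
           (unsigned) version,
           (unsigned long long) tensor_count,
           (unsigned long long) metadata_kv_count);

    // the metadata key-value pairs follow immediately after these fields

    fclose(f);
    return 0;
}
```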
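As a worked example of the alignment rule, here is the spec's `align_offset` with the alignment passed explicitly as a parameter:

```c
#include <stdint.h>

// align_offset from the file structure section, with ALIGNMENT made a parameter.
uint64_t align_offset(uint64_t offset, uint64_t alignment) {
    return offset + (alignment - (offset % alignment)) % alignment;
}

// With the default alignment of 32:
//   align_offset(60, 32) == 64  -> pad with 4 bytes of 0x00
//   align_offset(64, 32) == 64  -> already aligned, no padding needed
```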
#### General metadata

- `general.name: string`: The name of the model. This should be a human-readable name that can be used to identify the model. It should be unique within the community that the model is defined in.
- `general.author: string`: The author of the model.
- `general.version: string`: The version of the model.
- `general.organization: string`: The organization of the model.
- `general.basename: string`: The base model name / architecture of the model
- `general.finetune: string`: What the base model has been optimized toward.
- `general.description: string`: free-form description of the model including anything that isn't covered by the other fields
- `general.quantized_by: string`: The name of the individual who quantized the model
- `general.size_label: string`: Size class of the model, such as number of weights and experts. (Useful for leader boards)
- `general.license: string`: License of the model, expressed as a [SPDX license expression](https://spdx.github.io/spdx-spec/v2-draft/SPDX-license-expressions/) (e.g. `"MIT OR Apache-2.0"`). Do not include any other information, such as the license text or the URL to the license.
- `general.license.name: string`: Human friendly license name
- `general.license.link: string`: URL to the license.
- `general.url: string`: URL to the model's homepage. This can be a GitHub repo, a paper, etc.
- `general.doi: string`: Digital Object Identifier (DOI) https://www.doi.org/
- `general.uuid: string`: [Universally unique identifier](https://en.wikipedia.org/wiki/Universally_unique_identifier)
- `general.repo_url: string`: URL to the model's repository such as a GitHub repo or HuggingFace repo
- `general.tags: string[]`: List of tags that can be used as search terms for a search engine or social media
- `general.languages: string[]`: What languages the model can speak. Encoded as [ISO 639](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) two letter codes
- `general.datasets: string[]`: Links or references to datasets that the model was trained upon
- `general.file_type: uint32`: An enumerated value describing the type of the majority of the tensors in the file. Optional; can be inferred from the tensor types.
  - `ALL_F32 = 0`
  - `MOSTLY_F16 = 1`
  - `MOSTLY_Q4_0 = 2`
  - `MOSTLY_Q4_1 = 3`
  - `MOSTLY_Q4_1_SOME_F16 = 4`
  - `MOSTLY_Q4_2 = 5` (support removed)
  - `MOSTLY_Q4_3 = 6` (support removed)
  - `MOSTLY_Q8_0 = 7`
  - `MOSTLY_Q5_0 = 8`
  - `MOSTLY_Q5_1 = 9`
  - `MOSTLY_Q2_K = 10`
  - `MOSTLY_Q3_K_S = 11`
  - `MOSTLY_Q3_K_M = 12`
  - `MOSTLY_Q3_K_L = 13`
  - `MOSTLY_Q4_K_S = 14`
  - `MOSTLY_Q4_K_M = 15`
  - `MOSTLY_Q5_K_S = 16`
  - `MOSTLY_Q5_K_M = 17`
  - `MOSTLY_Q6_K = 18`

#### Source metadata

Information about where this model came from. This is useful for tracking the provenance of the model, and for finding the original source if the model is modified. For a model that was converted from GGML, for example, these keys would point to the model that was converted from.

- `general.source.url: string`: URL to the source of the model's homepage. This can be a GitHub repo, a paper, etc.
- `general.source.doi: string`: Source Digital Object Identifier (DOI) https://www.doi.org/
- `general.source.uuid: string`: Source [Universally unique identifier](https://en.wikipedia.org/wiki/Universally_unique_identifier)
- `general.source.repo_url: string`: URL to the source of the model's repository such as a GitHub repo or HuggingFace repo
- `general.base_model.count: uint32`: Number of parent models
- `general.base_model.{id}.name: string`: The name of the parent model.
- `general.base_model.{id}.author: string`: The author of the parent model.
- `general.base_model.{id}.version: string`: The version of the parent model.
- `general.base_model.{id}.organization: string`: The organization of the parent model.
- `general.base_model.{id}.url: string`: URL to the source of the parent model's homepage. This can be a GitHub repo, a paper, etc.
- `general.base_model.{id}.doi: string`: Parent Digital Object Identifier (DOI) https://www.doi.org/
- `general.base_model.{id}.uuid: string`: Parent [Universally unique identifier](https://en.wikipedia.org/wiki/Universally_unique_identifier)
- `general.base_model.{id}.repo_url: string`: URL to the source of the parent model's repository such as a GitHub repo or HuggingFace repo

### LLM

In the following, `[llm]` is used to fill in for the name of a specific LLM architecture. For example, `llama` for LLaMA, `mpt` for MPT, etc. If mentioned in an architecture's section, it is required for that architecture, but not all keys are required for all architectures. Consult the relevant section for more information.

- `[llm].context_length: uint64`: Also known as `n_ctx`. Length of the context (in tokens) that the model was trained on. For most architectures, this is the hard limit on the length of the input. Architectures, like RWKV, that are not reliant on transformer-style attention may be able to handle larger inputs, but this is not guaranteed.
- `[llm].embedding_length: uint64`: Also known as `n_embd`. Embedding layer size.
- `[llm].block_count: uint64`: The number of blocks of attention+feed-forward layers (i.e. the bulk of the LLM). Does not include the input or embedding layers.
- `[llm].feed_forward_length: uint64`: Also known as `n_ff`. The length of the feed-forward layer.
- `[llm].use_parallel_residual: bool`: Whether or not the parallel residual logic should be used.
- `[llm].tensor_data_layout: string`: When a model is converted to GGUF, tensors may be rearranged to improve performance. This key describes the layout of the tensor data. This is not required; if not present, it is assumed to be `reference`.
  - `reference`: tensors are laid out in the same order as the original model
  - further options can be found for each architecture in their respective sections
- `[llm].expert_count: uint32`: Number of experts in MoE models (optional for non-MoE arches).
- `[llm].expert_used_count: uint32`: Number of experts used during each token evaluation (optional for non-MoE arches).

#### Attention

- `[llm].attention.head_count: uint64`: Also known as `n_head`. Number of attention heads.
- `[llm].attention.head_count_kv: uint64`: The number of heads per group used in Grouped-Query-Attention. If not present or if present and equal to `[llm].attention.head_count`, the model does not use GQA.
- `[llm].attention.max_alibi_bias: float32`: The maximum bias to use for ALiBI.
- `[llm].attention.clamp_kqv: float32`: Value (`C`) to clamp the values of the `Q`, `K`, and `V` tensors between (`[-C, C]`).
- `[llm].attention.layer_norm_epsilon: float32`: Layer normalization epsilon.
- `[llm].attention.layer_norm_rms_epsilon: float32`: Layer RMS normalization epsilon.
- `[llm].attention.key_length: uint32`: The optional size of a key head, $d_k$. If not specified, it will be `n_embd / n_head`.
- `[llm].attention.value_length: uint32`: The optional size of a value head, $d_v$. If not specified, it will be `n_embd / n_head`.

#### RoPE

- `[llm].rope.dimension_count: uint64`: The number of rotary dimensions for RoPE.
- `[llm].rope.freq_base: float32`: The base frequency for RoPE.

##### Scaling

The following keys describe RoPE scaling parameters:

- `[llm].rope.scaling.type: string`: Can be `none`, `linear`, or `yarn`.
- `[llm].rope.scaling.factor: float32`: A scale factor for RoPE to adjust the context length.
- `[llm].rope.scaling.original_context_length: uint32_t`: The original context length of the base model.
- `[llm].rope.scaling.finetuned: bool`: True if the model has been finetuned with RoPE scaling.

Note that older models may not have these keys, and may instead use the following key:

- `[llm].rope.scale_linear: float32`: A linear scale factor for RoPE to adjust the context length.

It is recommended that models use the newer keys if possible, as they are more flexible and allow for more complex scaling schemes. Executors will need to support both indefinitely.

#### SSM

- `[llm].ssm.conv_kernel: uint32`: The size of the rolling/shift state.
- `[llm].ssm.inner_size: uint32`: The embedding size of the states.
- `[llm].ssm.state_size: uint32`: The size of the recurrent state.
- `[llm].ssm.time_step_rank: uint32`: The rank of time steps.

#### Models

The following sections describe the metadata for each model architecture. Each key specified _must_ be present.

##### LLaMA

- `llama.context_length`
- `llama.embedding_length`
- `llama.block_count`
- `llama.feed_forward_length`
- `llama.rope.dimension_count`
- `llama.attention.head_count`
- `llama.attention.layer_norm_rms_epsilon`

###### Optional

- `llama.rope.scale`
- `llama.attention.head_count_kv`
- `llama.tensor_data_layout`:
  - `Meta AI original pth`:
    ```python
    def permute(weights: NDArray, n_head: int) -> NDArray:
        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                    .swapaxes(1, 2)
                    .reshape(weights.shape))
    ```
- `llama.expert_count`
- `llama.expert_used_count`

##### MPT

- `mpt.context_length`
- `mpt.embedding_length`
- `mpt.block_count`
- `mpt.attention.head_count`
- `mpt.attention.alibi_bias_max`
- `mpt.attention.clip_kqv`
- `mpt.attention.layer_norm_epsilon`

##### GPT-NeoX

- `gptneox.context_length`
- `gptneox.embedding_length`
- `gptneox.block_count`
- `gptneox.use_parallel_residual`
- `gptneox.rope.dimension_count`
- `gptneox.attention.head_count`
- `gptneox.attention.layer_norm_epsilon`

###### Optional

- `gptneox.rope.scale`

##### GPT-J

- `gptj.context_length`
- `gptj.embedding_length`
- `gptj.block_count`
- `gptj.rope.dimension_count`
- `gptj.attention.head_count`
- `gptj.attention.layer_norm_epsilon`

###### Optional

- `gptj.rope.scale`

##### GPT-2

- `gpt2.context_length`
- `gpt2.embedding_length`
- `gpt2.block_count`
- `gpt2.attention.head_count`
- `gpt2.attention.layer_norm_epsilon`

##### BLOOM

- `bloom.context_length`
- `bloom.embedding_length`
- `bloom.block_count`
- `bloom.feed_forward_length`
- `bloom.attention.head_count`
- `bloom.attention.layer_norm_epsilon`

##### Falcon

- `falcon.context_length`
- `falcon.embedding_length`
- `falcon.block_count`
- `falcon.attention.head_count`
- `falcon.attention.head_count_kv`
- `falcon.attention.use_norm`
- `falcon.attention.layer_norm_epsilon`

###### Optional

- `falcon.tensor_data_layout`:
  - `jploski` (author of the original GGML implementation of Falcon):
    ```python
    # The original query_key_value tensor contains n_head_kv "kv groups",
    # each consisting of n_head/n_head_kv query weights followed by one key
    # and one value weight (shared by all query heads in the kv group).
    # This layout makes it a big pain to work with in GGML.
    # So we rearrange them here, so that we have n_head query weights
    # followed by n_head_kv key weights followed by n_head_kv value weights,
    # in contiguous fashion.
if "query_key_value" in src: qkv = model[src].view( n_head_kv, n_head // n_head_kv + 2, head_dim, head_dim * n_head) q = qkv[:, :-2 ].reshape(n_head * head_dim, head_dim * n_head) k = qkv[:, [-2]].reshape(n_head_kv * head_dim, head_dim * n_head) v = qkv[:, [-1]].reshape(n_head_kv * head_dim, head_dim * n_head) model[src] = torch.cat((q,k,v)).reshape_as(model[src]) ``` ##### Mamba - `mamba.context_length` - `mamba.embedding_length` - `mamba.block_count` - `mamba.ssm.conv_kernel` - `mamba.ssm.inner_size` - `mamba.ssm.state_size` - `mamba.ssm.time_step_rank` - `mamba.attention.layer_norm_rms_epsilon` ##### RWKV The vocabulary size is the same as the number of rows in the `head` matrix. - `rwkv.architecture_version: uint32`: The only allowed value currently is 4. Version 5 is expected to appear some time in the future. - `rwkv.context_length: uint64`: Length of the context used during training or fine-tuning. RWKV is able to handle larger context than this limit, but the output quality may suffer. - `rwkv.block_count: uint64` - `rwkv.embedding_length: uint64` - `rwkv.feed_forward_length: uint64` ##### Whisper Keys that do not have types defined should be assumed to share definitions with `llm.` keys. (For example, `whisper.context_length` is equivalent to `llm.context_length`.) This is because they are both transformer models. - `whisper.encoder.context_length` - `whisper.encoder.embedding_length` - `whisper.encoder.block_count` - `whisper.encoder.mels_count: uint64` - `whisper.encoder.attention.head_count` - `whisper.decoder.context_length` - `whisper.decoder.embedding_length` - `whisper.decoder.block_count` - `whisper.decoder.attention.head_count` #### Prompting **TODO**: Include prompt format, and/or metadata about how it should be used (instruction, conversation, autocomplete, etc). ### LoRA **TODO**: Figure out what metadata is needed for LoRA. Probably desired features: - match an existing model exactly, so that it can't be misapplied - be marked as a LoRA so executors won't try to run it by itself Should this be an architecture, or should it share the details of the original model with additional fields to mark it as a LoRA? ### Tokenizer The following keys are used to describe the tokenizer of the model. It is recommended that model authors support as many of these as possible, as it will allow for better tokenization quality with supported executors. #### GGML GGML supports an embedded vocabulary that enables inference of the model, but implementations of tokenization using this vocabulary (i.e. `llama.cpp`'s tokenizer) may have lower accuracy than the original tokenizer used for the model. When a more accurate tokenizer is available and supported, it should be used instead. It is not guaranteed to be standardized across models, and may change in the future. It is recommended that model authors use a more standardized tokenizer if possible. - `tokenizer.ggml.model: string`: The name of the tokenizer model. - `llama`: Llama style SentencePiece (tokens and scores extracted from HF `tokenizer.model`) - `replit`: Replit style SentencePiece (tokens and scores extracted from HF `spiece.model`) - `gpt2`: GPT-2 / GPT-NeoX style BPE (tokens extracted from HF `tokenizer.json`) - `rwkv`: RWKV tokenizer - `tokenizer.ggml.tokens: array[string]`: A list of tokens indexed by the token ID used by the model. - `tokenizer.ggml.scores: array[float32]`: If present, the score/probability of each token. If not present, all tokens are assumed to have equal probability. 
- `tokenizer.ggml.token_type: array[int32]`: The token type (1=normal, 2=unknown, 3=control, 4=user defined, 5=unused, 6=byte). If present, it must have the same length and index as `tokens`.
- `tokenizer.ggml.merges: array[string]`: If present, the merges of the tokenizer. If not present, the tokens are assumed to be atomic.
- `tokenizer.ggml.added_tokens: array[string]`: If present, tokens that were added after training.

##### Special tokens

- `tokenizer.ggml.bos_token_id: uint32`: Beginning of sequence marker
- `tokenizer.ggml.eos_token_id: uint32`: End of sequence marker
- `tokenizer.ggml.unknown_token_id: uint32`: Unknown token
- `tokenizer.ggml.separator_token_id: uint32`: Separator token
- `tokenizer.ggml.padding_token_id: uint32`: Padding token

#### Hugging Face

Hugging Face maintains their own `tokenizers` library that supports a wide variety of tokenizers. If your executor uses this library, it may be able to use the model's tokenizer directly.

- `tokenizer.huggingface.json: string`: the entirety of the HF `tokenizer.json` for a given model (e.g. <https://huggingface.co/mosaicml/mpt-7b-instruct/blob/main/tokenizer.json>). Included for compatibility with executors that support HF tokenizers directly.

#### Other

Other tokenizers may be used, but are not necessarily standardized. They may be executor-specific. They will be documented here as they are discovered/further developed.

- `tokenizer.rwkv.world: string`: a RWKV World tokenizer, like [this](https://github.com/BlinkDL/ChatRWKV/blob/main/tokenizer/rwkv_vocab_v20230424.txt). This text file should be included verbatim.
- `tokenizer.chat_template: string`: a Jinja template that specifies the input format expected by the model. For more details see: <https://huggingface.co/docs/transformers/main/en/chat_templating>
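As a sketch of how an executor might consume these keys, the helper below resolves an optional special-token ID with a fallback, using the `gguf.h` API that ships with ggml (function names as of this writing; treat the exact signatures as illustrative).

```c
#include "gguf.h"

// Return the value of an optional uint32 metadata key, or `def` if the key
// is absent. Missing special-token keys are expected; see above.
static uint32_t gguf_get_u32_or(const struct gguf_context * ctx, const char * key, uint32_t def) {
    const int64_t key_id = gguf_find_key(ctx, key);
    return key_id < 0 ? def : gguf_get_val_u32(ctx, key_id);
}

// usage (the fallback IDs here are arbitrary placeholders, not spec defaults):
//   uint32_t bos_id = gguf_get_u32_or(ctx, "tokenizer.ggml.bos_token_id", 1);
//   uint32_t eos_id = gguf_get_u32_or(ctx, "tokenizer.ggml.eos_token_id", 2);
```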
### Computation graph

This is a future extension and still needs to be discussed, and may necessitate a new GGUF version. At the time of writing, the primary blocker is the stabilization of the computation graph format.

A sample computation graph of GGML nodes could be included in the model itself, allowing an executor to run the model without providing its own implementation of the architecture. This would allow for a more consistent experience across executors, and would allow for more complex architectures to be supported without requiring the executor to implement them.

## Standardized tensor names

To minimize complexity and maximize compatibility, it is recommended that models using the transformer architecture use the following naming convention for their tensors:

### Base layers

`AA.weight` `AA.bias`

where `AA` can be:

- `token_embd`: Token embedding layer
- `pos_embd`: Position embedding layer
- `output_norm`: Output normalization layer
- `output`: Output layer

### Attention and feed-forward layer blocks

`blk.N.BB.weight` `blk.N.BB.bias`

where N signifies the block number a layer belongs to, and where `BB` could be:

- `attn_norm`: Attention normalization layer
- `attn_norm_2`: Attention normalization layer
- `attn_qkv`: Attention query-key-value layer
- `attn_q`: Attention query layer
- `attn_k`: Attention key layer
- `attn_v`: Attention value layer
- `attn_output`: Attention output layer
- `ffn_norm`: Feed-forward network normalization layer
- `ffn_up`: Feed-forward network "up" layer
- `ffn_gate`: Feed-forward network "gate" layer
- `ffn_down`: Feed-forward network "down" layer
- `ffn_gate_inp`: Expert-routing layer for the Feed-forward network in MoE models
- `ffn_gate_exp`: Feed-forward network "gate" layer per expert in MoE models
- `ffn_down_exp`: Feed-forward network "down" layer per expert in MoE models
- `ffn_up_exp`: Feed-forward network "up" layer per expert in MoE models
- `ssm_in`: State space model input projections layer
- `ssm_conv1d`: State space model rolling/shift layer
- `ssm_x`: State space model selective parametrization layer
- `ssm_a`: State space model state compression layer
- `ssm_d`: State space model skip connection layer
- `ssm_dt`: State space model time step layer
- `ssm_out`: State space model output projection layer
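For instance, the query projection weights of the third block are stored under `blk.2.attn_q.weight`. A writer might build such names as in the sketch below (illustrative; note the 64-byte limit on tensor names from the file structure section):

```c
#include <stdio.h>

// Build a standardized block-tensor name, e.g. "blk.2.attn_q.weight" for
// the query projection weights of the third block. Tensor names must be
// at most 64 bytes long (see the file structure section).
static void make_tensor_name(char * out, size_t n, int block, const char * layer, const char * kind) {
    snprintf(out, n, "blk.%d.%s.%s", block, layer, kind);
}
```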
## Version History

This document is actively updated to describe the current state of the metadata, and these changes are not tracked outside of the commits.

However, the format _itself_ has changed. The following sections describe the changes to the format itself.

### v3

Adds big-endian support.

### v2

Most countable values (lengths, etc) were changed from `uint32` to `uint64` to allow for larger models to be supported in the future.

### v1

Initial version.

## Historical State of Affairs

The following information is provided for context, but is not necessary to understand the rest of this document.

### Overview

At present, there are three GGML file formats floating around for LLMs:

- **GGML** (unversioned): baseline format, with no versioning or alignment.
- **GGMF** (versioned): the same as GGML, but with versioning. Only one version exists.
- **GGJT**: Aligns the tensors to allow for use with `mmap`, which requires alignment. v1, v2 and v3 are identical, but the latter versions use a different quantization scheme that is incompatible with previous versions.

GGML is primarily used by the examples in `ggml`, while GGJT is used by `llama.cpp` models. Other executors may use any of the three formats, but this is not 'officially' supported.

These formats share the same fundamental structure:

- a magic number with an optional version number
- model-specific hyperparameters, including
  - metadata about the model, such as the number of layers, the number of heads, etc.
  - a `ftype` that describes the type of the majority of the tensors,
    - for GGML files, the quantization version is encoded in the `ftype` divided by 1000 (e.g. a stored `ftype` of 1002 encodes quantization version 1 of `ftype` 2)
- an embedded vocabulary, which is a list of strings with length prepended. The GGMF/GGJT formats embed a float32 score next to the strings.
- finally, a list of tensors with their length-prepended name, type, and (aligned, in the case of GGJT) tensor data

Notably, this structure does not identify what model architecture the model belongs to, nor does it offer any flexibility for changing the structure of the hyperparameters. This means that the only way to add new hyperparameters is to add them to the end of the list, which is a breaking change for existing models.

### Drawbacks

Unfortunately, over the last few months, a few issues have become apparent with the existing models:

- There's no way to identify which model architecture a given model is for, because that information isn't present
- Similarly, existing programs cannot intelligently fail upon encountering new architectures
- Adding or removing any new hyperparameters is a breaking change, which is impossible for a reader to detect without using heuristics
- Each model architecture requires its own conversion script to their architecture's variant of GGML
- Maintaining backwards compatibility without breaking the structure of the format requires clever tricks, like packing the quantization version into the ftype, which are not guaranteed to be picked up by readers/writers, and are not consistent between the two formats

### Why not other formats?

There are a few other formats that could be used, but issues include:

- requiring additional dependencies to load or save the model, which is complicated in a C environment
- limited or no support for 4-bit quantization
- existing cultural expectations (e.g. whether or not the model is a directory or a file)
- lack of support for embedded vocabularies
- lack of control over direction of future development

Ultimately, it is likely that GGUF will remain necessary for the foreseeable future, and it is better to have a single format that is well-documented and supported by all executors than to contort an existing format to fit the needs of GGML.
ggml-org-ggml-7ec8045/examples/000077500000000000000000000000001506673203700163415ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/CMakeLists.txt000066400000000000000000000015471506673203700211100ustar00rootroot00000000000000if (GGML_ALL_WARNINGS) if (NOT MSVC) set(cxx_flags # TODO(marella): Add other warnings.
-Wpedantic -Wunused-variable -Wno-unused-function -Wno-multichar ) add_compile_options("$<$:${cxx_flags}>") endif() endif() add_library(common STATIC common.cpp) target_include_directories(common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) add_library(common-ggml STATIC common-ggml.cpp) target_link_libraries(common-ggml PRIVATE ggml) target_include_directories(common-ggml PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) add_subdirectory(yolo) if (NOT GGML_BACKEND_DL) add_subdirectory(gpt-2) add_subdirectory(gpt-j) add_subdirectory(mnist) add_subdirectory(sam) add_subdirectory(simple) add_subdirectory(magika) endif() if (GGML_METAL) add_subdirectory(perf-metal) endif() ggml-org-ggml-7ec8045/examples/common-ggml.cpp000066400000000000000000000206531506673203700212670ustar00rootroot00000000000000#include "common-ggml.h" #include #include static const std::map GGML_FTYPE_MAP = { {"q4_0", GGML_FTYPE_MOSTLY_Q4_0}, {"q4_1", GGML_FTYPE_MOSTLY_Q4_1}, {"q5_0", GGML_FTYPE_MOSTLY_Q5_0}, {"q5_1", GGML_FTYPE_MOSTLY_Q5_1}, {"q8_0", GGML_FTYPE_MOSTLY_Q8_0}, {"q2_k", GGML_FTYPE_MOSTLY_Q2_K}, {"q3_k", GGML_FTYPE_MOSTLY_Q3_K}, {"q4_k", GGML_FTYPE_MOSTLY_Q4_K}, {"q5_k", GGML_FTYPE_MOSTLY_Q5_K}, {"q6_k", GGML_FTYPE_MOSTLY_Q6_K}, }; void ggml_print_ftypes(FILE * fp) { for (auto it = GGML_FTYPE_MAP.begin(); it != GGML_FTYPE_MAP.end(); it++) { fprintf(fp, " type = \"%s\" or %d\n", it->first.c_str(), it->second); } } enum ggml_ftype ggml_parse_ftype(const char * str) { enum ggml_ftype ftype; if (str[0] == 'q') { const auto it = GGML_FTYPE_MAP.find(str); if (it == GGML_FTYPE_MAP.end()) { fprintf(stderr, "%s: unknown ftype '%s'\n", __func__, str); return GGML_FTYPE_UNKNOWN; } ftype = it->second; } else { ftype = (enum ggml_ftype) atoi(str); } return ftype; } bool ggml_common_quantize_0( std::ifstream & finp, std::ofstream & fout, const ggml_ftype ftype, const std::vector & to_quant, const std::vector & to_skip) { ggml_type qtype = GGML_TYPE_F32; switch (ftype) { case GGML_FTYPE_MOSTLY_Q4_0: qtype = GGML_TYPE_Q4_0; break; case GGML_FTYPE_MOSTLY_Q4_1: qtype = GGML_TYPE_Q4_1; break; case GGML_FTYPE_MOSTLY_Q5_0: qtype = GGML_TYPE_Q5_0; break; case GGML_FTYPE_MOSTLY_Q5_1: qtype = GGML_TYPE_Q5_1; break; case GGML_FTYPE_MOSTLY_Q8_0: qtype = GGML_TYPE_Q8_0; break; case GGML_FTYPE_MOSTLY_Q2_K: qtype = GGML_TYPE_Q2_K; break; case GGML_FTYPE_MOSTLY_Q3_K: qtype = GGML_TYPE_Q3_K; break; case GGML_FTYPE_MOSTLY_Q4_K: qtype = GGML_TYPE_Q4_K; break; case GGML_FTYPE_MOSTLY_Q5_K: qtype = GGML_TYPE_Q5_K; break; case GGML_FTYPE_MOSTLY_Q6_K: qtype = GGML_TYPE_Q6_K; break; case GGML_FTYPE_UNKNOWN: case GGML_FTYPE_ALL_F32: case GGML_FTYPE_MOSTLY_F16: case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: case GGML_FTYPE_MOSTLY_IQ2_XXS: case GGML_FTYPE_MOSTLY_IQ2_XS: case GGML_FTYPE_MOSTLY_IQ2_S: case GGML_FTYPE_MOSTLY_IQ3_XXS: case GGML_FTYPE_MOSTLY_IQ3_S: case GGML_FTYPE_MOSTLY_IQ1_S: case GGML_FTYPE_MOSTLY_IQ4_NL: case GGML_FTYPE_MOSTLY_IQ4_XS: case GGML_FTYPE_MOSTLY_IQ1_M: case GGML_FTYPE_MOSTLY_BF16: case GGML_FTYPE_MOSTLY_MXFP4: { fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype); return false; } }; if (!ggml_is_quantized(qtype)) { fprintf(stderr, "%s: invalid quantization type %d (%s)\n", __func__, qtype, ggml_type_name(qtype)); return false; } size_t total_size_org = 0; size_t total_size_new = 0; std::vector work; std::vector data_u8; std::vector data_f16; std::vector data_f32; while (true) { int32_t n_dims; int32_t length; int32_t ttype; finp.read(reinterpret_cast(&n_dims), sizeof(n_dims)); finp.read(reinterpret_cast(&length), sizeof(length)); 
finp.read(reinterpret_cast(&ttype), sizeof(ttype)); if (finp.eof()) { break; } int32_t nelements = 1; int32_t ne[4] = { 1, 1, 1, 1 }; for (int i = 0; i < n_dims; ++i) { finp.read (reinterpret_cast(&ne[i]), sizeof(ne[i])); nelements *= ne[i]; } std::string name(length, 0); finp.read (&name[0], length); printf("%64s - [%5d, %5d, %5d], type = %6s ", name.data(), ne[0], ne[1], ne[2], ggml_type_name((ggml_type) ttype)); bool quantize = false; // check if we should quantize this tensor for (const auto & s : to_quant) { if (std::regex_match(name, std::regex(s))) { quantize = true; break; } } // check if we should skip this tensor for (const auto & s : to_skip) { if (std::regex_match(name, std::regex(s))) { quantize = false; break; } } // quantize only 2D tensors quantize &= (n_dims == 2); if (quantize) { if (ttype != GGML_TYPE_F32 && ttype != GGML_TYPE_F16) { fprintf(stderr, "%s: unsupported ttype %d (%s) for integer quantization\n", __func__, ttype, ggml_type_name((ggml_type) ttype)); return false; } if (ttype == GGML_TYPE_F16) { data_f16.resize(nelements); finp.read(reinterpret_cast(data_f16.data()), nelements * sizeof(ggml_fp16_t)); data_f32.resize(nelements); for (int i = 0; i < nelements; ++i) { data_f32[i] = ggml_fp16_to_fp32(data_f16[i]); } } else { data_f32.resize(nelements); finp.read(reinterpret_cast(data_f32.data()), nelements * sizeof(float)); } ttype = qtype; } else { const int bpe = (ttype == 0) ? sizeof(float) : sizeof(uint16_t); data_u8.resize(nelements*bpe); finp.read(reinterpret_cast(data_u8.data()), nelements * bpe); } fout.write(reinterpret_cast(&n_dims), sizeof(n_dims)); fout.write(reinterpret_cast(&length), sizeof(length)); fout.write(reinterpret_cast(&ttype), sizeof(ttype)); for (int i = 0; i < n_dims; ++i) { fout.write(reinterpret_cast(&ne[i]), sizeof(ne[i])); } fout.write(&name[0], length); if (quantize) { work.resize(nelements); // for quantization size_t cur_size = 0; switch ((ggml_type) ttype) { case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_0: case GGML_TYPE_Q5_1: case GGML_TYPE_Q8_0: case GGML_TYPE_Q2_K: case GGML_TYPE_Q3_K: case GGML_TYPE_Q4_K: case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: { cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], nullptr); } break; case GGML_TYPE_F32: case GGML_TYPE_F16: case GGML_TYPE_I8: case GGML_TYPE_I16: case GGML_TYPE_I32: case GGML_TYPE_I64: case GGML_TYPE_F64: case GGML_TYPE_Q8_1: case GGML_TYPE_Q8_K: case GGML_TYPE_IQ2_XXS: case GGML_TYPE_IQ2_XS: case GGML_TYPE_IQ2_S: case GGML_TYPE_IQ3_XXS: case GGML_TYPE_IQ3_S: case GGML_TYPE_IQ1_S: case GGML_TYPE_IQ4_NL: case GGML_TYPE_IQ4_XS: case GGML_TYPE_IQ1_M: case GGML_TYPE_BF16: case GGML_TYPE_TQ1_0: case GGML_TYPE_TQ2_0: case GGML_TYPE_MXFP4: case GGML_TYPE_COUNT: { fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype)); return false; } } fout.write(reinterpret_cast(work.data()), cur_size); total_size_new += cur_size; printf("size = %8.2f MB -> %8.2f MB\n", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0); } else { printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0); fout.write(reinterpret_cast(data_u8.data()), data_u8.size()); total_size_new += data_u8.size(); } total_size_org += nelements * sizeof(float); } printf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0); printf("%s: quant size = %8.2f MB | ftype = %d (%s)\n", __func__, total_size_new/1024.0/1024.0, ftype, ggml_type_name(qtype)); return true; } 
ggml-org-ggml-7ec8045/examples/common-ggml.h000066400000000000000000000006321506673203700207270ustar00rootroot00000000000000#pragma once #include "ggml.h" #include #include #include enum ggml_ftype ggml_parse_ftype(const char * str); void ggml_print_ftypes(FILE * fp = stderr); bool ggml_common_quantize_0( std::ifstream & finp, std::ofstream & fout, const ggml_ftype ftype, const std::vector & to_quant, const std::vector & to_skip); ggml-org-ggml-7ec8045/examples/common.cpp000066400000000000000000000553531506673203700203500ustar00rootroot00000000000000#define _USE_MATH_DEFINES // for M_PI #include "common.h" #include #include #include #include #include #include #include // Function to check if the next argument exists static std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) { if (i + 1 < argc && argv[i + 1][0] != '-') { return argv[++i]; } else { fprintf(stderr, "error: %s requires one argument.\n", flag.c_str()); gpt_print_usage(argc, argv, params); exit(0); } } bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { for (int i = 1; i < argc; i++) { std::string arg = argv[i]; if (arg == "-s" || arg == "--seed") { params.seed = std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "-p" || arg == "--prompt") { params.prompt = get_next_arg(i, argc, argv, arg, params); } else if (arg == "-n" || arg == "--n_predict") { params.n_predict = std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "-np" || arg == "--n_parallel") { params.n_parallel = std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "--top_k") { params.top_k = std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "--top_p") { params.top_p = std::stof(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "--temp") { params.temp = std::stof(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "--repeat-last-n") { params.repeat_last_n = std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "--repeat-penalty") { params.repeat_penalty = std::stof(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "-b" || arg == "--batch_size") { params.n_batch= std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "-c" || arg == "--context") { params.n_ctx= std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "-ngl" || arg == "--gpu-layers" || arg == "--n-gpu-layers") { params.n_gpu_layers = std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "--ignore-eos") { params.ignore_eos = true; } else if (arg == "-m" || arg == "--model") { params.model = get_next_arg(i, argc, argv, arg, params); } else if (arg == "-i" || arg == "--interactive") { params.interactive = true; } else if (arg == "-ip" || arg == "--interactive-port") { params.interactive = true; params.interactive_port = std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "-h" || arg == "--help") { gpt_print_usage(argc, argv, params); exit(0); } else if (arg == "-f" || arg == "--file") { get_next_arg(i, argc, argv, arg, params); std::ifstream file(argv[i]); if (!file) { fprintf(stderr, "error: failed to open file '%s'\n", argv[i]); break; } std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.prompt)); if (params.prompt.back() == '\n') { params.prompt.pop_back(); } } else if (arg == "-tt" || arg == 
"--token_test") { params.token_test = get_next_arg(i, argc, argv, arg, params); } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); gpt_print_usage(argc, argv, params); exit(0); } } return true; } void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "options:\n"); fprintf(stderr, " -h, --help show this help message and exit\n"); fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n"); fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads); fprintf(stderr, " -p PROMPT, --prompt PROMPT\n"); fprintf(stderr, " prompt to start generation with (default: random)\n"); fprintf(stderr, " -f FNAME, --file FNAME\n"); fprintf(stderr, " load prompt from a file\n"); fprintf(stderr, " -tt TOKEN_TEST, --token_test TOKEN_TEST\n"); fprintf(stderr, " test tokenization\n"); fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d)\n", params.n_predict); fprintf(stderr, " --top_k N top-k sampling (default: %d)\n", params.top_k); fprintf(stderr, " --top_p N top-p sampling (default: %.1f)\n", params.top_p); fprintf(stderr, " --temp N temperature (default: %.1f)\n", params.temp); fprintf(stderr, " --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled)\n", params.repeat_last_n); fprintf(stderr, " --repeat-penalty N penalize repeat sequence of tokens (default: %.2f, 1.0 = disabled)\n", (double)params.repeat_penalty); fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); fprintf(stderr, " -c N, --context N context / KV cache size (default: %d)\n", params.n_ctx); fprintf(stderr, " --ignore-eos ignore EOS token during generation\n"); fprintf(stderr, " -ngl N, --gpu-layers N number of layers to offload to GPU on supported models (default: %d)\n", params.n_gpu_layers); fprintf(stderr, " -m FNAME, --model FNAME\n"); fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); fprintf(stderr, "\n"); } std::string gpt_random_prompt(std::mt19937 & rng) { const int r = rng() % 10; switch (r) { case 0: return "So"; case 1: return "Once upon a time"; case 2: return "When"; case 3: return "The"; case 4: return "After"; case 5: return "If"; case 6: return "import"; case 7: return "He"; case 8: return "She"; case 9: return "They"; } return "The"; } std::string trim(const std::string & s) { std::regex e("^\\s+|\\s+$"); return std::regex_replace(s, e, ""); } std::string replace(const std::string & s, const std::string & from, const std::string & to) { std::string result = s; size_t pos = 0; while ((pos = result.find(from, pos)) != std::string::npos) { result.replace(pos, from.length(), to); pos += to.length(); } return result; } void gpt_vocab::add_special_token(const std::string & token) { special_tokens.push_back(token); } std::map json_parse(const std::string & fname) { std::map result; // read file into string std::string json; { std::ifstream ifs(fname); if (!ifs) { fprintf(stderr, "Failed to open %s\n", fname.c_str()); exit(1); } json = std::string((std::istreambuf_iterator(ifs)), (std::istreambuf_iterator())); } if (json[0] != '{') { return result; } // parse json { bool has_key = false; bool in_token = false; std::string str_key = ""; std::string str_val = ""; int n = json.size(); for (int i = 1; i < n; ++i) { if (!in_token) { if (json[i] == ' ') continue; if (json[i] == '"') { in_token = true; continue; } } 
else { if (json[i] == '\\' && i+1 < n) { if (has_key == false) { str_key += json[i]; } else { str_val += json[i]; } ++i; } else if (json[i] == '"') { if (has_key == false) { has_key = true; ++i; while (json[i] == ' ') ++i; ++i; // : while (json[i] == ' ') ++i; if (json[i] != '\"') { while (json[i] != ',' && json[i] != '}') { str_val += json[i++]; } has_key = false; } else { in_token = true; continue; } } else { has_key = false; } str_key = ::replace(str_key, "\\u0120", " " ); // \u0120 -> space str_key = ::replace(str_key, "\\u010a", "\n"); // \u010a -> new line str_key = ::replace(str_key, "\\\"", "\""); // \\\" -> " try { result[str_key] = std::stoi(str_val); } catch (...) { //fprintf(stderr, "%s: ignoring key '%s' with value '%s'\n", fname.c_str(), str_key.c_str(), str_val.c_str()); } str_key = ""; str_val = ""; in_token = false; continue; } if (has_key == false) { str_key += json[i]; } else { str_val += json[i]; } } } } return result; } void gpt_split_words(std::string str, std::vector& words) { const std::string pattern = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"; const std::regex re(pattern); std::smatch m; while (std::regex_search(str, m, re)) { for (auto x : m) { words.push_back(x); } str = m.suffix(); } } std::vector gpt_tokenize(const gpt_vocab & vocab, const std::string & text) { std::vector words; // first split the text into words { std::string str = text; // Generate the subpattern from the special_tokens vector if it's not empty if (!vocab.special_tokens.empty()) { const std::regex escape(R"([\[\\\^\$\.\|\?\*\+\(\)\{\}])"); std::string special_tokens_subpattern; for (const auto & token : vocab.special_tokens) { if (!special_tokens_subpattern.empty()) { special_tokens_subpattern += "|"; } special_tokens_subpattern += std::regex_replace(token, escape, R"(\$&)"); } std::regex re(special_tokens_subpattern); std::smatch m; // Split the text by special tokens. while (std::regex_search(str, m, re)) { // Split the substrings in-between special tokens into words. gpt_split_words(m.prefix(), words); // Add matched special tokens as words. for (auto x : m) { words.push_back(x); } str = m.suffix(); } // Remaining text without special tokens will be handled below. 
} gpt_split_words(str, words); } // find the longest token that forms each word in words: std::vector tokens; for (const auto & word : words) { for (int i = 0; i < (int) word.size(); ){ for (int j = word.size() - 1; j >= i; j--){ auto cand = word.substr(i, j-i+1); auto it = vocab.token_to_id.find(cand); if (it != vocab.token_to_id.end()){ // word.substr(i, j-i+1) in vocab tokens.push_back(it->second); i = j + 1; break; } else if (j == i){ // word.substr(i, 1) has no matching fprintf(stderr, "%s: unknown token '%s'\n", __func__, word.substr(i, 1).data()); i++; } } } } return tokens; } static std::vector parse_tokens_from_string(const std::string& input, char delimiter) { std::vector output; std::stringstream ss(input); std::string token; while (std::getline(ss, token, delimiter)) { output.push_back(std::stoi(token)); } return output; } static std::map> extract_tests_from_file(const std::string & fpath_test){ if (fpath_test.empty()){ fprintf(stderr, "%s : No test file found.\n", __func__); return std::map>(); } std::map> tests; auto fin = std::ifstream(fpath_test, std::ios_base::in); const char * delimeter = " => "; const char del_tok = ','; std::string line; while (std::getline(fin, line)) { size_t delimiterPos = line.find(delimeter); if (delimiterPos != std::string::npos) { std::string text = line.substr(0, delimiterPos); std::string s_tokens = line.substr(delimiterPos + std::strlen(delimeter)); tests[text] = parse_tokens_from_string(s_tokens, del_tok); } } return tests; } void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test){ std::map> tests = extract_tests_from_file(fpath_test); size_t n_fails = 0; for (const auto & test : tests) { std::vector tokens = gpt_tokenize(vocab, test.first); if (tokens != test.second){ n_fails++; // print out failure cases fprintf(stderr, "%s : failed test: '%s'\n", __func__, test.first.c_str()); fprintf(stderr, "%s : tokens in hf: ", __func__); for (const auto & t : test.second) { fprintf(stderr, "%s(%d), ", vocab.id_to_token[t].c_str(), t); } fprintf(stderr, "\n"); fprintf(stderr, "%s : tokens in ggml: ", __func__); for (const auto & t : tokens) { fprintf(stderr, "%s(%d), ", vocab.id_to_token[t].c_str(), t); } fprintf(stderr, "\n"); } } fprintf(stderr, "%s : %zu tests failed out of %zu tests.\n", __func__, n_fails, tests.size()); } bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) { printf("%s: loading vocab from '%s'\n", __func__, fname.c_str()); vocab.token_to_id = ::json_parse(fname); for (const auto & kv : vocab.token_to_id) { vocab.id_to_token[kv.second] = kv.first; } printf("%s: vocab size = %d\n", __func__, (int) vocab.token_to_id.size()); // print the vocabulary //for (auto kv : vocab.token_to_id) { // printf("'%s' -> %d\n", kv.first.data(), kv.second); //} return true; } gpt_vocab::id gpt_sample_top_k_top_p( const gpt_vocab & vocab, const float * logits, int top_k, double top_p, double temp, std::mt19937 & rng) { int n_logits = vocab.id_to_token.size(); std::vector> logits_id; logits_id.reserve(n_logits); { const double scale = 1.0/temp; for (int i = 0; i < n_logits; ++i) { logits_id.push_back(std::make_pair(logits[i]*scale, i)); } } // find the top K tokens std::partial_sort( logits_id.begin(), logits_id.begin() + top_k, logits_id.end(), [](const std::pair & a, const std::pair & b) { return a.first > b.first; }); logits_id.resize(top_k); double maxl = -INFINITY; for (const auto & kv : logits_id) { maxl = std::max(maxl, kv.first); } // compute probs for the top K tokens std::vector probs; 
static std::vector<gpt_vocab::id> parse_tokens_from_string(const std::string & input, char delimiter) {
    std::vector<gpt_vocab::id> output;
    std::stringstream ss(input);
    std::string token;

    while (std::getline(ss, token, delimiter)) {
        output.push_back(std::stoi(token));
    }

    return output;
}

static std::map<std::string, std::vector<gpt_vocab::id>> extract_tests_from_file(const std::string & fpath_test){
    if (fpath_test.empty()){
        fprintf(stderr, "%s : No test file found.\n", __func__);
        return std::map<std::string, std::vector<gpt_vocab::id>>();
    }

    std::map<std::string, std::vector<gpt_vocab::id>> tests;

    auto fin = std::ifstream(fpath_test, std::ios_base::in);
    const char * delimiter = " => ";
    const char del_tok = ',';
    std::string line;
    while (std::getline(fin, line)) {
        size_t delimiterPos = line.find(delimiter);
        if (delimiterPos != std::string::npos) {
            std::string text = line.substr(0, delimiterPos);
            std::string s_tokens = line.substr(delimiterPos + std::strlen(delimiter));
            tests[text] = parse_tokens_from_string(s_tokens, del_tok);
        }
    }
    return tests;
}

void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test){
    std::map<std::string, std::vector<gpt_vocab::id>> tests = extract_tests_from_file(fpath_test);

    size_t n_fails = 0;

    for (const auto & test : tests) {
        std::vector<gpt_vocab::id> tokens = gpt_tokenize(vocab, test.first);

        if (tokens != test.second){
            n_fails++;

            // print out failure cases
            fprintf(stderr, "%s : failed test: '%s'\n", __func__, test.first.c_str());
            fprintf(stderr, "%s : tokens in hf:   ", __func__);
            for (const auto & t : test.second) {
                fprintf(stderr, "%s(%d), ", vocab.id_to_token[t].c_str(), t);
            }
            fprintf(stderr, "\n");
            fprintf(stderr, "%s : tokens in ggml: ", __func__);
            for (const auto & t : tokens) {
                fprintf(stderr, "%s(%d), ", vocab.id_to_token[t].c_str(), t);
            }
            fprintf(stderr, "\n");
        }
    }

    fprintf(stderr, "%s : %zu tests failed out of %zu tests.\n", __func__, n_fails, tests.size());
}

bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
    printf("%s: loading vocab from '%s'\n", __func__, fname.c_str());

    vocab.token_to_id = ::json_parse(fname);

    for (const auto & kv : vocab.token_to_id) {
        vocab.id_to_token[kv.second] = kv.first;
    }

    printf("%s: vocab size = %d\n", __func__, (int) vocab.token_to_id.size());

    // print the vocabulary
    //for (auto kv : vocab.token_to_id) {
    //    printf("'%s' -> %d\n", kv.first.data(), kv.second);
    //}

    return true;
}

gpt_vocab::id gpt_sample_top_k_top_p(
        const gpt_vocab & vocab,
        const float * logits,
        int    top_k,
        double top_p,
        double temp,
        std::mt19937 & rng) {
    int n_logits = vocab.id_to_token.size();

    std::vector<std::pair<double, gpt_vocab::id>> logits_id;
    logits_id.reserve(n_logits);

    {
        const double scale = 1.0/temp;
        for (int i = 0; i < n_logits; ++i) {
            logits_id.push_back(std::make_pair(logits[i]*scale, i));
        }
    }

    // find the top K tokens
    std::partial_sort(
            logits_id.begin(),
            logits_id.begin() + top_k, logits_id.end(),
            [](const std::pair<double, gpt_vocab::id> & a, const std::pair<double, gpt_vocab::id> & b) {
        return a.first > b.first;
    });

    logits_id.resize(top_k);

    double maxl = -INFINITY;
    for (const auto & kv : logits_id) {
        maxl = std::max(maxl, kv.first);
    }

    // compute probs for the top K tokens
    std::vector<double> probs;
    probs.reserve(logits_id.size());

    double sum = 0.0;
    for (const auto & kv : logits_id) {
        double p = exp(kv.first - maxl);
        probs.push_back(p);
        sum += p;
    }

    // normalize the probs
    for (auto & p : probs) {
        p /= sum;
    }

    if (top_p < 1.0f) {
        double cumsum = 0.0f;
        for (int i = 0; i < top_k; i++) {
            cumsum += probs[i];
            if (cumsum >= top_p) {
                top_k = i + 1;
                probs.resize(top_k);
                logits_id.resize(top_k);
                break;
            }
        }

        cumsum = 1.0/cumsum;
        for (int i = 0; i < (int) probs.size(); i++) {
            probs[i] *= cumsum;
        }
    }

    //printf("\n");
    //for (int i = 0; i < (int) probs.size(); i++) {
    //    printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]);
    //}
    //exit(0);

    std::discrete_distribution<> dist(probs.begin(), probs.end());
    int idx = dist(rng);

    return logits_id[idx].second;
}

gpt_vocab::id gpt_sample_top_k_top_p_repeat(
        const gpt_vocab & vocab,
        const float * logits,
        const int32_t * last_n_tokens_data,
        size_t last_n_tokens_data_size,
        int    top_k,
        double top_p,
        double temp,
        int repeat_last_n,
        float repeat_penalty,
        std::mt19937 & rng) {

    int n_logits = vocab.id_to_token.size();

    const auto * plogits = logits;

    const auto last_n_tokens = std::vector<int32_t>(last_n_tokens_data, last_n_tokens_data + last_n_tokens_data_size);

    if (temp <= 0) {
        // select the token with the highest logit directly
        float max_logit = plogits[0];
        gpt_vocab::id max_id = 0;

        for (int i = 1; i < n_logits; ++i) {
            if (plogits[i] > max_logit) {
                max_logit = plogits[i];
                max_id = i;
            }
        }
        return max_id;
    }

    std::vector<std::pair<double, gpt_vocab::id>> logits_id;
    logits_id.reserve(n_logits);

    {
        const float scale = 1.0f/temp;
        for (int i = 0; i < n_logits; ++i) {
            // repetition penalty from ctrl paper (https://arxiv.org/abs/1909.05858)
            // credit https://github.com/facebookresearch/llama/compare/main...shawwn:llama:main
            if (repeat_last_n > 0 && std::find(last_n_tokens.end()-repeat_last_n, last_n_tokens.end(), i) != last_n_tokens.end()) {
                // if score < 0 then repetition penalty has to be multiplied to reduce the previous token probability
                if (plogits[i] < 0.0f) {
                    logits_id.push_back(std::make_pair(plogits[i]*scale*repeat_penalty, i));
                } else {
                    logits_id.push_back(std::make_pair(plogits[i]*scale/repeat_penalty, i));
                }
            } else {
                logits_id.push_back(std::make_pair(plogits[i]*scale, i));
            }
        }
    }

    // find the top K tokens
    std::partial_sort(
            logits_id.begin(),
            logits_id.begin() + top_k, logits_id.end(),
            [](const std::pair<double, gpt_vocab::id> & a, const std::pair<double, gpt_vocab::id> & b) {
        return a.first > b.first;
    });

    logits_id.resize(top_k);

    double maxl = -INFINITY;
    for (const auto & kv : logits_id) {
        maxl = std::max(maxl, kv.first);
    }

    // compute probs for the top K tokens
    std::vector<double> probs;
    probs.reserve(logits_id.size());

    double sum = 0.0;
    for (const auto & kv : logits_id) {
        double p = exp(kv.first - maxl);
        probs.push_back(p);
        sum += p;
    }

    // normalize the probs
    for (auto & p : probs) {
        p /= sum;
    }

    if (top_p < 1.0f) {
        double cumsum = 0.0f;
        for (int i = 0; i < top_k; i++) {
            cumsum += probs[i];
            if (cumsum >= top_p) {
                top_k = i + 1;
                probs.resize(top_k);
                logits_id.resize(top_k);
                break;
            }
        }

        cumsum = 1.0/cumsum;
        for (int i = 0; i < (int) probs.size(); i++) {
            probs[i] *= cumsum;
        }
    }

    // printf("\n");
    // for (int i = 0; i < (int) probs.size(); i++) {
    // for (int i = 0; i < 10; i++) {
    //     printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]);
    // }

    std::discrete_distribution<> dist(probs.begin(), probs.end());
    int idx = dist(rng);

    return logits_id[idx].second;
}
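// Worked example (illustrative, not from the original source): at temp = 1.0
// and repeat_penalty = 1.3, a recently generated token with logit 2.6 is
// rescored to 2.6/1.3 = 2.0, while one with logit -2.0 is rescored to
// -2.0*1.3 = -2.6. Both adjustments lower the token's sampling probability
// relative to tokens that were not produced recently.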
void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
    const float rc = 1.0f / (2.0f * M_PI * cutoff);
    const float dt = 1.0f / sample_rate;
    const float alpha = dt / (rc + dt);

    float y = data[0];

    for (size_t i = 1; i < data.size(); i++) {
        y = alpha * (y + data[i] - data[i - 1]);
        data[i] = y;
    }
}

bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
    const int n_samples      = pcmf32.size();
    const int n_samples_last = (sample_rate * last_ms) / 1000;

    if (n_samples_last >= n_samples) {
        // not enough samples - assume no speech
        return false;
    }

    if (freq_thold > 0.0f) {
        high_pass_filter(pcmf32, freq_thold, sample_rate);
    }

    float energy_all  = 0.0f;
    float energy_last = 0.0f;

    for (int i = 0; i < n_samples; i++) {
        energy_all += fabsf(pcmf32[i]);

        if (i >= n_samples - n_samples_last) {
            energy_last += fabsf(pcmf32[i]);
        }
    }

    energy_all  /= n_samples;
    energy_last /= n_samples_last;

    if (verbose) {
        fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
    }

    if (energy_last > vad_thold*energy_all) {
        return false;
    }

    return true;
}
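// Illustrative note (not in the original source): with vad_thold = 0.6, the
// function returns true (trailing window judged quiet) only once the mean
// absolute energy of the last last_ms milliseconds drops to at most 60% of
// the mean energy of the whole buffer.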
float similarity(const std::string & s0, const std::string & s1) {
    const size_t len0 = s0.size() + 1;
    const size_t len1 = s1.size() + 1;

    std::vector<int> col(len1, 0);
    std::vector<int> prevCol(len1, 0);

    for (size_t i = 0; i < len1; i++) {
        prevCol[i] = i;
    }

    for (size_t i = 0; i < len0; i++) {
        col[0] = i;
        for (size_t j = 1; j < len1; j++) {
            col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (i > 0 && s0[i - 1] == s1[j - 1] ? 0 : 1));
        }
        col.swap(prevCol);
    }

    const float dist = prevCol[len1 - 1];

    return 1.0f - (dist / std::max(s0.size(), s1.size()));
}

bool is_file_exist(const char * filename) {
    std::ifstream infile(filename);
    return infile.good();
}
ggml-org-ggml-7ec8045/examples/common.h000066400000000000000000000226351506673203700200110ustar00rootroot00000000000000// Various helper functions and utilities

#pragma once

#include <string>
#include <map>
#include <vector>
#include <random>
#include <thread>
#include <ctime>
#include <fstream>
#include <sstream>

//
// GPT CLI argument parsing
//

struct gpt_params {
    int32_t seed         = -1;   // RNG seed
    int32_t n_threads    = std::min(4, (int32_t) std::thread::hardware_concurrency());
    int32_t n_predict    = 200;  // new tokens to predict
    int32_t n_parallel   = 1;    // number of parallel streams
    int32_t n_batch      = 32;   // batch size for prompt processing
    int32_t n_ctx        = 2048; // context size (this is the KV cache max size)
    int32_t n_gpu_layers = 0;    // number of layers to offload to the GPU

    bool ignore_eos = false; // ignore EOS token when generating text

    // sampling parameters
    int32_t top_k          = 40;
    float   top_p          = 0.9f;
    float   temp           = 0.9f;
    int32_t repeat_last_n  = 64;
    float   repeat_penalty = 1.00f;

    std::string model      = "models/gpt-2-117M/ggml-model.bin"; // model path
    std::string prompt     = "";
    std::string token_test = "";

    bool    interactive      = false;
    int32_t interactive_port = -1;
};

bool gpt_params_parse(int argc, char ** argv, gpt_params & params);

void gpt_print_usage(int argc, char ** argv, const gpt_params & params);

std::string gpt_random_prompt(std::mt19937 & rng);

//
// Vocab utils
//

std::string trim(const std::string & s);

std::string replace(
        const std::string & s,
        const std::string & from,
        const std::string & to);

struct gpt_vocab {
    using id    = int32_t;
    using token = std::string;

    std::map<token, id> token_to_id;
    std::map<id, token> id_to_token;
    std::vector<std::string> special_tokens;

    void add_special_token(const std::string & token);
};

// poor-man's JSON parsing
std::map<std::string, int32_t> json_parse(const std::string & fname);

std::string convert_to_utf8(const std::wstring & input);

std::wstring convert_to_wstring(const std::string & input);

void gpt_split_words(std::string str, std::vector<std::string> & words);

// split text into tokens
//
// ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53
//
// Regex (Python):
// r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
//
// Regex (C++):
// R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"
//
std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text);

// test outputs of gpt_tokenize
//
//   - compare with tokens generated by the huggingface tokenizer
//   - test cases are chosen based on the model's main language (under 'prompt' directory)
//   - if all sentences are tokenized identically, print 'All tests passed.'
//   - otherwise, print sentence, huggingface tokens, ggml tokens
//
void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test);

// load the tokens from encoder.json
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);

// sample next token given probabilities for each embedding
//
//   - consider only the top K tokens
//   - from them, consider only the top tokens with cumulative probability > P
//
// TODO: not sure if this implementation is correct
// TODO: temperature is not implemented
//
gpt_vocab::id gpt_sample_top_k_top_p(
        const gpt_vocab & vocab,
        const float * logits,
        int    top_k,
        double top_p,
        double temp,
        std::mt19937 & rng);

gpt_vocab::id gpt_sample_top_k_top_p_repeat(
        const gpt_vocab & vocab,
        const float * logits,
        const int32_t * last_n_tokens_data,
        size_t last_n_tokens_data_size,
        int    top_k,
        double top_p,
        double temp,
        int repeat_last_n,
        float repeat_penalty,
        std::mt19937 & rng);

//
// Audio utils
//

// Write PCM data into WAV audio file
class wav_writer {
private:
    std::ofstream file;
    uint32_t dataSize = 0;
    std::string wav_filename;

    bool write_header(const uint32_t sample_rate, const uint16_t bits_per_sample, const uint16_t channels) {
        file.write("RIFF", 4);
        file.write("\0\0\0\0", 4); // Placeholder for file size
        file.write("WAVE", 4);
        file.write("fmt ", 4);

        const uint32_t sub_chunk_size = 16;
        const uint16_t audio_format = 1; // PCM format
        const uint32_t byte_rate = sample_rate * channels * bits_per_sample / 8;
        const uint16_t block_align = channels * bits_per_sample / 8;

        file.write(reinterpret_cast<const char *>(&sub_chunk_size), 4);
        file.write(reinterpret_cast<const char *>(&audio_format), 2);
        file.write(reinterpret_cast<const char *>(&channels), 2);
        file.write(reinterpret_cast<const char *>(&sample_rate), 4);
        file.write(reinterpret_cast<const char *>(&byte_rate), 4);
        file.write(reinterpret_cast<const char *>(&block_align), 2);
        file.write(reinterpret_cast<const char *>(&bits_per_sample), 2);
        file.write("data", 4);
        file.write("\0\0\0\0", 4); // Placeholder for data size

        return true;
    }

    // It is assumed that PCM data is normalized to a range from -1 to 1
    bool write_audio(const float * data, size_t length) {
        for (size_t i = 0; i < length; ++i) {
            const int16_t intSample = int16_t(data[i] * 32767);
            file.write(reinterpret_cast<const char *>(&intSample), sizeof(int16_t));
            dataSize += sizeof(int16_t);
        }
        if (file.is_open()) {
            file.seekp(4, std::ios::beg);
            uint32_t fileSize = 36 + dataSize;
            file.write(reinterpret_cast<const char *>(&fileSize), 4);
            file.seekp(40, std::ios::beg);
            file.write(reinterpret_cast<const char *>(&dataSize), 4);
            file.seekp(0, std::ios::end);
        }
        return true;
    }

    bool open_wav(const std::string & filename) {
        if (filename != wav_filename) {
            if (file.is_open()) {
                file.close();
            }
        }
        if (!file.is_open()) {
            file.open(filename, std::ios::binary);
            wav_filename = filename;
            dataSize = 0;
        }
        return file.is_open();
    }
public:
    bool open(const std::string & filename,
              const uint32_t sample_rate,
              const uint16_t bits_per_sample,
              const uint16_t channels) {
        if (open_wav(filename)) {
            write_header(sample_rate, bits_per_sample, channels);
        } else {
            return false;
        }

        return true;
    }

    bool close() {
        file.close();
        return true;
    }

    bool write(const float * data, size_t length) {
        return write_audio(data, length);
    }

    ~wav_writer() {
        if (file.is_open()) {
            file.close();
        }
    }
};

// Apply a high-pass frequency filter to PCM audio
// Suppresses frequencies below cutoff Hz
void high_pass_filter(
        std::vector<float> & data,
        float cutoff,
        float sample_rate);

// Basic voice activity detection (VAD) using audio energy adaptive threshold
bool vad_simple(
        std::vector<float> & pcmf32,
        int   sample_rate,
        int   last_ms,
        float vad_thold,
        float freq_thold,
        bool  verbose);

// compute similarity between two strings using Levenshtein distance
float similarity(const std::string & s0, const std::string & s1);

//
// Terminal utils
//

#define SQR(X)    ((X) * (X))
#define UNCUBE(x) x < 48 ? 0 : x < 115 ? 1 : (x - 35) / 40

/**
 * Quantizes 24-bit RGB to xterm256 code range [16,256).
 */
static int rgb2xterm256(int r, int g, int b) {
    unsigned char cube[] = {0, 0137, 0207, 0257, 0327, 0377};
    int av, ir, ig, ib, il, qr, qg, qb, ql;
    av = r * .299 + g * .587 + b * .114 + .5;
    ql = (il = av > 238 ? 23 : (av - 3) / 10) * 10 + 8;
    qr = cube[(ir = UNCUBE(r))];
    qg = cube[(ig = UNCUBE(g))];
    qb = cube[(ib = UNCUBE(b))];
    if (SQR(qr - r) + SQR(qg - g) + SQR(qb - b) <=
        SQR(ql - r) + SQR(ql - g) + SQR(ql - b))
        return ir * 36 + ig * 6 + ib + 020;
    return il + 0350;
}

static std::string set_xterm256_foreground(int r, int g, int b) {
    int x = rgb2xterm256(r, g, b);
    std::ostringstream oss;
    oss << "\033[38;5;" << x << "m";
    return oss.str();
}

// Lowest is red, middle is yellow, highest is green. Color scheme from
// Paul Tol; it is colorblind friendly https://sronpersonalpages.nl/~pault
const std::vector<std::string> k_colors = {
    set_xterm256_foreground(220,   5,  12),
    set_xterm256_foreground(232,  96,  28),
    set_xterm256_foreground(241, 147,  45),
    set_xterm256_foreground(246, 193,  65),
    set_xterm256_foreground(247, 240,  86),
    set_xterm256_foreground(144, 201, 135),
    set_xterm256_foreground( 78, 178, 101),
};

// ANSI formatting codes
static std::string set_inverse() {
    return "\033[7m";
}

static std::string set_underline() {
    return "\033[4m";
}

static std::string set_dim() {
    return "\033[2m";
}

// Style scheme for different confidence levels
const std::vector<std::string> k_styles = {
    set_inverse(),   // Low confidence - inverse (highlighted)
    set_underline(), // Medium confidence - underlined
    set_dim(),       // High confidence - dim
};

//
// Other utils
//

// check if file exists using ifstream
bool is_file_exist(const char * filename);
ggml-org-ggml-7ec8045/examples/gpt-2/000077500000000000000000000000001506673203700172725ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/gpt-2/CMakeLists.txt000066400000000000000000000016421506673203700220350ustar00rootroot00000000000000#
# gpt-2

set(TEST_TARGET gpt-2-ctx)
add_executable(${TEST_TARGET} main-ctx.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)

set(TEST_TARGET gpt-2-alloc)
add_executable(${TEST_TARGET} main-alloc.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)

set(TEST_TARGET gpt-2-backend)
add_executable(${TEST_TARGET} main-backend.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)

set(TEST_TARGET gpt-2-sched)
add_executable(${TEST_TARGET} main-sched.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)

#
# gpt-2-quantize
set(TEST_TARGET gpt-2-quantize) add_executable(${TEST_TARGET} quantize.cpp) target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml) # # gpt-2-batched set(TEST_TARGET gpt-2-batched) add_executable(${TEST_TARGET} main-batched.cpp) target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml) ggml-org-ggml-7ec8045/examples/gpt-2/README.md000066400000000000000000000211331506673203700205510ustar00rootroot00000000000000# gpt-2 This is a C++ example running GPT-2 inference using the [ggml](https://github.com/ggerganov/ggml) library. The program runs on the CPU - no video card is required. The [Cerebras-GPT](https://huggingface.co/cerebras) models are also supported. The example supports the following GPT-2 models: | Model | Description | Disk Size | | --- | --- | --- | | 117M | Small model | 240 MB | | 345M | Medium model | 680 MB | | 774M | Large model | 1.5 GB | | 1558M | XL model | 3.0 GB | Sample performance on MacBook M1 Pro: | Model | Size | Time / Token | | --- | --- | --- | | GPT-2 | 117M | 5 ms | | GPT-2 | 345M | 12 ms | | GPT-2 | 774M | 23 ms | | GPT-2 | 1558M | 42 ms | *TODO: add tables for Cerebras-GPT models* Sample output: ```bash $ ./bin/gpt-2 -h usage: ./bin/gpt-2 [options] options: -h, --help show this help message and exit -s SEED, --seed SEED RNG seed (default: -1) -t N, --threads N number of threads to use during computation (default: 8) -p PROMPT, --prompt PROMPT prompt to start generation with (default: random) -n N, --n_predict N number of tokens to predict (default: 200) --top_k N top-k sampling (default: 40) --top_p N top-p sampling (default: 0.9) --temp N temperature (default: 1.0) -b N, --batch_size N batch size for prompt processing (default: 8) -m FNAME, --model FNAME model path (default: models/gpt-2-117M/ggml-model.bin) $ ./bin/gpt-2 gpt2_model_load: loading model from 'models/gpt-2-117M/ggml-model.bin' gpt2_model_load: n_vocab = 50257 gpt2_model_load: n_ctx = 1024 gpt2_model_load: n_embd = 768 gpt2_model_load: n_head = 12 gpt2_model_load: n_layer = 12 gpt2_model_load: f16 = 1 gpt2_model_load: ggml ctx size = 311.12 MB gpt2_model_load: memory size = 72.00 MB, n_mem = 12288 gpt2_model_load: model size = 239.08 MB main: number of tokens in prompt = 1 So this is going to be the end of the line for us. If the Dolphins continue to do their business, it's possible that the team could make a bid to bring in new defensive coordinator Scott Linehan. Linehan's job is a little daunting, but he's a great coach and an excellent coach. I don't believe we're going to make the playoffs. We're going to have to work hard to keep our heads down and get ready to go.<|endoftext|> main: mem per token = 2048612 bytes main: load time = 106.32 ms main: sample time = 7.10 ms main: predict time = 506.40 ms / 5.06 ms per token main: total time = 629.84 ms ``` ## Downloading and converting the original models (GPT-2) You can download the original model files using the [download-model.sh](download-model.sh) Bash script. The models are in Tensorflow format, so in order to use them with ggml, you need to convert them to appropriate format. This is done via the [convert-ckpt-to-ggml.py](convert-ckpt-to-ggml.py) python script. Here is the entire process for the GPT-2 117M model (download from official site + conversion): ```bash cd ggml/build ../examples/gpt-2/download-model.sh 117M Downloading model 117M ... 
models/gpt-2-117M/checkpoint                      100%[=============================>]      77  --.-KB/s   in 0s
models/gpt-2-117M/encoder.json                    100%[=============================>]   1018K  1.20MB/s   in 0.8s
models/gpt-2-117M/hparams.json                    100%[=============================>]      90  --.-KB/s   in 0s
models/gpt-2-117M/model.ckpt.data-00000-of-00001  100%[=============================>] 474.70M  1.21MB/s   in 8m 39s
models/gpt-2-117M/model.ckpt.index                100%[=============================>]   5.09K  --.-KB/s   in 0s
models/gpt-2-117M/model.ckpt.meta                 100%[=============================>] 460.11K   806KB/s   in 0.6s
models/gpt-2-117M/vocab.bpe                       100%[=============================>] 445.62K   799KB/s   in 0.6s
Done! Model '117M' saved in 'models/gpt-2-117M/'

Run the convert-ckpt-to-ggml.py script to convert the model to ggml format.

  python /Users/john/ggml/examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-117M/ 1
```

The trailing `1` argument selects the f16 output type; pass `0` instead for f32, as described in the converter's usage message.

This conversion requires that you have Python and TensorFlow installed on your computer. Still, if you want to avoid this, you can download the already converted ggml models as described below.

## Downloading and converting the original models (Cerebras-GPT)

Clone the respective repository from here: https://huggingface.co/cerebras

Use the [convert-cerebras-to-ggml.py](convert-cerebras-to-ggml.py) script to convert the model to `ggml` format:

```bash
cd ggml/build
git clone https://huggingface.co/cerebras/Cerebras-GPT-111M models/Cerebras-GPT-111M
python ../examples/gpt-2/convert-cerebras-to-ggml.py models/Cerebras-GPT-111M/
```

## Downloading the ggml model directly (GPT-2)

For convenience, I will be hosting the converted ggml model files in order to make it easier to run the examples. This way, you can directly download a single binary file and start using it. No Python or TensorFlow is required.

Here is how to get the 117M ggml model:

```bash
cd ggml/build
../examples/gpt-2/download-ggml-model.sh 117M

Downloading ggml model 117M ...
models/gpt-2-117M/ggml-model.bin         100%[===============================>] 239.58M  8.52MB/s   in 28s

Done! Model '117M' saved in 'models/gpt-2-117M/ggml-model.bin'
You can now use it like this:

  $ ./bin/gpt-2 -m models/gpt-2-117M/ggml-model.bin -p "This is an example"
```

At some point, I might decide to stop hosting these models. So in that case, simply revert to the manual process above.

## Quantizing the models

You can also try to quantize the `ggml` models via 4-bit integer quantization. Keep in mind that for smaller models, this will render them completely useless. You generally want to quantize larger models.

```bash
# quantize GPT-2 F16 to Q4_0 (faster but less precise)
./bin/gpt-2-quantize models/gpt-2-1558M/ggml-model-f16.bin models/gpt-2-1558M/ggml-model-q4_0.bin 2
./bin/gpt-2 -m models/gpt-2-1558M/ggml-model-q4_0.bin -p "This is an example"

# quantize Cerebras F16 to Q4_1 (slower but more precise)
./bin/gpt-2-quantize models/Cerebras-GPT-6.7B/ggml-model-f16.bin models/Cerebras-GPT-6.7B/ggml-model-q4_1.bin 3
./bin/gpt-2 -m models/Cerebras-GPT-6.7B/ggml-model-q4_1.bin -p "This is an example"
```

## Batched generation example

You can try the batched generation from a given prompt using the `gpt-2-batched` binary.
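For example, to sample 5 parallel sequences of up to 50 new tokens each from the 117M model (an illustrative invocation; the flags match the sample run below, with `-np` controlling the number of parallel streams):

```bash
./bin/gpt-2-batched -np 5 -m models/gpt-2-117M/ggml-model.bin -p "Hello my name is" -n 50
```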
Sample output: ```bash $ gpt-2-batched -np 5 -m models/gpt-2-117M/ggml-model.bin -p "Hello my name is" -n 50 main: seed = 1697037431 gpt2_model_load: loading model from 'models/gpt-2-117M/ggml-model.bin' gpt2_model_load: n_vocab = 50257 gpt2_model_load: n_ctx = 1024 gpt2_model_load: n_embd = 768 gpt2_model_load: n_head = 12 gpt2_model_load: n_layer = 12 gpt2_model_load: ftype = 1 gpt2_model_load: qntvr = 0 gpt2_model_load: ggml tensor size = 320 bytes gpt2_model_load: backend buffer size = 312.72 MB ggml_init_cublas: found 1 CUDA devices: Device 0: NVIDIA GeForce GTX 1660, compute capability 7.5 gpt2_model_load: using CPU backend gpt2_model_load: memory size = 72.00 MB, n_mem = 12288 gpt2_model_load: model size = 239.08 MB extract_tests_from_file : No test file found. test_gpt_tokenizer : 0 tests failed out of 0 tests. main: compute buffer size: 3.26 MB main: generating 5 sequences ... main: prompt: 'Hello my name is' main: number of tokens in prompt = 4, first 8 tokens: 15496 616 1438 318 sequence 0: Hello my name is John. You can call me any way you want, if you want, but for my very first date, I will be on the phone with you. We're both in our early 20s, but I feel like it's all sequence 1: Hello my name is Robert, and I want to say that we're proud to have your company here on the world's largest platform for sharing your stories with us. This is a huge opportunity for our community. We have hundreds of people on this team and sequence 2: Hello my name is Jack. I'm the one who created you. Jack is a boy with a big smile and a big heart. He is a handsome guy. He loves the outdoors and loves the people he meets. He wants to be a sequence 3: Hello my name is John. I am a Canadian citizen with a large number of family in Quebec and I am interested in studying. My aim is to take up a post in the Journal of the International Academy of Sciences of Canada which I am currently finishing. sequence 4: Hello my name is Dan. I am an entrepreneur. I am a great father. I am a great husband. I am a great husband. I am a great dad. And I am a great husband. I love my life. I love main: load time = 880.80 ms main: sample time = 91.43 ms main: predict time = 2518.29 ms main: total time = 3544.32 ms ``` ggml-org-ggml-7ec8045/examples/gpt-2/convert-cerebras-to-ggml.py000066400000000000000000000142571506673203700244650ustar00rootroot00000000000000# Convert Cerebras models to ggml format # # ref: https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/ # import sys import struct import json import torch import numpy as np import re from transformers import AutoModelForCausalLM # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py def bytes_to_unicode(): """ Returns list of utf-8 byte and a corresponding list of unicode strings. The reversible bpe codes work on unicode strings. This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. This is a signficant percentage of your normal, say, 32K bpe vocab. To avoid that, we want lookup tables between utf-8 bytes and unicode strings. And avoids mapping to whitespace/control characters the bpe code barfs on. 
""" bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1)) cs = bs[:] n = 0 for b in range(2**8): if b not in bs: bs.append(b) cs.append(2**8+n) n += 1 cs = [chr(n) for n in cs] return dict(zip(bs, cs)) if len(sys.argv) < 2: print("Usage: convert-cerebras-to-ggml.py dir-model [use-f32]\n") sys.exit(1) # output in the same directory as the model dir_model = sys.argv[1] fname_out = sys.argv[1] + "/ggml-model-f16.bin" with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f: encoder = json.load(f) with open(dir_model + "/config.json", "r", encoding="utf-8") as f: hparams = json.load(f) # use 16-bit or 32-bit floats use_f16 = True if len(sys.argv) > 2: use_f16 = False fname_out = sys.argv[1] + "/ggml-model-f32.bin" model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True) #print (model) list_vars = model.state_dict() #print (list_vars) print(hparams) fout = open(fname_out, "wb") fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex fout.write(struct.pack("i", hparams["vocab_size"])) fout.write(struct.pack("i", hparams["n_positions"])) fout.write(struct.pack("i", hparams["n_embd"])) fout.write(struct.pack("i", hparams["n_head"])) fout.write(struct.pack("i", hparams["n_layer"])) fout.write(struct.pack("i", use_f16)) byte_encoder = bytes_to_unicode() byte_decoder = {v:k for k, v in byte_encoder.items()} fout.write(struct.pack("i", len(encoder))) for key in encoder: text = bytearray([byte_decoder[c] for c in key]) fout.write(struct.pack("i", len(text))) fout.write(text) for name in list_vars.keys(): data = list_vars[name].squeeze().numpy() print("Processing variable: " + name + " with shape: ", data.shape) # rename headers to keep compatibility if name == "transformer.ln_f.weight": name = "model/ln_f/g" elif name == "transformer.ln_f.bias": name = "model/ln_f/b" elif name == "transformer.wte.weight": name = "model/wte" elif name == "transformer.wpe.weight": name = "model/wpe" elif name == "lm_head.weight": name = "model/lm_head" elif re.match(r"transformer.h\.\d+\.ln_1\.weight", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/ln_1/g" elif re.match(r"transformer.h\.\d+\.ln_1\.bias", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/ln_1/b" elif re.match(r"transformer.h\.\d+\.attn\.c_attn\.weight", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/attn/c_attn/w" elif re.match(r"transformer.h\.\d+\.attn\.c_attn\.bias", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/attn/c_attn/b" elif re.match(r"transformer.h\.\d+\.attn\.c_proj\.weight", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/attn/c_proj/w" elif re.match(r"transformer.h.\d+.attn.c_proj.bias", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/attn/c_proj/b" elif re.match(r"transformer.h.\d+.ln_2.weight", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/ln_2/g" elif re.match(r"transformer.h.\d+.ln_2.bias", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/ln_2/b" elif re.match(r"transformer.h.\d+.mlp.c_fc.weight", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/mlp/c_fc/w" elif re.match(r"transformer.h.\d+.mlp.c_fc.bias", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/mlp/c_fc/b" elif re.match(r"transformer.h.\d+.mlp.c_proj.weight", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/mlp/c_proj/w" elif re.match(r"transformer.h.\d+.mlp.c_proj.bias", name): i = re.findall("\d+", name)[0] name = f"model/h{i}/mlp/c_proj/b" else: print("Unrecognized variable 
name. %s", name) # we don't need these if name.endswith("attn.masked_bias") or name.endswith(".attn.bias"): print(" Skipping variable: " + name) continue n_dims = len(data.shape); # ftype == 0 -> float32, ftype == 1 -> float16 ftype = 0; if use_f16: if (name == "model/wte" or name == "model/lm_head" or name[-2:] == "/g" or name[-2:] == "/w") and n_dims == 2: print(" Converting to float16") data = data.astype(np.float16) ftype = 1 else: print(" Converting to float32") data = data.astype(np.float32) ftype = 0 # for efficiency - transpose the projection matrices # "model/h.*/attn/c_attn/w" # "model/h.*/attn/c_proj/w" # "model/h.*/mlp/c_fc/w" # "model/h.*/mlp/c_proj/w" if name[-14:] == "/attn/c_attn/w" or \ name[-14:] == "/attn/c_proj/w" or \ name[-11:] == "/mlp/c_fc/w" or \ name[-13:] == "/mlp/c_proj/w": print(" Transposing") data = data.transpose() # header str = name.encode('utf-8') fout.write(struct.pack("iii", n_dims, len(str), ftype)) for i in range(n_dims): fout.write(struct.pack("i", data.shape[n_dims - 1 - i])) fout.write(str); # data data.tofile(fout) fout.close() print("Done. Output file: " + fname_out) print("") ggml-org-ggml-7ec8045/examples/gpt-2/convert-ckpt-to-ggml.py000066400000000000000000000114111506673203700236250ustar00rootroot00000000000000# Convert a model checkpoint to a ggml compatible file # # Load the model using TensorFlow. # Iterate over all variables and write them to a binary file. # # For each variable, write the following: # - Number of dimensions (int) # - Name length (int) # - Dimensions (int[n_dims]) # - Name (char[name_length]) # - Data (float[n_dims]) # # By default, the bigger matrices are converted to 16-bit floats. # This can be disabled by adding the "use-f32" CLI argument. # # At the start of the ggml file we write the model parameters # and vocabulary. # import sys import json import struct import numpy as np import tensorflow as tf # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py def bytes_to_unicode(): """ Returns list of utf-8 byte and a corresponding list of unicode strings. The reversible bpe codes work on unicode strings. This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. This is a signficant percentage of your normal, say, 32K bpe vocab. To avoid that, we want lookup tables between utf-8 bytes and unicode strings. And avoids mapping to whitespace/control characters the bpe code barfs on. 
""" bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1)) cs = bs[:] n = 0 for b in range(2**8): if b not in bs: bs.append(b) cs.append(2**8+n) n += 1 cs = [chr(n) for n in cs] return dict(zip(bs, cs)) # helper method to convert a numpy array to different float types def convert_to_ftype(data, ftype): # fp16 if ftype == 1: return data.astype(np.float16) assert False, "Invalid ftype: " + str(ftype) if len(sys.argv) < 3: print("Usage: convert-ckpt-to-ggml.py dir-model ftype\n") print(" ftype == 0 -> float32") print(" ftype == 1 -> float16") sys.exit(1) # output in the same directory as the model dir_model = sys.argv[1] fname_out = sys.argv[1] + "/ggml-model.bin" with open(dir_model + "/encoder.json", "r", encoding="utf-8") as f: encoder = json.load(f) with open(dir_model + "/hparams.json", "r", encoding="utf-8") as f: hparams = json.load(f) # possible data types # ftype == 0 -> float32 # ftype == 1 -> float16 # # map from ftype to string ftype_str = ["f32", "f16"] ftype = 1 if len(sys.argv) > 2: ftype = int(sys.argv[2]) if ftype < 0 or ftype > 1: print("Invalid ftype: " + str(ftype)) sys.exit(1) fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin" list_vars = tf.train.list_variables(dir_model) fout = open(fname_out, "wb") fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex fout.write(struct.pack("i", hparams["n_vocab"])) fout.write(struct.pack("i", hparams["n_ctx"])) fout.write(struct.pack("i", hparams["n_embd"])) fout.write(struct.pack("i", hparams["n_head"])) fout.write(struct.pack("i", hparams["n_layer"])) fout.write(struct.pack("i", ftype)) byte_encoder = bytes_to_unicode() byte_decoder = {v:k for k, v in byte_encoder.items()} fout.write(struct.pack("i", len(encoder))) for key in encoder: text = bytearray([byte_decoder[c] for c in key]) fout.write(struct.pack("i", len(text))) fout.write(text) for name, shape in list_vars: print("Processing variable: " + name + " with shape: ", shape) data = tf.train.load_variable(dir_model, name).squeeze() n_dims = len(data.shape); # for efficiency - transpose the projection matrices # "model/h.*/attn/c_attn/w" # "model/h.*/attn/c_proj/w" # "model/h.*/mlp/c_fc/w" # "model/h.*/mlp/c_proj/w" if name[-14:] == "/attn/c_attn/w" or \ name[-14:] == "/attn/c_proj/w" or \ name[-11:] == "/mlp/c_fc/w" or \ name[-13:] == "/mlp/c_proj/w": print(" Transposing") data = data.transpose() dshape = data.shape ftype_cur = 0 if ftype != 0: # match name: # "model/wte" # "model/h.*/attn/c_attn/w" # "model/h.*/attn/c_proj/w" # "model/h.*/mlp/c_fc/w" # "model/h.*/mlp/c_proj/w" if name == "model/wte" or name[-2:] == "/w": print(" Converting to " + ftype_str[ftype]) data = convert_to_ftype(data, ftype) ftype_cur = ftype else: print(" Converting to float32") data = data.astype(np.float32) ftype_cur = 0 # header str = name.encode('utf-8') fout.write(struct.pack("iii", n_dims, len(str), ftype_cur)) for i in range(n_dims): fout.write(struct.pack("i", dshape[n_dims - 1 - i])) fout.write(str); # data data.tofile(fout) fout.close() print("Done. Output file: " + fname_out) print("") ggml-org-ggml-7ec8045/examples/gpt-2/convert-h5-to-ggml.py000066400000000000000000000144441506673203700232110ustar00rootroot00000000000000# Convert GPT-2 h5 transformer model to ggml format # # Load the model using GPT2Model. # Iterate over all variables and write them to a binary file. 
# # For each variable, write the following: # - Number of dimensions (int) # - Name length (int) # - Dimensions (int[n_dims]) # - Name (char[name_length]) # - Data (float[n_dims]) # # By default, the bigger matrices are converted to 16-bit floats. # This can be disabled by adding the "use-f32" CLI argument. # # At the start of the ggml file we write the model parameters # and vocabulary. # import sys import struct import json import numpy as np import re from transformers import GPT2Model # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py def bytes_to_unicode(): """ Returns list of utf-8 byte and a corresponding list of unicode strings. The reversible bpe codes work on unicode strings. This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. This is a signficant percentage of your normal, say, 32K bpe vocab. To avoid that, we want lookup tables between utf-8 bytes and unicode strings. And avoids mapping to whitespace/control characters the bpe code barfs on. """ bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1)) cs = bs[:] n = 0 for b in range(2**8): if b not in bs: bs.append(b) cs.append(2**8+n) n += 1 cs = [chr(n) for n in cs] return dict(zip(bs, cs)) if len(sys.argv) < 2: print("Usage: convert-h5-to-ggml.py dir-model [use-f32]\n") sys.exit(1) # output in the same directory as the model dir_model = sys.argv[1] fname_out = sys.argv[1] + "/ggml-model.bin" with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f: encoder = json.load(f) with open(dir_model + "/added_tokens.json", "r", encoding="utf-8") as f: encoder_added = json.load(f) with open(dir_model + "/config.json", "r", encoding="utf-8") as f: hparams = json.load(f) # use 16-bit or 32-bit floats use_f16 = True if len(sys.argv) > 2: use_f16 = False fname_out = sys.argv[1] + "/ggml-model-f32.bin" model = GPT2Model.from_pretrained(dir_model, low_cpu_mem_usage=True) #print (model) list_vars = model.state_dict() #print (list_vars) fout = open(fname_out, "wb") fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex fout.write(struct.pack("i", hparams["vocab_size"])) fout.write(struct.pack("i", hparams["n_positions"])) fout.write(struct.pack("i", hparams["n_embd"])) fout.write(struct.pack("i", hparams["n_head"])) fout.write(struct.pack("i", hparams["n_layer"])) #fout.write(struct.pack("i", hparams["rotary_dim"])) fout.write(struct.pack("i", use_f16)) byte_encoder = bytes_to_unicode() byte_decoder = {v:k for k, v in byte_encoder.items()} fout.write(struct.pack("i", len(encoder) + len(encoder_added))) for key in encoder: text = bytearray([byte_decoder[c] for c in key]) fout.write(struct.pack("i", len(text))) fout.write(text) for key in encoder_added: text = bytearray([byte_decoder[c] for c in key]) fout.write(struct.pack("i", len(text))) fout.write(text) for name in list_vars.keys(): data = list_vars[name].squeeze().numpy() print("Processing variable: " + name + " with shape: ", data.shape) # we don't need these if name.endswith("attn.masked_bias") or name.endswith(".attn.bias"): print(" Skipping variable: " + name) continue n_dims = len(data.shape); # ftype == 0 -> float32, ftype == 1 -> float16 ftype = 0; if use_f16: if name[-7:] == ".weight" and n_dims == 2: print(" Converting to float16") data = data.astype(np.float16) ftype = 1 else: print(" Converting to float32") data = data.astype(np.float32) ftype = 0 # for 
efficiency - transpose these matrices:
    #  "transformer.h.*.mlp.c_proj.weight
    if name.endswith(".mlp.c_proj.weight"):
        print("  Transposing")
        data = data.transpose()

    # rename headers to keep compatibility
    if name == "ln_f.weight":
        name = "model/ln_f/g"
    elif name == "ln_f.bias":
        name = "model/ln_f/b"
    elif name == "wte.weight":
        name = "model/wte"
    elif name == "wpe.weight":
        name = "model/wpe"
    elif re.match(r"h\.\d+\.ln_1\.weight", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/ln_1/g"
    elif re.match(r"h\.\d+\.ln_1\.bias", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/ln_1/b"
    elif re.match(r"h\.\d+\.attn\.c_attn\.weight", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/attn/c_attn/w"
    elif re.match(r"h\.\d+\.attn\.c_attn\.bias", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/attn/c_attn/b"
    elif re.match(r"h\.\d+\.attn\.c_proj\.weight", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/attn/c_proj/w"
    elif re.match(r"h.\d+.attn.c_proj.bias", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/attn/c_proj/b"
    elif re.match(r"h.\d+.ln_2.weight", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/ln_2/g"
    elif re.match(r"h.\d+.ln_2.bias", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/ln_2/b"
    elif re.match(r"h.\d+.mlp.c_fc.weight", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/mlp/c_fc/w"
    elif re.match(r"h.\d+.mlp.c_fc.bias", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/mlp/c_fc/b"
    elif re.match(r"h.\d+.mlp.c_proj.weight", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/mlp/c_proj/w"
    elif re.match(r"h.\d+.mlp.c_proj.bias", name):
        i = re.findall(r"\d+", name)[0]
        name = f"model/h{i}/mlp/c_proj/b"
    else:
        print("Unrecognized variable name:", name)

    str = name.encode('utf-8')

    fout.write(struct.pack("iii", n_dims, len(str), ftype))
    for i in range(n_dims):
        fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
    fout.write(str)

    # data
    data.tofile(fout)

fout.close()

print("Done. Output file: " + fname_out)
print("")
ggml-org-ggml-7ec8045/examples/gpt-2/download-ggml-model.sh000077500000000000000000000033301506673203700234610ustar00rootroot00000000000000#!/bin/bash

# This script downloads GPT-2 model files that have already been converted to ggml format.
# This way you don't have to convert them yourself.
#
# If you want to download the original GPT-2 model files, use the "download-model.sh" script instead.

#src="https://ggml.ggerganov.com"
#pfx="ggml-model-gpt-2"

src="https://huggingface.co/ggerganov/ggml"
pfx="resolve/main/ggml-model-gpt-2"

ggml_path=$(dirname $(realpath $0))

# GPT-2 models
models=( "117M" "345M" "774M" "1558M" )

# list available models
function list_models {
    printf "\n"
    printf "  Available models:"
    for model in "${models[@]}"; do
        printf " $model"
    done
    printf "\n\n"
}

if [ "$#" -ne 1 ]; then
    printf "Usage: $0 <model>\n"
    list_models

    exit 1
fi

model=$1

if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
    printf "Invalid model: $model\n"
    list_models

    exit 1
fi

# download ggml model

printf "Downloading ggml model $model ...\n"

mkdir -p models/gpt-2-$model

if [ -x "$(command -v wget)" ]; then
    wget --quiet --show-progress -O models/gpt-2-$model/ggml-model.bin $src/$pfx-$model.bin
elif [ -x "$(command -v curl)" ]; then
    curl -L --output models/gpt-2-$model/ggml-model.bin $src/$pfx-$model.bin
else
    printf "Either wget or curl is required to download models.\n"
    exit 1
fi

if [ $?
-ne 0 ]; then printf "Failed to download ggml model $model \n" printf "Please try again later or download the original GPT-2 model files and convert them yourself.\n" exit 1 fi printf "Done! Model '$model' saved in 'models/gpt-2-$model/ggml-model.bin'\n" printf "You can now use it like this:\n\n" printf " $ ./bin/gpt-2 -m models/gpt-2-$model/ggml-model.bin -p \"This is an example\"\n" printf "\n" ggml-org-ggml-7ec8045/examples/gpt-2/download-model.sh000077500000000000000000000021351506673203700225370ustar00rootroot00000000000000#!/bin/bash ggml_path=$(dirname $(realpath $0)) # GPT-2 models models=( "117M" "345M" "774M" "1558M" ) # list available models function list_models { printf "\n" printf " Available models:" for model in "${models[@]}"; do printf " $model" done printf "\n\n" } if [ "$#" -ne 1 ]; then printf "Usage: $0 \n" list_models exit 1 fi model=$1 if [[ ! " ${models[@]} " =~ " ${model} " ]]; then printf "Invalid model: $model\n" list_models exit 1 fi # download model printf "Downloading model $model ...\n" mkdir -p models/gpt-2-$model for file in checkpoint encoder.json hparams.json model.ckpt.data-00000-of-00001 model.ckpt.index model.ckpt.meta vocab.bpe; do wget --quiet --show-progress -O models/gpt-2-$model/$file https://openaipublic.blob.core.windows.net/gpt-2/models/$model/$file done printf "Done! Model '$model' saved in 'models/gpt-2-$model/'\n\n" printf "Run the convert-ckpt-to-ggml.py script to convert the model to ggml format.\n" printf "\n" printf " python $ggml_path/convert-ckpt-to-ggml.py models/gpt-2-$model/\n" printf "\n" ggml-org-ggml-7ec8045/examples/gpt-2/main-alloc.cpp000066400000000000000000000742641506673203700220270ustar00rootroot00000000000000#include "ggml.h" #include "ggml-cpu.h" #include "ggml-alloc.h" #include "ggml-backend.h" #include "common.h" #include "common-ggml.h" #include #include #include #include #include #include #include #include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #endif // default hparams (GPT-2 117M) struct gpt2_hparams { int32_t n_vocab = 50257; int32_t n_ctx = 1024; int32_t n_embd = 768; int32_t n_head = 12; int32_t n_layer = 12; int32_t ftype = 1; float eps = 1e-5f; }; struct gpt2_layer { // normalization struct ggml_tensor * ln_1_g; struct ggml_tensor * ln_1_b; struct ggml_tensor * ln_2_g; struct ggml_tensor * ln_2_b; // attention struct ggml_tensor * c_attn_attn_w; struct ggml_tensor * c_attn_attn_b; struct ggml_tensor * c_attn_proj_w; struct ggml_tensor * c_attn_proj_b; // mlp struct ggml_tensor * c_mlp_fc_w; struct ggml_tensor * c_mlp_fc_b; struct ggml_tensor * c_mlp_proj_w; struct ggml_tensor * c_mlp_proj_b; }; struct gpt2_model { gpt2_hparams hparams; // normalization struct ggml_tensor * ln_f_g; struct ggml_tensor * ln_f_b; struct ggml_tensor * wte; // token embedding struct ggml_tensor * wpe; // position embedding struct ggml_tensor * lm_head; // language model head std::vector layers; // key + value memory struct ggml_tensor * memory_k; struct ggml_tensor * memory_v; // struct ggml_context * ctx_w; std::map tensors; }; // load the model's weights from a file bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab) { printf("%s: loading model from '%s'\n", __func__, fname.c_str()); auto fin = std::ifstream(fname, std::ios::binary); if (!fin) { fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); return false; } // verify magic { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); if (magic != GGML_FILE_MAGIC) { fprintf(stderr, 
"%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); return false; } } // load hparams { auto & hparams = model.hparams; fin.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); fin.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); fin.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); fin.read((char *) &hparams.n_head, sizeof(hparams.n_head)); fin.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); fin.read((char *) &hparams.ftype, sizeof(hparams.ftype)); const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab); printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx); printf("%s: n_embd = %d\n", __func__, hparams.n_embd); printf("%s: n_head = %d\n", __func__, hparams.n_head); printf("%s: n_layer = %d\n", __func__, hparams.n_layer); printf("%s: ftype = %d\n", __func__, hparams.ftype); printf("%s: qntvr = %d\n", __func__, qntvr); hparams.ftype %= GGML_QNT_VERSION_FACTOR; } // load vocab { int32_t n_vocab = 0; fin.read((char *) &n_vocab, sizeof(n_vocab)); if (n_vocab != model.hparams.n_vocab) { fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", __func__, fname.c_str(), n_vocab, model.hparams.n_vocab); return false; } std::string word; std::vector buf(128); for (int i = 0; i < n_vocab; i++) { uint32_t len; fin.read((char *) &len, sizeof(len)); buf.resize(len); fin.read((char *) buf.data(), len); word.assign(buf.data(), len); vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; } } // for the big tensors, we have the option to store the data in 16-bit floats or quantized // in order to save memory and also to speed up the computation ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype) (model.hparams.ftype)); if (wtype == GGML_TYPE_COUNT) { fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", __func__, fname.c_str(), model.hparams.ftype); return false; } auto & ctx = model.ctx_w; size_t ctx_size = 0; { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_vocab = hparams.n_vocab; ctx_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_g ctx_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_b ctx_size += ggml_row_size(wtype, n_vocab*n_embd); // wte ctx_size += ggml_row_size(GGML_TYPE_F32 , n_ctx*n_embd); // wpe ctx_size += ggml_row_size(wtype, n_vocab*n_embd); // lm_head ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_g ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_b ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_2_g ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_2_b ctx_size += n_layer*(ggml_row_size(wtype, 3*n_embd*n_embd)); // c_attn_attn_w ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 3*n_embd)); // c_attn_attn_b ctx_size += n_layer*(ggml_row_size(wtype, n_embd*n_embd)); // c_attn_proj_w ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // c_attn_proj_b ctx_size += n_layer*(ggml_row_size(wtype, 4*n_embd*n_embd)); // c_mlp_fc_w ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 4*n_embd)); // c_mlp_fc_b ctx_size += n_layer*(ggml_row_size(wtype, 4*n_embd*n_embd)); // c_mlp_proj_w ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 4*n_embd)); // c_mlp_proj_b ctx_size += n_ctx*n_layer*ggml_row_size(GGML_TYPE_F32, n_embd); // memory_k ctx_size += n_ctx*n_layer*ggml_row_size(GGML_TYPE_F32, n_embd); // memory_v ctx_size += (6 + 12*n_layer)*512; // object 
overhead printf("%s: ggml tensor size = %d bytes\n", __func__, (int) sizeof(ggml_tensor)); printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0)); } // create the ggml context { struct ggml_init_params params = { /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, }; model.ctx_w = ggml_init(params); if (!model.ctx_w) { fprintf(stderr, "%s: ggml_init() failed\n", __func__); return false; } } // prepare memory for the weights { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_vocab = hparams.n_vocab; model.layers.resize(n_layer); model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.wte = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); model.wpe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ctx); model.lm_head = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); // map by name model.tensors["model/ln_f/g"] = model.ln_f_g; model.tensors["model/ln_f/b"] = model.ln_f_b; model.tensors["model/wte"] = model.wte; model.tensors["model/wpe"] = model.wpe; model.tensors["model/lm_head"] = model.lm_head; for (int i = 0; i < n_layer; ++i) { auto & layer = model.layers[i]; layer.ln_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_2_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.c_attn_attn_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 3*n_embd); layer.c_attn_attn_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3*n_embd); layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); layer.c_attn_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.c_mlp_fc_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 4*n_embd); layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4*n_embd); layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4*n_embd, n_embd); layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // map by name model.tensors["model/h" + std::to_string(i) + "/ln_1/g"] = layer.ln_1_g; model.tensors["model/h" + std::to_string(i) + "/ln_1/b"] = layer.ln_1_b; model.tensors["model/h" + std::to_string(i) + "/ln_2/g"] = layer.ln_2_g; model.tensors["model/h" + std::to_string(i) + "/ln_2/b"] = layer.ln_2_b; model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/w"] = layer.c_attn_attn_w; model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/b"] = layer.c_attn_attn_b; model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/w"] = layer.c_attn_proj_w; model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/b"] = layer.c_attn_proj_b; model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/w"] = layer.c_mlp_fc_w; model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/b"] = layer.c_mlp_fc_b; model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/w"] = layer.c_mlp_proj_w; model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/b"] = layer.c_mlp_proj_b; } } // key + value memory { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_mem = n_layer*n_ctx; const int n_elements = n_embd*n_mem; model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); const size_t memory_size = ggml_nbytes(model.memory_k) + 
ggml_nbytes(model.memory_v); printf("%s: memory size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem); } // load weights { size_t total_size = 0; bool has_lm_head = false; while (true) { int32_t n_dims; int32_t length; int32_t ttype; fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); fin.read(reinterpret_cast(&length), sizeof(length)); fin.read(reinterpret_cast(&ttype), sizeof(ttype)); if (fin.eof()) { break; } int32_t nelements = 1; int32_t ne[2] = { 1, 1 }; for (int i = 0; i < n_dims; ++i) { fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); nelements *= ne[i]; } std::string name(length, 0); fin.read(&name[0], length); if (model.tensors.find(name) == model.tensors.end()) { fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.c_str()); return false; } auto tensor = model.tensors[name]; if (ggml_nelements(tensor) != nelements) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.c_str()); return false; } if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", __func__, name.c_str(), (int) tensor->ne[0], (int) tensor->ne[1], ne[0], ne[1]); return false; } // for debugging if (0) { printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.c_str(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor)); } const size_t bpe = ggml_type_size(ggml_type(ttype)); if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n", __func__, name.c_str(), ggml_nbytes(tensor), nelements*bpe); return false; } fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); // GPT-2 models share the WTE tensor as the LM head if (name == "model/wte" && has_lm_head == false) { memcpy(model.lm_head->data, tensor->data, ggml_nbytes(tensor)); } if (name == "model/lm_head") { has_lm_head = true; } total_size += ggml_nbytes(tensor); } printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0); } fin.close(); return true; } // build the computation graph struct ggml_cgraph * gpt2_graph( const gpt2_model & model, const int n_past, const int n_tokens) { const int N = n_tokens; const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_head = hparams.n_head; // since we are using ggml-alloc, this buffer only needs enough space to hold the ggml_tensor and ggml_cgraph structs, but not the tensor data static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead(); static std::vector buf(buf_size); struct ggml_init_params params = { /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf.data(), /*.no_alloc =*/ true, // the tensors will be allocated later by ggml_gallocr_alloc_graph() }; struct ggml_context * ctx = ggml_init(params); struct ggml_cgraph * gf = ggml_new_graph(ctx); struct ggml_tensor * embd = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N); // at this point, the tensor data is not allocated yet and cannot be set // we will find the tensor after the graph is allocated by its name, and set the data then ggml_set_name(embd, "embd"); // setting a tensor as an input will ensure that it is allocated at the beginning of the graph // this is important to ensure that the input tensors are not overwritten before they are used ggml_set_input(embd); struct 
ggml_tensor * position = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N); ggml_set_name(position, "position"); ggml_set_input(position); // wte + wpe struct ggml_tensor * inpL = ggml_add(ctx, ggml_get_rows(ctx, model.wte, embd), ggml_get_rows(ctx, model.wpe, position)); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * cur; // norm { // [ 768, N] cur = ggml_norm(ctx, inpL, hparams.eps); // cur = ln_1_g*cur + ln_1_b // [ 768, N] cur = ggml_add(ctx, ggml_mul(ctx, ggml_repeat(ctx, model.layers[il].ln_1_g, cur), cur), ggml_repeat(ctx, model.layers[il].ln_1_b, cur)); } // attn // [2304, 768] - model.layers[il].c_attn_attn_w // [2304, 1] - model.layers[il].c_attn_attn_b // [ 768, N] - cur (in) // [2304, N] - cur (out) // // cur = attn_w*cur + attn_b // [2304, N] { cur = ggml_mul_mat(ctx, model.layers[il].c_attn_attn_w, cur); cur = ggml_add(ctx, ggml_repeat(ctx, model.layers[il].c_attn_attn_b, cur), cur); } // self-attention { struct ggml_tensor * Qcur = ggml_view_2d(ctx, cur, n_embd, N, cur->nb[1], 0*sizeof(float)*n_embd); struct ggml_tensor * Kcur = ggml_view_2d(ctx, cur, n_embd, N, cur->nb[1], 1*sizeof(float)*n_embd); struct ggml_tensor * Vcur = ggml_view_2d(ctx, cur, n_embd, N, cur->nb[1], 2*sizeof(float)*n_embd); // store key and value to memory if (N >= 1) { struct ggml_tensor * k = ggml_view_1d(ctx, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past)); struct ggml_tensor * v = ggml_view_1d(ctx, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past)); ggml_build_forward_expand(gf, ggml_cpy(ctx, Kcur, k)); ggml_build_forward_expand(gf, ggml_cpy(ctx, Vcur, v)); } // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3) // [64, N, 12] struct ggml_tensor * Q = ggml_permute(ctx, ggml_cont_3d(ctx, Qcur, n_embd/n_head, n_head, N), 0, 2, 1, 3); // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3) // [64, n_past + N, 12] struct ggml_tensor * K = ggml_permute(ctx, ggml_reshape_3d(ctx, ggml_view_1d(ctx, model.memory_k, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_k)*n_embd), n_embd/n_head, n_head, n_past + N), 0, 2, 1, 3); // GG: flash attention //struct ggml_tensor * V = // ggml_cpy(ctx0, // ggml_permute(ctx0, // ggml_reshape_3d(ctx0, // ggml_view_1d(ctx0, model.memory_v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_v)*n_embd), // n_embd/n_head, n_head, n_past + N), // 1, 2, 0, 3), // ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_past + N, n_embd/n_head, n_head)); //struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, true); // K * Q // [n_past + N, N, 12] struct ggml_tensor * KQ = ggml_mul_mat(ctx, K, Q); // KQ_scaled = KQ / sqrt(n_embd/n_head) // [n_past + N, N, 12] struct ggml_tensor * KQ_scaled = ggml_scale(ctx, KQ, 1.0f/sqrtf(float(n_embd)/n_head)); // KQ_masked = mask_past(KQ_scaled) // [n_past + N, N, 12] struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx, KQ_scaled, n_past); // KQ = soft_max(KQ_masked) // [n_past + N, N, 12] struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx, KQ_masked); // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous() // [n_past + N, 64, 12] struct ggml_tensor * V_trans = ggml_cont_3d(ctx, ggml_permute(ctx, ggml_reshape_3d(ctx, ggml_view_1d(ctx, model.memory_v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_v)*n_embd), n_embd/n_head, n_head, n_past + N), 1, 2, 0, 3), n_past + N, n_embd/n_head, n_head); // KQV = transpose(V) * KQ_soft_max // [64, N, 12] struct 
ggml_tensor * KQV = ggml_mul_mat(ctx, V_trans, KQ_soft_max); // KQV_merged = KQV.permute(0, 2, 1, 3) // [64, 12, N] struct ggml_tensor * KQV_merged = ggml_permute(ctx, KQV, 0, 2, 1, 3); // cur = KQV_merged.contiguous().view(n_embd, N) // [768, N] cur = ggml_cont_2d(ctx, KQV_merged, n_embd, N); } // projection // [ 768, 768] - model.layers[il].c_attn_proj_w // [ 768, 1] - model.layers[il].c_attn_proj_b // [ 768, N] - cur (in) // [ 768, N] - cur (out) // // cur = proj_w*cur + proj_b // [768, N] { cur = ggml_mul_mat(ctx, model.layers[il].c_attn_proj_w, cur); cur = ggml_add(ctx, ggml_repeat(ctx, model.layers[il].c_attn_proj_b, cur), cur); } // add the input cur = ggml_add(ctx, cur, inpL); struct ggml_tensor * inpFF = cur; // feed-forward network { // norm { cur = ggml_norm(ctx, inpFF, hparams.eps); // cur = ln_2_g*cur + ln_2_b // [ 768, N] cur = ggml_add(ctx, ggml_mul(ctx, ggml_repeat(ctx, model.layers[il].ln_2_g, cur), cur), ggml_repeat(ctx, model.layers[il].ln_2_b, cur)); } // fully connected // [3072, 768] - model.layers[il].c_mlp_fc_w // [3072, 1] - model.layers[il].c_mlp_fc_b // [ 768, N] - cur (in) // [3072, N] - cur (out) // // cur = fc_w*cur + fc_b // [3072, N] cur = ggml_mul_mat(ctx, model.layers[il].c_mlp_fc_w, cur); cur = ggml_add(ctx, ggml_repeat(ctx, model.layers[il].c_mlp_fc_b, cur), cur); // GELU activation // [3072, N] cur = ggml_gelu(ctx, cur); // projection // [ 768, 3072] - model.layers[il].c_mlp_proj_w // [ 768, 1] - model.layers[il].c_mlp_proj_b // [3072, N] - cur (in) // [ 768, N] - cur (out) // // cur = proj_w*cur + proj_b // [768, N] cur = ggml_mul_mat(ctx, model.layers[il].c_mlp_proj_w, cur); cur = ggml_add(ctx, ggml_repeat(ctx, model.layers[il].c_mlp_proj_b, cur), cur); } // input for next layer inpL = ggml_add(ctx, cur, inpFF); } // norm { // [ 768, N] inpL = ggml_norm(ctx, inpL, hparams.eps); // inpL = ln_f_g*inpL + ln_f_b // [ 768, N] inpL = ggml_add(ctx, ggml_mul(ctx, ggml_repeat(ctx, model.ln_f_g, inpL), inpL), ggml_repeat(ctx, model.ln_f_b, inpL)); } // inpL = WTE * inpL // [ 768, 50257] - model.lm_head // [ 768, N] - inpL inpL = ggml_mul_mat(ctx, model.lm_head, inpL); ggml_set_name(inpL, "logits"); // setting a tensor as the output will ensure that it is not overwritten by subsequent operations ggml_set_output(inpL); // logits -> probs //inpL = ggml_soft_max(ctx0, inpL); ggml_build_forward_expand(gf, inpL); ggml_free(ctx); return gf; } // evaluate the transformer // // - model: the model // - allocr: ggml_gallocr to use to allocate the compute buffer // - n_threads: number of threads to use // - n_past: the context size so far // - embd_inp: the embeddings of the tokens in the context // - embd_w: the predicted logits for the next token // bool gpt2_eval( const gpt2_model & model, ggml_gallocr_t allocr, const int n_threads, const int n_past, const std::vector<gpt_vocab::id> & embd_inp, std::vector<float> & embd_w) { const int N = embd_inp.size(); const auto & hparams = model.hparams; const int n_vocab = hparams.n_vocab; struct ggml_cgraph * gf = gpt2_graph(model, n_past, embd_inp.size()); // allocate the graph tensors ggml_gallocr_alloc_graph(allocr, gf); // set the graph inputs struct ggml_tensor * embd = ggml_graph_get_tensor(gf, "embd"); memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd)); struct ggml_tensor * position = ggml_graph_get_tensor(gf, "position"); for (int i = 0; i < N; ++i) { ((int32_t *) position->data)[i] = n_past + i; } // run the computation struct ggml_cplan plan = ggml_graph_plan(gf, n_threads, nullptr); static std::vector<uint8_t> work_buffer; 
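// note: ggml_graph_plan() sizes the scratch ("work") memory that the CPU backend
// needs for this particular graph, and the work buffer is static so the allocation
// is reused across calls instead of being redone for every token. a minimal sketch
// of the same compute pattern in isolation (assuming a previously built graph gf):
//
//     struct ggml_cplan plan = ggml_graph_plan(gf, n_threads, nullptr);
//     std::vector<uint8_t> work(plan.work_size); // per-run scratch memory
//     plan.work_data = work.data();
//     ggml_graph_compute(gf, &plan);             // executes all graph nodes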
work_buffer.resize(plan.work_size); plan.work_data = work_buffer.data(); ggml_graph_compute(gf, &plan); //if (n_past%100 == 0) { // ggml_graph_print (&gf); // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot"); //} // get the graph outputs struct ggml_tensor * logits = ggml_graph_get_tensor(gf, "logits"); //embd_w.resize(n_vocab*N); //memcpy(embd_w.data(), ggml_get_data(logits), sizeof(float)*n_vocab*N); // return result just for the last token embd_w.resize(n_vocab); memcpy(embd_w.data(), (float *) ggml_get_data(logits) + (n_vocab*(N-1)), sizeof(float)*n_vocab); return true; } int main(int argc, char ** argv) { ggml_time_init(); const int64_t t_main_start_us = ggml_time_us(); gpt_params params; params.model = "models/gpt-2-117M/ggml-model.bin"; if (gpt_params_parse(argc, argv, params) == false) { return 1; } if (params.seed < 0) { params.seed = time(NULL); } printf("%s: seed = %d\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.prompt.empty()) { params.prompt = gpt_random_prompt(rng); } int64_t t_load_us = 0; gpt_vocab vocab; gpt2_model model; // load the model { const int64_t t_start_us = ggml_time_us(); if (!gpt2_model_load(params.model, model, vocab)) { fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); return 1; } t_load_us = ggml_time_us() - t_start_us; test_gpt_tokenizer(vocab, params.token_test); } ggml_gallocr_t allocr = NULL; // allocate the compute buffer { allocr = ggml_gallocr_new(ggml_backend_cpu_buffer_type()); // create the worst case graph for memory usage estimation int n_tokens = std::min(model.hparams.n_ctx, params.n_batch); int n_past = model.hparams.n_ctx - n_tokens; struct ggml_cgraph * gf = gpt2_graph(model, n_past, n_tokens); // pre-allocate the compute buffer for the worst case (optional) ggml_gallocr_reserve(allocr, gf); size_t mem_size = ggml_gallocr_get_buffer_size(allocr, 0); fprintf(stderr, "%s: compute buffer size: %.2f MB\n", __func__, mem_size/1024.0/1024.0); } int n_past = 0; int64_t t_sample_us = 0; int64_t t_predict_us = 0; std::vector<float> logits; // tokenize the prompt std::vector<gpt_vocab::id> embd_inp = ::gpt_tokenize(vocab, params.prompt); params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str()); printf("%s: number of tokens in prompt = %zu, first 8 tokens: ", __func__, embd_inp.size()); for (int i = 0; i < std::min(8, (int) embd_inp.size()); i++) { printf("%d ", embd_inp[i]); } printf("\n\n"); // submit the input prompt token-by-token // this reduces the memory usage during inference, at the cost of a bit of speed at the beginning std::vector<gpt_vocab::id> embd; for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) { // predict if (embd.size() > 0) { const int64_t t_start_us = ggml_time_us(); if (!gpt2_eval(model, allocr, params.n_threads, n_past, embd, logits)) { printf("Failed to predict\n"); return 1; } t_predict_us += ggml_time_us() - t_start_us; } n_past += embd.size(); embd.clear(); if (i >= embd_inp.size()) { // sample next token const int top_k = params.top_k; const float top_p = params.top_p; const float temp = params.temp; const int n_vocab = model.hparams.n_vocab; gpt_vocab::id id = 0; { const int64_t t_start_sample_us = ggml_time_us(); id = gpt_sample_top_k_top_p(vocab, logits.data() + (logits.size() - n_vocab), top_k, top_p, temp, rng); t_sample_us += ggml_time_us() - t_start_sample_us; } // add it to the context embd.push_back(id); } else { // if here, it means we are still processing the input 
prompt for (size_t k = i; k < embd_inp.size(); k++) { embd.push_back(embd_inp[k]); if (int32_t(embd.size()) >= params.n_batch) { break; } } i += embd.size() - 1; } // display text for (auto id : embd) { printf("%s", vocab.id_to_token[id].c_str()); } fflush(stdout); // end of text token if (embd.back() == 50256) { break; } } // report timing { const int64_t t_main_end_us = ggml_time_us(); printf("\n\n"); printf("%s: load time = %8.2f ms\n", __func__, t_load_us/1000.0f); printf("%s: sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f); printf("%s: predict time = %8.2f ms / %.2f ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past); printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f); } ggml_free(model.ctx_w); return 0; } ggml-org-ggml-7ec8045/examples/gpt-2/main-backend.cpp000066400000000000000000000772001506673203700223150ustar00rootroot00000000000000#include "ggml.h" #include "ggml-cpu.h" #include "ggml-alloc.h" #include "ggml-backend.h" #ifdef GGML_USE_CUDA #include "ggml-cuda.h" #endif #ifdef GGML_USE_METAL #include "ggml-metal.h" #endif #include "common.h" #include "common-ggml.h" #include <cassert> #include <cmath> #include <cstdio> #include <cstring> #include <fstream> #include <map> #include <string> #include <vector> #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #endif #define GPT2_MAX_NODES 4096 static void ggml_log_callback_default(ggml_log_level level, const char * text, void * user_data) { (void) level; (void) user_data; fputs(text, stderr); fflush(stderr); } // default hparams (GPT-2 117M) struct gpt2_hparams { int32_t n_vocab = 50257; int32_t n_ctx = 1024; int32_t n_embd = 768; int32_t n_head = 12; int32_t n_layer = 12; int32_t ftype = 1; float eps = 1e-5f; }; struct gpt2_layer { // normalization struct ggml_tensor * ln_1_g; struct ggml_tensor * ln_1_b; struct ggml_tensor * ln_2_g; struct ggml_tensor * ln_2_b; // attention struct ggml_tensor * c_attn_attn_w; struct ggml_tensor * c_attn_attn_b; struct ggml_tensor * c_attn_proj_w; struct ggml_tensor * c_attn_proj_b; // mlp struct ggml_tensor * c_mlp_fc_w; struct ggml_tensor * c_mlp_fc_b; struct ggml_tensor * c_mlp_proj_w; struct ggml_tensor * c_mlp_proj_b; }; struct gpt2_model { gpt2_hparams hparams; // normalization struct ggml_tensor * ln_f_g; struct ggml_tensor * ln_f_b; struct ggml_tensor * wte; // token embedding struct ggml_tensor * wpe; // position embedding struct ggml_tensor * lm_head; // language model head std::vector<gpt2_layer> layers; // key + value memory struct ggml_tensor * memory_k; struct ggml_tensor * memory_v; // struct ggml_context * ctx_w; struct ggml_context * ctx_kv; ggml_backend_t backend = NULL; ggml_backend_buffer_t buffer_w; ggml_backend_buffer_t buffer_kv; std::map<std::string, struct ggml_tensor *> tensors; }; // load the model's weights from a file bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab, int n_ctx, int n_gpu_layers) { printf("%s: loading model from '%s'\n", __func__, fname.c_str()); auto fin = std::ifstream(fname, std::ios::binary); if (!fin) { fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); return false; } // verify magic { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); if (magic != GGML_FILE_MAGIC) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); return false; } } // load hparams { auto & hparams = model.hparams; fin.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); fin.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); fin.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); 
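// the remaining hyperparameters follow in the same fixed order written by the
// conversion script; note that ftype packs both the weight type and the
// quantization version (qntvr), which is why it is split with
// GGML_QNT_VERSION_FACTOR right below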
fin.read((char *) &hparams.n_head, sizeof(hparams.n_head)); fin.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); fin.read((char *) &hparams.ftype, sizeof(hparams.ftype)); const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab); printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx); printf("%s: n_embd = %d\n", __func__, hparams.n_embd); printf("%s: n_head = %d\n", __func__, hparams.n_head); printf("%s: n_layer = %d\n", __func__, hparams.n_layer); printf("%s: ftype = %d\n", __func__, hparams.ftype); printf("%s: qntvr = %d\n", __func__, qntvr); hparams.ftype %= GGML_QNT_VERSION_FACTOR; } // load vocab { int32_t n_vocab = 0; fin.read((char *) &n_vocab, sizeof(n_vocab)); if (n_vocab != model.hparams.n_vocab) { fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", __func__, fname.c_str(), n_vocab, model.hparams.n_vocab); return false; } std::string word; std::vector<char> buf(128); for (int i = 0; i < n_vocab; i++) { uint32_t len; fin.read((char *) &len, sizeof(len)); buf.resize(len); fin.read((char *) buf.data(), len); word.assign(buf.data(), len); vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; } } // for the big tensors, we have the option to store the data in 16-bit floats or quantized // in order to save memory and also to speed up the computation ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype) (model.hparams.ftype)); if (wtype == GGML_TYPE_COUNT) { fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", __func__, fname.c_str(), model.hparams.ftype); return false; } ggml_log_set(ggml_log_callback_default, nullptr); auto & ctx = model.ctx_w; // create the ggml context { size_t n_tensors = 2 + 6 + 12*model.hparams.n_layer; struct ggml_init_params params = { /*.mem_size =*/ ggml_tensor_overhead() * n_tensors, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; ctx = ggml_init(params); if (!ctx) { fprintf(stderr, "%s: ggml_init() failed\n", __func__); return false; } } // initialize the backend #ifdef GGML_USE_CUDA if (n_gpu_layers > 0) { fprintf(stderr, "%s: using CUDA backend\n", __func__); model.backend = ggml_backend_cuda_init(0); if (!model.backend) { fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__); } } #endif #ifdef GGML_USE_METAL if (n_gpu_layers > 0) { fprintf(stderr, "%s: using Metal backend\n", __func__); model.backend = ggml_backend_metal_init(); if (!model.backend) { fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__); } } #endif if (!model.backend) { // fallback to CPU backend fprintf(stderr, "%s: using CPU backend\n", __func__); model.backend = ggml_backend_cpu_init(); } if (!model.backend) { fprintf(stderr, "%s: ggml_backend_cpu_init() failed\n", __func__); return false; } // create the tensors for the model { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_vocab = hparams.n_vocab; model.layers.resize(n_layer); model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.wte = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); model.wpe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ctx); model.lm_head = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); // map by name model.tensors["model/ln_f/g"] = model.ln_f_g; model.tensors["model/ln_f/b"] = model.ln_f_b; model.tensors["model/wte"] = model.wte; model.tensors["model/wpe"] = model.wpe; 
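// model/lm_head and the per-layer tensors are registered next; these string keys
// are the contract with the conversion script -- the weight-loading loop below
// resolves every tensor record in the file through this map by name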
model.tensors["model/lm_head"] = model.lm_head; for (int i = 0; i < n_layer; ++i) { auto & layer = model.layers[i]; layer.ln_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_2_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.c_attn_attn_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 3*n_embd); layer.c_attn_attn_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3*n_embd); layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); layer.c_attn_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.c_mlp_fc_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 4*n_embd); layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4*n_embd); layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4*n_embd, n_embd); layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // map by name model.tensors["model/h" + std::to_string(i) + "/ln_1/g"] = layer.ln_1_g; model.tensors["model/h" + std::to_string(i) + "/ln_1/b"] = layer.ln_1_b; model.tensors["model/h" + std::to_string(i) + "/ln_2/g"] = layer.ln_2_g; model.tensors["model/h" + std::to_string(i) + "/ln_2/b"] = layer.ln_2_b; model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/w"] = layer.c_attn_attn_w; model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/b"] = layer.c_attn_attn_b; model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/w"] = layer.c_attn_proj_w; model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/b"] = layer.c_attn_proj_b; model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/w"] = layer.c_mlp_fc_w; model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/b"] = layer.c_mlp_fc_b; model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/w"] = layer.c_mlp_proj_w; model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/b"] = layer.c_mlp_proj_b; } } // allocate the model tensors in a backend buffer model.buffer_w = ggml_backend_alloc_ctx_tensors(ctx, model.backend); printf("%s: ggml tensor size = %d bytes\n", __func__, (int) sizeof(ggml_tensor)); printf("%s: backend buffer size = %6.2f MB\n", __func__, ggml_backend_buffer_get_size(model.buffer_w)/(1024.0*1024.0)); // override the default training context with the user-provided model.hparams.n_ctx = n_ctx; // key + value memory { auto * ctx = model.ctx_kv; // create the ggml context { size_t n_tensors = 2; struct ggml_init_params params = { /*.mem_size =*/ ggml_tensor_overhead() * n_tensors, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; ctx = ggml_init(params); if (!ctx) { fprintf(stderr, "%s: ggml_init() failed\n", __func__); return false; } } const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_mem = n_layer*n_ctx; const int n_elements = n_embd*n_mem; // k and v here can also be GGML_TYPE_F16 to save memory and speed up the computation // if backend supports it model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); // allocate the KV memory in a backend buffer model.buffer_kv = ggml_backend_alloc_ctx_tensors(ctx, model.backend); const size_t memory_size = ggml_backend_buffer_get_size(model.buffer_kv); printf("%s: memory size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem); } // load weights { size_t total_size = 0; bool has_lm_head = false; std::vector read_buf; while (true) { int32_t n_dims; 
int32_t length; int32_t ttype; fin.read(reinterpret_cast<char *>(&n_dims), sizeof(n_dims)); fin.read(reinterpret_cast<char *>(&length), sizeof(length)); fin.read(reinterpret_cast<char *>(&ttype), sizeof(ttype)); if (fin.eof()) { break; } int32_t nelements = 1; int32_t ne[2] = { 1, 1 }; for (int i = 0; i < n_dims; ++i) { fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i])); nelements *= ne[i]; } std::string name(length, 0); fin.read(&name[0], length); if (model.tensors.find(name) == model.tensors.end()) { fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.c_str()); return false; } auto tensor = model.tensors[name]; ggml_set_name(tensor, name.c_str()); if (ggml_nelements(tensor) != nelements) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.c_str()); return false; } if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", __func__, name.c_str(), (int) tensor->ne[0], (int) tensor->ne[1], ne[0], ne[1]); return false; } // for debugging if (0) { printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.c_str(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor)); } const size_t bpe = ggml_type_size(ggml_type(ttype)); if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n", __func__, name.c_str(), ggml_nbytes(tensor), nelements*bpe); return false; } if (ggml_backend_buffer_is_host(model.buffer_w)) { // for some backends such as CPU and Metal, the tensor data is in system memory and we can read directly into it fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor)); } else { // read into a temporary buffer first, then copy to device memory read_buf.resize(ggml_nbytes(tensor)); fin.read(read_buf.data(), ggml_nbytes(tensor)); ggml_backend_tensor_set(tensor, read_buf.data(), 0, ggml_nbytes(tensor)); } // GPT-2 models share the WTE tensor as the LM head if (name == "model/wte" && has_lm_head == false) { //ggml_backend_tensor_copy(tensor, model.lm_head); model.lm_head = tensor; } if (name == "model/lm_head") { has_lm_head = true; } total_size += ggml_nbytes(tensor); } printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0); } fin.close(); return true; } // build the computation graph struct ggml_cgraph * gpt2_graph( const gpt2_model & model, const int n_past, const int n_tokens) { const int N = n_tokens; const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_head = hparams.n_head; // since we are using ggml-alloc, this buffer only needs enough space to hold the ggml_tensor and ggml_cgraph structs, but not the tensor data static size_t buf_size = ggml_tensor_overhead()*GPT2_MAX_NODES + ggml_graph_overhead_custom(GPT2_MAX_NODES, false); static std::vector<uint8_t> buf(buf_size); struct ggml_init_params params = { /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf.data(), /*.no_alloc =*/ true, // the tensors will be allocated later by ggml_gallocr_alloc_graph() }; struct ggml_context * ctx = ggml_init(params); struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GPT2_MAX_NODES, false); struct ggml_tensor * embd = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N); // at this point, the tensor data is not allocated yet and cannot be set // we will find the tensor after the graph is allocated by its name, and 
set the data then ggml_set_name(embd, "embd"); // setting a tensor as an input will ensure that it is allocated at the beginning of the graph // this is important to ensure that the input tensors are not overwritten before they are used ggml_set_input(embd); struct ggml_tensor * position = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N); ggml_set_name(position, "position"); ggml_set_input(position); // wte + wpe struct ggml_tensor * inpL = ggml_add(ctx, ggml_get_rows(ctx, model.wte, embd), ggml_get_rows(ctx, model.wpe, position)); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * cur; // norm { // [ 768, N] cur = ggml_norm(ctx, inpL, hparams.eps); // cur = ln_1_g*cur + ln_1_b // [ 768, N] cur = ggml_add(ctx, ggml_mul(ctx, cur, model.layers[il].ln_1_g), model.layers[il].ln_1_b); } // attn // [2304, 768] - model.layers[il].c_attn_attn_w // [2304, 1] - model.layers[il].c_attn_attn_b // [ 768, N] - cur (in) // [2304, N] - cur (out) // // cur = attn_w*cur + attn_b // [2304, N] { cur = ggml_mul_mat(ctx, model.layers[il].c_attn_attn_w, cur); cur = ggml_add(ctx, cur, model.layers[il].c_attn_attn_b); } // self-attention { struct ggml_tensor * Qcur = ggml_view_2d(ctx, cur, n_embd, N, cur->nb[1], 0*sizeof(float)*n_embd); struct ggml_tensor * Kcur = ggml_view_2d(ctx, cur, n_embd, N, cur->nb[1], 1*sizeof(float)*n_embd); struct ggml_tensor * Vcur = ggml_view_2d(ctx, cur, n_embd, N, cur->nb[1], 2*sizeof(float)*n_embd); // store key and value to memory if (N >= 1) { struct ggml_tensor * k = ggml_view_1d(ctx, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past)); struct ggml_tensor * v = ggml_view_1d(ctx, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past)); ggml_build_forward_expand(gf, ggml_cpy(ctx, Kcur, k)); ggml_build_forward_expand(gf, ggml_cpy(ctx, Vcur, v)); } // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3) // [64, N, 12] struct ggml_tensor * Q = ggml_permute(ctx, ggml_cont_3d(ctx, Qcur, n_embd/n_head, n_head, N), 0, 2, 1, 3); // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3) // [64, n_past + N, 12] struct ggml_tensor * K = ggml_permute(ctx, ggml_reshape_3d(ctx, ggml_view_1d(ctx, model.memory_k, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_k)*n_embd), n_embd/n_head, n_head, n_past + N), 0, 2, 1, 3); // GG: flash attention //struct ggml_tensor * V = // ggml_cpy(ctx0, // ggml_permute(ctx0, // ggml_reshape_3d(ctx0, // ggml_view_1d(ctx0, model.memory_v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_v)*n_embd), // n_embd/n_head, n_head, n_past + N), // 1, 2, 0, 3), // ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_past + N, n_embd/n_head, n_head)); //struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, true); // K * Q // [n_past + N, N, 12] struct ggml_tensor * KQ = ggml_mul_mat(ctx, K, Q); // KQ_scaled = KQ / sqrt(n_embd/n_head) // [n_past + N, N, 12] struct ggml_tensor * KQ_scaled = ggml_scale(ctx, KQ, 1.0f/sqrtf(float(n_embd)/n_head)); // KQ_masked = mask_past(KQ_scaled) // [n_past + N, N, 12] struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx, KQ_scaled, n_past); // KQ = soft_max(KQ_masked) // [n_past + N, N, 12] struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx, KQ_masked); // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous() // [n_past + N, 64, 12] struct ggml_tensor * V_trans = ggml_cont_3d(ctx, ggml_permute(ctx, ggml_reshape_3d(ctx, ggml_view_1d(ctx, model.memory_v, (n_past + N)*n_embd, 
il*n_ctx*ggml_element_size(model.memory_v)*n_embd), n_embd/n_head, n_head, n_past + N), 1, 2, 0, 3), n_past + N, n_embd/n_head, n_head); // KQV = transpose(V) * KQ_soft_max // [64, N, 12] struct ggml_tensor * KQV = ggml_mul_mat(ctx, V_trans, KQ_soft_max); // KQV_merged = KQV.permute(0, 2, 1, 3) // [64, 12, N] struct ggml_tensor * KQV_merged = ggml_permute(ctx, KQV, 0, 2, 1, 3); // cur = KQV_merged.contiguous().view(n_embd, N) // [768, N] cur = ggml_cont_2d(ctx, KQV_merged, n_embd, N); } // projection // [ 768, 768] - model.layers[il].c_attn_proj_w // [ 768, 1] - model.layers[il].c_attn_proj_b // [ 768, N] - cur (in) // [ 768, N] - cur (out) // // cur = proj_w*cur + proj_b // [768, N] { cur = ggml_mul_mat(ctx, model.layers[il].c_attn_proj_w, cur); cur = ggml_add(ctx, cur, model.layers[il].c_attn_proj_b); } // add the input cur = ggml_add(ctx, cur, inpL); struct ggml_tensor * inpFF = cur; // feed-forward network { // norm { cur = ggml_norm(ctx, inpFF, hparams.eps); // cur = ln_2_g*cur + ln_2_b // [ 768, N] cur = ggml_add(ctx, ggml_mul(ctx, cur, model.layers[il].ln_2_g), model.layers[il].ln_2_b); } // fully connected // [3072, 768] - model.layers[il].c_mlp_fc_w // [3072, 1] - model.layers[il].c_mlp_fc_b // [ 768, N] - cur (in) // [3072, N] - cur (out) // // cur = fc_w*cur + fc_b // [3072, N] cur = ggml_mul_mat(ctx, model.layers[il].c_mlp_fc_w, cur); cur = ggml_add(ctx, cur, model.layers[il].c_mlp_fc_b); // GELU activation // [3072, N] cur = ggml_gelu(ctx, cur); // projection // [ 768, 3072] - model.layers[il].c_mlp_proj_w // [ 768, 1] - model.layers[il].c_mlp_proj_b // [3072, N] - cur (in) // [ 768, N] - cur (out) // // cur = proj_w*cur + proj_b // [768, N] cur = ggml_mul_mat(ctx, model.layers[il].c_mlp_proj_w, cur); cur = ggml_add(ctx, cur, model.layers[il].c_mlp_proj_b); } // input for next layer inpL = ggml_add(ctx, cur, inpFF); } // norm { // [ 768, N] inpL = ggml_norm(ctx, inpL, hparams.eps); // inpL = ln_f_g*inpL + ln_f_b // [ 768, N] inpL = ggml_add(ctx, ggml_mul(ctx, inpL, model.ln_f_g), model.ln_f_b); } // inpL = WTE * inpL // [ 768, 50257] - model.lm_head // [ 768, N] - inpL inpL = ggml_mul_mat(ctx, model.lm_head, inpL); ggml_set_name(inpL, "logits"); // setting a tensor as the output will ensure that it is not overwritten by subsequent operations ggml_set_output(inpL); // logits -> probs //inpL = ggml_soft_max(ctx0, inpL); ggml_build_forward_expand(gf, inpL); ggml_free(ctx); return gf; } // evaluate the transformer // // - model: the model // - allocr: ggml_gallocr to use to allocate the compute buffer // - n_threads: number of threads to use // - n_past: the context size so far // - embd_inp: the embeddings of the tokens in the context // - embd_w: the predicted logits for the next token // bool gpt2_eval( const gpt2_model & model, ggml_gallocr_t allocr, const int n_threads, const int n_past, const std::vector<gpt_vocab::id> & embd_inp, std::vector<float> & embd_w) { const int N = embd_inp.size(); const auto & hparams = model.hparams; const int n_vocab = hparams.n_vocab; struct ggml_cgraph * gf = gpt2_graph(model, n_past, embd_inp.size()); // allocate the graph tensors ggml_gallocr_alloc_graph(allocr, gf); // set the graph inputs struct ggml_tensor * embd = ggml_graph_get_tensor(gf, "embd"); ggml_backend_tensor_set(embd, embd_inp.data(), 0, N*ggml_element_size(embd)); struct ggml_tensor * position = ggml_graph_get_tensor(gf, "position"); for (int i = 0; i < N; ++i) { int32_t v = n_past + i; ggml_backend_tensor_set(position, &v, i*sizeof(int32_t), sizeof(v)); } // set backend options if 
(ggml_backend_is_cpu(model.backend)) { ggml_backend_cpu_set_n_threads(model.backend, n_threads); } // run the computation ggml_backend_graph_compute(model.backend, gf); //if (n_past%100 == 0) { // ggml_graph_print (&gf); // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot"); //} // get the graph outputs struct ggml_tensor * logits = ggml_graph_get_tensor(gf, "logits"); //embd_w.resize(n_vocab*N); //ggml_backend_tensor_get(logits, embd_w.data(), 0, sizeof(float)*n_vocab*N); // return result just for the last token embd_w.resize(n_vocab); ggml_backend_tensor_get(logits, embd_w.data(), (n_vocab*(N-1))*sizeof(float), sizeof(float)*n_vocab); return true; } int main(int argc, char ** argv) { ggml_time_init(); const int64_t t_main_start_us = ggml_time_us(); gpt_params params; params.model = "models/gpt-2-117M/ggml-model.bin"; if (gpt_params_parse(argc, argv, params) == false) { return 1; } if (params.seed < 0) { params.seed = time(NULL); } printf("%s: seed = %d\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.prompt.empty()) { params.prompt = gpt_random_prompt(rng); } int64_t t_load_us = 0; gpt_vocab vocab; gpt2_model model; // load the model { const int64_t t_start_us = ggml_time_us(); if (!gpt2_model_load(params.model, model, vocab, params.n_ctx, params.n_gpu_layers)) { fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); return 1; } t_load_us = ggml_time_us() - t_start_us; test_gpt_tokenizer(vocab, params.token_test); } ggml_gallocr_t allocr = NULL; // allocate the compute buffer { // create a graph allocator with the backend's default buffer type allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend)); // create the worst case graph for memory usage estimation int n_tokens = std::min(model.hparams.n_ctx, params.n_batch); int n_past = model.hparams.n_ctx - n_tokens; struct ggml_cgraph * gf = gpt2_graph(model, n_past, n_tokens); // pre-allocate the compute buffer for the worst case (optional) ggml_gallocr_reserve(allocr, gf); size_t mem_size = ggml_gallocr_get_buffer_size(allocr, 0); fprintf(stderr, "%s: compute buffer size: %.2f MB\n", __func__, mem_size/1024.0/1024.0); } int n_past = 0; int64_t t_sample_us = 0; int64_t t_predict_us = 0; std::vector<float> logits; // tokenize the prompt std::vector<gpt_vocab::id> embd_inp = ::gpt_tokenize(vocab, params.prompt); params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str()); printf("%s: number of tokens in prompt = %zu, first 8 tokens: ", __func__, embd_inp.size()); for (int i = 0; i < std::min(8, (int) embd_inp.size()); i++) { printf("%d ", embd_inp[i]); } printf("\n\n"); // submit the input prompt token-by-token // this reduces the memory usage during inference, at the cost of a bit of speed at the beginning std::vector<gpt_vocab::id> embd; for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) { // predict if (embd.size() > 0) { const int64_t t_start_us = ggml_time_us(); if (!gpt2_eval(model, allocr, params.n_threads, n_past, embd, logits)) { printf("Failed to predict\n"); return 1; } t_predict_us += ggml_time_us() - t_start_us; } n_past += embd.size(); embd.clear(); if (i >= embd_inp.size()) { // sample next token const int top_k = params.top_k; const float top_p = params.top_p; const float temp = params.temp; const int n_vocab = model.hparams.n_vocab; gpt_vocab::id id = 0; { const int64_t t_start_sample_us = ggml_time_us(); id = gpt_sample_top_k_top_p(vocab, logits.data() + (logits.size() - 
n_vocab), top_k, top_p, temp, rng); t_sample_us += ggml_time_us() - t_start_sample_us; } // add it to the context embd.push_back(id); } else { // if here, it means we are still processing the input prompt for (size_t k = i; k < embd_inp.size(); k++) { embd.push_back(embd_inp[k]); if (int32_t(embd.size()) >= params.n_batch) { break; } } i += embd.size() - 1; } // display text for (auto id : embd) { printf("%s", vocab.id_to_token[id].c_str()); } fflush(stdout); // end of text token if (!params.ignore_eos && embd.back() == 50256) { break; } } // report timing { const int64_t t_main_end_us = ggml_time_us(); printf("\n\n"); printf("%s: load time = %8.2f ms\n", __func__, t_load_us/1000.0f); printf("%s: sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f); printf("%s: predict time = %8.2f ms / %.2f ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past); printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f); } ggml_free(model.ctx_w); ggml_gallocr_free(allocr); ggml_backend_buffer_free(model.buffer_w); ggml_backend_buffer_free(model.buffer_kv); ggml_backend_free(model.backend); return 0; } ggml-org-ggml-7ec8045/examples/gpt-2/main-batched.cpp000066400000000000000000001205221506673203700223140ustar00rootroot00000000000000#include "ggml.h" #include "ggml-cpu.h" #include "ggml-alloc.h" #include "ggml-backend.h" #ifdef GGML_USE_CUDA #include "ggml-cuda.h" #endif #ifdef GGML_USE_METAL #include "ggml-metal.h" #endif #include "common.h" #include "common-ggml.h" #include <cassert> #include <cmath> #include <cstdio> #include <cstring> #include <fstream> #include <limits> #include <map> #include <set> #include <string> #include <vector> #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #endif #define GPT2_MAX_NODES 4096 static void ggml_log_callback_default(ggml_log_level level, const char * text, void * user_data) { (void) level; (void) user_data; fputs(text, stderr); fflush(stderr); } typedef int32_t gpt2_pos; typedef int32_t gpt2_seq_id; // default hparams (GPT-2 117M) struct gpt2_hparams { int32_t n_vocab = 50257; int32_t n_ctx = 1024; int32_t n_embd = 768; int32_t n_head = 12; int32_t n_layer = 12; int32_t ftype = 1; float eps = 1e-5f; }; struct gpt2_layer { // normalization struct ggml_tensor * ln_1_g; struct ggml_tensor * ln_1_b; struct ggml_tensor * ln_2_g; struct ggml_tensor * ln_2_b; // attention struct ggml_tensor * c_attn_attn_w; struct ggml_tensor * c_attn_attn_b; struct ggml_tensor * c_attn_proj_w; struct ggml_tensor * c_attn_proj_b; // mlp struct ggml_tensor * c_mlp_fc_w; struct ggml_tensor * c_mlp_fc_b; struct ggml_tensor * c_mlp_proj_w; struct ggml_tensor * c_mlp_proj_b; }; struct gpt2_kv_cell { gpt2_pos pos = -1; gpt2_pos delta = 0; std::set<gpt2_seq_id> seq_id; bool has_seq_id(const gpt2_seq_id & id) const { return seq_id.find(id) != seq_id.end(); } }; struct gpt2_kv_cache { // key + value memory struct ggml_tensor * k; struct ggml_tensor * v; // uint32_t head = 0; uint32_t size = 0; // computed before each graph build uint32_t n = 0; std::vector<gpt2_kv_cell> cells; ggml_backend_buffer_t buffer; }; struct gpt2_model { gpt2_hparams hparams; // normalization struct ggml_tensor * ln_f_g; struct ggml_tensor * ln_f_b; struct ggml_tensor * wte; // token embedding struct ggml_tensor * wpe; // position embedding struct ggml_tensor * lm_head; // language model head std::vector<gpt2_layer> layers; gpt2_kv_cache kv_cache; struct ggml_context * ctx_w; ggml_backend_t backend = NULL; ggml_backend_buffer_t buffer_w; std::map<std::string, struct ggml_tensor *> tensors; }; // Input data for gpt2_decode // A gpt2_batch object can contain input about one or many sequences // 
The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens // // - token : the token ids of the input (used when embd is NULL) // - embd : token embeddings (i.e. float vector of size n_embd) (used when token is NULL) // - pos : the positions of the respective token in the sequence // - seq_id : the sequence to which the respective token belongs // - logits : if zero, the logits for the respective token will not be output // struct gpt2_batch { int32_t n_tokens = -1; gpt_vocab::id * token = {}; float * embd = {}; gpt2_pos * pos = {}; gpt2_seq_id * seq_id = {}; int8_t * logits = {}; }; // load the model's weights from a file bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab, int n_ctx, int n_gpu_layers) { printf("%s: loading model from '%s'\n", __func__, fname.c_str()); auto fin = std::ifstream(fname, std::ios::binary); if (!fin) { fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); return false; } // verify magic { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); if (magic != GGML_FILE_MAGIC) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); return false; } } // load hparams { auto & hparams = model.hparams; fin.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); fin.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); fin.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); fin.read((char *) &hparams.n_head, sizeof(hparams.n_head)); fin.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); fin.read((char *) &hparams.ftype, sizeof(hparams.ftype)); const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab); printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx); printf("%s: n_embd = %d\n", __func__, hparams.n_embd); printf("%s: n_head = %d\n", __func__, hparams.n_head); printf("%s: n_layer = %d\n", __func__, hparams.n_layer); printf("%s: ftype = %d\n", __func__, hparams.ftype); printf("%s: qntvr = %d\n", __func__, qntvr); hparams.ftype %= GGML_QNT_VERSION_FACTOR; } // load vocab { int32_t n_vocab = 0; fin.read((char *) &n_vocab, sizeof(n_vocab)); if (n_vocab != model.hparams.n_vocab) { fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", __func__, fname.c_str(), n_vocab, model.hparams.n_vocab); return false; } std::string word; std::vector<char> buf(128); for (int i = 0; i < n_vocab; i++) { uint32_t len; fin.read((char *) &len, sizeof(len)); buf.resize(len); fin.read((char *) buf.data(), len); word.assign(buf.data(), len); vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; } } // for the big tensors, we have the option to store the data in 16-bit floats or quantized // in order to save memory and also to speed up the computation ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype) (model.hparams.ftype)); if (wtype == GGML_TYPE_COUNT) { fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", __func__, fname.c_str(), model.hparams.ftype); return false; } auto & ctx = model.ctx_w; size_t buffer_size = 0; { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_vocab = hparams.n_vocab; buffer_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_g buffer_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_b buffer_size += ggml_row_size(wtype, n_vocab*n_embd); // wte buffer_size += ggml_row_size(GGML_TYPE_F32, n_ctx*n_embd); // wpe buffer_size += ggml_row_size(wtype, 
n_vocab*n_embd); // lm_head buffer_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_g buffer_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_b buffer_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_2_g buffer_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_2_b buffer_size += n_layer*(ggml_row_size(wtype, 3*n_embd*n_embd)); // c_attn_attn_w buffer_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 3*n_embd)); // c_attn_attn_b buffer_size += n_layer*(ggml_row_size(wtype, n_embd*n_embd)); // c_attn_proj_w buffer_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // c_attn_proj_b buffer_size += n_layer*(ggml_row_size(wtype, 4*n_embd*n_embd)); // c_mlp_fc_w buffer_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 4*n_embd)); // c_mlp_fc_b buffer_size += n_layer*(ggml_row_size(wtype, 4*n_embd*n_embd)); // c_mlp_proj_w buffer_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 4*n_embd)); // c_mlp_proj_b buffer_size += (6 + 12*n_layer)*128; // alignment overhead printf("%s: ggml tensor size = %d bytes\n", __func__, (int) sizeof(ggml_tensor)); printf("%s: backend buffer size = %6.2f MB\n", __func__, buffer_size/(1024.0*1024.0)); } ggml_log_set(ggml_log_callback_default, nullptr); // create the ggml context { size_t n_tensors = 2 + 6 + 12*model.hparams.n_layer; struct ggml_init_params params = { /*.mem_size =*/ ggml_tensor_overhead() * n_tensors, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; model.ctx_w = ggml_init(params); if (!model.ctx_w) { fprintf(stderr, "%s: ggml_init() failed\n", __func__); return false; } } // initialize the backend #ifdef GGML_USE_CUDA if (n_gpu_layers > 0) { fprintf(stderr, "%s: using CUDA backend\n", __func__); model.backend = ggml_backend_cuda_init(0); if (!model.backend) { fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__); } } #endif #ifdef GGML_USE_METAL if (n_gpu_layers > 0) { fprintf(stderr, "%s: using Metal backend\n", __func__); model.backend = ggml_backend_metal_init(); if (!model.backend) { fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__); } } #endif if (!model.backend) { // fallback to CPU backend fprintf(stderr, "%s: using CPU backend\n", __func__); model.backend = ggml_backend_cpu_init(); } if (!model.backend) { fprintf(stderr, "%s: ggml_backend_cpu_init() failed\n", __func__); return false; } // allocate weights buffer model.buffer_w = ggml_backend_alloc_buffer(model.backend, buffer_size); // prepare memory for the weights { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_vocab = hparams.n_vocab; model.layers.resize(n_layer); model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.wte = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); model.wpe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ctx); model.lm_head = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); // map by name model.tensors["model/ln_f/g"] = model.ln_f_g; model.tensors["model/ln_f/b"] = model.ln_f_b; model.tensors["model/wte"] = model.wte; model.tensors["model/wpe"] = model.wpe; model.tensors["model/lm_head"] = model.lm_head; for (int i = 0; i < n_layer; ++i) { auto & layer = model.layers[i]; layer.ln_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_2_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_2_b = ggml_new_tensor_1d(ctx, 
GGML_TYPE_F32, n_embd); layer.c_attn_attn_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 3*n_embd); layer.c_attn_attn_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3*n_embd); layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); layer.c_attn_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.c_mlp_fc_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 4*n_embd); layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4*n_embd); layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4*n_embd, n_embd); layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // map by name model.tensors["model/h" + std::to_string(i) + "/ln_1/g"] = layer.ln_1_g; model.tensors["model/h" + std::to_string(i) + "/ln_1/b"] = layer.ln_1_b; model.tensors["model/h" + std::to_string(i) + "/ln_2/g"] = layer.ln_2_g; model.tensors["model/h" + std::to_string(i) + "/ln_2/b"] = layer.ln_2_b; model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/w"] = layer.c_attn_attn_w; model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/b"] = layer.c_attn_attn_b; model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/w"] = layer.c_attn_proj_w; model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/b"] = layer.c_attn_proj_b; model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/w"] = layer.c_mlp_fc_w; model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/b"] = layer.c_mlp_fc_b; model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/w"] = layer.c_mlp_proj_w; model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/b"] = layer.c_mlp_proj_b; } } // override the default training context with the user-provided model.hparams.n_ctx = n_ctx; // key + value memory { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_mem = n_layer*n_ctx; const int n_elements = n_embd*n_mem; model.kv_cache.k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); model.kv_cache.v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); model.kv_cache.head = 0; model.kv_cache.size = n_ctx; model.kv_cache.cells.resize(n_ctx); const size_t memory_size = ggml_nbytes(model.kv_cache.k) + ggml_nbytes(model.kv_cache.v); printf("%s: memory size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem); // create a backend buffer (can be in host or device memory) model.kv_cache.buffer = ggml_backend_alloc_buffer(model.backend, memory_size + 256); // allocate the tensors into the backend buffer { ggml_tallocr alloc = ggml_tallocr_new(model.kv_cache.buffer); // this updates the pointers in the tensors to point to the correct location in the buffer // this is necessary since the ggml_context is .no_alloc == true // note that the buffer can actually be a device buffer, depending on the backend ggml_tallocr_alloc(&alloc, model.kv_cache.k); ggml_tallocr_alloc(&alloc, model.kv_cache.v); } } // load weights { ggml_tallocr alloc = ggml_tallocr_new(model.buffer_w); size_t total_size = 0; bool has_lm_head = false; std::vector<char> read_buf; while (true) { int32_t n_dims; int32_t length; int32_t ttype; fin.read(reinterpret_cast<char *>(&n_dims), sizeof(n_dims)); fin.read(reinterpret_cast<char *>(&length), sizeof(length)); fin.read(reinterpret_cast<char *>(&ttype), sizeof(ttype)); if (fin.eof()) { break; } int32_t nelements = 1; int32_t ne[2] = { 1, 1 }; for (int i = 0; i < n_dims; ++i) { fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i])); nelements *= ne[i]; } std::string name(length, 0); fin.read(&name[0], length); if (model.tensors.find(name) == 
model.tensors.end()) { fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.c_str()); return false; } auto tensor = model.tensors[name]; ggml_set_name(tensor, name.c_str()); if (ggml_nelements(tensor) != nelements) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.c_str()); return false; } if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", __func__, name.c_str(), (int) tensor->ne[0], (int) tensor->ne[1], ne[0], ne[1]); return false; } // for debugging if (0) { printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.c_str(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor)); } const size_t bpe = ggml_type_size(ggml_type(ttype)); if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n", __func__, name.c_str(), ggml_nbytes(tensor), nelements*bpe); return false; } ggml_tallocr_alloc(&alloc, tensor); if (ggml_backend_is_cpu (model.backend) #ifdef GGML_USE_METAL || ggml_backend_is_metal(model.backend) #endif ) { // for the CPU and Metal backend, we can read directly into the tensor fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor)); } else { // read into a temporary buffer first, then copy to device memory read_buf.resize(ggml_nbytes(tensor)); fin.read(read_buf.data(), ggml_nbytes(tensor)); ggml_backend_tensor_set(tensor, read_buf.data(), 0, ggml_nbytes(tensor)); } // GPT-2 models share the WTE tensor as the LM head if (name == "model/wte" && has_lm_head == false) { //ggml_tallocr_alloc(alloc, model.lm_head); //ggml_backend_tensor_copy(tensor, model.lm_head); model.lm_head = tensor; } if (name == "model/lm_head") { has_lm_head = true; } total_size += ggml_nbytes(tensor); } printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0); } fin.close(); return true; } // build the computation graph struct ggml_cgraph * gpt2_graph( const gpt2_model & model, const gpt2_batch & batch, bool measure) { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_head = hparams.n_head; const auto & kv_cache = model.kv_cache; const int32_t n_tokens = batch.n_tokens; const int32_t n_kv = measure ? n_ctx : kv_cache.n; const int32_t kv_head = measure ? 
n_ctx - n_tokens : kv_cache.head; // since we are using ggml-alloc, this buffer only needs enough space to hold the ggml_tensor and ggml_cgraph structs, but not the tensor data static size_t buf_size = ggml_tensor_overhead()*GPT2_MAX_NODES + ggml_graph_overhead_custom(GPT2_MAX_NODES, false); static std::vector<uint8_t> buf(buf_size); struct ggml_init_params params = { /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf.data(), /*.no_alloc =*/ true, // the tensors will be allocated later by ggml_gallocr_alloc_graph() }; struct ggml_context * ctx = ggml_init(params); struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GPT2_MAX_NODES, false); struct ggml_tensor * inpL; if (batch.token) { struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_tokens); ggml_set_name(inp_tokens, "inp_tokens"); ggml_set_input(inp_tokens); struct ggml_tensor * position = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_tokens); ggml_set_name(position, "position"); ggml_set_input(position); // wte + wpe inpL = ggml_add(ctx, ggml_get_rows(ctx, model.wte, inp_tokens), ggml_get_rows(ctx, model.wpe, position)); } else { GGML_ASSERT(batch.embd); inpL = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_tokens); ggml_set_name(inpL, "embd"); ggml_set_input(inpL); } // KQ_mask (mask for 1 head, it will be broadcasted to all heads) struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_kv, n_tokens, 1); ggml_set_name(KQ_mask, "KQ_mask"); ggml_set_input(KQ_mask); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * cur; // norm { // [ 768, N] cur = ggml_norm(ctx, inpL, hparams.eps); // cur = ln_1_g*cur + ln_1_b // [ 768, N] cur = ggml_add(ctx, ggml_mul(ctx, cur, model.layers[il].ln_1_g), model.layers[il].ln_1_b); } // attn // [2304, 768] - model.layers[il].c_attn_attn_w // [2304, 1] - model.layers[il].c_attn_attn_b // [ 768, n_tokens] - cur (in) // [2304, n_tokens] - cur (out) // // cur = attn_w*cur + attn_b // [2304, n_tokens] { cur = ggml_mul_mat(ctx, model.layers[il].c_attn_attn_w, cur); cur = ggml_add(ctx, cur, model.layers[il].c_attn_attn_b); } // self-attention { struct ggml_tensor * Qcur = ggml_view_2d(ctx, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*n_embd); struct ggml_tensor * Kcur = ggml_view_2d(ctx, cur, n_embd, n_tokens, cur->nb[1], 1*sizeof(float)*n_embd); struct ggml_tensor * Vcur = ggml_view_2d(ctx, cur, n_embd, n_tokens, cur->nb[1], 2*sizeof(float)*n_embd); // store key and value to memory if (n_tokens >= 1) { struct ggml_tensor * k = ggml_view_1d(ctx, model.kv_cache.k, n_tokens*n_embd, (ggml_element_size(model.kv_cache.k)*n_embd)*(il*n_ctx + kv_head)); struct ggml_tensor * v = ggml_view_1d(ctx, model.kv_cache.v, n_tokens*n_embd, (ggml_element_size(model.kv_cache.v)*n_embd)*(il*n_ctx + kv_head)); ggml_build_forward_expand(gf, ggml_cpy(ctx, Kcur, k)); ggml_build_forward_expand(gf, ggml_cpy(ctx, Vcur, v)); } // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3) // [64, N, 12] struct ggml_tensor * Q = ggml_permute(ctx, ggml_cont_3d(ctx, Qcur, n_embd/n_head, n_head, n_tokens), 0, 2, 1, 3); // K = Kmem.view(n_embd/n_head, n_head, n_kv).permute(0, 2, 1, 3) // [64, n_kv, 12] struct ggml_tensor * K = ggml_permute(ctx, ggml_reshape_3d(ctx, ggml_view_1d(ctx, model.kv_cache.k, n_kv*n_embd, il*n_ctx*ggml_element_size(model.kv_cache.k)*n_embd), n_embd/n_head, n_head, n_kv), 0, 2, 1, 3); // GG: flash attention //struct ggml_tensor * V = // ggml_cpy(ctx0, // ggml_permute(ctx0, // ggml_reshape_3d(ctx0, // ggml_view_1d(ctx0, model.kv_cache.v, n_kv*n_embd, 
il*n_ctx*ggml_element_size(model.kv_cache.v)*n_embd), // n_embd/n_head, n_head, n_kv), // 1, 2, 0, 3), // ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_kv, n_embd/n_head, n_head)); //struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, true); // K * Q // [n_kv, n_tokens, 12] struct ggml_tensor * KQ = ggml_mul_mat(ctx, K, Q); // KQ_scaled = KQ / sqrt(n_embd/n_head) // [n_kv, n_tokens, 12] struct ggml_tensor * KQ_scaled = ggml_scale(ctx, KQ, 1.0f/sqrtf(float(n_embd)/n_head)); // KQ_masked = mask_past(KQ_scaled) // [n_kv, n_tokens, 12] struct ggml_tensor * KQ_masked = ggml_add(ctx, KQ_scaled, KQ_mask); // KQ = soft_max(KQ_masked) // [n_kv, N, 12] struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx, KQ_masked); // V_trans = Vmem.view(n_embd/n_head, n_head, n_kv).permute(1, 2, 0, 3).contiguous() // [n_kv, 64, 12] struct ggml_tensor * V_trans = ggml_cont_3d(ctx, ggml_permute(ctx, ggml_reshape_3d(ctx, ggml_view_1d(ctx, model.kv_cache.v, n_kv*n_embd, il*n_ctx*ggml_element_size(model.kv_cache.v)*n_embd), n_embd/n_head, n_head, n_kv), 1, 2, 0, 3), n_kv, n_embd/n_head, n_head); // KQV = transpose(V) * KQ_soft_max // [64, n_tokens, 12] struct ggml_tensor * KQV = ggml_mul_mat(ctx, V_trans, KQ_soft_max); // KQV_merged = KQV.permute(0, 2, 1, 3) // [64, 12, n_tokens] struct ggml_tensor * KQV_merged = ggml_permute(ctx, KQV, 0, 2, 1, 3); // cur = KQV_merged.contiguous().view(n_embd, N) // [768, n_tokens] cur = ggml_cont_2d(ctx, KQV_merged, n_embd, n_tokens); } // projection // [ 768, 768] - model.layers[il].c_attn_proj_w // [ 768, 1] - model.layers[il].c_attn_proj_b // [ 768, N] - cur (in) // [ 768, N] - cur (out) // // cur = proj_w*cur + proj_b // [768, N] { cur = ggml_mul_mat(ctx, model.layers[il].c_attn_proj_w, cur); cur = ggml_add(ctx, cur, model.layers[il].c_attn_proj_b); } // add the input cur = ggml_add(ctx, cur, inpL); struct ggml_tensor * inpFF = cur; // feed-forward network { // norm { cur = ggml_norm(ctx, inpFF, hparams.eps); // cur = ln_2_g*cur + ln_2_b // [ 768, N] cur = ggml_add(ctx, ggml_mul(ctx, cur, model.layers[il].ln_2_g), model.layers[il].ln_2_b); } // fully connected // [3072, 768] - model.layers[il].c_mlp_fc_w // [3072, 1] - model.layers[il].c_mlp_fc_b // [ 768, N] - cur (in) // [3072, N] - cur (out) // // cur = fc_w*cur + fc_b // [3072, N] cur = ggml_mul_mat(ctx, model.layers[il].c_mlp_fc_w, cur); cur = ggml_add(ctx, cur, model.layers[il].c_mlp_fc_b); // GELU activation // [3072, N] cur = ggml_gelu(ctx, cur); // projection // [ 768, 3072] - model.layers[il].c_mlp_proj_w // [ 768, 1] - model.layers[il].c_mlp_proj_b // [3072, N] - cur (in) // [ 768, N] - cur (out) // // cur = proj_w*cur + proj_b // [768, N] cur = ggml_mul_mat(ctx, model.layers[il].c_mlp_proj_w, cur); cur = ggml_add(ctx, cur, model.layers[il].c_mlp_proj_b); } // input for next layer inpL = ggml_add(ctx, cur, inpFF); } // norm { // [ 768, N] inpL = ggml_norm(ctx, inpL, hparams.eps); // inpL = ln_f_g*inpL + ln_f_b // [ 768, N] inpL = ggml_add(ctx, ggml_mul(ctx, inpL, model.ln_f_g), model.ln_f_b); } // inpL = WTE * inpL // [ 768, 50257] - model.lm_head // [ 768, N] - inpL inpL = ggml_mul_mat(ctx, model.lm_head, inpL); // logits -> probs //inpL = ggml_soft_max(ctx0, inpL); ggml_build_forward_expand(gf, inpL); ggml_free(ctx); return gf; } static void gpt2_kv_cache_seq_cp( struct gpt2_kv_cache & cache, gpt2_seq_id seq_id_src, gpt2_seq_id seq_id_dst, gpt2_pos p0, gpt2_pos p1) { if (p0 < 0) p0 = 0; if (p1 < 0) p1 = std::numeric_limits<gpt2_pos>::max(); for (uint32_t i = 0; i < cache.size; ++i) { if (cache.cells[i].has_seq_id(seq_id_src) 
&& cache.cells[i].pos >= p0 && cache.cells[i].pos < p1) { cache.cells[i].seq_id.insert(seq_id_dst); } } } struct gpt2_batch gpt2_batch_init(int32_t n_tokens, int32_t embd) { gpt2_batch batch; if (embd) { batch.embd = (float *) malloc(sizeof(float) * n_tokens * embd); } else { batch.token = (gpt_vocab::id *) malloc(sizeof(gpt_vocab::id) * n_tokens); } batch.pos = (gpt2_pos *) malloc(sizeof(gpt2_pos) * n_tokens); batch.seq_id = (gpt2_seq_id *) malloc(sizeof(gpt2_seq_id) * n_tokens); batch.logits = (int8_t *) malloc(sizeof(int8_t) * n_tokens); return batch; } void gpt2_batch_free(struct gpt2_batch batch) { if (batch.token) free(batch.token); if (batch.embd) free(batch.embd); if (batch.pos) free(batch.pos); if (batch.seq_id) free(batch.seq_id); if (batch.logits) free(batch.logits); } // Positive return values do not mean a fatal error, but rather a warning. // 0 - success // < 0 - error int gpt2_decode( struct gpt2_model & model, ggml_gallocr_t allocr, struct gpt2_batch batch, int n_threads, std::vector<float> & logits) { const int32_t n_tokens = batch.n_tokens; const auto & hparams = model.hparams; const int n_vocab = hparams.n_vocab; if (n_tokens == 0) { printf("%s: n_tokens == 0", __func__); return -1; } GGML_ASSERT((!batch.token && batch.embd) || (batch.token && !batch.embd)); auto & cache = model.kv_cache; for (int i = 0; i < n_tokens; i++) { cache.cells[cache.head + i].pos = batch.pos[i]; cache.cells[cache.head + i].seq_id.insert(batch.seq_id[i]); } cache.n = cache.head + n_tokens; struct ggml_cgraph * gf = gpt2_graph(model, batch, false); // allocate tensors ggml_gallocr_alloc_graph(allocr, gf); // set the graph inputs if (batch.token) { struct ggml_tensor * inp_tokens = ggml_graph_get_tensor(gf, "inp_tokens"); ggml_backend_tensor_set(inp_tokens, batch.token, 0, n_tokens*ggml_element_size(inp_tokens)); struct ggml_tensor * position = ggml_graph_get_tensor(gf, "position"); for (int i = 0; i < n_tokens; ++i) { int32_t v = batch.pos[i]; ggml_backend_tensor_set(position, &v, i*sizeof(int32_t), sizeof(v)); } } else { struct ggml_tensor * embd = ggml_graph_get_tensor(gf, "embd"); ggml_backend_tensor_set(embd, batch.embd, 0, n_tokens * hparams.n_embd * ggml_element_size(embd)); } { struct ggml_tensor * KQ_mask = ggml_graph_get_tensor(gf, "KQ_mask"); const auto & kv_cache = model.kv_cache; const int32_t n_tokens = batch.n_tokens; const int32_t n_kv = kv_cache.n; std::vector<float> data_buf(n_kv*n_tokens); const float neg_inf_v = -INFINITY; for (int h = 0; h < 1; ++h) { int h_offset = h*(n_kv*n_tokens); for (int j = 0; j < n_tokens; ++j) { const gpt2_pos pos = batch.pos[j]; const gpt2_seq_id seq_id = batch.seq_id[j]; for (int i = 0; i < n_kv; ++i) { if (!kv_cache.cells[i].has_seq_id(seq_id) || kv_cache.cells[i].pos > pos) { data_buf[h_offset + j*n_kv + i] = neg_inf_v; } } } } ggml_backend_tensor_set(KQ_mask, data_buf.data(), 0, data_buf.size() * sizeof(float)); } // run the computation if (ggml_backend_is_cpu(model.backend)) { ggml_backend_cpu_set_n_threads(model.backend, n_threads); } ggml_backend_graph_compute(model.backend, gf); //if (n_past%100 == 0) { // ggml_graph_print (&gf); // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot"); //} // in this case, the output tensor is the last one in the graph struct ggml_tensor * inpL = ggml_graph_node(gf, -1); if (batch.logits) { // return logits for all tokens logits.resize(n_vocab*n_tokens); for (int32_t i = 0; i < n_tokens; i++) { if (batch.logits[i] == 0) { continue; } ggml_backend_tensor_get(inpL, logits.data() + n_vocab*i, n_vocab*i*sizeof(float), 
sizeof(float)*n_vocab); } } else { // return result just for the last token logits.resize(n_vocab); ggml_backend_tensor_get(inpL, logits.data(), (n_vocab*(n_tokens-1))*sizeof(float), sizeof(float)*n_vocab); } // update the kv ring buffer cache.head += n_tokens; // ensure kv cache head points to a valid index. if (cache.head >= cache.size) { printf("%s: cache.head >= cache.size\n", __func__); return -2; } return 0; } int main(int argc, char ** argv) { ggml_time_init(); const int64_t t_main_start_us = ggml_time_us(); gpt_params params; if (gpt_params_parse(argc, argv, params) == false) { return 1; } if (params.seed < 0) { params.seed = time(NULL); } printf("%s: seed = %d\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.prompt.empty()) { params.prompt = gpt_random_prompt(rng); } int64_t t_load_us = 0; gpt_vocab vocab; gpt2_model model; // load the model { const int64_t t_start_us = ggml_time_us(); if (!gpt2_model_load(params.model, model, vocab, params.n_ctx, params.n_gpu_layers)) { fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); return 1; } t_load_us = ggml_time_us() - t_start_us; test_gpt_tokenizer(vocab, params.token_test); } // tokenize the prompt std::vector embd_inp = ::gpt_tokenize(vocab, params.prompt); const int n_parallel = params.n_parallel; const int n_batch_max = std::max(embd_inp.size(), (size_t)n_parallel); // create a gpt2_batch // we use this object to submit token data for decoding gpt2_batch batch = gpt2_batch_init(n_batch_max, 0); // prepare required memory and allocate the compute buffer ggml_gallocr_t allocr = NULL; { // create an allocator to measure the memory usage allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend)); // create the worst case graph for memory usage estimation batch.n_tokens = n_batch_max; struct ggml_cgraph * gf = gpt2_graph(model, batch, true); // pre-allocate the compute buffer for the worst case (optional) ggml_gallocr_reserve(allocr, gf); size_t mem_size = ggml_gallocr_get_buffer_size(allocr, 0); fprintf(stderr, "%s: compute buffer size: %.2f MB\n", __func__, mem_size/1024.0/1024.0); } int64_t t_sample_us = 0; int64_t t_predict_us = 0; std::vector logits; // evaluate the initial prompt batch.n_tokens = embd_inp.size(); for (int32_t i = 0; i < batch.n_tokens; i++) { batch.token[i] = embd_inp[i]; batch.pos[i] = i; batch.seq_id[i] = 0; batch.logits[i] = false; } // gpt2_decode will output logits only for the last token of the prompt batch.logits[batch.n_tokens - 1] = true; if (gpt2_decode(model, allocr, batch, params.n_threads, logits) != 0) { printf("%s: gpt2_decode() failed\n", __func__); return 1; } // assign the system KV cache to all parallel sequences // this way, the parallel sequences will "reuse" the prompt tokens without having to copy them for (int32_t i = 1; i < n_parallel; ++i) { gpt2_kv_cache_seq_cp(model.kv_cache, 0, i, 0, batch.n_tokens); } if (n_parallel > 1) { printf("\n\n%s: generating %d sequences ...\n", __func__, n_parallel); } params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str()); printf("%s: number of tokens in prompt = %zu, first 8 tokens: ", __func__, embd_inp.size()); for (int i = 0; i < std::min(8, (int) embd_inp.size()); i++) { printf("%d ", embd_inp[i]); } printf("\n\n"); std::vector streams(n_parallel); // remember the batch index of the last token for each parallel sequence // we need this to determine which logits to sample from 
std::vector i_batch(n_parallel, batch.n_tokens - 1); int n_cur = batch.n_tokens; int n_len = batch.n_tokens + params.n_predict; int n_decoded = 0; const int n_vocab = model.hparams.n_vocab; const int top_k = params.top_k; const float top_p = params.top_p; const float temp = params.temp; while (n_cur < n_len) { batch.n_tokens = 0; for (int32_t i = 0; i < n_parallel; ++i) { if (i_batch[i] < 0) { // the stream has already finished continue; } auto * logits_i = logits.data() + i_batch[i]*n_vocab; gpt_vocab::id id = 0; { const int64_t t_start_sample_us = ggml_time_us(); id = gpt_sample_top_k_top_p(vocab, logits_i, top_k, top_p, temp, rng); t_sample_us += ggml_time_us() - t_start_sample_us; } // is it an end of stream? -> mark the stream as finished if ((!params.ignore_eos && id == 50256) || n_cur == n_len - 1) { i_batch[i] = -1; printf("\n"); if (n_parallel > 1) { printf("%s: stream %d finished at n_cur = %d", __func__, i, n_cur); } continue; } auto& token = vocab.id_to_token[id]; if (n_parallel == 1) { printf("%s", token.c_str()); fflush(stdout); } streams[i] += token; // push this new token for next evaluation batch.token [batch.n_tokens] = id; batch.pos [batch.n_tokens] = n_cur; batch.seq_id[batch.n_tokens] = i; batch.logits[batch.n_tokens] = true; i_batch[i] = batch.n_tokens; batch.n_tokens += 1; n_decoded += 1; } // all streams are finished if (batch.n_tokens == 0) { break; } n_cur += 1; { const int64_t t_start_us = ggml_time_us(); // evaluate the current batch with the transformer model int ret_code = gpt2_decode(model, allocr, batch, params.n_threads, logits); if (ret_code != 0) { fprintf(stderr, "%s : failed to eval, return code %d\n", __func__, ret_code); return 1; } t_predict_us += ggml_time_us() - t_start_us; } } if (n_parallel > 1) { printf("\n"); for (int32_t i = 0; i < n_parallel; ++i) { printf("sequence %d:\n\n%s%s\n\n", i, params.prompt.c_str(), streams[i].c_str()); } } // report timing { const int64_t t_main_end_us = ggml_time_us(); printf("\n\n"); printf("%s: n_decoded = %8d\n", __func__, n_decoded); printf("%s: load time = %8.2f ms\n", __func__, t_load_us/1000.0f); printf("%s: sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f); printf("%s: predict time = %8.2f ms\n", __func__, t_predict_us/1000.0f); printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f); } gpt2_batch_free(batch); ggml_free(model.ctx_w); ggml_gallocr_free(allocr); ggml_backend_buffer_free(model.buffer_w); ggml_backend_buffer_free(model.kv_cache.buffer); ggml_backend_free(model.backend); return 0; } ggml-org-ggml-7ec8045/examples/gpt-2/main-ctx.cpp000066400000000000000000000714431506673203700215270ustar00rootroot00000000000000#include "ggml.h" #include "ggml-cpu.h" #include "common.h" #include "common-ggml.h" #include #include #include #include #include #include #include #include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #endif // default hparams (GPT-2 117M) struct gpt2_hparams { int32_t n_vocab = 50257; int32_t n_ctx = 1024; int32_t n_embd = 768; int32_t n_head = 12; int32_t n_layer = 12; int32_t ftype = 1; float eps = 1e-5f; }; struct gpt2_layer { // normalization struct ggml_tensor * ln_1_g; struct ggml_tensor * ln_1_b; struct ggml_tensor * ln_2_g; struct ggml_tensor * ln_2_b; // attention struct ggml_tensor * c_attn_attn_w; struct ggml_tensor * c_attn_attn_b; struct ggml_tensor * c_attn_proj_w; struct ggml_tensor * c_attn_proj_b; // mlp struct ggml_tensor * c_mlp_fc_w; struct ggml_tensor * c_mlp_fc_b; struct ggml_tensor * 
c_mlp_proj_w; struct ggml_tensor * c_mlp_proj_b; }; struct gpt2_model { gpt2_hparams hparams; // normalization struct ggml_tensor * ln_f_g; struct ggml_tensor * ln_f_b; struct ggml_tensor * wte; // token embedding struct ggml_tensor * wpe; // position embedding struct ggml_tensor * lm_head; // language model head std::vector layers; // key + value memory struct ggml_tensor * memory_k; struct ggml_tensor * memory_v; // struct ggml_context * ctx_w; std::map tensors; }; // load the model's weights from a file bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab) { printf("%s: loading model from '%s'\n", __func__, fname.c_str()); auto fin = std::ifstream(fname, std::ios::binary); if (!fin) { fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); return false; } // verify magic { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); if (magic != GGML_FILE_MAGIC) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); return false; } } // load hparams { auto & hparams = model.hparams; fin.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); fin.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); fin.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); fin.read((char *) &hparams.n_head, sizeof(hparams.n_head)); fin.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); fin.read((char *) &hparams.ftype, sizeof(hparams.ftype)); const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab); printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx); printf("%s: n_embd = %d\n", __func__, hparams.n_embd); printf("%s: n_head = %d\n", __func__, hparams.n_head); printf("%s: n_layer = %d\n", __func__, hparams.n_layer); printf("%s: ftype = %d\n", __func__, hparams.ftype); printf("%s: qntvr = %d\n", __func__, qntvr); hparams.ftype %= GGML_QNT_VERSION_FACTOR; } // load vocab { int32_t n_vocab = 0; fin.read((char *) &n_vocab, sizeof(n_vocab)); if (n_vocab != model.hparams.n_vocab) { fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", __func__, fname.c_str(), n_vocab, model.hparams.n_vocab); return false; } std::string word; std::vector buf(128); for (int i = 0; i < n_vocab; i++) { uint32_t len; fin.read((char *) &len, sizeof(len)); buf.resize(len); fin.read((char *) buf.data(), len); word.assign(buf.data(), len); vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; } } // for the big tensors, we have the option to store the data in 16-bit floats or quantized // in order to save memory and also to speed up the computation ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype) (model.hparams.ftype)); if (wtype == GGML_TYPE_COUNT) { fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", __func__, fname.c_str(), model.hparams.ftype); return false; } auto & ctx = model.ctx_w; size_t ctx_size = 0; { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_vocab = hparams.n_vocab; ctx_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_g ctx_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_b ctx_size += ggml_row_size(wtype, n_vocab*n_embd); // wte ctx_size += ggml_row_size(GGML_TYPE_F32, n_ctx*n_embd); // wpe ctx_size += ggml_row_size(wtype, n_vocab*n_embd); // lm_head ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_g ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_b ctx_size += 
n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_2_g ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_2_b ctx_size += n_layer*(ggml_row_size(wtype, 3*n_embd*n_embd)); // c_attn_attn_w ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 3*n_embd)); // c_attn_attn_b ctx_size += n_layer*(ggml_row_size(wtype, n_embd*n_embd)); // c_attn_proj_w ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // c_attn_proj_b ctx_size += n_layer*(ggml_row_size(wtype, 4*n_embd*n_embd)); // c_mlp_fc_w ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 4*n_embd)); // c_mlp_fc_b ctx_size += n_layer*(ggml_row_size(wtype, 4*n_embd*n_embd)); // c_mlp_proj_w ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 4*n_embd)); // c_mlp_proj_b ctx_size += n_ctx*n_layer*ggml_row_size(GGML_TYPE_F32, n_embd); // memory_k ctx_size += n_ctx*n_layer*ggml_row_size(GGML_TYPE_F32, n_embd); // memory_v ctx_size += (6 + 12*n_layer)*512; // object overhead printf("%s: ggml tensor size = %d bytes\n", __func__, (int) sizeof(ggml_tensor)); printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0)); } // create the ggml context { struct ggml_init_params params = { /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, }; model.ctx_w = ggml_init(params); if (!model.ctx_w) { fprintf(stderr, "%s: ggml_init() failed\n", __func__); return false; } } // prepare memory for the weights { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_vocab = hparams.n_vocab; model.layers.resize(n_layer); model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.wte = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); model.wpe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ctx); model.lm_head = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); // map by name model.tensors["model/ln_f/g"] = model.ln_f_g; model.tensors["model/ln_f/b"] = model.ln_f_b; model.tensors["model/wte"] = model.wte; model.tensors["model/wpe"] = model.wpe; model.tensors["model/lm_head"] = model.lm_head; for (int i = 0; i < n_layer; ++i) { auto & layer = model.layers[i]; layer.ln_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_2_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.c_attn_attn_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 3*n_embd); layer.c_attn_attn_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3*n_embd); layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); layer.c_attn_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.c_mlp_fc_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 4*n_embd); layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4*n_embd); layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4*n_embd, n_embd); layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // map by name model.tensors["model/h" + std::to_string(i) + "/ln_1/g"] = layer.ln_1_g; model.tensors["model/h" + std::to_string(i) + "/ln_1/b"] = layer.ln_1_b; model.tensors["model/h" + std::to_string(i) + "/ln_2/g"] = layer.ln_2_g; model.tensors["model/h" + std::to_string(i) + "/ln_2/b"] = layer.ln_2_b; model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/w"] = layer.c_attn_attn_w; model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/b"] = layer.c_attn_attn_b; 
model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/w"] = layer.c_attn_proj_w; model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/b"] = layer.c_attn_proj_b; model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/w"] = layer.c_mlp_fc_w; model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/b"] = layer.c_mlp_fc_b; model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/w"] = layer.c_mlp_proj_w; model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/b"] = layer.c_mlp_proj_b; } } // key + value memory { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_mem = n_layer*n_ctx; const int n_elements = n_embd*n_mem; model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); printf("%s: memory size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem); } // load weights { size_t total_size = 0; bool has_lm_head = false; while (true) { int32_t n_dims; int32_t length; int32_t ttype; fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); fin.read(reinterpret_cast(&length), sizeof(length)); fin.read(reinterpret_cast(&ttype), sizeof(ttype)); if (fin.eof()) { break; } int32_t nelements = 1; int32_t ne[2] = { 1, 1 }; for (int i = 0; i < n_dims; ++i) { fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); nelements *= ne[i]; } std::string name(length, 0); fin.read(&name[0], length); if (model.tensors.find(name) == model.tensors.end()) { fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.c_str()); return false; } auto tensor = model.tensors[name]; if (ggml_nelements(tensor) != nelements) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.c_str()); return false; } if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", __func__, name.c_str(), (int) tensor->ne[0], (int) tensor->ne[1], ne[0], ne[1]); return false; } // for debugging if (0) { printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.c_str(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor)); } const size_t bpe = ggml_type_size(ggml_type(ttype)); if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n", __func__, name.c_str(), ggml_nbytes(tensor), nelements*bpe); return false; } fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); // GPT-2 models share the WTE tensor as the LM head if (name == "model/wte" && has_lm_head == false) { memcpy(model.lm_head->data, tensor->data, ggml_nbytes(tensor)); } if (name == "model/lm_head") { has_lm_head = true; } total_size += ggml_nbytes(tensor); } printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0); } fin.close(); return true; } // evaluate the transformer // // - model: the model // - n_threads: number of threads to use // - n_past: the context size so far // - embd_inp: the embeddings of the tokens in the context // - embd_w: the predicted logits for the next token // bool gpt2_eval( const gpt2_model & model, const int n_threads, const int n_past, const std::vector & embd_inp, std::vector & embd_w, size_t & mem_per_token) { const int N = 
embd_inp.size(); const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_head = hparams.n_head; const int n_vocab = hparams.n_vocab; static size_t buf_size = 256u*1024*1024; static void * buf = malloc(buf_size); if (mem_per_token > 0 && mem_per_token*N > buf_size) { const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new); // reallocate buf_size = buf_size_new; buf = realloc(buf, buf_size); if (buf == nullptr) { fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); return false; } } struct ggml_init_params params = { /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, }; struct ggml_context * ctx0 = ggml_init(params); struct ggml_cgraph * gf = ggml_new_graph(ctx0); struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd)); struct ggml_tensor * position = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); for (int i = 0; i < N; ++i) { ((int32_t *) position->data)[i] = n_past + i; } // wte + wpe struct ggml_tensor * inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.wte, embd), ggml_get_rows(ctx0, model.wpe, position)); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * cur; // norm { // [ 768, N] cur = ggml_norm(ctx0, inpL, hparams.eps); // cur = ln_1_g*cur + ln_1_b // [ 768, N] cur = ggml_add(ctx0, ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].ln_1_g, cur), cur), ggml_repeat(ctx0, model.layers[il].ln_1_b, cur)); } // attn // [2304, 768] - model.layers[il].c_attn_attn_w // [2304, 1] - model.layers[il].c_attn_attn_b // [ 768, N] - cur (in) // [2304, N] - cur (out) // // cur = attn_w*cur + attn_b // [2304, N] { cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_attn_w, cur); cur = ggml_add(ctx0, ggml_repeat(ctx0, model.layers[il].c_attn_attn_b, cur), cur); } // self-attention { struct ggml_tensor * Qcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 0*sizeof(float)*n_embd); struct ggml_tensor * Kcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 1*sizeof(float)*n_embd); struct ggml_tensor * Vcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 2*sizeof(float)*n_embd); // store key and value to memory if (N >= 1) { struct ggml_tensor * k = ggml_view_1d(ctx0, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past)); struct ggml_tensor * v = ggml_view_1d(ctx0, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past)); ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k)); ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v)); } // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3) // [64, N, 12] struct ggml_tensor * Q = ggml_permute(ctx0, ggml_cpy(ctx0, Qcur, ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_embd/n_head, n_head, N)), 0, 2, 1, 3); // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3) // [64, n_past + N, 12] struct ggml_tensor * K = ggml_permute(ctx0, ggml_reshape_3d(ctx0, ggml_view_1d(ctx0, model.memory_k, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_k)*n_embd), n_embd/n_head, n_head, n_past + N), 0, 2, 1, 3); // GG: flash attention //struct ggml_tensor * V = // ggml_cpy(ctx0, // ggml_permute(ctx0, // ggml_reshape_3d(ctx0, // ggml_view_1d(ctx0, model.memory_v, (n_past + N)*n_embd, 
il*n_ctx*ggml_element_size(model.memory_v)*n_embd), // n_embd/n_head, n_head, n_past + N), // 1, 2, 0, 3), // ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_past + N, n_embd/n_head, n_head)); //struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, true); // K * Q // [n_past + N, N, 12] struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); // KQ_scaled = KQ / sqrt(n_embd/n_head) // [n_past + N, N, 12] struct ggml_tensor * KQ_scaled = ggml_scale_inplace(ctx0, KQ, 1.0f/sqrt(float(n_embd)/n_head)); // KQ_masked = mask_past(KQ_scaled) // [n_past + N, N, 12] struct ggml_tensor * KQ_masked = ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past); // KQ = soft_max(KQ_masked) // [n_past + N, N, 12] struct ggml_tensor * KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked); // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous() // [n_past + N, 64, 12] struct ggml_tensor * V_trans = ggml_cpy(ctx0, ggml_permute(ctx0, ggml_reshape_3d(ctx0, ggml_view_1d(ctx0, model.memory_v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_v)*n_embd), n_embd/n_head, n_head, n_past + N), 1, 2, 0, 3), ggml_new_tensor_3d(ctx0, model.memory_v->type, n_past + N, n_embd/n_head, n_head)); // KQV = transpose(V) * KQ_soft_max // [64, N, 12] struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max); // KQV_merged = KQV.permute(0, 2, 1, 3) // [64, 12, N] struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); // cur = KQV_merged.contiguous().view(n_embd, N) // [768, N] cur = ggml_cpy(ctx0, KQV_merged, ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); } // projection // [ 768, 768] - model.layers[il].c_attn_proj_w // [ 768, 1] - model.layers[il].c_attn_proj_b // [ 768, N] - cur (in) // [ 768, N] - cur (out) // // cur = proj_w*cur + proj_b // [768, N] { cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_proj_w, cur); cur = ggml_add(ctx0, ggml_repeat(ctx0, model.layers[il].c_attn_proj_b, cur), cur); } // add the input cur = ggml_add(ctx0, cur, inpL); struct ggml_tensor * inpFF = cur; // feed-forward network { // norm { cur = ggml_norm(ctx0, inpFF, hparams.eps); // cur = ln_2_g*cur + ln_2_b // [ 768, N] cur = ggml_add(ctx0, ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].ln_2_g, cur), cur), ggml_repeat(ctx0, model.layers[il].ln_2_b, cur)); } // fully connected // [3072, 768] - model.layers[il].c_mlp_fc_w // [3072, 1] - model.layers[il].c_mlp_fc_b // [ 768, N] - cur (in) // [3072, N] - cur (out) // // cur = fc_w*cur + fc_b // [3072, N] cur = ggml_mul_mat(ctx0, model.layers[il].c_mlp_fc_w, cur); cur = ggml_add(ctx0, ggml_repeat(ctx0, model.layers[il].c_mlp_fc_b, cur), cur); // GELU activation // [3072, N] cur = ggml_gelu(ctx0, cur); // projection // [ 768, 3072] - model.layers[il].c_mlp_proj_w // [ 768, 1] - model.layers[il].c_mlp_proj_b // [3072, N] - cur (in) // [ 768, N] - cur (out) // // cur = proj_w*cur + proj_b // [768, N] cur = ggml_mul_mat(ctx0, model.layers[il].c_mlp_proj_w, cur); cur = ggml_add(ctx0, ggml_repeat(ctx0, model.layers[il].c_mlp_proj_b, cur), cur); } // input for next layer inpL = ggml_add(ctx0, cur, inpFF); } // norm { // [ 768, N] inpL = ggml_norm(ctx0, inpL, hparams.eps); // inpL = ln_f_g*inpL + ln_f_b // [ 768, N] inpL = ggml_add(ctx0, ggml_mul(ctx0, ggml_repeat(ctx0, model.ln_f_g, inpL), inpL), ggml_repeat(ctx0, model.ln_f_b, inpL)); } // inpL = WTE * inpL // [ 768, 50257] - model.lm_head // [ 768, N] - inpL inpL = ggml_mul_mat(ctx0, model.lm_head, inpL); // logits -> probs //inpL = ggml_soft_max_inplace(ctx0, inpL); // run the computation 
ggml_build_forward_expand(gf, inpL); ggml_graph_compute_with_ctx(ctx0, gf, n_threads); //if (n_past%100 == 0) { // ggml_graph_print (&gf); // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot"); //} //embd_w.resize(n_vocab*N); //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N); // return result just for the last token embd_w.resize(n_vocab); memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab); if (mem_per_token == 0) { mem_per_token = ggml_used_mem(ctx0)/N; } //printf("used_mem = %zu\n", ggml_used_mem(ctx0)); ggml_free(ctx0); return true; } int main(int argc, char ** argv) { ggml_time_init(); const int64_t t_main_start_us = ggml_time_us(); gpt_params params; params.model = "models/gpt-2-117M/ggml-model.bin"; if (gpt_params_parse(argc, argv, params) == false) { return 1; } if (params.seed < 0) { params.seed = time(NULL); } printf("%s: seed = %d\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.prompt.empty()) { params.prompt = gpt_random_prompt(rng); } int64_t t_load_us = 0; gpt_vocab vocab; gpt2_model model; // load the model { const int64_t t_start_us = ggml_time_us(); if (!gpt2_model_load(params.model, model, vocab)) { fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); return 1; } t_load_us = ggml_time_us() - t_start_us; test_gpt_tokenizer(vocab, params.token_test); } int n_past = 0; int64_t t_sample_us = 0; int64_t t_predict_us = 0; std::vector logits; // tokenize the prompt std::vector embd_inp = ::gpt_tokenize(vocab, params.prompt); params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str()); printf("%s: number of tokens in prompt = %zu, first 8 tokens: ", __func__, embd_inp.size()); for (int i = 0; i < std::min(8, (int) embd_inp.size()); i++) { printf("%d ", embd_inp[i]); } printf("\n\n"); // submit the input prompt token-by-token // this reduces the memory usage during inference, at the cost of a bit of speed at the beginning std::vector embd; // determine the required inference memory per token: size_t mem_per_token = 0; gpt2_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token); for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) { // predict if (embd.size() > 0) { const int64_t t_start_us = ggml_time_us(); if (!gpt2_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) { printf("Failed to predict\n"); return 1; } t_predict_us += ggml_time_us() - t_start_us; } n_past += embd.size(); embd.clear(); if (i >= embd_inp.size()) { // sample next token const int top_k = params.top_k; const float top_p = params.top_p; const float temp = params.temp; const int n_vocab = model.hparams.n_vocab; gpt_vocab::id id = 0; { const int64_t t_start_sample_us = ggml_time_us(); id = gpt_sample_top_k_top_p(vocab, logits.data() + (logits.size() - n_vocab), top_k, top_p, temp, rng); t_sample_us += ggml_time_us() - t_start_sample_us; } // add it to the context embd.push_back(id); } else { // if here, it means we are still processing the input prompt for (size_t k = i; k < embd_inp.size(); k++) { embd.push_back(embd_inp[k]); if (int32_t(embd.size()) >= params.n_batch) { break; } } i += embd.size() - 1; } // display text for (auto id : embd) { printf("%s", vocab.id_to_token[id].c_str()); } fflush(stdout); // end of text token if (embd.back() == 50256) { break; } } // report timing { const int64_t t_main_end_us = ggml_time_us(); printf("\n\n"); printf("%s: 
mem per token = %8zu bytes\n", __func__, mem_per_token);
        printf("%s: load time = %8.2f ms\n", __func__, t_load_us/1000.0f);
        printf("%s: sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f);
        printf("%s: predict time = %8.2f ms / %.2f ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);
        printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f);
    }

    ggml_free(model.ctx_w);

    return 0;
}
ggml-org-ggml-7ec8045/examples/gpt-2/main-sched.cpp000066400000000000000000001132361506673203700220140ustar00rootroot00000000000000#include "ggml.h"
#include "ggml-cpu.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif

#ifdef GGML_USE_METAL
#include "ggml-metal.h"
#endif

#ifdef GGML_USE_BLAS
#include "ggml-blas.h"
#endif

#include "common.h"
#include "common-ggml.h"

#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <map>
#include <string>
#include <vector>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

#define GPT2_MAX_NODES 4096

static void ggml_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
    fflush(stderr);
}

// default hparams (GPT-2 117M)
struct gpt2_hparams {
    int32_t n_vocab = 50257;
    int32_t n_ctx   = 1024;
    int32_t n_embd  = 768;
    int32_t n_head  = 12;
    int32_t n_layer = 12;
    int32_t ftype   = 1;
    float   eps     = 1e-5f;
};

struct gpt2_layer {
    // normalization
    struct ggml_tensor * ln_1_g;
    struct ggml_tensor * ln_1_b;

    struct ggml_tensor * ln_2_g;
    struct ggml_tensor * ln_2_b;

    // attention
    struct ggml_tensor * c_attn_attn_w;
    struct ggml_tensor * c_attn_attn_b;

    struct ggml_tensor * c_attn_proj_w;
    struct ggml_tensor * c_attn_proj_b;

    // mlp
    struct ggml_tensor * c_mlp_fc_w;
    struct ggml_tensor * c_mlp_fc_b;

    struct ggml_tensor * c_mlp_proj_w;
    struct ggml_tensor * c_mlp_proj_b;
};

struct gpt2_model {
    gpt2_hparams hparams;

    // normalization
    struct ggml_tensor * ln_f_g;
    struct ggml_tensor * ln_f_b;

    struct ggml_tensor * wte;     // token embedding
    struct ggml_tensor * wpe;     // position embedding
    struct ggml_tensor * lm_head; // language model head

    std::vector<gpt2_layer> layers;

    // key + value memory
    struct ggml_tensor * memory_k;
    struct ggml_tensor * memory_v;

    //
    struct ggml_context * ctx_w;

    std::vector<ggml_backend_t> backends;
    std::vector<ggml_backend_buffer_t> buffers_w;
    ggml_backend_buffer_t buffer_kv;
    ggml_backend_buffer_t buffer_input;

    std::map<std::string, struct ggml_tensor *> tensors;

    // inputs/constants
    struct ggml_tensor * embd;
    struct ggml_tensor * position;
};

void init_backends(gpt2_model & model, const gpt_params & params) {
    ggml_backend_t gpu_backend = NULL;

    ggml_log_set(ggml_log_callback_default, nullptr);

    // initialize the backends
#ifdef GGML_USE_CUDA
    if (params.n_gpu_layers > 0) {
        fprintf(stderr, "%s: using CUDA backend\n", __func__);
        gpu_backend = ggml_backend_cuda_init(0);
        if (!gpu_backend) {
            fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
        }
    }
#endif

#ifdef GGML_USE_METAL
    if (params.n_gpu_layers > 0) {
        fprintf(stderr, "%s: using Metal backend\n", __func__);
        gpu_backend = ggml_backend_metal_init();
        if (!gpu_backend) {
            fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
        }
    }
#endif

    if (gpu_backend) {
        model.backends.push_back(gpu_backend);
    }

#ifdef GGML_USE_BLAS
    ggml_backend_t blas_backend = ggml_backend_blas_init();
    if (!blas_backend) {
        fprintf(stderr, "%s: failed to initialize BLAS backend\n", __func__);
    } else {
        ggml_backend_blas_set_n_threads(blas_backend, params.n_threads);
        model.backends.push_back(blas_backend);
    }
#endif

    // always add the CPU
backend as a fallback ggml_backend_t cpu_backend = ggml_backend_cpu_init(); ggml_backend_cpu_set_n_threads(cpu_backend, params.n_threads); model.backends.push_back(cpu_backend); } // load the model's weights from a file bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab, const gpt_params & params) { printf("%s: loading model from '%s'\n", __func__, fname.c_str()); auto fin = std::ifstream(fname, std::ios::binary); if (!fin) { fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); return false; } // verify magic { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); if (magic != GGML_FILE_MAGIC) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); return false; } } // load hparams { auto & hparams = model.hparams; fin.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); fin.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); fin.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); fin.read((char *) &hparams.n_head, sizeof(hparams.n_head)); fin.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); fin.read((char *) &hparams.ftype, sizeof(hparams.ftype)); const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab); printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx); printf("%s: n_embd = %d\n", __func__, hparams.n_embd); printf("%s: n_head = %d\n", __func__, hparams.n_head); printf("%s: n_layer = %d\n", __func__, hparams.n_layer); printf("%s: ftype = %d\n", __func__, hparams.ftype); printf("%s: qntvr = %d\n", __func__, qntvr); hparams.ftype %= GGML_QNT_VERSION_FACTOR; } // load vocab { int32_t n_vocab = 0; fin.read((char *) &n_vocab, sizeof(n_vocab)); if (n_vocab != model.hparams.n_vocab) { fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", __func__, fname.c_str(), n_vocab, model.hparams.n_vocab); return false; } std::string word; std::vector buf(128); for (int i = 0; i < n_vocab; i++) { uint32_t len; fin.read((char *) &len, sizeof(len)); buf.resize(len); fin.read((char *) buf.data(), len); word.assign(buf.data(), len); vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; } } // for the big tensors, we have the option to store the data in 16-bit floats or quantized // in order to save memory and also to speed up the computation ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype) (model.hparams.ftype)); if (wtype == GGML_TYPE_COUNT) { fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", __func__, fname.c_str(), model.hparams.ftype); return false; } auto & ctx = model.ctx_w; // create the ggml context { size_t n_tensors = 3 /* input */ + 2 /* kv */ + 6 + 12*model.hparams.n_layer; struct ggml_init_params params = { /*.mem_size =*/ ggml_tensor_overhead() * n_tensors, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; model.ctx_w = ggml_init(params); if (!model.ctx_w) { fprintf(stderr, "%s: ggml_init() failed\n", __func__); return false; } } // create tensors for the weights { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_vocab = hparams.n_vocab; model.layers.resize(n_layer); model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.wte = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); model.wpe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ctx); model.lm_head = ggml_new_tensor_2d(ctx, wtype, n_embd, 
n_vocab); // map by name model.tensors["model/ln_f/g"] = model.ln_f_g; model.tensors["model/ln_f/b"] = model.ln_f_b; model.tensors["model/wte"] = model.wte; model.tensors["model/wpe"] = model.wpe; model.tensors["model/lm_head"] = model.lm_head; for (int i = 0; i < n_layer; ++i) { auto & layer = model.layers[i]; layer.ln_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_2_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.ln_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.c_attn_attn_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 3*n_embd); layer.c_attn_attn_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3*n_embd); layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); layer.c_attn_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.c_mlp_fc_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 4*n_embd); layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4*n_embd); layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4*n_embd, n_embd); layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // map by name model.tensors["model/h" + std::to_string(i) + "/ln_1/g"] = layer.ln_1_g; model.tensors["model/h" + std::to_string(i) + "/ln_1/b"] = layer.ln_1_b; model.tensors["model/h" + std::to_string(i) + "/ln_2/g"] = layer.ln_2_g; model.tensors["model/h" + std::to_string(i) + "/ln_2/b"] = layer.ln_2_b; model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/w"] = layer.c_attn_attn_w; model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/b"] = layer.c_attn_attn_b; model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/w"] = layer.c_attn_proj_w; model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/b"] = layer.c_attn_proj_b; model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/w"] = layer.c_mlp_fc_w; model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/b"] = layer.c_mlp_fc_b; model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/w"] = layer.c_mlp_proj_w; model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/b"] = layer.c_mlp_proj_b; } } // assign tensors to backends init_backends(model, params); ggml_backend_t backend_gpu = model.backends.front(); ggml_backend_t backend_cpu = model.backends.back(); std::map tensor_backends; { const int i_gpu_first_layer = model.hparams.n_layer - params.n_gpu_layers; for (auto it : model.tensors) { const std::string & name = it.first; // input tensors if (name == "model/wte" || name == "model/wpe") { if (params.n_gpu_layers > model.hparams.n_layer) { tensor_backends[name] = backend_gpu; } else { tensor_backends[name] = backend_cpu; } } // output tensors if (name == "model/ln_f/g" || name == "model/ln_f/b" || name == "model/lm_head") { if (params.n_gpu_layers > 0) { tensor_backends[name] = backend_gpu; } else { tensor_backends[name] = backend_cpu; } } // layer tensors if (name.substr(0, 7) == "model/h") { // parse layer number int layer = std::stoi(name.substr(7, 2)); if (layer >= i_gpu_first_layer) { tensor_backends[name] = backend_gpu; } else { tensor_backends[name] = backend_cpu; } } } } // allocate buffers std::map backend_buffers; for (auto backend : model.backends) { // compute the size of the buffer size_t size = 0; for (auto it : model.tensors) { if (tensor_backends[it.first] == backend) { size += ggml_nbytes(it.second) + 512; } } if (size > 0) { printf("%s: %8s buffer size = %8.2f MB\n", __func__, ggml_backend_name(backend), size/1024.0/1024.0); // allocate the buffer ggml_backend_buffer_t buffer = 
ggml_backend_alloc_buffer(backend, size); ggml_backend_buffer_set_usage(buffer, GGML_BACKEND_BUFFER_USAGE_WEIGHTS); model.buffers_w.push_back(buffer); // create an allocator for the buffer to allocate the tensors auto alloc = ggml_tallocr_new(buffer); backend_buffers.insert(std::make_pair(backend, std::move(alloc))); } else { model.buffers_w.push_back(NULL); } } // allocate key + value memory { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_mem = n_layer*n_ctx; const int n_elements = n_embd*n_mem; model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); ggml_set_name(model.memory_k, "model/memory_k"); ggml_set_name(model.memory_v, "model/memory_v"); const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); printf("%s: memory size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem); // create a backend buffer (can be in host or device memory) ggml_backend_t backend_kv = params.n_gpu_layers >= hparams.n_layer/2 ? backend_gpu : backend_cpu; printf("%s: backend_kv = %s\n", __func__, ggml_backend_name(backend_kv)); model.buffer_kv = ggml_backend_alloc_buffer(backend_kv, memory_size + 512*2); // allocate the tensors into the backend buffer { ggml_tallocr alloc = ggml_tallocr_new(model.buffer_kv); // this updates the pointers in the tensors to point to the correct location in the buffer // this is necessary since the ggml_context is .no_alloc == true // note that the buffer can actually be a device buffer, depending on the backend ggml_tallocr_alloc(&alloc, model.memory_k); ggml_tallocr_alloc(&alloc, model.memory_v); } } // load weights { size_t total_size = 0; bool has_lm_head = false; std::vector read_buf; while (true) { int32_t n_dims; int32_t length; int32_t ttype; fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); fin.read(reinterpret_cast(&length), sizeof(length)); fin.read(reinterpret_cast(&ttype), sizeof(ttype)); if (fin.eof()) { break; } int32_t nelements = 1; int32_t ne[2] = { 1, 1 }; for (int i = 0; i < n_dims; ++i) { fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); nelements *= ne[i]; } std::string name(length, 0); fin.read(&name[0], length); if (model.tensors.find(name) == model.tensors.end()) { fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.c_str()); return false; } auto tensor = model.tensors[name]; ggml_set_name(tensor, name.c_str()); if (ggml_nelements(tensor) != nelements) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.c_str()); return false; } if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", __func__, name.c_str(), (int) tensor->ne[0], (int) tensor->ne[1], ne[0], ne[1]); return false; } // for debugging if (0) { printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.c_str(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor)); } const size_t bpe = ggml_type_size(ggml_type(ttype)); if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n", __func__, name.c_str(), ggml_nbytes(tensor), nelements*bpe); return false; } // allocate the tensor ggml_backend_t backend = tensor_backends[name]; ggml_tallocr * alloc = 
&backend_buffers.find(backend)->second; ggml_tallocr_alloc(alloc, tensor); //printf("%s: [%5.5s] %s\n", __func__, ggml_backend_name(backend), name.c_str()); if (ggml_backend_is_cpu(backend) #ifdef GGML_USE_METAL || ggml_backend_is_metal(backend) #endif ) { // for the CPU and Metal backend, we can read directly into the tensor fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); } else { // read into a temporary buffer first, then copy to device memory read_buf.resize(ggml_nbytes(tensor)); fin.read(read_buf.data(), ggml_nbytes(tensor)); ggml_backend_tensor_set(tensor, read_buf.data(), 0, ggml_nbytes(tensor)); } // GPT-2 models share the WTE tensor as the LM head if (name == "model/wte" && has_lm_head == false) { ggml_tallocr * alloc_head = &backend_buffers.find(tensor_backends["model/lm_head"])->second; ggml_tallocr_alloc(alloc_head, model.lm_head); //printf("%s: [%5.5s] %s (copied)\n", __func__, ggml_backend_name(tensor_backends["model/lm_head"]), "model/lm_head"); ggml_backend_tensor_copy(tensor, model.lm_head); total_size += ggml_nbytes(model.lm_head); } if (name == "model/lm_head") { has_lm_head = true; } total_size += ggml_nbytes(tensor); } printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0); } fin.close(); // allocate input tensors { model.embd = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, model.hparams.n_ctx); model.position = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, model.hparams.n_ctx); ggml_set_name(model.embd, "in/embd"); ggml_set_name(model.position, "in/position"); // add input tensors to cpu backend size_t input_size = ggml_nbytes(model.embd) + ggml_nbytes(model.position); // FIXME: use cpu backend after sched impl ggml_backend_t backend_input = params.n_gpu_layers >= model.hparams.n_layer ? backend_gpu : backend_cpu; model.buffer_input = ggml_backend_alloc_buffer(backend_input, input_size + 512*3); printf("%s: backend_in = %s (%zu bytes)\n", __func__, ggml_backend_name(backend_input), input_size); // allocate the tensors into the backend buffer ggml_tallocr alloc = ggml_tallocr_new(model.buffer_input); ggml_tallocr_alloc(&alloc, model.embd); ggml_tallocr_alloc(&alloc, model.position); } return true; } // build the computation graph struct ggml_cgraph * gpt2_graph( const gpt2_model & model, const int n_past, const std::vector & embd_inp) { const int N = embd_inp.size(); const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_head = hparams.n_head; // since we are using ggml-alloc, this buffer only needs enough space to hold the ggml_tensor and ggml_cgraph structs, but not the tensor data static size_t buf_size = ggml_tensor_overhead()*GPT2_MAX_NODES + ggml_graph_overhead_custom(GPT2_MAX_NODES, false); static std::vector buf(buf_size); struct ggml_init_params params = { /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf.data(), /*.no_alloc =*/ true, // the tensors will be allocated later by ggml_gallocr_alloc_graph() }; struct ggml_context * ctx = ggml_init(params); struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GPT2_MAX_NODES, false); struct ggml_tensor * embd = ggml_view_1d(ctx, model.embd, N, 0); // set inputs // TODO: move to gpt2_eval ggml_backend_tensor_set(model.embd, embd_inp.data(), 0, N*ggml_element_size(embd)); struct ggml_tensor * position = ggml_view_1d(ctx, model.position, N, 0); for (int i = 0; i < N; ++i) { int32_t v = n_past + i; ggml_backend_tensor_set(model.position, &v, i*sizeof(int32_t), sizeof(v)); } const float KQ_scale = 
1.0f/sqrtf(float(model.hparams.n_embd)/model.hparams.n_head); // wte + wpe struct ggml_tensor * inpL = ggml_add(ctx, ggml_get_rows(ctx, model.wte, embd), ggml_get_rows(ctx, model.wpe, position)); ggml_set_name(inpL, "inpL"); ggml_set_name(inpL->src[0], "wte"); ggml_set_name(inpL->src[1], "wpe"); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * cur; // norm { // [ 768, N] cur = ggml_norm(ctx, inpL, hparams.eps); ggml_format_name(cur, "l%d.norm", il); // cur = ln_1_g*cur + ln_1_b // [ 768, N] cur = ggml_add(ctx, ggml_mul(ctx, cur, model.layers[il].ln_1_g), model.layers[il].ln_1_b); ggml_format_name(cur, "l%d.ln_1_b", il); ggml_format_name(cur->src[0], "l%d.ln_1_g", il); } // attn // [2304, 768] - model.layers[il].c_attn_attn_w // [2304, 1] - model.layers[il].c_attn_attn_b // [ 768, N] - cur (in) // [2304, N] - cur (out) // // cur = attn_w*cur + attn_b // [2304, N] { cur = ggml_mul_mat(ctx, model.layers[il].c_attn_attn_w, cur); ggml_format_name(cur, "l%d.attn_w", il); cur = ggml_add(ctx, cur, model.layers[il].c_attn_attn_b); ggml_format_name(cur, "l%d.attn_b", il); } // self-attention { struct ggml_tensor * Qcur = ggml_view_2d(ctx, cur, n_embd, N, cur->nb[1], 0*sizeof(float)*n_embd); struct ggml_tensor * Kcur = ggml_view_2d(ctx, cur, n_embd, N, cur->nb[1], 1*sizeof(float)*n_embd); struct ggml_tensor * Vcur = ggml_view_2d(ctx, cur, n_embd, N, cur->nb[1], 2*sizeof(float)*n_embd); ggml_format_name(Qcur, "l%d.Qcur", il); ggml_format_name(Kcur, "l%d.Kcur", il); ggml_format_name(Vcur, "l%d.Vcur", il); // store key and value to memory if (N >= 1) { struct ggml_tensor * k = ggml_view_1d(ctx, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past)); struct ggml_tensor * v = ggml_view_1d(ctx, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past)); ggml_build_forward_expand(gf, ggml_cpy(ctx, Kcur, k)); ggml_build_forward_expand(gf, ggml_cpy(ctx, Vcur, v)); } // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3) // [64, N, 12] struct ggml_tensor * Q = ggml_permute(ctx, ggml_cont_3d(ctx, Qcur, n_embd/n_head, n_head, N), 0, 2, 1, 3); ggml_format_name(Q, "l%d.Q", il); // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3) // [64, n_past + N, 12] struct ggml_tensor * K = ggml_permute(ctx, ggml_reshape_3d(ctx, ggml_view_1d(ctx, model.memory_k, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_k)*n_embd), n_embd/n_head, n_head, n_past + N), 0, 2, 1, 3); ggml_format_name(K, "l%d.K", il); // GG: flash attention //struct ggml_tensor * V = // ggml_cpy(ctx0, // ggml_permute(ctx0, // ggml_reshape_3d(ctx0, // ggml_view_1d(ctx0, model.memory_v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_v)*n_embd), // n_embd/n_head, n_head, n_past + N), // 1, 2, 0, 3), // ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_past + N, n_embd/n_head, n_head)); //struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, true); // K * Q // [n_past + N, N, 12] struct ggml_tensor * KQ = ggml_mul_mat(ctx, K, Q); ggml_format_name(KQ, "l%d.KQ", il); // KQ_scaled = KQ / sqrt(n_embd/n_head) // [n_past + N, N, 12] struct ggml_tensor * KQ_scaled = ggml_scale(ctx, KQ, KQ_scale); ggml_format_name(KQ_scaled, "l%d.KQ_scaled", il); // KQ_masked = mask_past(KQ_scaled) // [n_past + N, N, 12] struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx, KQ_scaled, n_past); ggml_format_name(KQ_masked, "l%d.KQ_masked", il); // KQ = soft_max(KQ_masked) // [n_past + N, N, 12] struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx, 
KQ_masked); ggml_format_name(KQ_soft_max, "l%d.KQ_soft_max", il); // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous() // [n_past + N, 64, 12] struct ggml_tensor * V_trans = ggml_cont_3d(ctx, ggml_permute(ctx, ggml_reshape_3d(ctx, ggml_view_1d(ctx, model.memory_v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_v)*n_embd), n_embd/n_head, n_head, n_past + N), 1, 2, 0, 3), n_past + N, n_embd/n_head, n_head); // KQV = transpose(V) * KQ_soft_max // [64, N, 12] struct ggml_tensor * KQV = ggml_mul_mat(ctx, V_trans, KQ_soft_max); ggml_format_name(KQV, "l%d.KQV", il); // KQV_merged = KQV.permute(0, 2, 1, 3) // [64, 12, N] struct ggml_tensor * KQV_merged = ggml_permute(ctx, KQV, 0, 2, 1, 3); ggml_format_name(KQV_merged, "l%d.KQV_merged", il); // cur = KQV_merged.contiguous().view(n_embd, N) // [768, N] cur = ggml_cont_2d(ctx, KQV_merged, n_embd, N); ggml_format_name(cur, "l%d.KQV_merged_contiguous", il); } // projection // [ 768, 768] - model.layers[il].c_attn_proj_w // [ 768, 1] - model.layers[il].c_attn_proj_b // [ 768, N] - cur (in) // [ 768, N] - cur (out) // // cur = proj_w*cur + proj_b // [768, N] { cur = ggml_mul_mat(ctx, model.layers[il].c_attn_proj_w, cur); ggml_format_name(cur, "l%d.attn_proj_w", il); cur = ggml_add(ctx, cur, model.layers[il].c_attn_proj_b); ggml_format_name(cur, "l%d.attn_proj_b", il); } // add the input cur = ggml_add(ctx, cur, inpL); ggml_format_name(cur, "l%d.add", il); struct ggml_tensor * inpFF = cur; // feed-forward network { // norm { cur = ggml_norm(ctx, inpFF, hparams.eps); ggml_format_name(cur, "l%d.FFnorm", il); // cur = ln_2_g*cur + ln_2_b // [ 768, N] cur = ggml_add(ctx, ggml_mul(ctx, cur, model.layers[il].ln_2_g), model.layers[il].ln_2_b); ggml_format_name(cur, "l%d.ln_2_b", il); ggml_format_name(cur->src[0], "l%d.ln_2_g", il); } // fully connected // [3072, 768] - model.layers[il].c_mlp_fc_w // [3072, 1] - model.layers[il].c_mlp_fc_b // [ 768, N] - cur (in) // [3072, N] - cur (out) // // cur = fc_w*cur + fc_b // [3072, N] cur = ggml_mul_mat(ctx, model.layers[il].c_mlp_fc_w, cur); ggml_format_name(cur, "l%d.mlp_fc_w", il); cur = ggml_add(ctx, cur, model.layers[il].c_mlp_fc_b); ggml_format_name(cur, "l%d.mlp_fc_b", il); // GELU activation // [3072, N] cur = ggml_gelu(ctx, cur); ggml_format_name(cur, "l%d.gelu", il); // projection // [ 768, 3072] - model.layers[il].c_mlp_proj_w // [ 768, 1] - model.layers[il].c_mlp_proj_b // [3072, N] - cur (in) // [ 768, N] - cur (out) // // cur = proj_w*cur + proj_b // [768, N] cur = ggml_mul_mat(ctx, model.layers[il].c_mlp_proj_w, cur); ggml_format_name(cur, "l%d.mlp_proj_w", il); cur = ggml_add(ctx, cur, model.layers[il].c_mlp_proj_b); ggml_format_name(cur, "l%d.mlp_proj_b", il); } // input for next layer inpL = ggml_add(ctx, cur, inpFF); ggml_format_name(inpL, "l%d.add2", il); } // norm { // [ 768, N] inpL = ggml_norm(ctx, inpL, hparams.eps); ggml_format_name(inpL, "out_norm"); // inpL = ln_f_g*inpL + ln_f_b // [ 768, N] inpL = ggml_add(ctx, ggml_mul(ctx, inpL, model.ln_f_g), model.ln_f_b); ggml_format_name(inpL, "out_ln_f_b"); ggml_format_name(inpL->src[0], "out_ln_f_g"); } // inpL = WTE * inpL // [ 768, 50257] - model.lm_head // [ 768, N] - inpL inpL = ggml_mul_mat(ctx, model.lm_head, inpL); ggml_format_name(inpL, "out_lm_head"); // logits -> probs //inpL = ggml_soft_max(ctx0, inpL); ggml_build_forward_expand(gf, inpL); ggml_free(ctx); return gf; } // evaluate the transformer // // - model: the model // - sched: the backend scheduler // - n_past: the context size so 
far // - embd_inp: the embeddings of the tokens in the context // - embd_w: the predicted logits for the next token // bool gpt2_eval( const gpt2_model & model, ggml_backend_sched_t sched, const int n_past, const std::vector & embd_inp, std::vector & embd_w) { const int N = embd_inp.size(); const auto & hparams = model.hparams; const int n_vocab = hparams.n_vocab; struct ggml_cgraph * gf = gpt2_graph(model, n_past, embd_inp); // run the computation ggml_backend_sched_reset(sched); ggml_backend_sched_graph_compute(sched, gf); //if (n_past%100 == 0) { // ggml_graph_print (&gf); // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot"); //} // in this case, the output tensor is the last one in the graph struct ggml_tensor * inpL = ggml_graph_node(gf, -1); //embd_w.resize(n_vocab*N); //ggml_backend_tensor_get(inpL, embd_w.data(), 0, sizeof(float)*n_vocab*N); // return result just for the last token embd_w.resize(n_vocab); ggml_backend_tensor_get(inpL, embd_w.data(), (n_vocab*(N-1))*sizeof(float), sizeof(float)*n_vocab); return true; } int main(int argc, char ** argv) { ggml_time_init(); const int64_t t_main_start_us = ggml_time_us(); gpt_params params; params.model = "models/gpt-2-117M/ggml-model.bin"; if (gpt_params_parse(argc, argv, params) == false) { return 1; } if (params.seed < 0) { params.seed = time(NULL); } printf("%s: seed = %d\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.prompt.empty()) { params.prompt = gpt_random_prompt(rng); } int64_t t_load_us = 0; gpt_vocab vocab; gpt2_model model; // load the model { const int64_t t_start_us = ggml_time_us(); if (!gpt2_model_load(params.model, model, vocab, params)) { fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); return 1; } t_load_us = ggml_time_us() - t_start_us; test_gpt_tokenizer(vocab, params.token_test); } // create the backend scheduler // the scheduler handles the allocation of the compute buffers and the scheduling of the computation between the different backends ggml_backend_sched_t sched; { // initialize the scheduler sched = ggml_backend_sched_new(model.backends.data(), NULL, model.backends.size(), GPT2_MAX_NODES, false, true); // create the worst case graph for memory usage estimation int n_tokens = std::min(model.hparams.n_ctx, params.n_batch); int n_past = model.hparams.n_ctx - n_tokens; struct ggml_cgraph * gf = gpt2_graph(model, n_past, std::vector(n_tokens, 0)); ggml_backend_sched_reserve(sched, gf); // compute the required memory size_t mem_size = 0; for (size_t i = 0; i < model.backends.size(); i++) { size_t size = ggml_backend_sched_get_buffer_size(sched, model.backends[i]); if (size > 0) { mem_size += size; printf("%s: %8s compute buffer size = %8.2f MB\n", __func__, ggml_backend_name(model.backends[i]), size/1024.0/1024.0); //printf("%s: %8s compute buffer size = %zu bytes\n", __func__, ggml_backend_name(model.backends[i]), size); } } printf("%s: total compute buffer size: %.2f MB\n", __func__, mem_size/1024.0/1024.0); } int n_past = 0; int64_t t_sample_us = 0; int64_t t_predict_us = 0; std::vector logits; // tokenize the prompt std::vector embd_inp = ::gpt_tokenize(vocab, params.prompt); params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str()); printf("%s: number of tokens in prompt = %zu, first 8 tokens: ", __func__, embd_inp.size()); for (int i = 0; i < std::min(8, (int) embd_inp.size()); i++) { printf("%d ", embd_inp[i]); } printf("\n\n"); // submit the input 
prompt token-by-token // this reduces the memory usage during inference, at the cost of a bit of speed at the beginning std::vector embd; for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) { // predict if (embd.size() > 0) { const int64_t t_start_us = ggml_time_us(); if (!gpt2_eval(model, sched, n_past, embd, logits)) { printf("Failed to predict\n"); return 1; } t_predict_us += ggml_time_us() - t_start_us; } n_past += embd.size(); embd.clear(); if (i >= embd_inp.size()) { // sample next token const int top_k = params.top_k; const float top_p = params.top_p; const float temp = params.temp; const int n_vocab = model.hparams.n_vocab; gpt_vocab::id id = 0; { const int64_t t_start_sample_us = ggml_time_us(); id = gpt_sample_top_k_top_p(vocab, logits.data() + (logits.size() - n_vocab), top_k, top_p, temp, rng); t_sample_us += ggml_time_us() - t_start_sample_us; } // add it to the context embd.push_back(id); } else { // if here, it means we are still processing the input prompt for (size_t k = i; k < embd_inp.size(); k++) { embd.push_back(embd_inp[k]); if (int32_t(embd.size()) >= params.n_batch) { break; } } i += embd.size() - 1; } // display text for (auto id : embd) { printf("%s", vocab.id_to_token[id].c_str()); } fflush(stdout); // end of text token if (embd.back() == 50256) { break; } } // report timing { const int64_t t_main_end_us = ggml_time_us(); printf("\n\n"); printf("%s: load time = %8.2f ms\n", __func__, t_load_us/1000.0f); printf("%s: sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f); printf("%s: predict time = %8.2f ms / %.2f ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past); printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f); } ggml_free(model.ctx_w); ggml_backend_sched_free(sched); ggml_backend_buffer_free(model.buffer_kv); for (auto buf : model.buffers_w) { ggml_backend_buffer_free(buf); } for (auto backend : model.backends) { ggml_backend_free(backend); } return 0; } ggml-org-ggml-7ec8045/examples/gpt-2/quantize.cpp000066400000000000000000000133361506673203700216440ustar00rootroot00000000000000#include "ggml.h" #include "common.h" #include "common-ggml.h" #include #include #include #include #include #include #include #include #include // default hparams (GPT-2 117M) struct gpt2_hparams { int32_t n_vocab = 50257; int32_t n_ctx = 1024; int32_t n_embd = 768; int32_t n_head = 12; int32_t n_layer = 12; int32_t ftype = 1; }; // quantize a model bool gpt2_model_quantize(const std::string & fname_inp, const std::string & fname_out, ggml_ftype ftype) { gpt_vocab vocab; printf("%s: loading model from '%s'\n", __func__, fname_inp.c_str()); auto finp = std::ifstream(fname_inp, std::ios::binary); if (!finp) { fprintf(stderr, "%s: failed to open '%s' for reading\n", __func__, fname_inp.c_str()); return false; } auto fout = std::ofstream(fname_out, std::ios::binary); if (!fout) { fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname_out.c_str()); return false; } // verify magic { uint32_t magic; finp.read((char *) &magic, sizeof(magic)); if (magic != GGML_FILE_MAGIC) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str()); return false; } fout.write((char *) &magic, sizeof(magic)); } gpt2_hparams hparams; // load hparams { finp.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); finp.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); finp.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); finp.read((char *) &hparams.n_head, 
sizeof(hparams.n_head)); finp.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); finp.read((char *) &hparams.ftype, sizeof(hparams.ftype)); const int32_t qntvr_src = hparams.ftype / GGML_QNT_VERSION_FACTOR; const int32_t ftype_dst = GGML_QNT_VERSION * GGML_QNT_VERSION_FACTOR + ftype; printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab); printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx); printf("%s: n_embd = %d\n", __func__, hparams.n_embd); printf("%s: n_head = %d\n", __func__, hparams.n_head); printf("%s: n_layer = %d\n", __func__, hparams.n_layer); printf("%s: ftype (src) = %d\n", __func__, hparams.ftype); printf("%s: qntvr (src) = %d\n", __func__, qntvr_src); printf("%s: ftype (dst) = %d\n", __func__, ftype_dst); printf("%s: qntvr (dst) = %d\n", __func__, GGML_QNT_VERSION); fout.write((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); fout.write((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); fout.write((char *) &hparams.n_embd, sizeof(hparams.n_embd)); fout.write((char *) &hparams.n_head, sizeof(hparams.n_head)); fout.write((char *) &hparams.n_layer, sizeof(hparams.n_layer)); fout.write((char *) &ftype_dst, sizeof(ftype_dst)); } // load vocab { int32_t n_vocab = 0; finp.read ((char *) &n_vocab, sizeof(n_vocab)); fout.write((char *) &n_vocab, sizeof(n_vocab)); if (n_vocab != hparams.n_vocab) { fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", __func__, fname_inp.c_str(), n_vocab, hparams.n_vocab); return false; } std::string word; for (int i = 0; i < n_vocab; i++) { uint32_t len; finp.read ((char *) &len, sizeof(len)); fout.write((char *) &len, sizeof(len)); word.resize(len); finp.read ((char *) word.data(), len); fout.write((char *) word.data(), len); vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; } } // regexes of tensor names to be quantized const std::vector to_quant = { "model/wte", "model/lm_head", "model/h.*/attn/c_attn/w", "model/h.*/attn/c_proj/w", "model/h.*/mlp/c_fc/w", "model/h.*/mlp/c_proj/w", }; if (!ggml_common_quantize_0(finp, fout, ftype, to_quant, {})) { fprintf(stderr, "%s: failed to quantize model '%s'\n", __func__, fname_inp.c_str()); return false; } finp.close(); fout.close(); return true; } // usage: // ./gpt-2-quantize models/gpt-2-117M/ggml-model.bin models/gpt-2-117M/ggml-model-quant.bin type // int main(int argc, char ** argv) { if (argc != 4) { fprintf(stderr, "usage: %s model-f32.bin model-quant.bin type\n", argv[0]); ggml_print_ftypes(stderr); return 1; } // needed to initialize f16 tables { struct ggml_init_params params = { 0, NULL, false }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } const std::string fname_inp = argv[1]; const std::string fname_out = argv[2]; const ggml_ftype ftype = ggml_parse_ftype(argv[3]); const int64_t t_main_start_us = ggml_time_us(); int64_t t_quantize_us = 0; // load the model { const int64_t t_start_us = ggml_time_us(); if (!gpt2_model_quantize(fname_inp, fname_out, ggml_ftype(ftype))) { fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str()); return 1; } t_quantize_us = ggml_time_us() - t_start_us; } // report timing { const int64_t t_main_end_us = ggml_time_us(); printf("\n"); printf("%s: quantize time = %8.2f ms\n", __func__, t_quantize_us/1000.0f); printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f); } return 0; } 
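As an aside on the `to_quant` list above: the patterns are ordinary regular expressions matched against full tensor names. Below is a minimal, self-contained sketch of this kind of name filtering (assuming `std::regex` full-match semantics; the actual filtering happens inside `ggml_common_quantize_0`, which is not shown in this dump):

```cpp
#include <cstdio>
#include <regex>
#include <string>
#include <vector>

// returns true if `name` fully matches any of the given patterns
static bool tensor_matches(const std::string & name, const std::vector<std::string> & patterns) {
    for (const auto & pat : patterns) {
        if (std::regex_match(name, std::regex(pat))) {
            return true;
        }
    }
    return false;
}

int main() {
    const std::vector<std::string> to_quant = {
        "model/wte",
        "model/h.*/attn/c_attn/w",
    };
    printf("%d\n", tensor_matches("model/h3/attn/c_attn/w", to_quant) ? 1 : 0); // 1: weight matrix, quantized
    printf("%d\n", tensor_matches("model/h3/attn/c_attn/b", to_quant) ? 1 : 0); // 0: bias, kept in full precision
    return 0;
}
```

With a filter like this, the large two-dimensional weight matrices get quantized while the small bias vectors stay in full precision, which is the usual trade-off between file size and accuracy.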
ggml-org-ggml-7ec8045/examples/gpt-j/000077500000000000000000000000001506673203700173625ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/gpt-j/CMakeLists.txt000066400000000000000000000004671506673203700221310ustar00rootroot00000000000000# # gpt-j set(TEST_TARGET gpt-j) add_executable(${TEST_TARGET} main.cpp) target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml) # # gpt-j-quantize set(TEST_TARGET gpt-j-quantize) add_executable(${TEST_TARGET} quantize.cpp) target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml) ggml-org-ggml-7ec8045/examples/gpt-j/README.md000066400000000000000000000253151506673203700206470ustar00rootroot00000000000000# gpt-j Local GPT-J inference on your computer using C/C++ No video card required. You just need to have 16 GB of RAM. ## Motivation The GPT-J 6B model is the open-source alternative to OpenAI's GPT-3. It's basically a neural network that allows you to generate coherent, human-like text given a certain context (prompt). The GPT-J model is quite big - the compact version of the model uses 16-bit floating point representation of the weights and is still 12 GB big. This means that in order to run inference on your computer, you would need to have a video card with at least 12 GB of video RAM. Alternatively, you can try to run the python implementations on the CPU, but that would probably not be very efficient as they are primarily optimized for running on a GPU (or at least this is my guess - I don't have much experience with python). I wanted to try and run the model on my MacBook, so I decided to implement the model inference from scratch using my own custom build tensor library. The tensor library (called [ggml](https://github.com/ggerganov/ggml), written in C) is in early development stage, but it already allows me to run the GPT-J model. On my 32GB MacBook M1 Pro, I achieve an inference speed of about `125 ms/token` or about ~6 words per second (1 word typically consists of 1 or 2 tokens). Here is a sample run with prompt `int main(int argc, char ** argv) {`: ```bash $ time ./bin/gpt-j -p "int main(int argc, char ** argv) {" gptj_model_load: loading model from 'models/gpt-j-6B/ggml-model.bin' - please wait ... gptj_model_load: n_vocab = 50400 gptj_model_load: n_ctx = 2048 gptj_model_load: n_embd = 4096 gptj_model_load: n_head = 16 gptj_model_load: n_layer = 28 gptj_model_load: n_rot = 64 gptj_model_load: f16 = 1 gptj_model_load: ggml ctx size = 13334.86 MB gptj_model_load: memory_size = 1792.00 MB, n_mem = 57344 gptj_model_load: ................................... done gptj_model_load: model size = 11542.79 MB / num tensors = 285 main: number of tokens in prompt = 13 int main(int argc, char ** argv) { (void)argc; (void)argv; { struct sockaddr_in addr; int addrlen; char * ip = "192.168.1.4"; int i; if ( (addrlen = sizeof(addr)) == -1 ) return -1; for (i = 0; i < 10; ++i) { addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr(ip); main: mem per token = 16430420 bytes main: load time = 6211.48 ms main: sample time = 13.74 ms main: predict time = 26420.34 ms / 124.62 ms per token main: total time = 33035.37 ms real 0m33.171s user 3m32.269s sys 0m3.686s $ ``` It took ~6.2 seconds to load the model to memory. After that, it took ~26.4 seconds to generate 200 tokens of what looks like to be the beginning of a networking program in C. Pretty cool! Here is another run, just for fun: ```bash time ./bin/gpt-j -n 500 -t 8 -p "Ask HN: Inherited the worst code and tech team I have ever seen. How to fix it? 
" gptj_model_load: loading model from 'models/gpt-j-6B/ggml-model.bin' - please wait ... gptj_model_load: n_vocab = 50400 gptj_model_load: n_ctx = 2048 gptj_model_load: n_embd = 4096 gptj_model_load: n_head = 16 gptj_model_load: n_layer = 28 gptj_model_load: n_rot = 64 gptj_model_load: f16 = 1 gptj_model_load: ggml ctx size = 13334.86 MB gptj_model_load: memory_size = 1792.00 MB, n_mem = 57344 gptj_model_load: ................................... done gptj_model_load: model size = 11542.79 MB / num tensors = 285 main: number of tokens in prompt = 24 Ask HN: Inherited the worst code and tech team I have ever seen. How to fix it? I've inherited a team with some very strange and un-documented practices, one of them is that they use an old custom application with a very slow tech stack written in Python that the team doesn't want to touch but also doesn't want to throw away as it has some "legacy" code in it. The problem is, the tech stack is very very slow. They have a single web server on a VM that is slow. The server is a little bit busy (not very busy though) and they have a lot of processes (30+ that are constantly being spawned by the application) They have an application that is single threaded and was written in Python and the team don't want to touch this, and the application is very slow. My task as a new member of the team is to fix this. I'm a senior dev on the team (3 years on the project) and have been told that I will take the lead on this task. I know next to nothing about Python. So here is what I have so far. What I have done is I've been trying to debug the processes with the "ps" command. This way I can see what is running and where. From what I see, the application spawns 10 processes a minute and some of them are used for nothing. I have also started to look for the code. The application source is not in GitHub or any other repository, it is only on our internal GitLab. What I've found so far: The application uses a custom SQLAlchemy implementation to interact with the data. I've looked at the source, it looks like an object cache or something like that. But from what I've seen, the cache gets full every 20 minutes and then gets cleared with a special command. Another strange thing is that the application creates a file for every entry in the database (even if the entry already exists). I've looked at the file to see if it contains something, but it seems to be a JSON file with lots of records. The other strange thing is that I can only find the database tables in the GitLab repository and not the code. So I can't really understand how the application is supposed to interact with the database. I also found a "log" directory, but the code is encrypted with AES. From what I've found, it is in main: mem per token = 16430420 bytes main: load time = 3900.10 ms main: sample time = 32.58 ms main: predict time = 68049.91 ms / 130.11 ms per token main: total time = 73020.05 ms real 1m13.156s user 9m1.328s sys. 0m7.103s ``` ## Implementation details The high level implementation of the model is contained in the [main.cpp](main.cpp) file. The core computations are performed by the [ggml](https://github.com/ggerganov/ggml/blob/master/include/ggml.h) library. #### Matrix multiplication The most performance critical part of the implementation is of course the matrix multiplication routine. 99% of the time is spent here, so it was important to optimize this as much as possible. 
On Arm64, I utilize the 128-bit NEON intrinsics for 16-bit floating point operations: https://github.com/ggerganov/ggml/blob/fb558f78d905f85c54813602649ddd628ffe0f3a/src/ggml.c#L187-L243 These instructions allow each core to operate simultaneously on 64 16-bit floats. I'm no expert in SIMD, but after quite some trials this was the most efficient code for the dot product of a row and column that I could come up with. Combined with the parallel computation on 8 CPU threads, I believe I'm close to the maximum performance that one could possibly get on the M1 CPU. Still, I'm curious to know if there is a more efficient way to implement this. #### Attempt to use the M1 GPU One interesting property of the GPT-J transformer architecture is that it allows you to perform part of the inference in parallel - i.e. the Feed-forward network can be computed in parallel to the Self-attention layer: https://github.com/ggerganov/ggml/blob/fb558f78d905f85c54813602649ddd628ffe0f3a/examples/gpt-j/main.cpp#L507-L531 So I thought why not try and bring in the M1 GPU to compute half of the neural network in parallel to the CPU and potentially gain some extra performance. Thanks to the M1's shared memory model, it was relatively easy to offload part of the computation to the GPU using Apple's [Metal Performance Shaders](https://developer.apple.com/documentation/metalperformanceshaders). The GPU shares the host memory, so there is no need to copy the data back and forth as you would normally do with CUDA or OpenCL. The weight matrices are directly available to be used by the GPU. However, to my surprise, using MPS together with the CPU did not lead to any performance improvement at all. My conclusion was that the 8-thread NEON CPU computation is already saturating the memory bandwidth of the M1 and since the CPU and the GPU on the MacBook are sharing that bandwidth, it does not help to offload the computation to the GPU. Another observation was that the MPS GPU matrix multiplication using 16-bit floats had the same performance as the 8-thread NEON CPU implementation. Again, I explain this with a saturated memory channel. But of course, my explanation could be totally wrong and somehow the implementation wasn't utilizing the resources correctly. In the end, I decided not to use MPS or the GPU altogether. #### Zero memory allocations Another property of my implementation is that it does not perform any memory allocations once the model is loaded into memory. All required memory is allocated at the start of the program with a single `malloc` (technically 2 calls, but that is not important). ## Usage If you want to give this a try and you are on Linux or Mac OS, simply follow these instructions: ```bash # Download the ggml-compatible GPT-J 6B model (requires 12GB disk space) ../examples/gpt-j/download-ggml-model.sh 6B # Run the inference (requires 16GB of CPU RAM) ./bin/gpt-j -m models/gpt-j-6B/ggml-model.bin -p "This is an example" # Input prompt through pipe and run the inference. echo "This is an example" > prompt.txt cat prompt.txt | ./bin/gpt-j -m models/gpt-j-6B/ggml-model.bin ``` To run the `gpt-j` tool, you need the 12GB `ggml-model.bin` file which contains the GPT-J model in [ggml](https://github.com/ggerganov/ggml) compatible format. In the instructions above, the binary file is downloaded from my repository on Hugging Face using the [download-ggml-model.sh](download-ggml-model.sh) script.
You can also download the file manually from this link: https://huggingface.co/ggerganov/ggml/tree/main --- Alternatively, if you don't want to download the 12GB ggml model file, you can perform the conversion yourself using python. First, you need to download the full GPT-J model from here: https://huggingface.co/EleutherAI/gpt-j-6B Note that the full model is quite big - about 72 GB. After you download it, you need to convert it to ggml format using the [convert-h5-to-ggml.py](convert-h5-to-ggml.py) script. This will generate the `ggml-model.bin` file, which you can then use with the `gpt-j` program. ## GPT-2 I also implemented a tool for CPU inference using the smaller GPT-2 models. They have worse quality compared to GPT-J, but are much faster to execute. For example, the Small GPT-2 model is only 240 MB big and the inference speed on my MacBook is about 200 tokens/sec. For more details, check out the GPT-2 example here: [gpt-2](https://github.com/ggerganov/ggml/tree/master/examples/gpt-2) ggml-org-ggml-7ec8045/examples/gpt-j/convert-h5-to-ggml.py000066400000000000000000000126051506673203700232760ustar00rootroot00000000000000# Convert GPT-J-6B h5 transformer model to ggml format # # Load the model using GPTJForCausalLM. # Iterate over all variables and write them to a binary file. # # For each variable, write the following: # - Number of dimensions (int) # - Name length (int) # - Dimensions (int[n_dims]) # - Name (char[name_length]) # - Data (float[n_dims]) # # By default, the bigger matrices are converted to 16-bit floats. # This can be disabled by adding the "use-f32" CLI argument. # # At the start of the ggml file we write the model parameters # and vocabulary. # import sys import struct import json import torch import numpy as np from transformers import GPTJForCausalLM # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py def bytes_to_unicode(): """ Returns list of utf-8 byte and a corresponding list of unicode strings. The reversible bpe codes work on unicode strings. This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. This is a significant percentage of your normal, say, 32K bpe vocab. To avoid that, we want lookup tables between utf-8 bytes and unicode strings. And avoids mapping to whitespace/control characters the bpe code barfs on.
""" bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1)) cs = bs[:] n = 0 for b in range(2**8): if b not in bs: bs.append(b) cs.append(2**8+n) n += 1 cs = [chr(n) for n in cs] return dict(zip(bs, cs)) if len(sys.argv) < 3: print("Usage: convert-h5-to-ggml.py dir-model [use-f32]\n") print(" ftype == 0 -> float32") print(" ftype == 1 -> float16") sys.exit(1) # output in the same directory as the model dir_model = sys.argv[1] fname_out = sys.argv[1] + "/ggml-model.bin" with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f: encoder = json.load(f) with open(dir_model + "/added_tokens.json", "r", encoding="utf-8") as f: encoder_added = json.load(f) with open(dir_model + "/config.json", "r", encoding="utf-8") as f: hparams = json.load(f) # possible data types # ftype == 0 -> float32 # ftype == 1 -> float16 # # map from ftype to string ftype_str = ["f32", "f16"] ftype = 1 if len(sys.argv) > 2: ftype = int(sys.argv[2]) if ftype < 0 or ftype > 1: print("Invalid ftype: " + str(ftype)) sys.exit(1) fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin" model = GPTJForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True) #print (model) list_vars = model.state_dict() #print (list_vars) fout = open(fname_out, "wb") fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex fout.write(struct.pack("i", hparams["vocab_size"])) fout.write(struct.pack("i", hparams["n_positions"])) fout.write(struct.pack("i", hparams["n_embd"])) fout.write(struct.pack("i", hparams["n_head"])) fout.write(struct.pack("i", hparams["n_layer"])) fout.write(struct.pack("i", hparams["rotary_dim"])) fout.write(struct.pack("i", ftype)) byte_encoder = bytes_to_unicode() byte_decoder = {v:k for k, v in byte_encoder.items()} fout.write(struct.pack("i", len(encoder) + len(encoder_added))) for key in encoder: text = bytearray([byte_decoder[c] for c in key]) fout.write(struct.pack("i", len(text))) fout.write(text) for key in encoder_added: text = bytearray([byte_decoder[c] for c in key]) fout.write(struct.pack("i", len(text))) fout.write(text) for name in list_vars.keys(): data = list_vars[name].squeeze().numpy() print("Processing variable: " + name + " with shape: ", data.shape) # we don't need these if name.endswith("attn.masked_bias") or name.endswith(".attn.bias"): print(" Skipping variable: " + name) continue n_dims = len(data.shape); # ftype == 0 -> float32, ftype == 1 -> float16 ftype_cur = 0; if ftype != 0: if name[-7:] == ".weight" and n_dims == 2: print(" Converting to float16") data = data.astype(np.float16) ftype_cur = 1 else: print(" Converting to float32") data = data.astype(np.float32) ftype_cur = 0 else: if data.dtype != np.float32: print(" Converting to float32") data = data.astype(np.float32) ftype_cur = 0 # for efficiency - transpose these matrices: # (note - with latest ggml this is no longer more efficient, so disabling it) # "transformer.h.*.mlp.fc_in.weight" # "transformer.h.*.attn.out_proj.weight" # "transformer.h.*.attn.q_proj.weight" # "transformer.h.*.attn.k_proj.weight" # "transformer.h.*.attn.v_proj.weight" #if name.endswith(".mlp.fc_in.weight") or \ # name.endswith(".attn.out_proj.weight") or \ # name.endswith(".attn.q_proj.weight") or \ # name.endswith(".attn.k_proj.weight") or \ # name.endswith(".attn.v_proj.weight"): # print(" Transposing") # data = data.transpose() # header str = name.encode('utf-8') fout.write(struct.pack("iii", n_dims, len(str), ftype_cur)) for i in range(n_dims): fout.write(struct.pack("i", 
data.shape[n_dims - 1 - i])) fout.write(str); # data data.tofile(fout) fout.close() print("Done. Output file: " + fname_out) print("") ggml-org-ggml-7ec8045/examples/gpt-j/download-ggml-model.sh000077500000000000000000000033001506673203700235460ustar00rootroot00000000000000#!/bin/bash # This script downloads GPT-J model files that have already been converted to ggml format. # This way you don't have to convert them yourself. # # If you want to download the original GPT-J model files, use the "download-model.sh" script instead. #src="https://ggml.ggerganov.com" #pfx="ggml-model-gpt-j" src="https://huggingface.co/ggerganov/ggml" pfx="resolve/main/ggml-model-gpt-j" ggml_path=$(dirname $(realpath $0)) # GPT-J models models=( "6B" ) # list available models function list_models { printf "\n" printf " Available models:" for model in "${models[@]}"; do printf " $model" done printf "\n\n" } if [ "$#" -ne 1 ]; then printf "Usage: $0 \n" list_models exit 1 fi model=$1 if [[ ! " ${models[@]} " =~ " ${model} " ]]; then printf "Invalid model: $model\n" list_models exit 1 fi # download ggml model printf "Downloading ggml model $model ...\n" mkdir -p models/gpt-j-$model if [ -x "$(command -v wget)" ]; then wget --quiet --show-progress -O models/gpt-j-$model/ggml-model.bin $src/$pfx-$model.bin elif [ -x "$(command -v curl)" ]; then curl -L --output models/gpt-j-$model/ggml-model.bin $src/$pfx-$model.bin else printf "Either wget or curl is required to download models.\n" exit 1 fi if [ $? -ne 0 ]; then printf "Failed to download ggml model $model \n" printf "Please try again later or download the original GPT-J model files and convert them yourself.\n" exit 1 fi printf "Done! Model '$model' saved in 'models/gpt-j-$model/ggml-model.bin'\n" printf "You can now use it like this:\n\n" printf " $ ./bin/gpt-j -m models/gpt-j-$model/ggml-model.bin -p \"This is an example\"\n" printf "\n" ggml-org-ggml-7ec8045/examples/gpt-j/download-model.sh000077500000000000000000000010731506673203700226270ustar00rootroot00000000000000#!/bin/bash printf "To obtain the GPT-J 6B model files, please visit: https://huggingface.co/EleutherAI/gpt-j-6B\n\n" printf "The model is very big. 
For example, the repository above is 72GB in size.\n" printf "If you are sure that you want to clone it, simply run the following command:\n\n" printf " $ git clone https://huggingface.co/EleutherAI/gpt-j-6B models/gpt-j-6B\n\n" printf "Alternatively, use the 'download-ggml-model.sh' script to download a 12GB ggml version of the model.\n" printf "This version is enough to run inference using the ggml library.\n\n" ggml-org-ggml-7ec8045/examples/gpt-j/main.cpp000066400000000000000000000631111506673203700210140ustar00rootroot00000000000000#include "ggml.h" #include "ggml-cpu.h" #include "common.h" #include "common-ggml.h" #include <cassert> #include <cmath> #include <cstdio> #include <cstring> #include <fstream> #include <map> #include <string> #include <vector> #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #endif // default hparams (GPT-J 6B) struct gptj_hparams { int32_t n_vocab = 50400; int32_t n_ctx = 2048; int32_t n_embd = 4096; int32_t n_head = 16; int32_t n_layer = 28; int32_t n_rot = 64; int32_t ftype = 1; float eps = 1e-5f; }; struct gptj_layer { // normalization struct ggml_tensor * ln_1_g; struct ggml_tensor * ln_1_b; // attention struct ggml_tensor * c_attn_q_proj_w; struct ggml_tensor * c_attn_k_proj_w; struct ggml_tensor * c_attn_v_proj_w; struct ggml_tensor * c_attn_proj_w; // ff struct ggml_tensor * c_mlp_fc_w; struct ggml_tensor * c_mlp_fc_b; struct ggml_tensor * c_mlp_proj_w; struct ggml_tensor * c_mlp_proj_b; }; struct gptj_model { gptj_hparams hparams; // normalization struct ggml_tensor * ln_f_g; struct ggml_tensor * ln_f_b; struct ggml_tensor * wte; // token embedding struct ggml_tensor * lmh_g; // language model head struct ggml_tensor * lmh_b; // language model bias std::vector<gptj_layer> layers; // key + value memory struct ggml_tensor * memory_k; struct ggml_tensor * memory_v; // struct ggml_context * ctx; std::map<std::string, struct ggml_tensor *> tensors; }; // load the model's weights from a file bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab & vocab) { printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str()); auto fin = std::ifstream(fname, std::ios::binary); if (!fin) { fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); return false; } // verify magic { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); if (magic != GGML_FILE_MAGIC) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); return false; } } // load hparams { auto & hparams = model.hparams; fin.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); fin.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); fin.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); fin.read((char *) &hparams.n_head, sizeof(hparams.n_head)); fin.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); fin.read((char *) &hparams.n_rot, sizeof(hparams.n_rot)); fin.read((char *) &hparams.ftype, sizeof(hparams.ftype)); const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab); printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx); printf("%s: n_embd = %d\n", __func__, hparams.n_embd); printf("%s: n_head = %d\n", __func__, hparams.n_head); printf("%s: n_layer = %d\n", __func__, hparams.n_layer); printf("%s: n_rot = %d\n", __func__, hparams.n_rot); printf("%s: ftype = %d\n", __func__, hparams.ftype); printf("%s: qntvr = %d\n", __func__, qntvr); hparams.ftype %= GGML_QNT_VERSION_FACTOR; } // load vocab { int32_t n_vocab = 0; fin.read((char *) &n_vocab, sizeof(n_vocab)); if (n_vocab != model.hparams.n_vocab) {
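// note: the vocab size read from the file must match the value from the hparams section; a mismatch usually means a truncated or incompatible model file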
fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", __func__, fname.c_str(), n_vocab, model.hparams.n_vocab); return false; } std::string word; std::vector buf(128); for (int i = 0; i < n_vocab; i++) { uint32_t len; fin.read((char *) &len, sizeof(len)); buf.resize(len); fin.read((char *) buf.data(), len); word.assign(buf.data(), len); vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; } } // for the big tensors, we have the option to store the data in 16-bit floats or quantized // in order to save memory and also to speed up the computation ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype) (model.hparams.ftype)); if (wtype == GGML_TYPE_COUNT) { fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", __func__, fname.c_str(), model.hparams.ftype); return false; } auto & ctx = model.ctx; size_t ctx_size = 0; { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_vocab = hparams.n_vocab; ctx_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_g ctx_size += ggml_row_size(GGML_TYPE_F32, n_embd); // ln_f_b ctx_size += ggml_row_size(wtype, n_embd*n_vocab); // wte ctx_size += ggml_row_size(wtype, n_embd*n_vocab); // lmh_g ctx_size += ggml_row_size(GGML_TYPE_F32, n_vocab); // lmh_b ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_g ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // ln_1_b ctx_size += n_layer*(ggml_row_size(wtype, n_embd*n_embd)); // c_attn_q_proj_w ctx_size += n_layer*(ggml_row_size(wtype, n_embd*n_embd)); // c_attn_k_proj_w ctx_size += n_layer*(ggml_row_size(wtype, n_embd*n_embd)); // c_attn_v_proj_w ctx_size += n_layer*(ggml_row_size(wtype, n_embd*n_embd)); // c_attn_proj_w ctx_size += n_layer*(ggml_row_size(wtype, 4*n_embd*n_embd)); // c_mlp_fc_w ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, 4*n_embd)); // c_mlp_fc_b ctx_size += n_layer*(ggml_row_size(wtype, 4*n_embd*n_embd)); // c_mlp_proj_w ctx_size += n_layer*(ggml_row_size(GGML_TYPE_F32, n_embd)); // c_mlp_proj_b ctx_size += n_ctx*n_layer*ggml_row_size(GGML_TYPE_F16, n_embd); // memory_k ctx_size += n_ctx*n_layer*ggml_row_size(GGML_TYPE_F16, n_embd); // memory_v ctx_size += (5 + 10*n_layer)*512; // object overhead printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0)); } // create the ggml context { struct ggml_init_params params = { /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, }; model.ctx = ggml_init(params); if (!model.ctx) { fprintf(stderr, "%s: ggml_init() failed\n", __func__); return false; } } // prepare memory for the weights { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_vocab = hparams.n_vocab; model.layers.resize(n_layer); model.wte = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); model.lmh_g = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); model.lmh_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_vocab); // map by name model.tensors["transformer.wte.weight"] = model.wte; model.tensors["transformer.ln_f.weight"] = model.ln_f_g; model.tensors["transformer.ln_f.bias"] = model.ln_f_b; model.tensors["lm_head.weight"] = model.lmh_g; model.tensors["lm_head.bias"] = model.lmh_b; for (int i = 0; i < n_layer; ++i) { auto & layer = model.layers[i]; layer.ln_1_g = ggml_new_tensor_1d(ctx, 
GGML_TYPE_F32, n_embd); layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); layer.c_attn_q_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); layer.c_attn_k_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); layer.c_attn_v_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); layer.c_mlp_fc_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 4*n_embd); layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4*n_embd); layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4*n_embd, n_embd); layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // map by name model.tensors["transformer.h." + std::to_string(i) + ".ln_1.weight"] = layer.ln_1_g; model.tensors["transformer.h." + std::to_string(i) + ".ln_1.bias"] = layer.ln_1_b; model.tensors["transformer.h." + std::to_string(i) + ".attn.q_proj.weight"] = layer.c_attn_q_proj_w; model.tensors["transformer.h." + std::to_string(i) + ".attn.k_proj.weight"] = layer.c_attn_k_proj_w; model.tensors["transformer.h." + std::to_string(i) + ".attn.v_proj.weight"] = layer.c_attn_v_proj_w; model.tensors["transformer.h." + std::to_string(i) + ".attn.out_proj.weight"] = layer.c_attn_proj_w; model.tensors["transformer.h." + std::to_string(i) + ".mlp.fc_in.weight"] = layer.c_mlp_fc_w; model.tensors["transformer.h." + std::to_string(i) + ".mlp.fc_in.bias"] = layer.c_mlp_fc_b; model.tensors["transformer.h." + std::to_string(i) + ".mlp.fc_out.weight"] = layer.c_mlp_proj_w; model.tensors["transformer.h." + std::to_string(i) + ".mlp.fc_out.bias"] = layer.c_mlp_proj_b; } } // key + value memory { const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_mem = n_layer*n_ctx; const int n_elements = n_embd*n_mem; model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); printf("%s: memory_size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem); } // load weights { int n_tensors = 0; size_t total_size = 0; printf("%s: ", __func__); while (true) { int32_t n_dims; int32_t length; int32_t ttype; fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); fin.read(reinterpret_cast(&length), sizeof(length)); fin.read(reinterpret_cast(&ttype), sizeof(ttype)); if (fin.eof()) { break; } int32_t nelements = 1; int32_t ne[2] = { 1, 1 }; for (int i = 0; i < n_dims; ++i) { fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); nelements *= ne[i]; } std::string name(length, 0); fin.read(&name[0], length); if (model.tensors.find(name) == model.tensors.end()) { fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.c_str()); return false; } auto tensor = model.tensors[name]; if (ggml_nelements(tensor) != nelements) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.c_str()); return false; } if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", __func__, name.c_str(), (int) tensor->ne[0], (int) tensor->ne[1], ne[0], ne[1]); return false; } // for debugging if (0) { printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.c_str(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor)); } const size_t bpe = 
ggml_type_size(ggml_type(ttype)); if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) { fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n", __func__, name.c_str(), ggml_nbytes(tensor), nelements*bpe); return false; } fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); //printf("%42s - [%5d, %5d], type = %6s, %6.2f MB\n", name.c_str(), ne[0], ne[1], ttype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0); total_size += ggml_nbytes(tensor); if (++n_tensors % 8 == 0) { printf("."); fflush(stdout); } } printf(" done\n"); printf("%s: model size = %8.2f MB / num tensors = %d\n", __func__, total_size/1024.0/1024.0, n_tensors); } fin.close(); return true; } // evaluate the transformer // // - model: the model // - n_threads: number of threads to use // - n_past: the context size so far // - embd_inp: the embeddings of the tokens in the context // - embd_w: the predicted logits for the next token // // The GPT-J model requires about 16MB of memory per input token. // bool gptj_eval( const gptj_model & model, const int n_threads, const int n_past, const std::vector & embd_inp, std::vector & embd_w, size_t & mem_per_token) { const int N = embd_inp.size(); const auto & hparams = model.hparams; const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; const int n_ctx = hparams.n_ctx; const int n_head = hparams.n_head; const int n_vocab = hparams.n_vocab; const int n_rot = hparams.n_rot; static size_t buf_size = 256u*1024*1024; static void * buf = malloc(buf_size); if (mem_per_token > 0 && mem_per_token*N > buf_size) { const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new); // reallocate buf_size = buf_size_new; buf = realloc(buf, buf_size); if (buf == nullptr) { fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); return false; } } struct ggml_init_params params = { /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf, /*.no_alloc =*/ false, }; struct ggml_context * ctx0 = ggml_init(params); struct ggml_cgraph * gf = ggml_new_graph(ctx0); // KQ_pos - contains the positions struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); int * data = (int *) KQ_pos->data; for (int i = 0; i < N; ++i) { data[i] = n_past + i; } struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd)); // wte struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.wte, embd); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * cur; // norm { cur = ggml_norm(ctx0, inpL, hparams.eps); // cur = ln_1_g*cur + ln_1_b cur = ggml_add(ctx0, ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].ln_1_g, cur), cur), ggml_repeat(ctx0, model.layers[il].ln_1_b, cur)); } struct ggml_tensor * inpSA = cur; // self-attention { struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_q_proj_w, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0); struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_k_proj_w, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0); // store key and value to memory { struct ggml_tensor * Vcur = ggml_transpose(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_v_proj_w, cur)); struct ggml_tensor * k = ggml_view_1d(ctx0, model.memory_k, N*n_embd, 
(ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past)); struct ggml_tensor * v = ggml_view_2d(ctx0, model.memory_v, N, n_embd, ( n_ctx)*ggml_element_size(model.memory_v), (il*n_ctx)*ggml_element_size(model.memory_v)*n_embd + n_past*ggml_element_size(model.memory_v)); ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k)); ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v)); } // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3) struct ggml_tensor * Q = ggml_permute(ctx0, Qcur, 0, 2, 1, 3); // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3) struct ggml_tensor * K = ggml_permute(ctx0, ggml_reshape_3d(ctx0, ggml_view_1d(ctx0, model.memory_k, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_k)*n_embd), n_embd/n_head, n_head, n_past + N), 0, 2, 1, 3); // K * Q struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); // KQ_scaled = KQ / sqrt(n_embd/n_head) struct ggml_tensor * KQ_scaled = ggml_scale_inplace(ctx0, KQ, 1.0f/sqrt(float(n_embd)/n_head)); // KQ_masked = mask_past(KQ_scaled) struct ggml_tensor * KQ_masked = ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past); // KQ = soft_max(KQ_masked) struct ggml_tensor * KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked); // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous() struct ggml_tensor * V = ggml_view_3d(ctx0, model.memory_v, n_past + N, n_embd/n_head, n_head, n_ctx*ggml_element_size(model.memory_v), n_ctx*ggml_element_size(model.memory_v)*n_embd/n_head, il*n_ctx*ggml_element_size(model.memory_v)*n_embd); // KQV = transpose(V) * KQ_soft_max struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max); // KQV_merged = KQV.permute(0, 2, 1, 3) struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); // cur = KQV_merged.contiguous().view(n_embd, N) cur = ggml_cpy(ctx0, KQV_merged, ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); // projection (no bias) cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_proj_w, cur); } struct ggml_tensor * inpFF = cur; // feed-forward network // this is independent of the self-attention result, so it could be done in parallel to the self-attention { // note here we pass inpSA instead of cur cur = ggml_mul_mat(ctx0, model.layers[il].c_mlp_fc_w, inpSA); cur = ggml_add(ctx0, ggml_repeat(ctx0, model.layers[il].c_mlp_fc_b, cur), cur); // GELU activation cur = ggml_gelu(ctx0, cur); // projection // cur = proj_w*cur + proj_b cur = ggml_mul_mat(ctx0, model.layers[il].c_mlp_proj_w, cur); cur = ggml_add(ctx0, ggml_repeat(ctx0, model.layers[il].c_mlp_proj_b, cur), cur); } // self-attention + FF cur = ggml_add(ctx0, cur, inpFF); // input for next layer inpL = ggml_add(ctx0, cur, inpL); } // norm { inpL = ggml_norm(ctx0, inpL, hparams.eps); // inpL = ln_f_g*inpL + ln_f_b inpL = ggml_add(ctx0, ggml_mul(ctx0, ggml_repeat(ctx0, model.ln_f_g, inpL), inpL), ggml_repeat(ctx0, model.ln_f_b, inpL)); } // lm_head { inpL = ggml_mul_mat(ctx0, model.lmh_g, inpL); inpL = ggml_add(ctx0, ggml_repeat(ctx0, model.lmh_b, inpL), inpL); } // logits -> probs //inpL = ggml_soft_max_inplace(ctx0, inpL); // run the computation ggml_build_forward_expand(gf, inpL); ggml_graph_compute_with_ctx(ctx0, gf, n_threads); //if (n_past%100 == 0) { // ggml_graph_print (&gf); // ggml_graph_dump_dot(&gf, NULL, "gpt-j.dot"); //} //embd_w.resize(n_vocab*N); //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N); // return result for just the last token embd_w.resize(n_vocab); memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + 
(n_vocab*(N-1)), sizeof(float)*n_vocab); if (mem_per_token == 0) { mem_per_token = ggml_used_mem(ctx0)/N; } //printf("used_mem = %zu\n", ggml_used_mem(ctx0)); ggml_free(ctx0); return true; } int main(int argc, char ** argv) { ggml_time_init(); const int64_t t_main_start_us = ggml_time_us(); gpt_params params; params.model = "models/gpt-j-6B/ggml-model.bin"; if (gpt_params_parse(argc, argv, params) == false) { return 1; } if (params.seed < 0) { params.seed = time(NULL); } printf("%s: seed = %d\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.prompt.empty()) { params.prompt = gpt_random_prompt(rng); } int64_t t_load_us = 0; gpt_vocab vocab; gptj_model model; // load the model { const int64_t t_start_us = ggml_time_us(); if (!gptj_model_load(params.model, model, vocab)) { fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); return 1; } t_load_us = ggml_time_us() - t_start_us; test_gpt_tokenizer(vocab, params.token_test); } int n_past = 0; int64_t t_sample_us = 0; int64_t t_predict_us = 0; std::vector logits; // tokenize the prompt std::vector embd_inp = ::gpt_tokenize(vocab, params.prompt); params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); printf("\n"); std::vector embd; // determine the required inference memory per token: size_t mem_per_token = 0; gptj_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token); for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) { // predict if (embd.size() > 0) { const int64_t t_start_us = ggml_time_us(); if (!gptj_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) { printf("Failed to predict\n"); return 1; } t_predict_us += ggml_time_us() - t_start_us; } n_past += embd.size(); embd.clear(); if (i >= embd_inp.size()) { // sample next token const int top_k = params.top_k; const float top_p = params.top_p; const float temp = params.temp; const int n_vocab = model.hparams.n_vocab; gpt_vocab::id id = 0; { const int64_t t_start_sample_us = ggml_time_us(); id = gpt_sample_top_k_top_p(vocab, logits.data() + (logits.size() - n_vocab), top_k, top_p, temp, rng); t_sample_us += ggml_time_us() - t_start_sample_us; } // add it to the context embd.push_back(id); } else { // if here, it means we are still processing the input prompt for (size_t k = i; k < embd_inp.size(); k++) { embd.push_back(embd_inp[k]); if (int32_t(embd.size()) > params.n_batch) { break; } } i += embd.size() - 1; } // display text for (auto id : embd) { printf("%s", vocab.id_to_token[id].c_str()); } fflush(stdout); // end of text token if (embd.back() == 50256) { break; } } // report timing { const int64_t t_main_end_us = ggml_time_us(); printf("\n\n"); printf("%s: mem per token = %8zu bytes\n", __func__, mem_per_token); printf("%s: load time = %8.2f ms\n", __func__, t_load_us/1000.0f); printf("%s: sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f); printf("%s: predict time = %8.2f ms / %.2f ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past); printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f); } ggml_free(model.ctx); return 0; } ggml-org-ggml-7ec8045/examples/gpt-j/quantize.cpp000066400000000000000000000133401506673203700217270ustar00rootroot00000000000000#include "ggml.h" #include "common.h" #include "common-ggml.h" #include #include #include #include #include #include #include #include #include // 
default hparams (GPT-J 6B) struct gptj_hparams { int32_t n_vocab = 50400; int32_t n_ctx = 2048; int32_t n_embd = 4096; int32_t n_head = 16; int32_t n_layer = 28; int32_t n_rot = 64; int32_t ftype = 1; }; // quantize a model bool gptj_model_quantize(const std::string & fname_inp, const std::string & fname_out, ggml_ftype ftype) { gpt_vocab vocab; printf("%s: loading model from '%s'\n", __func__, fname_inp.c_str()); auto finp = std::ifstream(fname_inp, std::ios::binary); if (!finp) { fprintf(stderr, "%s: failed to open '%s' for reading\n", __func__, fname_inp.c_str()); return false; } auto fout = std::ofstream(fname_out, std::ios::binary); if (!fout) { fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname_out.c_str()); return false; } // verify magic { uint32_t magic; finp.read((char *) &magic, sizeof(magic)); if (magic != GGML_FILE_MAGIC) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str()); return false; } fout.write((char *) &magic, sizeof(magic)); } gptj_hparams hparams; // load hparams { finp.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); finp.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); finp.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); finp.read((char *) &hparams.n_head, sizeof(hparams.n_head)); finp.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); finp.read((char *) &hparams.n_rot, sizeof(hparams.n_rot)); finp.read((char *) &hparams.ftype, sizeof(hparams.ftype)); const int32_t qntvr_src = hparams.ftype / GGML_QNT_VERSION_FACTOR; const int32_t ftype_dst = GGML_QNT_VERSION * GGML_QNT_VERSION_FACTOR + ftype; printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab); printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx); printf("%s: n_embd = %d\n", __func__, hparams.n_embd); printf("%s: n_head = %d\n", __func__, hparams.n_head); printf("%s: n_layer = %d\n", __func__, hparams.n_layer); printf("%s: ftype (src) = %d\n", __func__, hparams.ftype); printf("%s: qntvr (src) = %d\n", __func__, qntvr_src); printf("%s: ftype (dst) = %d\n", __func__, ftype_dst); printf("%s: qntvr (dst) = %d\n", __func__, GGML_QNT_VERSION); fout.write((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); fout.write((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); fout.write((char *) &hparams.n_embd, sizeof(hparams.n_embd)); fout.write((char *) &hparams.n_head, sizeof(hparams.n_head)); fout.write((char *) &hparams.n_layer, sizeof(hparams.n_layer)); fout.write((char *) &hparams.n_rot, sizeof(hparams.n_rot)); fout.write((char *) &ftype_dst, sizeof(ftype_dst)); } // load vocab { int32_t n_vocab = 0; finp.read ((char *) &n_vocab, sizeof(n_vocab)); fout.write((char *) &n_vocab, sizeof(n_vocab)); if (n_vocab != hparams.n_vocab) { fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", __func__, fname_inp.c_str(), n_vocab, hparams.n_vocab); return false; } std::string word; for (int i = 0; i < n_vocab; i++) { uint32_t len; finp.read ((char *) &len, sizeof(len)); fout.write((char *) &len, sizeof(len)); word.resize(len); finp.read ((char *) word.data(), len); fout.write((char *) word.data(), len); vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; } } // regexes of tensor names to be quantized const std::vector to_quant = { ".*weight", }; if (!ggml_common_quantize_0(finp, fout, ftype, to_quant, {})) { fprintf(stderr, "%s: failed to quantize model '%s'\n", __func__, fname_inp.c_str()); return false; } finp.close(); fout.close(); return true; } // usage: // ./gpt-2-quantize models/gpt-2-117M/ggml-model.bin 
models/gpt-2-117M/ggml-model-quant.bin type // int main(int argc, char ** argv) { if (argc != 4) { fprintf(stderr, "usage: %s model-f32.bin model-quant.bin type\n", argv[0]); ggml_print_ftypes(stderr); return 1; } // needed to initialize f16 tables { struct ggml_init_params params = { 0, NULL, false }; struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } const std::string fname_inp = argv[1]; const std::string fname_out = argv[2]; const ggml_ftype ftype = ggml_parse_ftype(argv[3]); const int64_t t_main_start_us = ggml_time_us(); int64_t t_quantize_us = 0; // load the model { const int64_t t_start_us = ggml_time_us(); if (!gptj_model_quantize(fname_inp, fname_out, ggml_ftype(ftype))) { fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str()); return 1; } t_quantize_us = ggml_time_us() - t_start_us; } // report timing { const int64_t t_main_end_us = ggml_time_us(); printf("\n"); printf("%s: quantize time = %8.2f ms\n", __func__, t_quantize_us/1000.0f); printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f); } return 0; } ggml-org-ggml-7ec8045/examples/magika/000077500000000000000000000000001506673203700175725ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/magika/CMakeLists.txt000066400000000000000000000004621506673203700223340ustar00rootroot00000000000000# # magika set(TEST_TARGET magika) add_executable(${TEST_TARGET} main.cpp) target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml) # # For GPU offloading if (GGML_CUDA) add_compile_definitions(GGML_USE_CUDA) endif() if (GGML_METAL) add_compile_definitions(GGML_USE_METAL) endif() ggml-org-ggml-7ec8045/examples/magika/README.md000066400000000000000000000026341506673203700210560ustar00rootroot00000000000000# Google Magika inference Simple example that shows how to use GGML for inference with the [Google Magika](https://github.com/google/magika) file type detection model. 
### Usage - Obtain the Magika model in H5 format - Pinned version: https://github.com/google/magika/blob/4460acb5d3f86807c3b53223229dee2afa50c025/assets_generation/models/standard_v1/model.h5 - Use `convert.py` to convert the model to gguf format: ```bash $ python examples/magika/convert.py /path/to/model.h5 ``` - Invoke the program with the model file and a list of files to identify: ```bash $ build/bin/magika model.h5.gguf examples/sam/example.jpg examples/magika/convert.py README.md src/ggml.c /bin/gcc write.exe jfk.wav examples/sam/example.jpg : jpeg (100.00%) pptx (0.00%) smali (0.00%) shell (0.00%) sevenzip (0.00%) examples/magika/convert.py : python (99.99%) javascript (0.00%) txt (0.00%) asm (0.00%) scala (0.00%) README.md : markdown (100.00%) txt (0.00%) yaml (0.00%) ppt (0.00%) shell (0.00%) src/ggml.c : c (99.95%) txt (0.04%) asm (0.01%) yaml (0.00%) html (0.00%) /bin/gcc : elf (99.98%) odex (0.02%) pptx (0.00%) smali (0.00%) shell (0.00%) write.exe : pebin (100.00%) ppt (0.00%) smali (0.00%) shell (0.00%) sevenzip (0.00%) jfk.wav : wav (100.00%) ppt (0.00%) shell (0.00%) sevenzip (0.00%) scala (0.00%) ``` ggml-org-ggml-7ec8045/examples/magika/convert.py000066400000000000000000000016521506673203700216300ustar00rootroot00000000000000import sys from tensorflow import keras import gguf def convert(model_name): model = keras.models.load_model(model_name, compile=False) gguf_model_name = model_name + ".gguf" gguf_writer = gguf.GGUFWriter(gguf_model_name, "magika") for layer in model.layers: # export layers with weights if layer.weights: for weight in layer.weights: print(f" [{weight.name}] {weight.shape} {weight.dtype}") weight_data = weight.numpy() gguf_writer.add_tensor(weight.name, weight_data.T) gguf_writer.write_header_to_file() gguf_writer.write_kv_data_to_file() gguf_writer.write_tensors_to_file() gguf_writer.close() print("Model converted and saved to '{}'".format(gguf_model_name)) if __name__ == '__main__': if len(sys.argv) > 1: model_file = sys.argv[1] else: model_file = "model.h5" convert(model_file) ggml-org-ggml-7ec8045/examples/magika/main.cpp000066400000000000000000000326311506673203700212270ustar00rootroot00000000000000#include "ggml.h" #include "gguf.h" #include "ggml-cpu.h" #include "ggml-alloc.h" #include "ggml-backend.h" #include #include #include #include #include #include static const char * magika_labels[] = { "ai", "apk", "appleplist", "asm", "asp", "batch", "bmp", "bzip", "c", "cab", "cat", "chm", "coff", "crx", "cs", "css", "csv", "deb", "dex", "dmg", "doc", "docx", "elf", "emf", "eml", "epub", "flac", "gif", "go", "gzip", "hlp", "html", "ico", "ini", "internetshortcut", "iso", "jar", "java", "javabytecode", "javascript", "jpeg", "json", "latex", "lisp", "lnk", "m3u", "macho", "makefile", "markdown", "mht", "mp3", "mp4", "mscompress", "msi", "mum", "odex", "odp", "ods", "odt", "ogg", "outlook", "pcap", "pdf", "pebin", "pem", "perl", "php", "png", "postscript", "powershell", "ppt", "pptx", "python", "pythonbytecode", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "sevenzip", "shell", "smali", "sql", "squashfs", "svg", "swf", "symlinktext", "tar", "tga", "tiff", "torrent", "ttf", "txt", "unknown", "vba", "wav", "webm", "webp", "winregistry", "wmf", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "zip", "zlibstream" }; struct magika_hparams { const int block_size = 4096; const int beg_size = 512; const int mid_size = 512; const int end_size = 512; const int min_file_size_for_dl = 16; const int n_label = 113; const float f_norm_eps = 
0.001f; const int padding_token = 256; }; struct magika_model { ~magika_model() { ggml_backend_buffer_free(buf_w); ggml_backend_free(backend); ggml_free(ctx_w); } magika_hparams hparams; struct ggml_tensor * dense_w; struct ggml_tensor * dense_b; struct ggml_tensor * layer_norm_gamma; struct ggml_tensor * layer_norm_beta; struct ggml_tensor * dense_1_w; struct ggml_tensor * dense_1_b; struct ggml_tensor * dense_2_w; struct ggml_tensor * dense_2_b; struct ggml_tensor * layer_norm_1_gamma; struct ggml_tensor * layer_norm_1_beta; struct ggml_tensor * target_label_w; struct ggml_tensor * target_label_b; ggml_backend_t backend = ggml_backend_cpu_init(); ggml_backend_buffer_t buf_w = nullptr; struct ggml_context * ctx_w = nullptr; }; struct ggml_tensor * checked_get_tensor(struct ggml_context * ctx, const char * name) { struct ggml_tensor * tensor = ggml_get_tensor(ctx, name); if (!tensor) { fprintf(stderr, "%s: tensor '%s' not found\n", __func__, name); throw std::runtime_error("ggml_get_tensor() failed"); } return tensor; } bool magika_model_load(const std::string & fname, magika_model & model) { auto & ctx = model.ctx_w; struct gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ &ctx, }; struct gguf_context * ctx_gguf = gguf_init_from_file(fname.c_str(), params); if (!ctx_gguf) { fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__); return false; } model.buf_w = ggml_backend_alloc_ctx_tensors(ctx, model.backend); if (!model.buf_w) { fprintf(stderr, "%s: ggml_backend_alloc_ctx_tensors() failed\n", __func__); gguf_free(ctx_gguf); return false; } try { model.dense_w = checked_get_tensor(ctx, "dense/kernel:0"); model.dense_b = checked_get_tensor(ctx, "dense/bias:0"); model.layer_norm_gamma = checked_get_tensor(ctx, "layer_normalization/gamma:0"); model.layer_norm_beta = checked_get_tensor(ctx, "layer_normalization/beta:0"); model.dense_1_w = checked_get_tensor(ctx, "dense_1/kernel:0"); model.dense_1_b = checked_get_tensor(ctx, "dense_1/bias:0"); model.dense_2_w = checked_get_tensor(ctx, "dense_2/kernel:0"); model.dense_2_b = checked_get_tensor(ctx, "dense_2/bias:0"); model.layer_norm_1_gamma = checked_get_tensor(ctx, "layer_normalization_1/gamma:0"); model.layer_norm_1_beta = checked_get_tensor(ctx, "layer_normalization_1/beta:0"); model.target_label_w = checked_get_tensor(ctx, "target_label/kernel:0"); model.target_label_b = checked_get_tensor(ctx, "target_label/bias:0"); } catch (const std::exception & e) { fprintf(stderr, "%s: %s\n", __func__, e.what()); gguf_free(ctx_gguf); return false; } FILE * f = fopen(fname.c_str(), "rb"); if (!f) { fprintf(stderr, "%s: fopen() failed\n", __func__); gguf_free(ctx_gguf); return false; } const int n_tensors = gguf_get_n_tensors(ctx_gguf); for (int i = 0; i < n_tensors; i++) { const char * name = gguf_get_tensor_name(ctx_gguf, i); struct ggml_tensor * tensor = ggml_get_tensor(ctx, name); size_t offs = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i); //printf("%-30s: [%3ld, %3ld, %3ld, %3ld] %s\n", // name, // tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], // ggml_type_name(tensor->type)); std::vector buf(ggml_nbytes(tensor)); if (fseek(f, offs, SEEK_SET) != 0) { fprintf(stderr, "%s: fseek() failed\n", __func__); gguf_free(ctx_gguf); fclose(f); return false; } if (fread(buf.data(), 1, buf.size(), f) != buf.size()) { fprintf(stderr, "%s: fread() failed\n", __func__); gguf_free(ctx_gguf); fclose(f); return false; } ggml_backend_tensor_set(tensor, buf.data(), 0, buf.size()); } fclose(f); gguf_free(ctx_gguf); 
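// at this point all weight data has been streamed into the backend buffer (model.buf_w) via ggml_backend_tensor_set; only the tensor metadata kept in model.ctx_w is referenced from here on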
return true; } struct ggml_cgraph * magika_graph( const magika_model & model, const int n_files) { const auto & hparams = model.hparams; static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead(); static std::vector buf(buf_size); struct ggml_init_params params = { /*.mem_size =*/ buf_size, /*.mem_buffer =*/ buf.data(), /*.no_alloc =*/ true, }; struct ggml_context * ctx = ggml_init(params); struct ggml_cgraph * gf = ggml_new_graph(ctx); struct ggml_tensor * input = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 257, 1536, n_files); // one-hot ggml_set_name(input, "input"); ggml_set_input(input); struct ggml_tensor * cur; // dense cur = ggml_mul_mat(ctx, model.dense_w, input); cur = ggml_add(ctx, cur, model.dense_b); // [128, 1536, n_files] cur = ggml_gelu(ctx, cur); // reshape cur = ggml_reshape_3d(ctx, cur, 512, 384, n_files); // [384, 512, n_files] cur = ggml_cont(ctx, ggml_transpose(ctx, cur)); // layer normalization cur = ggml_norm(ctx, cur, hparams.f_norm_eps); cur = ggml_mul(ctx, cur, model.layer_norm_gamma); // [384, 512, n_files] cur = ggml_add(ctx, cur, model.layer_norm_beta); // [384, 512, n_files] // dense_1 cur = ggml_cont(ctx, ggml_transpose(ctx, cur)); cur = ggml_mul_mat(ctx, model.dense_1_w, cur); cur = ggml_add(ctx, cur, model.dense_1_b); // [256, 384, n_files] cur = ggml_gelu(ctx, cur); // dense_2 cur = ggml_mul_mat(ctx, model.dense_2_w, cur); cur = ggml_add(ctx, cur, model.dense_2_b); // [256, 384, n_files] cur = ggml_gelu(ctx, cur); // global_max_pooling1d cur = ggml_cont(ctx, ggml_transpose(ctx, cur)); // [384, 256, n_files] cur = ggml_pool_1d(ctx, cur, GGML_OP_POOL_MAX, 384, 384, 0); // [1, 256, n_files] cur = ggml_reshape_2d(ctx, cur, 256, n_files); // [256, n_files] // layer normalization 1 cur = ggml_norm(ctx, cur, hparams.f_norm_eps); cur = ggml_mul(ctx, cur, model.layer_norm_1_gamma); // [256, n_files] cur = ggml_add(ctx, cur, model.layer_norm_1_beta); // [256, n_files] // target_label cur = ggml_mul_mat(ctx, model.target_label_w, cur); cur = ggml_add(ctx, cur, model.target_label_b); // [n_label, n_files] cur = ggml_soft_max(ctx, cur); // [n_label, n_files] ggml_set_name(cur, "target_label_probs"); ggml_set_output(cur); ggml_build_forward_expand(gf, cur); return gf; } bool magika_eval( struct magika_model & model, const std::vector & fnames) { const auto & hparams = model.hparams; static ggml_gallocr_t alloc = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend)); struct ggml_cgraph * gf = magika_graph(model, fnames.size()); if (!ggml_gallocr_alloc_graph(alloc, gf)) { fprintf(stderr, "%s: ggml_gallocr_alloc_graph() failed\n", __func__); return false; } struct ggml_tensor * input = ggml_graph_get_tensor(gf, "input"); for (size_t i = 0; i < fnames.size(); i++) { FILE * f = fopen(fnames[i].c_str(), "rb"); if (!f) { fprintf(stderr, "%s: fopen() failed\n", __func__); return false; } fseek(f, 0, SEEK_END); long fsize = ftell(f); // the buffer is padded with the padding_token if the file is smaller than the block size std::vector buf(1536, hparams.padding_token); std::vector read_buf(std::max(hparams.beg_size, std::max(hparams.mid_size, hparams.end_size))); // read beg fseek(f, 0, SEEK_SET); int n_read = fread(read_buf.data(), 1, hparams.beg_size, f); for (int j = 0; j < n_read; j++) { // pad at the end buf[j] = read_buf[j]; } // read mid long mid_offs = std::max(0L, (fsize - hparams.mid_size) / 2); fseek(f, mid_offs, SEEK_SET); n_read = fread(read_buf.data(), 1, hparams.mid_size, f); for (int j = 0; j < n_read; j++) { // pad 
at both ends long mid_idx = hparams.beg_size + (hparams.mid_size / 2) - n_read / 2 + j; buf[mid_idx] = read_buf[j]; } // read end long end_offs = std::max(0L, fsize - hparams.end_size); fseek(f, end_offs, SEEK_SET); n_read = fread(read_buf.data(), 1, hparams.end_size, f); for (int j = 0; j < n_read; j++) { // pad at the beginning int end_idx = hparams.beg_size + hparams.mid_size + hparams.end_size - n_read + j; buf[end_idx] = read_buf[j]; } fclose(f); const size_t inp_bytes = hparams.beg_size + hparams.mid_size + hparams.end_size; // convert to one-hot std::vector<float> one_hot(257*inp_bytes); for (size_t j = 0; j < inp_bytes; j++) { one_hot[257*j + buf[j]] = 1.0f; } ggml_backend_tensor_set(input, one_hot.data(), 257*inp_bytes*i*sizeof(float), 257*inp_bytes*sizeof(float)); } if (ggml_backend_graph_compute(model.backend, gf) != GGML_STATUS_SUCCESS) { fprintf(stderr, "%s: ggml_backend_graph_compute() failed\n", __func__); return false; } struct ggml_tensor * target_label_probs = ggml_graph_get_tensor(gf, "target_label_probs"); // print probabilities for the top labels of each file for (size_t i = 0; i < fnames.size(); i++) { std::vector<float> probs(hparams.n_label); ggml_backend_tensor_get(target_label_probs, probs.data(), hparams.n_label*i*sizeof(float), hparams.n_label*sizeof(float)); // sort the probabilities std::vector<int> idx(hparams.n_label); std::iota(idx.begin(), idx.end(), 0); std::sort(idx.begin(), idx.end(), [&probs](int i1, int i2) { return probs[i1] > probs[i2]; }); // print the top labels const int top_n = 5; printf("%-30s: ", fnames[i].c_str()); for (int j = 0; j < top_n; j++) { printf("%s (%.2f%%) ", magika_labels[idx[j]], probs[idx[j]]*100); } printf("\n"); } return true; } int main(int argc, const char ** argv) { if (argc < 3) { fprintf(stderr, "usage: %s <model> <file1> [<file2> ...]\n", argv[0]); return 1; } const char * model_fname = argv[1]; std::vector<std::string> fnames; for (int i = 2; i < argc; i++) { fnames.push_back(argv[i]); } magika_model model; if (!magika_model_load(model_fname, model)) { fprintf(stderr, "magika_model_load() failed\n"); return 1; } magika_eval(model, fnames); return 0; } ggml-org-ggml-7ec8045/examples/mnist/000077500000000000000000000000001506673203700174735ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/mnist/.gitignore000066400000000000000000000000241506673203700214570ustar00rootroot00000000000000data/ *.gguf *.ggml ggml-org-ggml-7ec8045/examples/mnist/CMakeLists.txt000066400000000000000000000033711506673203700222370ustar00rootroot00000000000000# # mnist-common set(TEST_TARGET mnist-common) add_library(${TEST_TARGET} STATIC mnist-common.cpp) target_link_libraries(${TEST_TARGET} PRIVATE ggml common) # # mnist-eval set(TEST_TARGET mnist-eval) add_executable(${TEST_TARGET} mnist-eval.cpp) target_link_libraries(${TEST_TARGET} PRIVATE ggml common mnist-common) # # mnist-train set(TEST_TARGET mnist-train) add_executable(${TEST_TARGET} mnist-train.cpp) target_link_libraries(${TEST_TARGET} PRIVATE ggml common mnist-common) # # mnist-wasm if (EMSCRIPTEN) set(TARGET mnist) add_executable(${TARGET} mnist-common.cpp) target_link_libraries(${TARGET} PRIVATE ggml ggml-cpu) set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \ --bind \ -s FORCE_FILESYSTEM=1 \ -s USE_PTHREADS=1 \ -s PTHREAD_POOL_SIZE=10 \ -s ASSERTIONS=1 \ -s WASM=1 \ -s EXPORTED_RUNTIME_METHODS=\"['ccall', 'cwrap', 'setValue', 'getValue']\" \ -s EXPORTED_FUNCTIONS=\"['_wasm_eval','_wasm_random_digit','_malloc','_free']\" \ -s ALLOW_MEMORY_GROWTH=1 \ --preload-file ${CMAKE_CURRENT_SOURCE_DIR}/mnist-f32.gguf@/ \
--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/t10k-images-idx3-ubyte@/ \ ") # Copy output to web directory add_custom_command( TARGET ${TARGET} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/bin/mnist.js ${CMAKE_CURRENT_SOURCE_DIR}/web/mnist.js COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/bin/mnist.wasm ${CMAKE_CURRENT_SOURCE_DIR}/web/mnist.wasm COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/bin/mnist.worker.js ${CMAKE_CURRENT_SOURCE_DIR}/web/mnist.worker.js ) endif() ggml-org-ggml-7ec8045/examples/mnist/README.md000066400000000000000000000226411506673203700207570ustar00rootroot00000000000000# MNIST Examples for GGML This directory contains simple examples of how to use GGML for training and inference using the [MNIST dataset](https://yann.lecun.com/exdb/mnist/). All commands listed in this README assume the working directory to be `examples/mnist`. Please note that training in GGML is a work-in-progress and not production ready. ## Obtaining the data A description of the dataset can be found on [Yann LeCun's website](https://yann.lecun.com/exdb/mnist/). While it is also in principle possible to download the dataset from this website, these downloads are frequently throttled, so it is recommended to use [HuggingFace](https://huggingface.co/datasets/ylecun/mnist) instead. The dataset will be downloaded automatically when running `mnist-train-fc.py`. ## Fully connected network For our first example we will train a fully connected network. To train a fully connected model in PyTorch and save it as a GGUF file, run: ```bash $ python3 mnist-train-fc.py mnist-fc-f32.gguf ... Test loss: 0.066377+-0.010468, Test accuracy: 97.94+-0.14% Model tensors saved to mnist-fc-f32.gguf: fc1.weight (500, 784) fc1.bias (500,) fc2.weight (10, 500) fc2.bias (10,) ``` The training script includes an evaluation of the model on the test set.
To evaluate the model on the CPU using GGML, run: ```bash $ ../../build/bin/mnist-eval mnist-fc-f32.gguf data/MNIST/raw/t10k-images-idx3-ubyte data/MNIST/raw/t10k-labels-idx1-ubyte ________________________________________________________ ________________________________________________________ ________________________________________________________ ________________________________________________________ __________________________________####__________________ ______________________________########__________________ __________________________##########____________________ ______________________##############____________________ ____________________######________####__________________ __________________________________####__________________ __________________________________####__________________ ________________________________####____________________ ______________________________####______________________ ________________________##########______________________ ______________________########__####____________________ ________________________##__________##__________________ ____________________________________##__________________ __________________________________##____________________ __________________________________##____________________ ________________________________##______________________ ____________________________####________________________ __________##____________######__________________________ __________##############________________________________ ________________####____________________________________ ________________________________________________________ ________________________________________________________ ________________________________________________________ ________________________________________________________ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 CUDA devices: Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes mnist_model: using CUDA0 (NVIDIA GeForce RTX 3090) as primary backend mnist_model: unsupported operations will be executed on the following fallback backends (in order of priority): mnist_model: - CPU (AMD Ryzen 9 5950X 16-Core Processor) mnist_model_init_from_file: loading model weights from 'mnist-fc-f32.gguf' mnist_model_init_from_file: model arch is mnist-fc mnist_model_init_from_file: successfully loaded weights from mnist-fc-f32.gguf main: loaded model in 109.44 ms mnist_model_eval: model evaluation on 10000 images took 76.92 ms, 7.69 us/image main: predicted digit is 3 main: test_loss=0.066379+-0.009101 main: test_acc=97.94+-0.14% ``` In addition to the evaluation on the test set the GGML evaluation also prints a random image from the test set as well as the model prediction for said image. To train a fully connected model on the CPU using GGML run: ``` bash $ ../../build/bin/mnist-train mnist-fc mnist-fc-f32.gguf data/MNIST/raw/train-images-idx3-ubyte data/MNIST/raw/train-labels-idx1-ubyte ``` It can then be evaluated with the same binary as above. ## Convolutional network To train a convolutional network using TensorFlow run: ```bash $ python3 mnist-train-cnn.py mnist-cnn-f32.gguf ... 
Test loss: 0.047947 Test accuracy: 98.46% GGUF model saved to 'mnist-cnn-f32.gguf' ``` The saved model can be evaluated on the CPU using the `mnist-eval` binary: ```bash $ ../../build/bin/mnist-eval mnist-cnn-f32.gguf data/MNIST/raw/t10k-images-idx3-ubyte data/MNIST/raw/t10k-labels-idx1-ubyte ________________________________________________________ ________________________________________________________ ________________________________________________________ ________________________________________________________ ________________________________________________________ ______________________________________##________________ ______________________________________##________________ ______________________________________##________________ ____________________________________##__________________ __________________________________####__________________ __________________________________##____________________ ________________________________##______________________ ______________________________##________________________ ____________________________####________________________ ____________________________##__________________________ __________________________##____________________________ ________________________##______________________________ ______________________##________________________________ ____________________####________________________________ ____________________##__________________________________ __________________##____________________________________ ________________##______________________________________ ________________________________________________________ ________________________________________________________ ________________________________________________________ ________________________________________________________ ________________________________________________________ ________________________________________________________ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 CUDA devices: Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes mnist_model: using CUDA0 (NVIDIA GeForce RTX 3090) as primary backend mnist_model: unsupported operations will be executed on the following fallback backends (in order of priority): mnist_model: - CPU (AMD Ryzen 9 5950X 16-Core Processor) mnist_model_init_from_file: loading model weights from 'mnist-cnn-f32.gguf' mnist_model_init_from_file: model arch is mnist-cnn mnist_model_init_from_file: successfully loaded weights from mnist-cnn-f32.gguf main: loaded model in 91.99 ms mnist_model_eval: model evaluation on 10000 images took 267.61 ms, 26.76 us/image main: predicted digit is 1 main: test_loss=0.047955+-0.007029 main: test_acc=98.46+-0.12% ``` Like with the fully connected network, the convolutional network can also be trained using GGML: ``` bash $ ../../build/bin/mnist-train mnist-cnn mnist-cnn-f32.gguf data/MNIST/raw/train-images-idx3-ubyte data/MNIST/raw/train-labels-idx1-ubyte ``` As always, the evaluation is done using `mnist-eval` and, like with the fully connected network, the GGML graph is exported to `mnist-cnn-f32.ggml`. ## Hardware Acceleration Both the training and the evaluation code are hardware-agnostic as long as the corresponding GGML backend has implemented the necessary operations. A specific backend can be selected by appending the above commands with a backend name. The compute graphs then schedule the operations to preferentially use the specified backend.
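For example, to request the first CUDA device as the primary backend (the backend name is the optional last argument accepted by both `mnist-eval` and `mnist-train`, see their usage strings): ```bash $ ../../build/bin/mnist-eval mnist-fc-f32.gguf data/MNIST/raw/t10k-images-idx3-ubyte data/MNIST/raw/t10k-labels-idx1-ubyte CUDA0 ```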
Note that if a backend does not implement some of the necessary operations, a CPU fallback is used instead, which may result in bad performance. ## Web demo The evaluation code can be compiled to WebAssembly using [Emscripten](https://emscripten.org/) (may need to re-login to update `$PATH` after installation). First, copy the GGUF file of either of the trained models to `examples/mnist` and name it `mnist-f32.gguf`. Copy the test set to `examples/mnist` and name it `t10k-images-idx3-ubyte`. Symlinking these files will *not* work! Compile the code like so: ```bash $ cd ../../ $ mkdir -p build-em $ cd build-em $ emcmake cmake .. -DGGML_BUILD_EXAMPLES=ON \ -DCMAKE_C_FLAGS="-pthread -matomics -mbulk-memory" \ -DCMAKE_CXX_FLAGS="-pthread -matomics -mbulk-memory" $ make mnist ``` The compilation output is copied into `examples/mnist/web`. To run it, you need an HTTP server. For example: ``` bash $ python3 examples/mnist/server.py Serving directory '/home/danbev/work/ai/ggml/examples/mnist/web' at http://localhost:8000 Application context root: http://localhost:8000/ ``` The web demo can then be accessed via the link printed on the console. Simply draw a digit on the canvas and the model will try to predict what it's supposed to be. Alternatively, click the "Random" button to retrieve a random digit from the test set. Be aware that, like all neural networks, the one we trained is susceptible to distributional shift: if the numbers you draw look different from the ones in the training set (e.g. because they're not centered) the model will perform comparatively worse. An online demo can be accessed [here](https://mnist.ggerganov.com). ggml-org-ggml-7ec8045/examples/mnist/mnist-common.cpp000066400000000000000000000474661506673203700226340ustar00rootroot00000000000000#include "ggml.h" #include "ggml-alloc.h" #include "ggml-backend.h" #include "ggml-opt.h" #include "mnist-common.h" #include <cmath> #include <cstdint> #include <cstdio> #include <cstdlib> #include <ctime> #include <fstream> #include <random> #include <string> #include <vector> bool mnist_image_load(const std::string & fname, ggml_opt_dataset_t dataset) { auto fin = std::ifstream(fname, std::ios::binary); if (!fin) { fprintf(stderr, "failed to open images file %s\n", fname.c_str()); return false; } fin.seekg(16); uint8_t image[MNIST_NINPUT]; struct ggml_tensor * images = ggml_opt_dataset_data(dataset); float * buf = ggml_get_data_f32(images); GGML_ASSERT(images->ne[0] == MNIST_NINPUT); for (int64_t iex = 0; iex < images->ne[1]; ++iex) { fin.read((char *) image, sizeof(image)); for (int64_t i = 0; i < MNIST_NINPUT; ++i) { buf[iex*MNIST_NINPUT + i] = image[i] / 255.0f; // Normalize to [0, 1] } } return true; } void mnist_image_print(FILE * stream, ggml_opt_dataset_t dataset, const int iex) { struct ggml_tensor * images = ggml_opt_dataset_data(dataset); GGML_ASSERT(images->ne[0] == MNIST_NINPUT); GGML_ASSERT(iex < images->ne[1]); const float * image = ggml_get_data_f32(images) + iex*MNIST_NINPUT; for (int64_t row = 0; row < MNIST_HW; row++) { for (int64_t col = 0; col < MNIST_HW; col++) { const int rgb = roundf(255.0f * image[row*MNIST_HW + col]); #ifdef _WIN32 fprintf(stream, "%s", rgb >= 220 ? "##" : "__"); // Represented via text. #else fprintf(stream, "\033[48;2;%d;%d;%dm \033[0m", rgb, rgb, rgb); // Represented via colored blocks.
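// Note: \033[48;2;R;G;Bm is the 24-bit ANSI escape sequence for setting the background color, so each pixel is rendered as a gray-scale block; \033[0m resets the terminal attributes afterwards.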
#endif // _WIN32 } fprintf(stream, "\n"); } } bool mnist_label_load(const std::string & fname, ggml_opt_dataset_t dataset) { auto fin = std::ifstream(fname, std::ios::binary); if (!fin) { fprintf(stderr, "failed to open labels file %s\n", fname.c_str()); return 0; } fin.seekg(8); uint8_t label; struct ggml_tensor * labels = ggml_opt_dataset_labels(dataset); float * buf = ggml_get_data_f32(labels); GGML_ASSERT(labels->ne[0] == MNIST_NCLASSES); for (int64_t iex = 0; iex < labels->ne[1]; ++iex) { fin.read((char *) &label, sizeof(label)); for (int64_t i = 0; i < MNIST_NCLASSES; ++i) { buf[iex*MNIST_NCLASSES + i] = i == label ? 1.0f : 0.0f; } } return true; } // Temporary util function for loading data from GGUF to a backend != CPU until GGML itself provides this functionality: bool load_from_gguf(const char * fname, struct ggml_context * ctx_ggml, struct gguf_context * ctx_gguf) { FILE * f = ggml_fopen(fname, "rb"); if (!f) { return false; } const size_t buf_size = 4*1024*1024; void * buf = malloc(buf_size); const int n_tensors = gguf_get_n_tensors(ctx_gguf); for (int i = 0; i < n_tensors; i++) { const char * name = gguf_get_tensor_name(ctx_gguf, i); struct ggml_tensor * tensor = ggml_get_tensor(ctx_ggml, name); if (!tensor) { continue; } const size_t offs = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i); if (fseek(f, offs, SEEK_SET) != 0) { fclose(f); free(buf); return false; } const size_t nbytes = ggml_nbytes(tensor); for (size_t pos = 0; pos < nbytes; pos += buf_size) { const size_t nbytes_cpy = buf_size < nbytes - pos ? buf_size : nbytes - pos; if (fread(buf, 1, nbytes_cpy, f) != nbytes_cpy) { fclose(f); free(buf); return false; } ggml_backend_tensor_set(tensor, buf, pos, nbytes_cpy); } } fclose(f); free(buf); return true; } mnist_model mnist_model_init_from_file(const std::string & fname, const std::string & backend, const int nbatch_logical, const int nbatch_physical) { mnist_model model(backend, nbatch_logical, nbatch_physical); fprintf(stderr, "%s: loading model weights from '%s'\n", __func__, fname.c_str()); struct gguf_context * ctx; { struct gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ &model.ctx_gguf, }; ctx = gguf_init_from_file(fname.c_str(), params); if (!ctx) { fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__); exit(1); } } model.arch = gguf_get_val_str(ctx, gguf_find_key(ctx, "general.architecture")); fprintf(stderr, "%s: model arch is %s\n", __func__, model.arch.c_str()); if (model.arch == "mnist-fc") { model.fc1_weight = ggml_get_tensor(model.ctx_gguf, "fc1.weight"); GGML_ASSERT(model.fc1_weight->ne[0] == MNIST_NINPUT); GGML_ASSERT(model.fc1_weight->ne[1] == MNIST_NHIDDEN); GGML_ASSERT(model.fc1_weight->ne[2] == 1); GGML_ASSERT(model.fc1_weight->ne[3] == 1); model.fc1_bias = ggml_get_tensor(model.ctx_gguf, "fc1.bias"); GGML_ASSERT(model.fc1_bias->ne[0] == MNIST_NHIDDEN); GGML_ASSERT(model.fc1_bias->ne[1] == 1); GGML_ASSERT(model.fc1_bias->ne[2] == 1); GGML_ASSERT(model.fc1_bias->ne[3] == 1); model.fc2_weight = ggml_get_tensor(model.ctx_gguf, "fc2.weight"); GGML_ASSERT(model.fc2_weight->ne[0] == MNIST_NHIDDEN); GGML_ASSERT(model.fc2_weight->ne[1] == MNIST_NCLASSES); GGML_ASSERT(model.fc2_weight->ne[2] == 1); GGML_ASSERT(model.fc2_weight->ne[3] == 1); model.fc2_bias = ggml_get_tensor(model.ctx_gguf, "fc2.bias"); GGML_ASSERT(model.fc2_bias->ne[0] == MNIST_NCLASSES); GGML_ASSERT(model.fc2_bias->ne[1] == 1); GGML_ASSERT(model.fc2_bias->ne[2] == 1); GGML_ASSERT(model.fc2_bias->ne[3] == 1); } else if (model.arch == "mnist-cnn") { 
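// Note: ggml lists tensor dimensions from fastest to slowest varying, i.e. ne[0] is the innermost dimension; the raw_shape tuples written by mnist-train-cnn.py therefore appear here in reverse order, e.g. raw_shape=(8, 1, 3, 3) corresponds to ne = {3, 3, 1, MNIST_CNN_NCB}.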
model.conv1_kernel = ggml_get_tensor(model.ctx_gguf, "conv1.kernel"); GGML_ASSERT(model.conv1_kernel->type == GGML_TYPE_F32); GGML_ASSERT(model.conv1_kernel->ne[0] == 3); GGML_ASSERT(model.conv1_kernel->ne[1] == 3); GGML_ASSERT(model.conv1_kernel->ne[2] == 1); GGML_ASSERT(model.conv1_kernel->ne[3] == MNIST_CNN_NCB); model.conv1_bias = ggml_get_tensor(model.ctx_gguf, "conv1.bias"); GGML_ASSERT(model.conv1_bias->type == GGML_TYPE_F32); GGML_ASSERT(model.conv1_bias->ne[0] == 1); GGML_ASSERT(model.conv1_bias->ne[1] == 1); GGML_ASSERT(model.conv1_bias->ne[2] == MNIST_CNN_NCB); GGML_ASSERT(model.conv1_bias->ne[3] == 1); model.conv2_kernel = ggml_get_tensor(model.ctx_gguf, "conv2.kernel"); GGML_ASSERT(model.conv2_kernel->type == GGML_TYPE_F32); GGML_ASSERT(model.conv2_kernel->ne[0] == 3); GGML_ASSERT(model.conv2_kernel->ne[1] == 3); GGML_ASSERT(model.conv2_kernel->ne[2] == MNIST_CNN_NCB); GGML_ASSERT(model.conv2_kernel->ne[3] == MNIST_CNN_NCB*2); model.conv2_bias = ggml_get_tensor(model.ctx_gguf, "conv2.bias"); GGML_ASSERT(model.conv2_bias->type == GGML_TYPE_F32); GGML_ASSERT(model.conv2_bias->ne[0] == 1); GGML_ASSERT(model.conv2_bias->ne[1] == 1); GGML_ASSERT(model.conv2_bias->ne[2] == MNIST_CNN_NCB*2); GGML_ASSERT(model.conv2_bias->ne[3] == 1); model.dense_weight = ggml_get_tensor(model.ctx_gguf, "dense.weight"); GGML_ASSERT(model.dense_weight->type == GGML_TYPE_F32); GGML_ASSERT(model.dense_weight->ne[0] == (MNIST_HW/4)*(MNIST_HW/4)*(MNIST_CNN_NCB*2)); GGML_ASSERT(model.dense_weight->ne[1] == MNIST_NCLASSES); GGML_ASSERT(model.dense_weight->ne[2] == 1); GGML_ASSERT(model.dense_weight->ne[3] == 1); model.dense_bias = ggml_get_tensor(model.ctx_gguf, "dense.bias"); GGML_ASSERT(model.dense_bias->type == GGML_TYPE_F32); GGML_ASSERT(model.dense_bias->ne[0] == MNIST_NCLASSES); GGML_ASSERT(model.dense_bias->ne[1] == 1); GGML_ASSERT(model.dense_bias->ne[2] == 1); GGML_ASSERT(model.dense_bias->ne[3] == 1); } else { fprintf(stderr, "%s: unknown model arch: %s\n", __func__, model.arch.c_str()); } model.buf_gguf = ggml_backend_alloc_ctx_tensors(model.ctx_gguf, model.backends[0]); if(!load_from_gguf(fname.c_str(), model.ctx_gguf, ctx)) { fprintf(stderr, "%s: loading weights from %s failed\n", __func__, fname.c_str()); exit(1); } // The space in ctx_gguf exactly fits the model weights, // the images (which also need to be statically allocated) need to be put in a different context. 
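// The input tensor holds one flattened 28x28 image per index along dimension 1, i.e. ne = {MNIST_NINPUT, nbatch_physical}.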
model.images = ggml_new_tensor_2d(model.ctx_static, GGML_TYPE_F32, MNIST_NINPUT, nbatch_physical); ggml_set_name(model.images, "images"); ggml_set_input(model.images); model.buf_static = ggml_backend_alloc_ctx_tensors(model.ctx_static, model.backends[0]); fprintf(stderr, "%s: successfully loaded weights from %s\n", __func__, fname.c_str()); return model; } mnist_model mnist_model_init_random(const std::string & arch, const std::string & backend, const int nbatch_logical, const int nbatch_physical) { mnist_model model(backend, nbatch_logical, nbatch_physical); model.arch = arch; std::random_device rd{}; std::mt19937 gen{rd()}; std::normal_distribution<float> nd{0.0f, 1e-2f}; std::vector<ggml_tensor *> init_tensors; if (model.arch == "mnist-fc") { fprintf(stderr, "%s: initializing random weights for a fully connected model\n", __func__); model.fc1_weight = ggml_new_tensor_2d(model.ctx_static, GGML_TYPE_F32, MNIST_NINPUT, MNIST_NHIDDEN); model.fc1_bias = ggml_new_tensor_1d(model.ctx_static, GGML_TYPE_F32, MNIST_NHIDDEN); model.fc2_weight = ggml_new_tensor_2d(model.ctx_static, GGML_TYPE_F32, MNIST_NHIDDEN, MNIST_NCLASSES); model.fc2_bias = ggml_new_tensor_1d(model.ctx_static, GGML_TYPE_F32, MNIST_NCLASSES); ggml_set_name(model.fc1_weight, "fc1.weight"); ggml_set_name(model.fc1_bias, "fc1.bias"); ggml_set_name(model.fc2_weight, "fc2.weight"); ggml_set_name(model.fc2_bias, "fc2.bias"); init_tensors.push_back(model.fc1_weight); init_tensors.push_back(model.fc1_bias); init_tensors.push_back(model.fc2_weight); init_tensors.push_back(model.fc2_bias); } else if (model.arch == "mnist-cnn") { model.conv1_kernel = ggml_new_tensor_4d(model.ctx_static, GGML_TYPE_F32, 3, 3, 1, MNIST_CNN_NCB); model.conv1_bias = ggml_new_tensor_3d(model.ctx_static, GGML_TYPE_F32, 1, 1, MNIST_CNN_NCB); model.conv2_kernel = ggml_new_tensor_4d(model.ctx_static, GGML_TYPE_F32, 3, 3, MNIST_CNN_NCB, MNIST_CNN_NCB*2); model.conv2_bias = ggml_new_tensor_3d(model.ctx_static, GGML_TYPE_F32, 1, 1, MNIST_CNN_NCB*2); model.dense_weight = ggml_new_tensor_2d(model.ctx_static, GGML_TYPE_F32, (MNIST_HW/4)*(MNIST_HW/4)*(MNIST_CNN_NCB*2), MNIST_NCLASSES); model.dense_bias = ggml_new_tensor_1d(model.ctx_static, GGML_TYPE_F32, MNIST_NCLASSES); ggml_set_name(model.conv1_kernel, "conv1.kernel"); ggml_set_name(model.conv1_bias, "conv1.bias"); ggml_set_name(model.conv2_kernel, "conv2.kernel"); ggml_set_name(model.conv2_bias, "conv2.bias"); ggml_set_name(model.dense_weight, "dense.weight"); ggml_set_name(model.dense_bias, "dense.bias"); init_tensors.push_back(model.conv1_kernel); init_tensors.push_back(model.conv1_bias); init_tensors.push_back(model.conv2_kernel); init_tensors.push_back(model.conv2_bias); init_tensors.push_back(model.dense_weight); init_tensors.push_back(model.dense_bias); } else { fprintf(stderr, "%s: unknown model arch: %s\n", __func__, model.arch.c_str()); } model.images = ggml_new_tensor_2d(model.ctx_static, GGML_TYPE_F32, MNIST_NINPUT, MNIST_NBATCH_PHYSICAL); ggml_set_name(model.images, "images"); ggml_set_input(model.images); model.buf_static = ggml_backend_alloc_ctx_tensors(model.ctx_static, model.backends[0]); for (ggml_tensor * t : init_tensors) { GGML_ASSERT(t->type == GGML_TYPE_F32); const int64_t ne = ggml_nelements(t); std::vector<float> tmp(ne); for (int64_t i = 0; i < ne; ++i) { tmp[i] = nd(gen); } ggml_backend_tensor_set(t, tmp.data(), 0, ggml_nbytes(t)); } return model; } void mnist_model_build(mnist_model & model) { if (model.arch == "mnist-fc") { ggml_set_param(model.fc1_weight); ggml_set_param(model.fc1_bias);
ggml_set_param(model.fc2_weight); ggml_set_param(model.fc2_bias); ggml_tensor * fc1 = ggml_relu(model.ctx_compute, ggml_add(model.ctx_compute, ggml_mul_mat(model.ctx_compute, model.fc1_weight, model.images), model.fc1_bias)); model.logits = ggml_add(model.ctx_compute, ggml_mul_mat(model.ctx_compute, model.fc2_weight, fc1), model.fc2_bias); } else if (model.arch == "mnist-cnn") { ggml_set_param(model.conv1_kernel); ggml_set_param(model.conv1_bias); ggml_set_param(model.conv2_kernel); ggml_set_param(model.conv2_bias); ggml_set_param(model.dense_weight); ggml_set_param(model.dense_bias); struct ggml_tensor * images_2D = ggml_reshape_4d(model.ctx_compute, model.images, MNIST_HW, MNIST_HW, 1, model.images->ne[1]); struct ggml_tensor * conv1_out = ggml_relu(model.ctx_compute, ggml_add(model.ctx_compute, ggml_conv_2d(model.ctx_compute, model.conv1_kernel, images_2D, 1, 1, 1, 1, 1, 1), model.conv1_bias)); GGML_ASSERT(conv1_out->ne[0] == MNIST_HW); GGML_ASSERT(conv1_out->ne[1] == MNIST_HW); GGML_ASSERT(conv1_out->ne[2] == MNIST_CNN_NCB); GGML_ASSERT(conv1_out->ne[3] == model.nbatch_physical); struct ggml_tensor * conv2_in = ggml_pool_2d(model.ctx_compute, conv1_out, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0); GGML_ASSERT(conv2_in->ne[0] == MNIST_HW/2); GGML_ASSERT(conv2_in->ne[1] == MNIST_HW/2); GGML_ASSERT(conv2_in->ne[2] == MNIST_CNN_NCB); GGML_ASSERT(conv2_in->ne[3] == model.nbatch_physical); struct ggml_tensor * conv2_out = ggml_relu(model.ctx_compute, ggml_add(model.ctx_compute, ggml_conv_2d(model.ctx_compute, model.conv2_kernel, conv2_in, 1, 1, 1, 1, 1, 1), model.conv2_bias)); GGML_ASSERT(conv2_out->ne[0] == MNIST_HW/2); GGML_ASSERT(conv2_out->ne[1] == MNIST_HW/2); GGML_ASSERT(conv2_out->ne[2] == MNIST_CNN_NCB*2); GGML_ASSERT(conv2_out->ne[3] == model.nbatch_physical); struct ggml_tensor * dense_in = ggml_pool_2d(model.ctx_compute, conv2_out, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0); GGML_ASSERT(dense_in->ne[0] == MNIST_HW/4); GGML_ASSERT(dense_in->ne[1] == MNIST_HW/4); GGML_ASSERT(dense_in->ne[2] == MNIST_CNN_NCB*2); GGML_ASSERT(dense_in->ne[3] == model.nbatch_physical); dense_in = ggml_reshape_2d(model.ctx_compute, ggml_cont(model.ctx_compute, ggml_permute(model.ctx_compute, dense_in, 1, 2, 0, 3)), (MNIST_HW/4)*(MNIST_HW/4)*(MNIST_CNN_NCB*2), model.nbatch_physical); GGML_ASSERT(dense_in->ne[0] == (MNIST_HW/4)*(MNIST_HW/4)*(MNIST_CNN_NCB*2)); GGML_ASSERT(dense_in->ne[1] == model.nbatch_physical); GGML_ASSERT(dense_in->ne[2] == 1); GGML_ASSERT(dense_in->ne[3] == 1); model.logits = ggml_add(model.ctx_compute, ggml_mul_mat(model.ctx_compute, model.dense_weight, dense_in), model.dense_bias); } else { GGML_ASSERT(false); } ggml_set_name(model.logits, "logits"); ggml_set_output(model.logits); GGML_ASSERT(model.logits->type == GGML_TYPE_F32); GGML_ASSERT(model.logits->ne[0] == MNIST_NCLASSES); GGML_ASSERT(model.logits->ne[1] == model.nbatch_physical); GGML_ASSERT(model.logits->ne[2] == 1); GGML_ASSERT(model.logits->ne[3] == 1); } ggml_opt_result_t mnist_model_eval(mnist_model & model, ggml_opt_dataset_t dataset) { ggml_opt_result_t result = ggml_opt_result_init(); ggml_opt_params params = ggml_opt_default_params(model.backend_sched, GGML_OPT_LOSS_TYPE_CROSS_ENTROPY); params.ctx_compute = model.ctx_compute; params.inputs = model.images; params.outputs = model.logits; params.build_type = GGML_OPT_BUILD_TYPE_FORWARD; ggml_opt_context_t opt_ctx = ggml_opt_init(params); { const int64_t t_start_us = ggml_time_us(); ggml_opt_epoch(opt_ctx, dataset, nullptr, result, /*idata_split =*/ 0, nullptr, nullptr); const 
int64_t t_total_us = ggml_time_us() - t_start_us; const double t_total_ms = 1e-3*t_total_us; const int nex = ggml_opt_dataset_data(dataset)->ne[1]; fprintf(stderr, "%s: model evaluation on %d images took %.2lf ms, %.2lf us/image\n", __func__, nex, t_total_ms, (double) t_total_us/nex); } ggml_opt_free(opt_ctx); return result; } void mnist_model_train(mnist_model & model, ggml_opt_dataset_t dataset, const int nepoch, const float val_split) { ggml_opt_fit(model.backend_sched, model.ctx_compute, model.images, model.logits, dataset, GGML_OPT_LOSS_TYPE_CROSS_ENTROPY, GGML_OPT_OPTIMIZER_TYPE_ADAMW, ggml_opt_get_default_optimizer_params, nepoch, model.nbatch_logical, val_split, false); } void mnist_model_save(mnist_model & model, const std::string & fname) { printf("%s: saving model to '%s'\n", __func__, fname.c_str()); struct ggml_context * ggml_ctx; { struct ggml_init_params params = { /*.mem_size =*/ 100 * 1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, }; ggml_ctx = ggml_init(params); } gguf_context * gguf_ctx = gguf_init_empty(); gguf_set_val_str(gguf_ctx, "general.architecture", model.arch.c_str()); std::vector<struct ggml_tensor *> weights; if (model.arch == "mnist-fc") { weights = {model.fc1_weight, model.fc1_bias, model.fc2_weight, model.fc2_bias}; } else if (model.arch == "mnist-cnn") { weights = {model.conv1_kernel, model.conv1_bias, model.conv2_kernel, model.conv2_bias, model.dense_weight, model.dense_bias}; } else { GGML_ASSERT(false); } for (struct ggml_tensor * t : weights) { struct ggml_tensor * copy = ggml_dup_tensor(ggml_ctx, t); ggml_set_name(copy, t->name); ggml_backend_tensor_get(t, copy->data, 0, ggml_nbytes(t)); gguf_add_tensor(gguf_ctx, copy); } gguf_write_to_file(gguf_ctx, fname.c_str(), false); ggml_free(ggml_ctx); gguf_free(gguf_ctx); } #ifdef __cplusplus extern "C" { #endif int wasm_eval(uint8_t * digitPtr) { std::vector<float> digit(digitPtr, digitPtr + MNIST_NINPUT); ggml_opt_dataset_t dataset = ggml_opt_dataset_init(GGML_TYPE_F32, GGML_TYPE_F32, MNIST_NINPUT, MNIST_NCLASSES, 1, 1); struct ggml_tensor * data = ggml_opt_dataset_data(dataset); float * buf = ggml_get_data_f32(data); for (int i = 0; i < MNIST_NINPUT; ++i) { buf[i] = digitPtr[i] / 255.0f; } ggml_set_zero(ggml_opt_dataset_labels(dataset)); // The labels are not needed. mnist_model model = mnist_model_init_from_file("mnist-f32.gguf", "CPU", /*nbatch_logical =*/ 1, /*nbatch_physical =*/ 1); mnist_model_build(model); ggml_opt_result_t result = mnist_model_eval(model, dataset); int32_t pred; ggml_opt_result_pred(result, &pred); return pred; } int wasm_random_digit(char * digitPtr) { auto fin = std::ifstream("t10k-images-idx3-ubyte", std::ios::binary); if (!fin) { fprintf(stderr, "failed to open digits file\n"); return 0; } srand(time(NULL)); // Seek to a random digit: 16-byte header + 28*28 * (random 0 - 10000) fin.seekg(16 + MNIST_NINPUT * (rand() % MNIST_NTEST)); fin.read(digitPtr, MNIST_NINPUT); return 1; } #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/examples/mnist/mnist-common.h000066400000000000000000000156721506673203700222770ustar00rootroot00000000000000#include <algorithm> #include <cstdint> #include <cstdio> #include <string> #include <thread> #include <vector> #include "ggml-alloc.h" #include "ggml-backend.h" #include "ggml.h" #include "gguf.h" #include "ggml-cpu.h" #include "ggml-opt.h" #define MNIST_NTRAIN 60000 #define MNIST_NTEST 10000 // Gradient accumulation can be achieved by setting the logical batch size to a multiple of the physical one. // The logical batch size determines how many datapoints are used for a gradient update.
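// For example, with the default values below (logical batch size 1000, physical batch size 500) the gradients of two physical batches are accumulated before each weight update.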
// The physical batch size determines how many datapoints are processed in parallel, larger values utilize compute better but need more memory. #define MNIST_NBATCH_LOGICAL 1000 #define MNIST_NBATCH_PHYSICAL 500 static_assert(MNIST_NBATCH_LOGICAL % MNIST_NBATCH_PHYSICAL == 0, "MNIST_NBATCH_LOGICAL % MNIST_NBATCH_PHYSICAL != 0"); static_assert(MNIST_NTRAIN % MNIST_NBATCH_LOGICAL == 0, "MNIST_NTRAIN % MNIST_NBATCH_LOGICAL != 0"); static_assert(MNIST_NTEST % MNIST_NBATCH_LOGICAL == 0, "MNIST_NTEST % MNIST_NBATCH_LOGICAL != 0"); #define MNIST_HW 28 #define MNIST_NINPUT (MNIST_HW*MNIST_HW) #define MNIST_NCLASSES 10 #define MNIST_NHIDDEN 500 // NCB = number of channels base #define MNIST_CNN_NCB 8 struct mnist_model { std::string arch; ggml_backend_sched_t backend_sched; std::vector<ggml_backend_t> backends; const int nbatch_logical; const int nbatch_physical; struct ggml_tensor * images = nullptr; struct ggml_tensor * logits = nullptr; struct ggml_tensor * fc1_weight = nullptr; struct ggml_tensor * fc1_bias = nullptr; struct ggml_tensor * fc2_weight = nullptr; struct ggml_tensor * fc2_bias = nullptr; struct ggml_tensor * conv1_kernel = nullptr; struct ggml_tensor * conv1_bias = nullptr; struct ggml_tensor * conv2_kernel = nullptr; struct ggml_tensor * conv2_bias = nullptr; struct ggml_tensor * dense_weight = nullptr; struct ggml_tensor * dense_bias = nullptr; struct ggml_context * ctx_gguf = nullptr; struct ggml_context * ctx_static = nullptr; struct ggml_context * ctx_compute = nullptr; ggml_backend_buffer_t buf_gguf = nullptr; ggml_backend_buffer_t buf_static = nullptr; mnist_model(const std::string & backend_name, const int nbatch_logical, const int nbatch_physical) : nbatch_logical(nbatch_logical), nbatch_physical(nbatch_physical) { std::vector<ggml_backend_dev_t> devices; const int ncores_logical = std::thread::hardware_concurrency(); const int nthreads = std::min(ncores_logical, (ncores_logical + 4) / 2); // Add primary backend: if (!backend_name.empty()) { ggml_backend_dev_t dev = ggml_backend_dev_by_name(backend_name.c_str()); if (dev == nullptr) { fprintf(stderr, "%s: ERROR: backend %s not found, available:\n", __func__, backend_name.c_str()); for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { ggml_backend_dev_t dev_i = ggml_backend_dev_get(i); fprintf(stderr, " - %s (%s)\n", ggml_backend_dev_name(dev_i), ggml_backend_dev_description(dev_i)); } exit(1); } ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr); GGML_ASSERT(backend); if (ggml_backend_is_cpu(backend)) { ggml_backend_cpu_set_n_threads(backend, nthreads); } backends.push_back(backend); devices.push_back(dev); } // Add all available backends as fallback. // A "backend" is a stream on a physical device so there is no problem with adding multiple backends for the same device. for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { ggml_backend_dev_t dev = ggml_backend_dev_get(i); ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr); GGML_ASSERT(backend); if (ggml_backend_is_cpu(backend)) { ggml_backend_cpu_set_n_threads(backend, nthreads); } backends.push_back(backend); devices.push_back(dev); } // The order of the backends passed to ggml_backend_sched_new determines which backend is given priority.
backend_sched = ggml_backend_sched_new(backends.data(), nullptr, backends.size(), GGML_DEFAULT_GRAPH_SIZE, false, true); fprintf(stderr, "%s: using %s (%s) as primary backend\n", __func__, ggml_backend_name(backends[0]), ggml_backend_dev_description(devices[0])); if (backends.size() >= 2) { fprintf(stderr, "%s: unsupported operations will be executed on the following fallback backends (in order of priority):\n", __func__); for (size_t i = 1; i < backends.size(); ++i) { fprintf(stderr, "%s: - %s (%s)\n", __func__, ggml_backend_name(backends[i]), ggml_backend_dev_description(devices[i])); } } { const size_t size_meta = 1024*ggml_tensor_overhead(); struct ggml_init_params params = { /*.mem_size =*/ size_meta, /*.mem_buffer =*/ nullptr, /*.no_alloc =*/ true, }; ctx_static = ggml_init(params); } { // The compute context needs a total of 3 compute graphs: forward pass + backwards pass (with/without optimizer step). const size_t size_meta = GGML_DEFAULT_GRAPH_SIZE*ggml_tensor_overhead() + 3*ggml_graph_overhead(); struct ggml_init_params params = { /*.mem_size =*/ size_meta, /*.mem_buffer =*/ nullptr, /*.no_alloc =*/ true, }; ctx_compute = ggml_init(params); } } ~mnist_model() { ggml_free(ctx_gguf); ggml_free(ctx_static); ggml_free(ctx_compute); ggml_backend_buffer_free(buf_gguf); ggml_backend_buffer_free(buf_static); ggml_backend_sched_free(backend_sched); for (ggml_backend_t backend : backends) { ggml_backend_free(backend); } } }; bool mnist_image_load(const std::string & fname, ggml_opt_dataset_t dataset); void mnist_image_print(FILE * f, ggml_opt_dataset_t dataset, const int iex); bool mnist_label_load(const std::string & fname, ggml_opt_dataset_t dataset); mnist_model mnist_model_init_from_file(const std::string & fname, const std::string & backend, const int nbatch_logical, const int nbatch_physical); mnist_model mnist_model_init_random(const std::string & arch, const std::string & backend, const int nbatch_logical, const int nbatch_physical); void mnist_model_build(mnist_model & model); ggml_opt_result_t mnist_model_eval(mnist_model & model, ggml_opt_dataset_t dataset); void mnist_model_train(mnist_model & model, ggml_opt_dataset_t dataset, const int nepoch, const float val_split); void mnist_model_save(mnist_model & model, const std::string & fname); ggml-org-ggml-7ec8045/examples/mnist/mnist-eval.cpp000066400000000000000000000040221506673203700222640ustar00rootroot00000000000000#include "ggml.h" #include "ggml-opt.h" #include "mnist-common.h" #include <cstdint> #include <cstdio> #include <cstdlib> #include <ctime> #include <string> #include <vector> #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #endif int main(int argc, char ** argv) { srand(time(NULL)); ggml_time_init(); if (argc != 4 && argc != 5) { fprintf(stderr, "Usage: %s mnist-fc-f32.gguf data/MNIST/raw/t10k-images-idx3-ubyte data/MNIST/raw/t10k-labels-idx1-ubyte [CPU/CUDA0]\n", argv[0]); exit(1); } ggml_opt_dataset_t dataset = ggml_opt_dataset_init(GGML_TYPE_F32, GGML_TYPE_F32, MNIST_NINPUT, MNIST_NCLASSES, MNIST_NTEST, MNIST_NBATCH_PHYSICAL); if (!mnist_image_load(argv[2], dataset)) { return 1; } if (!mnist_label_load(argv[3], dataset)) { return 1; } const int iex = rand() % MNIST_NTEST; mnist_image_print(stdout, dataset, iex); const std::string backend = argc >= 5 ?
argv[4] : ""; const int64_t t_start_us = ggml_time_us(); mnist_model model = mnist_model_init_from_file(argv[1], backend, MNIST_NBATCH_LOGICAL, MNIST_NBATCH_PHYSICAL); mnist_model_build(model); const int64_t t_load_us = ggml_time_us() - t_start_us; fprintf(stdout, "%s: loaded model in %.2lf ms\n", __func__, t_load_us / 1000.0); ggml_opt_result_t result_eval = mnist_model_eval(model, dataset); std::vector pred(MNIST_NTEST); ggml_opt_result_pred(result_eval, pred.data()); fprintf(stdout, "%s: predicted digit is %d\n", __func__, pred[iex]); double loss; double loss_unc; ggml_opt_result_loss(result_eval, &loss, &loss_unc); fprintf(stdout, "%s: test_loss=%.6lf+-%.6lf\n", __func__, loss, loss_unc); double accuracy; double accuracy_unc; ggml_opt_result_accuracy(result_eval, &accuracy, &accuracy_unc); fprintf(stdout, "%s: test_acc=%.2lf+-%.2lf%%\n", __func__, 100.0*accuracy, 100.0*accuracy_unc); ggml_opt_result_free(result_eval); return 0; } ggml-org-ggml-7ec8045/examples/mnist/mnist-train-cnn.py000077500000000000000000000063401506673203700230740ustar00rootroot00000000000000#!/usr/bin/env python3 import sys from time import time import gguf import numpy as np import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers def train(model_path): # Model / data parameters num_classes = 10 input_shape = (28, 28, 1) # Load the data and split it between train and test sets (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() # Scale images to the [0, 1] range x_train = x_train.astype("float32") / 255 x_test = x_test.astype("float32") / 255 x_train = np.expand_dims(x_train, -1) x_test = np.expand_dims(x_test, -1) print("x_train shape:", x_train.shape) print(x_train.shape[0], "train samples") print(x_test.shape[0], "test samples") # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) model = keras.Sequential( [ keras.Input(shape=input_shape, dtype=tf.float32), layers.Conv2D(8, kernel_size=(3, 3), padding="same", activation="relu", dtype=tf.float32), layers.MaxPooling2D(pool_size=(2, 2)), layers.Conv2D(16, kernel_size=(3, 3), padding="same", activation="relu", dtype=tf.float32), layers.MaxPooling2D(pool_size=(2, 2)), layers.Flatten(), layers.Dense(num_classes, activation="softmax", dtype=tf.float32), ] ) model.summary() batch_size = 1000 epochs = 30 model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) t_start = time() model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1) print(f"Training took {time()-t_start:.2f}s") score = model.evaluate(x_test, y_test, verbose=0) print(f"Test loss: {score[0]:.6f}") print(f"Test accuracy: {100*score[1]:.2f}%") gguf_writer = gguf.GGUFWriter(model_path, "mnist-cnn") conv1_kernel = model.layers[0].weights[0].numpy() conv1_kernel = np.moveaxis(conv1_kernel, [2, 3], [0, 1]) gguf_writer.add_tensor("conv1.kernel", conv1_kernel, raw_shape=(8, 1, 3, 3)) conv1_bias = model.layers[0].weights[1].numpy() gguf_writer.add_tensor("conv1.bias", conv1_bias, raw_shape=(1, 8, 1, 1)) conv2_kernel = model.layers[2].weights[0].numpy() conv2_kernel = np.moveaxis(conv2_kernel, [0, 1, 2, 3], [2, 3, 1, 0]) gguf_writer.add_tensor("conv2.kernel", conv2_kernel, raw_shape=(16, 8, 3, 3)) conv2_bias = model.layers[2].weights[1].numpy() gguf_writer.add_tensor("conv2.bias", conv2_bias, raw_shape=(1, 16, 1, 1)) dense_weight = model.layers[-1].weights[0].numpy() dense_weight = 
dense_weight.transpose() gguf_writer.add_tensor("dense.weight", dense_weight, raw_shape=(10, 7*7*16)) dense_bias = model.layers[-1].weights[1].numpy() gguf_writer.add_tensor("dense.bias", dense_bias) gguf_writer.write_header_to_file() gguf_writer.write_kv_data_to_file() gguf_writer.write_tensors_to_file() gguf_writer.close() print(f"GGUF model saved to '{model_path}'") if __name__ == '__main__': if len(sys.argv) != 2: print(f"Usage: {sys.argv[0]} <model_path>") sys.exit(1) train(sys.argv[1]) ggml-org-ggml-7ec8045/examples/mnist/mnist-train-fc.py000066400000000000000000000106711506673203700227050ustar00rootroot00000000000000import gguf import numpy as np import torch import torch.nn as nn import torchvision.datasets as dsets import torchvision.transforms as transforms from torch.autograd import Variable import sys from time import time input_size = 784 # img_size = (28,28) ---> 28*28=784 in total hidden_size = 500 # number of nodes at hidden layer num_classes = 10 # number of output classes discrete range [0,9] num_epochs = 30 # number of times which the entire dataset is passed throughout the model batch_size = 1000 # the size of input data used for one iteration lr = 1e-3 # size of step class Net(nn.Module): def __init__(self, input_size, hidden_size, num_classes): super(Net, self).__init__() self.fc1 = nn.Linear(input_size, hidden_size) self.relu = nn.ReLU() self.fc2 = nn.Linear(hidden_size, num_classes) def forward(self, x): out = self.fc1(x) out = self.relu(out) out = self.fc2(out) return out def train(model_path): train_data = dsets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True) test_data = dsets.MNIST(root='./data', train=False, transform=transforms.ToTensor()) assert len(train_data) == 60000 assert len(test_data) == 10000 kwargs_train_test = dict(batch_size=batch_size, num_workers=4, pin_memory=True) train_gen = torch.utils.data.DataLoader(dataset=train_data, shuffle=True, **kwargs_train_test) test_gen = torch.utils.data.DataLoader(dataset=test_data, shuffle=False, **kwargs_train_test) net = Net(input_size, hidden_size, num_classes) if torch.cuda.is_available(): net.cuda() loss_function = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(net.parameters(), lr=lr) t_start = time() for epoch in range(num_epochs): loss_history = [] ncorrect = 0 for i, (images, labels) in enumerate(train_gen): images = Variable(images.view(-1, 28*28)) labels = Variable(labels) if torch.cuda.is_available(): images = images.cuda() labels = labels.cuda() optimizer.zero_grad() outputs = net(images) loss = loss_function(outputs, labels) loss_history.append(loss.cpu().data) _, predictions = torch.max(outputs, 1) ncorrect += (predictions == labels).sum() loss.backward() optimizer.step() if (i + 1)*batch_size % 10000 == 0: loss_mean = np.mean(loss_history) accuracy = ncorrect / ((i + 1) * batch_size) print( f"Epoch [{epoch+1:02d}/{num_epochs}], " f"Step [{(i+1)*batch_size:05d}/{len(train_data)}], " f"Loss: {loss_mean:.4f}, Accuracy: {100*accuracy:.2f}%") print() print(f"Training took {time()-t_start:.2f}s") loss_history = [] ncorrect = 0 for i, (images, labels) in enumerate(test_gen): images = Variable(images.view(-1, 28*28)) labels = Variable(labels) if torch.cuda.is_available(): images = images.cuda() labels = labels.cuda() outputs = net(images) loss = loss_function(outputs, labels) loss_history.append(loss.cpu().data) _, predictions = torch.max(outputs, 1) ncorrect += (predictions == labels).sum().cpu().numpy() loss_mean = np.mean(loss_history) loss_uncertainty = np.std(loss_history) /
np.sqrt(len(loss_history) - 1) accuracy_mean = ncorrect / (len(test_gen) * batch_size) accuracy_uncertainty = np.sqrt(accuracy_mean * (1.0 - accuracy_mean) / (len(test_gen) * batch_size)) print() print(f"Test loss: {loss_mean:.6f}+-{loss_uncertainty:.6f}, Test accuracy: {100*accuracy_mean:.2f}+-{100*accuracy_uncertainty:.2f}%") gguf_writer = gguf.GGUFWriter(model_path, "mnist-fc") print() print(f"Model tensors saved to {model_path}:") for tensor_name in net.state_dict().keys(): data = net.state_dict()[tensor_name].squeeze().cpu().numpy() print(tensor_name, "\t", data.shape) gguf_writer.add_tensor(tensor_name, data) gguf_writer.write_header_to_file() gguf_writer.write_kv_data_to_file() gguf_writer.write_tensors_to_file() gguf_writer.close() if __name__ == '__main__': if len(sys.argv) != 2: print(f"Usage: {sys.argv[0]} <model_path>") sys.exit(1) train(sys.argv[1]) ggml-org-ggml-7ec8045/examples/mnist/mnist-train.cpp000066400000000000000000000026321506673203700224470ustar00rootroot00000000000000#include "ggml-opt.h" #include "mnist-common.h" #include <cstdio> #include <cstdlib> #include <string> #include <vector> #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #endif int main(int argc, char ** argv) { if (argc != 5 && argc != 6) { fprintf(stderr, "Usage: %s mnist-fc mnist-fc-f32.gguf data/MNIST/raw/train-images-idx3-ubyte data/MNIST/raw/train-labels-idx1-ubyte [CPU/CUDA0]\n", argv[0]); exit(1); } // The MNIST model is so small that the overhead from data shuffling is non-negligible, especially with CUDA. // With a shard size of 10 this overhead is greatly reduced at the cost of less shuffling (does not seem to have a significant impact). // A batch of 500 images then consists of 50 random shards of size 10 instead of 500 random shards of size 1. ggml_opt_dataset_t dataset = ggml_opt_dataset_init(GGML_TYPE_F32, GGML_TYPE_F32, MNIST_NINPUT, MNIST_NCLASSES, MNIST_NTRAIN, /*ndata_shard =*/ 10); if (!mnist_image_load(argv[3], dataset)) { return 1; } if (!mnist_label_load(argv[4], dataset)) { return 1; } mnist_model model = mnist_model_init_random(argv[1], argc >= 6 ?
argv[5] : "", MNIST_NBATCH_LOGICAL, MNIST_NBATCH_PHYSICAL); mnist_model_build(model); mnist_model_train(model, dataset, /*nepoch =*/ 30, /*val_split =*/ 0.05f); mnist_model_save(model, argv[2]); } ggml-org-ggml-7ec8045/examples/mnist/server.py000066400000000000000000000022601506673203700213530ustar00rootroot00000000000000import http.server import socketserver import os import sys DIRECTORY = os.path.abspath(os.path.join(os.path.dirname(__file__), 'web')) PORT = 8000 class CustomHTTPRequestHandler(http.server.SimpleHTTPRequestHandler): def __init__(self, *args, **kwargs): super().__init__(*args, directory=DIRECTORY, **kwargs) def end_headers(self): # Add required headers for SharedArrayBuffer self.send_header("Cross-Origin-Opener-Policy", "same-origin") self.send_header("Cross-Origin-Embedder-Policy", "require-corp") self.send_header("Access-Control-Allow-Origin", "*") super().end_headers() # Enable address reuse class CustomServer(socketserver.TCPServer): allow_reuse_address = True try: with CustomServer(("", PORT), CustomHTTPRequestHandler) as httpd: print(f"Serving directory '{DIRECTORY}' at http://localhost:{PORT}") print(f"Application context root: http://localhost:{PORT}/") try: httpd.serve_forever() except KeyboardInterrupt: print("\nServer stopped.") # Force complete exit sys.exit(0) except OSError as e: print(f"Error: {e}") sys.exit(1) ggml-org-ggml-7ec8045/examples/mnist/web/000077500000000000000000000000001506673203700202505ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/mnist/web/.gitignore000066400000000000000000000000021506673203700222300ustar00rootroot00000000000000* ggml-org-ggml-7ec8045/examples/mnist/web/index.html000066400000000000000000000127351506673203700222550ustar00rootroot00000000000000 MNIST with GGML

MNIST digit recognizer with GGML
Loading model and data set, please wait ...
Your browser does not support the HTML canvas tag.
ggml-org-ggml-7ec8045/examples/perf-metal/000077500000000000000000000000001506673203700203755ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/perf-metal/CMakeLists.txt000066400000000000000000000002161506673203700231340ustar00rootroot00000000000000# # perf-metal set(TEST_TARGET perf-metal) add_executable(${TEST_TARGET} perf-metal.cpp) target_link_libraries(${TEST_TARGET} PRIVATE ggml) ggml-org-ggml-7ec8045/examples/perf-metal/perf-metal.cpp000066400000000000000000000105531506673203700231410ustar00rootroot00000000000000// basic tool to experiment with the Metal backend // // 1. Get GPU trace of a dummy graph: // // rm -rf /tmp/perf-metal.gputrace // make -j perf-metal && METAL_CAPTURE_ENABLED=1 ./bin/perf-metal // open /tmp/perf-metal.gputrace // // https://github.com/ggerganov/llama.cpp/issues/9507 // #include "ggml.h" #include "ggml-alloc.h" #include "ggml-backend.h" #include "ggml-metal.h" #include <cstdio> #include <cstdlib> #include <vector> int main(int argc, char ** argv) { int n_op = 1024; int n_iter = 128; if (argc > 1) { n_op = std::atoi(argv[1]); } if (argc > 2) { n_iter = std::atoi(argv[2]); } printf("%s: n_op = %d, n_iter = %d\n", __func__, n_op, n_iter); const int ne00 = 8; const int ne01 = 8; const int ne11 = 8; std::vector<float> data0(ne00*ne01, 1.0f); std::vector<float> data1(ne00*ne01, 1.0f/ne00); ggml_backend_t backend = ggml_backend_metal_init(); if (!backend) { fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__); return 1; } const size_t ctx_size = 2 * ggml_tensor_overhead(); struct ggml_init_params params = { /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; struct ggml_context * ctx = ggml_init(params); struct ggml_tensor * t0 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne00, ne01); struct ggml_tensor * t1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne00, ne11); ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx, backend); ggml_backend_tensor_set(t0, data0.data(), 0, ggml_nbytes(t0)); ggml_backend_tensor_set(t1, data1.data(), 0, ggml_nbytes(t1)); struct ggml_cgraph * gf = NULL; struct ggml_context * ctx_cgraph = NULL; // create a dummy compute graph: // // x = mul_mat(t0, t1) // x = x * 1.0f // x = mul_mat(x, t1) // x = x * 1.0f // ... repeat n_op times ... // { struct ggml_init_params params0 = { /*.mem_size =*/ 4*n_op*ggml_tensor_overhead() + ggml_graph_overhead(), /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; ctx_cgraph = ggml_init(params0); gf = ggml_new_graph_custom(ctx_cgraph, 4*n_op, false); struct ggml_tensor * cur = ggml_mul_mat(ctx_cgraph, t0, t1); cur = ggml_scale(ctx_cgraph, cur, 1.0f); for (int i = 0; i < n_op - 1; i++) { cur = ggml_mul_mat(ctx_cgraph, cur, t1); cur = ggml_scale(ctx_cgraph, cur, 1.0f); } cur = ggml_scale(ctx_cgraph, cur, 42.0f); ggml_build_forward_expand(gf, cur); } printf("%s: graph nodes = %d\n", __func__, ggml_graph_n_nodes(gf)); ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend)); ggml_gallocr_alloc_graph(allocr, gf); { // warm-up ggml_backend_graph_compute(backend, gf); const int64_t t_start = ggml_time_us(); for (int iter = 0; iter < n_iter; iter++) { ggml_backend_graph_compute(backend, gf); } const int64_t t_end = ggml_time_us(); // actual trace ggml_backend_metal_capture_next_compute(backend); ggml_backend_graph_compute(backend, gf); //std::this_thread::sleep_for(std::chrono::milliseconds(1000)); // NOTE: these intervals do not appear in the XCode trace!
ggml_backend_metal_capture_next_compute(backend); ggml_backend_graph_compute(backend, gf); //std::this_thread::sleep_for(std::chrono::milliseconds(1000)); // NOTE: these intervals do not appear in the XCode trace! ggml_backend_metal_capture_next_compute(backend); ggml_backend_graph_compute(backend, gf); printf("%s: time = %f ms\n", __func__, (t_end - t_start) / 1000.0 / n_iter); } { struct ggml_tensor * res = ggml_graph_node(gf, -1); std::vector data(res->ne[0] * res->ne[1], 0.0f); ggml_backend_tensor_get(res, data.data(), 0, ggml_nbytes(res)); for (int i1 = 0; i1 < res->ne[1]; i1++) { for (int i0 = 0; i0 < res->ne[0]; i0++) { printf("%f ", data[i1*res->ne[0] + i0]); } printf("\n"); } } ggml_free(ctx_cgraph); ggml_gallocr_free(allocr); ggml_free(ctx); ggml_backend_buffer_free(buffer); ggml_backend_free(backend); return 0; } ggml-org-ggml-7ec8045/examples/prompts/000077500000000000000000000000001506673203700200455ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/prompts/dolly-v2.txt000066400000000000000000000200361506673203700222570ustar00rootroot00000000000000Hello World! => 12092,3645,2 I can't believe it's already Friday!" => 42,476,626,2868,352,434,2168,6794,1476 The URL for the website is https://www.example.com." => 510,10611,323,253,4422,310,5987,1358,2700,15,11667,15,681,449 "She said, 'I love to travel.'" => 3,2993,753,13,686,42,2389,281,4288,18574 'The temperature is 25.5°C.' => 8,510,3276,310,2030,15,22,3272,36,2464 "Let's meet at 2:30 p.m. in the park." => 3,1466,434,2525,387,374,27,1229,268,15,78,15,275,253,5603,449 The book costs $19.99 => 510,1984,4815,370,746,15,1525 "John's favorite color is blue." => 3,8732,434,7583,3295,310,4797,449 Th@nk y0u f0r y0ur h3lp! => 1044,33,30664,340,17,86,269,17,83,340,17,321,288,20,24343,2 C@n I g3t a c0ffee, pl3@se? => 36,33,79,309,305,20,85,247,260,17,71,6851,13,499,20,33,339,32 W0w! Th@t's @m@zing! => 56,17,88,2,596,33,85,434,1214,78,33,8537,2 H0w 4re y0u t0d@y? => 41,17,88,577,250,340,17,86,246,17,69,33,90,32 I l0ve t0 tr@vel @r0und the w0rld. => 42,298,17,306,246,17,492,33,652,1214,83,17,1504,253,259,17,83,392,15 Wh@t's y0ur f@v0rite m0vie? => 3152,33,85,434,340,17,321,269,33,87,17,3852,278,17,25858,32 The cat is sleeping on the mat. => 510,5798,310,14343,327,253,1111,15 I need to buy some groceries for dinner. => 42,878,281,4489,690,45160,447,323,8955,15 The sun is shining brightly in the sky. => 510,5101,310,28115,43925,275,253,8467,15 She is reading a book in the park. => 2993,310,4361,247,1984,275,253,5603,15 We went for a walk on the beach yesterday. => 1231,2427,323,247,2940,327,253,11600,11066,15 He plays the guitar like a pro. => 1328,7120,253,12609,751,247,354,15 They are going to the movies tonight. => 3726,403,1469,281,253,11321,11608,15 The flowers are blooming in the garden. => 510,12405,403,30601,272,275,253,10329,15 I enjoy listening to classical music. => 42,4264,11298,281,8946,3440,15 We need to buy groceries for the week. => 1231,878,281,4489,45160,447,323,253,2129,15 The dog is chasing its tail in circles. => 510,4370,310,31702,697,8105,275,14240,15 She is wearing a beautiful red dress. => 2993,310,9398,247,5389,2502,7619,15 He is a talented actor in Hollywood. => 1328,310,247,21220,12353,275,14759,15 The children are playing in the playground. => 510,2151,403,4882,275,253,41008,15 I'm going to visit my grandparents this weekend. => 42,1353,1469,281,4143,619,37186,436,8849,15 The coffee tastes bitter without sugar. => 510,8574,27491,17123,1293,8618,15 They are planning a surprise party for her. 
=> 3726,403,7219,247,9326,3128,323,617,15 She sings like an angel on stage. => 2993,44718,751,271,23087,327,3924,15 We should take a vacation to relax. => 1231,943,1379,247,18125,281,7921,15 He is studying medicine at the university. => 1328,310,12392,9921,387,253,9835,15 The rain is pouring heavily outside. => 510,9313,310,31226,11306,3345,15 I enjoy watching romantic movies. => 42,4264,7487,18109,11321,15 They are celebrating their anniversary today. => 3726,403,28765,616,19054,3063,15 She dances gracefully to the music. => 2993,47078,14426,2920,281,253,3440,15 He is an excellent basketball player. => 1328,310,271,7126,14648,4760,15 The baby is sleeping soundly in the crib. => 510,6858,310,14343,3590,314,275,253,260,725,15 I need to finish my homework before dinner. => 42,878,281,8416,619,32110,1078,8955,15 They are organizing a charity event next month. => 3726,403,26169,247,19489,2362,1735,1770,15 She is cooking a delicious meal for us. => 2993,310,12398,247,17319,11484,323,441,15 We should go hiking in the mountains. => 1231,943,564,33061,275,253,14700,15 The car broke down on the way to work. => 510,1113,9377,1066,327,253,1039,281,789,15 He loves playing video games in his free time. => 1328,14528,4882,3492,3958,275,521,1959,673,15 The birds are chirping in the trees. => 510,11260,403,36494,14650,275,253,7139,15 I want to learn how to play the piano. => 42,971,281,3037,849,281,1132,253,18542,15 They are building a new shopping mall in the city. => 3726,403,3652,247,747,12701,28974,275,253,2846,15 She is writing a novel in her spare time. => 2993,310,4028,247,4460,275,617,18345,673,15 We are going to the zoo this Saturday. => 1231,403,1469,281,253,41089,436,7814,15 The cake looks delicious with chocolate frosting. => 510,15221,4453,17319,342,14354,34724,272,15 He is a talented painter who sells his artwork. => 1328,310,247,21220,27343,665,27924,521,28227,15 The students are studying for their exams. => 510,3484,403,12392,323,616,34666,15 I enjoy swimming in the ocean. => 42,4264,17120,275,253,12927,15 They are renovating their house. => 3726,403,30074,839,616,2419,15 She is practicing yoga to stay healthy. => 2993,310,25815,25551,281,3297,5875,15 We should plant flowers in the garden. => 1231,943,4444,12405,275,253,10329,15 The traffic is heavy during rush hour. => 510,7137,310,5536,1309,16949,4964,15 He is a skilled chef who creates amazing dishes. => 1328,310,247,18024,26540,665,10513,8644,17114,15 The baby is crawling on the floor. => 510,6858,310,44922,327,253,5254,15 I need to buy a new pair of shoes. => 42,878,281,4489,247,747,4667,273,12682,15 They are going on a road trip across the country. => 3726,403,1469,327,247,3971,7408,2439,253,2586,15 She is playing the piano beautifully. => 2993,310,4882,253,18542,27839,15 We are going to a concert tomorrow night. => 1231,403,1469,281,247,12699,10873,2360,15 The cake tastes delicious with vanilla frosting. => 510,15221,27491,17319,342,26724,34724,272,15 He is a dedicated teacher who inspires his students. => 1328,310,247,9940,9732,665,6381,2731,521,3484,15 The students are participating in a science fair. => 510,3484,403,15299,275,247,5859,4344,15 I enjoy hiking in the mountains. => 42,4264,33061,275,253,14700,15 They are organizing a beach cleanup next weekend. => 3726,403,26169,247,11600,34709,1735,8849,15 She is taking photographs of nature. => 2993,310,3192,15928,273,3753,15 We should try a new restaurant in town. => 1231,943,1611,247,747,10301,275,3874,15 The traffic is moving slowly on the highway. 
=> 510,7137,310,4886,7808,327,253,17657,15 He is a talented singer with a beautiful voice. => 1328,310,247,21220,16057,342,247,5389,4318,15 The baby is laughing and giggling. => 510,6858,310,17053,285,41542,1981,15 I need to do laundry and wash my clothes. => 42,878,281,513,29023,285,14841,619,10015,15 They are planning a trip to Europe. => 3726,403,7219,247,7408,281,3060,15 She is learning how to play the guitar. => 2993,310,4715,849,281,1132,253,12609,15 We are going to a museum this Sunday. => 1231,403,1469,281,247,16064,436,6926,15 The coffee smells amazing in the morning. => 510,8574,34247,8644,275,253,4131,15 He is a hardworking farmer who grows crops. => 1328,310,247,1892,21107,24718,665,17202,19492,15 The students are presenting their research projects. => 510,3484,403,15250,616,2561,6493,15 I enjoy playing soccer with my friends. => 42,4264,4882,20391,342,619,3858,15 They are volunteering at a local shelter. => 3726,403,10057,2158,387,247,1980,17824,15 She is practicing martial arts for self-defense. => 2993,310,25815,29731,14635,323,1881,14,29337,15 We should try a new recipe for dinner. => 1231,943,1611,247,747,13612,323,8955,15 The traffic is congest => 510,7137,310,25801 The sun is shining brightly today. => 510,5101,310,28115,43925,3063,15 I enjoy reading books in my free time. => 42,4264,4361,5098,275,619,1959,673,15 She plays the piano beautifully. => 2993,7120,253,18542,27839,15 The cat chased the mouse around the room. => 510,5798,40754,253,6521,1475,253,2316,15 I love eating pizza with extra cheese. => 42,2389,9123,22534,342,4465,12173,15 He always wears a hat wherever he goes. => 1328,1900,31394,247,7856,20312,344,4566,15 The flowers in the garden are blooming. => 510,12405,275,253,10329,403,30601,272,15 She danced gracefully on the stage. => 2993,39860,14426,2920,327,253,3924,15 The dog barked loudly in the park. => 510,4370,21939,264,31311,275,253,5603,15 We went swimming in the ocean yesterday. => 1231,2427,17120,275,253,12927,11066,15 He speaks fluent French and Spanish. => 1328,16544,2938,290,5112,285,9883,15 The train arrived at the station on time. => 510,6194,7244,387,253,4660,327,673,15 She cooked a delicious meal for her family. => 2993,18621,247,17319,11484,323,617,2021,15 ggml-org-ggml-7ec8045/examples/prompts/gpt-2-chinese.txt000066400000000000000000000001141506673203700231470ustar00rootroot00000000000000请问洗手间在哪里? => 6435,7309,3819,2797,7313,1762,1525,7027,8043 ggml-org-ggml-7ec8045/examples/prompts/gpt-2.txt000066400000000000000000000177601506673203700215520ustar00rootroot00000000000000Hello World! => 15496,2159,0 I can't believe it's already Friday!" => 40,460,470,1975,340,338,1541,3217,2474 The URL for the website is https://www.example.com." => 464,10289,329,262,3052,318,3740,1378,2503,13,20688,13,785,526 "She said, 'I love to travel.'" => 1,3347,531,11,705,40,1842,284,3067,11496 'The temperature is 25.5°C.' => 6,464,5951,318,1679,13,20,7200,34,2637 "Let's meet at 2:30 p.m. in the park." => 1,5756,338,1826,379,362,25,1270,279,13,76,13,287,262,3952,526 The book costs $19.99 => 464,1492,3484,720,1129,13,2079 "John's favorite color is blue." => 1,7554,338,4004,3124,318,4171,526 Th@nk y0u f0r y0ur h3lp! => 817,31,77,74,331,15,84,277,15,81,331,15,333,289,18,34431,0 C@n I g3t a c0ffee, pl3@se? => 34,31,77,314,308,18,83,257,269,15,5853,11,458,18,31,325,30 W0w! Th@t's @m@zing! => 54,15,86,0,536,31,83,338,2488,76,31,9510,0 H0w 4re y0u t0d@y? => 39,15,86,604,260,331,15,84,256,15,67,31,88,30 I l0ve t0 tr@vel @r0und the w0rld. 
=> 40,300,15,303,256,15,491,31,626,2488,81,15,917,262,266,15,81,335,13 Wh@t's y0ur f@v0rite m0vie? => 1199,31,83,338,331,15,333,277,31,85,15,6525,285,15,85,494,30 The cat is sleeping on the mat. => 464,3797,318,11029,319,262,2603,13 I need to buy some groceries for dinner. => 40,761,284,2822,617,38464,329,8073,13 The sun is shining brightly in the sky. => 464,4252,318,22751,35254,287,262,6766,13 She is reading a book in the park. => 3347,318,3555,257,1492,287,262,3952,13 We went for a walk on the beach yesterday. => 1135,1816,329,257,2513,319,262,10481,7415,13 He plays the guitar like a pro. => 1544,5341,262,10047,588,257,386,13 They are going to the movies tonight. => 2990,389,1016,284,262,6918,9975,13 The flowers are blooming in the garden. => 464,12734,389,24924,3383,287,262,11376,13 I enjoy listening to classical music. => 40,2883,8680,284,15993,2647,13 We need to buy groceries for the week. => 1135,761,284,2822,38464,329,262,1285,13 The dog is chasing its tail in circles. => 464,3290,318,20023,663,7894,287,13332,13 She is wearing a beautiful red dress. => 3347,318,5762,257,4950,2266,6576,13 He is a talented actor in Hollywood. => 1544,318,257,12356,8674,287,8502,13 The children are playing in the playground. => 464,1751,389,2712,287,262,24817,13 I'm going to visit my grandparents this weekend. => 40,1101,1016,284,3187,616,28571,428,5041,13 The coffee tastes bitter without sugar. => 464,6891,18221,12922,1231,7543,13 They are planning a surprise party for her. => 2990,389,5410,257,5975,2151,329,607,13 She sings like an angel on stage. => 3347,33041,588,281,18304,319,3800,13 We should take a vacation to relax. => 1135,815,1011,257,14600,284,8960,13 He is studying medicine at the university. => 1544,318,11065,9007,379,262,6403,13 The rain is pouring heavily outside. => 464,6290,318,23147,7272,2354,13 I enjoy watching romantic movies. => 40,2883,4964,14348,6918,13 They are celebrating their anniversary today. => 2990,389,17499,511,11162,1909,13 She dances gracefully to the music. => 3347,38207,11542,2759,284,262,2647,13 He is an excellent basketball player. => 1544,318,281,6275,9669,2137,13 The baby is sleeping soundly in the crib. => 464,5156,318,11029,2128,306,287,262,48083,13 I need to finish my homework before dinner. => 40,761,284,5461,616,26131,878,8073,13 They are organizing a charity event next month. => 2990,389,16924,257,11016,1785,1306,1227,13 She is cooking a delicious meal for us. => 3347,318,10801,257,12625,9799,329,514,13 We should go hiking in the mountains. => 1135,815,467,24522,287,262,12269,13 The car broke down on the way to work. => 464,1097,6265,866,319,262,835,284,670,13 He loves playing video games in his free time. => 1544,10408,2712,2008,1830,287,465,1479,640,13 The birds are chirping in the trees. => 464,10087,389,442,343,13886,287,262,7150,13 I want to learn how to play the piano. => 40,765,284,2193,703,284,711,262,19132,13 They are building a new shopping mall in the city. => 2990,389,2615,257,649,9735,17374,287,262,1748,13 She is writing a novel in her spare time. => 3347,318,3597,257,5337,287,607,13952,640,13 We are going to the zoo this Saturday. => 1135,389,1016,284,262,26626,428,3909,13 The cake looks delicious with chocolate frosting. => 464,12187,3073,12625,351,11311,21682,278,13 He is a talented painter who sells his artwork. => 1544,318,257,12356,34537,508,16015,465,16257,13 The students are studying for their exams. => 464,2444,389,11065,329,511,26420,13 I enjoy swimming in the ocean. => 40,2883,14899,287,262,9151,13 They are renovating their house. 
=> 2990,389,24317,803,511,2156,13 She is practicing yoga to stay healthy. => 3347,318,18207,20351,284,2652,5448,13 We should plant flowers in the garden. => 1135,815,4618,12734,287,262,11376,13 The traffic is heavy during rush hour. => 464,4979,318,4334,1141,10484,1711,13 He is a skilled chef who creates amazing dishes. => 1544,318,257,14297,21221,508,8075,4998,16759,13 The baby is crawling on the floor. => 464,5156,318,34499,319,262,4314,13 I need to buy a new pair of shoes. => 40,761,284,2822,257,649,5166,286,10012,13 They are going on a road trip across the country. => 2990,389,1016,319,257,2975,5296,1973,262,1499,13 She is playing the piano beautifully. => 3347,318,2712,262,19132,21104,13 We are going to a concert tomorrow night. => 1135,389,1016,284,257,10010,9439,1755,13 The cake tastes delicious with vanilla frosting. => 464,12187,18221,12625,351,16858,21682,278,13 He is a dedicated teacher who inspires his students. => 1544,318,257,7256,4701,508,38934,465,2444,13 The students are participating in a science fair. => 464,2444,389,11983,287,257,3783,3148,13 I enjoy hiking in the mountains. => 40,2883,24522,287,262,12269,13 They are organizing a beach cleanup next weekend. => 2990,389,16924,257,10481,27425,1306,5041,13 She is taking photographs of nature. => 3347,318,2263,12566,286,3450,13 We should try a new restaurant in town. => 1135,815,1949,257,649,7072,287,3240,13 The traffic is moving slowly on the highway. => 464,4979,318,3867,6364,319,262,12763,13 He is a talented singer with a beautiful voice. => 1544,318,257,12356,14015,351,257,4950,3809,13 The baby is laughing and giggling. => 464,5156,318,14376,290,30442,1359,13 I need to do laundry and wash my clothes. => 40,761,284,466,25724,290,13502,616,8242,13 They are planning a trip to Europe. => 2990,389,5410,257,5296,284,2031,13 She is learning how to play the guitar. => 3347,318,4673,703,284,711,262,10047,13 We are going to a museum this Sunday. => 1135,389,1016,284,257,13257,428,3502,13 The coffee smells amazing in the morning. => 464,6891,25760,4998,287,262,3329,13 He is a hardworking farmer who grows crops. => 1544,318,257,1327,16090,18739,508,13676,14450,13 The students are presenting their research projects. => 464,2444,389,17728,511,2267,4493,13 I enjoy playing soccer with my friends. => 40,2883,2712,11783,351,616,2460,13 They are volunteering at a local shelter. => 2990,389,41434,379,257,1957,11772,13 She is practicing martial arts for self-defense. => 3347,318,18207,15618,10848,329,2116,12,19774,13 We should try a new recipe for dinner. => 1135,815,1949,257,649,8364,329,8073,13 The traffic is congest => 464,4979,318,22791 The sun is shining brightly today. => 464,4252,318,22751,35254,1909,13 I enjoy reading books in my free time. => 40,2883,3555,3835,287,616,1479,640,13 She plays the piano beautifully. => 3347,5341,262,19132,21104,13 The cat chased the mouse around the room. => 464,3797,26172,262,10211,1088,262,2119,13 I love eating pizza with extra cheese. => 40,1842,6600,14256,351,3131,9891,13 He always wears a hat wherever he goes. => 1544,1464,17326,257,6877,14530,339,2925,13 The flowers in the garden are blooming. => 464,12734,287,262,11376,389,24924,3383,13 She danced gracefully on the stage. => 3347,39480,11542,2759,319,262,3800,13 The dog barked loudly in the park. => 464,3290,21405,276,23112,287,262,3952,13 We went swimming in the ocean yesterday. => 1135,1816,14899,287,262,9151,7415,13 He speaks fluent French and Spanish. => 1544,9209,43472,4141,290,7897,13 The train arrived at the station on time. 
=> 464,4512,5284,379,262,4429,319,640,13 She cooked a delicious meal for her family. => 3347,15847,257,12625,9799,329,607,1641,13 ggml-org-ggml-7ec8045/examples/prompts/gpt-j.txt000066400000000000000000000177601506673203700216420ustar00rootroot00000000000000Hello World! => 15496,2159,0 I can't believe it's already Friday!" => 40,460,470,1975,340,338,1541,3217,2474 The URL for the website is https://www.example.com." => 464,10289,329,262,3052,318,3740,1378,2503,13,20688,13,785,526 "She said, 'I love to travel.'" => 1,3347,531,11,705,40,1842,284,3067,11496 'The temperature is 25.5°C.' => 6,464,5951,318,1679,13,20,7200,34,2637 "Let's meet at 2:30 p.m. in the park." => 1,5756,338,1826,379,362,25,1270,279,13,76,13,287,262,3952,526 The book costs $19.99 => 464,1492,3484,720,1129,13,2079 "John's favorite color is blue." => 1,7554,338,4004,3124,318,4171,526 Th@nk y0u f0r y0ur h3lp! => 817,31,77,74,331,15,84,277,15,81,331,15,333,289,18,34431,0 C@n I g3t a c0ffee, pl3@se? => 34,31,77,314,308,18,83,257,269,15,5853,11,458,18,31,325,30 W0w! Th@t's @m@zing! => 54,15,86,0,536,31,83,338,2488,76,31,9510,0 H0w 4re y0u t0d@y? => 39,15,86,604,260,331,15,84,256,15,67,31,88,30 I l0ve t0 tr@vel @r0und the w0rld. => 40,300,15,303,256,15,491,31,626,2488,81,15,917,262,266,15,81,335,13 Wh@t's y0ur f@v0rite m0vie? => 1199,31,83,338,331,15,333,277,31,85,15,6525,285,15,85,494,30 The cat is sleeping on the mat. => 464,3797,318,11029,319,262,2603,13 I need to buy some groceries for dinner. => 40,761,284,2822,617,38464,329,8073,13 The sun is shining brightly in the sky. => 464,4252,318,22751,35254,287,262,6766,13 She is reading a book in the park. => 3347,318,3555,257,1492,287,262,3952,13 We went for a walk on the beach yesterday. => 1135,1816,329,257,2513,319,262,10481,7415,13 He plays the guitar like a pro. => 1544,5341,262,10047,588,257,386,13 They are going to the movies tonight. => 2990,389,1016,284,262,6918,9975,13 The flowers are blooming in the garden. => 464,12734,389,24924,3383,287,262,11376,13 I enjoy listening to classical music. => 40,2883,8680,284,15993,2647,13 We need to buy groceries for the week. => 1135,761,284,2822,38464,329,262,1285,13 The dog is chasing its tail in circles. => 464,3290,318,20023,663,7894,287,13332,13 She is wearing a beautiful red dress. => 3347,318,5762,257,4950,2266,6576,13 He is a talented actor in Hollywood. => 1544,318,257,12356,8674,287,8502,13 The children are playing in the playground. => 464,1751,389,2712,287,262,24817,13 I'm going to visit my grandparents this weekend. => 40,1101,1016,284,3187,616,28571,428,5041,13 The coffee tastes bitter without sugar. => 464,6891,18221,12922,1231,7543,13 They are planning a surprise party for her. => 2990,389,5410,257,5975,2151,329,607,13 She sings like an angel on stage. => 3347,33041,588,281,18304,319,3800,13 We should take a vacation to relax. => 1135,815,1011,257,14600,284,8960,13 He is studying medicine at the university. => 1544,318,11065,9007,379,262,6403,13 The rain is pouring heavily outside. => 464,6290,318,23147,7272,2354,13 I enjoy watching romantic movies. => 40,2883,4964,14348,6918,13 They are celebrating their anniversary today. => 2990,389,17499,511,11162,1909,13 She dances gracefully to the music. => 3347,38207,11542,2759,284,262,2647,13 He is an excellent basketball player. => 1544,318,281,6275,9669,2137,13 The baby is sleeping soundly in the crib. => 464,5156,318,11029,2128,306,287,262,48083,13 I need to finish my homework before dinner. 
=> 40,761,284,5461,616,26131,878,8073,13 They are organizing a charity event next month. => 2990,389,16924,257,11016,1785,1306,1227,13 She is cooking a delicious meal for us. => 3347,318,10801,257,12625,9799,329,514,13 We should go hiking in the mountains. => 1135,815,467,24522,287,262,12269,13 The car broke down on the way to work. => 464,1097,6265,866,319,262,835,284,670,13 He loves playing video games in his free time. => 1544,10408,2712,2008,1830,287,465,1479,640,13 The birds are chirping in the trees. => 464,10087,389,442,343,13886,287,262,7150,13 I want to learn how to play the piano. => 40,765,284,2193,703,284,711,262,19132,13 They are building a new shopping mall in the city. => 2990,389,2615,257,649,9735,17374,287,262,1748,13 She is writing a novel in her spare time. => 3347,318,3597,257,5337,287,607,13952,640,13 We are going to the zoo this Saturday. => 1135,389,1016,284,262,26626,428,3909,13 The cake looks delicious with chocolate frosting. => 464,12187,3073,12625,351,11311,21682,278,13 He is a talented painter who sells his artwork. => 1544,318,257,12356,34537,508,16015,465,16257,13 The students are studying for their exams. => 464,2444,389,11065,329,511,26420,13 I enjoy swimming in the ocean. => 40,2883,14899,287,262,9151,13 They are renovating their house. => 2990,389,24317,803,511,2156,13 She is practicing yoga to stay healthy. => 3347,318,18207,20351,284,2652,5448,13 We should plant flowers in the garden. => 1135,815,4618,12734,287,262,11376,13 The traffic is heavy during rush hour. => 464,4979,318,4334,1141,10484,1711,13 He is a skilled chef who creates amazing dishes. => 1544,318,257,14297,21221,508,8075,4998,16759,13 The baby is crawling on the floor. => 464,5156,318,34499,319,262,4314,13 I need to buy a new pair of shoes. => 40,761,284,2822,257,649,5166,286,10012,13 They are going on a road trip across the country. => 2990,389,1016,319,257,2975,5296,1973,262,1499,13 She is playing the piano beautifully. => 3347,318,2712,262,19132,21104,13 We are going to a concert tomorrow night. => 1135,389,1016,284,257,10010,9439,1755,13 The cake tastes delicious with vanilla frosting. => 464,12187,18221,12625,351,16858,21682,278,13 He is a dedicated teacher who inspires his students. => 1544,318,257,7256,4701,508,38934,465,2444,13 The students are participating in a science fair. => 464,2444,389,11983,287,257,3783,3148,13 I enjoy hiking in the mountains. => 40,2883,24522,287,262,12269,13 They are organizing a beach cleanup next weekend. => 2990,389,16924,257,10481,27425,1306,5041,13 She is taking photographs of nature. => 3347,318,2263,12566,286,3450,13 We should try a new restaurant in town. => 1135,815,1949,257,649,7072,287,3240,13 The traffic is moving slowly on the highway. => 464,4979,318,3867,6364,319,262,12763,13 He is a talented singer with a beautiful voice. => 1544,318,257,12356,14015,351,257,4950,3809,13 The baby is laughing and giggling. => 464,5156,318,14376,290,30442,1359,13 I need to do laundry and wash my clothes. => 40,761,284,466,25724,290,13502,616,8242,13 They are planning a trip to Europe. => 2990,389,5410,257,5296,284,2031,13 She is learning how to play the guitar. => 3347,318,4673,703,284,711,262,10047,13 We are going to a museum this Sunday. => 1135,389,1016,284,257,13257,428,3502,13 The coffee smells amazing in the morning. => 464,6891,25760,4998,287,262,3329,13 He is a hardworking farmer who grows crops. => 1544,318,257,1327,16090,18739,508,13676,14450,13 The students are presenting their research projects. 
=> 464,2444,389,17728,511,2267,4493,13 I enjoy playing soccer with my friends. => 40,2883,2712,11783,351,616,2460,13 They are volunteering at a local shelter. => 2990,389,41434,379,257,1957,11772,13 She is practicing martial arts for self-defense. => 3347,318,18207,15618,10848,329,2116,12,19774,13 We should try a new recipe for dinner. => 1135,815,1949,257,649,8364,329,8073,13 The traffic is congest => 464,4979,318,22791 The sun is shining brightly today. => 464,4252,318,22751,35254,1909,13 I enjoy reading books in my free time. => 40,2883,3555,3835,287,616,1479,640,13 She plays the piano beautifully. => 3347,5341,262,19132,21104,13 The cat chased the mouse around the room. => 464,3797,26172,262,10211,1088,262,2119,13 I love eating pizza with extra cheese. => 40,1842,6600,14256,351,3131,9891,13 He always wears a hat wherever he goes. => 1544,1464,17326,257,6877,14530,339,2925,13 The flowers in the garden are blooming. => 464,12734,287,262,11376,389,24924,3383,13 She danced gracefully on the stage. => 3347,39480,11542,2759,319,262,3800,13 The dog barked loudly in the park. => 464,3290,21405,276,23112,287,262,3952,13 We went swimming in the ocean yesterday. => 1135,1816,14899,287,262,9151,7415,13 He speaks fluent French and Spanish. => 1544,9209,43472,4141,290,7897,13 The train arrived at the station on time. => 464,4512,5284,379,262,4429,319,640,13 She cooked a delicious meal for her family. => 3347,15847,257,12625,9799,329,607,1641,13 ggml-org-ggml-7ec8045/examples/prompts/gpt-neox-japanese.txt000066400000000000000000000001161506673203700241310ustar00rootroot00000000000000明日の天気はどうですか。 => 263,7353,268,18461,271,1722,18405,265 ggml-org-ggml-7ec8045/examples/prompts/gpt-neox.txt000066400000000000000000000200361506673203700223500ustar00rootroot00000000000000Hello World! => 12092,3645,2 I can't believe it's already Friday!" => 42,476,626,2868,352,434,2168,6794,1476 The URL for the website is https://www.example.com." => 510,10611,323,253,4422,310,5987,1358,2700,15,11667,15,681,449 "She said, 'I love to travel.'" => 3,2993,753,13,686,42,2389,281,4288,18574 'The temperature is 25.5°C.' => 8,510,3276,310,2030,15,22,3272,36,2464 "Let's meet at 2:30 p.m. in the park." => 3,1466,434,2525,387,374,27,1229,268,15,78,15,275,253,5603,449 The book costs $19.99 => 510,1984,4815,370,746,15,1525 "John's favorite color is blue." => 3,8732,434,7583,3295,310,4797,449 Th@nk y0u f0r y0ur h3lp! => 1044,33,30664,340,17,86,269,17,83,340,17,321,288,20,24343,2 C@n I g3t a c0ffee, pl3@se? => 36,33,79,309,305,20,85,247,260,17,71,6851,13,499,20,33,339,32 W0w! Th@t's @m@zing! => 56,17,88,2,596,33,85,434,1214,78,33,8537,2 H0w 4re y0u t0d@y? => 41,17,88,577,250,340,17,86,246,17,69,33,90,32 I l0ve t0 tr@vel @r0und the w0rld. => 42,298,17,306,246,17,492,33,652,1214,83,17,1504,253,259,17,83,392,15 Wh@t's y0ur f@v0rite m0vie? => 3152,33,85,434,340,17,321,269,33,87,17,3852,278,17,25858,32 The cat is sleeping on the mat. => 510,5798,310,14343,327,253,1111,15 I need to buy some groceries for dinner. => 42,878,281,4489,690,45160,447,323,8955,15 The sun is shining brightly in the sky. => 510,5101,310,28115,43925,275,253,8467,15 She is reading a book in the park. => 2993,310,4361,247,1984,275,253,5603,15 We went for a walk on the beach yesterday. => 1231,2427,323,247,2940,327,253,11600,11066,15 He plays the guitar like a pro. => 1328,7120,253,12609,751,247,354,15 They are going to the movies tonight. => 3726,403,1469,281,253,11321,11608,15 The flowers are blooming in the garden. 
=> 510,12405,403,30601,272,275,253,10329,15 I enjoy listening to classical music. => 42,4264,11298,281,8946,3440,15 We need to buy groceries for the week. => 1231,878,281,4489,45160,447,323,253,2129,15 The dog is chasing its tail in circles. => 510,4370,310,31702,697,8105,275,14240,15 She is wearing a beautiful red dress. => 2993,310,9398,247,5389,2502,7619,15 He is a talented actor in Hollywood. => 1328,310,247,21220,12353,275,14759,15 The children are playing in the playground. => 510,2151,403,4882,275,253,41008,15 I'm going to visit my grandparents this weekend. => 42,1353,1469,281,4143,619,37186,436,8849,15 The coffee tastes bitter without sugar. => 510,8574,27491,17123,1293,8618,15 They are planning a surprise party for her. => 3726,403,7219,247,9326,3128,323,617,15 She sings like an angel on stage. => 2993,44718,751,271,23087,327,3924,15 We should take a vacation to relax. => 1231,943,1379,247,18125,281,7921,15 He is studying medicine at the university. => 1328,310,12392,9921,387,253,9835,15 The rain is pouring heavily outside. => 510,9313,310,31226,11306,3345,15 I enjoy watching romantic movies. => 42,4264,7487,18109,11321,15 They are celebrating their anniversary today. => 3726,403,28765,616,19054,3063,15 She dances gracefully to the music. => 2993,47078,14426,2920,281,253,3440,15 He is an excellent basketball player. => 1328,310,271,7126,14648,4760,15 The baby is sleeping soundly in the crib. => 510,6858,310,14343,3590,314,275,253,260,725,15 I need to finish my homework before dinner. => 42,878,281,8416,619,32110,1078,8955,15 They are organizing a charity event next month. => 3726,403,26169,247,19489,2362,1735,1770,15 She is cooking a delicious meal for us. => 2993,310,12398,247,17319,11484,323,441,15 We should go hiking in the mountains. => 1231,943,564,33061,275,253,14700,15 The car broke down on the way to work. => 510,1113,9377,1066,327,253,1039,281,789,15 He loves playing video games in his free time. => 1328,14528,4882,3492,3958,275,521,1959,673,15 The birds are chirping in the trees. => 510,11260,403,36494,14650,275,253,7139,15 I want to learn how to play the piano. => 42,971,281,3037,849,281,1132,253,18542,15 They are building a new shopping mall in the city. => 3726,403,3652,247,747,12701,28974,275,253,2846,15 She is writing a novel in her spare time. => 2993,310,4028,247,4460,275,617,18345,673,15 We are going to the zoo this Saturday. => 1231,403,1469,281,253,41089,436,7814,15 The cake looks delicious with chocolate frosting. => 510,15221,4453,17319,342,14354,34724,272,15 He is a talented painter who sells his artwork. => 1328,310,247,21220,27343,665,27924,521,28227,15 The students are studying for their exams. => 510,3484,403,12392,323,616,34666,15 I enjoy swimming in the ocean. => 42,4264,17120,275,253,12927,15 They are renovating their house. => 3726,403,30074,839,616,2419,15 She is practicing yoga to stay healthy. => 2993,310,25815,25551,281,3297,5875,15 We should plant flowers in the garden. => 1231,943,4444,12405,275,253,10329,15 The traffic is heavy during rush hour. => 510,7137,310,5536,1309,16949,4964,15 He is a skilled chef who creates amazing dishes. => 1328,310,247,18024,26540,665,10513,8644,17114,15 The baby is crawling on the floor. => 510,6858,310,44922,327,253,5254,15 I need to buy a new pair of shoes. => 42,878,281,4489,247,747,4667,273,12682,15 They are going on a road trip across the country. => 3726,403,1469,327,247,3971,7408,2439,253,2586,15 She is playing the piano beautifully. 
=> 2993,310,4882,253,18542,27839,15 We are going to a concert tomorrow night. => 1231,403,1469,281,247,12699,10873,2360,15 The cake tastes delicious with vanilla frosting. => 510,15221,27491,17319,342,26724,34724,272,15 He is a dedicated teacher who inspires his students. => 1328,310,247,9940,9732,665,6381,2731,521,3484,15 The students are participating in a science fair. => 510,3484,403,15299,275,247,5859,4344,15 I enjoy hiking in the mountains. => 42,4264,33061,275,253,14700,15 They are organizing a beach cleanup next weekend. => 3726,403,26169,247,11600,34709,1735,8849,15 She is taking photographs of nature. => 2993,310,3192,15928,273,3753,15 We should try a new restaurant in town. => 1231,943,1611,247,747,10301,275,3874,15 The traffic is moving slowly on the highway. => 510,7137,310,4886,7808,327,253,17657,15 He is a talented singer with a beautiful voice. => 1328,310,247,21220,16057,342,247,5389,4318,15 The baby is laughing and giggling. => 510,6858,310,17053,285,41542,1981,15 I need to do laundry and wash my clothes. => 42,878,281,513,29023,285,14841,619,10015,15 They are planning a trip to Europe. => 3726,403,7219,247,7408,281,3060,15 She is learning how to play the guitar. => 2993,310,4715,849,281,1132,253,12609,15 We are going to a museum this Sunday. => 1231,403,1469,281,247,16064,436,6926,15 The coffee smells amazing in the morning. => 510,8574,34247,8644,275,253,4131,15 He is a hardworking farmer who grows crops. => 1328,310,247,1892,21107,24718,665,17202,19492,15 The students are presenting their research projects. => 510,3484,403,15250,616,2561,6493,15 I enjoy playing soccer with my friends. => 42,4264,4882,20391,342,619,3858,15 They are volunteering at a local shelter. => 3726,403,10057,2158,387,247,1980,17824,15 She is practicing martial arts for self-defense. => 2993,310,25815,29731,14635,323,1881,14,29337,15 We should try a new recipe for dinner. => 1231,943,1611,247,747,13612,323,8955,15 The traffic is congest => 510,7137,310,25801 The sun is shining brightly today. => 510,5101,310,28115,43925,3063,15 I enjoy reading books in my free time. => 42,4264,4361,5098,275,619,1959,673,15 She plays the piano beautifully. => 2993,7120,253,18542,27839,15 The cat chased the mouse around the room. => 510,5798,40754,253,6521,1475,253,2316,15 I love eating pizza with extra cheese. => 42,2389,9123,22534,342,4465,12173,15 He always wears a hat wherever he goes. => 1328,1900,31394,247,7856,20312,344,4566,15 The flowers in the garden are blooming. => 510,12405,275,253,10329,403,30601,272,15 She danced gracefully on the stage. => 2993,39860,14426,2920,327,253,3924,15 The dog barked loudly in the park. => 510,4370,21939,264,31311,275,253,5603,15 We went swimming in the ocean yesterday. => 1231,2427,17120,275,253,12927,11066,15 He speaks fluent French and Spanish. => 1328,16544,2938,290,5112,285,9883,15 The train arrived at the station on time. => 510,6194,7244,387,253,4660,327,673,15 She cooked a delicious meal for her family. => 2993,18621,247,17319,11484,323,617,2021,15 ggml-org-ggml-7ec8045/examples/prompts/polyglot-ko.txt000066400000000000000000000002561506673203700230710ustar00rootroot00000000000000이것은 테스트 이다. => 12271,296,6474,28037,17 걱정할 필요 없다. => 18311,482,1062,550,267,17 버그는 언젠가 고쳐진다. => 6904,272,8575,10381,1765,17 ggml-org-ggml-7ec8045/examples/prompts/replit.txt000066400000000000000000000235771506673203700221230ustar00rootroot00000000000000Hello World! => 6466,147,2317,350 I can't believe it's already Friday!" 
=> 286,512,172,185,13392,393,172,155,3239,147,29249,8537 The URL for the website is https://www.example.com." => 505,5635,250,170,11745,235,147,303,262,552,148,811,148,241,148,161 "She said, 'I love to travel.'" => 161,10386,4089,150,206,286,8440,194,147,12363,148,172,161 'The temperature is 25.5°C.' => 172,505,147,9502,235,147,20022,8516,228,148,172 "Let's meet at 2:30 p.m. in the park." => 161,8997,172,155,17120,536,147,162,5245,147,207,148,204,148,219,170,147,17664,148,161 The book costs $19.99 => 505,147,2277,17494,236,166,11824 "John's favorite color is blue." => 161,7475,172,155,147,11105,147,349,235,17046,148,161 Th@nk y0u f0r y0ur h3lp! => 6309,240,9019,147,237,159,247,147,202,159,223,147,237,159,2458,147,226,171,3899,350 C@n I g3t a c0ffee, pl3@se? => 228,240,211,398,147,267,171,185,216,147,196,159,13360,163,150,147,1287,171,240,155,163,272 W0w! Th@t's @m@zing! => 450,159,274,350,147,6309,240,185,172,155,268,204,240,301,248,350 H0w 4re y0u t0d@y? => 304,159,274,320,440,147,237,159,247,147,185,159,182,240,237,272 I l0ve t0 tr@vel @r0und the w0rld. => 286,997,159,1290,147,185,159,147,490,240,3893,268,223,159,3981,170,147,274,159,223,2833,148 Wh@t's y0ur f@v0rite m0vie? => 450,226,240,185,172,155,147,237,159,2458,147,202,240,252,159,5961,163,147,204,159,24373,272 The cat is sleeping on the mat. => 505,147,1604,235,147,3987,248,347,170,147,1297,148 I need to buy some groceries for dinner. => 286,1645,194,147,8068,1499,147,10022,1037,10023,250,147,182,2749,148 The sun is shining brightly in the sky. => 505,147,5852,235,147,7304,2967,147,215,649,391,219,170,147,7310,148 She is reading a book in the park. => 10386,235,9838,216,147,2277,219,170,147,17664,148 We went for a walk on the beach yesterday. => 3250,10825,250,216,147,8156,347,170,294,5371,147,28830,148 He plays the guitar like a pro. => 5301,7084,155,170,147,4604,2214,1425,216,3474,148 They are going to the movies tonight. => 18815,429,6552,194,170,147,15877,194,7907,148 The flowers are blooming in the garden. => 505,147,22953,155,429,147,10411,2799,248,219,170,147,22140,148 I enjoy listening to classical music. => 286,23162,15876,248,194,239,4251,147,7395,148 We need to buy groceries for the week. => 3250,1645,194,147,8068,147,10022,1037,10023,250,170,9238,148 The dog is chasing its tail in circles. => 505,147,6540,235,147,196,916,248,1602,147,5129,219,147,4095,155,148 She is wearing a beautiful red dress. => 10386,235,147,16427,248,216,147,23447,147,1160,147,14592,148 He is a talented actor in Hollywood. => 5301,235,216,147,29750,246,147,5112,219,147,16924,391,10477,148 The children are playing in the playground. => 505,7934,429,7084,248,219,170,7084,12055,148 I'm going to visit my grandparents this weekend. => 286,172,204,6552,194,9939,1247,147,11806,12019,291,9238,314,148 The coffee tastes bitter without sugar. => 505,147,21526,147,20931,155,5145,1430,1988,147,28759,148 They are planning a surprise party for her. => 18815,429,147,23661,216,147,29240,147,7344,250,1869,148 She sings like an angel on stage. => 10386,147,155,6502,1425,426,147,26028,347,12685,148 We should take a vacation to relax. => 3250,936,4654,216,147,15388,946,194,1998,2744,148 He is studying medicine at the university. => 5301,235,7959,248,147,20742,1668,536,170,147,8025,148 The rain is pouring heavily outside. => 505,147,6885,235,5306,248,1189,5451,391,8096,148 I enjoy watching romantic movies. => 286,23162,147,3355,248,147,26080,4140,147,15877,148 They are celebrating their anniversary today. 
=> 18815,429,147,30000,5841,1669,147,24734,5464,1770,13386,148 She dances gracefully to the music. => 10386,147,182,1626,155,147,267,8771,8001,194,170,147,7395,148 He is an excellent basketball player. => 5301,235,426,147,12300,675,185,147,26646,5132,6294,148 The baby is sleeping soundly in the crib. => 505,147,23597,235,147,3987,248,12642,391,219,170,147,7696,215,148 I need to finish my homework before dinner. => 286,1645,194,147,6717,1247,147,1071,2722,2643,147,182,2749,148 They are organizing a charity event next month. => 18815,429,147,16442,248,216,1054,1511,1663,2399,12821,148 She is cooking a delicious meal for us. => 10386,235,147,20453,248,216,3936,23455,147,26658,250,147,539,148 We should go hiking in the mountains. => 3250,936,4242,147,2254,5357,219,170,147,204,18028,155,148 The car broke down on the way to work. => 505,7553,147,510,10036,4288,347,170,3699,194,1916,148 He loves playing video games in his free time. => 5301,8440,155,7084,248,8722,147,11281,219,1439,4002,801,148 The birds are chirping in the trees. => 505,147,13043,155,429,147,3904,223,4639,219,170,5311,155,148 I want to learn how to play the piano. => 286,1857,194,14167,2496,194,7084,170,147,207,23635,148 They are building a new shopping mall in the city. => 18815,429,11038,216,277,147,22184,147,204,609,219,170,147,2416,148 She is writing a novel in her spare time. => 10386,235,3242,216,147,25814,219,1869,6772,2382,801,148 We are going to the zoo this Saturday. => 3250,429,6552,194,170,147,25101,291,147,31426,148 The cake looks delicious with chocolate frosting. => 505,147,24422,16303,3936,23455,312,147,5619,533,2239,147,202,3973,3431,148 He is a talented painter who sells his artwork. => 5301,235,216,147,29750,246,147,9226,279,2888,13004,155,1439,12234,2722,148 The students are studying for their exams. => 505,15707,429,7959,248,250,1669,147,12398,155,148 I enjoy swimming in the ocean. => 286,23162,147,4729,8528,248,219,170,147,26193,148 They are renovating their house. => 18815,429,991,10724,3643,1669,13788,148 She is practicing yoga to stay healthy. => 10386,235,147,18453,248,147,5063,1186,194,15344,147,28550,148 We should plant flowers in the garden. => 3250,936,147,9212,147,22953,155,219,170,147,22140,148 The traffic is heavy during rush hour. => 505,147,11097,235,147,22232,4340,147,22319,147,5686,148 He is a skilled chef who creates amazing dishes. => 5301,235,216,147,8891,246,9784,202,2888,13720,147,28880,147,23852,383,148 The baby is crawling on the floor. => 505,147,23597,235,147,22120,248,347,170,147,5895,148 I need to buy a new pair of shoes. => 286,1645,194,147,8068,216,277,12632,210,147,155,21953,155,148 They are going on a road trip across the country. => 18815,429,6552,347,216,147,6362,147,11395,9762,170,11305,148 She is playing the piano beautifully. => 10386,235,7084,248,170,147,207,23635,147,23447,391,148 We are going to a concert tomorrow night. => 3250,429,6552,194,216,1710,4391,29524,12716,148 The cake tastes delicious with vanilla frosting. => 505,147,24422,147,20931,155,3936,23455,312,5535,7476,147,202,3973,3431,148 He is a dedicated teacher who inspires his students. => 5301,235,216,326,8298,3460,147,9675,2888,147,28801,155,1439,15707,148 The students are participating in a science fair. => 505,15707,429,147,30961,3643,219,216,147,10587,147,7636,148 I enjoy hiking in the mountains. => 286,23162,147,2254,5357,219,170,147,204,18028,155,148 They are organizing a beach cleanup next weekend. => 18815,429,147,16442,248,216,294,5371,147,10401,2399,9238,314,148 She is taking photographs of nature. 
=> 10386,235,147,12345,147,4709,1547,155,210,147,211,8603,148 We should try a new restaurant in town. => 3250,936,147,746,216,277,147,11007,219,147,10200,148 The traffic is moving slowly on the highway. => 505,147,11097,235,147,8601,147,9880,391,347,170,5976,3330,148 He is a talented singer with a beautiful voice. => 5301,235,216,147,29750,246,147,155,248,279,312,216,147,23447,147,9316,148 The baby is laughing and giggling. => 505,147,23597,235,147,23066,248,221,147,2341,3631,2869,148 I need to do laundry and wash my clothes. => 286,1645,194,543,960,3981,2154,221,147,27589,1247,147,22141,383,148 They are planning a trip to Europe. => 18815,429,147,23661,216,147,11395,194,13131,148 She is learning how to play the guitar. => 10386,235,11754,2496,194,7084,170,147,4604,2214,148 We are going to a museum this Sunday. => 3250,429,6552,194,216,147,204,433,1177,291,147,29111,148 The coffee smells amazing in the morning. => 505,147,21526,31454,155,147,28880,219,170,20701,148 He is a hardworking farmer who grows crops. => 5301,235,216,8524,14992,147,16679,279,2888,147,6044,155,147,8650,155,148 The students are presenting their research projects. => 505,15707,429,5130,248,1669,13217,14235,148 I enjoy playing soccer with my friends. => 286,23162,7084,248,147,9351,5318,312,1247,147,5347,155,148 They are volunteering at a local shelter. => 18815,429,147,5238,7478,163,12798,536,216,2491,2905,1359,279,148 She is practicing martial arts for self-defense. => 10386,235,147,18453,248,147,3261,185,4381,12234,155,250,623,153,29896,148 We should try a new recipe for dinner. => 3250,936,147,746,216,277,147,9851,250,147,182,2749,148 The traffic is congest => 505,147,11097,235,1710,14169 The sun is shining brightly today. => 505,147,5852,235,147,7304,2967,147,215,649,391,13386,148 I enjoy reading books in my free time. => 286,23162,9838,147,9670,219,1247,4002,801,148 She plays the piano beautifully. => 10386,7084,155,170,147,207,23635,147,23447,391,148 The cat chased the mouse around the room. => 505,147,1604,147,196,916,246,170,12551,6890,170,9654,148 I love eating pizza with extra cheese. => 286,8440,147,163,3643,147,207,8403,312,8230,9784,383,163,148 He always wears a hat wherever he goes. => 5301,5418,147,16427,155,216,147,4879,2171,2433,1189,16177,148 The flowers in the garden are blooming. => 505,147,22953,155,219,170,147,22140,429,147,10411,2799,248,148 She danced gracefully on the stage. => 10386,13378,12408,147,267,8771,8001,347,170,12685,148 The dog barked loudly in the park. => 505,147,6540,147,973,293,246,147,30182,391,219,170,147,17664,148 We went swimming in the ocean yesterday. => 3250,10825,147,4729,8528,248,219,170,147,26193,147,28830,148 He speaks fluent French and Spanish. => 5301,147,13285,155,147,21677,147,254,17590,221,147,31519,148 The train arrived at the station on time. => 505,147,872,147,20712,182,536,170,147,7184,347,801,148 She cooked a delicious meal for her family. => 10386,147,20453,246,216,3936,23455,147,26658,250,1869,147,2002,148 ggml-org-ggml-7ec8045/examples/prompts/starcoder.txt000066400000000000000000000216561506673203700226060ustar00rootroot00000000000000Hello World! => 8279,10896,19 I can't believe it's already Friday!" => 59,883,1330,13710,561,1182,3425,506,25674,11555 The URL for the website is https://www.example.com." => 1318,3834,436,322,9575,438,1678,555,1499,32,2763,32,508,3107 "She said, 'I love to travel.'" => 20,25387,9884,30,330,59,14290,372,25283,29329 'The temperature is 25.5°C.' => 25,1318,13587,438,225,36,39,32,39,23767,53,4564 "Let's meet at 2:30 p.m. 
in the park." => 20,9809,1182,18450,821,225,36,44,37,34,298,32,95,32,328,322,880,93,3107 The book costs $19.99 => 1318,7618,25950,398,35,43,32,43,43 "John's favorite color is blue." => 20,19693,1182,27448,1963,438,10087,3107 Th@nk y0u f0r y0ur h3lp! => 1027,50,19877,533,34,103,296,34,100,533,34,305,420,37,1915,19 C@n I g3t a c0ffee, pl3@se? => 53,50,96,439,485,37,102,312,281,34,21298,30,1278,37,50,277,49 W0w! Th@t's @m@zing! => 73,34,105,19,947,50,102,1182,477,95,50,26768,19 H0w 4re y0u t0d@y? => 58,34,105,225,38,268,533,34,103,273,34,86,50,107,49 I l0ve t0 tr@vel @r0und the w0rld. => 59,456,34,587,273,34,554,50,1203,477,100,34,642,322,341,34,100,1381,32 Wh@t's y0ur f@v0rite m0vie? => 2444,50,102,1182,533,34,305,296,50,104,34,1049,345,34,104,1075,49 The cat is sleeping on the mat. => 1318,10501,438,9368,299,544,322,2491,32 I need to buy some groceries for dinner. => 59,1849,372,16968,1629,20234,85,6958,436,343,3369,32 The sun is shining brightly in the sky. => 1318,15323,438,787,19068,38231,631,328,322,26718,32 She is reading a book in the park. => 25387,438,9175,312,7618,328,322,880,93,32 We went for a walk on the beach yesterday. => 3122,14236,436,312,13503,544,322,526,867,39485,32 He plays the guitar like a pro. => 1331,41271,322,3932,19931,2124,312,534,32 They are going to the movies tonight. => 31805,884,6783,372,322,27889,26076,694,32 The flowers are blooming in the garden. => 1318,7290,483,884,323,18466,299,328,322,485,22461,32 I enjoy listening to classical music. => 59,31567,20498,372,443,1578,17522,32 We need to buy groceries for the week. => 3122,1849,372,16968,20234,85,6958,436,322,8209,32 The dog is chasing its tail in circles. => 1318,27435,438,663,9949,2819,13203,328,46428,32 She is wearing a beautiful red dress. => 25387,438,996,6992,312,36493,3346,343,714,32 He is a talented actor in Hollywood. => 1331,438,312,273,9556,318,16038,328,48228,631,21118,32 The children are playing in the playground. => 1318,5713,884,19788,328,322,4654,1749,32 I'm going to visit my grandparents this weekend. => 59,3464,6783,372,7725,1672,33162,19277,458,40618,32 The coffee tastes bitter without sugar. => 1318,36917,273,633,307,3493,391,2876,309,18628,32 They are planning a surprise party for her. => 31805,884,26116,312,6178,9251,15270,436,7791,32 She sings like an angel on stage. => 25387,309,2052,2124,600,600,17691,544,10019,32 We should take a vacation to relax. => 3122,1395,4818,312,29164,367,372,41972,32 He is studying medicine at the university. => 1331,438,14866,299,32388,482,821,322,707,9190,32 The rain is pouring heavily outside. => 1318,36987,438,9202,299,46003,2801,11127,32 I enjoy watching romantic movies. => 59,31567,37652,26045,7268,27889,32 They are celebrating their anniversary today. => 31805,884,48278,839,1741,3623,23921,5810,672,11610,32 She dances gracefully to the music. => 25387,343,3151,31376,4938,372,322,17522,32 He is an excellent basketball player. => 1331,438,600,39203,48400,11653,4362,32 The baby is sleeping soundly in the crib. => 1318,323,17156,438,9368,299,9934,631,328,322,281,7972,32 I need to finish my homework before dinner. => 59,1849,372,11361,1672,6765,1007,2670,343,3369,32 They are organizing a charity event next month. => 31805,884,10558,6183,312,1351,543,1692,2354,6811,32 She is cooking a delicious meal for us. => 25387,438,23682,299,312,409,406,2406,597,279,436,1770,32 We should go hiking in the mountains. => 3122,1395,1983,420,1546,299,328,322,10874,1907,32 The car broke down on the way to work. 
=> 1318,6346,43289,2835,544,322,3352,372,1389,32 He loves playing video games in his free time. => 1331,598,4954,19788,6027,19705,328,6697,3741,1133,32 The birds are chirping in the trees. => 1318,8424,3210,884,663,476,7075,328,322,23453,32 I want to learn how to play the piano. => 59,2637,372,7350,2624,372,4654,322,298,25757,32 They are building a new shopping mall in the city. => 31805,884,9038,312,537,40692,345,464,328,322,11297,32 She is writing a novel in her spare time. => 25387,438,4127,312,32913,328,7791,1869,586,1133,32 We are going to the zoo this Saturday. => 3122,884,6783,372,322,1288,604,458,358,30288,32 The cake looks delicious with chocolate frosting. => 1318,281,1062,7780,409,406,2406,623,10408,27589,296,20932,299,32 He is a talented painter who sells his artwork. => 1331,438,312,273,9556,318,42300,6560,10800,101,6697,5549,1007,32 The students are studying for their exams. => 1318,16512,884,14866,299,436,3623,538,1462,32 I enjoy swimming in the ocean. => 59,31567,2535,449,6714,328,322,337,18857,32 They are renovating their house. => 31805,884,316,15007,1741,3623,17075,32 She is practicing yoga to stay healthy. => 25387,438,11808,11636,533,40067,372,20005,44538,32 We should plant flowers in the garden. => 3122,1395,26795,7290,483,328,322,485,22461,32 The traffic is heavy during rush hour. => 1318,16391,438,32389,5929,540,1372,12021,32 He is a skilled chef who creates amazing dishes. => 1331,438,312,3001,12088,44051,6560,9585,36986,1214,4279,32 The baby is crawling on the floor. => 1318,323,17156,438,281,1294,2920,544,322,17648,32 I need to buy a new pair of shoes. => 59,1849,372,16968,312,537,6092,432,787,37764,32 They are going on a road trip across the country. => 31805,884,6783,544,312,24122,19337,10160,322,10769,32 She is playing the piano beautifully. => 25387,438,19788,322,298,25757,526,4846,325,514,107,32 We are going to a concert tomorrow night. => 3122,884,6783,372,312,457,6989,31841,19212,32 The cake tastes delicious with vanilla frosting. => 1318,281,1062,273,633,307,409,406,2406,623,44653,296,20932,299,32 He is a dedicated teacher who inspires his students. => 1331,438,312,23112,30877,6560,26194,8017,6697,16512,32 The students are participating in a science fair. => 1318,16512,884,24623,1741,328,312,27536,19375,32 I enjoy hiking in the mountains. => 59,31567,420,1546,299,328,322,10874,1907,32 They are organizing a beach cleanup next weekend. => 31805,884,10558,6183,312,526,867,13144,2354,40618,32 She is taking photographs of nature. => 25387,438,15137,15110,23626,432,24406,32 We should try a new restaurant in town. => 3122,1395,1596,312,537,43719,328,38212,32 The traffic is moving slowly on the highway. => 1318,16391,438,14089,12899,631,544,322,3857,3073,32 He is a talented singer with a beautiful voice. => 1331,438,312,273,9556,318,309,10118,623,312,36493,20309,32 The baby is laughing and giggling. => 1318,323,17156,438,2317,2943,299,461,485,365,36088,32 I need to do laundry and wash my clothes. => 59,1849,372,745,2317,642,994,461,341,917,1672,7375,46948,32 They are planning a trip to Europe. => 31805,884,26116,312,19337,372,27268,32 She is learning how to play the guitar. => 25387,438,9608,2624,372,4654,322,3932,19931,32 We are going to a museum this Sunday. => 3122,884,6783,372,312,345,539,378,458,358,28036,32 The coffee smells amazing in the morning. => 1318,36917,309,42153,101,36986,328,322,33768,32 He is a hardworking farmer who grows crops. => 1331,438,312,6784,13578,9019,2302,6560,485,2138,25170,1069,32 The students are presenting their research projects. 
=> 1318,16512,884,5024,299,3623,13234,8528,32 I enjoy playing soccer with my friends. => 59,31567,19788,22682,10035,623,1672,22523,32 They are volunteering at a local shelter. => 31805,884,3920,45585,8637,821,312,2196,309,2542,391,32 She is practicing martial arts for self-defense. => 25387,438,11808,11636,345,502,564,5549,101,436,630,31,43694,32 We should try a new recipe for dinner. => 3122,1395,1596,312,537,15233,436,343,3369,32 The traffic is congest => 1318,16391,438,457,2776 The sun is shining brightly today. => 1318,15323,438,787,19068,38231,631,11610,32 I enjoy reading books in my free time. => 59,31567,9175,21739,328,1672,3741,1133,32 She plays the piano beautifully. => 25387,41271,322,298,25757,526,4846,325,514,107,32 The cat chased the mouse around the room. => 1318,10501,663,16109,322,8459,6835,322,8355,32 I love eating pizza with extra cheese. => 59,14290,484,1741,47630,623,6717,8277,30315,32 He always wears a hat wherever he goes. => 1331,5182,996,4177,312,25793,2154,424,938,13107,32 The flowers in the garden are blooming. => 1318,7290,483,328,322,485,22461,884,323,18466,299,32 She danced gracefully on the stage. => 25387,343,6087,31376,4938,544,322,10019,32 The dog barked loudly in the park. => 1318,27435,323,1087,318,598,836,631,328,322,880,93,32 We went swimming in the ocean yesterday. => 3122,14236,2535,449,6714,328,322,337,18857,39485,32 He speaks fluent French and Spanish. => 1331,24498,101,38055,43652,461,14911,1708,32 The train arrived at the station on time. => 1318,5683,2099,32114,821,322,18662,544,1133,32 She cooked a delicious meal for her family. => 25387,23682,318,312,409,406,2406,597,279,436,7791,13872,32 ggml-org-ggml-7ec8045/examples/prompts/test-cases.txt000066400000000000000000000117461506673203700226700ustar00rootroot00000000000000# test case format
# <language>: <sentence>
English: Hello World! English: I can't believe it's already Friday!" English: The URL for the website is https://www.example.com." English: "She said, 'I love to travel.'" English: 'The temperature is 25.5°C.' English: "Let's meet at 2:30 p.m. in the park." English: The book costs $19.99 English: "John's favorite color is blue." English: Th@nk y0u f0r y0ur h3lp! English: C@n I g3t a c0ffee, pl3@se? English: W0w! Th@t's @m@zing! English: H0w 4re y0u t0d@y? English: I l0ve t0 tr@vel @r0und the w0rld. English: Wh@t's y0ur f@v0rite m0vie? English: The cat is sleeping on the mat. English: I need to buy some groceries for dinner. English: The sun is shining brightly in the sky. English: She is reading a book in the park. English: We went for a walk on the beach yesterday. English: He plays the guitar like a pro. English: They are going to the movies tonight. English: The flowers are blooming in the garden. English: I enjoy listening to classical music. English: We need to buy groceries for the week. English: The dog is chasing its tail in circles. English: She is wearing a beautiful red dress. English: He is a talented actor in Hollywood. English: The children are playing in the playground. English: I'm going to visit my grandparents this weekend. English: The coffee tastes bitter without sugar. English: They are planning a surprise party for her. English: She sings like an angel on stage. English: We should take a vacation to relax. English: He is studying medicine at the university. English: The rain is pouring heavily outside. English: I enjoy watching romantic movies. English: They are celebrating their anniversary today. English: She dances gracefully to the music. 
English: He is an excellent basketball player. English: The baby is sleeping soundly in the crib. English: I need to finish my homework before dinner. English: They are organizing a charity event next month. English: She is cooking a delicious meal for us. English: We should go hiking in the mountains. English: The car broke down on the way to work. English: He loves playing video games in his free time. English: The birds are chirping in the trees. English: I want to learn how to play the piano. English: They are building a new shopping mall in the city. English: She is writing a novel in her spare time. English: We are going to the zoo this Saturday. English: The cake looks delicious with chocolate frosting. English: He is a talented painter who sells his artwork. English: The students are studying for their exams. English: I enjoy swimming in the ocean. English: They are renovating their house. English: She is practicing yoga to stay healthy. English: We should plant flowers in the garden. English: The traffic is heavy during rush hour. English: He is a skilled chef who creates amazing dishes. English: The baby is crawling on the floor. English: I need to buy a new pair of shoes. English: They are going on a road trip across the country. English: She is playing the piano beautifully. English: We are going to a concert tomorrow night. English: The cake tastes delicious with vanilla frosting. English: He is a dedicated teacher who inspires his students. English: The students are participating in a science fair. English: I enjoy hiking in the mountains. English: They are organizing a beach cleanup next weekend. English: She is taking photographs of nature. English: We should try a new restaurant in town. English: The traffic is moving slowly on the highway. English: He is a talented singer with a beautiful voice. English: The baby is laughing and giggling. English: I need to do laundry and wash my clothes. English: They are planning a trip to Europe. English: She is learning how to play the guitar. English: We are going to a museum this Sunday. English: The coffee smells amazing in the morning. English: He is a hardworking farmer who grows crops. English: The students are presenting their research projects. English: I enjoy playing soccer with my friends. English: They are volunteering at a local shelter. English: She is practicing martial arts for self-defense. English: We should try a new recipe for dinner. English: The traffic is congest English: The sun is shining brightly today. English: I enjoy reading books in my free time. English: She plays the piano beautifully. English: The cat chased the mouse around the room. English: I love eating pizza with extra cheese. English: He always wears a hat wherever he goes. English: The flowers in the garden are blooming. English: She danced gracefully on the stage. English: The dog barked loudly in the park. English: We went swimming in the ocean yesterday. English: He speaks fluent French and Spanish. English: The train arrived at the station on time. English: She cooked a delicious meal for her family. Korean: 이것은 테스트 이다. Korean: 걱정할 필요 없다. Korean: 버그는 언젠가 고쳐진다. Japanese: 明日の天気はどうですか。 Chinese: 请问洗手间在哪里? Emoji: I'm feeling 😄 today! 
Unicode: ◑ ▢ ▣ ◱ggml-org-ggml-7ec8045/examples/prompts/tokenize_huggingface.py000066400000000000000000000063251506673203700246040ustar00rootroot00000000000000import os

from transformers import AutoTokenizer

os.environ['TOKENIZERS_PARALLELISM'] = "false"

list_repo_hf = ["databricks/dolly-v2-3b",           # dolly-v2 (3b, 7b, 12b models share the same tokenizer)
                "gpt2",                             # gpt-2 (gpt2-xl, gpt2-large share the same tokenizer)
                "uer/gpt2-chinese-cluecorpussmall", # gpt-2-chinese
                "EleutherAI/gpt-j-6b",              # gpt-j
                "EleutherAI/gpt-neox-20b",          # gpt-neox
                "EleutherAI/polyglot-ko-1.3b",      # gpt-neox (polyglot-ko 5.8b and 12.8b share the same tokenizer)
                "rinna/japanese-gpt-neox-3.6b",     # gpt-neox
                                                    # mpt-7b (uses gpt-neox-20b tokenizer)
                "replit/replit-code-v1-3b",         # replit
                "bigcode/starcoder",                # starcoder (huggingface-cli login required)
                "openai/whisper-tiny"]              # whisper (base, large, large-v2 share the same tokenizer)

repo2ggml = {"databricks/dolly-v2-3b"           : "dolly-v2",
             "gpt2"                             : "gpt-2",
             "uer/gpt2-chinese-cluecorpussmall" : "gpt-2-chinese",
             "EleutherAI/gpt-j-6b"              : "gpt-j",
             "EleutherAI/gpt-neox-20b"          : "gpt-neox",
             "EleutherAI/polyglot-ko-1.3b"      : "polyglot-ko",
             "rinna/japanese-gpt-neox-3.6b"     : "gpt-neox-japanese",
             "replit/replit-code-v1-3b"         : "replit",
             "bigcode/starcoder"                : "starcoder",
             "openai/whisper-tiny"              : "whisper"}

repo2language = {"databricks/dolly-v2-3b"           : "english",
                 "gpt2"                             : "english",
                 "uer/gpt2-chinese-cluecorpussmall" : "chinese",
                 "EleutherAI/gpt-j-6b"              : "english",
                 "EleutherAI/gpt-neox-20b"          : "english",
                 "EleutherAI/polyglot-ko-1.3b"      : "korean",
                 "rinna/japanese-gpt-neox-3.6b"     : "japanese",
                 "replit/replit-code-v1-3b"         : "english",
                 "bigcode/starcoder"                : "english",
                 "openai/whisper-tiny"              : "english"}

delimiter = ": "

# parse "<language>: <sentence>" pairs from the shared test cases
test_sentences = []
with open("test-cases.txt", "r") as f:
    lines = [l.rstrip() for l in f.readlines()]
    for l in lines:
        if delimiter in l:
            language = l[:l.index(delimiter)]
            sentence = l[l.index(delimiter) + len(delimiter):]
            test_sentences.append((language.lower(), sentence))

# for each tokenizer, tokenize the sentences in its language and write the
# expected token ids to the corresponding <ggml-model-name>.txt file
for repo in list_repo_hf:
    target_language = repo2language[repo]
    tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)

    tokens_hf = []
    for language, sentence in test_sentences:
        if language == target_language:
            tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentence))
            tokens_hf.append((sentence, tokens))

    save_txt = repo2ggml[repo] + ".txt"
    with open(save_txt, "w") as f:
        f.writelines([sentence + " => " + ",".join(str(t) for t in tokens) + "\n" for sentence, tokens in tokens_hf])
ggml-org-ggml-7ec8045/examples/prompts/whisper.txt000066400000000000000000000210271506673203700222710ustar00rootroot00000000000000Hello World! => 15947,3937,0 I can't believe it's already Friday!" => 40,393,380,1697,309,311,1217,6984,2963 The URL for the website is https://www.example.com." => 2278,12905,337,220,3322,3144,307,34426,21492,17919,13,3121,335,781,13,1112,889 "She said, 'I love to travel.'" => 1,9526,848,11,922,40,959,220,1353,220,17227,779,28763 'The temperature is 25.5°C.' => 6,2278,220,18275,610,1503,307,3552,13,20,11782,34,4443 "Let's meet at 2:30 p.m. in the park." => 1,8373,311,1677,412,568,25,3446,280,13,76,13,294,220,3322,3884,889 The book costs $19.99 => 2278,1446,5497,1848,3405,13,8494 "John's favorite color is blue." => 1,16938,311,2954,2017,307,3344,889 Th@nk y0u f0r y0ur h3lp! => 2434,31,77,74,288,15,84,283,15,81,288,15,374,276,18,75,79,0 C@n I g3t a c0ffee, pl3@se? => 34,31,77,286,290,18,83,257,269,15,4617,11,499,18,31,405,30 W0w! Th@t's @m@zing! 
=> 54,15,86,0,334,31,83,311,10428,76,31,8781,0 H0w 4re y0u t0d@y? => 39,15,86,1017,265,288,15,84,220,83,15,67,31,88,30 I l0ve t0 tr@vel @r0und the w0rld. => 40,287,15,303,220,83,15,220,6903,31,779,10428,81,15,997,220,3322,261,15,81,348,13 Wh@t's y0ur f@v0rite m0vie? => 2471,31,83,311,288,15,374,283,31,85,15,35002,275,15,12702,30 The cat is sleeping on the mat. => 2278,3857,307,8296,322,220,3322,3803,13 I need to buy some groceries for dinner. => 40,643,220,1353,2256,512,31391,337,6148,13 The sun is shining brightly in the sky. => 2278,3295,307,18269,47418,294,220,3322,5443,13 She is reading a book in the park. => 9526,307,3760,257,1446,294,220,3322,3884,13 We went for a walk on the beach yesterday. => 4360,1437,337,257,1792,322,220,3322,7534,5186,13 He plays the guitar like a pro. => 5205,5749,220,3322,7531,411,257,447,13 They are going to the movies tonight. => 8829,366,516,220,1353,220,3322,6233,220,1756,397,13 The flowers are blooming in the garden. => 2278,8085,366,45294,294,220,3322,7431,13 I enjoy listening to classical music. => 40,2103,4764,220,1353,13735,1318,13 We need to buy groceries for the week. => 4360,643,220,1353,2256,31391,337,220,3322,1243,13 The dog is chasing its tail in circles. => 2278,3000,307,17876,1080,220,14430,294,13040,13 She is wearing a beautiful red dress. => 9526,307,4769,257,2238,2182,5231,13 He is a talented actor in Hollywood. => 5205,307,257,220,32831,6003,8747,294,11628,13 The children are playing in the playground. => 2278,2227,366,2433,294,220,3322,24646,13 I'm going to visit my grandparents this weekend. => 40,478,516,220,1353,3441,452,21876,220,11176,6711,13 The coffee tastes bitter without sugar. => 2278,4982,220,83,40246,13871,1553,5076,13 They are planning a surprise party for her. => 8829,366,5038,257,6365,3595,337,720,13 She sings like an angel on stage. => 9526,23250,411,364,14250,322,3233,13 We should take a vacation to relax. => 4360,820,220,27612,257,12830,220,1353,5789,13 He is studying medicine at the university. => 5205,307,7601,7195,412,220,3322,5454,13 The rain is pouring heavily outside. => 2278,4830,307,20450,10950,2380,13 I enjoy watching romantic movies. => 40,2103,1976,13590,6233,13 They are celebrating their anniversary today. => 8829,366,15252,220,3322,347,12962,220,83,378,320,13 She dances gracefully to the music. => 9526,28322,10042,2277,220,1353,220,3322,1318,13 He is an excellent basketball player. => 5205,307,364,7103,11767,4256,13 The baby is sleeping soundly in the crib. => 2278,3186,307,8296,1626,356,294,220,3322,47163,13 I need to finish my homework before dinner. => 40,643,220,1353,2413,452,14578,949,6148,13 They are organizing a charity event next month. => 8829,366,17608,257,16863,2280,958,1618,13 She is cooking a delicious meal for us. => 9526,307,6361,257,4809,6791,337,505,13 We should go hiking in the mountains. => 4360,820,352,23784,294,220,3322,10233,13 The car broke down on the way to work. => 2278,1032,6902,760,322,220,3322,636,220,1353,589,13 He loves playing video games in his free time. => 5205,6752,2433,960,2813,294,702,1737,220,3766,13 The birds are chirping in the trees. => 2278,9009,366,36682,294,220,3322,220,3599,279,13 I want to learn how to play the piano. => 40,528,220,1353,1466,577,220,1353,862,220,3322,9211,13 They are building a new shopping mall in the city. => 8829,366,2390,257,777,8688,16026,294,220,3322,2307,13 She is writing a novel in her spare time. => 9526,307,3579,257,7613,294,720,13798,220,3766,13 We are going to the zoo this Saturday. 
=> 4360,366,516,220,1353,220,3322,25347,220,11176,8803,13 The cake looks delicious with chocolate frosting. => 2278,5908,1542,4809,365,6215,37048,13 He is a talented painter who sells his artwork. => 5205,307,257,220,32831,6003,26619,567,20897,702,15829,13 The students are studying for their exams. => 2278,1731,366,7601,337,220,3322,347,20514,13 I enjoy swimming in the ocean. => 40,2103,11989,294,220,3322,7810,13 They are renovating their house. => 8829,366,18845,990,220,3322,347,1782,13 She is practicing yoga to stay healthy. => 9526,307,11350,15128,220,1353,1754,4627,13 We should plant flowers in the garden. => 4360,820,3709,8085,294,220,3322,7431,13 The traffic is heavy during rush hour. => 2278,220,17227,3341,307,4676,1830,9300,1773,13 He is a skilled chef who creates amazing dishes. => 5205,307,257,19690,10530,567,7829,2243,10814,13 The baby is crawling on the floor. => 2278,3186,307,32979,322,220,3322,4123,13 I need to buy a new pair of shoes. => 40,643,220,1353,2256,257,777,6119,295,6654,13 They are going on a road trip across the country. => 8829,366,516,322,257,3060,220,83,8400,2108,220,3322,1941,13 She is playing the piano beautifully. => 9526,307,2433,220,3322,9211,16525,13 We are going to a concert tomorrow night. => 4360,366,516,220,1353,257,8543,220,83,298,3162,1818,13 The cake tastes delicious with vanilla frosting. => 2278,5908,220,83,40246,4809,365,17528,37048,13 He is a dedicated teacher who inspires his students. => 5205,307,257,8374,220,975,4062,567,32566,702,1731,13 The students are participating in a science fair. => 2278,1731,366,13950,294,257,3497,3143,13 I enjoy hiking in the mountains. => 40,2103,23784,294,220,3322,10233,13 They are organizing a beach cleanup next weekend. => 8829,366,17608,257,7534,40991,958,6711,13 She is taking photographs of nature. => 9526,307,220,48625,17649,295,3687,13 We should try a new restaurant in town. => 4360,820,220,83,627,257,777,6383,294,220,30401,13 The traffic is moving slowly on the highway. => 2278,220,17227,3341,307,2684,5692,322,220,3322,17205,13 He is a talented singer with a beautiful voice. => 5205,307,257,220,32831,6003,11564,365,257,2238,3177,13 The baby is laughing and giggling. => 2278,3186,307,5059,293,290,24542,13 I need to do laundry and wash my clothes. => 40,643,220,1353,360,19811,293,5675,452,5534,13 They are planning a trip to Europe. => 8829,366,5038,257,220,83,8400,220,1353,3315,13 She is learning how to play the guitar. => 9526,307,2539,577,220,1353,862,220,3322,7531,13 We are going to a museum this Sunday. => 4360,366,516,220,1353,257,8441,220,11176,7776,13 The coffee smells amazing in the morning. => 2278,4982,10036,2243,294,220,3322,2446,13 He is a hardworking farmer who grows crops. => 5205,307,257,1152,22475,17891,567,13156,16829,13 The students are presenting their research projects. => 2278,1731,366,15578,220,3322,347,2132,4455,13 I enjoy playing soccer with my friends. => 40,2103,2433,15469,365,452,1855,13 They are volunteering at a local shelter. => 8829,366,33237,412,257,2654,13341,13 She is practicing martial arts for self-defense. => 9526,307,11350,20755,8609,337,2698,12,49268,13 We should try a new recipe for dinner. => 4360,820,220,83,627,257,777,6782,337,6148,13 The traffic is congest => 2278,220,17227,3341,307,31871 The sun is shining brightly today. => 2278,3295,307,18269,47418,220,83,378,320,13 I enjoy reading books in my free time. => 40,2103,3760,3642,294,452,1737,220,3766,13 She plays the piano beautifully. => 9526,5749,220,3322,9211,16525,13 The cat chased the mouse around the room. 
=> 2278,3857,33091,220,3322,9719,926,220,3322,1808,13 I love eating pizza with extra cheese. => 40,959,3936,8298,365,2857,5399,13 He always wears a hat wherever he goes. => 5205,1009,20877,257,2385,8660,415,1709,13 The flowers in the garden are blooming. => 2278,8085,294,220,3322,7431,366,45294,13 She danced gracefully on the stage. => 9526,32909,10042,2277,322,220,3322,3233,13 The dog barked loudly in the park. => 2278,3000,16202,292,22958,294,220,3322,3884,13 We went swimming in the ocean yesterday. => 4360,1437,11989,294,220,3322,7810,5186,13 He speaks fluent French and Spanish. => 5205,10789,40799,5522,293,8058,13 The train arrived at the station on time. => 2278,220,83,7146,6678,412,220,3322,5214,322,220,3766,13 She cooked a delicious meal for her family. => 9526,9267,257,4809,6791,337,720,1605,13 ggml-org-ggml-7ec8045/examples/python/000077500000000000000000000000001506673203700176625ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/python/README.md000066400000000000000000000113151506673203700211420ustar00rootroot00000000000000# Simple autogenerated Python bindings for ggml This folder contains: - Scripts to generate full Python bindings from ggml headers (+ stubs for autocompletion in IDEs) - Some barebones utils (see [ggml/utils.py](./ggml/utils.py)): - `ggml.utils.init` builds a context that's freed automatically when the pointer gets GC'd - `ggml.utils.copy` **copies between same-shaped tensors (numpy or ggml), w/ automatic (de/re)quantization** - `ggml.utils.numpy` returns a numpy view over a ggml tensor; if it's quantized, it returns a copy (requires `allow_copy=True`) - Very basic examples (anyone wants to port [llama2.c](https://github.com/karpathy/llama2.c)?) Provided you set `GGML_LIBRARY=.../path/to/libggml_shared.so` (see instructions below), it's trivial to do some operations on quantized tensors: ```python # Make sure libllama.so is in your [DY]LD_LIBRARY_PATH, or set GGML_LIBRARY=.../libggml_shared.so from ggml import lib, ffi from ggml.utils import init, copy, numpy import numpy as np ctx = init(mem_size=12*1024*1024) n = 256 n_threads = 4 a = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_Q5_K, n) b = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, n) # Can't both be quantized sum = lib.ggml_add(ctx, a, b) # all zeroes for now. Will be quantized too! gf = ffi.new('struct ggml_cgraph*') lib.ggml_build_forward_expand(gf, sum) copy(np.array([i for i in range(n)], np.float32), a) copy(np.array([i*100 for i in range(n)], np.float32), b) lib.ggml_graph_compute_with_ctx(ctx, gf, n_threads) print(numpy(a, allow_copy=True)) # 0. 1.0439453 2.0878906 3.131836 4.1757812 5.2197266. ... print(numpy(b)) # 0. 100. 200. 300. 400. 500. ... print(numpy(sum, allow_copy=True)) # 0. 105.4375 210.875 316.3125 421.75 527.1875 ... ``` ### Prerequisites You'll need a shared library of ggml to use the bindings. 
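Once built (see the next section), a quick sanity check that the bindings can locate the library might look like this (the path below is just a placeholder for your own build output):

```python
# Point the bindings at the shared library *before* importing ggml;
# replace the path with wherever your build produced it.
import os
os.environ["GGML_LIBRARY"] = "/path/to/libggml_shared.so"

from ggml import lib
print(lib.ggml_cpu_has_avx2())  # prints 1 or 0 once the library has loaded
```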
#### Build libggml_shared.so or libllama.so As of this writing, the best option is to use [ggerganov/llama.cpp](https://github.com/ggerganov/llama.cpp)'s generated `libggml_shared.so` or `libllama.so`, which you can build as follows: ```bash git clone https://github.com/ggerganov/llama.cpp # On a CUDA-enabled system add -DLLAMA_CUDA=1 # On a Mac add -DLLAMA_METAL=1 cmake llama.cpp \ -B llama_build \ -DCMAKE_C_FLAGS=-Ofast \ -DLLAMA_NATIVE=1 \ -DLLAMA_LTO=1 \ -DBUILD_SHARED_LIBS=1 \ -DLLAMA_MPI=1 \ -DLLAMA_BUILD_TESTS=0 \ -DLLAMA_BUILD_EXAMPLES=0 ( cd llama_build && make -j ) # On Mac, this will be libggml_shared.dylib instead export GGML_LIBRARY=$PWD/llama_build/libggml_shared.so # Alternatively, you can just copy it to your system's lib dir, e.g. /usr/local/lib ``` #### (Optional) Regenerate the bindings and stubs If you added or changed any signatures of the C API, you'll want to regenerate the bindings ([ggml/cffi.py](./ggml/cffi.py)) and stubs ([ggml/__init__.pyi](./ggml/__init__.pyi)). Luckily it's a one-liner using [regenerate.py](./regenerate.py): ```bash pip install -q cffi python regenerate.py ``` By default it assumes `llama.cpp` was cloned in ../../../llama.cpp (alongside the ggml folder). You can override this with: ```bash C_INCLUDE_DIR=$LLAMA_CPP_DIR python regenerate.py ``` You can also edit [api.h](./api.h) to control which files should be included in the generated bindings (defaults to `llama.cpp/ggml*.h`). In fact, if you wanted to generate bindings only for the current version of the `ggml` repo itself (instead of `llama.cpp`; you'd lose support for k-quants), you could run: ```bash API=../../include/ggml.h python regenerate.py ``` ## Develop Run tests: ```bash pytest ``` ### Alternatives This example's goal is to showcase [cffi](https://cffi.readthedocs.io/)-generated bindings that are trivial to use and update, but there are already alternatives in the wild: - https://github.com/abetlen/ggml-python: these bindings seem to be hand-written and use [ctypes](https://docs.python.org/3/library/ctypes.html). It has [high-quality API reference docs](https://ggml-python.readthedocs.io/en/latest/api-reference/#ggml.ggml) that can be used with these bindings too, but it doesn't expose Metal, CUDA, MPI or OpenCL calls, doesn't support transparent (de/re)quantization like this example does (see the [ggml.utils](./ggml/utils.py) module), and won't pick up your local changes. - https://github.com/abetlen/llama-cpp-python: these expose the C++ `llama.cpp` interface, which this example cannot easily be extended to support (`cffi` only generates bindings for C libraries) - [pybind11](https://github.com/pybind/pybind11) and [nanobind](https://github.com/wjakob/nanobind) are two alternatives to cffi that support binding C++ libraries, but neither of them seems to have an automatic generator (writing bindings is rather time-consuming). ggml-org-ggml-7ec8045/examples/python/api.h000066400000000000000000000006331506673203700206060ustar00rootroot00000000000000/* List here all the headers you want to expose in the Python bindings, then run `python regenerate.py` (see details in README.md) */ #include "ggml.h" #include "ggml-metal.h" #include "ggml-opencl.h" // Headers below are currently only present in the llama.cpp repository, comment them out if you don't have them.
#include "k_quants.h" #include "ggml-alloc.h" #include "ggml-cuda.h" #include "ggml-mpi.h"ggml-org-ggml-7ec8045/examples/python/example_add_quant.py000066400000000000000000000015251506673203700237120ustar00rootroot00000000000000from ggml import lib, ffi from ggml.utils import init, copy, numpy import numpy as np ctx = init(mem_size=12*1024*1024) # automatically freed when pointer is GC'd n = 256 n_threads = 4 a = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_Q5_K, n) b = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, n) # can't both be quantized sum = lib.ggml_add(ctx, a, b) # all zeroes for now. Will be quantized too! # See cffi's doc on how to allocate native memory: it's very simple! # https://cffi.readthedocs.io/en/latest/ref.html#ffi-interface gf = ffi.new('struct ggml_cgraph*') lib.ggml_build_forward_expand(gf, sum) copy(np.array([i for i in range(n)], np.float32), a) copy(np.array([i*100 for i in range(n)], np.float32), b) lib.ggml_graph_compute_with_ctx(ctx, gf, n_threads) print(numpy(a, allow_copy=True)) print(numpy(b)) print(numpy(sum, allow_copy=True))ggml-org-ggml-7ec8045/examples/python/example_test_all_quants.py000066400000000000000000000036321506673203700251550ustar00rootroot00000000000000from ggml import ffi, lib from ggml.utils import init, numpy, copy import numpy as np from math import pi, cos, sin, ceil import matplotlib.pyplot as plt ctx = init(mem_size=100*1024*1024) # Will be auto-GC'd n = 256 orig = np.array([ [ cos(j * 2 * pi / n) * (sin(i * 2 * pi / n)) for j in range(n) ] for i in range(n) ], np.float32) orig_tensor = lib.ggml_new_tensor_2d(ctx, lib.GGML_TYPE_F32, n, n) copy(orig, orig_tensor) quants = [ type for type in range(lib.GGML_TYPE_COUNT) if lib.ggml_is_quantized(type) and type not in [lib.GGML_TYPE_Q8_1, lib.GGML_TYPE_Q8_K] # Apparently not supported ] # quants = [lib.GGML_TYPE_Q2_K] # Test a single one def get_name(type): name = lib.ggml_type_name(type) return ffi.string(name).decode('utf-8') if name else '?' quants.sort(key=get_name) quants.insert(0, None) print(quants) ncols=4 nrows = ceil(len(quants) / ncols) plt.figure(figsize=(ncols * 5, nrows * 5), layout='tight') for i, type in enumerate(quants): plt.subplot(nrows, ncols, i + 1) try: if type == None: plt.title('Original') plt.imshow(orig) else: quantized_tensor = lib.ggml_new_tensor_2d(ctx, type, n, n) copy(orig_tensor, quantized_tensor) quantized = numpy(quantized_tensor, allow_copy=True) d = quantized - orig results = { "l2": np.linalg.norm(d, 2), "linf": np.linalg.norm(d, np.inf), "compression": round(lib.ggml_nbytes(orig_tensor) / lib.ggml_nbytes(quantized_tensor), 1) } name = get_name(type) print(f'{name}: {results}') plt.title(f'{name} ({results["compression"]}x smaller)') plt.imshow(quantized, interpolation='nearest') except Exception as e: print(f'Error: {e}') plt.show()ggml-org-ggml-7ec8045/examples/python/ggml/000077500000000000000000000000001506673203700206105ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/python/ggml/__init__.py000066400000000000000000000035721506673203700227300ustar00rootroot00000000000000""" Python bindings for the ggml library. 
Usage example: from ggml import lib, ffi from ggml.utils import init, copy, numpy import numpy as np ctx = init(mem_size=10*1024*1024) n = 1024 n_threads = 4 a = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_Q5_K, n) b = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, n) sum = lib.ggml_add(ctx, a, b) gf = ffi.new('struct ggml_cgraph*') lib.ggml_build_forward_expand(gf, sum) copy(np.array([i for i in range(n)], np.float32), a) copy(np.array([i*100 for i in range(n)], np.float32), b) lib.ggml_graph_compute_with_ctx(ctx, gf, n_threads) print(numpy(sum, allow_copy=True)) See https://cffi.readthedocs.io/en/latest/cdef.html for more on cffi. """ try: from ggml.cffi import ffi as ffi except ImportError as e: raise ImportError(f"Couldn't find ggml bindings ({e}). Run `python regenerate.py` or check your PYTHONPATH.") import os, platform __exact_library = os.environ.get("GGML_LIBRARY") if __exact_library: __candidates = [__exact_library] elif platform.system() == "Windows": __candidates = ["ggml_shared.dll", "llama.dll"] else: __candidates = ["libggml_shared.so", "libllama.so"] if platform.system() == "Darwin": __candidates += ["libggml_shared.dylib", "libllama.dylib"] for i, name in enumerate(__candidates): try: # This is where all the functions, enums and constants are defined lib = ffi.dlopen(name) break except OSError: if i < len(__candidates) - 1: continue raise OSError(f"Couldn't find ggml's shared library (tried names: {__candidates}). Add its directory to DYLD_LIBRARY_PATH (on Mac) or LD_LIBRARY_PATH, or define GGML_LIBRARY.") # This contains the cffi helpers such as new, cast, string, etc. # https://cffi.readthedocs.io/en/latest/ref.html#ffi-interface ffi = ffi ggml-org-ggml-7ec8045/examples/python/ggml/__init__.pyi000066400000000000000000002701451506673203700231030ustar00rootroot00000000000000# auto-generated file import ggml.ffi as ffi import numpy as np class lib: @property def GGML_BACKEND_CPU(self) -> int: ... @property def GGML_BACKEND_GPU(self) -> int: ... @property def GGML_BACKEND_GPU_SPLIT(self) -> int: ... @property def GGML_FTYPE_ALL_F32(self) -> int: ... @property def GGML_FTYPE_MOSTLY_F16(self) -> int: ... @property def GGML_FTYPE_MOSTLY_Q2_K(self) -> int: ... @property def GGML_FTYPE_MOSTLY_Q3_K(self) -> int: ... @property def GGML_FTYPE_MOSTLY_Q4_0(self) -> int: ... @property def GGML_FTYPE_MOSTLY_Q4_1(self) -> int: ... @property def GGML_FTYPE_MOSTLY_Q4_1_SOME_F16(self) -> int: ... @property def GGML_FTYPE_MOSTLY_Q4_K(self) -> int: ... @property def GGML_FTYPE_MOSTLY_Q5_0(self) -> int: ... @property def GGML_FTYPE_MOSTLY_Q5_1(self) -> int: ... @property def GGML_FTYPE_MOSTLY_Q5_K(self) -> int: ... @property def GGML_FTYPE_MOSTLY_Q6_K(self) -> int: ... @property def GGML_FTYPE_MOSTLY_Q8_0(self) -> int: ... @property def GGML_FTYPE_UNKNOWN(self) -> int: ... @property def GGML_LINESEARCH_BACKTRACKING_ARMIJO(self) -> int: ... @property def GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE(self) -> int: ... @property def GGML_LINESEARCH_BACKTRACKING_WOLFE(self) -> int: ... @property def GGML_LINESEARCH_DEFAULT(self) -> int: ... @property def GGML_LINESEARCH_FAIL(self) -> int: ... @property def GGML_LINESEARCH_INVALID_PARAMETERS(self) -> int: ... @property def GGML_LINESEARCH_MAXIMUM_ITERATIONS(self) -> int: ... @property def GGML_LINESEARCH_MAXIMUM_STEP(self) -> int: ... @property def GGML_LINESEARCH_MINIMUM_STEP(self) -> int: ... @property def GGML_OBJECT_GRAPH(self) -> int: ... @property def GGML_OBJECT_TENSOR(self) -> int: ... @property def GGML_OBJECT_WORK_BUFFER(self) -> int: ...
@property def GGML_OPT_ADAM(self) -> int: ... @property def GGML_OPT_DID_NOT_CONVERGE(self) -> int: ... @property def GGML_OPT_FAIL(self) -> int: ... @property def GGML_OPT_INVALID_WOLFE(self) -> int: ... @property def GGML_OPT_LBFGS(self) -> int: ... @property def GGML_OPT_NO_CONTEXT(self) -> int: ... @property def GGML_OPT_OK(self) -> int: ... @property def GGML_OP_ACC(self) -> int: ... @property def GGML_OP_ADD(self) -> int: ... @property def GGML_OP_ADD1(self) -> int: ... @property def GGML_OP_ALIBI(self) -> int: ... @property def GGML_OP_ARGMAX(self) -> int: ... @property def GGML_OP_CLAMP(self) -> int: ... @property def GGML_OP_CONT(self) -> int: ... @property def GGML_OP_CONV_1D(self) -> int: ... @property def GGML_OP_CONV_2D(self) -> int: ... @property def GGML_OP_COUNT(self) -> int: ... @property def GGML_OP_CPY(self) -> int: ... @property def GGML_OP_CROSS_ENTROPY_LOSS(self) -> int: ... @property def GGML_OP_CROSS_ENTROPY_LOSS_BACK(self) -> int: ... @property def GGML_OP_DIAG(self) -> int: ... @property def GGML_OP_DIAG_MASK_INF(self) -> int: ... @property def GGML_OP_DIAG_MASK_ZERO(self) -> int: ... @property def GGML_OP_DIV(self) -> int: ... @property def GGML_OP_DUP(self) -> int: ... @property def GGML_OP_FLASH_ATTN(self) -> int: ... @property def GGML_OP_FLASH_ATTN_BACK(self) -> int: ... @property def GGML_OP_FLASH_FF(self) -> int: ... @property def GGML_OP_GET_ROWS(self) -> int: ... @property def GGML_OP_GET_ROWS_BACK(self) -> int: ... @property def GGML_OP_LOG(self) -> int: ... @property def GGML_OP_MAP_BINARY(self) -> int: ... @property def GGML_OP_MAP_CUSTOM1(self) -> int: ... @property def GGML_OP_MAP_CUSTOM1_F32(self) -> int: ... @property def GGML_OP_MAP_CUSTOM2(self) -> int: ... @property def GGML_OP_MAP_CUSTOM2_F32(self) -> int: ... @property def GGML_OP_MAP_CUSTOM3(self) -> int: ... @property def GGML_OP_MAP_CUSTOM3_F32(self) -> int: ... @property def GGML_OP_MAP_UNARY(self) -> int: ... @property def GGML_OP_MEAN(self) -> int: ... @property def GGML_OP_MUL(self) -> int: ... @property def GGML_OP_MUL_MAT(self) -> int: ... @property def GGML_OP_NONE(self) -> int: ... @property def GGML_OP_NORM(self) -> int: ... @property def GGML_OP_OUT_PROD(self) -> int: ... @property def GGML_OP_PERMUTE(self) -> int: ... @property def GGML_OP_POOL_1D(self) -> int: ... @property def GGML_OP_POOL_2D(self) -> int: ... @property def GGML_OP_POOL_AVG(self) -> int: ... @property def GGML_OP_POOL_COUNT(self) -> int: ... @property def GGML_OP_POOL_MAX(self) -> int: ... @property def GGML_OP_REPEAT(self) -> int: ... @property def GGML_OP_REPEAT_BACK(self) -> int: ... @property def GGML_OP_RESHAPE(self) -> int: ... @property def GGML_OP_RMS_NORM(self) -> int: ... @property def GGML_OP_RMS_NORM_BACK(self) -> int: ... @property def GGML_OP_ROPE(self) -> int: ... @property def GGML_OP_ROPE_BACK(self) -> int: ... @property def GGML_OP_SCALE(self) -> int: ... @property def GGML_OP_SET(self) -> int: ... @property def GGML_OP_SILU_BACK(self) -> int: ... @property def GGML_OP_SOFT_MAX(self) -> int: ... @property def GGML_OP_SOFT_MAX_BACK(self) -> int: ... @property def GGML_OP_SQR(self) -> int: ... @property def GGML_OP_SQRT(self) -> int: ... @property def GGML_OP_SUB(self) -> int: ... @property def GGML_OP_SUM(self) -> int: ... @property def GGML_OP_SUM_ROWS(self) -> int: ... @property def GGML_OP_TRANSPOSE(self) -> int: ... @property def GGML_OP_UNARY(self) -> int: ... @property def GGML_OP_VIEW(self) -> int: ... @property def GGML_OP_WIN_PART(self) -> int: ... 
@property def GGML_OP_WIN_UNPART(self) -> int: ... @property def GGML_TASK_COMPUTE(self) -> int: ... @property def GGML_TASK_FINALIZE(self) -> int: ... @property def GGML_TASK_INIT(self) -> int: ... @property def GGML_TYPE_COUNT(self) -> int: ... @property def GGML_TYPE_F16(self) -> int: ... @property def GGML_TYPE_F32(self) -> int: ... @property def GGML_TYPE_I16(self) -> int: ... @property def GGML_TYPE_I32(self) -> int: ... @property def GGML_TYPE_I8(self) -> int: ... @property def GGML_TYPE_Q2_K(self) -> int: ... @property def GGML_TYPE_Q3_K(self) -> int: ... @property def GGML_TYPE_Q4_0(self) -> int: ... @property def GGML_TYPE_Q4_1(self) -> int: ... @property def GGML_TYPE_Q4_K(self) -> int: ... @property def GGML_TYPE_Q5_0(self) -> int: ... @property def GGML_TYPE_Q5_1(self) -> int: ... @property def GGML_TYPE_Q5_K(self) -> int: ... @property def GGML_TYPE_Q6_K(self) -> int: ... @property def GGML_TYPE_Q8_0(self) -> int: ... @property def GGML_TYPE_Q8_1(self) -> int: ... @property def GGML_TYPE_Q8_K(self) -> int: ... @property def GGML_UNARY_OP_ABS(self) -> int: ... @property def GGML_UNARY_OP_ELU(self) -> int: ... @property def GGML_UNARY_OP_GELU(self) -> int: ... @property def GGML_UNARY_OP_GELU_QUICK(self) -> int: ... @property def GGML_UNARY_OP_NEG(self) -> int: ... @property def GGML_UNARY_OP_RELU(self) -> int: ... @property def GGML_UNARY_OP_SGN(self) -> int: ... @property def GGML_UNARY_OP_SILU(self) -> int: ... @property def GGML_UNARY_OP_STEP(self) -> int: ... @property def GGML_UNARY_OP_TANH(self) -> int: ... @property def GGUF_TYPE_ARRAY(self) -> int: ... @property def GGUF_TYPE_BOOL(self) -> int: ... @property def GGUF_TYPE_COUNT(self) -> int: ... @property def GGUF_TYPE_FLOAT32(self) -> int: ... @property def GGUF_TYPE_INT16(self) -> int: ... @property def GGUF_TYPE_INT32(self) -> int: ... @property def GGUF_TYPE_INT8(self) -> int: ... @property def GGUF_TYPE_STRING(self) -> int: ... @property def GGUF_TYPE_UINT16(self) -> int: ... @property def GGUF_TYPE_UINT32(self) -> int: ... @property def GGUF_TYPE_UINT8(self) -> int: ... def abort_callback(data: ffi.CData) -> bool: """ abort ggml_graph_compute when true bool (*abort_callback)(void * data); """ ... def dequantize_row_q2_K(x: ffi.CData, y: ffi.CData, k: int) -> None: """ Dequantization void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k); """ ... def dequantize_row_q3_K(x: ffi.CData, y: ffi.CData, k: int) -> None: """void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k);""" ... def dequantize_row_q4_K(x: ffi.CData, y: ffi.CData, k: int) -> None: """void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k);""" ... def dequantize_row_q5_K(x: ffi.CData, y: ffi.CData, k: int) -> None: """void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k);""" ... def dequantize_row_q6_K(x: ffi.CData, y: ffi.CData, k: int) -> None: """void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k);""" ... def dequantize_row_q8_K(x: ffi.CData, y: ffi.CData, k: int) -> None: """void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k);""" ... def ggml_abs(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_abs( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_abs_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_abs_inplace( struct ggml_context * ctx, struct ggml_tensor * a); """ ... 
def ggml_acc(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, nb1: int, nb2: int, nb3: int, offset: int) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_acc( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset); """ ... def ggml_acc_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, nb1: int, nb2: int, nb3: int, offset: int) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_acc_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset); """ ... def ggml_add(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_add( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_add1(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_add1( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_add1_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_add1_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_add_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_add_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_alibi(ctx: ffi.CData, a: ffi.CData, n_past: int, n_head: int, bias_max: float) -> ffi.CData: """ alibi position embedding in-place, returns view(a) struct ggml_tensor * ggml_alibi( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_head, float bias_max); """ ... def ggml_allocr_alloc(alloc: ffi.CData, tensor: ffi.CData) -> None: """GGML_API void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor);""" ... def ggml_allocr_alloc_graph(alloc: ffi.CData, graph: ffi.CData) -> int: """GGML_API size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph);""" ... def ggml_allocr_free(alloc: ffi.CData) -> None: """GGML_API void ggml_allocr_free(struct ggml_allocr * alloc);""" ... def ggml_allocr_is_measure(alloc: ffi.CData) -> bool: """GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);""" ... def ggml_allocr_new(data: ffi.CData, size: int, alignment: int) -> ffi.CData: """GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);""" ... def ggml_allocr_new_measure(alignment: int) -> ffi.CData: """GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);""" ... def ggml_allocr_reset(alloc: ffi.CData) -> None: """GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);""" ... def ggml_allocr_set_parse_seq(alloc: ffi.CData, list: ffi.CData, n: int) -> None: """ tell the allocator to parse nodes following the order described in the list you should call this if your graph are optimized to execute out-of-order GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n); """ ... def ggml_are_same_shape(t0: ffi.CData, t1: ffi.CData) -> bool: """ GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);""" ... def ggml_argmax(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ argmax along rows GGML_API struct ggml_tensor * ggml_argmax( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_blck_size(type: int) -> int: """ GGML_API int ggml_blck_size (enum ggml_type type);""" ... 
def ggml_build_backward(ctx: ffi.CData, gf: ffi.CData, keep: bool) -> ffi.CData: """ GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);""" ... def ggml_build_forward(tensor: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);""" ... def ggml_build_forward_ctx(ctx: ffi.CData, tensor: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);""" ... def ggml_build_forward_expand(cgraph: ffi.CData, tensor: ffi.CData) -> None: """ GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);""" ... def ggml_cl_can_mul_mat(src0: ffi.CData, src1: ffi.CData, dst: ffi.CData) -> bool: """bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);""" ... def ggml_cl_free_data(tensor: ffi.CData) -> None: """void ggml_cl_free_data(const struct ggml_tensor* tensor);""" ... def ggml_cl_host_free(ptr: ffi.CData) -> None: """void ggml_cl_host_free(void * ptr);""" ... def ggml_cl_host_malloc(size: int) -> ffi.CData: """void * ggml_cl_host_malloc(size_t size);""" ... def ggml_cl_init() -> None: """void ggml_cl_init(void);""" ... def ggml_cl_mul(src0: ffi.CData, src1: ffi.CData, dst: ffi.CData) -> None: """void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);""" ... def ggml_cl_mul_mat(src0: ffi.CData, src1: ffi.CData, dst: ffi.CData, wdata: ffi.CData, wsize: int) -> None: """void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);""" ... def ggml_cl_mul_mat_get_wsize(src0: ffi.CData, src1: ffi.CData, dst: ffi.CData) -> int: """size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);""" ... def ggml_cl_transform_tensor(data: ffi.CData, tensor: ffi.CData) -> None: """void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);""" ... def ggml_clamp(ctx: ffi.CData, a: ffi.CData, min: float, max: float) -> ffi.CData: """ clamp in-place, returns view(a) struct ggml_tensor * ggml_clamp( struct ggml_context * ctx, struct ggml_tensor * a, float min, float max); """ ... def ggml_cont(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ make contiguous GGML_API struct ggml_tensor * ggml_cont( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_conv_1d(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, s0: int, p0: int, d0: int) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_conv_1d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int s0, // stride int p0, // padding int d0); // dilation """ ... def ggml_conv_1d_ph(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, s: int, d: int) -> ffi.CData: """ conv_1d with padding = half alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d) GGML_API struct ggml_tensor * ggml_conv_1d_ph( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int s, int d); """ ... def ggml_conv_2d(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, s0: int, s1: int, p0: int, p1: int, d0: int, d1: int) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_conv_2d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int s0, int s1, int p0, int p1, int d0, int d1); """ ... 
def ggml_cpu_has_arm_fma() -> int: """ GGML_API int ggml_cpu_has_arm_fma (void);""" ... def ggml_cpu_has_avx() -> int: """ GGML_API int ggml_cpu_has_avx (void);""" ... def ggml_cpu_has_avx2() -> int: """ GGML_API int ggml_cpu_has_avx2 (void);""" ... def ggml_cpu_has_avx512() -> int: """ GGML_API int ggml_cpu_has_avx512 (void);""" ... def ggml_cpu_has_avx512_vbmi() -> int: """ GGML_API int ggml_cpu_has_avx512_vbmi(void);""" ... def ggml_cpu_has_avx512_vnni() -> int: """ GGML_API int ggml_cpu_has_avx512_vnni(void);""" ... def ggml_cpu_has_blas() -> int: """ GGML_API int ggml_cpu_has_blas (void);""" ... def ggml_cpu_has_clblast() -> int: """ GGML_API int ggml_cpu_has_clblast (void);""" ... def ggml_cpu_has_cuda() -> int: """ GGML_API int ggml_cpu_has_cuda (void);""" ... def ggml_cpu_has_f16c() -> int: """ GGML_API int ggml_cpu_has_f16c (void);""" ... def ggml_cpu_has_fma() -> int: """ GGML_API int ggml_cpu_has_fma (void);""" ... def ggml_cpu_has_fp16_va() -> int: """ GGML_API int ggml_cpu_has_fp16_va (void);""" ... def ggml_cpu_has_gpublas() -> int: """ GGML_API int ggml_cpu_has_gpublas (void);""" ... def ggml_cpu_has_neon() -> int: """ GGML_API int ggml_cpu_has_neon (void);""" ... def ggml_cpu_has_sse3() -> int: """ GGML_API int ggml_cpu_has_sse3 (void);""" ... def ggml_cpu_has_vsx() -> int: """ GGML_API int ggml_cpu_has_vsx (void);""" ... def ggml_cpu_has_wasm_simd() -> int: """ GGML_API int ggml_cpu_has_wasm_simd (void);""" ... def ggml_cpy(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ a -> b, return view(b) GGML_API struct ggml_tensor * ggml_cpy( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_cross_entropy_loss(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_cross_entropy_loss( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_cross_entropy_loss_back(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, c: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c); """ ... def ggml_cuda_assign_buffers(tensor: ffi.CData) -> None: """GGML_API void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);""" ... def ggml_cuda_assign_buffers_force_inplace(tensor: ffi.CData) -> None: """GGML_API void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);""" ... def ggml_cuda_assign_buffers_no_scratch(tensor: ffi.CData) -> None: """GGML_API void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);""" ... def ggml_cuda_can_mul_mat(src0: ffi.CData, src1: ffi.CData, dst: ffi.CData) -> bool: """GGML_API bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);""" ... def ggml_cuda_compute_forward(params: ffi.CData, tensor: ffi.CData) -> bool: """GGML_API bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);""" ... def ggml_cuda_free_data(tensor: ffi.CData) -> None: """GGML_API void ggml_cuda_free_data(struct ggml_tensor * tensor);""" ... def ggml_cuda_free_scratch() -> None: """GGML_API void ggml_cuda_free_scratch(void);""" ... def ggml_cuda_get_device_count() -> int: """GGML_API int ggml_cuda_get_device_count(void);""" ... 
def ggml_cuda_get_device_description(device: int, description: ffi.CData, description_size: int) -> None: """GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);""" ... def ggml_cuda_host_free(ptr: ffi.CData) -> None: """GGML_API void ggml_cuda_host_free(void * ptr);""" ... def ggml_cuda_host_malloc(size: int) -> ffi.CData: """GGML_API void * ggml_cuda_host_malloc(size_t size);""" ... def ggml_cuda_set_main_device(main_device: int) -> None: """GGML_API void ggml_cuda_set_main_device(int main_device);""" ... def ggml_cuda_set_mul_mat_q(mul_mat_q: bool) -> None: """GGML_API void ggml_cuda_set_mul_mat_q(bool mul_mat_q);""" ... def ggml_cuda_set_scratch_size(scratch_size: int) -> None: """GGML_API void ggml_cuda_set_scratch_size(size_t scratch_size);""" ... def ggml_cuda_set_tensor_split(tensor_split: ffi.CData) -> None: """GGML_API void ggml_cuda_set_tensor_split(const float * tensor_split);""" ... def ggml_cuda_transform_tensor(data: ffi.CData, tensor: ffi.CData) -> None: """GGML_API void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);""" ... def ggml_cycles() -> int: """ GGML_API int64_t ggml_cycles(void);""" ... def ggml_cycles_per_ms() -> int: """ GGML_API int64_t ggml_cycles_per_ms(void);""" ... def ggml_diag(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_diag( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_diag_mask_inf(ctx: ffi.CData, a: ffi.CData, n_past: int) -> ffi.CData: """ set elements above the diagonal to -INF GGML_API struct ggml_tensor * ggml_diag_mask_inf( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); """ ... def ggml_diag_mask_inf_inplace(ctx: ffi.CData, a: ffi.CData, n_past: int) -> ffi.CData: """ in-place, returns view(a) GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); """ ... def ggml_diag_mask_zero(ctx: ffi.CData, a: ffi.CData, n_past: int) -> ffi.CData: """ set elements above the diagonal to 0 GGML_API struct ggml_tensor * ggml_diag_mask_zero( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); """ ... def ggml_diag_mask_zero_inplace(ctx: ffi.CData, a: ffi.CData, n_past: int) -> ffi.CData: """ in-place, returns view(a) GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); """ ... def ggml_div(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_div( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_div_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_div_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_dup(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_dup( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_dup_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ in-place, returns view(a) GGML_API struct ggml_tensor * ggml_dup_inplace( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_dup_tensor(ctx: ffi.CData, src: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);""" ... def ggml_element_size(tensor: ffi.CData) -> int: """ GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);""" ... 
def ggml_elu(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_elu( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_elu_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_elu_inplace( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_flash_attn(ctx: ffi.CData, q: ffi.CData, k: ffi.CData, v: ffi.CData, masked: bool) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_flash_attn( struct ggml_context * ctx, struct ggml_tensor * q, struct ggml_tensor * k, struct ggml_tensor * v, bool masked); """ ... def ggml_flash_attn_back(ctx: ffi.CData, q: ffi.CData, k: ffi.CData, v: ffi.CData, d: ffi.CData, masked: bool) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_flash_attn_back( struct ggml_context * ctx, struct ggml_tensor * q, struct ggml_tensor * k, struct ggml_tensor * v, struct ggml_tensor * d, bool masked); """ ... def ggml_flash_ff(ctx: ffi.CData, a: ffi.CData, b0: ffi.CData, b1: ffi.CData, c0: ffi.CData, c1: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_flash_ff( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b0, struct ggml_tensor * b1, struct ggml_tensor * c0, struct ggml_tensor * c1); """ ... def ggml_format_name(tensor: ffi.CData, fmt: ffi.CData, *args2) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);""" ... def ggml_fp16_to_fp32(x: np.float16) -> float: """ convert FP16 <-> FP32 GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x); """ ... def ggml_fp16_to_fp32_row(x: ffi.CData, y: ffi.CData, n: int) -> None: """ GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n);""" ... def ggml_fp32_to_fp16(x: float) -> np.float16: """ GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);""" ... def ggml_fp32_to_fp16_row(x: ffi.CData, y: ffi.CData, n: int) -> None: """ GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n);""" ... def ggml_free(ctx: ffi.CData) -> None: """ GGML_API void ggml_free(struct ggml_context * ctx);""" ... def ggml_ftype_to_ggml_type(ftype: int) -> int: """ TODO: temporary until model loading of ggml examples is refactored GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype); """ ... def ggml_gelu(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ TODO: double-check this computation is correct GGML_API struct ggml_tensor * ggml_gelu( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_gelu_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_gelu_inplace( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_gelu_quick(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_gelu_quick( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_gelu_quick_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_gelu_quick_inplace( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_get_data(tensor: ffi.CData) -> ffi.CData: """ GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);""" ... def ggml_get_data_f32(tensor: ffi.CData) -> ffi.CData: """ GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);""" ... def ggml_get_f32_1d(tensor: ffi.CData, i: int) -> float: """ GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);""" ... 
def ggml_get_i32_1d(tensor: ffi.CData, i: int) -> int: """ GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);""" ... def ggml_get_max_tensor_size(ctx: ffi.CData) -> int: """ GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);""" ... def ggml_get_mem_buffer(ctx: ffi.CData) -> ffi.CData: """ GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);""" ... def ggml_get_mem_size(ctx: ffi.CData) -> int: """ GGML_API size_t ggml_get_mem_size (const struct ggml_context * ctx);""" ... def ggml_get_name(tensor: ffi.CData) -> ffi.CData: """ GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);""" ... def ggml_get_no_alloc(ctx: ffi.CData) -> bool: """ GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);""" ... def ggml_get_rows(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_get_rows( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_get_rows_back(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, c: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_get_rows_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c); """ ... def ggml_get_tensor(ctx: ffi.CData, name: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);""" ... def ggml_get_unary_op(tensor: ffi.CData) -> int: """ GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);""" ... def ggml_graph_compute(cgraph: ffi.CData, cplan: ffi.CData) -> int: """ GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);""" ... def ggml_graph_compute_with_ctx(ctx: ffi.CData, cgraph: ffi.CData, n_threads: int) -> None: """ same as ggml_graph_compute() but the work data is allocated as a part of the context note: the drawback of this API is that you must have ensured that the context has enough memory for the work data GGML_API void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads); """ ... def ggml_graph_dump_dot(gb: ffi.CData, gf: ffi.CData, filename: ffi.CData) -> None: """ dump the graph into a file using the dot format GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename); """ ... def ggml_graph_get_tensor(cgraph: ffi.CData, name: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);""" ... def ggml_graph_overhead() -> int: """ GGML_API size_t ggml_graph_overhead(void);""" ... def ggml_graph_plan(cgraph: ffi.CData, n_threads: int) -> ffi.CData: """ ggml_graph_plan() has to be called before ggml_graph_compute() when plan.work_size > 0, caller must allocate memory for plan.work_data GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/); """ ... def ggml_graph_print(cgraph: ffi.CData) -> None: """ print info and performance information for the graph GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph); """ ... def ggml_graph_reset(cgraph: ffi.CData) -> None: """ GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);""" ... def ggml_init(params: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);""" ... def ggml_init_cuda() -> None: """GGML_API void ggml_init_cuda(void);""" ... 
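    # A minimal plan/compute sketch for the two-step API above (ggml_graph_plan +
    # ggml_graph_compute), assuming `gf` is a built cgraph; if you don't want to
    # manage the work buffer yourself, prefer ggml_graph_compute_with_ctx:
    #   plan = lib.ggml_graph_plan(gf, n_threads)
    #   if plan.work_size > 0:
    #       work = ffi.new("uint8_t[]", plan.work_size)  # caller must keep a reference alive
    #       plan.work_data = work
    #   lib.ggml_graph_compute(gf, ffi.addressof(plan))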
def ggml_internal_get_type_traits(type: int) -> ffi.CData: """ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);""" ... def ggml_is_contiguous(tensor: ffi.CData) -> bool: """ GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);""" ... def ggml_is_numa() -> bool: """ GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node""" ... def ggml_is_permuted(tensor: ffi.CData) -> bool: """ GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);""" ... def ggml_is_quantized(type: int) -> bool: """ GGML_API bool ggml_is_quantized(enum ggml_type type);""" ... def ggml_is_transposed(tensor: ffi.CData) -> bool: """ GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);""" ... def ggml_log(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_log( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_log_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_log_inplace( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_map_binary_f32(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, fun: ffi.CData) -> ffi.CData: """ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_binary_op_f32_t fun), "use ggml_map_custom2 instead"); """ ... def ggml_map_binary_inplace_f32(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, fun: ffi.CData) -> ffi.CData: """ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_binary_op_f32_t fun), "use ggml_map_custom2_inplace instead"); """ ... def ggml_map_custom1(ctx: ffi.CData, a: ffi.CData, fun: ffi.CData, n_tasks: int, userdata: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_map_custom1( struct ggml_context * ctx, struct ggml_tensor * a, ggml_custom1_op_t fun, int n_tasks, void * userdata); """ ... def ggml_map_custom1_f32(ctx: ffi.CData, a: ffi.CData, fun: ffi.CData) -> ffi.CData: """ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32( struct ggml_context * ctx, struct ggml_tensor * a, ggml_custom1_op_f32_t fun), "use ggml_map_custom1 instead"); """ ... def ggml_map_custom1_inplace(ctx: ffi.CData, a: ffi.CData, fun: ffi.CData, n_tasks: int, userdata: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_map_custom1_inplace( struct ggml_context * ctx, struct ggml_tensor * a, ggml_custom1_op_t fun, int n_tasks, void * userdata); """ ... def ggml_map_custom1_inplace_f32(ctx: ffi.CData, a: ffi.CData, fun: ffi.CData) -> ffi.CData: """ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, ggml_custom1_op_f32_t fun), "use ggml_map_custom1_inplace instead"); """ ... def ggml_map_custom2(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, fun: ffi.CData, n_tasks: int, userdata: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_map_custom2( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_custom2_op_t fun, int n_tasks, void * userdata); """ ... def ggml_map_custom2_f32(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, fun: ffi.CData) -> ffi.CData: """ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_custom2_op_f32_t fun), "use ggml_map_custom2 instead"); """ ... 
def ggml_map_custom2_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, fun: ffi.CData, n_tasks: int, userdata: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_map_custom2_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_custom2_op_t fun, int n_tasks, void * userdata); """ ... def ggml_map_custom2_inplace_f32(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, fun: ffi.CData) -> ffi.CData: """ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_custom2_op_f32_t fun), "use ggml_map_custom2_inplace instead"); """ ... def ggml_map_custom3(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, c: ffi.CData, fun: ffi.CData, n_tasks: int, userdata: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_map_custom3( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, ggml_custom3_op_t fun, int n_tasks, void * userdata); """ ... def ggml_map_custom3_f32(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, c: ffi.CData, fun: ffi.CData) -> ffi.CData: """ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, ggml_custom3_op_f32_t fun), "use ggml_map_custom3 instead"); """ ... def ggml_map_custom3_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, c: ffi.CData, fun: ffi.CData, n_tasks: int, userdata: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_map_custom3_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, ggml_custom3_op_t fun, int n_tasks, void * userdata); """ ... def ggml_map_custom3_inplace_f32(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, c: ffi.CData, fun: ffi.CData) -> ffi.CData: """ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, ggml_custom3_op_f32_t fun), "use ggml_map_custom3_inplace instead"); """ ... def ggml_map_unary_f32(ctx: ffi.CData, a: ffi.CData, fun: ffi.CData) -> ffi.CData: """ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32( struct ggml_context * ctx, struct ggml_tensor * a, ggml_unary_op_f32_t fun), "use ggml_map_custom1 instead"); """ ... def ggml_map_unary_inplace_f32(ctx: ffi.CData, a: ffi.CData, fun: ffi.CData) -> ffi.CData: """ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, ggml_unary_op_f32_t fun), "use ggml_map_custom1_inplace instead"); """ ... def ggml_mean(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ mean along rows GGML_API struct ggml_tensor * ggml_mean( struct ggml_context * ctx, struct ggml_tensor * a); """ ... 
def ggml_metal_add_buffer(ctx: ffi.CData, name: ffi.CData, data: ffi.CData, size: int, max_size: int) -> bool: """ creates a mapping between a host memory buffer and a device memory buffer - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute - the mapping is used during computation to determine the arguments of the compute kernels - you don't need to keep the host memory buffer allocated as it is never accessed by Metal - max_size specifies the maximum size of a tensor and is used to create shared views such that it is guaranteed that the tensor will fit in at least one of the views bool ggml_metal_add_buffer( struct ggml_metal_context * ctx, const char * name, void * data, size_t size, size_t max_size); """ ... def ggml_metal_free(ctx: ffi.CData) -> None: """void ggml_metal_free(struct ggml_metal_context * ctx);""" ... def ggml_metal_get_concur_list(ctx: ffi.CData) -> ffi.CData: """ output the concur_list for ggml_alloc int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx); """ ... def ggml_metal_get_tensor(ctx: ffi.CData, t: ffi.CData) -> None: """ get data from the device into host memory void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t); """ ... def ggml_metal_graph_compute(ctx: ffi.CData, gf: ffi.CData) -> None: """ same as ggml_graph_compute but uses Metal creates gf->n_threads command buffers in parallel void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf); """ ... def ggml_metal_graph_find_concurrency(ctx: ffi.CData, gf: ffi.CData, check_mem: bool) -> None: """ try to find operations that can be run concurrently in the graph you should run it again if the topology of your graph changes void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf, bool check_mem); """ ... def ggml_metal_host_free(data: ffi.CData) -> None: """void ggml_metal_host_free (void * data);""" ... def ggml_metal_host_malloc(n: int) -> ffi.CData: """void * ggml_metal_host_malloc(size_t n);""" ... def ggml_metal_if_optimized(ctx: ffi.CData) -> int: """ if the graph has been optimized for concurrently dispatch, return length of the concur_list if optimized int ggml_metal_if_optimized(struct ggml_metal_context * ctx); """ ... def ggml_metal_init(n_cb: int) -> ffi.CData: """ number of command buffers to use struct ggml_metal_context * ggml_metal_init(int n_cb); """ ... def ggml_metal_set_n_cb(ctx: ffi.CData, n_cb: int) -> None: """ set the number of command buffers to use void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb); """ ... def ggml_metal_set_tensor(ctx: ffi.CData, t: ffi.CData) -> None: """ set data from host memory into the device void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t); """ ... def ggml_mpi_backend_free() -> None: """void ggml_mpi_backend_free(void);""" ... def ggml_mpi_backend_init() -> None: """void ggml_mpi_backend_init(void);""" ... def ggml_mpi_eval_init(ctx_mpi: ffi.CData, n_tokens: ffi.CData, n_past: ffi.CData, n_threads: ffi.CData) -> None: """ void ggml_mpi_eval_init( struct ggml_mpi_context * ctx_mpi, int * n_tokens, int * n_past, int * n_threads); """ ... def ggml_mpi_free(ctx: ffi.CData) -> None: """void ggml_mpi_free(struct ggml_mpi_context * ctx);""" ... def ggml_mpi_graph_compute_post(ctx_mpi: ffi.CData, gf: ffi.CData, n_layers: int) -> None: """ void ggml_mpi_graph_compute_post( struct ggml_mpi_context * ctx_mpi, struct ggml_cgraph * gf, int n_layers); """ ... 
def ggml_mpi_graph_compute_pre(ctx_mpi: ffi.CData, gf: ffi.CData, n_layers: int) -> None: """ void ggml_mpi_graph_compute_pre( struct ggml_mpi_context * ctx_mpi, struct ggml_cgraph * gf, int n_layers); """ ... def ggml_mpi_init() -> ffi.CData: """struct ggml_mpi_context * ggml_mpi_init(void);""" ... def ggml_mpi_rank(ctx: ffi.CData) -> int: """int ggml_mpi_rank(struct ggml_mpi_context * ctx);""" ... def ggml_mul(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_mul( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_mul_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_mul_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_mul_mat(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData: """ A: n columns, m rows B: n columns, p rows (i.e. we transpose it internally) result is m columns, p rows GGML_API struct ggml_tensor * ggml_mul_mat( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); """ ... def ggml_nbytes(tensor: ffi.CData) -> int: """ GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);""" ... def ggml_nbytes_pad(tensor: ffi.CData) -> int: """ GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN""" ... def ggml_nbytes_split(tensor: ffi.CData, nrows_split: int) -> int: """ GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);""" ... def ggml_neg(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_neg( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_neg_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_neg_inplace( struct ggml_context * ctx, struct ggml_tensor * a); """ ... def ggml_nelements(tensor: ffi.CData) -> int: """ GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);""" ... def ggml_new_f32(ctx: ffi.CData, value: float) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);""" ... def ggml_new_graph(ctx: ffi.CData) -> ffi.CData: """ graph allocation in a context GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); """ ... def ggml_new_i32(ctx: ffi.CData, value: int) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);""" ... def ggml_new_tensor(ctx: ffi.CData, type: int, n_dims: int, ne: ffi.CData) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_new_tensor( struct ggml_context * ctx, enum ggml_type type, int n_dims, const int64_t *ne); """ ... def ggml_new_tensor_1d(ctx: ffi.CData, type: int, ne0: int) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_new_tensor_1d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0); """ ... def ggml_new_tensor_2d(ctx: ffi.CData, type: int, ne0: int, ne1: int) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_new_tensor_2d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1); """ ... def ggml_new_tensor_3d(ctx: ffi.CData, type: int, ne0: int, ne1: int, ne2: int) -> ffi.CData: """ GGML_API struct ggml_tensor * ggml_new_tensor_3d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2); """ ... 
def ggml_new_tensor_4d(ctx: ffi.CData, type: int, ne0: int, ne1: int, ne2: int, ne3: int) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_new_tensor_4d(struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3);"""
    ...

def ggml_norm(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """
    normalize along rows
    TODO: eps is hardcoded to 1e-5 for now

    GGML_API struct ggml_tensor * ggml_norm(struct ggml_context * ctx, struct ggml_tensor * a);
    """
    ...

def ggml_norm_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_norm_inplace(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_nrows(tensor: ffi.CData) -> int:
    """GGML_API int64_t ggml_nrows(const struct ggml_tensor * tensor);"""
    ...

def ggml_numa_init() -> None:
    """GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems"""
    ...

def ggml_op_name(op: int) -> ffi.CData:
    """GGML_API const char * ggml_op_name(enum ggml_op op);"""
    ...

def ggml_op_symbol(op: int) -> ffi.CData:
    """GGML_API const char * ggml_op_symbol(enum ggml_op op);"""
    ...

def ggml_opt(ctx: ffi.CData, params: ffi.CData, f: ffi.CData) -> int:
    """
    optimize the function defined by the tensor f

    GGML_API enum ggml_opt_result ggml_opt(struct ggml_context * ctx, struct ggml_opt_params params, struct ggml_tensor * f);
    """
    ...

def ggml_opt_default_params(type: int) -> ffi.CData:
    """GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);"""
    ...

def ggml_opt_init(ctx: ffi.CData, opt: ffi.CData, params: ffi.CData, nx: int) -> None:
    """
    initialize optimizer context

    GGML_API void ggml_opt_init(struct ggml_context * ctx, struct ggml_opt_context * opt, struct ggml_opt_params params, int64_t nx);
    """
    ...

def ggml_opt_resume(ctx: ffi.CData, opt: ffi.CData, f: ffi.CData) -> int:
    """
    continue optimizing the function defined by the tensor f

    GGML_API enum ggml_opt_result ggml_opt_resume(struct ggml_context * ctx, struct ggml_opt_context * opt, struct ggml_tensor * f);
    """
    ...

def ggml_opt_resume_g(ctx: ffi.CData, opt: ffi.CData, f: ffi.CData, gf: ffi.CData, gb: ffi.CData) -> int:
    """
    continue optimizing the function defined by the tensor f

    GGML_API enum ggml_opt_result ggml_opt_resume_g(struct ggml_context * ctx, struct ggml_opt_context * opt, struct ggml_tensor * f, struct ggml_cgraph * gf, struct ggml_cgraph * gb);
    """
    ...

def ggml_out_prod(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """
    A: m columns, n rows,
    B: p columns, n rows,
    result is m columns, p rows

    GGML_API struct ggml_tensor * ggml_out_prod(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
    """
    ...

def ggml_permute(ctx: ffi.CData, a: ffi.CData, axis0: int, axis1: int, axis2: int, axis3: int) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_permute(struct ggml_context * ctx, struct ggml_tensor * a, int axis0, int axis1, int axis2, int axis3);"""
    ...

def ggml_pool_1d(ctx: ffi.CData, a: ffi.CData, op: int, k0: int, s0: int, p0: int) -> ffi.CData:
    """
    GGML_API struct ggml_tensor * ggml_pool_1d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            enum ggml_op_pool     op,
            int                   k0,  // kernel size
            int                   s0,  // stride
            int                   p0); // padding
    """
    ...

def ggml_pool_2d(ctx: ffi.CData, a: ffi.CData, op: int, k0: int, k1: int, s0: int, s1: int, p0: int, p1: int) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_pool_2d(struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_op_pool op, int k0, int k1, int s0, int s1, int p0, int p1);"""
    ...
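# --- Editor's illustrative sketch (not part of the generated stubs) ---
# Minimizing f(x) = sum(x^2) with the Adam defaults via ggml_opt above,
# mirroring the pattern of ggml's own optimizer tests. Assumes
# `from ggml import lib`; ggml_init/ggml_free/ggml_set_f32/ggml_set_param/
# ggml_sqr/ggml_sum are all wrapped elsewhere in this module.
def _example_optimize_sum_of_squares():
    from ggml import lib  # assumed module layout
    params = ffi.new("struct ggml_init_params*")
    params.mem_size = 16 * 1024 * 1024
    ctx = lib.ggml_init(params[0])
    x = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, 4)
    lib.ggml_set_f32(x, 1.5)    # starting point
    lib.ggml_set_param(ctx, x)  # mark x as a trainable parameter
    f = lib.ggml_sum(ctx, lib.ggml_sqr(ctx, x))
    result = lib.ggml_opt(ctx, lib.ggml_opt_default_params(lib.GGML_OPT_ADAM), f)
    assert result == lib.GGML_OPT_OK  # x should now be close to 0
    lib.ggml_free(ctx)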
def ggml_print_object(obj: ffi.CData) -> None:
    """GGML_API void ggml_print_object(const struct ggml_object * obj);"""
    ...

def ggml_print_objects(ctx: ffi.CData) -> None:
    """GGML_API void ggml_print_objects(const struct ggml_context * ctx);"""
    ...

def ggml_quantize_chunk(type: int, src: ffi.CData, dst: ffi.CData, start: int, n: int, hist: ffi.CData) -> int:
    """GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);"""
    ...

def ggml_quantize_q2_K(src: ffi.CData, dst: ffi.CData, n: int, k: int, hist: ffi.CData) -> int:
    """
    Quantization with histogram collection

    size_t ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist);
    """
    ...

def ggml_quantize_q3_K(src: ffi.CData, dst: ffi.CData, n: int, k: int, hist: ffi.CData) -> int:
    """size_t ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist);"""
    ...

def ggml_quantize_q4_0(src: ffi.CData, dst: ffi.CData, n: int, k: int, hist: ffi.CData) -> int:
    """GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);"""
    ...

def ggml_quantize_q4_1(src: ffi.CData, dst: ffi.CData, n: int, k: int, hist: ffi.CData) -> int:
    """GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);"""
    ...

def ggml_quantize_q4_K(src: ffi.CData, dst: ffi.CData, n: int, k: int, hist: ffi.CData) -> int:
    """size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist);"""
    ...

def ggml_quantize_q5_0(src: ffi.CData, dst: ffi.CData, n: int, k: int, hist: ffi.CData) -> int:
    """GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);"""
    ...

def ggml_quantize_q5_1(src: ffi.CData, dst: ffi.CData, n: int, k: int, hist: ffi.CData) -> int:
    """GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);"""
    ...

def ggml_quantize_q5_K(src: ffi.CData, dst: ffi.CData, n: int, k: int, hist: ffi.CData) -> int:
    """size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);"""
    ...

def ggml_quantize_q6_K(src: ffi.CData, dst: ffi.CData, n: int, k: int, hist: ffi.CData) -> int:
    """size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);"""
    ...

def ggml_quantize_q8_0(src: ffi.CData, dst: ffi.CData, n: int, k: int, hist: ffi.CData) -> int:
    """GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);"""
    ...

def ggml_relu(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_relu(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_relu_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_relu_inplace(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_repeat(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """
    if a is the same shape as b, and a is not a parameter, return a
    otherwise, return a new tensor: repeat(a) to fit in b

    GGML_API struct ggml_tensor * ggml_repeat(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
    """
    ...

def ggml_repeat_back(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_repeat_back(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);"""
    ...
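# --- Editor's illustrative sketch (not part of the generated stubs) ---
# Quantizing a row of floats to Q4_0 with the histogram API above. Assumes
# `from ggml import lib`; the buffer sizes below are deliberately generous.
def _example_quantize_q4_0():
    from ggml import lib  # assumed module layout
    n = 256  # must be a multiple of the Q4_0 block size (32 floats)
    src = ffi.new("float[]", [float(i % 16) - 8.0 for i in range(n)])
    dst = ffi.new("char[]", n)       # Q4_0 packs 32 floats into 18 bytes, so n bytes is plenty
    hist = ffi.new("int64_t[]", 16)  # one bin per 4-bit quant value
    nbytes = lib.ggml_quantize_q4_0(src, dst, n, n, hist)  # returns bytes written to dst
    return nbytes, [hist[i] for i in range(16)]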
def ggml_reshape(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """
    return view(a), b specifies the new shape
    TODO: when we start computing gradient, make a copy instead of view

    GGML_API struct ggml_tensor * ggml_reshape(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
    """
    ...

def ggml_reshape_1d(ctx: ffi.CData, a: ffi.CData, ne0: int) -> ffi.CData:
    """
    return view(a)
    TODO: when we start computing gradient, make a copy instead of view

    GGML_API struct ggml_tensor * ggml_reshape_1d(struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0);
    """
    ...

def ggml_reshape_2d(ctx: ffi.CData, a: ffi.CData, ne0: int, ne1: int) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_reshape_2d(struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1);"""
    ...

def ggml_reshape_3d(ctx: ffi.CData, a: ffi.CData, ne0: int, ne1: int, ne2: int) -> ffi.CData:
    """
    return view(a)
    TODO: when we start computing gradient, make a copy instead of view

    GGML_API struct ggml_tensor * ggml_reshape_3d(struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2);
    """
    ...

def ggml_reshape_4d(ctx: ffi.CData, a: ffi.CData, ne0: int, ne1: int, ne2: int, ne3: int) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_reshape_4d(struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3);"""
    ...

def ggml_rms_norm(ctx: ffi.CData, a: ffi.CData, eps: float) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_rms_norm(struct ggml_context * ctx, struct ggml_tensor * a, float eps);"""
    ...

def ggml_rms_norm_back(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """
    a - x
    b - dy
    TODO: update with configurable eps

    GGML_API struct ggml_tensor * ggml_rms_norm_back(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
    """
    ...

def ggml_rms_norm_inplace(ctx: ffi.CData, a: ffi.CData, eps: float) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_rms_norm_inplace(struct ggml_context * ctx, struct ggml_tensor * a, float eps);"""
    ...

def ggml_rope(ctx: ffi.CData, a: ffi.CData, n_past: int, n_dims: int, mode: int, n_ctx: int) -> ffi.CData:
    """
    rotary position embedding
    if (mode & 1), skip n_past elements
    if (mode & 2), GPT-NeoX style
    if (mode & 4), ChatGLM style
    TODO: avoid creating a new tensor every time

    GGML_API struct ggml_tensor * ggml_rope(struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx);
    """
    ...

def ggml_rope_back(ctx: ffi.CData, a: ffi.CData, n_past: int, n_dims: int, mode: int, n_ctx: int) -> ffi.CData:
    """
    rotary position embedding backward, i.e. compute dx from dy
    a - dy

    GGML_API struct ggml_tensor * ggml_rope_back(struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx);
    """
    ...

def ggml_rope_custom(ctx: ffi.CData, a: ffi.CData, n_past: int, n_dims: int, mode: int, n_ctx: int, freq_base: float, freq_scale: float) -> ffi.CData:
    """
    custom RoPE

    GGML_API struct ggml_tensor * ggml_rope_custom(struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx, float freq_base, float freq_scale);
    """
    ...
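# --- Editor's illustrative sketch (not part of the generated stubs) ---
# The RoPE mode bits documented above, applied to a hypothetical attention
# tensor `cur` of shape [n_embd, n_tokens, ...]. Assumes `from ggml import lib`;
# the n_past/n_dims/n_ctx values are placeholders.
def _example_rope_neox(ctx, cur):
    from ggml import lib  # assumed module layout
    n_past, n_dims, n_ctx = 0, 64, 2048
    mode = 2  # bit value 2 selects the GPT-NeoX style rotation
    return lib.ggml_rope(ctx, cur, n_past, n_dims, mode, n_ctx)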
def ggml_rope_custom_inplace(ctx: ffi.CData, a: ffi.CData, n_past: int, n_dims: int, mode: int, n_ctx: int, freq_base: float, freq_scale: float) -> ffi.CData:
    """
    in-place, returns view(a)

    GGML_API struct ggml_tensor * ggml_rope_custom_inplace(struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx, float freq_base, float freq_scale);
    """
    ...

def ggml_rope_inplace(ctx: ffi.CData, a: ffi.CData, n_past: int, n_dims: int, mode: int, n_ctx: int) -> ffi.CData:
    """
    in-place, returns view(a)

    GGML_API struct ggml_tensor * ggml_rope_inplace(struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx);
    """
    ...

def ggml_scale(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_scale(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);"""
    ...

def ggml_scale_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """
    in-place, returns view(a)

    GGML_API struct ggml_tensor * ggml_scale_inplace(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
    """
    ...

def ggml_set(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, nb1: int, nb2: int, nb3: int, offset: int) -> ffi.CData:
    """
    b -> view(a, offset, nb1, nb2, nb3), return modified a

    GGML_API struct ggml_tensor * ggml_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset);
    """
    ...

def ggml_set_1d(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, offset: int) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_set_1d(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t offset);"""
    ...

def ggml_set_1d_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, offset: int) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_set_1d_inplace(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t offset);"""
    ...

def ggml_set_2d(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, nb1: int, offset: int) -> ffi.CData:
    """
    b -> view(a, offset, nb1), return modified a

    GGML_API struct ggml_tensor * ggml_set_2d(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t offset);
    """
    ...

def ggml_set_2d_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, nb1: int, offset: int) -> ffi.CData:
    """
    b -> view(a, offset, nb1), return view(a)

    GGML_API struct ggml_tensor * ggml_set_2d_inplace(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t offset);
    """
    ...

def ggml_set_f32(tensor: ffi.CData, value: float) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value);"""
    ...

def ggml_set_f32_1d(tensor: ffi.CData, i: int, value: float) -> None:
    """GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);"""
    ...

def ggml_set_i32(tensor: ffi.CData, value: int) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_set_i32(struct ggml_tensor * tensor, int32_t value);"""
    ...

def ggml_set_i32_1d(tensor: ffi.CData, i: int, value: int) -> None:
    """GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);"""
    ...
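# --- Editor's illustrative sketch (not part of the generated stubs) ---
# Element-wise host-side writes and reads with the 1d setters above. Assumes
# `from ggml import lib`; ggml_get_f32_1d is wrapped earlier in this module.
def _example_fill_tensor(ctx):
    from ggml import lib  # assumed module layout
    t = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, 8)
    for i in range(8):
        lib.ggml_set_f32_1d(t, i, 0.5 * i)
    assert lib.ggml_get_f32_1d(t, 3) == 1.5
    return t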
def ggml_set_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData, nb1: int, nb2: int, nb3: int, offset: int) -> ffi.CData:
    """
    b -> view(a, offset, nb1, nb2, nb3), return view(a)

    GGML_API struct ggml_tensor * ggml_set_inplace(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset);
    """
    ...

def ggml_set_name(tensor: ffi.CData, name: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);"""
    ...

def ggml_set_no_alloc(ctx: ffi.CData, no_alloc: bool) -> None:
    """GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);"""
    ...

def ggml_set_param(ctx: ffi.CData, tensor: ffi.CData) -> None:
    """GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);"""
    ...

def ggml_set_scratch(ctx: ffi.CData, scratch: ffi.CData) -> int:
    """GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);"""
    ...

def ggml_set_zero(tensor: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);"""
    ...

def ggml_sgn(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_sgn(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_sgn_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_sgn_inplace(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_silu(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_silu(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_silu_back(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """
    a - x
    b - dy

    GGML_API struct ggml_tensor * ggml_silu_back(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
    """
    ...

def ggml_silu_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_silu_inplace(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_soft_max(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_soft_max(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_soft_max_back(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_soft_max_back(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);"""
    ...

def ggml_soft_max_back_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """
    in-place, returns view(a)

    GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
    """
    ...

def ggml_soft_max_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """
    in-place, returns view(a)

    GGML_API struct ggml_tensor * ggml_soft_max_inplace(struct ggml_context * ctx, struct ggml_tensor * a);
    """
    ...

def ggml_sqr(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_sqr(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_sqr_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_sqr_inplace(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_sqrt(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_sqrt(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...
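# --- Editor's illustrative sketch (not part of the generated stubs) ---
# The difference between the allocating and in-place softmax variants above.
# Assumes `from ggml import lib`; `logits` is a hypothetical f32 tensor.
def _example_soft_max(ctx, logits):
    from ggml import lib  # assumed module layout
    probs = lib.ggml_soft_max(ctx, logits)               # new tensor, logits untouched
    probs_view = lib.ggml_soft_max_inplace(ctx, logits)  # view(logits), overwritten on compute
    return probs, probs_view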
def ggml_sqrt_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_sqrt_inplace(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_step(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_step(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_step_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_step_inplace(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_sub(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_sub(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);"""
    ...

def ggml_sub_inplace(ctx: ffi.CData, a: ffi.CData, b: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_sub_inplace(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);"""
    ...

def ggml_sum(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """
    return scalar

    GGML_API struct ggml_tensor * ggml_sum(struct ggml_context * ctx, struct ggml_tensor * a);
    """
    ...

def ggml_sum_rows(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """
    sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]

    GGML_API struct ggml_tensor * ggml_sum_rows(struct ggml_context * ctx, struct ggml_tensor * a);
    """
    ...

def ggml_tanh(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_tanh(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_tanh_inplace(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_tanh_inplace(struct ggml_context * ctx, struct ggml_tensor * a);"""
    ...

def ggml_tensor_overhead() -> int:
    """
    use this to compute the memory overhead of a tensor

    GGML_API size_t ggml_tensor_overhead(void);
    """
    ...

def ggml_time_init() -> None:
    """GGML_API void ggml_time_init(void); // call this once at the beginning of the program"""
    ...

def ggml_time_ms() -> int:
    """GGML_API int64_t ggml_time_ms(void);"""
    ...

def ggml_time_us() -> int:
    """GGML_API int64_t ggml_time_us(void);"""
    ...

def ggml_transpose(ctx: ffi.CData, a: ffi.CData) -> ffi.CData:
    """
    alias for ggml_permute(ctx, a, 1, 0, 2, 3)

    GGML_API struct ggml_tensor * ggml_transpose(struct ggml_context * ctx, struct ggml_tensor * a);
    """
    ...

def ggml_type_name(type: int) -> ffi.CData:
    """GGML_API const char * ggml_type_name(enum ggml_type type);"""
    ...

def ggml_type_size(type: int) -> int:
    """GGML_API size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block"""
    ...

def ggml_type_sizef(type: int) -> float:
    """GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float"""
    ...

def ggml_unary(ctx: ffi.CData, a: ffi.CData, op: int) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_unary(struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_unary_op op);"""
    ...

def ggml_unary_inplace(ctx: ffi.CData, a: ffi.CData, op: int) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_unary_inplace(struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_unary_op op);"""
    ...

def ggml_used_mem(ctx: ffi.CData) -> int:
    """GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);"""
    ...

def ggml_vec_dot_q2_K_q8_K(n: int, s: ffi.CData, vx: ffi.CData, vy: ffi.CData) -> None:
    """
    Dot product

    void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
    """
    ...
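# --- Editor's illustrative sketch (not part of the generated stubs) ---
# Sizing a context with ggml_tensor_overhead/ggml_type_size as documented
# above. Assumes `from ggml import lib`; this is an estimate for f32 tensors,
# not a guarantee (quantized types store whole blocks, see ggml_blck_size).
def _example_context_size(n_tensors=2, n_elements=1024 * 1024):
    from ggml import lib  # assumed module layout
    per_tensor = lib.ggml_tensor_overhead() + n_elements * lib.ggml_type_size(lib.GGML_TYPE_F32)
    return n_tensors * per_tensor  # pass this as mem_size to ggml_init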
def ggml_vec_dot_q3_K_q8_K(n: int, s: ffi.CData, vx: ffi.CData, vy: ffi.CData) -> None:
    """void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);"""
    ...

def ggml_vec_dot_q4_K_q8_K(n: int, s: ffi.CData, vx: ffi.CData, vy: ffi.CData) -> None:
    """void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);"""
    ...

def ggml_vec_dot_q5_K_q8_K(n: int, s: ffi.CData, vx: ffi.CData, vy: ffi.CData) -> None:
    """void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);"""
    ...

def ggml_vec_dot_q6_K_q8_K(n: int, s: ffi.CData, vx: ffi.CData, vy: ffi.CData) -> None:
    """void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);"""
    ...

def ggml_view_1d(ctx: ffi.CData, a: ffi.CData, ne0: int, offset: int) -> ffi.CData:
    """
    offset in bytes

    GGML_API struct ggml_tensor * ggml_view_1d(struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, size_t offset);
    """
    ...

def ggml_view_2d(ctx: ffi.CData, a: ffi.CData, ne0: int, ne1: int, nb1: int, offset: int) -> ffi.CData:
    """
    GGML_API struct ggml_tensor * ggml_view_2d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1,
            size_t                nb1, // row stride in bytes
            size_t                offset);
    """
    ...

def ggml_view_3d(ctx: ffi.CData, a: ffi.CData, ne0: int, ne1: int, ne2: int, nb1: int, nb2: int, offset: int) -> ffi.CData:
    """
    GGML_API struct ggml_tensor * ggml_view_3d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1,
            int64_t               ne2,
            size_t                nb1, // row stride in bytes
            size_t                nb2, // slice stride in bytes
            size_t                offset);
    """
    ...

def ggml_view_4d(ctx: ffi.CData, a: ffi.CData, ne0: int, ne1: int, ne2: int, ne3: int, nb1: int, nb2: int, nb3: int, offset: int) -> ffi.CData:
    """
    GGML_API struct ggml_tensor * ggml_view_4d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1,
            int64_t               ne2,
            int64_t               ne3,
            size_t                nb1, // row stride in bytes
            size_t                nb2, // slice stride in bytes
            size_t                nb3,
            size_t                offset);
    """
    ...

def ggml_view_tensor(ctx: ffi.CData, src: ffi.CData) -> ffi.CData:
    """GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);"""
    ...

def ggml_win_part(ctx: ffi.CData, a: ffi.CData, w: int) -> ffi.CData:
    """
    partition into non-overlapping windows with padding if needed
    example:
      a:   768  64  64  1
      w:   14
      res: 768  14  14  25
    used in sam

    GGML_API struct ggml_tensor * ggml_win_part(struct ggml_context * ctx, struct ggml_tensor * a, int w);
    """
    ...

def ggml_win_unpart(ctx: ffi.CData, a: ffi.CData, w0: int, h0: int, w: int) -> ffi.CData:
    """
    reverse of ggml_win_part
    used in sam

    GGML_API struct ggml_tensor * ggml_win_unpart(struct ggml_context * ctx, struct ggml_tensor * a, int w0, int h0, int w);
    """
    ...

def gguf_add_tensor(ctx: ffi.CData, tensor: ffi.CData) -> None:
    """
    manage tensor info

    GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
    """
    ...

def gguf_find_key(ctx: ffi.CData, key: ffi.CData) -> int:
    """GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);"""
    ...

def gguf_find_tensor(ctx: ffi.CData, name: ffi.CData) -> int:
    """GGML_API int gguf_find_tensor(struct gguf_context * ctx, const char * name);"""
    ...

def gguf_free(ctx: ffi.CData) -> None:
    """GGML_API void gguf_free(struct gguf_context * ctx);"""
    ...
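# --- Editor's illustrative sketch (not part of the generated stubs) ---
# A rows-2..5 view of a 2d f32 tensor using the byte strides documented above.
# Assumes `from ggml import lib`; `t` is a hypothetical contiguous matrix.
def _example_row_view(ctx, t):
    from ggml import lib  # assumed module layout
    row_bytes = t.nb[1]  # stride between consecutive rows, in bytes
    # keep all ne0 columns, take 4 rows, starting 2 rows into the data
    return lib.ggml_view_2d(ctx, t, t.ne[0], 4, row_bytes, 2 * row_bytes)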
def gguf_get_alignment(ctx: ffi.CData) -> int:
    """GGML_API size_t gguf_get_alignment(struct gguf_context * ctx);"""
    ...

def gguf_get_arr_data(ctx: ffi.CData, i: int) -> ffi.CData:
    """GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_arr_n(ctx: ffi.CData, i: int) -> int:
    """GGML_API int gguf_get_arr_n(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_arr_str(ctx: ffi.CData, key_id: int, i: int) -> ffi.CData:
    """GGML_API const char * gguf_get_arr_str(struct gguf_context * ctx, int key_id, int i);"""
    ...

def gguf_get_arr_type(ctx: ffi.CData, i: int) -> int:
    """GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_data(ctx: ffi.CData) -> ffi.CData:
    """GGML_API void * gguf_get_data(struct gguf_context * ctx);"""
    ...

def gguf_get_data_offset(ctx: ffi.CData) -> int:
    """GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);"""
    ...

def gguf_get_key(ctx: ffi.CData, i: int) -> ffi.CData:
    """GGML_API const char * gguf_get_key(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_kv_type(ctx: ffi.CData, i: int) -> int:
    """GGML_API enum gguf_type gguf_get_kv_type(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_meta_data(ctx: ffi.CData, data: ffi.CData) -> None:
    """GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);"""
    ...

def gguf_get_meta_size(ctx: ffi.CData) -> int:
    """
    get the size in bytes of the meta data (header, kv pairs, tensor info) including padding

    GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
    """
    ...

def gguf_get_n_kv(ctx: ffi.CData) -> int:
    """GGML_API int gguf_get_n_kv(struct gguf_context * ctx);"""
    ...

def gguf_get_n_tensors(ctx: ffi.CData) -> int:
    """GGML_API int gguf_get_n_tensors(struct gguf_context * ctx);"""
    ...

def gguf_get_tensor_name(ctx: ffi.CData, i: int) -> ffi.CData:
    """GGML_API char * gguf_get_tensor_name(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_tensor_offset(ctx: ffi.CData, i: int) -> int:
    """GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_val_bool(ctx: ffi.CData, i: int) -> bool:
    """GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_val_f32(ctx: ffi.CData, i: int) -> float:
    """GGML_API float gguf_get_val_f32(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_val_i16(ctx: ffi.CData, i: int) -> int:
    """GGML_API int16_t gguf_get_val_i16(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_val_i32(ctx: ffi.CData, i: int) -> int:
    """GGML_API int32_t gguf_get_val_i32(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_val_i8(ctx: ffi.CData, i: int) -> int:
    """GGML_API int8_t gguf_get_val_i8(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_val_str(ctx: ffi.CData, i: int) -> ffi.CData:
    """GGML_API const char * gguf_get_val_str(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_val_u16(ctx: ffi.CData, i: int) -> int:
    """GGML_API uint16_t gguf_get_val_u16(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_val_u32(ctx: ffi.CData, i: int) -> int:
    """GGML_API uint32_t gguf_get_val_u32(struct gguf_context * ctx, int i);"""
    ...

def gguf_get_val_u8(ctx: ffi.CData, i: int) -> int:
    """
    results are undefined if the wrong type is used for the key

    GGML_API uint8_t gguf_get_val_u8(struct gguf_context * ctx, int i);
    """
    ...

def gguf_get_version(ctx: ffi.CData) -> int:
    """GGML_API int gguf_get_version(struct gguf_context * ctx);"""
    ...
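# --- Editor's illustrative sketch (not part of the generated stubs) ---
# Dumping the KV metadata of a GGUF file with the getters above. Assumes
# `from ggml import lib`; with a zero-initialized gguf_init_params (ctx left
# NULL) the tensor data is not materialized into a ggml context.
def _example_dump_gguf_keys(fname: bytes):
    from ggml import lib  # assumed module layout
    params = ffi.new("struct gguf_init_params*")
    ctx = lib.gguf_init_from_file(fname, params[0])
    for i in range(lib.gguf_get_n_kv(ctx)):
        key = ffi.string(lib.gguf_get_key(ctx, i)).decode()
        print(i, key, lib.gguf_get_kv_type(ctx, i))
    lib.gguf_free(ctx)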
def gguf_init_empty() -> ffi.CData:
    """GGML_API struct gguf_context * gguf_init_empty(void);"""
    ...

def gguf_init_from_file(fname: ffi.CData, params: ffi.CData) -> ffi.CData:
    """GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);"""
    ...

def gguf_set_arr_data(ctx: ffi.CData, key: ffi.CData, type: int, data: ffi.CData, n: int) -> None:
    """GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);"""
    ...

def gguf_set_arr_str(ctx: ffi.CData, key: ffi.CData, data: ffi.CData, n: int) -> None:
    """GGML_API void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n);"""
    ...

def gguf_set_kv(ctx: ffi.CData, src: ffi.CData) -> None:
    """
    set or add KV pairs from another context

    GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
    """
    ...

def gguf_set_tensor_data(ctx: ffi.CData, name: ffi.CData, data: ffi.CData, size: int) -> None:
    """GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);"""
    ...

def gguf_set_tensor_type(ctx: ffi.CData, name: ffi.CData, type: int) -> None:
    """GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);"""
    ...

def gguf_set_val_bool(ctx: ffi.CData, key: ffi.CData, val: bool) -> None:
    """GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);"""
    ...

def gguf_set_val_f32(ctx: ffi.CData, key: ffi.CData, val: float) -> None:
    """GGML_API void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val);"""
    ...

def gguf_set_val_i16(ctx: ffi.CData, key: ffi.CData, val: int) -> None:
    """GGML_API void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val);"""
    ...

def gguf_set_val_i32(ctx: ffi.CData, key: ffi.CData, val: int) -> None:
    """GGML_API void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val);"""
    ...

def gguf_set_val_i8(ctx: ffi.CData, key: ffi.CData, val: int) -> None:
    """GGML_API void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val);"""
    ...

def gguf_set_val_str(ctx: ffi.CData, key: ffi.CData, val: ffi.CData) -> None:
    """GGML_API void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val);"""
    ...

def gguf_set_val_u16(ctx: ffi.CData, key: ffi.CData, val: int) -> None:
    """GGML_API void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val);"""
    ...

def gguf_set_val_u32(ctx: ffi.CData, key: ffi.CData, val: int) -> None:
    """GGML_API void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val);"""
    ...

def gguf_set_val_u8(ctx: ffi.CData, key: ffi.CData, val: int) -> None:
    """
    overrides existing values or adds a new one

    GGML_API void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val);
    """
    ...

def gguf_type_name(type: int) -> ffi.CData:
    """GGML_API const char * gguf_type_name(enum gguf_type type);"""
    ...

def gguf_write_to_file(ctx: ffi.CData, fname: ffi.CData, only_meta: bool) -> None:
    """
    write the entire context to a binary file

    GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
    """
    ...

def quantize_row_q2_K(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """void quantize_row_q2_K(const float * restrict x, void * restrict y, int k);"""
    ...
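# --- Editor's illustrative sketch (not part of the generated stubs) ---
# Creating a GGUF file that carries only metadata, using the setters above.
# Assumes `from ggml import lib`; the key names are arbitrary examples.
def _example_write_gguf(path: bytes):
    from ggml import lib  # assumed module layout
    ctx = lib.gguf_init_empty()
    lib.gguf_set_val_str(ctx, b"general.name", b"demo")
    lib.gguf_set_val_u32(ctx, b"demo.version", 1)
    lib.gguf_write_to_file(ctx, path, True)  # only_meta=True: header + KV pairs, no tensor data
    lib.gguf_free(ctx)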
def quantize_row_q2_K_reference(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """
    Quantization

    void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k);
    """
    ...

def quantize_row_q3_K(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """void quantize_row_q3_K(const float * restrict x, void * restrict y, int k);"""
    ...

def quantize_row_q3_K_reference(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k);"""
    ...

def quantize_row_q4_K(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """void quantize_row_q4_K(const float * restrict x, void * restrict y, int k);"""
    ...

def quantize_row_q4_K_reference(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k);"""
    ...

def quantize_row_q5_K(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """void quantize_row_q5_K(const float * restrict x, void * restrict y, int k);"""
    ...

def quantize_row_q5_K_reference(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k);"""
    ...

def quantize_row_q6_K(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """void quantize_row_q6_K(const float * restrict x, void * restrict y, int k);"""
    ...

def quantize_row_q6_K_reference(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k);"""
    ...

def quantize_row_q8_K(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """void quantize_row_q8_K(const float * restrict x, void * restrict y, int k);"""
    ...

def quantize_row_q8_K_reference(x: ffi.CData, y: ffi.CData, k: int) -> None:
    """void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k);"""
    ...
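# --- Editor's illustrative sketch (not part of the generated stubs) ---
# Quantizing one row with the k-quant row API above. Assumes
# `from ggml import lib`; the output buffer size is a deliberately
# generous upper bound rather than the exact block size.
def _example_quantize_row_q8_K():
    from ggml import lib  # assumed module layout
    k = 256  # row length must be a multiple of the super-block size (256)
    x = ffi.new("float[]", [0.01 * i for i in range(k)])
    y = ffi.new("char[]", 4 * k)  # block_q8_K is well under 4 bytes per element
    lib.quantize_row_q8_K(x, y, k)  # y now holds k/256 packed super-blocks
    return y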
ggml-org-ggml-7ec8045/examples/python/ggml/cffi.py000066400000000000000000001444621506673203700221040ustar00rootroot00000000000000# auto-generated file import _cffi_backend ffi = _cffi_backend.FFI('ggml.cffi', _version = 0x2601, _types = b'\x00\x00\xB6\x0D\x00\x00\x09\x0B\x00\x00\x00\x0F\x00\x00\xB6\x0D\x00\x04\x2F\x03\x00\x00\x00\x0F\x00\x00\xB6\x0D\x00\x04\x31\x03\x00\x04\x3D\x03\x00\x00\x00\x0F\x00\x00\xB6\x0D\x00\x04\x32\x03\x00\x00\x00\x0F\x00\x00\xB6\x0D\x00\x04\x34\x03\x00\x03\xFE\x03\x00\x04\x53\x03\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x00\xB6\x0D\x00\x04\x3D\x03\x00\x00\x00\x0F\x00\x00\xB6\x0D\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x00\xB6\x0D\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x00\xB6\x0D\x00\x04\x3E\x03\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\xB6\x0D\x00\x00\x10\x11\x00\x00\x00\x0F\x00\x00\xB6\x0D\x00\x00\x00\x0F\x00\x02\xD0\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x0F\x0D\x00\x00\x04\x0B\x00\x00\x00\x0F\x00\x00\x0F\x0D\x00\x00\x01\x11\x00\x00\x00\x0F\x00\x00\x0F\x0D\x00\x00\x0B\x0B\x00\x00\x00\x0F\x00\x00\x0F\x0D\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x00\x0F\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x0F\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x16\x0D\x00\x00\x0B\x11\x00\x04\x38\x03\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x04\x16\x0D\x00\x00\x0B\x11\x00\x00\x44\x11\x00\x00\x08\x11\x00\x04\x30\x03\x00\x00\x4B\x11\x00\x00\x00\x0F\x00\x04\x16\x0D\x00\x00\x0B\x11\x00\x00\x20\x09\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x01\x0B\x00\x00\x00\x0F\x00\x01\x14\x0D\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x00\x34\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x02\x7E\x0D\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x00\xF4\x0D\x00\x00\x01\x11\x00\x00\x00\x0F\x00\x00\xF4\x0D\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\xF4\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\xF4\x0D\x00\x00\x06\x01\x00\x00\x00\x0F\x00\x04\x18\x0D\x00\x00\x01\x11\x00\x00\x00\x0F\x00\x02\xE9\x0D\x00\x00\x0E\x11\x00\x00\x00\x0F\x00\x00\x22\x0D\x00\x00\x01\x11\x00\x00\x00\x0F\x00\x00\x22\x0D\x00\x00\x4B\x11\x00\x04\x33\x03\x00\x00\x00\x0F\x00\x00\x22\x0D\x00\x00\x0E\x11\x00\x00\x00\x0F\x00\x00\x22\x0D\x00\x04\x35\x03\x00\x00\x00\x0F\x00\x00\x22\x0D\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x22\x0D\x00\x00\x21\x11\x00\x00\x00\x0F\x00\x00\x22\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x00\x0F\x00\x00\x22\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x22\x0D\x00\x00\x00\x0F\x00\x00\xDB\x0D\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x00\xDB\x0D\x00\x00\x00\x0F\x00\x03\xB0\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x03\xB5\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x04\x0D\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x00\x04\x0D\x00\x00\x10\x11\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x00\x4B\x0D\x00\x00\x0B\x11\x00\x00\x00\x0F\x00\x00\x4B\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x04\x30\x0D\x00\x00\x0F\x11\x00\x00\x0B\x03\x00\x00\xB0\x11\x00\x00\x00\x0F\x00\x04\x30\x0D\x00\x00\x0B\x11\x00\x00\x4B\x11\x00\x00\x01\x01\x00\x00\x00\x0F\x00\x04\x30\x0D\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x00\x0B\x0D\x00\x00\x1B\x09\x00\x00\x00\x0F\x00\x04\x33\x0D\x00\x00\x4B\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x0E\x0D\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x7F\x0D\x00\x00\x00\x0F\x00\x00\x50\x0D\x00\x00\x07\x0B\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x4B\x11\x00\x00\x0F\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x0
0\x00\x0B\x11\x00\x00\x0F\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x01\x11\x00\x00\x07\x01\x00\x00\xDB\x03\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x01\x11\x00\x00\x0B\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x01\x11\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x01\x11\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x01\x11\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x0D\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x05\x0B\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x01\x01\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x0A\x0B\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x0D\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x0D\x01\x00\x00\x0D\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x0D\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x0D\x01\x00\x00\x0D\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x0B\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x0B\x01\x00\x00\x0B\x01\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x0B\x01\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x
00\x08\x11\x00\x00\x01\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x01\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x03\x5C\x03\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x03\x62\x03\x00\x00\x07\x01\x00\x00\x10\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x02\xD8\x03\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x03\x4F\x03\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x08\x11\x00\x03\x54\x03\x00\x00\x07\x01\x00\x00\x10\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x02\xD3\x03\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x03\x44\x03\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x03\x48\x03\x00\x00\x07\x01\x00\x00\x10\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x0B\x11\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x08\x11\x00\x00\x0F\x11\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x08\x11\x00\x00\x0F\x11\x00\x00\x01\x0F\x00\x00\x08\x0D\x00\x00\x08\x11\x00\x00\x0D\x01\x00\x00\x00\x0F\x00\x00\x08\x0D\x00\x00\x08\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x21\x0D\x00\x00\x0F\x11\x00\x00\x24\x09\x00\x00\x00\x0F\x00\x00\x21\x0D\x00\x00\x00\x0F\x00\x03\xBA\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x03\xBF\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x00\x01\x11\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x00\x01\x11\x00\x00\xF4\x03\x00\x00\x10\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\xDB\x03\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x02\x35\x11\x00\x00\x10\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x02\x39\x11\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x00\x04\x11\x00\x00\x4B\x11\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x00\x0B\x11\x00\x00\x21\x09\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x04\x32\x03\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x00\x21\x11\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x11\x0D\x00\x00\x00\x0F\x00\x00\x6C\x0D\x00\x00\x0D\x01\x00\x00\x00\x0F\x00\x00\x6C\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x10\x0D\x00\x02\x4B\x11\x00\x00\x00\x0F\x00\x00\x10\x0D\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x00\x10\x0D\x00\x00\x21\x11\x00\x00\x00\x0F\x00\x00\x10\x0D\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x02\xE1\x0D\x00\x00\x21\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x01\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x03\xF8\x03\x00\x00\xF4\x03\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x03\xF9\x03\x00\x02\x7E\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x03\xFA\x03\x00\x02\x7E\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\
x53\x0D\x00\x03\xFB\x03\x00\x02\x7E\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x03\xFC\x03\x00\x02\x7E\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x03\xFD\x03\x00\x02\x7E\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0F\x11\x00\x00\x0F\x11\x00\x00\x07\x01\x00\x00\x0F\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\x35\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\x35\x11\x00\x03\xF8\x03\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\x35\x11\x00\x03\xF9\x03\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\x35\x11\x00\x03\xFA\x03\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\x35\x11\x00\x03\xFB\x03\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\x35\x11\x00\x03\xFC\x03\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\x35\x11\x00\x03\xFD\x03\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\x35\x11\x00\x00\x6C\x03\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\x35\x11\x00\x00\x10\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x07\x01\x00\x03\xFE\x03\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x07\x01\x00\x02\x7E\x11\x00\x02\x35\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x07\x01\x00\x02\x7E\x11\x00\x02\x35\x11\x00\x02\x35\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x07\x01\x00\x02\x7E\x11\x00\x04\x53\x03\x00\x02\xE1\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x04\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x04\x11\x00\x00\x22\x03\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x04\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x4B\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x4B\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x04\x30\x03\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\xF8\x11\x00\x00\x0F\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\xF8\x11\x00\x02\xF8\x11\x00\x00\x0F\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0B\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0B\x11\x00\x00\x01\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0B\x11\x00\x00\x4B\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0B\x11\x00\x00\x44\x11\x00\x00\x50\x11\x00\x00\x0B\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0B\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\x4B\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0E\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0E\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0E\x11\x00\x00\x4B\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0E\x11\x00\x00\x4B\x11\x00\x00\x01\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0E\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x7F\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x7F\x11\x00\x02\xE9\x11\x00\x02\xE9\x11\x00\x02\xE9\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x7F\x11\x00\x00\x4B\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x04\x37\x03\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x08\x11\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x08\x11\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x10\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x08\x11\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x08\x11\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x10\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x08\x11\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x08\x11\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x10
\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x0D\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x08\x11\x00\x00\x10\x11\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x01\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x0F\x03\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x0F\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x01\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x34\x11\x00\x02\xE1\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x0D\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x05\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x03\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x04\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x08\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x00\x06\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x0F\x11\x00\x02\xE1\x11\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x15\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x21\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x21\x11\x00\x00\x10\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x0A\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x6C\x03\x00\x02\x7E\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x10\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x10\x11\x00\x00\x08\x11\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x02\xE1\x11\x00\x02\x7E\x11\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x04\x53\x0D\x00\x00\x00\x0F\x00\x00\x24\x03\x00\x00\x0D\x09\x00\x00\x0E\x09\x00\x00\x0F\x09\x00\x00\x10\x09\x00\x00\x11\x09\x00\x00\x12\x09\x00\x00\x13\x09\x00\x00\x14\x09\x00\x00\x04\x09\x00\x00\x05\x09\x00\x00\x06\x09\x00\x00\x07\x09\x00\x00\x08\x09\x00\x00\x09\x09\x00\x00\x0A\x09\x00\x00\x02\x01\x00\x03\xFE\x05\x00\x00\x00\x80\x00\x03\xFE\x05\x00\x00\x00\x10\x00\x03\xFE\x05\x00\x00\x00\xC0\x00\x03\xFE\x05\x00\x00\x00\x25\x00\x03\xFE\x05\x00\x00\x00\x28\x00\x03\xFE\x05\x00\x00\x00\x04\x00\x03\xFE\x05\x00\x00\x00\x38\x00\x03\xFE\x05\x00\x00\x00\x40\x00\x03\xFE\x05\x00\x00\x1F\xF0\x00\x03\xFE\x05\x00\x00\x00\x08\x00\x00\x00\x0B\x00\x00\x02\x0B\x00\x00\x03\x0B\x00\x00\x06\x0B\x00\x00\x08\x0B\x00\x00\x0B\x09\x00\x00\x22\x05\x00\x00\x10\x00\x00\x00\x22\x05\x00\x00\x00\x08\x00\x00\x0F\x01\x00\x00\xDB\x05\x00\x00\x00\x04\x00\x00\x09\x01\x00\x03\xB0\x05\x00\x00\x00\x10\x00\x03\xB5\x05\x00\x00\x00\x10\x00\x03\xB5\x05\x00\x00\x01\x00\x00\x00\x00\x09\x00\x00\x01\x09\x00\x00\x02\x09\x00\x00\x03\x09\x00\x04\x2C\x03\x00\x00\x0C\x09\x00\x04\x2E\x03\x00\x00\x15\x09\x00\x00\x16\x09\x00\x00\x17\x09\x00\x00\x18\x09\x00\x00\x19\x09\x00\x00\x1A\x09\x00\x00\x1C\x09\x00\x00\x1D\x09\x00\x04\x37\x03\x00\x00\x1E\x09\x00\x00\x1F\x09\x00\x00\x08\x05\x00\x00\x10\x00\x00\x00\x08\x05\x00\x00\x00\x06\x00\x00\x22\x09\x00\x00\x23\x09\x00\x03\xBA\x03\x00\x03\xBA\x05\x00\x00\x00\x80\x00\x03\xBA\x05\x00\x00\x00\x0C\x00\x03\xBA\x05\x00\x00\x00\x10\x00\x03\xBA\x05\x00\x00\x00\x2
0\x00\x03\xBA\x05\x00\x00\x00\x40\x00\x00\x0C\x01\x00\x00\x11\x05\x00\x00\x00\x04\x00\x00\x10\x05\x00\x00\x20\x51\x00\x02\xC6\x03\x00\x02\xDE\x03\x00\x03\xE0\x03\x00\x03\xE7\x03\x00\x00\x00\x01', _globals = (b'\xFF\xFF\xFF\x0BGGML_BACKEND_CPU',0,b'\xFF\xFF\xFF\x0BGGML_BACKEND_GPU',10,b'\xFF\xFF\xFF\x0BGGML_BACKEND_GPU_SPLIT',20,b'\xFF\xFF\xFF\x0BGGML_FTYPE_ALL_F32',0,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_F16',1,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_Q2_K',10,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_Q3_K',11,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_Q4_0',2,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_Q4_1',3,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_Q4_1_SOME_F16',4,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_Q4_K',12,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_Q5_0',8,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_Q5_1',9,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_Q5_K',13,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_Q6_K',14,b'\xFF\xFF\xFF\x0BGGML_FTYPE_MOSTLY_Q8_0',7,b'\xFF\xFF\xFF\x0BGGML_FTYPE_UNKNOWN',-1,b'\xFF\xFF\xFF\x1FGGML_GRAPH_SIZE',164520,b'\xFF\xFF\xFF\x0BGGML_LINESEARCH_BACKTRACKING_ARMIJO',0,b'\xFF\xFF\xFF\x0BGGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE',2,b'\xFF\xFF\xFF\x0BGGML_LINESEARCH_BACKTRACKING_WOLFE',1,b'\xFF\xFF\xFF\x0BGGML_LINESEARCH_DEFAULT',1,b'\xFF\xFF\xFF\x0BGGML_LINESEARCH_FAIL',-128,b'\xFF\xFF\xFF\x0BGGML_LINESEARCH_INVALID_PARAMETERS',-124,b'\xFF\xFF\xFF\x0BGGML_LINESEARCH_MAXIMUM_ITERATIONS',-125,b'\xFF\xFF\xFF\x0BGGML_LINESEARCH_MAXIMUM_STEP',-126,b'\xFF\xFF\xFF\x0BGGML_LINESEARCH_MINIMUM_STEP',-127,b'\xFF\xFF\xFF\x0BGGML_OBJECT_GRAPH',1,b'\xFF\xFF\xFF\x1FGGML_OBJECT_SIZE',32,b'\xFF\xFF\xFF\x0BGGML_OBJECT_TENSOR',0,b'\xFF\xFF\xFF\x0BGGML_OBJECT_WORK_BUFFER',2,b'\xFF\xFF\xFF\x0BGGML_OPT_ADAM',0,b'\xFF\xFF\xFF\x0BGGML_OPT_DID_NOT_CONVERGE',1,b'\xFF\xFF\xFF\x0BGGML_OPT_FAIL',4,b'\xFF\xFF\xFF\x0BGGML_OPT_INVALID_WOLFE',3,b'\xFF\xFF\xFF\x0BGGML_OPT_LBFGS',1,b'\xFF\xFF\xFF\x0BGGML_OPT_NO_CONTEXT',2,b'\xFF\xFF\xFF\x0BGGML_OPT_OK',0,b'\xFF\xFF\xFF\x0BGGML_OP_ACC',4,b'\xFF\xFF\xFF\x0BGGML_OP_ADD',2,b'\xFF\xFF\xFF\x0BGGML_OP_ADD1',3,b'\xFF\xFF\xFF\x0BGGML_OP_ALIBI',40,b'\xFF\xFF\xFF\x0BGGML_OP_ARGMAX',14,b'\xFF\xFF\xFF\x0BGGML_OP_CLAMP',41,b'\xFF\xFF\xFF\x0BGGML_OP_CONT',26,b'\xFF\xFF\xFF\x0BGGML_OP_CONV_1D',42,b'\xFF\xFF\xFF\x0BGGML_OP_CONV_2D',43,b'\xFF\xFF\xFF\x0BGGML_OP_COUNT',62,b'\xFF\xFF\xFF\x0BGGML_OP_CPY',25,b'\xFF\xFF\xFF\x0BGGML_OP_CROSS_ENTROPY_LOSS',60,b'\xFF\xFF\xFF\x0BGGML_OP_CROSS_ENTROPY_LOSS_BACK',61,b'\xFF\xFF\xFF\x0BGGML_OP_DIAG',33,b'\xFF\xFF\xFF\x0BGGML_OP_DIAG_MASK_INF',34,b'\xFF\xFF\xFF\x0BGGML_OP_DIAG_MASK_ZERO',35,b'\xFF\xFF\xFF\x0BGGML_OP_DIV',7,b'\xFF\xFF\xFF\x0BGGML_OP_DUP',1,b'\xFF\xFF\xFF\x0BGGML_OP_FLASH_ATTN',46,b'\xFF\xFF\xFF\x0BGGML_OP_FLASH_ATTN_BACK',48,b'\xFF\xFF\xFF\x0BGGML_OP_FLASH_FF',47,b'\xFF\xFF\xFF\x0BGGML_OP_GET_ROWS',31,b'\xFF\xFF\xFF\x0BGGML_OP_GET_ROWS_BACK',32,b'\xFF\xFF\xFF\x0BGGML_OP_LOG',10,b'\xFF\xFF\xFF\x0BGGML_OP_MAP_BINARY',53,b'\xFF\xFF\xFF\x0BGGML_OP_MAP_CUSTOM1',57,b'\xFF\xFF\xFF\x0BGGML_OP_MAP_CUSTOM1_F32',54,b'\xFF\xFF\xFF\x0BGGML_OP_MAP_CUSTOM2',58,b'\xFF\xFF\xFF\x0BGGML_OP_MAP_CUSTOM2_F32',55,b'\xFF\xFF\xFF\x0BGGML_OP_MAP_CUSTOM3',59,b'\xFF\xFF\xFF\x0BGGML_OP_MAP_CUSTOM3_F32',56,b'\xFF\xFF\xFF\x0BGGML_OP_MAP_UNARY',52,b'\xFF\xFF\xFF\x0BGGML_OP_MEAN',13,b'\xFF\xFF\xFF\x0BGGML_OP_MUL',6,b'\xFF\xFF\xFF\x0BGGML_OP_MUL_MAT',21,b'\xFF\xFF\xFF\x0BGGML_OP_NONE',0,b'\xFF\xFF\xFF\x0BGGML_OP_NORM',18,b'\xFF\xFF\xFF\x0BGGML_OP_OUT_PROD',22,b'\xFF\xFF\xFF\x0BGGML_OP_PERMUTE',29,b'\xFF\xFF\xFF\x0BGGML_OP_POOL_1D',44,b'\xFF\xFF\xFF\x0BGGML_OP_POOL_2D',45,b'\xFF\xFF\xFF\x0BGGML_OP_POOL_AVG',1,
b'\xFF\xFF\xFF\x0BGGML_OP_POOL_COUNT',2,b'\xFF\xFF\xFF\x0BGGML_OP_POOL_MAX',0,b'\xFF\xFF\xFF\x0BGGML_OP_REPEAT',15,b'\xFF\xFF\xFF\x0BGGML_OP_REPEAT_BACK',16,b'\xFF\xFF\xFF\x0BGGML_OP_RESHAPE',27,b'\xFF\xFF\xFF\x0BGGML_OP_RMS_NORM',19,b'\xFF\xFF\xFF\x0BGGML_OP_RMS_NORM_BACK',20,b'\xFF\xFF\xFF\x0BGGML_OP_ROPE',38,b'\xFF\xFF\xFF\x0BGGML_OP_ROPE_BACK',39,b'\xFF\xFF\xFF\x0BGGML_OP_SCALE',23,b'\xFF\xFF\xFF\x0BGGML_OP_SET',24,b'\xFF\xFF\xFF\x0BGGML_OP_SILU_BACK',17,b'\xFF\xFF\xFF\x0BGGML_OP_SOFT_MAX',36,b'\xFF\xFF\xFF\x0BGGML_OP_SOFT_MAX_BACK',37,b'\xFF\xFF\xFF\x0BGGML_OP_SQR',8,b'\xFF\xFF\xFF\x0BGGML_OP_SQRT',9,b'\xFF\xFF\xFF\x0BGGML_OP_SUB',5,b'\xFF\xFF\xFF\x0BGGML_OP_SUM',11,b'\xFF\xFF\xFF\x0BGGML_OP_SUM_ROWS',12,b'\xFF\xFF\xFF\x0BGGML_OP_TRANSPOSE',30,b'\xFF\xFF\xFF\x0BGGML_OP_UNARY',51,b'\xFF\xFF\xFF\x0BGGML_OP_VIEW',28,b'\xFF\xFF\xFF\x0BGGML_OP_WIN_PART',49,b'\xFF\xFF\xFF\x0BGGML_OP_WIN_UNPART',50,b'\xFF\xFF\xFF\x0BGGML_TASK_COMPUTE',1,b'\xFF\xFF\xFF\x0BGGML_TASK_FINALIZE',2,b'\xFF\xFF\xFF\x0BGGML_TASK_INIT',0,b'\xFF\xFF\xFF\x1FGGML_TENSOR_SIZE',288,b'\xFF\xFF\xFF\x0BGGML_TYPE_COUNT',19,b'\xFF\xFF\xFF\x0BGGML_TYPE_F16',1,b'\xFF\xFF\xFF\x0BGGML_TYPE_F32',0,b'\xFF\xFF\xFF\x0BGGML_TYPE_I16',17,b'\xFF\xFF\xFF\x0BGGML_TYPE_I32',18,b'\xFF\xFF\xFF\x0BGGML_TYPE_I8',16,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q2_K',10,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q3_K',11,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q4_0',2,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q4_1',3,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q4_K',12,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q5_0',6,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q5_1',7,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q5_K',13,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q6_K',14,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q8_0',8,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q8_1',9,b'\xFF\xFF\xFF\x0BGGML_TYPE_Q8_K',15,b'\xFF\xFF\xFF\x0BGGML_UNARY_OP_ABS',0,b'\xFF\xFF\xFF\x0BGGML_UNARY_OP_ELU',5,b'\xFF\xFF\xFF\x0BGGML_UNARY_OP_GELU',7,b'\xFF\xFF\xFF\x0BGGML_UNARY_OP_GELU_QUICK',8,b'\xFF\xFF\xFF\x0BGGML_UNARY_OP_NEG',2,b'\xFF\xFF\xFF\x0BGGML_UNARY_OP_RELU',6,b'\xFF\xFF\xFF\x0BGGML_UNARY_OP_SGN',1,b'\xFF\xFF\xFF\x0BGGML_UNARY_OP_SILU',9,b'\xFF\xFF\xFF\x0BGGML_UNARY_OP_STEP',3,b'\xFF\xFF\xFF\x0BGGML_UNARY_OP_TANH',4,b'\xFF\xFF\xFF\x0BGGUF_TYPE_ARRAY',9,b'\xFF\xFF\xFF\x0BGGUF_TYPE_BOOL',7,b'\xFF\xFF\xFF\x0BGGUF_TYPE_COUNT',10,b'\xFF\xFF\xFF\x0BGGUF_TYPE_FLOAT32',6,b'\xFF\xFF\xFF\x0BGGUF_TYPE_INT16',3,b'\xFF\xFF\xFF\x0BGGUF_TYPE_INT32',5,b'\xFF\xFF\xFF\x0BGGUF_TYPE_INT8',1,b'\xFF\xFF\xFF\x0BGGUF_TYPE_STRING',8,b'\xFF\xFF\xFF\x0BGGUF_TYPE_UINT16',2,b'\xFF\xFF\xFF\x0BGGUF_TYPE_UINT32',4,b'\xFF\xFF\xFF\x0BGGUF_TYPE_UINT8',0,b'\x00\x02\x9A\x23__assert_rtn',0,b'\x00\x02\x7C\x23dequantize_row_q2_K',0,b'\x00\x02\x81\x23dequantize_row_q3_K',0,b'\x00\x02\x86\x23dequantize_row_q4_K',0,b'\x00\x02\x8B\x23dequantize_row_q5_K',0,b'\x00\x02\x90\x23dequantize_row_q6_K',0,b'\x00\x02\x95\x23dequantize_row_q8_K',0,b'\x00\x00\xFA\x23ggml_abs',0,b'\x00\x00\xFA\x23ggml_abs_inplace',0,b'\x00\x01\xDD\x23ggml_acc',0,b'\x00\x01\xDD\x23ggml_acc_inplace',0,b'\x00\x01\x84\x23ggml_add',0,b'\x00\x01\x84\x23ggml_add1',0,b'\x00\x01\x84\x23ggml_add1_inplace',0,b'\x00\x01\x84\x23ggml_add_inplace',0,b'\x00\x01\x26\x23ggml_alibi',0,b'\x00\x02\xEC\x23ggml_allocr_alloc',0,b'\x00\x02\x42\x23ggml_allocr_alloc_graph',0,b'\x00\x02\xE4\x23ggml_allocr_free',0,b'\x00\x00\x03\x23ggml_allocr_is_measure',0,b'\x00\x00\xA2\x23ggml_allocr_new',0,b'\x00\x00\x9F\x23ggml_allocr_new_measure',0,b'\x00\x02\xE4\x23ggml_allocr_reset',0,b'\x00\x02\xE7\x23ggml_allocr_set_parse_seq',0,b'\x00\x00\x17\x23ggml_are_same_shape',0,b'\x00\x00\xFA\x23ggml_argmax',0,b'\x00\x00\x74\x23ggml_blck_si
ze',0,b'\x00\x00\xB3\x23ggml_build_backward',0,b'\x00\x00\xB8\x23ggml_build_forward',0,b'\x00\x00\xAA\x23ggml_build_forward_ctx',0,b'\x00\x02\xF3\x23ggml_build_forward_expand',0,b'\x00\x00\x1B\x23ggml_cl_can_mul_mat',0,b'\x00\x03\x6B\x23ggml_cl_free_data',0,b'\x00\x03\xE0\x23ggml_cl_host_free',0,b'\x00\x02\x72\x23ggml_cl_host_malloc',0,b'\x00\x03\xEC\x23ggml_cl_init',0,b'\x00\x03\x78\x23ggml_cl_mul',0,b'\x00\x03\x7D\x23ggml_cl_mul_mat',0,b'\x00\x02\x54\x23ggml_cl_mul_mat_get_wsize',0,b'\x00\x03\xE3\x23ggml_cl_transform_tensor',0,b'\x00\x01\x1B\x23ggml_clamp',0,b'\x00\x00\xFA\x23ggml_cont',0,b'\x00\x01\x90\x23ggml_conv_1d',0,b'\x00\x01\x89\x23ggml_conv_1d_ph',0,b'\x00\x01\x98\x23ggml_conv_2d',0,b'\x00\x00\x90\x23ggml_cpu_has_arm_fma',0,b'\x00\x00\x90\x23ggml_cpu_has_avx',0,b'\x00\x00\x90\x23ggml_cpu_has_avx2',0,b'\x00\x00\x90\x23ggml_cpu_has_avx512',0,b'\x00\x00\x90\x23ggml_cpu_has_avx512_vbmi',0,b'\x00\x00\x90\x23ggml_cpu_has_avx512_vnni',0,b'\x00\x00\x90\x23ggml_cpu_has_blas',0,b'\x00\x00\x90\x23ggml_cpu_has_clblast',0,b'\x00\x00\x90\x23ggml_cpu_has_cublas',0,b'\x00\x00\x90\x23ggml_cpu_has_f16c',0,b'\x00\x00\x90\x23ggml_cpu_has_fma',0,b'\x00\x00\x90\x23ggml_cpu_has_fp16_va',0,b'\x00\x00\x90\x23ggml_cpu_has_gpublas',0,b'\x00\x00\x90\x23ggml_cpu_has_neon',0,b'\x00\x00\x90\x23ggml_cpu_has_sse3',0,b'\x00\x00\x90\x23ggml_cpu_has_vsx',0,b'\x00\x00\x90\x23ggml_cpu_has_wasm_simd',0,b'\x00\x01\x84\x23ggml_cpy',0,b'\x00\x01\x84\x23ggml_cross_entropy_loss',0,b'\x00\x01\xA3\x23ggml_cross_entropy_loss_back',0,b'\x00\x03\x41\x23ggml_cuda_assign_buffers',0,b'\x00\x03\x41\x23ggml_cuda_assign_buffers_force_inplace',0,b'\x00\x03\x41\x23ggml_cuda_assign_buffers_no_scratch',0,b'\x00\x00\x1B\x23ggml_cuda_can_mul_mat',0,b'\x00\x00\x06\x23ggml_cuda_compute_forward',0,b'\x00\x03\x41\x23ggml_cuda_free_data',0,b'\x00\x03\xEC\x23ggml_cuda_free_scratch',0,b'\x00\x00\x90\x23ggml_cuda_get_device_count',0,b'\x00\x02\xCE\x23ggml_cuda_get_device_description',0,b'\x00\x03\xE0\x23ggml_cuda_host_free',0,b'\x00\x02\x72\x23ggml_cuda_host_malloc',0,b'\x00\x02\xCB\x23ggml_cuda_set_main_device',0,b'\x00\x02\x79\x23ggml_cuda_set_mul_mat_q',0,b'\x00\x03\xD8\x23ggml_cuda_set_scratch_size',0,b'\x00\x02\xA0\x23ggml_cuda_set_tensor_split',0,b'\x00\x03\xE3\x23ggml_cuda_transform_tensor',0,b'\x00\x00\x95\x23ggml_cycles',0,b'\x00\x00\x95\x23ggml_cycles_per_ms',0,b'\x00\x00\xFA\x23ggml_diag',0,b'\x00\x01\x21\x23ggml_diag_mask_inf',0,b'\x00\x01\x21\x23ggml_diag_mask_inf_inplace',0,b'\x00\x01\x21\x23ggml_diag_mask_zero',0,b'\x00\x01\x21\x23ggml_diag_mask_zero_inplace',0,b'\x00\x01\x84\x23ggml_div',0,b'\x00\x01\x84\x23ggml_div_inplace',0,b'\x00\x00\xFA\x23ggml_dup',0,b'\x00\x00\xFA\x23ggml_dup_inplace',0,b'\x00\x02\x0B\x23ggml_dup_tensor',0,b'\x00\x02\x4D\x23ggml_element_size',0,b'\x00\x00\xFA\x23ggml_elu',0,b'\x00\x00\xFA\x23ggml_elu_inplace',0,b'\x00\x01\xA9\x23ggml_flash_attn',0,b'\x00\x01\xB0\x23ggml_flash_attn_back',0,b'\x00\x01\xB8\x23ggml_flash_ff',0,b'\x00\x02\x16\x23ggml_format_name',0,b'\x00\x00\x6B\x23ggml_fp16_to_fp32',0,b'\x00\x03\xDB\x23ggml_fp16_to_fp32_row',0,b'\x00\x02\x62\x23ggml_fp32_to_fp16',0,b'\x00\x02\xC1\x23ggml_fp32_to_fp16_row',0,b'\x00\x03\x03\x23ggml_free',0,b'\x00\x00\x53\x23ggml_ftype_to_ggml_type',0,b'\x00\x00\xFA\x23ggml_gelu',0,b'\x00\x00\xFA\x23ggml_gelu_inplace',0,b'\x00\x00\xFA\x23ggml_gelu_quick',0,b'\x00\x00\xFA\x23ggml_gelu_quick_inplace',0,b'\x00\x02\x6C\x23ggml_get_data',0,b'\x00\x00\x5D\x23ggml_get_data_f32',0,b'\x00\x00\x63\x23ggml_get_f32_1d',0,b'\x00\x00\x81\x23ggml_get_i32_1d',0,b'\x00\x02\x4A\x23
ggml_get_max_tensor_size',0,b'\x00\x02\x69\x23ggml_get_mem_buffer',0,b'\x00\x02\x4A\x23ggml_get_mem_size',0,b'\x00\x00\x36\x23ggml_get_name',0,b'\x00\x00\x0A\x23ggml_get_no_alloc',0,b'\x00\x01\x84\x23ggml_get_rows',0,b'\x00\x01\xA3\x23ggml_get_rows_back',0,b'\x00\x00\xCE\x23ggml_get_tensor',0,b'\x00\x00\x56\x23ggml_get_unary_op',0,b'\x00\x00\x77\x23ggml_graph_compute',0,b'\x00\x03\x0A\x23ggml_graph_compute_with_ctx',0,b'\x00\x02\xFE\x23ggml_graph_dump_dot',0,b'\x00\x02\xFA\x23ggml_graph_export',0,b'\x00\x00\xCA\x23ggml_graph_get_tensor',0,b'\x00\x00\xAE\x23ggml_graph_import',0,b'\x00\x02\x60\x23ggml_graph_overhead',0,b'\x00\x00\xBE\x23ggml_graph_plan',0,b'\x00\x02\xF7\x23ggml_graph_print',0,b'\x00\x02\xF0\x23ggml_graph_reset',0,b'\x00\x00\xBB\x23ggml_init',0,b'\x00\x03\xEC\x23ggml_init_cublas',0,b'\x00\x00\x6E\x23ggml_internal_get_type_traits',0,b'\x00\x00\x14\x23ggml_is_contiguous',0,b'\x00\x00\x27\x23ggml_is_numa',0,b'\x00\x00\x14\x23ggml_is_permuted',0,b'\x00\x00\x00\x23ggml_is_quantized',0,b'\x00\x00\x14\x23ggml_is_transposed',0,b'\x00\x00\xFA\x23ggml_log',0,b'\x00\x00\xFA\x23ggml_log_inplace',0,b'\x00\x01\xE6\x23ggml_map_binary_f32',0,b'\x00\x01\xE6\x23ggml_map_binary_inplace_f32',0,b'\x00\x02\x04\x23ggml_map_custom1',0,b'\x00\x01\xFF\x23ggml_map_custom1_f32',0,b'\x00\x02\x04\x23ggml_map_custom1_inplace',0,b'\x00\x01\xFF\x23ggml_map_custom1_inplace_f32',0,b'\x00\x01\xF2\x23ggml_map_custom2',0,b'\x00\x01\xEC\x23ggml_map_custom2_f32',0,b'\x00\x01\xF2\x23ggml_map_custom2_inplace',0,b'\x00\x01\xEC\x23ggml_map_custom2_inplace_f32',0,b'\x00\x01\xC7\x23ggml_map_custom3',0,b'\x00\x01\xC0\x23ggml_map_custom3_f32',0,b'\x00\x01\xC7\x23ggml_map_custom3_inplace',0,b'\x00\x01\xC0\x23ggml_map_custom3_inplace_f32',0,b'\x00\x01\xFA\x23ggml_map_unary_f32',0,b'\x00\x01\xFA\x23ggml_map_unary_inplace_f32',0,b'\x00\x00\xFA\x23ggml_mean',0,b'\x00\x00\x0D\x23ggml_metal_add_buffer',0,b'\x00\x03\x1C\x23ggml_metal_free',0,b'\x00\x00\x71\x23ggml_metal_get_concur_list',0,b'\x00\x03\x2C\x23ggml_metal_get_tensor',0,b'\x00\x03\x23\x23ggml_metal_graph_compute',0,b'\x00\x03\x27\x23ggml_metal_graph_find_concurrency',0,b'\x00\x03\xE0\x23ggml_metal_host_free',0,b'\x00\x02\x72\x23ggml_metal_host_malloc',0,b'\x00\x00\x7B\x23ggml_metal_if_optimized',0,b'\x00\x00\xC2\x23ggml_metal_init',0,b'\x00\x03\x1F\x23ggml_metal_set_n_cb',0,b'\x00\x03\x2C\x23ggml_metal_set_tensor',0,b'\x00\x03\xEC\x23ggml_mpi_backend_free',0,b'\x00\x03\xEC\x23ggml_mpi_backend_init',0,b'\x00\x03\x33\x23ggml_mpi_eval_init',0,b'\x00\x03\x30\x23ggml_mpi_free',0,b'\x00\x03\x39\x23ggml_mpi_graph_compute_post',0,b'\x00\x03\x39\x23ggml_mpi_graph_compute_pre',0,b'\x00\x00\xC5\x23ggml_mpi_init',0,b'\x00\x00\x7E\x23ggml_mpi_rank',0,b'\x00\x01\x84\x23ggml_mul',0,b'\x00\x01\x84\x23ggml_mul_inplace',0,b'\x00\x01\x84\x23ggml_mul_mat',0,b'\x00\x02\x4D\x23ggml_nbytes',0,b'\x00\x02\x4D\x23ggml_nbytes_pad',0,b'\x00\x02\x50\x23ggml_nbytes_split',0,b'\x00\x00\xFA\x23ggml_neg',0,b'\x00\x00\xFA\x23ggml_neg_inplace',0,b'\x00\x00\x92\x23ggml_nelements',0,b'\x00\x00\xF2\x23ggml_new_f32',0,b'\x00\x00\xA7\x23ggml_new_graph',0,b'\x00\x00\xF6\x23ggml_new_i32',0,b'\x00\x00\xD2\x23ggml_new_tensor',0,b'\x00\x00\xD8\x23ggml_new_tensor_1d',0,b'\x00\x00\xDD\x23ggml_new_tensor_2d',0,b'\x00\x00\xE3\x23ggml_new_tensor_3d',0,b'\x00\x00\xEA\x23ggml_new_tensor_4d',0,b'\x00\x00\xFA\x23ggml_norm',0,b'\x00\x00\xFA\x23ggml_norm_inplace',0,b'\x00\x00\x92\x23ggml_nrows',0,b'\x00\x03\xEC\x23ggml_numa_init',0,b'\x00\x00\x2D\x23ggml_op_name',0,b'\x00\x00\x2D\x23ggml_op_symbol',0,b'\x00\x00\x4E\x23ggml_op
t',0,b'\x00\x00\xC7\x23ggml_opt_default_params',0,b'\x00\x03\x0F\x23ggml_opt_init',0,b'\x00\x00\x42\x23ggml_opt_resume',0,b'\x00\x00\x47\x23ggml_opt_resume_g',0,b'\x00\x01\x84\x23ggml_out_prod',0,b'\x00\x01\x34\x23ggml_permute',0,b'\x00\x00\xFE\x23ggml_pool_1d',0,b'\x00\x01\x06\x23ggml_pool_2d',0,b'\x00\x03\x3E\x23ggml_print_object',0,b'\x00\x03\x19\x23ggml_print_objects',0,b'\x00\x02\x33\x23ggml_quantize_chunk',0,b'\x00\x02\x3B\x23ggml_quantize_q2_K',0,b'\x00\x02\x3B\x23ggml_quantize_q3_K',0,b'\x00\x02\x3B\x23ggml_quantize_q4_0',0,b'\x00\x02\x3B\x23ggml_quantize_q4_1',0,b'\x00\x02\x3B\x23ggml_quantize_q4_K',0,b'\x00\x02\x3B\x23ggml_quantize_q5_0',0,b'\x00\x02\x3B\x23ggml_quantize_q5_1',0,b'\x00\x02\x3B\x23ggml_quantize_q5_K',0,b'\x00\x02\x3B\x23ggml_quantize_q6_K',0,b'\x00\x02\x3B\x23ggml_quantize_q8_0',0,b'\x00\x00\xFA\x23ggml_relu',0,b'\x00\x00\xFA\x23ggml_relu_inplace',0,b'\x00\x01\x84\x23ggml_repeat',0,b'\x00\x01\x84\x23ggml_repeat_back',0,b'\x00\x01\x84\x23ggml_reshape',0,b'\x00\x01\x46\x23ggml_reshape_1d',0,b'\x00\x01\x4B\x23ggml_reshape_2d',0,b'\x00\x01\x51\x23ggml_reshape_3d',0,b'\x00\x01\x58\x23ggml_reshape_4d',0,b'\x00\x01\x16\x23ggml_rms_norm',0,b'\x00\x01\x84\x23ggml_rms_norm_back',0,b'\x00\x01\x16\x23ggml_rms_norm_inplace',0,b'\x00\x01\x34\x23ggml_rope',0,b'\x00\x01\x34\x23ggml_rope_back',0,b'\x00\x01\x3C\x23ggml_rope_custom',0,b'\x00\x01\x3C\x23ggml_rope_custom_inplace',0,b'\x00\x01\x34\x23ggml_rope_inplace',0,b'\x00\x01\x84\x23ggml_scale',0,b'\x00\x01\x84\x23ggml_scale_inplace',0,b'\x00\x01\xDD\x23ggml_set',0,b'\x00\x01\xD0\x23ggml_set_1d',0,b'\x00\x01\xD0\x23ggml_set_1d_inplace',0,b'\x00\x01\xD6\x23ggml_set_2d',0,b'\x00\x01\xD6\x23ggml_set_2d_inplace',0,b'\x00\x02\x1A\x23ggml_set_f32',0,b'\x00\x03\x6E\x23ggml_set_f32_1d',0,b'\x00\x02\x1E\x23ggml_set_i32',0,b'\x00\x03\x73\x23ggml_set_i32_1d',0,b'\x00\x01\xDD\x23ggml_set_inplace',0,b'\x00\x02\x12\x23ggml_set_name',0,b'\x00\x03\x06\x23ggml_set_no_alloc',0,b'\x00\x03\x15\x23ggml_set_param',0,b'\x00\x02\x46\x23ggml_set_scratch',0,b'\x00\x02\x0F\x23ggml_set_zero',0,b'\x00\x00\xFA\x23ggml_sgn',0,b'\x00\x00\xFA\x23ggml_sgn_inplace',0,b'\x00\x00\xFA\x23ggml_silu',0,b'\x00\x01\x84\x23ggml_silu_back',0,b'\x00\x00\xFA\x23ggml_silu_inplace',0,b'\x00\x00\xFA\x23ggml_soft_max',0,b'\x00\x01\x84\x23ggml_soft_max_back',0,b'\x00\x01\x84\x23ggml_soft_max_back_inplace',0,b'\x00\x00\xFA\x23ggml_soft_max_inplace',0,b'\x00\x00\xFA\x23ggml_sqr',0,b'\x00\x00\xFA\x23ggml_sqr_inplace',0,b'\x00\x00\xFA\x23ggml_sqrt',0,b'\x00\x00\xFA\x23ggml_sqrt_inplace',0,b'\x00\x00\xFA\x23ggml_step',0,b'\x00\x00\xFA\x23ggml_step_inplace',0,b'\x00\x01\x84\x23ggml_sub',0,b'\x00\x01\x84\x23ggml_sub_inplace',0,b'\x00\x00\xFA\x23ggml_sum',0,b'\x00\x00\xFA\x23ggml_sum_rows',0,b'\x00\x00\xFA\x23ggml_tanh',0,b'\x00\x00\xFA\x23ggml_tanh_inplace',0,b'\x00\x02\x60\x23ggml_tensor_overhead',0,b'\x00\x03\xEC\x23ggml_time_init',0,b'\x00\x00\x95\x23ggml_time_ms',0,b'\x00\x00\x95\x23ggml_time_us',0,b'\x00\x00\xFA\x23ggml_transpose',0,b'\x00\x00\x30\x23ggml_type_name',0,b'\x00\x02\x30\x23ggml_type_size',0,b'\x00\x00\x60\x23ggml_type_sizef',0,b'\x00\x01\x11\x23ggml_unary',0,b'\x00\x01\x11\x23ggml_unary_inplace',0,b'\x00\x02\x4A\x23ggml_used_mem',0,b'\x00\x02\xDE\x23ggml_vec_dot_q2_K_q8_K',0,b'\x00\x02\xDE\x23ggml_vec_dot_q3_K_q8_K',0,b'\x00\x02\xDE\x23ggml_vec_dot_q4_K_q8_K',0,b'\x00\x02\xDE\x23ggml_vec_dot_q5_K_q8_K',0,b'\x00\x02\xDE\x23ggml_vec_dot_q6_K_q8_K',0,b'\x00\x01\x7E\x23ggml_view_1d',0,b'\x00\x01\x76\x23ggml_view_2d',0,b'\x00\x01\x6C\x23ggml_view_3d',0,b'\x00\x01\x60\x23ggml
_view_4d',0,b'\x00\x02\x0B\x23ggml_view_tensor',0,b'\x00\x01\x21\x23ggml_win_part',0,b'\x00\x01\x2D\x23ggml_win_unpart',0,b'\x00\x03\xCC\x23gguf_add_tensor',0,b'\x00\x00\x88\x23gguf_find_key',0,b'\x00\x00\x88\x23gguf_find_tensor',0,b'\x00\x03\x84\x23gguf_free',0,b'\x00\x02\x59\x23gguf_get_alignment',0,b'\x00\x02\x75\x23gguf_get_arr_data',0,b'\x00\x00\x8C\x23gguf_get_arr_n',0,b'\x00\x00\x3D\x23gguf_get_arr_str',0,b'\x00\x00\x59\x23gguf_get_arr_type',0,b'\x00\x02\x6F\x23gguf_get_data',0,b'\x00\x02\x59\x23gguf_get_data_offset',0,b'\x00\x00\x39\x23gguf_get_key',0,b'\x00\x00\x59\x23gguf_get_kv_type',0,b'\x00\x03\xD4\x23gguf_get_meta_data',0,b'\x00\x02\x59\x23gguf_get_meta_size',0,b'\x00\x00\x85\x23gguf_get_n_kv',0,b'\x00\x00\x85\x23gguf_get_n_tensors',0,b'\x00\x00\x29\x23gguf_get_tensor_name',0,b'\x00\x02\x5C\x23gguf_get_tensor_offset',0,b'\x00\x00\x20\x23gguf_get_val_bool',0,b'\x00\x00\x67\x23gguf_get_val_f32',0,b'\x00\x00\x97\x23gguf_get_val_i16',0,b'\x00\x00\x8C\x23gguf_get_val_i32',0,b'\x00\x00\x9B\x23gguf_get_val_i8',0,b'\x00\x00\x39\x23gguf_get_val_str',0,b'\x00\x02\x65\x23gguf_get_val_u16',0,b'\x00\x02\x2C\x23gguf_get_val_u32',0,b'\x00\x02\x28\x23gguf_get_val_u8',0,b'\x00\x00\x85\x23gguf_get_version',0,b'\x00\x02\x26\x23gguf_init_empty',0,b'\x00\x02\x22\x23gguf_init_from_file',0,b'\x00\x03\x9C\x23gguf_set_arr_data',0,b'\x00\x03\x8C\x23gguf_set_arr_str',0,b'\x00\x03\xD0\x23gguf_set_kv',0,b'\x00\x03\xC6\x23gguf_set_tensor_data',0,b'\x00\x03\x97\x23gguf_set_tensor_type',0,b'\x00\x03\x87\x23gguf_set_val_bool',0,b'\x00\x03\xA3\x23gguf_set_val_f32',0,b'\x00\x03\xAD\x23gguf_set_val_i16',0,b'\x00\x03\xA8\x23gguf_set_val_i32',0,b'\x00\x03\xB2\x23gguf_set_val_i8',0,b'\x00\x03\x92\x23gguf_set_val_str',0,b'\x00\x03\xC1\x23gguf_set_val_u16',0,b'\x00\x03\xBC\x23gguf_set_val_u32',0,b'\x00\x03\xB7\x23gguf_set_val_u8',0,b'\x00\x00\x33\x23gguf_type_name',0,b'\x00\x03\x87\x23gguf_write_to_file',0,b'\x00\x02\xC6\x23quantize_row_q2_K',0,b'\x00\x02\xA3\x23quantize_row_q2_K_reference',0,b'\x00\x02\xC6\x23quantize_row_q3_K',0,b'\x00\x02\xA8\x23quantize_row_q3_K_reference',0,b'\x00\x02\xC6\x23quantize_row_q4_K',0,b'\x00\x02\xAD\x23quantize_row_q4_K_reference',0,b'\x00\x02\xC6\x23quantize_row_q5_K',0,b'\x00\x02\xB2\x23quantize_row_q5_K_reference',0,b'\x00\x02\xC6\x23quantize_row_q6_K',0,b'\x00\x02\xB7\x23quantize_row_q6_K_reference',0,b'\x00\x02\xC6\x23quantize_row_q8_K',0,b'\x00\x02\xBC\x23quantize_row_q8_K_reference',0), _struct_unions = 
((b'\x00\x00\x04\x27\x00\x00\x00\x02$1',b'\x00\x00\x22\x11n_iter',b'\x00\x00\xF4\x11sched',b'\x00\x00\xF4\x11decay',b'\x00\x00\xF4\x11alpha',b'\x00\x00\xF4\x11beta1',b'\x00\x00\xF4\x11beta2',b'\x00\x00\xF4\x11eps',b'\x00\x00\xF4\x11eps_f',b'\x00\x00\xF4\x11eps_g'),(b'\x00\x00\x04\x28\x00\x00\x00\x02$2',b'\x00\x00\x22\x11m',b'\x00\x00\x22\x11n_iter',b'\x00\x00\x22\x11max_linesearch',b'\x00\x00\xF4\x11eps',b'\x00\x00\xF4\x11ftol',b'\x00\x00\xF4\x11wolfe',b'\x00\x00\xF4\x11min_step',b'\x00\x00\xF4\x11max_step',b'\x00\x04\x14\x11linesearch'),(b'\x00\x00\x04\x29\x00\x00\x00\x02$3',b'\x00\x00\x08\x11x',b'\x00\x00\x08\x11g1',b'\x00\x00\x08\x11g2',b'\x00\x00\x08\x11m',b'\x00\x00\x08\x11v',b'\x00\x00\x08\x11mh',b'\x00\x00\x08\x11vh',b'\x00\x00\x08\x11pf',b'\x00\x00\xF4\x11fx_best',b'\x00\x00\xF4\x11fx_prev',b'\x00\x00\x22\x11n_no_improvement'),(b'\x00\x00\x04\x2A\x00\x00\x00\x02$4',b'\x00\x00\x08\x11x',b'\x00\x00\x08\x11xp',b'\x00\x00\x08\x11g',b'\x00\x00\x08\x11gp',b'\x00\x00\x08\x11d',b'\x00\x00\x08\x11pf',b'\x00\x00\x08\x11lmal',b'\x00\x00\x08\x11lmys',b'\x00\x00\x08\x11lms',b'\x00\x00\x08\x11lmy',b'\x00\x00\xF4\x11fx_best',b'\x00\x00\xF4\x11step',b'\x00\x00\x22\x11j',b'\x00\x00\x22\x11k',b'\x00\x00\x22\x11end',b'\x00\x00\x22\x11n_no_improvement'),(b'\x00\x00\x03\xF7\x00\x00\x00\x03$__mbstate_t',b'\x00\x03\xFF\x11__mbstate8',b'\x00\x00\xDB\x11_mbstateL'),(b'\x00\x00\x03\xF8\x00\x00\x00\x02$block_q2_K',b'\x00\x04\x44\x11scales',b'\x00\x04\x48\x11qs',b'\x00\x00\x6C\x11d',b'\x00\x00\x6C\x11dmin'),(b'\x00\x00\x03\xF9\x00\x00\x00\x02$block_q3_K',b'\x00\x04\x46\x11hmask',b'\x00\x04\x48\x11qs',b'\x00\x04\x42\x11scales',b'\x00\x00\x6C\x11d'),(b'\x00\x00\x03\xFA\x00\x00\x00\x02$block_q4_K',b'\x00\x00\x6C\x11d',b'\x00\x00\x6C\x11dmin',b'\x00\x04\x42\x11scales',b'\x00\x04\x40\x11qs'),(b'\x00\x00\x03\xFB\x00\x00\x00\x02$block_q5_K',b'\x00\x00\x6C\x11d',b'\x00\x00\x6C\x11dmin',b'\x00\x04\x42\x11scales',b'\x00\x04\x46\x11qh',b'\x00\x04\x40\x11qs'),(b'\x00\x00\x03\xFC\x00\x00\x00\x02$block_q6_K',b'\x00\x04\x40\x11ql',b'\x00\x04\x48\x11qh',b'\x00\x04\x23\x11scales',b'\x00\x00\x6C\x11d'),(b'\x00\x00\x03\xFD\x00\x00\x00\x02$block_q8_K',b'\x00\x00\xF4\x11d',b'\x00\x04\x25\x11qs',b'\x00\x04\x21\x11bsums'),(b'\x00\x00\x04\x18\x00\x00\x00\x02$ggml_type_traits_t',b'\x00\x00\x0F\x11type_name',b'\x00\x00\x22\x11blck_size',b'\x00\x00\x11\x11type_size',b'\x00\x00\xB6\x11is_quantized',b'\x00\x04\x52\x11to_float',b'\x00\x04\x4F\x11from_float',b'\x00\x04\x4F\x11from_float_reference',b'\x00\x04\x50\x11vec_dot',b'\x00\x00\x01\x11vec_dot_type'),(b'\x00\x00\x04\x2C\x00\x00\x00\x02__darwin_pthread_handler_rec',b'\x00\x04\x51\x11__routine',b'\x00\x00\x10\x11__arg',b'\x00\x04\x2B\x11__next'),(b'\x00\x00\x03\xEF\x00\x00\x00\x02_opaque_pthread_attr_t',b'\x00\x04\x20\x11__sig',b'\x00\x04\x0B\x11__opaque'),(b'\x00\x00\x03\xF0\x00\x00\x00\x02_opaque_pthread_cond_t',b'\x00\x04\x20\x11__sig',b'\x00\x04\x07\x11__opaque'),(b'\x00\x00\x03\xF1\x00\x00\x00\x02_opaque_pthread_condattr_t',b'\x00\x04\x20\x11__sig',b'\x00\x04\x11\x11__opaque'),(b'\x00\x00\x03\xF2\x00\x00\x00\x02_opaque_pthread_mutex_t',b'\x00\x04\x20\x11__sig',b'\x00\x04\x0B\x11__opaque'),(b'\x00\x00\x03\xF3\x00\x00\x00\x02_opaque_pthread_mutexattr_t',b'\x00\x04\x20\x11__sig',b'\x00\x04\x11\x11__opaque'),(b'\x00\x00\x03\xF4\x00\x00\x00\x02_opaque_pthread_once_t',b'\x00\x04\x20\x11__sig',b'\x00\x04\x11\x11__opaque'),(b'\x00\x00\x03\xF5\x00\x00\x00\x02_opaque_pthread_rwlock_t',b'\x00\x04\x20\x11__sig',b'\x00\x04\x03\x11__opaque'),(b'\x00\x00\x03\xF6\x00\x00\x00\x02_opaque_pthread_rwl
ockattr_t',b'\x00\x04\x20\x11__sig',b'\x00\x04\x01\x11__opaque'),(b'\x00\x00\x04\x2E\x00\x00\x00\x02_opaque_pthread_t',b'\x00\x04\x20\x11__sig',b'\x00\x04\x2B\x11__cleanup_stack',b'\x00\x04\x0F\x11__opaque'),(b'\x00\x00\x04\x2F\x00\x00\x00\x10ggml_allocr',),(b'\x00\x00\x04\x30\x00\x00\x00\x02ggml_cgraph',b'\x00\x00\x22\x11n_nodes',b'\x00\x00\x22\x11n_leafs',b'\x00\x04\x39\x11nodes',b'\x00\x04\x39\x11grads',b'\x00\x04\x39\x11leafs',b'\x00\x04\x4D\x11visited_hash_table',b'\x00\x00\x22\x11perf_runs',b'\x00\x00\xDB\x11perf_cycles',b'\x00\x00\xDB\x11perf_time_us'),(b'\x00\x00\x04\x31\x00\x00\x00\x02ggml_compute_params',b'\x00\x04\x17\x11type',b'\x00\x00\x22\x11ith',b'\x00\x00\x22\x11nth',b'\x00\x00\x11\x11wsize',b'\x00\x00\x10\x11wdata'),(b'\x00\x00\x04\x32\x00\x00\x00\x10ggml_context',),(b'\x00\x00\x04\x33\x00\x00\x00\x02ggml_cplan',b'\x00\x00\x11\x11work_size',b'\x00\x04\x3F\x11work_data',b'\x00\x00\x22\x11n_threads',b'\x00\x04\x19\x11n_tasks',b'\x00\x03\xEE\x11abort_callback',b'\x00\x00\x10\x11abort_callback_data'),(b'\x00\x00\x00\xBC\x00\x00\x00\x02ggml_init_params',b'\x00\x00\x11\x11mem_size',b'\x00\x00\x10\x11mem_buffer',b'\x00\x00\xB6\x11no_alloc'),(b'\x00\x00\x04\x34\x00\x00\x00\x10ggml_metal_context',),(b'\x00\x00\x04\x35\x00\x00\x00\x10ggml_mpi_context',),(b'\x00\x00\x04\x37\x00\x00\x00\x02ggml_object',b'\x00\x00\x11\x11offs',b'\x00\x00\x11\x11size',b'\x00\x04\x36\x11next',b'\x00\x04\x15\x11type',b'\x00\x04\x09\x11padding'),(b'\x00\x00\x04\x38\x00\x00\x00\x02ggml_opt_context',b'\x00\x00\x0B\x11ctx',b'\x00\x00\x50\x11params',b'\x00\x00\x22\x11iter',b'\x00\x00\xDB\x11nx',b'\x00\x00\xB6\x11just_initialized',b'\x00\x04\x29\x11adam',b'\x00\x04\x2A\x11lbfgs'),(b'\x00\x00\x00\x50\x00\x00\x00\x02ggml_opt_params',b'\x00\x00\xC8\x11type',b'\x00\x00\x22\x11n_threads',b'\x00\x00\x22\x11past',b'\x00\x00\xF4\x11delta',b'\x00\x00\x22\x11max_no_improvement',b'\x00\x00\xB6\x11print_forward_graph',b'\x00\x00\xB6\x11print_backward_graph',b'\x00\x04\x27\x11adam',b'\x00\x04\x28\x11lbfgs'),(b'\x00\x00\x02\x48\x00\x00\x00\x02ggml_scratch',b'\x00\x00\x11\x11offs',b'\x00\x00\x11\x11size',b'\x00\x00\x10\x11data'),(b'\x00\x00\x04\x3D\x00\x00\x00\x02ggml_tensor',b'\x00\x00\x01\x11type',b'\x00\x04\x13\x11backend',b'\x00\x00\x22\x11n_dims',b'\x00\x04\x1E\x11ne',b'\x00\x04\x4B\x11nb',b'\x00\x00\x2E\x11op',b'\x00\x04\x1B\x11op_params',b'\x00\x00\xB6\x11is_param',b'\x00\x00\x08\x11grad',b'\x00\x04\x3B\x11src',b'\x00\x00\x22\x11perf_runs',b'\x00\x00\xDB\x11perf_cycles',b'\x00\x00\xDB\x11perf_time_us',b'\x00\x00\x10\x11data',b'\x00\x04\x0D\x11name',b'\x00\x00\x10\x11extra',b'\x00\x04\x09\x11padding'),(b'\x00\x00\x04\x3E\x00\x00\x00\x10gguf_context',),(b'\x00\x00\x02\x24\x00\x00\x00\x02gguf_init_params',b'\x00\x00\xB6\x11no_alloc',b'\x00\x00\xB0\x11ctx')), _enums = 
(b'\x00\x00\x04\x13\x00\x00\x00\x16ggml_backend\x00GGML_BACKEND_CPU,GGML_BACKEND_GPU,GGML_BACKEND_GPU_SPLIT',b'\x00\x00\x00\x54\x00\x00\x00\x15ggml_ftype\x00GGML_FTYPE_UNKNOWN,GGML_FTYPE_ALL_F32,GGML_FTYPE_MOSTLY_F16,GGML_FTYPE_MOSTLY_Q4_0,GGML_FTYPE_MOSTLY_Q4_1,GGML_FTYPE_MOSTLY_Q4_1_SOME_F16,GGML_FTYPE_MOSTLY_Q8_0,GGML_FTYPE_MOSTLY_Q5_0,GGML_FTYPE_MOSTLY_Q5_1,GGML_FTYPE_MOSTLY_Q2_K,GGML_FTYPE_MOSTLY_Q3_K,GGML_FTYPE_MOSTLY_Q4_K,GGML_FTYPE_MOSTLY_Q5_K,GGML_FTYPE_MOSTLY_Q6_K',b'\x00\x00\x04\x14\x00\x00\x00\x16ggml_linesearch\x00GGML_LINESEARCH_DEFAULT,GGML_LINESEARCH_BACKTRACKING_ARMIJO,GGML_LINESEARCH_BACKTRACKING_WOLFE,GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE',b'\x00\x00\x04\x15\x00\x00\x00\x16ggml_object_type\x00GGML_OBJECT_TENSOR,GGML_OBJECT_GRAPH,GGML_OBJECT_WORK_BUFFER',b'\x00\x00\x00\x2E\x00\x00\x00\x16ggml_op\x00GGML_OP_NONE,GGML_OP_DUP,GGML_OP_ADD,GGML_OP_ADD1,GGML_OP_ACC,GGML_OP_SUB,GGML_OP_MUL,GGML_OP_DIV,GGML_OP_SQR,GGML_OP_SQRT,GGML_OP_LOG,GGML_OP_SUM,GGML_OP_SUM_ROWS,GGML_OP_MEAN,GGML_OP_ARGMAX,GGML_OP_REPEAT,GGML_OP_REPEAT_BACK,GGML_OP_SILU_BACK,GGML_OP_NORM,GGML_OP_RMS_NORM,GGML_OP_RMS_NORM_BACK,GGML_OP_MUL_MAT,GGML_OP_OUT_PROD,GGML_OP_SCALE,GGML_OP_SET,GGML_OP_CPY,GGML_OP_CONT,GGML_OP_RESHAPE,GGML_OP_VIEW,GGML_OP_PERMUTE,GGML_OP_TRANSPOSE,GGML_OP_GET_ROWS,GGML_OP_GET_ROWS_BACK,GGML_OP_DIAG,GGML_OP_DIAG_MASK_INF,GGML_OP_DIAG_MASK_ZERO,GGML_OP_SOFT_MAX,GGML_OP_SOFT_MAX_BACK,GGML_OP_ROPE,GGML_OP_ROPE_BACK,GGML_OP_ALIBI,GGML_OP_CLAMP,GGML_OP_CONV_1D,GGML_OP_CONV_2D,GGML_OP_POOL_1D,GGML_OP_POOL_2D,GGML_OP_FLASH_ATTN,GGML_OP_FLASH_FF,GGML_OP_FLASH_ATTN_BACK,GGML_OP_WIN_PART,GGML_OP_WIN_UNPART,GGML_OP_UNARY,GGML_OP_MAP_UNARY,GGML_OP_MAP_BINARY,GGML_OP_MAP_CUSTOM1_F32,GGML_OP_MAP_CUSTOM2_F32,GGML_OP_MAP_CUSTOM3_F32,GGML_OP_MAP_CUSTOM1,GGML_OP_MAP_CUSTOM2,GGML_OP_MAP_CUSTOM3,GGML_OP_CROSS_ENTROPY_LOSS,GGML_OP_CROSS_ENTROPY_LOSS_BACK,GGML_OP_COUNT',b'\x00\x00\x01\x01\x00\x00\x00\x16ggml_op_pool\x00GGML_OP_POOL_MAX,GGML_OP_POOL_AVG,GGML_OP_POOL_COUNT',b'\x00\x00\x04\x16\x00\x00\x00\x15ggml_opt_result\x00GGML_OPT_OK,GGML_OPT_DID_NOT_CONVERGE,GGML_OPT_NO_CONTEXT,GGML_OPT_INVALID_WOLFE,GGML_OPT_FAIL,GGML_LINESEARCH_FAIL,GGML_LINESEARCH_MINIMUM_STEP,GGML_LINESEARCH_MAXIMUM_STEP,GGML_LINESEARCH_MAXIMUM_ITERATIONS,GGML_LINESEARCH_INVALID_PARAMETERS',b'\x00\x00\x00\xC8\x00\x00\x00\x16ggml_opt_type\x00GGML_OPT_ADAM,GGML_OPT_LBFGS',b'\x00\x00\x04\x17\x00\x00\x00\x16ggml_task_type\x00GGML_TASK_INIT,GGML_TASK_COMPUTE,GGML_TASK_FINALIZE',b'\x00\x00\x00\x01\x00\x00\x00\x16ggml_type\x00GGML_TYPE_F32,GGML_TYPE_F16,GGML_TYPE_Q4_0,GGML_TYPE_Q4_1,GGML_TYPE_Q5_0,GGML_TYPE_Q5_1,GGML_TYPE_Q8_0,GGML_TYPE_Q8_1,GGML_TYPE_Q2_K,GGML_TYPE_Q3_K,GGML_TYPE_Q4_K,GGML_TYPE_Q5_K,GGML_TYPE_Q6_K,GGML_TYPE_Q8_K,GGML_TYPE_I8,GGML_TYPE_I16,GGML_TYPE_I32,GGML_TYPE_COUNT',b'\x00\x00\x01\x14\x00\x00\x00\x16ggml_unary_op\x00GGML_UNARY_OP_ABS,GGML_UNARY_OP_SGN,GGML_UNARY_OP_NEG,GGML_UNARY_OP_STEP,GGML_UNARY_OP_TANH,GGML_UNARY_OP_ELU,GGML_UNARY_OP_RELU,GGML_UNARY_OP_GELU,GGML_UNARY_OP_GELU_QUICK,GGML_UNARY_OP_SILU',b'\x00\x00\x00\x34\x00\x00\x00\x16gguf_type\x00GGUF_TYPE_UINT8,GGUF_TYPE_INT8,GGUF_TYPE_UINT16,GGUF_TYPE_INT16,GGUF_TYPE_UINT32,GGUF_TYPE_INT32,GGUF_TYPE_FLOAT32,GGUF_TYPE_BOOL,GGUF_TYPE_STRING,GGUF_TYPE_ARRAY,GGUF_TYPE_COUNT'), _typenames = 
(b'\x00\x00\x00\xDB__darwin_blkcnt_t',b'\x00\x00\x00\x22__darwin_blksize_t',b'\x00\x00\x00\x11__darwin_clock_t',b'\x00\x00\x00\x22__darwin_ct_rune_t',b'\x00\x00\x00\x22__darwin_dev_t',b'\x00\x00\x03\xBF__darwin_fsblkcnt_t',b'\x00\x00\x03\xBF__darwin_fsfilcnt_t',b'\x00\x00\x03\xBF__darwin_gid_t',b'\x00\x00\x03\xBF__darwin_id_t',b'\x00\x00\x04\x4A__darwin_ino64_t',b'\x00\x00\x04\x4A__darwin_ino_t',b'\x00\x00\x04\x20__darwin_intptr_t',b'\x00\x00\x03\xBF__darwin_mach_port_name_t',b'\x00\x00\x03\xBF__darwin_mach_port_t',b'\x00\x00\x03\xF7__darwin_mbstate_t',b'\x00\x00\x00\x6C__darwin_mode_t',b'\x00\x00\x03\xBF__darwin_natural_t',b'\x00\x00\x00\xDB__darwin_off_t',b'\x00\x00\x00\x22__darwin_pid_t',b'\x00\x00\x03\xEF__darwin_pthread_attr_t',b'\x00\x00\x03\xF0__darwin_pthread_cond_t',b'\x00\x00\x03\xF1__darwin_pthread_condattr_t',b'\x00\x00\x00\x11__darwin_pthread_key_t',b'\x00\x00\x03\xF2__darwin_pthread_mutex_t',b'\x00\x00\x03\xF3__darwin_pthread_mutexattr_t',b'\x00\x00\x03\xF4__darwin_pthread_once_t',b'\x00\x00\x03\xF5__darwin_pthread_rwlock_t',b'\x00\x00\x03\xF6__darwin_pthread_rwlockattr_t',b'\x00\x00\x04\x2D__darwin_pthread_t',b'\x00\x00\x04\x20__darwin_ptrdiff_t',b'\x00\x00\x00\x22__darwin_rune_t',b'\x00\x00\x03\xBF__darwin_sigset_t',b'\x00\x00\x00\x11__darwin_size_t',b'\x00\x00\x03\xBF__darwin_socklen_t',b'\x00\x00\x04\x20__darwin_ssize_t',b'\x00\x00\x00\x22__darwin_suseconds_t',b'\x00\x00\x04\x20__darwin_time_t',b'\x00\x00\x03\xBF__darwin_uid_t',b'\x00\x00\x03\xBF__darwin_useconds_t',b'\x00\x00\x04\x05__darwin_uuid_string_t',b'\x00\x00\x04\x44__darwin_uuid_t',b'\x00\x00\x00\x22__darwin_wchar_t',b'\x00\x00\x00\x22__darwin_wint_t',b'\x00\x00\x03\xB0__int16_t',b'\x00\x00\x00\x22__int32_t',b'\x00\x00\x00\xDB__int64_t',b'\x00\x00\x03\xB5__int8_t',b'\x00\x00\x03\xF7__mbstate_t',b'\x00\x00\x00\x6C__uint16_t',b'\x00\x00\x03\xBF__uint32_t',b'\x00\x00\x04\x4A__uint64_t',b'\x00\x00\x03\xBA__uint8_t',b'\x00\x00\x03\xF8block_q2_K',b'\x00\x00\x03\xF9block_q3_K',b'\x00\x00\x03\xFAblock_q4_K',b'\x00\x00\x03\xFBblock_q5_K',b'\x00\x00\x03\xFCblock_q6_K',b'\x00\x00\x03\xFDblock_q8_K',b'\x00\x00\x01\xEAggml_binary_op_f32_t',b'\x00\x00\x02\x02ggml_custom1_op_f32_t',b'\x00\x00\x02\x07ggml_custom1_op_t',b'\x00\x00\x01\xF0ggml_custom2_op_f32_t',b'\x00\x00\x01\xF6ggml_custom2_op_t',b'\x00\x00\x01\xC5ggml_custom3_op_f32_t',b'\x00\x00\x01\xCCggml_custom3_op_t',b'\x00\x00\x00\x6Cggml_fp16_t',b'\x00\x00\x04\x4Fggml_from_float_t',b'\x00\x00\x04\x52ggml_to_float_t',b'\x00\x00\x04\x18ggml_type_traits_t',b'\x00\x00\x01\xFDggml_unary_op_f32_t',b'\x00\x00\x04\x50ggml_vec_dot_t',b'\x00\x00\x03\xB0int16_t',b'\x00\x00\x00\x22int32_t',b'\x00\x00\x00\xDBint64_t',b'\x00\x00\x03\xB5int8_t',b'\x00\x00\x03\xB0int_fast16_t',b'\x00\x00\x00\x22int_fast32_t',b'\x00\x00\x00\xDBint_fast64_t',b'\x00\x00\x03\xB5int_fast8_t',b'\x00\x00\x03\xB0int_least16_t',b'\x00\x00\x00\x22int_least32_t',b'\x00\x00\x00\xDBint_least64_t',b'\x00\x00\x03\xB5int_least8_t',b'\x00\x00\x04\x20intmax_t',b'\x00\x00\x04\x20intptr_t',b'\x00\x00\x04\x1Dmax_align_t',b'\x00\x00\x04\x20ptrdiff_t',b'\x00\x00\x00\xDBregister_t',b'\x00\x00\x00\x11rsize_t',b'\x00\x00\x00\x11size_t',b'\x00\x00\x04\x4Asyscall_arg_t',b'\x00\x00\x00\x6Cu_int16_t',b'\x00\x00\x03\xBFu_int32_t',b'\x00\x00\x04\x4Au_int64_t',b'\x00\x00\x03\xBAu_int8_t',b'\x00\x00\x00\x6Cuint16_t',b'\x00\x00\x03\xBFuint32_t',b'\x00\x00\x04\x4Auint64_t',b'\x00\x00\x03\xBAuint8_t',b'\x00\x00\x00\x6Cuint_fast16_t',b'\x00\x00\x03\xBFuint_fast32_t',b'\x00\x00\x04\x4Auint_fast64_t',b'\x00\x00\x03\xBAuint_fast8_t',b'\x00\x00\
x00\x6Cuint_least16_t',b'\x00\x00\x03\xBFuint_least32_t',b'\x00\x00\x04\x4Auint_least64_t',b'\x00\x00\x03\xBAuint_least8_t',b'\x00\x00\x00\x11uintmax_t',b'\x00\x00\x00\x11uintptr_t',b'\x00\x00\x04\x4Auser_addr_t',b'\x00\x00\x00\xDBuser_long_t',b'\x00\x00\x00\xDBuser_off_t',b'\x00\x00\x04\x4Auser_size_t',b'\x00\x00\x00\xDBuser_ssize_t',b'\x00\x00\x00\xDBuser_time_t',b'\x00\x00\x04\x4Auser_ulong_t',b'\x00\x00\x00\x22wchar_t'), ) ggml-org-ggml-7ec8045/examples/python/ggml/ffi/000077500000000000000000000000001506673203700213545ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/python/ggml/ffi/__init__.pyi000066400000000000000000000000741506673203700236370ustar00rootroot00000000000000# Phony stubs. class CData: pass class CType: passggml-org-ggml-7ec8045/examples/python/ggml/utils.py000066400000000000000000000214051506673203700223240ustar00rootroot00000000000000""" Common helpers for working with ggml + numpy """ from ggml import ffi, lib from typing import Union, Optional import numpy as np def init(mem_size: int, mem_buffer: ffi.CData = ffi.NULL, no_alloc: bool = False) -> ffi.CData: """ Initialize a ggml context, which will be freed automatically when the pointer is garbage collected. """ params = ffi.new('struct ggml_init_params*') params.mem_size = mem_size params.mem_buffer = mem_buffer params.no_alloc = no_alloc return ffi.gc(lib.ggml_init(params[0]), lib.ggml_free) TensorLike = Union[ffi.CData, np.ndarray] def copy(from_tensor: TensorLike, to_tensor: TensorLike, allow_requantize: bool = True): """ Copy the contents of one tensor to another, doing any necessary (de/re)quantization transparently. Works across numpy & ggml tensors, but they must have the same shape (and be contiguous). Parameters ---------- from_tensor : TensorLike The tensor to copy from (a numpy array or possibly-quantized ggml tensor) to_tensor : TensorLike The tensor to copy to (a numpy array or possibly-quantized ggml tensor) allow_requantize : bool If False, will throw an error if requantization is required (i.e. both from_tensor and to_tensor are quantized with different quantization types) """ if id(from_tensor) == id(to_tensor): return __expect_same_layout("source", from_tensor, "destination", to_tensor) __check_shape_consistent_with_type(from_tensor) __check_shape_consistent_with_type(to_tensor) from_type = __get_type(from_tensor) to_type = __get_type(to_tensor) if from_type == to_type: ffi.memmove(__get_data(to_tensor), __get_data(from_tensor), __get_nbytes(from_tensor)) else: assert allow_requantize or not lib.ggml_is_quantized(from_type) or not lib.ggml_is_quantized(to_type), \ f"Requantizing from {__type_name(from_type)} to {__type_name(to_type)} is disabled. Force with allow_requantize=True" __set_floats(to_tensor, __get_floats(from_tensor)) def numpy(tensor: ffi.CData, allow_copy: Union[bool, np.ndarray] = False, allow_requantize=False) -> np.ndarray: """ Convert a ggml tensor to a numpy array. If the tensor isn't quantized, the returned numpy array will be a view over its data. If it is quantized (and allow_copy is True), the copy will involve dequantization and the returned array will be a copy of the original tensor (any changes to the numpy array won't then be reflected back to the tensor). Parameters ---------- tensor : ffi.CData The tensor to convert to a numpy array allow_copy : bool or np.ndarray If False, will throw an error if the tensor is quantized (since dequantization requires extra memory). 
If True, will dequantize the tensor and return a copy of the data in a new float32 numpy array. If an np.ndarray, will copy the data into the given array (which must be the same shape as the tensor) when dequantization is needed allow_requantize : bool If allow_copy is a tensor with a different quantization type than the source tensor, will throw an error unless allow_requantize is True. """ shape = __get_shape(tensor) if lib.ggml_is_quantized(tensor.type): if allow_copy == False: raise ValueError(f"{__describe(tensor)} is quantized, conversion to numpy requires a copy (pass allow_copy=True; changes to the numpy array won't affect the original).") elif isinstance(allow_copy, np.ndarray): __expect_same_layout("source tensor", tensor, "dequantization output tensor", allow_copy) destination = allow_copy else: destination = np.empty(shape, dtype=np.float32) copy(tensor, destination, allow_requantize=allow_requantize) return destination else: dtype = __type_to_dtype(tensor.type) if not dtype: raise NotImplementedError(f'Cannot convert {__describe(tensor)} to numpy') assert __is_contiguous(tensor), f"Cannot convert {__describe(tensor)} to numpy (support contiguous tensors only)" nbytes = lib.ggml_nelements(tensor) * lib.ggml_type_size(tensor.type) array = np.frombuffer(ffi.buffer(lib.ggml_get_data(tensor), nbytes), dtype=dtype) array.shape = shape return array def __type_name(type: int) -> str: name = lib.ggml_type_name(type) return ffi.string(name).decode('utf-8') if name else None __k_quant_types = set([ lib.GGML_TYPE_Q2_K, lib.GGML_TYPE_Q3_K, lib.GGML_TYPE_Q4_K, lib.GGML_TYPE_Q5_K, lib.GGML_TYPE_Q6_K, lib.GGML_TYPE_Q8_K, ]) __type_to_dtype_dict = { lib.GGML_TYPE_I8: np.int8, lib.GGML_TYPE_I16: np.int16, lib.GGML_TYPE_I32: np.int32, lib.GGML_TYPE_F16: np.float16, lib.GGML_TYPE_F32: np.float32, } def __type_to_dtype(type: int) -> Optional[np.dtype]: return __type_to_dtype_dict.get(type) def __dtype_to_type(dtype: np.dtype): if dtype == np.float32: return lib.GGML_TYPE_F32 elif dtype == np.float16: return lib.GGML_TYPE_F16 elif dtype == np.int32: return lib.GGML_TYPE_I32 elif dtype == np.int16: return lib.GGML_TYPE_I16 elif dtype == np.int8: return lib.GGML_TYPE_I8 else: raise ValueError(f"Unsupported dtype: {dtype}") def __describe(tensor: ffi.CType): return f'Tensor[{__type_name(__get_type(tensor))}, {__get_shape(tensor)}]' def __get_type(tensor: TensorLike): return __dtype_to_type(tensor.dtype) if isinstance(tensor, np.ndarray) else tensor.type def __get_shape(x: TensorLike): return x.shape if isinstance(x, np.ndarray) else tuple([x.ne[i] for i in range(x.n_dims)]) def __get_strides(x: TensorLike): return x.strides if isinstance(x, np.ndarray) else tuple([x.nb[i] for i in range(x.n_dims)]) def __get_data(x: TensorLike) -> ffi.CData: return ffi.from_buffer(x) if isinstance(x, np.ndarray) else lib.ggml_get_data(x) def __get_nbytes(tensor: TensorLike): return tensor.nbytes if isinstance(tensor, np.ndarray) else lib.ggml_nbytes(tensor) def __get_nelements(tensor: TensorLike): return tensor.size if isinstance(tensor, np.ndarray) else lib.ggml_nelements(tensor) def __is_contiguous(tensor: TensorLike): return tensor.flags['C_CONTIGUOUS'] if isinstance(tensor, np.ndarray) else lib.ggml_is_contiguous(tensor) def __get_floats(tensor: TensorLike) -> ffi.CData: data, type = __get_data(tensor), __get_type(tensor) if type == lib.GGML_TYPE_F32: return ffi.cast('float*', data) else: nelements = __get_nelements(tensor) floats = ffi.new('float[]', nelements) if type == lib.GGML_TYPE_F16: 
lib.ggml_fp16_to_fp32_row(ffi.cast('uint16_t*', data), floats, nelements) elif lib.ggml_is_quantized(type): qtype = lib.ggml_internal_get_type_traits(type) assert qtype.to_float, f"Type {__type_name(type)} is not supported by ggml" qtype.to_float(data, floats, nelements) else: raise NotImplementedError(f'Cannot read floats from {__describe(tensor)}') return floats def __set_floats(tensor: TensorLike, f32_data: ffi.CData) -> None: data, type, nbytes = __get_data(tensor), __get_type(tensor), __get_nbytes(tensor) if type == lib.GGML_TYPE_F32: ffi.memmove(data, f32_data, nbytes) else: nelements = __get_nelements(tensor) if type == lib.GGML_TYPE_F16: lib.ggml_fp32_to_fp16_row(f32_data, ffi.cast('uint16_t*', data), nelements) elif lib.ggml_is_quantized(type): qtype = lib.ggml_internal_get_type_traits(type) assert qtype.from_float, f"Type {__type_name(type)} is not supported by ggml" qtype.from_float(f32_data, data, nelements) else: raise NotImplementedError(f'Cannot write floats to {__describe(tensor)}') def __expect_same_layout(name1: str, tensor1: TensorLike, name2: str, tensor2: TensorLike): shape1, shape2 = __get_shape(tensor1), __get_shape(tensor2) assert shape1 == shape2, f"Shape mismatch: {name1} has {shape1} but {name2} has {shape2}" assert __is_contiguous(tensor1) and __is_contiguous(tensor2), f"Only contiguous tensors are supported (got {name1} with strides {__get_strides(tensor1)} and {name2} with strides {__get_strides(tensor2)})" def __check_shape_consistent_with_type(tensor: TensorLike): type = __get_type(tensor) if not lib.ggml_is_quantized(type): return shape = __get_shape(tensor) block_size = lib.ggml_blck_size(type) assert not (block_size == 0 and type in __k_quant_types), f"Can't quantize, native library was not compiled with USE_K_QUANTS!" assert block_size > 0, f"Invalid block size {block_size} for type {__type_name(type)}" for i, d in enumerate(shape): assert d % block_size == 0, f"Dimension {i} of {__describe(tensor)} is not divisible by {block_size}, required for quantization." ggml-org-ggml-7ec8045/examples/python/regenerate.py000066400000000000000000000041401506673203700223540ustar00rootroot00000000000000# Generates bindings for the ggml library. # # cffi requires prior C preprocessing of the headers, and it uses pycparser which chokes on a couple of things # so we help it a bit (e.g. replace sizeof expressions with their value, remove exotic syntax found in Darwin headers). import os, sys, re, subprocess import cffi from stubs import generate_stubs API = os.environ.get('API', 'api.h') CC = os.environ.get('CC') or 'gcc' C_INCLUDE_DIR = os.environ.get('C_INCLUDE_DIR', '../../../llama.cpp') CPPFLAGS = [ "-I", C_INCLUDE_DIR, '-D__fp16=uint16_t', # pycparser doesn't support __fp16 '-D__attribute__(x)=', '-D_Static_assert(x, m)=', ] + [x for x in os.environ.get('CPPFLAGS', '').split(' ') if x != ''] try: header = subprocess.run([CC, "-E", *CPPFLAGS, API], capture_output=True, text=True, check=True).stdout except subprocess.CalledProcessError as e: print(f'{e.stderr}\n{e}', file=sys.stderr); raise header = '\n'.join([l for l in header.split('\n') if '__darwin_va_list' not in l]) # pycparser hates this # Replace constant size expressions w/ their value (compile & run a mini exe for each, because why not). # First, extract anything *inside* square brackets and anything that looks like a sizeof call.
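# Illustrative sketch (not part of the original script; identifier and value are
# hypothetical): given a preprocessed header declaration such as
#     char name[GGML_MAX_NAME];
# the regex below captures the bracket contents "GGML_MAX_NAME", the throwaway C
# program compiled next prints its numeric value (say 48), and header.replace()
# then rewrites the declaration to "char name[48];" so pycparser/cffi can parse it.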
for expr in set(re.findall(f'(?<=\\[)[^\\]]+(?=])|sizeof\\s*\\([^()]+\\)', header)): if re.match(r'^(\d+|\s*)$', expr): continue # skip constants and empty bracket contents subprocess.run([CC, "-o", "eval_size_expr", *CPPFLAGS, "-x", "c", "-"], text=True, check=True, input=f'''#include <stdio.h> #include "{API}" int main() {{ printf("%lu", (size_t)({expr})); }}''') size = subprocess.run(["./eval_size_expr"], capture_output=True, text=True, check=True).stdout print(f'Computed constexpr {expr} = {size}') header = header.replace(expr, size) ffibuilder = cffi.FFI() ffibuilder.cdef(header) ffibuilder.set_source(f'ggml.cffi', None) # we're not compiling a native extension, as this quickly gets hairy ffibuilder.compile(verbose=True) with open("ggml/__init__.pyi", "wt") as f: f.write(generate_stubs(header))ggml-org-ggml-7ec8045/examples/python/stubs.py000066400000000000000000000106441506673203700214010ustar00rootroot00000000000000""" This generates .pyi stubs for the cffi Python bindings generated by regenerate.py """ import sys, re, itertools sys.path.extend(['.', '..']) # for pycparser from pycparser import c_ast, parse_file, CParser import pycparser.plyparser from pycparser.c_ast import PtrDecl, TypeDecl, FuncDecl, EllipsisParam, IdentifierType, Struct, Enum, Typedef from typing import Tuple __c_type_to_python_type = { 'void': 'None', '_Bool': 'bool', 'char': 'int', 'short': 'int', 'int': 'int', 'long': 'int', 'ptrdiff_t': 'int', 'size_t': 'int', 'int8_t': 'int', 'uint8_t': 'int', 'int16_t': 'int', 'uint16_t': 'int', 'int32_t': 'int', 'uint32_t': 'int', 'int64_t': 'int', 'uint64_t': 'int', 'float': 'float', 'double': 'float', 'ggml_fp16_t': 'np.float16', } def format_type(t: TypeDecl): if isinstance(t, PtrDecl) or isinstance(t, Struct): return 'ffi.CData' if isinstance(t, Enum): return 'int' if isinstance(t, TypeDecl): return format_type(t.type) if isinstance(t, IdentifierType): assert len(t.names) == 1, f'Expected a single name, got {t.names}' return __c_type_to_python_type.get(t.names[0]) or 'ffi.CData' return t.name class PythonStubFuncDeclVisitor(c_ast.NodeVisitor): def __init__(self): self.sigs = {} self.sources = {} def get_source_snippet_lines(self, coord: pycparser.plyparser.Coord) -> Tuple[list[str], list[str]]: if coord.file not in self.sources: with open(coord.file, 'rt') as f: self.sources[coord.file] = f.readlines() source_lines = self.sources[coord.file] ncomment_lines = len(list(itertools.takewhile(lambda i: re.search(r'^\s*(//|/\*)', source_lines[i]), range(coord.line - 2, -1, -1)))) comment_lines = [l.strip() for l in source_lines[coord.line - 1 - ncomment_lines:coord.line - 1]] decl_lines = [] for line in source_lines[coord.line - 1:]: decl_lines.append(line.rstrip()) if (';' in line) or ('{' in line): break return (comment_lines, decl_lines) def visit_Enum(self, node: Enum): if node.values is not None: for e in node.values.enumerators: self.sigs[e.name] = f' @property\n def {e.name}(self) -> int: ...'
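# Illustrative sketch (not from the original source; the enum member is just an
# example): for a C enum member such as GGML_TYPE_F32, the visit_Enum handler
# above records a stub that renders inside the generated `class lib` as:
#     @property
#     def GGML_TYPE_F32(self) -> int: ...
# mirroring the f-string template stored in self.sigs[e.name].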
def visit_Typedef(self, node: Typedef): pass def visit_FuncDecl(self, node: FuncDecl): ret_type = node.type is_ptr = False while isinstance(ret_type, PtrDecl): ret_type = ret_type.type is_ptr = True fun_name = ret_type.declname if fun_name.startswith('__'): return args = [] argnames = [] def gen_name(stem): i = 1 while True: new_name = stem if i == 1 else f'{stem}{i}' if new_name not in argnames: return new_name i += 1 for a in node.args.params: if isinstance(a, EllipsisParam): arg_name = gen_name('args') argnames.append(arg_name) args.append('*' + arg_name) elif format_type(a.type) == 'None': continue else: arg_name = a.name or gen_name('arg') argnames.append(arg_name) args.append(f'{arg_name}: {format_type(a.type)}') ret = format_type(ret_type if not is_ptr else node.type) comment_lines, decl_lines = self.get_source_snippet_lines(node.coord) lines = [f' def {fun_name}({", ".join(args)}) -> {ret}:'] if len(comment_lines) == 0 and len(decl_lines) == 1: lines += [f' """{decl_lines[0]}"""'] else: lines += [' """'] lines += [f' {c.lstrip("/* ")}' for c in comment_lines] if len(comment_lines) > 0: lines += [''] lines += [f' {d}' for d in decl_lines] lines += [' """'] lines += [' ...'] self.sigs[fun_name] = '\n'.join(lines) def generate_stubs(header: str): """ Generates a .pyi Python stub file for the GGML API using C header files. """ v = PythonStubFuncDeclVisitor() v.visit(CParser().parse(header, "")) keys = list(v.sigs.keys()) keys.sort() return '\n'.join([ '# auto-generated file', 'import ggml.ffi as ffi', 'import numpy as np', 'class lib:', *[v.sigs[k] for k in keys] ]) ggml-org-ggml-7ec8045/examples/python/test_tensor.py000066400000000000000000000224401506673203700226070ustar00rootroot00000000000000import pytest from pytest import raises from ggml import lib, ffi from ggml.utils import init, copy, numpy import numpy as np import numpy.testing as npt @pytest.fixture() def ctx(): print("setup") yield init(mem_size=10*1024*1024) print("teardown") class TestNumPy: # Single element def test_set_get_single_i32(self, ctx): i = lib.ggml_new_i32(ctx, 42) assert lib.ggml_get_i32_1d(i, 0) == 42 assert numpy(i) == np.array([42], dtype=np.int32) def test_set_get_single_f32(self, ctx): i = lib.ggml_new_f32(ctx, 4.2) epsilon = 0.000001 # Not sure why so large a difference??
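# Added explanatory note: 4.2 has no exact binary representation; the closest
# float32 is ~4.1999998, so a value round-tripped through ggml's float32 storage
# can differ from the Python double 4.2 by ~2e-7, hence the tolerance above.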
assert lib.ggml_get_f32_1d(i, 0) == pytest.approx(4.2, abs=epsilon) assert numpy(i) == pytest.approx(np.array([4.2], dtype=np.float32), abs=epsilon) def _test_copy_np_to_ggml(self, a: np.ndarray, t: ffi.CData): a2 = a.copy() # Clone original copy(a, t) npt.assert_array_equal(numpy(t), a2) # I32 def test_copy_np_to_ggml_1d_i32(self, ctx): t = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_I32, 10) a = np.arange(10, dtype=np.int32) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_2d_i32(self, ctx): t = lib.ggml_new_tensor_2d(ctx, lib.GGML_TYPE_I32, 2, 3) a = np.arange(2 * 3, dtype=np.int32).reshape((2, 3)) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_3d_i32(self, ctx): t = lib.ggml_new_tensor_3d(ctx, lib.GGML_TYPE_I32, 2, 3, 4) a = np.arange(2 * 3 * 4, dtype=np.int32).reshape((2, 3, 4)) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_4d_i32(self, ctx): t = lib.ggml_new_tensor_4d(ctx, lib.GGML_TYPE_I32, 2, 3, 4, 5) a = np.arange(2 * 3 * 4 * 5, dtype=np.int32).reshape((2, 3, 4, 5)) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_4d_n_i32(self, ctx): dims = [2, 3, 4, 5] # GGML_MAX_DIMS is 4, going beyond would crash pdims = ffi.new('int64_t[]', len(dims)) for i, d in enumerate(dims): pdims[i] = d t = lib.ggml_new_tensor(ctx, lib.GGML_TYPE_I32, len(dims), pdims) a = np.arange(np.prod(dims), dtype=np.int32).reshape(tuple(pdims)) self._test_copy_np_to_ggml(a, t) # F32 def test_copy_np_to_ggml_1d_f32(self, ctx): t = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, 10) a = np.arange(10, dtype=np.float32) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_2d_f32(self, ctx): t = lib.ggml_new_tensor_2d(ctx, lib.GGML_TYPE_F32, 2, 3) a = np.arange(2 * 3, dtype=np.float32).reshape((2, 3)) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_3d_f32(self, ctx): t = lib.ggml_new_tensor_3d(ctx, lib.GGML_TYPE_F32, 2, 3, 4) a = np.arange(2 * 3 * 4, dtype=np.float32).reshape((2, 3, 4)) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_4d_f32(self, ctx): t = lib.ggml_new_tensor_4d(ctx, lib.GGML_TYPE_F32, 2, 3, 4, 5) a = np.arange(2 * 3 * 4 * 5, dtype=np.float32).reshape((2, 3, 4, 5)) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_4d_n_f32(self, ctx): dims = [2, 3, 4, 5] # GGML_MAX_DIMS is 4, going beyond would crash pdims = ffi.new('int64_t[]', len(dims)) for i, d in enumerate(dims): pdims[i] = d t = lib.ggml_new_tensor(ctx, lib.GGML_TYPE_F32, len(dims), pdims) a = np.arange(np.prod(dims), dtype=np.float32).reshape(tuple(pdims)) self._test_copy_np_to_ggml(a, t) # F16 def test_copy_np_to_ggml_1d_f16(self, ctx): t = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F16, 10) a = np.arange(10, dtype=np.float16) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_2d_f16(self, ctx): t = lib.ggml_new_tensor_2d(ctx, lib.GGML_TYPE_F16, 2, 3) a = np.arange(2 * 3, dtype=np.float16).reshape((2, 3)) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_3d_f16(self, ctx): t = lib.ggml_new_tensor_3d(ctx, lib.GGML_TYPE_F16, 2, 3, 4) a = np.arange(2 * 3 * 4, dtype=np.float16).reshape((2, 3, 4)) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_4d_f16(self, ctx): t = lib.ggml_new_tensor_4d(ctx, lib.GGML_TYPE_F16, 2, 3, 4, 5) a = np.arange(2 * 3 * 4 * 5, dtype=np.float16).reshape((2, 3, 4, 5)) self._test_copy_np_to_ggml(a, t) def test_copy_np_to_ggml_4d_n_f16(self, ctx): dims = [2, 3, 4, 5] # GGML_MAX_DIMS is 4, going beyond would crash pdims = ffi.new('int64_t[]', len(dims)) for i, d in enumerate(dims): pdims[i] = d t = lib.ggml_new_tensor(ctx, lib.GGML_TYPE_F16, len(dims), pdims)
a = np.arange(np.prod(dims), dtype=np.float16).reshape(tuple(pdims)) self._test_copy_np_to_ggml(a, t) # Mismatching shapes def test_copy_mismatching_shapes_1d(self, ctx): t = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, 10) a = np.arange(10, dtype=np.float32) copy(a, t) # OK a = a.reshape((5, 2)) with raises(AssertionError): copy(a, t) with raises(AssertionError): copy(t, a) def test_copy_mismatching_shapes_2d(self, ctx): t = lib.ggml_new_tensor_2d(ctx, lib.GGML_TYPE_F32, 2, 3) a = np.arange(6, dtype=np.float32) copy(a.reshape((2, 3)), t) # OK a = a.reshape((3, 2)) with raises(AssertionError): copy(a, t) with raises(AssertionError): copy(t, a) def test_copy_mismatching_shapes_3d(self, ctx): t = lib.ggml_new_tensor_3d(ctx, lib.GGML_TYPE_F32, 2, 3, 4) a = np.arange(24, dtype=np.float32) copy(a.reshape((2, 3, 4)), t) # OK a = a.reshape((2, 4, 3)) with raises(AssertionError): copy(a, t) with raises(AssertionError): copy(t, a) def test_copy_mismatching_shapes_4d(self, ctx): t = lib.ggml_new_tensor_4d(ctx, lib.GGML_TYPE_F32, 2, 3, 4, 5) a = np.arange(24*5, dtype=np.float32) copy(a.reshape((2, 3, 4, 5)), t) # OK a = a.reshape((2, 3, 5, 4)) with raises(AssertionError): copy(a, t) with raises(AssertionError): copy(t, a) def test_copy_f16_to_f32(self, ctx): t = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, 1) a = np.array([123.45], dtype=np.float16) copy(a, t) np.testing.assert_allclose(lib.ggml_get_f32_1d(t, 0), 123.45, rtol=1e-3) def test_copy_f32_to_f16(self, ctx): t = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F16, 1) a = np.array([123.45], dtype=np.float32) copy(a, t) np.testing.assert_allclose(lib.ggml_get_f32_1d(t, 0), 123.45, rtol=1e-3) def test_copy_f16_to_Q5_K(self, ctx): n = 256 t = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_Q5_K, n) a = np.arange(n, dtype=np.float16) copy(a, t) np.testing.assert_allclose(a, numpy(t, allow_copy=True), rtol=0.05) def test_copy_Q5_K_to_f16(self, ctx): n = 256 t = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_Q5_K, n) copy(np.arange(n, dtype=np.float32), t) a = np.arange(n, dtype=np.float16) copy(t, a) np.testing.assert_allclose(a, numpy(t, allow_copy=True), rtol=0.05) def test_copy_i16_f32_mismatching_types(self, ctx): t = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, 1) a = np.arange(1, dtype=np.int16) with raises(NotImplementedError): copy(a, t) with raises(NotImplementedError): copy(t, a) class TestTensorCopy: def test_copy_self(self, ctx): t = lib.ggml_new_i32(ctx, 42) copy(t, t) assert lib.ggml_get_i32_1d(t, 0) == 42 def test_copy_1d(self, ctx): t1 = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, 10) t2 = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, 10) a = np.arange(10, dtype=np.float32) copy(a, t1) copy(t1, t2) assert np.allclose(a, numpy(t2)) assert np.allclose(numpy(t1), numpy(t2)) class TestGraph: def test_add(self, ctx): n = 256 ta = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, n) tb = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, n) tsum = lib.ggml_add(ctx, ta, tb) assert tsum.type == lib.GGML_TYPE_F32 gf = ffi.new('struct ggml_cgraph*') lib.ggml_build_forward_expand(gf, tsum) a = np.arange(0, n, dtype=np.float32) b = np.arange(n, 0, -1, dtype=np.float32) copy(a, ta) copy(b, tb) lib.ggml_graph_compute_with_ctx(ctx, gf, 1) assert np.allclose(numpy(tsum, allow_copy=True), a + b) class TestQuantization: def test_quantized_add(self, ctx): n = 256 ta = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_Q5_K, n) tb = lib.ggml_new_tensor_1d(ctx, lib.GGML_TYPE_F32, n) tsum = lib.ggml_add(ctx, ta, tb) assert tsum.type == lib.GGML_TYPE_Q5_K gf = ffi.new('struct 
ggml_cgraph*') lib.ggml_build_forward_expand(gf, tsum) a = np.arange(0, n, dtype=np.float32) b = np.arange(n, 0, -1, dtype=np.float32) copy(a, ta) copy(b, tb) lib.ggml_graph_compute_with_ctx(ctx, gf, 1) unquantized_sum = a + b sum = numpy(tsum, allow_copy=True) diff = np.linalg.norm(unquantized_sum - sum, np.inf) assert diff > 4 assert diff < 5 ggml-org-ggml-7ec8045/examples/sam/000077500000000000000000000000001506673203700191215ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/sam/CMakeLists.txt000066400000000000000000000004311506673203700216570ustar00rootroot00000000000000# # sam set(TEST_TARGET sam) add_executable(${TEST_TARGET} sam.cpp) target_link_libraries(${TEST_TARGET} PRIVATE ggml common) # # sam-quantize #set(TEST_TARGET sam-quantize) #add_executable(${TEST_TARGET} quantize.cpp) #target_link_libraries(${TEST_TARGET} PRIVATE ggml common) ggml-org-ggml-7ec8045/examples/sam/README.md000066400000000000000000000062311506673203700204020ustar00rootroot00000000000000# SAM.cpp Inference of Meta's [Segment Anything Model](https://github.com/facebookresearch/segment-anything/) in pure C/C++ ## Description The example currently supports only the [ViT-B SAM model checkpoint](https://huggingface.co/facebook/sam-vit-base). ## Next steps - [X] Reduce memory usage by utilizing the new ggml-alloc - [X] Remove redundant graph nodes - [ ] Make inference faster - [X] Fix the difference in output masks compared to the PyTorch implementation - [X] Filter masks based on stability score - [ ] Add support for user input - [ ] Support F16 for heavy F32 ops - [ ] Test quantization - [X] Support bigger model checkpoints - [ ] GPU support ## Quick start Set up Python and build the examples as described in the main README. ```bash # Download PTH model wget -P examples/sam/ https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth # Convert PTH model to ggml python examples/sam/convert-pth-to-ggml.py examples/sam/sam_vit_b_01ec64.pth examples/sam/ 1 # Run inference ./bin/sam -t 16 -i ../examples/sam/example.jpg -m ../examples/sam/ggml-model-f16.bin ``` ## Downloading and converting the model checkpoints You can download a [model checkpoint](https://github.com/facebookresearch/segment-anything/tree/main#model-checkpoints) and convert it to `ggml` format using the script `convert-pth-to-ggml.py`. ## Example output on M2 Ultra ``` $ ▶ make -j sam && time ./bin/sam -t 8 -i img.jpg [ 28%] Built target common [ 71%] Built target ggml [100%] Built target sam main: seed = 1693224265 main: loaded image 'img.jpg' (680 x 453) sam_image_preprocess: scale = 0.664062 main: preprocessed image (1024 x 1024) sam_model_load: loading model from 'models/sam-vit-b/ggml-model-f16.bin' - please wait ... sam_model_load: n_enc_state = 768 sam_model_load: n_enc_layer = 12 sam_model_load: n_enc_head = 12 sam_model_load: n_enc_out_chans = 256 sam_model_load: n_pt_embd = 4 sam_model_load: ftype = 1 sam_model_load: qntvr = 0 operator(): ggml ctx size = 202.32 MB sam_model_load: ......................................
done sam_model_load: model size = 185.05 MB / num tensors = 304 embd_img dims: 64 64 256 1 f32 First & Last 10 elements: -0.05117 -0.06408 -0.07154 -0.06991 -0.07212 -0.07690 -0.07508 -0.07281 -0.07383 -0.06779 0.01589 0.01775 0.02250 0.01675 0.01766 0.01661 0.01811 0.02051 0.02103 0.03382 sum: 12736.272313 Skipping mask 0 with iou 0.705935 below threshold 0.880000 Skipping mask 1 with iou 0.762136 below threshold 0.880000 Mask 2: iou = 0.947081, stability_score = 0.955437, bbox (371, 436), (144, 168) main: load time = 51.28 ms main: total time = 2047.49 ms real 0m2.068s user 0m16.343s sys 0m0.214s ``` Input point is (414.375, 162.796875) (currently hardcoded) Input image: ![llamas](https://user-images.githubusercontent.com/8558655/261301565-37b7bf4b-bf91-40cf-8ec1-1532316e1612.jpg) Output mask (mask_out_2.png in build folder): ![mask_glasses](https://user-images.githubusercontent.com/8558655/263706800-47eeea30-1457-4c87-938b-8f11536c5aa7.png) ## References - [ggml](https://github.com/ggerganov/ggml) - [SAM](https://segment-anything.com/) - [SAM demo](https://segment-anything.com/demo) ggml-org-ggml-7ec8045/examples/sam/convert-pth-to-ggml.py000066400000000000000000000075061506673203700233200ustar00rootroot00000000000000# Convert a SAM model checkpoint to a ggml compatible file # import sys import torch import struct import numpy as np if len(sys.argv) < 3: print("Usage: convert-pth-to-ggml.py file-model dir-output [ftype]\n") print(" ftype == 0 -> float32") print(" ftype == 1 -> float16") sys.exit(1) # output in the same directory as the model fname_model = sys.argv[1] dir_out = sys.argv[2] fname_out = dir_out + "/ggml-model.bin" # possible data types # ftype == 0 -> float32 # ftype == 1 -> float16 # # map from ftype to string ftype_str = ["f32", "f16"] ftype = 1 if len(sys.argv) > 3: ftype = int(sys.argv[3]) if ftype < 0 or ftype > 1: print("Invalid ftype: " + str(ftype)) sys.exit(1) fname_out = fname_out.replace(".bin", "-" + ftype_str[ftype] + ".bin") # Default params are set to sam_vit_b checkpoint n_enc_state = 768 n_enc_layers = 12 n_enc_heads = 12 n_enc_out_chans = 256 n_pt_embd = 4 model = torch.load(fname_model, map_location="cpu") for k, v in model.items(): print(k, v.shape) if k == "image_encoder.blocks.0.norm1.weight": n_enc_state = v.shape[0] if n_enc_state == 1024: # sam_vit_l n_enc_layers = 24 n_enc_heads = 16 elif n_enc_state == 1280: # sam_vit_h n_enc_layers = 32 n_enc_heads = 16 hparams = { "n_enc_state": n_enc_state, "n_enc_layers": n_enc_layers, "n_enc_heads": n_enc_heads, "n_enc_out_chans": n_enc_out_chans, "n_pt_embd": n_pt_embd, } print(hparams) for k, v in model.items(): print(k, v.shape) #exit() #code.interact(local=locals()) fout = open(fname_out, "wb") fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex fout.write(struct.pack("i", hparams["n_enc_state"])) fout.write(struct.pack("i", hparams["n_enc_layers"])) fout.write(struct.pack("i", hparams["n_enc_heads"])) fout.write(struct.pack("i", hparams["n_enc_out_chans"])) fout.write(struct.pack("i", hparams["n_pt_embd"])) fout.write(struct.pack("i", ftype)) for k, v in model.items(): name = k shape = v.shape if name[:19] == "prompt_encoder.mask": continue print("Processing variable: " + name + " with shape: ", shape, " and type: ", v.dtype) #data = tf.train.load_variable(dir_model, name).squeeze() #data = v.numpy().squeeze() data = v.numpy() n_dims = len(data.shape) # for efficiency - transpose some matrices # "model/h.*/attn/c_attn/w" # "model/h.*/attn/c_proj/w" # "model/h.*/mlp/c_fc/w" # 
"model/h.*/mlp/c_proj/w" #if name[-14:] == "/attn/c_attn/w" or \ # name[-14:] == "/attn/c_proj/w" or \ # name[-11:] == "/mlp/c_fc/w" or \ # name[-13:] == "/mlp/c_proj/w": # print(" Transposing") # data = data.transpose() dshape = data.shape # default type is fp16 ftype_cur = 1 if ftype == 0 or n_dims == 1 or \ name == "image_encoder.pos_embed" or \ name.startswith("prompt_encoder") or \ name.startswith("mask_decoder.iou_token") or \ name.startswith("mask_decoder.mask_tokens"): print(" Converting to float32") data = data.astype(np.float32) ftype_cur = 0 else: print(" Converting to float16") data = data.astype(np.float16) # reshape the 1D bias into a 4D tensor so we can use ggml_repeat # keep it in F32 since the data is small if name == "image_encoder.patch_embed.proj.bias": data = data.reshape(1, data.shape[0], 1, 1) n_dims = len(data.shape) dshape = data.shape print(" New shape: ", dshape) # header str = name.encode('utf-8') fout.write(struct.pack("iii", n_dims, len(str), ftype_cur)) for i in range(n_dims): fout.write(struct.pack("i", dshape[n_dims - 1 - i])) fout.write(str) # data data.tofile(fout) fout.close() print("Done. Output file: " + fname_out) print("") ggml-org-ggml-7ec8045/examples/sam/example.jpg000066400000000000000000002326041506673203700212650ustar00rootroot00000000000000JFIFC  !"$"$C" J !1AQ"aq2B#R3b$r񂒢%CSc4sU5!1AQ"aq2#34B$ ?ժբu+j@@ [ClV[6 Eek& `5ڷ@vchQ ;d bD5 kҲi@+`+t4;Vo[ lv P4[ƫ1Df(&Y f(Qb@Qb)m')]$PdPiBhHh~*0(R(TR(PⴢA@ *51G (PFmQ( FDb4`V4(h/4{k`SAh};@CԦZe]##8$R.P!(3g$z0?JisOq e2;{Ed:A<?Zu?VFۅvטa6Ͻ(Xc|drʨrI=E)gr%`VGwL:1[fT`2khC E) Lf(H(fS*RpHҀj|Bmu.@ېۉS8IUop .P4Ҙ.p u@MjɈϊFmR;;O,S%R|!ؑdTsQk y'E 1< vid4p/OSUo qYhxXq8rBZ*_fQVI/fM%m$rqȢy~]K]FNg{b3C0=^AwȒZ )D"T9([ҤRRW8ϯ4n$ Kcq[܏oBtS:52>Ӑ.W-oMvڑ< ZP8J yR;08"a@˷uI,g}'={)<#xYSr=fEei6G  $Lc9OioI=W <'f#8ۃ*j9l$R$T|lP@!_&E.“a@,(a@ )R(=(/J)B((Hm+ڈvDlQQ@(VR(jҊQEm(*6QX FFb QG4bQb-L݁ 0iEZ(h 0@V0)@(D`@V`Vh  ڠ> x fnm;ݛhџ@Hϸm!czn+q+o=Zzk1gyhΧʕXO`y^x!SȏPE YA8 +pG$_w O4M1y#+uAY1da v5-yKB) N6oQFd+XG=~][I'xhbL㍅ۊ+c( DuE ncI p&lb=q1`>e BuI%c##{>M$߬.A%_ cթ=A ޔI)_' X$ 9y{|O4|I} rkZݴ{4*0N:>k FhEއ9KHf\ok% 1!y$cE$yuӮϮKS:G!CF}N9>)޳]#M,`?#dpc8}*5}'GvG瑞}lzѳzyU8m&]KV>Di5Xq~;Ug9t 1:a7?g U$U"$PɥH"4H,(Ȃj3ך1) M#< 1RJڹq:?(H>,~d*/zw[|3PoOqLfY.osjZ1$[Tr=2kǞL3F̡'Gk}jGR~.&syMYXol육ATM8edeQbDD?Һ#tە ګ?^S@/gO;|g 2ĂNw'{/{umö {gpIіgwл9rJ0}w%OB0 )\+QBE4LE~zp,(hXRl)r(PPhNh#e8ڶ(QB E,E*J)EJ``V'ڀ9,`gHc.vڻc*X0 A io&fȸN}=LkfŦ\ݛbJH2$pqq]<1 ڍ +@v#mæ6>ƜVa#$:W`|k8mEIWdP" 7`2>$ʳ7gopQ;C+1!AڣWI>GQ$`>U4NG ]u`#v?/%uU;sͺ v?ޒ m]QQ [J;f2(`=?LԜ TkX4@R0KDܙ-32' vK(#^S-OB ?[ iow[]ƅcQ:\t兝g6@g>(/ Hu|*zEjTEڗ?aTdܳwʷ<O55EY]#݃9u}3wbd/H I_%ȹ1q})).]S9ǡZF亞+`:Hd-蓪TKڟ.01HXGuvuON¾R !2o>Z|`Ss/p@4X{ Ѽ{sdKRc̲+0s>b |` {TwXbGq-K ;g4kWk+q/o_%iDw,AkGsX_uoFY3<2ݽ@;H s<ަ=޺ʂYX =5.qu}:n~ǡtH4 D +\~GږpҦhdPJz4Iȝ0ᜲ$d*9U;Ni t 4&9bx4 %{HGB]Kmh7Vcnp;\siڲFRZ1ѻ,=>dWeq&傕`>cƱmunRI"o1vwlYdo^Iו.21|`g=V݄qnY <}#8MOسt%_6qd"&Nc>AKL{%^"Kl{ToLBiwVY3)Y'mcʥ-\E=ڦ L}5 J,ApǷ>^\<=((#f@2(=H4a@•".@E&—aIҀ 2;Q J D UE QE(P V *E* EVsJc4 te l A#=V>Ll!ks#(uqϡgj%q}>ᧆXsP^6ܸFeYsk'1ʹSr))c6B<@9ޟTe4s.S>8Q~0;NR2bI#w:lR{暵I4^xQܑ$P]dQ#ΚI UNwkK}Cv;▹6,!Ӥy%٘i98 xsmuvvp4"uHفQOZۧnd[Gf,ۮSyG%Kq;p{_x 3ԥG@@HUfb ()t*]SNk%@82ߘZnD;pNg?KZ8K8'apOſA-Q.&2y/>?JjS5v4>3_6{K5Q< ql2;Q). 
ggml-org-ggml-7ec8045/examples/sam/example.jpg000066400000000000000000002326041506673203700212650ustar00rootroot00000000000000
ggml-org-ggml-7ec8045/examples/sam/sam.cpp000066400000000000000000003106461506673203700204170ustar00rootroot00000000000000#define _USE_MATH_DEFINES // for M_PI
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows

#include "ggml.h"
#include "ggml-cpu.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <cinttypes>
#include <fstream>
#include <map>
#include <string>
#include <thread>
#include <vector>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

// default hparams (ViT-B SAM)
struct sam_hparams {
    int32_t n_enc_state               = 768;
    int32_t n_enc_layer               = 12;
    int32_t n_enc_head                = 12;
    int32_t n_enc_out_chans           = 256;
    int32_t n_pt_embd                 = 4;
    int32_t n_dec_heads               = 8;
    int32_t ftype                     = 1;
    float   mask_threshold            = 0.f;
    float   iou_threshold             = 0.88f;
    float   stability_score_threshold = 0.95f;
    float   stability_score_offset    = 1.0f;
    float   eps                       = 1e-6f;
    float   eps_decoder_transformer   = 1e-5f;

    int32_t n_enc_head_dim() const { return n_enc_state / n_enc_head; }
    int32_t n_img_size()     const { return 1024; }
    int32_t n_window_size()  const { return 14; }
    int32_t n_patch_size()   const { return 16; }
    int32_t n_img_embd()     const { return n_img_size() / n_patch_size(); }

    std::vector<int32_t> global_attn_indices() const {
        switch (n_enc_state) {
            case  768: return {  2,  5,  8, 11 };
            case 1024: return {  5, 11, 17, 23 };
            case 1280: return {  7, 15, 23, 31 };
            default:
                {
                    fprintf(stderr, "%s: unsupported n_enc_state = %d\n", __func__, n_enc_state);
                } break;
        };

        return {};
    }

    bool is_global_attn(int32_t layer) const {
        const auto indices = global_attn_indices();

        for (const auto & idx : indices) {
            if (layer == idx) {
                return true;
            }
        }

        return false;
    }
};

struct sam_layer_enc {
    struct ggml_tensor * norm1_w;
    struct ggml_tensor * norm1_b;

    struct ggml_tensor * rel_pos_w;
    struct ggml_tensor * rel_pos_h;

    struct ggml_tensor * qkv_w;
    struct ggml_tensor * qkv_b;

    struct ggml_tensor * proj_w;
    struct ggml_tensor * proj_b;

    struct ggml_tensor * norm2_w;
    struct ggml_tensor * norm2_b;

    struct ggml_tensor * mlp_lin1_w;
    struct ggml_tensor * mlp_lin1_b;

    struct ggml_tensor * mlp_lin2_w;
    struct ggml_tensor * mlp_lin2_b;
};

struct sam_encoder_image {
    struct ggml_tensor * pe;

    struct ggml_tensor * proj_w;
    struct ggml_tensor * proj_b;

    struct ggml_tensor * neck_conv_0;
    struct ggml_tensor * neck_norm_0_w;
    struct ggml_tensor * neck_norm_0_b;
    struct ggml_tensor * neck_conv_1;
    struct ggml_tensor * neck_norm_1_w;
    struct ggml_tensor * neck_norm_1_b;

    std::vector<sam_layer_enc> layers;
};

struct sam_encoder_prompt {
    struct ggml_tensor * pe;

    struct ggml_tensor * not_a_pt_embd_w;
    std::vector<struct ggml_tensor *> pt_embd;

    struct ggml_tensor * no_mask_embd_w;
    //std::vector<struct ggml_tensor *> mask_down_w;
    //std::vector<struct ggml_tensor *> mask_down_b;
};

struct sam_layer_dec_transformer_attn {
    // q_proj
    struct ggml_tensor * q_w;
    struct ggml_tensor * q_b;

    // k_proj
    struct ggml_tensor * k_w;
    struct ggml_tensor * k_b;

    // v_proj
    struct ggml_tensor * v_w;
    struct ggml_tensor * v_b;

    // out_proj
    struct ggml_tensor * out_w;
    struct ggml_tensor * out_b;
};
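
// The mask decoder is a small two-way transformer. Each layer below runs
// self-attention over the tokens, cross-attention from the tokens to the
// image embedding, an MLP, and cross-attention from the image embedding back
// to the tokens, with a layer norm (norm1..norm4) after each of the four steps.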
struct sam_layer_dec_transformer {
    sam_layer_dec_transformer_attn self_attn;

    // norm1
    struct ggml_tensor * norm1_w;
    struct ggml_tensor * norm1_b;

    sam_layer_dec_transformer_attn cross_attn_token_to_img;

    // norm2
    struct ggml_tensor * norm2_w;
    struct ggml_tensor * norm2_b;

    // mlp.lin1
    struct ggml_tensor * mlp_lin1_w;
    struct ggml_tensor * mlp_lin1_b;

    // mlp.lin2
    struct ggml_tensor * mlp_lin2_w;
    struct ggml_tensor * mlp_lin2_b;

    // norm3
    struct ggml_tensor * norm3_w;
    struct ggml_tensor * norm3_b;

    // norm4
    struct ggml_tensor * norm4_w;
    struct ggml_tensor * norm4_b;

    sam_layer_dec_transformer_attn cross_attn_img_to_token;
};

struct sam_layer_dec_output_hypernet_mlps {
    // mlps_*.layers.0
    struct ggml_tensor * w_0;
    struct ggml_tensor * b_0;

    // mlps_*.layers.1
    struct ggml_tensor * w_1;
    struct ggml_tensor * b_1;

    // mlps_*.layers.2
    struct ggml_tensor * w_2;
    struct ggml_tensor * b_2;
};

struct sam_decoder_mask {
    std::vector<sam_layer_dec_transformer> transformer_layers;

    // transformer.final_attn_token_to_image
    sam_layer_dec_transformer_attn transformer_final_attn_token_to_img;

    // transformer.norm_final
    struct ggml_tensor * transformer_norm_final_w;
    struct ggml_tensor * transformer_norm_final_b;

    // output_upscaling.0
    struct ggml_tensor * output_upscaling_0_w;
    struct ggml_tensor * output_upscaling_0_b;

    // output_upscaling.1
    struct ggml_tensor * output_upscaling_1_w;
    struct ggml_tensor * output_upscaling_1_b;

    // output_upscaling.3
    struct ggml_tensor * output_upscaling_3_w;
    struct ggml_tensor * output_upscaling_3_b;

    // output_hypernetworks_mlps
    std::vector<sam_layer_dec_output_hypernet_mlps> output_hypernet_mlps;

    // iou_prediction_head.0
    struct ggml_tensor * iou_prediction_head_0_w;
    struct ggml_tensor * iou_prediction_head_0_b;

    // iou_prediction_head.1
    struct ggml_tensor * iou_prediction_head_1_w;
    struct ggml_tensor * iou_prediction_head_1_b;

    // iou_prediction_head.2
    struct ggml_tensor * iou_prediction_head_2_w;
    struct ggml_tensor * iou_prediction_head_2_b;

    // iou_token.weight
    struct ggml_tensor * iou_token_w;

    // mask_tokens.weight
    struct ggml_tensor * mask_tokens_w;
};

struct sam_state {
    struct ggml_tensor * embd_img;

    struct ggml_tensor * low_res_masks;
    struct ggml_tensor * iou_predictions;

    //struct ggml_tensor * tmp_save = {};

    struct ggml_context * ctx;

    // buffer for `ggml_graph_plan.work_data`
    std::vector<uint8_t> work_buffer;
    // buffers to evaluate the model
    std::vector<uint8_t> buf_compute_img_enc;
    std::vector<uint8_t> buf_compute_fast;

    ggml_gallocr_t allocr = {};
};

// void save_tensor(sam_state& state, struct ggml_tensor * t, struct ggml_cgraph * gf) {
//     if (!state.tmp_save) {
//         state.tmp_save = ggml_new_tensor(state.ctx, t->type, t->n_dims, t->ne);
//     }
//     struct ggml_tensor * tmp0 = ggml_cpy(state.ctx, t, state.tmp_save);
//     ggml_build_forward_expand(gf, tmp0);
// }

struct sam_model {
    sam_hparams hparams;

    sam_encoder_image  enc_img;
    sam_encoder_prompt enc_prompt;
    sam_decoder_mask   dec;

    struct ggml_context * ctx;
    std::map<std::string, struct ggml_tensor *> tensors;
};

struct sam_point {
    float x;
    float y;
};

struct sam_box {
    float x1;
    float y1;
    float x2;
    float y2;
};

// RGB uint8 image
struct sam_image_u8 {
    int nx;
    int ny;

    std::vector<uint8_t> data;
};

// RGB float32 image
// Memory layout: RGBRGBRGB...
struct sam_image_f32 {
    int nx;
    int ny;

    std::vector<float> data;
};

enum sam_prompt_type {
    SAM_PROMPT_TYPE_POINT = 0,
    SAM_PROMPT_TYPE_BOX   = 1,
};

struct sam_prompt {
    sam_prompt_type prompt_type = SAM_PROMPT_TYPE_POINT;

    sam_point pt = { 414.375f, 162.796875f, };

    sam_box box = { 368.0f, 144.0f, 441.0f, 173.0f };
};

struct sam_params {
    int32_t seed      = -1; // RNG seed
    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());

    std::string model     = "models/sam-vit-b/ggml-model-f16.bin"; // model path
    std::string fname_inp = "img.jpg";
    std::string fname_out = "img.out";

    float   mask_threshold            = 0.f;
    float   iou_threshold             = 0.88f;
    float   stability_score_threshold = 0.95f;
    float   stability_score_offset    = 1.0f;
    float   eps                       = 1e-6f;
    float   eps_decoder_transformer   = 1e-5f;

    sam_prompt prompt;

    bool multimask_output = true;
};

void print_t_f32(const char* title, struct ggml_tensor * t, int n = 10) {
    printf("%s\n", title);
    float * data = (float *)t->data;
    printf("dims: % " PRId64 " % " PRId64 " % " PRId64 " % " PRId64 " f32\n", t->ne[0], t->ne[1], t->ne[2], t->ne[3]);
    printf("First & Last %d elements:\n", n);
    for (int i = 0; i < std::min((int) (t->ne[0]*t->ne[1]), n); i++) {
        printf("%.5f ", data[i]);
        if (i != 0 && i % t->ne[0] == 0) {
            printf("\n");
        }
    }
    printf("\n");
    for (int i = 0; i < std::min((int) (t->ne[0]*t->ne[1]), n); i++) {
        printf("%.5f ", data[ggml_nelements(t) - n + i]);
        if ((ggml_nelements(t) - n + i) % t->ne[0] == 0) {
            printf("\n");
        }
    }
    printf("\n");
    double sum = 0.0;
    for (int i = 0; i < ggml_nelements(t); i++) {
        sum += data[i];
    }
    printf("sum: %f\n\n", sum);
}

static void ggml_disconnect_node_from_graph(ggml_tensor * t) {
    t->op = GGML_OP_NONE;
    for (int i = 0; i < GGML_MAX_SRC; i++) {
        t->src[i] = NULL;
    }
}

static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph, int n_threads) {
    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads, nullptr);

    if (plan.work_size > 0) {
        buf.resize(plan.work_size);
        plan.work_data = buf.data();
    }

    ggml_graph_compute(graph, &plan);
}

static void ggml_sam_sin(struct ggml_tensor * dst , const struct ggml_tensor * src, int ith, int nth, void * userdata) {
    GGML_ASSERT(userdata == NULL);
    GGML_ASSERT(ggml_are_same_shape(dst, src));
    GGML_ASSERT(ggml_is_contiguous(dst));
    GGML_ASSERT(ggml_is_contiguous(src));

    const float * src_data = ggml_get_data_f32(src);
    float * dst_data = ggml_get_data_f32(dst);

    const int ne = (int)ggml_nelements(dst);
    const int dr = (ne + nth - 1) / nth;
    const int ie0 = dr * ith;
    const int ie1 = std::min(ie0 + dr, ne);

    for (int i = ie0; i < ie1; ++i) {
        dst_data[i] = sinf(src_data[i]);
    }
}

static void ggml_sam_cos(struct ggml_tensor * dst , const struct ggml_tensor * src, int ith, int nth, void * userdata) {
    GGML_ASSERT(userdata == NULL);
    GGML_ASSERT(ggml_are_same_shape(dst, src));
    GGML_ASSERT(ggml_is_contiguous(dst));
    GGML_ASSERT(ggml_is_contiguous(src));

    const float * src_data = ggml_get_data_f32(src);
    float * dst_data = ggml_get_data_f32(dst);

    const int ne = (int)ggml_nelements(dst);
    const int dr = (ne + nth - 1) / nth;
    const int ie0 = dr * ith;
    const int ie1 = std::min(ie0 + dr, ne);

    for (int i = ie0; i < ie1; ++i) {
        dst_data[i] = cosf(src_data[i]);
    }
}

bool sam_image_load_from_file(const std::string & fname, sam_image_u8 & img) {
    int nx, ny, nc;
    auto data = stbi_load(fname.c_str(), &nx, &ny, &nc, 3);
    if (!data) {
        fprintf(stderr, "%s: failed to load '%s'\n", __func__, fname.c_str());
        return false;
    }

    img.nx = nx;
    img.ny = ny;
    img.data.resize(nx * ny * 3);
    memcpy(img.data.data(), data, nx * ny * 3);

    stbi_image_free(data);
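    // img.data now owns an RGB copy of the pixels (nx*ny*3 bytes); the stb
    // buffer itself was released above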
return true; } // ref: https://github.com/facebookresearch/segment-anything/blob/efeab7296ab579d4a261e554eca80faf6b33924a/segment_anything/modeling/sam.py#L164 // resize largest dimension to 1024 // normalize: x = (x - mean) / std // mean = [123.675, 116.28, 103.53] // std = [58.395, 57.12, 57.375] // TODO: why are these hardcoded !? // pad to 1024x1024 // TODO: for some reason, this is not numerically identical to pytorch's interpolation bool sam_image_preprocess(const sam_image_u8 & img, sam_image_f32 & res) { const int nx = img.nx; const int ny = img.ny; const int nx2 = 1024; const int ny2 = 1024; res.nx = nx2; res.ny = ny2; res.data.resize(3*nx2*ny2); const float scale = std::max(nx, ny) / 1024.0f; fprintf(stderr, "%s: scale = %f\n", __func__, scale); const int nx3 = int(nx/scale + 0.5f); const int ny3 = int(ny/scale + 0.5f); const float m3[3] = { 123.675f, 116.280f, 103.530f }; const float s3[3] = { 58.395f, 57.120f, 57.375f }; for (int y = 0; y < ny3; y++) { for (int x = 0; x < nx3; x++) { for (int c = 0; c < 3; c++) { // linear interpolation const float sx = (x + 0.5f)*scale - 0.5f; const float sy = (y + 0.5f)*scale - 0.5f; const int x0 = std::max(0, (int) std::floor(sx)); const int y0 = std::max(0, (int) std::floor(sy)); const int x1 = std::min(x0 + 1, nx - 1); const int y1 = std::min(y0 + 1, ny - 1); const float dx = sx - x0; const float dy = sy - y0; const int j00 = 3*(y0*nx + x0) + c; const int j01 = 3*(y0*nx + x1) + c; const int j10 = 3*(y1*nx + x0) + c; const int j11 = 3*(y1*nx + x1) + c; const float v00 = img.data[j00]; const float v01 = img.data[j01]; const float v10 = img.data[j10]; const float v11 = img.data[j11]; const float v0 = v00*(1.0f - dx) + v01*dx; const float v1 = v10*(1.0f - dx) + v11*dx; const float v = v0*(1.0f - dy) + v1*dy; const uint8_t v2 = std::min(std::max(std::round(v), 0.0f), 255.0f); const int i = 3*(y*nx3 + x) + c; res.data[i] = (float(v2) - m3[c]) / s3[c]; } } } return true; } // load the model's weights from a file bool sam_model_load(const sam_params & params, sam_model & model) { fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, params.model.c_str()); auto fin = std::ifstream(params.model, std::ios::binary); if (!fin) { fprintf(stderr, "%s: failed to open '%s'\n", __func__, params.model.c_str()); return false; } // verify magic { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); if (magic != 0x67676d6c) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, params.model.c_str()); return false; } } // load hparams { // Override defaults with user choices model.hparams.mask_threshold = params.mask_threshold; model.hparams.iou_threshold = params.iou_threshold; model.hparams.stability_score_threshold = params.stability_score_threshold; model.hparams.stability_score_offset = params.stability_score_offset; model.hparams.eps = params.eps; model.hparams.eps_decoder_transformer = params.eps_decoder_transformer; auto & hparams = model.hparams; fin.read((char *) &hparams.n_enc_state, sizeof(hparams.n_enc_state)); fin.read((char *) &hparams.n_enc_layer, sizeof(hparams.n_enc_layer)); fin.read((char *) &hparams.n_enc_head, sizeof(hparams.n_enc_head)); fin.read((char *) &hparams.n_enc_out_chans, sizeof(hparams.n_enc_out_chans)); fin.read((char *) &hparams.n_pt_embd, sizeof(hparams.n_pt_embd)); fin.read((char *) &hparams.ftype, sizeof(hparams.ftype)); const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; printf("%s: n_enc_state = %d\n", __func__, hparams.n_enc_state); printf("%s: n_enc_layer = %d\n", 
__func__, hparams.n_enc_layer); printf("%s: n_enc_head = %d\n", __func__, hparams.n_enc_head); printf("%s: n_enc_out_chans = %d\n", __func__, hparams.n_enc_out_chans); printf("%s: n_pt_embd = %d\n", __func__, hparams.n_pt_embd); printf("%s: ftype = %d\n", __func__, hparams.ftype); printf("%s: qntvr = %d\n", __func__, qntvr); hparams.ftype %= GGML_QNT_VERSION_FACTOR; } // for the big tensors, we have the option to store the data in 16-bit floats or quantized // in order to save memory and also to speed up the computation ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype) (model.hparams.ftype)); if (wtype == GGML_TYPE_COUNT) { fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", __func__, params.model.c_str(), model.hparams.ftype); return false; } auto & ctx = model.ctx; const size_t ctx_size = [&]() { size_t ctx_size = 0; const auto & hparams = model.hparams; const int32_t n_enc_state = hparams.n_enc_state; const int32_t n_enc_layer = hparams.n_enc_layer; const int32_t n_enc_head_dim = hparams.n_enc_head_dim(); const int32_t n_enc_out_chans = hparams.n_enc_out_chans; const int32_t n_pt_embd = hparams.n_pt_embd; const int32_t n_enc_layer_local = hparams.global_attn_indices().size(); const int32_t n_enc_layer_global = n_enc_layer - n_enc_layer_local; const int32_t n_img_embd = hparams.n_img_embd(); const int32_t n_window_size = hparams.n_window_size(); const int32_t n_patch_size = hparams.n_patch_size(); // image encoder { ctx_size += n_enc_state*n_img_embd*n_img_embd*ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_state*3*n_patch_size*n_patch_size*ggml_type_size(GGML_TYPE_F16); ctx_size += n_enc_state*ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_state*n_enc_out_chans*1*1*ggml_type_size(GGML_TYPE_F16); ctx_size += n_enc_out_chans*n_enc_out_chans*3*3*ggml_type_size(GGML_TYPE_F16); ctx_size += n_enc_out_chans*ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_out_chans*ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_out_chans*ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_out_chans*ggml_type_size(GGML_TYPE_F32); } // image encoder layers { ctx_size += n_enc_layer*n_enc_state*ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_layer*n_enc_state*ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_layer_global*n_enc_head_dim*(2*n_img_embd - 1)*ggml_type_size(GGML_TYPE_F16); ctx_size += n_enc_layer_global*n_enc_head_dim*(2*n_img_embd - 1)*ggml_type_size(GGML_TYPE_F16); ctx_size += n_enc_layer_local*n_enc_head_dim*(2*n_window_size - 1)*ggml_type_size(GGML_TYPE_F16); ctx_size += n_enc_layer_local*n_enc_head_dim*(2*n_window_size - 1)*ggml_type_size(GGML_TYPE_F16); ctx_size += n_enc_layer*3*n_enc_state*n_enc_state*ggml_type_size(GGML_TYPE_F16); ctx_size += n_enc_layer*3*n_enc_state* ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_layer*n_enc_state*n_enc_state*ggml_type_size(GGML_TYPE_F16); ctx_size += n_enc_layer*n_enc_state* ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_layer*n_enc_state*ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_layer*n_enc_state*ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_layer*4*n_enc_state*n_enc_state*ggml_type_size(GGML_TYPE_F16); ctx_size += n_enc_layer*4*n_enc_state* ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_layer*4*n_enc_state*n_enc_state*ggml_type_size(GGML_TYPE_F16); ctx_size += n_enc_layer*4*n_enc_state* ggml_type_size(GGML_TYPE_F32); } ctx_size += (8 + 14*n_enc_layer)*ggml_tensor_overhead(); // prompt encoder { ctx_size += n_enc_out_chans*ggml_type_size(GGML_TYPE_F16); // 2*(n_enc_out_chans/2) ctx_size += 
n_enc_out_chans*ggml_type_size(GGML_TYPE_F32); ctx_size += n_pt_embd*n_enc_out_chans*ggml_type_size(GGML_TYPE_F32); } ctx_size += (2 + n_pt_embd)*ggml_tensor_overhead(); // mask decoder { //transformer { const int tfm_layers_count = 2; const int qkv_count = 3; const int norm_count = 4; const int n_hypernet_mpls_count = 4; // self_attn ctx_size += tfm_layers_count*qkv_count*n_enc_state*n_enc_state*ggml_type_size(GGML_TYPE_F16); ctx_size += tfm_layers_count*qkv_count*n_enc_state* ggml_type_size(GGML_TYPE_F32); ctx_size += tfm_layers_count*n_enc_state* ggml_type_size(GGML_TYPE_F32); // all norms ctx_size += tfm_layers_count*norm_count*n_enc_state*ggml_type_size(GGML_TYPE_F32); ctx_size += tfm_layers_count*norm_count*n_enc_state*ggml_type_size(GGML_TYPE_F32); // cross_attn_token_to_img ctx_size += tfm_layers_count*qkv_count*n_enc_state*(n_enc_state/2)*ggml_type_size(GGML_TYPE_F16); ctx_size += tfm_layers_count*qkv_count*(n_enc_state/2)* ggml_type_size(GGML_TYPE_F32); ctx_size += tfm_layers_count*n_enc_state* ggml_type_size(GGML_TYPE_F32); // mlp ctx_size += tfm_layers_count*8*n_enc_out_chans*n_enc_out_chans*ggml_type_size(GGML_TYPE_F16); ctx_size += tfm_layers_count*8*n_enc_out_chans* ggml_type_size(GGML_TYPE_F32); ctx_size += tfm_layers_count*n_enc_out_chans*8*n_enc_out_chans*ggml_type_size(GGML_TYPE_F16); ctx_size += tfm_layers_count*n_enc_out_chans* ggml_type_size(GGML_TYPE_F32); // cross_attn_img_to_token ctx_size += tfm_layers_count*qkv_count*n_enc_state*(n_enc_state/2)*ggml_type_size(GGML_TYPE_F16); ctx_size += tfm_layers_count*qkv_count*(n_enc_state/2)* ggml_type_size(GGML_TYPE_F32); ctx_size += tfm_layers_count*n_enc_state* ggml_type_size(GGML_TYPE_F32); // transformer_final_attn_token_to_img ctx_size += qkv_count*n_enc_state*(n_enc_state/2)*ggml_type_size(GGML_TYPE_F16); ctx_size += qkv_count*(n_enc_state/2)* ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_state* ggml_type_size(GGML_TYPE_F32); // transformer_norm_final ctx_size += norm_count*n_enc_state*ggml_type_size(GGML_TYPE_F32); ctx_size += norm_count*n_enc_state*ggml_type_size(GGML_TYPE_F32); // output_upscaling ctx_size += n_enc_out_chans*n_img_embd*2*2*ggml_type_size(GGML_TYPE_F16); ctx_size += 3*n_img_embd* ggml_type_size(GGML_TYPE_F32); ctx_size += n_enc_out_chans*n_img_embd*(n_img_embd/2)*2*2*ggml_type_size(GGML_TYPE_F16); ctx_size += (n_img_embd/2)* ggml_type_size(GGML_TYPE_F32); // output_hypernetworks_mlps ctx_size += n_hypernet_mpls_count*2*n_enc_out_chans*n_enc_out_chans*ggml_type_size(GGML_TYPE_F16); ctx_size += n_hypernet_mpls_count*2*n_enc_out_chans* ggml_type_size(GGML_TYPE_F32); ctx_size += n_hypernet_mpls_count*n_enc_out_chans*(n_img_embd/2)*ggml_type_size(GGML_TYPE_F16); ctx_size += n_hypernet_mpls_count*(n_img_embd/2)* ggml_type_size(GGML_TYPE_F32); // iou_prediction_head ctx_size += 2*n_enc_out_chans*n_enc_out_chans*ggml_type_size(GGML_TYPE_F16); ctx_size += 2*n_enc_out_chans* ggml_type_size(GGML_TYPE_F32); ctx_size += n_pt_embd*n_enc_out_chans*ggml_type_size(GGML_TYPE_F16); ctx_size += n_pt_embd* ggml_type_size(GGML_TYPE_F32); // iou_token_w ctx_size += n_enc_out_chans*ggml_type_size(GGML_TYPE_F32); // mask_tokens_w ctx_size += n_pt_embd*n_enc_out_chans*ggml_type_size(GGML_TYPE_F32); } } fprintf(stderr, "%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0)); return ctx_size; }(); // create the ggml context { struct ggml_init_params params = { /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, }; ctx = ggml_init(params); if (!ctx) { fprintf(stderr, "%s: ggml_init() 
failed\n", __func__); return false; } } // prepare memory for the weights { const auto & hparams = model.hparams; const int32_t n_enc_state = hparams.n_enc_state; const int32_t n_enc_layer = hparams.n_enc_layer; const int32_t n_enc_head_dim = hparams.n_enc_head_dim(); const int32_t n_enc_out_chans = hparams.n_enc_out_chans; const int32_t n_pt_embd = hparams.n_pt_embd; const int32_t n_img_embd = hparams.n_img_embd(); const int32_t n_window_size = hparams.n_window_size(); const int32_t n_patch_size = hparams.n_patch_size(); model.enc_img.layers.resize(n_enc_layer); // image encoder { auto & enc = model.enc_img; enc.pe = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, n_enc_state, n_img_embd, n_img_embd, 1); enc.proj_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, n_patch_size, n_patch_size, 3, n_enc_state); enc.proj_b = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 1, 1, n_enc_state); enc.neck_conv_0 = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, 1, n_enc_state, n_enc_out_chans); enc.neck_conv_1 = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 3, 3, n_enc_out_chans, n_enc_out_chans); enc.neck_norm_0_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); enc.neck_norm_0_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); enc.neck_norm_1_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); enc.neck_norm_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); model.tensors["image_encoder.pos_embed"] = enc.pe; model.tensors["image_encoder.patch_embed.proj.weight"] = enc.proj_w; model.tensors["image_encoder.patch_embed.proj.bias"] = enc.proj_b; model.tensors["image_encoder.neck.0.weight"] = enc.neck_conv_0; model.tensors["image_encoder.neck.2.weight"] = enc.neck_conv_1; model.tensors["image_encoder.neck.1.weight"] = enc.neck_norm_0_w; model.tensors["image_encoder.neck.1.bias"] = enc.neck_norm_0_b; model.tensors["image_encoder.neck.3.weight"] = enc.neck_norm_1_w; model.tensors["image_encoder.neck.3.bias"] = enc.neck_norm_1_b; for (int i = 0; i < n_enc_layer; ++i) { auto & layer = enc.layers[i]; layer.norm1_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_state); layer.norm1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_state); if (hparams.is_global_attn(i)) { layer.rel_pos_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_head_dim, 2*n_img_embd - 1); layer.rel_pos_h = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_head_dim, 2*n_img_embd - 1); } else { layer.rel_pos_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_head_dim, 2*n_window_size - 1); layer.rel_pos_h = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_head_dim, 2*n_window_size - 1); } layer.qkv_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_state, 3*n_enc_state); layer.qkv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3*n_enc_state); layer.proj_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_state, n_enc_state); layer.proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_state); layer.norm2_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_state); layer.norm2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_state); layer.mlp_lin1_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_state, 4*n_enc_state); layer.mlp_lin1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4*n_enc_state); layer.mlp_lin2_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 4*n_enc_state, n_enc_state); layer.mlp_lin2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_state); model.tensors["image_encoder.blocks." + std::to_string(i) + ".norm1.weight"] = layer.norm1_w; model.tensors["image_encoder.blocks." 
+ std::to_string(i) + ".norm1.bias"] = layer.norm1_b; model.tensors["image_encoder.blocks." + std::to_string(i) + ".attn.rel_pos_w"] = layer.rel_pos_w; model.tensors["image_encoder.blocks." + std::to_string(i) + ".attn.rel_pos_h"] = layer.rel_pos_h; model.tensors["image_encoder.blocks." + std::to_string(i) + ".attn.qkv.weight"] = layer.qkv_w; model.tensors["image_encoder.blocks." + std::to_string(i) + ".attn.qkv.bias"] = layer.qkv_b; model.tensors["image_encoder.blocks." + std::to_string(i) + ".attn.proj.weight"] = layer.proj_w; model.tensors["image_encoder.blocks." + std::to_string(i) + ".attn.proj.bias"] = layer.proj_b; model.tensors["image_encoder.blocks." + std::to_string(i) + ".norm2.weight"] = layer.norm2_w; model.tensors["image_encoder.blocks." + std::to_string(i) + ".norm2.bias"] = layer.norm2_b; model.tensors["image_encoder.blocks." + std::to_string(i) + ".mlp.lin1.weight"] = layer.mlp_lin1_w; model.tensors["image_encoder.blocks." + std::to_string(i) + ".mlp.lin1.bias"] = layer.mlp_lin1_b; model.tensors["image_encoder.blocks." + std::to_string(i) + ".mlp.lin2.weight"] = layer.mlp_lin2_w; model.tensors["image_encoder.blocks." + std::to_string(i) + ".mlp.lin2.bias"] = layer.mlp_lin2_b; } } // prompt encoder { auto & enc = model.enc_prompt; enc.pe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_enc_out_chans/2, 2); enc.not_a_pt_embd_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); enc.no_mask_embd_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); model.tensors["prompt_encoder.pe_layer.positional_encoding_gaussian_matrix"] = enc.pe; model.tensors["prompt_encoder.not_a_point_embed.weight"] = enc.not_a_pt_embd_w; model.tensors["prompt_encoder.no_mask_embed.weight"] = enc.no_mask_embd_w; enc.pt_embd.resize(n_pt_embd); for (int i = 0; i < n_pt_embd; i++) { enc.pt_embd[i] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); model.tensors["prompt_encoder.point_embeddings." 
+ std::to_string(i) + ".weight"] = enc.pt_embd[i]; } } // mask decoder { auto & dec = model.dec; auto & tfm_layers = dec.transformer_layers; const int tfm_layers_count = 2; tfm_layers.resize(tfm_layers_count); for (int i = 0; i < tfm_layers_count; ++i) { auto& l = tfm_layers[i]; l.self_attn.q_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans); l.self_attn.q_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.self_attn.k_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans); l.self_attn.k_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.self_attn.v_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans); l.self_attn.v_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.self_attn.out_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans); l.self_attn.out_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.norm1_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.norm1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.cross_attn_token_to_img.q_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans/2); l.cross_attn_token_to_img.q_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans/2); l.cross_attn_token_to_img.k_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans/2); l.cross_attn_token_to_img.k_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans/2); l.cross_attn_token_to_img.v_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans/2); l.cross_attn_token_to_img.v_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans/2); l.cross_attn_token_to_img.out_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans/2, n_enc_out_chans); l.cross_attn_token_to_img.out_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.norm2_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.norm2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.mlp_lin1_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, 8*n_enc_out_chans); l.mlp_lin1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8*n_enc_out_chans); l.mlp_lin2_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 8*n_enc_out_chans, n_enc_out_chans); l.mlp_lin2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.norm3_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.norm3_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.norm4_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.norm4_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); l.cross_attn_img_to_token.q_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans/2); l.cross_attn_img_to_token.q_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans/2); l.cross_attn_img_to_token.k_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans/2); l.cross_attn_img_to_token.k_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans/2); l.cross_attn_img_to_token.v_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans/2); l.cross_attn_img_to_token.v_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans/2); l.cross_attn_img_to_token.out_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans/2, n_enc_out_chans); l.cross_attn_img_to_token.out_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); const auto prefix = "mask_decoder.transformer.layers." 
+ std::to_string(i) + "."; model.tensors[prefix + "self_attn.q_proj.weight"] = l.self_attn.q_w; model.tensors[prefix + "self_attn.q_proj.bias"] = l.self_attn.q_b; model.tensors[prefix + "self_attn.k_proj.weight"] = l.self_attn.k_w; model.tensors[prefix + "self_attn.k_proj.bias"] = l.self_attn.k_b; model.tensors[prefix + "self_attn.v_proj.weight"] = l.self_attn.v_w; model.tensors[prefix + "self_attn.v_proj.bias"] = l.self_attn.v_b; model.tensors[prefix + "self_attn.out_proj.weight"] = l.self_attn.out_w; model.tensors[prefix + "self_attn.out_proj.bias"] = l.self_attn.out_b; model.tensors[prefix + "norm1.weight"] = l.norm1_w; model.tensors[prefix + "norm1.bias"] = l.norm1_b; model.tensors[prefix + "cross_attn_token_to_image.q_proj.weight"] = l.cross_attn_token_to_img.q_w; model.tensors[prefix + "cross_attn_token_to_image.q_proj.bias"] = l.cross_attn_token_to_img.q_b; model.tensors[prefix + "cross_attn_token_to_image.k_proj.weight"] = l.cross_attn_token_to_img.k_w; model.tensors[prefix + "cross_attn_token_to_image.k_proj.bias"] = l.cross_attn_token_to_img.k_b; model.tensors[prefix + "cross_attn_token_to_image.v_proj.weight"] = l.cross_attn_token_to_img.v_w; model.tensors[prefix + "cross_attn_token_to_image.v_proj.bias"] = l.cross_attn_token_to_img.v_b; model.tensors[prefix + "cross_attn_token_to_image.out_proj.weight"] = l.cross_attn_token_to_img.out_w; model.tensors[prefix + "cross_attn_token_to_image.out_proj.bias"] = l.cross_attn_token_to_img.out_b; model.tensors[prefix + "norm2.weight"] = l.norm2_w; model.tensors[prefix + "norm2.bias"] = l.norm2_b; model.tensors[prefix + "mlp.lin1.weight"] = l.mlp_lin1_w; model.tensors[prefix + "mlp.lin1.bias"] = l.mlp_lin1_b; model.tensors[prefix + "mlp.lin2.weight"] = l.mlp_lin2_w; model.tensors[prefix + "mlp.lin2.bias"] = l.mlp_lin2_b; model.tensors[prefix + "norm3.weight"] = l.norm3_w; model.tensors[prefix + "norm3.bias"] = l.norm3_b; model.tensors[prefix + "norm4.weight"] = l.norm4_w; model.tensors[prefix + "norm4.bias"] = l.norm4_b; model.tensors[prefix + "cross_attn_image_to_token.q_proj.weight"] = l.cross_attn_img_to_token.q_w; model.tensors[prefix + "cross_attn_image_to_token.q_proj.bias"] = l.cross_attn_img_to_token.q_b; model.tensors[prefix + "cross_attn_image_to_token.k_proj.weight"] = l.cross_attn_img_to_token.k_w; model.tensors[prefix + "cross_attn_image_to_token.k_proj.bias"] = l.cross_attn_img_to_token.k_b; model.tensors[prefix + "cross_attn_image_to_token.v_proj.weight"] = l.cross_attn_img_to_token.v_w; model.tensors[prefix + "cross_attn_image_to_token.v_proj.bias"] = l.cross_attn_img_to_token.v_b; model.tensors[prefix + "cross_attn_image_to_token.out_proj.weight"] = l.cross_attn_img_to_token.out_w; model.tensors[prefix + "cross_attn_image_to_token.out_proj.bias"] = l.cross_attn_img_to_token.out_b; } dec.transformer_final_attn_token_to_img.q_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans/2); dec.transformer_final_attn_token_to_img.q_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans/2); dec.transformer_final_attn_token_to_img.k_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans/2); dec.transformer_final_attn_token_to_img.k_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans/2); dec.transformer_final_attn_token_to_img.v_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans/2); dec.transformer_final_attn_token_to_img.v_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans/2); dec.transformer_final_attn_token_to_img.out_w = ggml_new_tensor_2d(ctx, 
GGML_TYPE_F16, n_enc_out_chans/2, n_enc_out_chans); dec.transformer_final_attn_token_to_img.out_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); model.tensors["mask_decoder.transformer.final_attn_token_to_image.q_proj.weight"] = dec.transformer_final_attn_token_to_img.q_w; model.tensors["mask_decoder.transformer.final_attn_token_to_image.q_proj.bias"] = dec.transformer_final_attn_token_to_img.q_b; model.tensors["mask_decoder.transformer.final_attn_token_to_image.k_proj.weight"] = dec.transformer_final_attn_token_to_img.k_w; model.tensors["mask_decoder.transformer.final_attn_token_to_image.k_proj.bias"] = dec.transformer_final_attn_token_to_img.k_b; model.tensors["mask_decoder.transformer.final_attn_token_to_image.v_proj.weight"] = dec.transformer_final_attn_token_to_img.v_w; model.tensors["mask_decoder.transformer.final_attn_token_to_image.v_proj.bias"] = dec.transformer_final_attn_token_to_img.v_b; model.tensors["mask_decoder.transformer.final_attn_token_to_image.out_proj.weight"] = dec.transformer_final_attn_token_to_img.out_w; model.tensors["mask_decoder.transformer.final_attn_token_to_image.out_proj.bias"] = dec.transformer_final_attn_token_to_img.out_b; dec.transformer_norm_final_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); dec.transformer_norm_final_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); model.tensors["mask_decoder.transformer.norm_final_attn.weight"] = dec.transformer_norm_final_w; model.tensors["mask_decoder.transformer.norm_final_attn.bias"] = dec.transformer_norm_final_b; dec.output_upscaling_0_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 2, 2, n_img_embd, n_enc_out_chans); dec.output_upscaling_0_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_img_embd); dec.output_upscaling_1_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_img_embd); dec.output_upscaling_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_img_embd); dec.output_upscaling_3_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 2, 2, n_img_embd/2, n_img_embd); dec.output_upscaling_3_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_img_embd/2); model.tensors["mask_decoder.output_upscaling.0.weight"] = dec.output_upscaling_0_w; model.tensors["mask_decoder.output_upscaling.0.bias"] = dec.output_upscaling_0_b; model.tensors["mask_decoder.output_upscaling.1.weight"] = dec.output_upscaling_1_w; model.tensors["mask_decoder.output_upscaling.1.bias"] = dec.output_upscaling_1_b; model.tensors["mask_decoder.output_upscaling.3.weight"] = dec.output_upscaling_3_w; model.tensors["mask_decoder.output_upscaling.3.bias"] = dec.output_upscaling_3_b; const int n_hypernet_mpls_count = 4; dec.output_hypernet_mlps.resize(n_hypernet_mpls_count); for (int i = 0; i < n_hypernet_mpls_count; ++i) { auto& mlp = dec.output_hypernet_mlps[i]; mlp.w_0 = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans); mlp.b_0 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); mlp.w_1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans); mlp.b_1 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans); mlp.w_2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_img_embd/2); mlp.b_2 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_img_embd/2); const auto prefix = "mask_decoder.output_hypernetworks_mlps." 
+ std::to_string(i) + ".";

                model.tensors[prefix + "layers.0.weight"] = mlp.w_0;
                model.tensors[prefix + "layers.0.bias"]   = mlp.b_0;
                model.tensors[prefix + "layers.1.weight"] = mlp.w_1;
                model.tensors[prefix + "layers.1.bias"]   = mlp.b_1;
                model.tensors[prefix + "layers.2.weight"] = mlp.w_2;
                model.tensors[prefix + "layers.2.bias"]   = mlp.b_2;
            }

            dec.iou_prediction_head_0_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans);
            dec.iou_prediction_head_0_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans);
            dec.iou_prediction_head_1_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_enc_out_chans);
            dec.iou_prediction_head_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_enc_out_chans);
            dec.iou_prediction_head_2_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_enc_out_chans, n_pt_embd);
            dec.iou_prediction_head_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_pt_embd);

            dec.iou_token_w   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_enc_out_chans, 1);
            dec.mask_tokens_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_enc_out_chans, n_pt_embd);

            model.tensors["mask_decoder.iou_prediction_head.layers.0.weight"] = dec.iou_prediction_head_0_w;
            model.tensors["mask_decoder.iou_prediction_head.layers.0.bias"]   = dec.iou_prediction_head_0_b;
            model.tensors["mask_decoder.iou_prediction_head.layers.1.weight"] = dec.iou_prediction_head_1_w;
            model.tensors["mask_decoder.iou_prediction_head.layers.1.bias"]   = dec.iou_prediction_head_1_b;
            model.tensors["mask_decoder.iou_prediction_head.layers.2.weight"] = dec.iou_prediction_head_2_w;
            model.tensors["mask_decoder.iou_prediction_head.layers.2.bias"]   = dec.iou_prediction_head_2_b;

            model.tensors["mask_decoder.iou_token.weight"]   = dec.iou_token_w;
            model.tensors["mask_decoder.mask_tokens.weight"] = dec.mask_tokens_w;
        }
    }

    // load weights
    {
        int n_tensors = 0;
        size_t total_size = 0;

        fprintf(stderr, "%s: ", __func__);

        while (true) {
            int32_t n_dims;
            int32_t length;
            int32_t ftype;

            fin.read(reinterpret_cast<char *>(&n_dims), sizeof(n_dims));
            fin.read(reinterpret_cast<char *>(&length), sizeof(length));
            fin.read(reinterpret_cast<char *>(&ftype),  sizeof(ftype));

            if (fin.eof()) {
                break;
            }

            int64_t nelements = 1;
            int64_t ne[4] = { 1, 1, 1, 1 };
            for (int i = 0; i < n_dims; ++i) {
                int32_t ne_cur;
                fin.read(reinterpret_cast<char *>(&ne_cur), sizeof(ne_cur));
                ne[i] = ne_cur;
                nelements *= ne[i];
            }

            std::string name(length, 0);
            fin.read(&name[0], length);
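            // each tensor record in the file is laid out as:
            //   int32 n_dims | int32 name_len | int32 ftype | n_dims x int32 ne[i] | name bytes | tensor data
            // (the header fields were just read above; the tensor data itself is consumed further below)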
            if (model.tensors.find(name.data()) == model.tensors.end()) {
                fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data());
                return false;
            }

            auto tensor = model.tensors[name.data()];
            //printf("ne0 = %jd, ne1 = %jd, ne2 = %jd, ne3 = %jd\n", ne[0], ne[1], ne[2], ne[3]);
            if (ggml_nelements(tensor) != nelements) {
                fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %d, expected %d\n",
                        __func__, name.data(), (int) nelements, (int) ggml_nelements(tensor));
                return false;
            }

            if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1] || tensor->ne[2] != ne[2] || tensor->ne[3] != ne[3]) {
                fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d, %d, %d], expected [%d, %d, %d, %d]\n",
                        __func__, name.data(),
                        (int) ne[0], (int) ne[1], (int) ne[2], (int) ne[3],
                        (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2], (int) tensor->ne[3]);
                return false;
            }

            size_t bpe = 0;

            switch (ftype) {
                case 0: bpe = ggml_type_size(GGML_TYPE_F32);  break;
                case 1: bpe = ggml_type_size(GGML_TYPE_F16);  break;
                case 2: bpe = ggml_type_size(GGML_TYPE_Q4_0); assert(ne[0] % 64 == 0); break;
                case 3: bpe = ggml_type_size(GGML_TYPE_Q4_1); assert(ne[0] % 64 == 0); break;
                default:
                    {
                        fprintf(stderr, "%s: unknown ftype %d in model file\n", __func__, ftype);
                        return false;
                    }
            }

            if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) {
                fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
                        __func__, name.data(), ggml_nbytes(tensor), (size_t) nelements*bpe);
                return false;
            }

            fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));

            total_size += ggml_nbytes(tensor);
            if (++n_tensors % 8 == 0) {
                fprintf(stderr, ".");
                fflush(stderr);
            }
        }

        if (n_tensors != int(model.tensors.size())) {
            fprintf(stderr, "%s: model file has %d tensors, but %d tensors were expected\n", __func__, n_tensors, (int) model.tensors.size());
            return false;
        }

        fprintf(stderr, " done\n");

        fprintf(stderr, "%s: model size = %8.2f MB / num tensors = %d\n", __func__, total_size/1024.0/1024.0, n_tensors);
    }

    fin.close();

    return true;
}

struct ggml_tensor * sam_fill_dense_pe(
        const sam_model     & model,
        struct ggml_context * ctx0,
        struct ggml_cgraph  * gf,
        sam_state           & state) {
    const auto & hparams = model.hparams;
    const auto & enc     = model.enc_prompt;

    const int32_t n_img_embd = hparams.n_img_embd();

    struct ggml_tensor * xy_embed_stacked = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, 2, n_img_embd, n_img_embd);
    ggml_set_name(xy_embed_stacked, "xy_embed_stacked");
    ggml_set_input(xy_embed_stacked);

    struct ggml_tensor * cur = ggml_mul_mat(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, enc.pe)), xy_embed_stacked);

    cur = ggml_scale(ctx0, cur, float(2.0*M_PI));

    // concat
    // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/prompt_encoder.py#L192
    {
        struct ggml_tensor * t_sin = ggml_map_custom1(ctx0, cur, ggml_sam_sin, GGML_N_TASKS_MAX, NULL);
        struct ggml_tensor * t_cos = ggml_map_custom1(ctx0, cur, ggml_sam_cos, GGML_N_TASKS_MAX, NULL);

        cur = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, t_sin->ne[0] + t_cos->ne[0], cur->ne[1], cur->ne[2]);

        ggml_build_forward_expand(gf, ggml_cpy(ctx0, t_sin, ggml_view_3d(ctx0, cur, t_sin->ne[0], t_sin->ne[1], t_sin->ne[2], cur->nb[1], cur->nb[2], 0)));
        ggml_build_forward_expand(gf, ggml_cpy(ctx0, t_cos, ggml_view_3d(ctx0, cur, t_sin->ne[0], t_sin->ne[1], t_sin->ne[2], cur->nb[1], cur->nb[2], t_sin->nb[1])));
    }

    struct ggml_tensor * pe_img_dense = ggml_cont(ctx0, ggml_permute(ctx0, cur, 2, 0, 1, 3));
    ggml_build_forward_expand(gf, pe_img_dense);

    return pe_img_dense;
}

struct ggml_tensor * sam_layer_norm_2d(
        struct ggml_context * ctx0,
        struct ggml_tensor  * layer,
        int                   n_channels,
        struct ggml_tensor  * w,
        struct ggml_tensor  * b,
        float                 eps) {
    // LayerNorm2d
    // normalize along channel dimension
    // TODO: better implementation
    layer = ggml_permute(ctx0,
                ggml_norm(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, layer, 1, 2, 0, 3)), eps),
                2, 0, 1, 3);

    layer = ggml_add(ctx0,
              ggml_mul(ctx0,
                  ggml_repeat(ctx0, ggml_reshape_3d(ctx0, w, 1, 1, n_channels), layer),
                  layer),
              ggml_repeat(ctx0, ggml_reshape_3d(ctx0, b, 1, 1, n_channels), layer));

    return layer;
}

struct ggml_cgraph * sam_encode_image(
        const sam_model     & model,
        sam_state           & state,
        const sam_image_f32 & img) {
    const auto & hparams = model.hparams;
    const auto & enc     = model.enc_img;

    const int32_t n_enc_state     = hparams.n_enc_state;
    const int32_t n_enc_layer     = hparams.n_enc_layer;
    const int32_t n_enc_head      = hparams.n_enc_head;
    const int32_t n_enc_head_dim  = hparams.n_enc_head_dim();
    const int32_t n_enc_out_chans = hparams.n_enc_out_chans;

    const int32_t n_img_size    = hparams.n_img_size();
    const int32_t n_window_size = hparams.n_window_size();
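    // note on the allocation pattern used here (and again in sam_build_fast_graph):
    // the context below is created with no_alloc = true, so only tensor/graph metadata
    // lives in buf_compute_img_enc; the actual tensor data is placed later by
    // ggml_gallocr_alloc_graph(state.allocr, gf), sized to this graph's exact needs.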
struct ggml_init_params ggml_params = { /*.mem_size =*/ state.buf_compute_img_enc.size(), /*.mem_buffer =*/ state.buf_compute_img_enc.data(), /*.no_alloc =*/ true, // skip allocating as we use ggml_alloc to allocate exact memory requirements }; struct ggml_context * ctx0 = ggml_init(ggml_params); struct ggml_cgraph * gf = ggml_new_graph(ctx0); struct ggml_tensor * inp = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, n_img_size, n_img_size, 3, 1); ggml_set_name(inp, "inp"); ggml_set_input(inp); // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L392 struct ggml_tensor * cur = ggml_conv_2d_sk_p0(ctx0, enc.proj_w, inp); cur = ggml_add_inplace(ctx0, cur, ggml_repeat(ctx0, enc.proj_b, cur)); // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L394 // keep in F32 cur = ggml_cont(ctx0, ggml_permute(ctx0, cur, 1, 2, 0, 3)); // convert to F16 //cur = ggml_cpy(ctx0, // ggml_permute(ctx0, cur, 1, 2, 0, 3), // ggml_new_tensor_3d(ctx0, GGML_TYPE_F16, n_enc_state, n_img_embd, n_img_embd)); // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L108-L109 cur = ggml_add_inplace(ctx0, cur, enc.pe); struct ggml_tensor * inpL = cur; for (int il = 0; il < n_enc_layer; ++il) { const auto & layer = enc.layers[il]; // norm // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L168 { cur = ggml_norm(ctx0, inpL, hparams.eps); // cur = ln_0_w*cur + ln_0_b cur = ggml_mul(ctx0, cur, layer.norm1_w); cur = ggml_add_inplace(ctx0, cur, layer.norm1_b); } const int64_t w0 = cur->ne[1]; const int64_t h0 = cur->ne[2]; if (hparams.is_global_attn(il) == false) { // local attention layer - apply window partition // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L169-L172 cur = ggml_win_part(ctx0, cur, n_window_size); } const int64_t W = cur->ne[1]; const int64_t H = cur->ne[2]; // self-attention { cur = ggml_mul_mat(ctx0, layer.qkv_w, cur); cur = ggml_add_inplace(ctx0, cur, layer.qkv_b); // split qkv into separate tensors // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L225-L229 const int B = cur->ne[3]; cur = ggml_reshape_4d(ctx0, cur, n_enc_state, 3, W*H, B); cur = ggml_cont(ctx0, ggml_permute(ctx0, cur, 0, 3, 1, 2)); struct ggml_tensor * Q; struct ggml_tensor * K; struct ggml_tensor * V; Q = ggml_view_3d (ctx0, cur, n_enc_state, W*H, B, cur->nb[1], cur->nb[2], 0*cur->nb[3]); Q = ggml_reshape_4d(ctx0, Q, n_enc_head_dim, n_enc_head, W*H, B); Q = ggml_cont (ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3)); Q = ggml_reshape_3d(ctx0, Q, n_enc_head_dim, W*H, B*n_enc_head); K = ggml_view_3d (ctx0, cur, n_enc_state, W*H, B, cur->nb[1], cur->nb[2], 1*cur->nb[3]); K = ggml_reshape_4d(ctx0, K, n_enc_head_dim, n_enc_head, W*H, B); K = ggml_cont (ctx0, ggml_permute(ctx0, K, 0, 2, 1, 3)); K = ggml_reshape_3d(ctx0, K, n_enc_head_dim, W*H, B*n_enc_head); V = ggml_view_3d (ctx0, cur, n_enc_state, W*H, B, cur->nb[1], cur->nb[2], 2*cur->nb[3]); V = ggml_reshape_4d(ctx0, V, n_enc_head_dim, n_enc_head, W*H, B); V = ggml_cont (ctx0, ggml_permute(ctx0, V, 1, 2, 0, 3)); // transposed V = ggml_reshape_3d(ctx0, V, W*H, n_enc_head_dim, B*n_enc_head); struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); struct ggml_tensor * KQ_scaled = ggml_scale_inplace(ctx0, KQ, 1.0f/sqrtf(n_enc_head_dim)); struct 
ggml_tensor * rw = ggml_get_rel_pos(ctx0, layer.rel_pos_w, W, W); struct ggml_tensor * rh = ggml_get_rel_pos(ctx0, layer.rel_pos_h, H, H); struct ggml_tensor * q_r = ggml_reshape_4d(ctx0, Q, n_enc_head_dim, W, H, B*n_enc_head); struct ggml_tensor * rel_w = ggml_cont(ctx0, ggml_permute(ctx0, ggml_mul_mat(ctx0, rw, ggml_cont(ctx0, ggml_permute(ctx0, q_r, 0, 2, 1, 3))), 0, 2, 1, 3)); struct ggml_tensor * rel_h = ggml_mul_mat(ctx0, rh, q_r); struct ggml_tensor * attn = ggml_add_rel_pos_inplace(ctx0, KQ_scaled, rel_w, rel_h); struct ggml_tensor * KQ_soft_max = ggml_soft_max_inplace(ctx0, attn); struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max); cur = ggml_reshape_4d(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, ggml_reshape_4d(ctx0, KQV, n_enc_head_dim, W*H, n_enc_head, B), 0, 2, 1, 3)), n_enc_state, W, H, B); cur = ggml_mul_mat(ctx0, layer.proj_w, cur); cur = ggml_add_inplace(ctx0, cur, layer.proj_b); } if (hparams.is_global_attn(il) == false) { // local attention layer - reverse window partition cur = ggml_win_unpart(ctx0, cur, w0, h0, n_window_size); } cur = ggml_add_inplace(ctx0, cur, inpL); struct ggml_tensor * inpFF = cur; // feed-forward network { // norm { cur = ggml_norm(ctx0, inpFF, hparams.eps); // cur = mlp_ln_w*cur + mlp_ln_b cur = ggml_mul(ctx0, cur, layer.norm2_w); cur = ggml_add_inplace(ctx0, cur, layer.norm2_b); } // fully connected cur = ggml_mul_mat(ctx0, layer.mlp_lin1_w, cur); cur = ggml_add_inplace(ctx0, cur, layer.mlp_lin1_b); // GELU activation cur = ggml_gelu(ctx0, cur); // projection cur = ggml_mul_mat(ctx0, layer.mlp_lin2_w, cur); cur = ggml_add_inplace(ctx0, cur, layer.mlp_lin2_b); } inpL = ggml_add(ctx0, cur, inpFF); } cur = ggml_cont(ctx0, ggml_permute(ctx0, inpL, 2, 0, 1, 3)); cur = ggml_conv_2d_sk_p0(ctx0, enc.neck_conv_0, cur); cur = sam_layer_norm_2d(ctx0, cur, n_enc_out_chans, enc.neck_norm_0_w, enc.neck_norm_0_b, hparams.eps); cur = ggml_conv_2d_s1_ph(ctx0, enc.neck_conv_1, cur); cur = sam_layer_norm_2d(ctx0, cur, n_enc_out_chans, enc.neck_norm_1_w, enc.neck_norm_1_b, hparams.eps); cur = ggml_cpy(ctx0, cur, state.embd_img); ggml_build_forward_expand(gf, cur); ggml_disconnect_node_from_graph(state.embd_img); //ggml_graph_print(&gf); ggml_free(ctx0); ggml_gallocr_alloc_graph(state.allocr, gf); { struct ggml_tensor * inp = ggml_graph_get_tensor(gf, "inp"); float * data = (float *) ggml_get_data(inp); const int nx = img.nx; const int ny = img.ny; const int n = nx*ny; GGML_ASSERT(nx == n_img_size && ny == n_img_size); for (int k = 0; k < 3; k++) { for (int y = 0; y < ny; y++) { for (int x = 0; x < nx; x++) { data[k*n + y*nx + x] = img.data[3*(y*nx + x) + k]; } } } } return gf; } struct prompt_encoder_result { struct ggml_tensor * embd_prompt_sparse = {}; struct ggml_tensor * embd_prompt_dense = {}; }; struct ggml_tensor * sam_prompt_encode_pe_encoding( const sam_encoder_prompt & enc, struct ggml_context * ctx0, struct ggml_cgraph * gf, struct ggml_tensor * coords) { auto * cur = ggml_mul_mat(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, enc.pe)), coords); cur = ggml_scale(ctx0, cur, float(2.0*M_PI)); // concat // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/prompt_encoder.py#L192 { struct ggml_tensor * t_sin = ggml_map_custom1(ctx0, cur, ggml_sam_sin, GGML_N_TASKS_MAX, NULL); struct ggml_tensor * t_cos = ggml_map_custom1(ctx0, cur, ggml_sam_cos, GGML_N_TASKS_MAX, NULL); cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, t_sin->ne[0] + t_cos->ne[0], cur->ne[1]); ggml_build_forward_expand(gf, ggml_cpy(ctx0, t_sin, 
ggml_view_2d(ctx0, cur, t_sin->ne[0], t_sin->ne[1], cur->nb[1], 0))); ggml_build_forward_expand(gf, ggml_cpy(ctx0, t_cos, ggml_view_2d(ctx0, cur, t_sin->ne[0], t_sin->ne[1], cur->nb[1], t_sin->nb[1]))); } return cur; } // encode a prompt // // - points // - boxes // - masks // // TODO: currently just encode a single point for simplicity // prompt_encoder_result sam_encode_prompt( const sam_model & model, struct ggml_context * ctx0, struct ggml_cgraph * gf, sam_state & state, const sam_prompt & prompt) { const auto & hparams = model.hparams; const auto & enc = model.enc_prompt; struct ggml_tensor * inp = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, 2, 2); ggml_set_name(inp, "prompt_input"); ggml_set_input(inp); auto * embd_prompt_sparse = [&]() -> struct ggml_tensor * { switch (prompt.prompt_type) { case SAM_PROMPT_TYPE_POINT: { // PromptEncoder._embed_points auto * pt_embd = sam_prompt_encode_pe_encoding(enc, ctx0, gf, inp); // overwrite label == -1 with not_a_point_embed.weight // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/prompt_encoder.py#L86 // TODO: extend for multiple points auto * pt_embd_not = ggml_view_2d(ctx0, pt_embd, pt_embd->ne[0], 1, pt_embd->nb[1], pt_embd->nb[1]); ggml_build_forward_expand(gf, ggml_cpy(ctx0, enc.not_a_pt_embd_w, pt_embd_not)); // add point_embeddings[1] to label == 1 // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/prompt_encoder.py#L90 auto * pt_embd1 = ggml_view_2d(ctx0, pt_embd, pt_embd->ne[0], 1, pt_embd->nb[1], 0); ggml_build_forward_expand(gf, ggml_add_inplace(ctx0, pt_embd1, enc.pt_embd[1])); return pt_embd; } break; case SAM_PROMPT_TYPE_BOX: { // PromptEncoder._embed_boxes auto * corner_embd = sam_prompt_encode_pe_encoding(enc, ctx0, gf, inp); // corner_embd[:, 0, :] += self.point_embeddings[2].weight // corner_embd[:, 1, :] += self.point_embeddings[3].weight auto * corner0 = ggml_view_2d( ctx0, corner_embd, corner_embd->ne[0], 1, corner_embd->nb[1], 0); auto * corner1 = ggml_view_2d( ctx0, corner_embd, corner_embd->ne[0], 1, corner_embd->nb[1], corner_embd->nb[1]); ggml_build_forward_expand(gf, ggml_add_inplace(ctx0, corner0, enc.pt_embd[2])); ggml_build_forward_expand(gf, ggml_add_inplace(ctx0, corner1, enc.pt_embd[3])); return corner_embd; } break; default: { fprintf(stderr, "%s: unsupported prompt type %d\n", __func__, prompt.prompt_type); return nullptr; } break; } }(); ggml_build_forward_expand(gf, embd_prompt_sparse); struct ggml_tensor * embd_prompt_dense = ggml_repeat(ctx0, ggml_cont(ctx0, ggml_view_3d(ctx0, enc.no_mask_embd_w, 1, 1, enc.no_mask_embd_w->ne[0], enc.no_mask_embd_w->nb[0], enc.no_mask_embd_w->nb[0], 0)), ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hparams.n_img_embd(), hparams.n_img_embd(), hparams.n_enc_out_chans)); ggml_build_forward_expand(gf, embd_prompt_dense); //printf("used_mem = %zu\n", ggml_used_mem(ctx0)); prompt_encoder_result res; res.embd_prompt_sparse = embd_prompt_sparse; res.embd_prompt_dense = embd_prompt_dense; return res; } struct ggml_tensor* sam_decode_mask_transformer_attn( const sam_layer_dec_transformer_attn & attn, struct ggml_tensor * queries, struct ggml_tensor * keys, struct ggml_tensor * values, struct ggml_context * ctx0, const sam_model & model) { const auto & hparams = model.hparams; const int n_head = hparams.n_dec_heads; struct ggml_tensor * Qcur = {}; struct ggml_tensor * Kcur = {}; struct ggml_tensor * Vcur = {}; Qcur = ggml_mul_mat(ctx0, attn.q_w, queries); Qcur = ggml_add_inplace(ctx0, Qcur, 
attn.q_b); Kcur = ggml_mul_mat(ctx0, attn.k_w, keys); Kcur = ggml_add_inplace(ctx0, Kcur, attn.k_b); Vcur = ggml_mul_mat(ctx0, attn.v_w, values); Vcur = ggml_add_inplace(ctx0, Vcur, attn.v_b); struct ggml_tensor * Q = {}; struct ggml_tensor * K = {}; struct ggml_tensor * V = {}; Q = ggml_reshape_4d(ctx0, Qcur, Qcur->ne[0]/n_head, n_head, Qcur->ne[1], Qcur->ne[2]); Q = ggml_cont(ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3)); K = ggml_reshape_4d(ctx0, Kcur, Kcur->ne[0]/n_head, n_head, Kcur->ne[1], Kcur->ne[2]); K = ggml_cont(ctx0, ggml_permute(ctx0, K, 0, 2, 1, 3)); V = ggml_reshape_4d(ctx0, Vcur, Vcur->ne[0]/n_head, n_head, Vcur->ne[1], Vcur->ne[2]); V = ggml_cont(ctx0, ggml_permute(ctx0, V, 0, 2, 1, 3)); // Q * K struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); struct ggml_tensor * KQ_scaled = ggml_scale_inplace(ctx0, KQ, 1.0f/sqrt(float(Q->ne[0]))); struct ggml_tensor * KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_scaled); struct ggml_tensor * KQV = ggml_mul_mat(ctx0, KQ_soft_max, ggml_cont(ctx0, ggml_transpose(ctx0, V))); struct ggml_tensor * KQV_merged = ggml_cont(ctx0, ggml_transpose(ctx0, KQV)); KQV_merged = ggml_cont(ctx0, ggml_permute(ctx0, KQV_merged, 0, 2, 1, 3)); KQV_merged = ggml_reshape_3d(ctx0, KQV_merged, KQV_merged->ne[0]*KQV_merged->ne[1], KQV_merged->ne[2], KQV_merged->ne[3]); KQV_merged = ggml_mul_mat(ctx0, attn.out_w, KQV_merged); KQV_merged = ggml_add_inplace(ctx0, KQV_merged, attn.out_b); return KQV_merged; } struct ggml_tensor * sam_decode_mask_mlp_relu_3( struct ggml_tensor * in, struct ggml_tensor * w_0, struct ggml_tensor * b_0, struct ggml_tensor * w_1, struct ggml_tensor * b_1, struct ggml_tensor * w_2, struct ggml_tensor * b_2, struct ggml_context * ctx0) { struct ggml_tensor * cur = {}; cur = ggml_mul_mat(ctx0, w_0, in); cur = ggml_add_inplace(ctx0, cur, b_0); cur = ggml_relu_inplace(ctx0, cur); cur = ggml_mul_mat(ctx0, w_1, cur); cur = ggml_add_inplace(ctx0, cur, b_1); cur = ggml_relu_inplace(ctx0, cur); cur = ggml_mul_mat(ctx0, w_2, cur); cur = ggml_add_inplace(ctx0, cur, b_2); return cur; } bool sam_decode_mask( const sam_model & model, const prompt_encoder_result & prompt, struct ggml_tensor * pe_img, struct ggml_context * ctx0, struct ggml_cgraph * gf, sam_state & state, const bool multimask_output) { const auto & hparams = model.hparams; const auto & dec = model.dec; const int n_img_embd = hparams.n_img_embd(); struct ggml_tensor * tokens = {}; { // Concatenate output tokens // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/mask_decoder.py#L120 const auto& sparse = prompt.embd_prompt_sparse; tokens = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, dec.iou_token_w->ne[0], dec.iou_token_w->ne[1] + dec.mask_tokens_w->ne[1] + sparse->ne[1], sparse->ne[2]); const size_t offsets[3] = { 0, dec.iou_token_w->ne[1]*tokens->nb[1], dec.iou_token_w->ne[1]*tokens->nb[1] + dec.mask_tokens_w->ne[1]*tokens->nb[1] }; ggml_build_forward_expand(gf, ggml_cpy(ctx0, dec.iou_token_w, ggml_view_2d(ctx0, tokens, tokens->ne[0], dec.iou_token_w->ne[1], tokens->nb[1], offsets[0]))); ggml_build_forward_expand(gf, ggml_cpy(ctx0, dec.mask_tokens_w, ggml_view_2d(ctx0, tokens, tokens->ne[0], dec.mask_tokens_w->ne[1], tokens->nb[1], offsets[1]))); ggml_build_forward_expand(gf, ggml_cpy(ctx0, sparse, ggml_view_2d(ctx0, tokens, tokens->ne[0], sparse->ne[1], tokens->nb[1], offsets[2]))); // TODO: Sparse prompt embeddings can have more than one point } struct ggml_tensor * src = {}; struct ggml_tensor * pos_src = {}; int 
srcNE[4] = { 0, 0, 0, 0 }; { // Expand per-image data in the batch direction to be per-mask // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/mask_decoder.py#L125 src = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, state.embd_img->ne[0], state.embd_img->ne[1], state.embd_img->ne[2], tokens->ne[2]); src = ggml_add(ctx0, ggml_repeat(ctx0, state.embd_img, src), prompt.embd_prompt_dense); srcNE[0] = src->ne[0]; srcNE[1] = src->ne[1]; srcNE[2] = src->ne[2]; srcNE[3] = src->ne[3]; // flatten & permute // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/transformer.py#L83 src = ggml_cont(ctx0, ggml_permute(ctx0, ggml_view_3d(ctx0, src, src->ne[0]*src->ne[1], src->ne[2], src->ne[3], src->nb[2], src->nb[3], 0), 1, 0, 2, 3)); pos_src = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, pe_img->ne[0], pe_img->ne[1], pe_img->ne[2], tokens->ne[2]); pos_src = ggml_repeat(ctx0, pe_img, pos_src); // flatten & permute // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/transformer.py#L83 pos_src = ggml_cont(ctx0, ggml_permute(ctx0, ggml_view_3d(ctx0, pos_src, pos_src->ne[0]*pos_src->ne[1], pos_src->ne[2], pos_src->ne[3], pos_src->nb[2], pos_src->nb[3], 0), 1, 0, 2, 3)); } struct ggml_tensor * queries = tokens; struct ggml_tensor * keys = src; { // Run the transformer // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/transformer.py#L62 for (int i = 0; i < int(model.dec.transformer_layers.size()); ++i) { const auto& tfm_layer = model.dec.transformer_layers[i]; // Self attention block // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/transformer.py#L154 const bool skip_first_layer_pe = i == 0; if (skip_first_layer_pe) { queries = sam_decode_mask_transformer_attn(tfm_layer.self_attn, queries, queries, queries, ctx0, model); } else { struct ggml_tensor * q_0 = ggml_add(ctx0, queries, tokens); struct ggml_tensor * self_attn = sam_decode_mask_transformer_attn(tfm_layer.self_attn, q_0, q_0, queries, ctx0, model); queries = ggml_add(ctx0, queries, self_attn); } queries = ggml_norm(ctx0, queries, hparams.eps_decoder_transformer); queries = ggml_add_inplace(ctx0, ggml_mul(ctx0, queries, tfm_layer.norm1_w), tfm_layer.norm1_b); // Cross attention block, tokens attending to image embedding // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/transformer.py#L163 struct ggml_tensor * q_1 = ggml_add(ctx0, queries, tokens); struct ggml_tensor * k_1 = ggml_add(ctx0, keys, pos_src); struct ggml_tensor * cross_attn_token_to_img = sam_decode_mask_transformer_attn(tfm_layer.cross_attn_token_to_img, q_1, k_1, keys, ctx0, model); queries = ggml_add_inplace(ctx0, queries, cross_attn_token_to_img); queries = ggml_norm_inplace(ctx0, queries, hparams.eps_decoder_transformer); queries = ggml_add_inplace(ctx0, ggml_mul(ctx0, queries, tfm_layer.norm2_w), tfm_layer.norm2_b); // MLP block // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/transformer.py#L170 struct ggml_tensor * mlp_out = ggml_mul_mat(ctx0, tfm_layer.mlp_lin1_w, queries); mlp_out = ggml_add_inplace(ctx0, mlp_out, 
tfm_layer.mlp_lin1_b); // RELU activation mlp_out = ggml_relu_inplace(ctx0, mlp_out); mlp_out = ggml_mul_mat(ctx0, tfm_layer.mlp_lin2_w, mlp_out); mlp_out = ggml_add_inplace(ctx0, mlp_out, tfm_layer.mlp_lin2_b); queries = ggml_add_inplace(ctx0, queries, mlp_out); queries = ggml_norm_inplace(ctx0, queries, hparams.eps_decoder_transformer); queries = ggml_add_inplace(ctx0, ggml_mul(ctx0, queries, tfm_layer.norm3_w), tfm_layer.norm3_b); // Cross attention block, image embedding attending to tokens // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/transformer.py#L175 struct ggml_tensor * q_2 = ggml_add(ctx0, queries, tokens); struct ggml_tensor * k_2 = ggml_add(ctx0, keys, pos_src); struct ggml_tensor * cross_attn_img_to_token = sam_decode_mask_transformer_attn(tfm_layer.cross_attn_img_to_token, k_2, q_2, queries, ctx0, model); keys = ggml_add_inplace(ctx0, keys, cross_attn_img_to_token); keys = ggml_norm_inplace(ctx0, keys, hparams.eps_decoder_transformer); keys = ggml_add_inplace(ctx0, ggml_mul(ctx0, keys, tfm_layer.norm4_w), tfm_layer.norm4_b); } // Apply the final attention layer from the points to the image // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/transformer.py#L99 struct ggml_tensor * q = ggml_add(ctx0, queries, tokens); struct ggml_tensor * k = ggml_add(ctx0, keys, pos_src); struct ggml_tensor * final_attn_token_to_img = sam_decode_mask_transformer_attn(dec.transformer_final_attn_token_to_img, q, k, keys, ctx0, model); queries = ggml_add_inplace(ctx0, queries, final_attn_token_to_img); queries = ggml_norm_inplace(ctx0, queries, hparams.eps_decoder_transformer); queries = ggml_add_inplace(ctx0, ggml_mul(ctx0, queries, dec.transformer_norm_final_w), dec.transformer_norm_final_b); } struct ggml_tensor * iou_pred = ggml_view_2d(ctx0, queries, queries->ne[0], queries->ne[2], queries->nb[2], 0); const int num_mask_tokens = 4; // num_multimask_outputs + 1 struct ggml_tensor * mask_tokens_out = ggml_view_3d(ctx0, queries, queries->ne[0], num_mask_tokens, queries->ne[2], queries->nb[1], num_mask_tokens*queries->nb[1], queries->nb[1]); // Upscale mask embeddings and predict masks using the mask tokens // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/mask_decoder.py#L136 keys = ggml_cont(ctx0, ggml_transpose(ctx0, keys)); keys = ggml_view_4d(ctx0, keys, srcNE[0], srcNE[1], srcNE[2], srcNE[3], srcNE[0]*keys->nb[0], keys->nb[1], keys->nb[2], 0); // ggml_build_forward_expand(gf, keys); struct ggml_tensor * upscaled_embedding = {}; { // ConvTranspose2d keys = ggml_conv_transpose_2d_p0(ctx0, dec.output_upscaling_0_w, keys, 2); keys = ggml_add_inplace(ctx0, keys, ggml_repeat(ctx0, ggml_reshape_3d(ctx0, dec.output_upscaling_0_b, 1, 1, dec.output_upscaling_0_b->ne[0]), keys)); keys = sam_layer_norm_2d(ctx0, keys, n_img_embd, dec.output_upscaling_1_w, dec.output_upscaling_1_b, hparams.eps); // GELU activation keys = ggml_gelu_inplace(ctx0, keys); // ConvTranspose2d keys = ggml_conv_transpose_2d_p0(ctx0, dec.output_upscaling_3_w, keys, 2); keys = ggml_add_inplace(ctx0, ggml_repeat(ctx0, ggml_reshape_3d(ctx0, dec.output_upscaling_3_b, 1, 1, dec.output_upscaling_3_b->ne[0]), keys), keys); // GELU activation keys = ggml_gelu_inplace(ctx0, keys); upscaled_embedding = ggml_reshape_3d(ctx0, keys, keys->ne[0]*keys->ne[1], keys->ne[2], keys->ne[3]); 
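        // at this point each low-res mask is formed as a dot product between a per-token
        // hypernetwork embedding (n_img_embd/2 channels, computed below) and the upscaled
        // image embedding at every spatial position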
upscaled_embedding = ggml_cont(ctx0, ggml_transpose(ctx0, upscaled_embedding)); // TODO: Shouldn't be needed } struct ggml_tensor * hyper_in = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_img_embd/2, num_mask_tokens, mask_tokens_out->ne[2]); for (int i = 0; i < num_mask_tokens; ++i) { const auto& mlp = dec.output_hypernet_mlps[i]; struct ggml_tensor * in = ggml_view_2d(ctx0, mask_tokens_out, mask_tokens_out->ne[0], mask_tokens_out->ne[2], mask_tokens_out->nb[1], i*mask_tokens_out->nb[1]); struct ggml_tensor * out = sam_decode_mask_mlp_relu_3(in, mlp.w_0, mlp.b_0, mlp.w_1, mlp.b_1, mlp.w_2, mlp.b_2, ctx0); ggml_build_forward_expand(gf, ggml_cpy(ctx0, out, ggml_view_2d(ctx0, hyper_in, hyper_in->ne[0], hyper_in->ne[2], hyper_in->nb[1], i*hyper_in->nb[1]))); } struct ggml_tensor * masks = ggml_mul_mat(ctx0, hyper_in, upscaled_embedding); masks = ggml_cont(ctx0, ggml_transpose(ctx0, masks)); // TODO: Shouldn't be needed masks = ggml_reshape_4d(ctx0, masks, keys->ne[0], keys->ne[1], masks->ne[1], keys->ne[3]); // Generate mask quality predictions // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/mask_decoder.py#L146 iou_pred = sam_decode_mask_mlp_relu_3(iou_pred, dec.iou_prediction_head_0_w, dec.iou_prediction_head_0_b, dec.iou_prediction_head_1_w, dec.iou_prediction_head_1_b, dec.iou_prediction_head_2_w, dec.iou_prediction_head_2_b, ctx0); // Select the correct mask or masks for output // ref: https://github.com/facebookresearch/segment-anything/blob/6fdee8f2727f4506cfbbe553e23b895e27956588/segment_anything/modeling/mask_decoder.py#L101 if (multimask_output) { iou_pred = ggml_cpy(state.ctx, ggml_view_1d(ctx0, iou_pred, iou_pred->ne[0] - 1, iou_pred->nb[0]), state.iou_predictions); masks = ggml_view_4d(ctx0, masks, masks->ne[0], masks->ne[1], masks->ne[2] - 1, masks->ne[3], masks->nb[1], masks->nb[2], masks->nb[3], masks->nb[2] /* offset*/); masks = ggml_cpy(state.ctx, masks, state.low_res_masks); } else { iou_pred = ggml_cpy(state.ctx, ggml_view_1d(ctx0, iou_pred, 1, 0), ggml_view_1d(ctx0, state.iou_predictions, 1, 0)); masks = ggml_view_4d(ctx0, masks, masks->ne[0], masks->ne[1], 1, masks->ne[3], masks->nb[1], masks->nb[2], masks->nb[3], 0); auto * low_res_mask = ggml_view_4d(ctx0, state.low_res_masks, masks->ne[0], masks->ne[1], 1, masks->ne[3], masks->nb[1], masks->nb[2], masks->nb[3], 0); masks = ggml_cpy(state.ctx, masks, low_res_mask); } ggml_build_forward_expand(gf, masks); ggml_build_forward_expand(gf, iou_pred); ggml_disconnect_node_from_graph(state.low_res_masks); ggml_disconnect_node_from_graph(state.iou_predictions); return true; } bool sam_write_masks(const sam_hparams& hparams, int nx, int ny, const sam_state & state, const std::string & fname, const bool multimask_output) { if (state.low_res_masks->ne[2] == 0) return true; if (state.low_res_masks->ne[2] != state.iou_predictions->ne[0]) { printf("Error: number of masks (%d) does not match number of iou predictions (%d)\n", (int)state.low_res_masks->ne[2], (int)state.iou_predictions->ne[0]); return false; } const int n_img_size = hparams.n_img_size(); const float mask_threshold = hparams.mask_threshold; const float iou_threshold = hparams.iou_threshold; const float stability_score_threshold = hparams.stability_score_threshold; const float intersection_threshold = mask_threshold + hparams.stability_score_offset; const float union_threshold = mask_threshold - hparams.stability_score_offset; const int ne0 = state.low_res_masks->ne[0]; const int ne1 = 
state.low_res_masks->ne[1];
    const int ne2 = multimask_output ? state.low_res_masks->ne[2] : 1;

    // Remove padding and upscale masks to the original image size.
    // ref: https://github.com/facebookresearch/segment-anything/blob/efeab7296ab579d4a261e554eca80faf6b33924a/segment_anything/modeling/sam.py#L140

    const float preprocess_scale = std::max(nx, ny) / float(n_img_size);
    const int cropped_nx = int(nx / preprocess_scale + 0.5f);
    const int cropped_ny = int(ny / preprocess_scale + 0.5f);

    const float scale_x_1 = (float)ne0 / (float)n_img_size;
    const float scale_y_1 = (float)ne1 / (float)n_img_size;

    const float scale_x_2 = float(cropped_nx) / float(nx);
    const float scale_y_2 = float(cropped_ny) / float(ny);

    const auto iou_data = (float *) state.iou_predictions->data;

    for (int i = 0; i < ne2; ++i) {
        if (iou_threshold > 0.f && iou_data[i] < iou_threshold) {
            printf("Skipping mask %d with iou %f below threshold %f\n", i, iou_data[i], iou_threshold);
            continue; // Filtering masks with iou below the threshold
        }

        std::vector<float> mask_data(n_img_size*n_img_size);
        {
            const float * data = (float *) state.low_res_masks->data + i*ne0*ne1;

            for (int iy = 0; iy < n_img_size; ++iy) {
                for (int ix = 0; ix < n_img_size; ++ix) {
                    const float sx = std::max(scale_x_1*(ix + 0.5f) - 0.5f, 0.0f);
                    const float sy = std::max(scale_y_1*(iy + 0.5f) - 0.5f, 0.0f);

                    const int x0 = std::max(0, (int)sx);
                    const int y0 = std::max(0, (int)sy);

                    const int x1 = std::min(x0 + 1, ne0 - 1);
                    const int y1 = std::min(y0 + 1, ne1 - 1);

                    const float dx = sx - x0;
                    const float dy = sy - y0;

                    const int j00 = y0*ne0 + x0;
                    const int j01 = y0*ne0 + x1;
                    const int j10 = y1*ne0 + x0;
                    const int j11 = y1*ne0 + x1;

                    const float v00 = data[j00];
                    const float v01 = data[j01];
                    const float v10 = data[j10];
                    const float v11 = data[j11];

                    const float v0 = (1-dx)*v00 + dx*v01;
                    const float v1 = (1-dx)*v10 + dx*v11;

                    const float v = (1-dy)*v0 + dy*v1;

                    mask_data[iy*n_img_size + ix] = v;
                }
            }
        }

        int intersections = 0;
        int unions = 0;
        sam_image_u8 res;
        int min_iy = ny;
        int max_iy = 0;
        int min_ix = nx;
        int max_ix = 0;
        {
            const float * data = mask_data.data();

            res.nx = nx;
            res.ny = ny;
            res.data.resize(nx*ny);

            for (int iy = 0; iy < ny; ++iy) {
                for (int ix = 0; ix < nx; ++ix) {
                    const float sx = std::max(scale_x_2*(ix + 0.5f) - 0.5f, 0.0f);
                    const float sy = std::max(scale_y_2*(iy + 0.5f) - 0.5f, 0.0f);

                    const int x0 = std::max(0, (int)sx);
                    const int y0 = std::max(0, (int)sy);

                    const int x1 = std::min(x0 + 1, cropped_nx - 1);
                    const int y1 = std::min(y0 + 1, cropped_ny - 1);

                    const float dx = sx - x0;
                    const float dy = sy - y0;

                    const int j00 = y0*n_img_size + x0;
                    const int j01 = y0*n_img_size + x1;
                    const int j10 = y1*n_img_size + x0;
                    const int j11 = y1*n_img_size + x1;

                    const float v00 = data[j00];
                    const float v01 = data[j01];
                    const float v10 = data[j10];
                    const float v11 = data[j11];

                    const float v0 = (1-dx)*v00 + dx*v01;
                    const float v1 = (1-dx)*v10 + dx*v11;

                    const float v = (1-dy)*v0 + dy*v1;

                    if (v > intersection_threshold) {
                        intersections++;
                    }
                    if (v > union_threshold) {
                        unions++;
                    }
                    if (v > mask_threshold) {
                        min_iy = std::min(min_iy, iy);
                        max_iy = std::max(max_iy, iy);
                        min_ix = std::min(min_ix, ix);
                        max_ix = std::max(max_ix, ix);

                        res.data[iy*nx + ix] = 255;
                    }
                }
            }
        }

        const float stability_score = float(intersections) / float(unions);
        if (stability_score_threshold > 0.f && stability_score < stability_score_threshold) {
            printf("Skipping mask %d with stability score %f below threshold %f\n", i, stability_score, stability_score_threshold);
            continue; // Filtering masks with stability score below the threshold
        }

        printf("Mask %d: iou = %f, stability_score = %f, bbox (%d, %d), (%d, %d)\n",
               i, iou_data[i], stability_score, min_ix, max_ix, min_iy, max_iy);

        const std::string filename = multimask_output ? fname + std::to_string(i) + ".png" : fname + ".png";
        if (!stbi_write_png(filename.c_str(), res.nx, res.ny, 1, res.data.data(), res.nx)) {
            printf("%s: failed to write mask %s\n", __func__, filename.c_str());
            return false;
        }
    }

    return true;
}

struct ggml_cgraph * sam_build_fast_graph(
        const sam_model  & model,
        sam_state        & state,
        const int          nx,
        const int          ny,
        const sam_prompt & prompt,
        const bool         multimask_output) {
    struct ggml_init_params ggml_params = {
        /*.mem_size   =*/ state.buf_compute_fast.size(),
        /*.mem_buffer =*/ state.buf_compute_fast.data(),
        /*.no_alloc   =*/ true, // skip allocating as we use ggml_alloc to allocate exact memory requirements
    };

    struct ggml_context * ctx0 = ggml_init(ggml_params);
    struct ggml_cgraph  * gf   = ggml_new_graph(ctx0);

    prompt_encoder_result enc_res = sam_encode_prompt(model, ctx0, gf, state, prompt);
    if (!enc_res.embd_prompt_sparse || !enc_res.embd_prompt_dense) {
        fprintf(stderr, "%s: failed to encode prompt\n", __func__);
        return {};
    }

    struct ggml_tensor * pe_img_dense = sam_fill_dense_pe(model, ctx0, gf, state);
    if (!pe_img_dense) {
        fprintf(stderr, "%s: failed to get dense positional encoding\n", __func__);
        return {};
    }

    if (!sam_decode_mask(model, enc_res, pe_img_dense, ctx0, gf, state, multimask_output)) {
        fprintf(stderr, "%s: failed to decode mask\n", __func__);
        return {};
    }

    ggml_free(ctx0);

    ggml_gallocr_alloc_graph(state.allocr, gf);

    struct ggml_tensor * inp = ggml_graph_get_tensor(gf, "prompt_input");
    auto * data = (float *) inp->data;

    // Transform prompt (point or box)
    {
        // https://github.com/facebookresearch/segment-anything/blob/dca509fe793f601edb92606367a655c15ac00fdf/segment_anything/utils/transforms.py#L33
        // The point scaling here is greatly simplified but mathematically equivalent.
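        // equivalence sketch: the reference ResizeLongestSide transform maps a point p to
        //   p' = p * (n_img_size / max(nx, ny))   (resize so the longest side is n_img_size)
        // and the prompt encoder then normalizes by the input size, giving
        //   p'' = p' / n_img_size = p / max(nx, ny)
        // so the single scale factor below suffices before mapping [0, 1] -> [-1, 1]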
const auto scale = 1.0F / std::max(nx, ny); switch (prompt.prompt_type) { case SAM_PROMPT_TYPE_POINT: { const auto & pt = prompt.pt; // set the input by converting the [0, 1] coordinates to [-1, 1] data[0] = 2.0f*pt.x*scale - 1.0f; data[1] = 2.0f*pt.y*scale - 1.0f; // padding // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/prompt_encoder.py#L81-L85 data[2] = 2.0f*(0.0f) - 1.0f; data[3] = 2.0f*(0.0f) - 1.0f; } break; case SAM_PROMPT_TYPE_BOX: { const auto & box = prompt.box; data[0] = 2.0f*box.x1*scale - 1.0f; data[1] = 2.0f*box.y1*scale - 1.0f; data[2] = 2.0f*box.x2*scale - 1.0f; data[3] = 2.0f*box.y2*scale - 1.0f; } break; } } // from sam_fill_dense_pe { struct ggml_tensor * xy_embed_stacked = ggml_graph_get_tensor(gf, "xy_embed_stacked"); const int32_t n_img_embd = model.hparams.n_img_embd(); const float n_img_embd_inv = 1.0f / n_img_embd; float * data = (float *) ggml_get_data(xy_embed_stacked); for (int i = 0; i < n_img_embd; ++i) { const int row = 2*i*n_img_embd; const float y_val = 2 * (i + 0.5f) * n_img_embd_inv - 1; for (int j = 0; j < n_img_embd; ++j) { const float x_val = 2 * (j + 0.5f) * n_img_embd_inv - 1; data[row + 2*j + 0] = x_val; data[row + 2*j + 1] = y_val; } } } return gf; } void sam_print_usage(int argc, char ** argv, const sam_params & params) { fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "options:\n"); fprintf(stderr, " -h, --help show this help message and exit\n"); fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n"); fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads); fprintf(stderr, " -m FNAME, --model FNAME\n"); fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); fprintf(stderr, " -i FNAME, --inp FNAME\n"); fprintf(stderr, " input file (default: %s)\n", params.fname_inp.c_str()); fprintf(stderr, " -o FNAME, --out FNAME\n"); fprintf(stderr, " mask file name prefix (default: %s)\n", params.fname_out.c_str()); fprintf(stderr, " -sm, --single-mask\n"); fprintf(stderr, " single mask output (default multi mask output)\n"); fprintf(stderr, "SAM hyperparameters:\n"); fprintf(stderr, " -mt FLOAT, --mask-threshold\n"); fprintf(stderr, " mask threshold (default: %f)\n", params.mask_threshold); fprintf(stderr, " -it FLOAT, --iou-threshold\n"); fprintf(stderr, " iou threshold (default: %f)\n", params.iou_threshold); fprintf(stderr, " -st FLOAT, --score-threshold\n"); fprintf(stderr, " score threshold (default: %f)\n", params.stability_score_threshold); fprintf(stderr, " -so FLOAT, --score-offset\n"); fprintf(stderr, " score offset (default: %f)\n", params.stability_score_offset); fprintf(stderr, " -e FLOAT, --epsilon\n"); fprintf(stderr, " epsilon (default: %f)\n", params.eps); fprintf(stderr, " -ed FLOAT, --epsilon-decoder-transformer\n"); fprintf(stderr, " epsilon decoder transformer (default: %f)\n", params.eps_decoder_transformer); fprintf(stderr, "SAM prompt:\n"); fprintf(stderr, " -p [x,y], --point-prompt\n"); fprintf(stderr, " point to be used as prompt for SAM (default: %f,%f). Must be in a format FLOAT,FLOAT \n", params.prompt.pt.x, params.prompt.pt.y); fprintf(stderr, " -b [x1,y1,x2,y2], --box-prompt\n"); fprintf(stderr, " box to be used as prompt for SAM (default: %f,%f,%f,%f). 
Must be in a format FLOAT,FLOAT,FLOAT,FLOAT \n", params.prompt.box.x1, params.prompt.box.y1, params.prompt.box.x2, params.prompt.box.y2); fprintf(stderr, "\n"); } bool sam_params_parse(int argc, char ** argv, sam_params & params) { bool use_point_prompt = false; bool use_box_prompt = false; for (int i = 1; i < argc; i++) { std::string arg = argv[i]; if (arg == "-s" || arg == "--seed") { params.seed = std::stoi(argv[++i]); } else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); } else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; } else if (arg == "-i" || arg == "--inp") { params.fname_inp = argv[++i]; } else if (arg == "-o" || arg == "--out") { params.fname_out = argv[++i]; } else if (arg == "-sm" || arg == "--single-mask") { params.multimask_output = false; } else if (arg == "-mt" || arg == "--mask-threshold") { params.mask_threshold = std::stof(argv[++i]); } else if (arg == "-it" || arg == "--iou-threshold") { params.iou_threshold = std::stof(argv[++i]); } else if (arg == "-st" || arg == "--score-threshold") { params.stability_score_threshold = std::stof(argv[++i]); } else if (arg == "-so" || arg == "--score-offset") { params.stability_score_offset = std::stof(argv[++i]); } else if (arg == "-e" || arg == "--epsilon") { params.eps = std::stof(argv[++i]); } else if (arg == "-ed" || arg == "--epsilon-decoder-transformer") { params.eps_decoder_transformer = std::stof(argv[++i]); } else if (arg == "-p" || arg == "--point-prompt") { // TODO multiple points per model invocation use_point_prompt = true; char* point = argv[++i]; char* coord = strtok(point, ","); if (!coord){ fprintf(stderr, "Error while parsing prompt!\n"); exit(1); } params.prompt.pt.x = std::stof(coord); coord = strtok(NULL, ","); if (!coord){ fprintf(stderr, "Error while parsing prompt!\n"); exit(1); } params.prompt.pt.y = std::stof(coord); } else if (arg == "-b" || arg == "--box-prompt") { use_box_prompt = true; char * box_prompt = argv[++i]; float box_vals[4]; char * val = strtok(box_prompt, ","); if (!val) { fprintf(stderr, "Error while parsing prompt!\n"); exit(1); } box_vals[0] = std::stof(val); for (int j = 1; j < 4; ++j) { char * val = strtok(NULL, ","); if (!val) { fprintf(stderr, "Error while parsing prompt!\n"); exit(1); } box_vals[j] = std::stof(val); } params.prompt.box.x1 = box_vals[0]; params.prompt.box.y1 = box_vals[1]; params.prompt.box.x2 = box_vals[2]; params.prompt.box.y2 = box_vals[3]; } else if (arg == "-h" || arg == "--help") { sam_print_usage(argc, argv, params); exit(0); } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); sam_print_usage(argc, argv, params); exit(0); } } if (use_box_prompt && use_point_prompt) { fprintf(stderr, "Error: use either point or box prompt, not both.\n"); exit(1); } params.prompt.prompt_type = SAM_PROMPT_TYPE_POINT; if (use_box_prompt) { params.prompt.prompt_type = SAM_PROMPT_TYPE_BOX; } return true; } int main(int argc, char ** argv) { const int64_t t_main_start_us = ggml_time_us(); sam_params params; params.model = "models/sam-vit-b/ggml-model-f16.bin"; sam_model model; sam_state state; int64_t t_load_us = 0; if (sam_params_parse(argc, argv, params) == false) { return 1; } if (params.seed < 0) { params.seed = time(NULL); } fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); // load the image sam_image_u8 img0; if (!sam_image_load_from_file(params.fname_inp, img0)) { fprintf(stderr, "%s: failed to load image from '%s'\n", __func__, params.fname_inp.c_str()); return 1; } fprintf(stderr, "%s: loaded 
image '%s' (%d x %d)\n", __func__, params.fname_inp.c_str(), img0.nx, img0.ny); // preprocess to f32 sam_image_f32 img1; if (!sam_image_preprocess(img0, img1)) { fprintf(stderr, "%s: failed to preprocess image\n", __func__); return 1; } fprintf(stderr, "%s: preprocessed image (%d x %d)\n", __func__, img1.nx, img1.ny); // load the model { const int64_t t_start_us = ggml_time_us(); if (!sam_model_load(params, model)) { fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); return 1; } t_load_us = ggml_time_us() - t_start_us; } { static size_t buf_size = 256u*1024*1024; struct ggml_init_params ggml_params = { /*.mem_size =*/ buf_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, }; state.ctx = ggml_init(ggml_params); state.embd_img = ggml_new_tensor_3d(state.ctx, GGML_TYPE_F32, model.hparams.n_img_embd(), model.hparams.n_img_embd(), model.hparams.n_enc_out_chans); state.low_res_masks = ggml_new_tensor_3d(state.ctx, GGML_TYPE_F32, model.hparams.n_enc_out_chans, model.hparams.n_enc_out_chans, 3); state.iou_predictions = ggml_new_tensor_1d(state.ctx, GGML_TYPE_F32, 3); } // Encode image { state.buf_compute_img_enc.resize(ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead()); state.allocr = ggml_gallocr_new(ggml_backend_cpu_buffer_type()); struct ggml_cgraph * gf = sam_encode_image(model, state, img1); if (!gf) { fprintf(stderr, "%s: failed to encode image\n", __func__); return 1; } ggml_graph_compute_helper(state.work_buffer, gf, params.n_threads); // print_t_f32("embd_img", state.embd_img); ggml_gallocr_free(state.allocr); state.allocr = NULL; state.work_buffer.clear(); } // Encode prompt and decode mask { state.buf_compute_fast.resize(ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead()); state.allocr = ggml_gallocr_new(ggml_backend_cpu_buffer_type()); switch (params.prompt.prompt_type) { case SAM_PROMPT_TYPE_POINT: fprintf(stderr, "Using point prompt: (%f, %f)\n", params.prompt.pt.x, params.prompt.pt.y); break; case SAM_PROMPT_TYPE_BOX: fprintf(stderr, "Using box prompt: (%f, %f, %f, %f)\n", params.prompt.box.x1, params.prompt.box.y1, params.prompt.box.x2, params.prompt.box.y2); break; } struct ggml_cgraph * gf = sam_build_fast_graph(model, state, img0.nx, img0.ny, params.prompt, params.multimask_output); if (!gf) { fprintf(stderr, "%s: failed to build fast graph\n", __func__); return 1; } ggml_graph_compute_helper(state.work_buffer, gf, params.n_threads); //print_t_f32("iou_predictions", state.iou_predictions); //print_t_f32("low_res_masks", state.low_res_masks); ggml_gallocr_free(state.allocr); state.allocr = NULL; } if (!sam_write_masks(model.hparams, img0.nx, img0.ny, state, params.fname_out, params.multimask_output)) { fprintf(stderr, "%s: failed to write masks\n", __func__); return 1; } // report timing { const int64_t t_main_end_us = ggml_time_us(); fprintf(stderr, "\n\n"); fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, t_load_us/1000.0f); fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f); } ggml_free(model.ctx); return 0; } ggml-org-ggml-7ec8045/examples/simple/000077500000000000000000000000001506673203700176325ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/simple/CMakeLists.txt000066400000000000000000000006571506673203700224020ustar00rootroot00000000000000# # simple-ctx set(TEST_TARGET simple-ctx) add_executable(${TEST_TARGET} simple-ctx.cpp) target_link_libraries(${TEST_TARGET} PRIVATE ggml) # # simple-backend set(TEST_TARGET simple-backend) 
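# GGML_USE_CUDA / GGML_USE_METAL below are compile-time switches: they gate the
# corresponding #ifdef blocks (backend headers and ggml_backend_*_init() calls)
# in simple-backend.cpp.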
add_executable(${TEST_TARGET} simple-backend.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml)

if (GGML_CUDA)
    add_compile_definitions(GGML_USE_CUDA)
endif()

if (GGML_METAL)
    add_compile_definitions(GGML_USE_METAL)
endif()
ggml-org-ggml-7ec8045/examples/simple/README.md000066400000000000000000000020661506673203700211150ustar00rootroot00000000000000
## Simple

This example simply performs a matrix multiplication, solely for the purpose of demonstrating a basic usage of ggml and backend handling. The code is commented to help understand what each part does.

Traditional matrix multiplication goes like this (multiply row-by-column):

$$
A \times B = C
$$

$$
\begin{bmatrix}
2 & 8 \\
5 & 1 \\
4 & 2 \\
8 & 6 \\
\end{bmatrix}
\times
\begin{bmatrix}
10 & 9 & 5 \\
5 & 9 & 4 \\
\end{bmatrix}
=
\begin{bmatrix}
60 & 90 & 42 \\
55 & 54 & 29 \\
50 & 54 & 28 \\
110 & 126 & 64 \\
\end{bmatrix}
$$

In `ggml`, we pass the matrix $B$ in transposed form and multiply row-by-row. The result $C$ is also transposed:

$$
ggml\\_mul\\_mat(A, B^T) = C^T
$$

$$
ggml\\_mul\\_mat(
\begin{bmatrix}
2 & 8 \\
5 & 1 \\
4 & 2 \\
8 & 6 \\
\end{bmatrix}
,
\begin{bmatrix}
10 & 5 \\
9 & 9 \\
5 & 4 \\
\end{bmatrix}
)
=
\begin{bmatrix}
60 & 55 & 50 & 110 \\
90 & 54 & 54 & 126 \\
42 & 29 & 28 & 64 \\
\end{bmatrix}
$$

The `simple-ctx` example doesn't support GPU acceleration; `simple-backend` demonstrates how to use other backends such as CUDA and Metal.
ggml-org-ggml-7ec8045/examples/simple/simple-backend.cpp000066400000000000000000000144661506673203700232270ustar00rootroot00000000000000
#include "ggml.h"
#include "ggml-cpu.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif

#ifdef GGML_USE_METAL
#include "ggml-metal.h"
#endif

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <vector>

static void ggml_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
    fflush(stderr);
}

// This is a simple model with two tensors a and b
struct simple_model {
    struct ggml_tensor * a;
    struct ggml_tensor * b;

    // the backend to perform the computation (CPU, CUDA, METAL)
    ggml_backend_t backend = NULL;

    // the backend buffer to store the tensor data of a and b
    ggml_backend_buffer_t buffer;

    // the context to define the tensor information (dimensions, size, memory address)
    struct ggml_context * ctx;
};

// initialize the tensors of the model, in this case two 2D matrices
void load_model(simple_model & model, float * a, float * b, int rows_A, int cols_A, int rows_B, int cols_B) {
    ggml_log_set(ggml_log_callback_default, nullptr);

    // initialize the backend
#ifdef GGML_USE_CUDA
    fprintf(stderr, "%s: using CUDA backend\n", __func__);
    model.backend = ggml_backend_cuda_init(0); // init device 0
    if (!model.backend) {
        fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
    }
#endif

#ifdef GGML_USE_METAL
    fprintf(stderr, "%s: using Metal backend\n", __func__);
    model.backend = ggml_backend_metal_init();
    if (!model.backend) {
        fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
    }
#endif

    // if there is no GPU backend, fall back to the CPU backend
    if (!model.backend) {
        model.backend = ggml_backend_cpu_init();
    }

    int num_tensors = 2;

    struct ggml_init_params params {
        /*.mem_size   =*/ ggml_tensor_overhead() * num_tensors,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };

    // create context
    model.ctx = ggml_init(params);

    // create tensors
    model.a = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, cols_A, rows_A);
    model.b = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, cols_B, rows_B);
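    // note: in ggml, ne[0] is the fastest-varying dimension (the number of columns),
    // so an n_rows x n_cols matrix is created as ggml_new_tensor_2d(ctx, type, n_cols, n_rows).
    // ggml_mul_mat(A, B) requires A->ne[0] == B->ne[0] (the shared inner dimension),
    // which is why B is supplied already transposed, as described in the README above.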
    // create a backend buffer (backend memory) and alloc the tensors from the context
    model.buffer = ggml_backend_alloc_ctx_tensors(model.ctx, model.backend);

    // load data from cpu memory to backend buffer
    ggml_backend_tensor_set(model.a, a, 0, ggml_nbytes(model.a));
    ggml_backend_tensor_set(model.b, b, 0, ggml_nbytes(model.b));
}

// build the compute graph to perform a matrix multiplication
struct ggml_cgraph * build_graph(const simple_model& model) {
    static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
    static std::vector<uint8_t> buf(buf_size);

    struct ggml_init_params params0 = {
        /*.mem_size   =*/ buf_size,
        /*.mem_buffer =*/ buf.data(),
        /*.no_alloc   =*/ true, // the tensors will be allocated later by ggml_gallocr_alloc_graph()
    };

    // create a temporary context to build the graph
    struct ggml_context * ctx0 = ggml_init(params0);

    struct ggml_cgraph * gf = ggml_new_graph(ctx0);

    // result = a*b^T
    struct ggml_tensor * result = ggml_mul_mat(ctx0, model.a, model.b);

    // build operations nodes
    ggml_build_forward_expand(gf, result);

    // delete the temporary context used to build the graph
    ggml_free(ctx0);
    return gf;
}

// compute with backend
struct ggml_tensor * compute(const simple_model & model, ggml_gallocr_t allocr) {
    // reset the allocator to free all the memory allocated during the previous inference
    struct ggml_cgraph * gf = build_graph(model);

    // allocate tensors
    ggml_gallocr_alloc_graph(allocr, gf);

    int n_threads = 1; // number of threads to perform some operations with multi-threading

    if (ggml_backend_is_cpu(model.backend)) {
        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
    }

    ggml_backend_graph_compute(model.backend, gf);

    // in this case, the output tensor is the last one in the graph
    return ggml_graph_node(gf, -1);
}

int main(void) {
    ggml_time_init();

    // initialize data of matrices to perform matrix multiplication
    const int rows_A = 4, cols_A = 2;

    float matrix_A[rows_A * cols_A] = {
        2, 8,
        5, 1,
        4, 2,
        8, 6
    };

    const int rows_B = 3, cols_B = 2;
    /* Transpose([
        10, 9, 5,
        5, 9, 4
    ]) 2 rows, 3 cols */
    float matrix_B[rows_B * cols_B] = {
        10, 5,
        9, 9,
        5, 4
    };

    simple_model model;
    load_model(model, matrix_A, matrix_B, rows_A, cols_A, rows_B, cols_B);

    // calculate the temporary memory required to compute
    ggml_gallocr_t allocr = NULL;

    {
        allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));

        // create the worst case graph for memory usage estimation
        struct ggml_cgraph * gf = build_graph(model);
        ggml_gallocr_reserve(allocr, gf);
        size_t mem_size = ggml_gallocr_get_buffer_size(allocr, 0);

        fprintf(stderr, "%s: compute buffer size: %.4f KB\n", __func__, mem_size/1024.0);
    }

    // perform computation
    struct ggml_tensor * result = compute(model, allocr);

    // create an array to print the result
    std::vector<float> out_data(ggml_nelements(result));

    // bring the data from the backend memory
    ggml_backend_tensor_get(result, out_data.data(), 0, ggml_nbytes(result));

    // expected result:
    // [ 60.00 55.00 50.00 110.00
    //   90.00 54.00 54.00 126.00
    //   42.00 29.00 28.00 64.00 ]

    printf("mul mat (%d x %d) (transposed result):\n[", (int) result->ne[0], (int) result->ne[1]);
    for (int j = 0; j < result->ne[1] /* rows */; j++) {
        if (j > 0) {
            printf("\n");
        }

        for (int i = 0; i < result->ne[0] /* cols */; i++) {
            printf(" %.2f", out_data[j * result->ne[0] + i]);
        }
    }
    printf(" ]\n");

    // release backend memory used for computation
    ggml_gallocr_free(allocr);

    // free memory
    ggml_free(model.ctx);
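    // teardown order sketch: the graph allocator is freed first, then the ggml context
    // (which only holds tensor metadata here, since no_alloc was true), and finally the
    // backend buffer and backend that own the actual tensor data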
    // release backend memory and free backend
    ggml_backend_buffer_free(model.buffer);
    ggml_backend_free(model.backend);
    return 0;
}
ggml-org-ggml-7ec8045/examples/simple/simple-ctx.cpp000066400000000000000000000071061506673203700224270ustar00rootroot00000000000000
#include "ggml.h"
#include "ggml-cpu.h"

#include <cmath>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <vector>

// This is a simple model with two tensors a and b
struct simple_model {
    struct ggml_tensor * a;
    struct ggml_tensor * b;

    // the context to define the tensor information (dimensions, size, memory data)
    struct ggml_context * ctx;
};

// initialize the tensors of the model, in this case two 2D matrices
void load_model(simple_model & model, float * a, float * b, int rows_A, int cols_A, int rows_B, int cols_B) {
    size_t ctx_size = 0;
    {
        ctx_size += rows_A * cols_A * ggml_type_size(GGML_TYPE_F32); // tensor a
        ctx_size += rows_B * cols_B * ggml_type_size(GGML_TYPE_F32); // tensor b
        ctx_size += 2 * ggml_tensor_overhead(); // tensors
        ctx_size += ggml_graph_overhead(); // compute graph
        ctx_size += 1024; // some overhead
    }

    struct ggml_init_params params {
        /*.mem_size   =*/ ctx_size,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false, // NOTE: this should be false when using the legacy API
    };

    // create context
    model.ctx = ggml_init(params);

    // create tensors
    model.a = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, cols_A, rows_A);
    model.b = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, cols_B, rows_B);

    memcpy(model.a->data, a, ggml_nbytes(model.a));
    memcpy(model.b->data, b, ggml_nbytes(model.b));
}

// build the compute graph to perform a matrix multiplication
struct ggml_cgraph * build_graph(const simple_model& model) {
    struct ggml_cgraph * gf = ggml_new_graph(model.ctx);

    // result = a*b^T
    struct ggml_tensor * result = ggml_mul_mat(model.ctx, model.a, model.b);

    ggml_build_forward_expand(gf, result);
    return gf;
}

// compute with backend
struct ggml_tensor * compute(const simple_model & model) {
    struct ggml_cgraph * gf = build_graph(model);

    int n_threads = 1; // number of threads to perform some operations with multi-threading

    ggml_graph_compute_with_ctx(model.ctx, gf, n_threads);

    // in this case, the output tensor is the last one in the graph
    return ggml_graph_node(gf, -1);
}

int main(void) {
    ggml_time_init();

    // initialize data of matrices to perform matrix multiplication
    const int rows_A = 4, cols_A = 2;

    float matrix_A[rows_A * cols_A] = {
        2, 8,
        5, 1,
        4, 2,
        8, 6
    };

    const int rows_B = 3, cols_B = 2;
    /* Transpose([
        10, 9, 5,
        5, 9, 4
    ]) 2 rows, 3 cols */
    float matrix_B[rows_B * cols_B] = {
        10, 5,
        9, 9,
        5, 4
    };

    simple_model model;
    load_model(model, matrix_A, matrix_B, rows_A, cols_A, rows_B, cols_B);

    // perform the computation on the CPU
    struct ggml_tensor * result = compute(model);

    // get the result data pointer as a float array to print
    std::vector<float> out_data(ggml_nelements(result));
    memcpy(out_data.data(), result->data, ggml_nbytes(result));

    // expected result:
    // [ 60.00 55.00 50.00 110.00
    //   90.00 54.00 54.00 126.00
    //   42.00 29.00 28.00 64.00 ]

    printf("mul mat (%d x %d) (transposed result):\n[", (int) result->ne[0], (int) result->ne[1]);
    for (int j = 0; j < result->ne[1] /* rows */; j++) {
        if (j > 0) {
            printf("\n");
        }

        for (int i = 0; i < result->ne[0] /* cols */; i++) {
            printf(" %.2f", out_data[j * result->ne[0] + i]);
        }
    }
    printf(" ]\n");

    // free memory
    ggml_free(model.ctx);
    return 0;
}
ggml-org-ggml-7ec8045/examples/stb_image.h000066400000000000000000010540751506673203700204560ustar00rootroot00000000000000
/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb no
warranty implied; use at your own risk Do this: #define STB_IMAGE_IMPLEMENTATION before you include this file in *one* C or C++ file to create the implementation. // i.e. it should look like this: #include ... #include ... #include ... #define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free QUICK NOTES: Primarily of interest to game developers and other people who can avoid problematic images and only need the trivial interface JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) PNG 1/2/4/8/16-bit-per-channel TGA (not sure what subset, if a subset) BMP non-1bpp, non-RLE PSD (composited view only, no extra channels, 8/16 bit-per-channel) GIF (*comp always reports as 4-channel) HDR (radiance rgbE format) PIC (Softimage PIC) PNM (PPM and PGM binary only) Animated GIF still needs a proper API, but here's one way to do it: http://gist.github.com/urraka/685d9a6340b26b830d49 - decode from memory or through FILE (define STBI_NO_STDIO to remove code) - decode from arbitrary I/O callbacks - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) Full documentation under "DOCUMENTATION" below. LICENSE See end of file for license information. RECENT REVISION HISTORY: 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes 2.26 (2020-07-13) many minor fixes 2.25 (2020-02-02) fix warnings 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically 2.23 (2019-08-11) fix clang static analysis warning 2.22 (2019-03-04) gif fixes, fix warnings 2.21 (2019-02-25) fix typo in comment 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs 2.19 (2018-02-11) fix warning 2.18 (2018-01-30) fix warnings 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 RGB-format JPEG; remove white matting in PSD; allocate large structures on the stack; correct channel count for PNG & BMP 2.10 (2016-01-22) avoid warning introduced in 2.09 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED See end of file for full revision history. 
============================ Contributors ========================= Image formats Extensions, features Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) github:urraka (animated gif) Junggon Kim (PNM comments) Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) socks-the-fox (16-bit PNG) Jeremy Sawicki (handle all ImageNet JPGs) Optimizations & bugfixes Mikhail Morozov (1-bit BMP) Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) Arseny Kapoulkine Simon Breuss (16-bit PNM) John-Mark Allen Carmelo J Fdez-Aguera Bug & warning fixes Marc LeBlanc David Woo Guillaume George Martins Mozeiko Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski Phil Jordan Dave Moore Roy Eltham Hayaki Saito Nathan Reed Won Chun Luke Graham Johan Duparc Nick Verigakis the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh Janez Zemva John Bartholomew Michal Cichon github:romigrou Jonathan Blow Ken Hamada Tero Hanninen github:svdijk Eugene Golushkov Laurent Gomila Cort Stratton github:snagar Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex Cass Everitt Ryamond Barbiero github:grim210 Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus Josh Tobin Neil Bickford Matthew Gregan github:poppolopoppo Julian Raschke Gregory Mullen Christian Floisand github:darealshinji Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007 Brad Weinberger Matvey Cherevko github:mosra Luca Sas Alexander Veselov Zack Middleton [reserved] Ryan C. Gordon [reserved] [reserved] DO NOT ADD YOUR NAME HERE Jacko Dirks To add your name to the credits, pick a random blank space in the middle and fill it. 80% of merge conflicts on stb PRs are due to people adding their name at the end of the credits. */ #ifndef STBI_INCLUDE_STB_IMAGE_H #define STBI_INCLUDE_STB_IMAGE_H // DOCUMENTATION // // Limitations: // - no 12-bit-per-channel JPEG // - no JPEGs with arithmetic coding // - GIF always returns *comp=4 // // Basic usage (see HDR discussion below for HDR usage): // int x,y,n; // unsigned char *data = stbi_load(filename, &x, &y, &n, 0); // // ... process data if not NULL ... // // ... x = width, y = height, n = # 8-bit components per pixel ... // // ... replace '0' with '1'..'4' to force that many components per pixel // // ... but 'n' will always be the number that it would have been if you said 0 // stbi_image_free(data); // // Standard parameters: // int *x -- outputs image width in pixels // int *y -- outputs image height in pixels // int *channels_in_file -- outputs # of image components in image file // int desired_channels -- if non-zero, # of image components requested in result // // The return value from an image loader is an 'unsigned char *' which points // to the pixel data, or NULL on an allocation failure or if the image is // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, // with each pixel consisting of N interleaved 8-bit components; the first // pixel pointed to is top-left-most in the image. There is no padding between // image scanlines or between pixels, regardless of format. 
The number of // components N is 'desired_channels' if desired_channels is non-zero, or // *channels_in_file otherwise. If desired_channels is non-zero, // *channels_in_file has the number of components that _would_ have been // output otherwise. E.g. if you set desired_channels to 4, you will always // get RGBA output, but you can check *channels_in_file to see if it's trivially // opaque because e.g. there were only 3 channels in the source image. // // An output image with N components has the following components interleaved // in this order in each pixel: // // N=#comp components // 1 grey // 2 grey, alpha // 3 red, green, blue // 4 red, green, blue, alpha // // If image loading fails for any reason, the return value will be NULL, // and *x, *y, *channels_in_file will be unchanged. The function // stbi_failure_reason() can be queried for an extremely brief, end-user // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly // more user-friendly ones. // // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. // // To query the width, height and component count of an image without having to // decode the full file, you can use the stbi_info family of functions: // // int x,y,n,ok; // ok = stbi_info(filename, &x, &y, &n); // // returns ok=1 and sets x, y, n if image is a supported format, // // 0 otherwise. // // Note that stb_image pervasively uses ints in its public API for sizes, // including sizes of memory buffers. This is now part of the API and thus // hard to change without causing breakage. As a result, the various image // loaders all have certain limits on image size; these differ somewhat // by format but generally boil down to either just under 2GB or just under // 1GB. When the decoded image would be larger than this, stb_image decoding // will fail. // // Additionally, stb_image will reject image files that have any of their // dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS, // which defaults to 2**24 = 16777216 pixels. Due to the above memory limit, // the only way to have an image with such dimensions load correctly // is for it to have a rather extreme aspect ratio. Either way, the // assumption here is that such larger images are likely to be malformed // or malicious. If you do need to load an image with individual dimensions // larger than that, and it still fits in the overall size limit, you can // #define STBI_MAX_DIMENSIONS on your own to be something larger. // // =========================================================================== // // UNICODE: // // If compiling for Windows and you wish to use Unicode filenames, compile // with // #define STBI_WINDOWS_UTF8 // and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert // Windows wchar_t filenames to utf8. // // =========================================================================== // // Philosophy // // stb libraries are designed with the following priorities: // // 1. easy to use // 2. easy to maintain // 3. good performance // // Sometimes I let "good performance" creep up in priority over "easy to maintain", // and for best performance I may provide less-easy-to-use APIs that give higher // performance, in addition to the easy-to-use ones. 
Nevertheless, it's important // to keep in mind that from the standpoint of you, a client of this library, // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. // // Some secondary priorities arise directly from the first two, some of which // provide more explicit reasons why performance can't be emphasized. // // - Portable ("ease of use") // - Small source code footprint ("easy to maintain") // - No dependencies ("ease of use") // // =========================================================================== // // I/O callbacks // // I/O callbacks allow you to read from arbitrary sources, like packaged // files or some other source. Data read from callbacks are processed // through a small internal buffer (currently 128 bytes) to try to reduce // overhead. // // The three functions you must define are "read" (reads some bytes of data), // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). // // =========================================================================== // // SIMD support // // The JPEG decoder will try to automatically use SIMD kernels on x86 when // supported by the compiler. For ARM Neon support, you must explicitly // request it. // // (The old do-it-yourself SIMD API is no longer supported in the current // code.) // // On x86, SSE2 will automatically be used when available based on a run-time // test; if not, the generic C versions are used as a fall-back. On ARM targets, // the typical path is to have separate builds for NEON and non-NEON devices // (at least this is true for iOS and Android). Therefore, the NEON support is // toggled by a build flag: define STBI_NEON to get NEON loops. // // If for some reason you do not want to use any of SIMD code, or if // you have issues compiling it, you can disable it entirely by // defining STBI_NO_SIMD. // // =========================================================================== // // HDR image support (disable by defining STBI_NO_HDR) // // stb_image supports loading HDR images in general, and currently the Radiance // .HDR file format specifically. You can still load any file through the existing // interface; if you attempt to load an HDR file, it will be automatically remapped // to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; // both of these constants can be reconfigured through this interface: // // stbi_hdr_to_ldr_gamma(2.2f); // stbi_hdr_to_ldr_scale(1.0f); // // (note, do not use _inverse_ constants; stbi_image will invert them // appropriately). // // Additionally, there is a new, parallel interface for loading files as // (linear) floats to preserve the full dynamic range: // // float *data = stbi_loadf(filename, &x, &y, &n, 0); // // If you load LDR images through this interface, those images will // be promoted to floating point values, run through the inverse of // constants corresponding to the above: // // stbi_ldr_to_hdr_scale(1.0f); // stbi_ldr_to_hdr_gamma(2.2f); // // Finally, given a filename (or an open file or memory block--see header // file for details) containing image data, you can query for the "most // appropriate" interface to use (that is, whether the image is HDR or // not), using: // // stbi_is_hdr(char *filename); // // =========================================================================== // // iPhone PNG support: // // We optionally support converting iPhone-formatted PNGs (which store // premultiplied BGRA) back to RGB, even though they're internally encoded // differently. 
To enable this conversion, call // stbi_convert_iphone_png_to_rgb(1). // // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per // pixel to remove any premultiplied alpha *only* if the image file explicitly // says there's premultiplied data (currently only happens in iPhone images, // and only if iPhone convert-to-rgb processing is on). // // =========================================================================== // // ADDITIONAL CONFIGURATION // // - You can suppress implementation of any of the decoders to reduce // your code footprint by #defining one or more of the following // symbols before creating the implementation. // // STBI_NO_JPEG // STBI_NO_PNG // STBI_NO_BMP // STBI_NO_PSD // STBI_NO_TGA // STBI_NO_GIF // STBI_NO_HDR // STBI_NO_PIC // STBI_NO_PNM (.ppm and .pgm) // // - You can request *only* certain decoders and suppress all other ones // (this will be more forward-compatible, as addition of new decoders // doesn't require you to disable them explicitly): // // STBI_ONLY_JPEG // STBI_ONLY_PNG // STBI_ONLY_BMP // STBI_ONLY_PSD // STBI_ONLY_TGA // STBI_ONLY_GIF // STBI_ONLY_HDR // STBI_ONLY_PIC // STBI_ONLY_PNM (.ppm and .pgm) // // - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still // want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB // // - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater // than that size (in either width or height) without further processing. // This is to let programs in the wild set an upper bound to prevent // denial-of-service attacks on untrusted data, as one could generate a // valid image of gigantic dimensions and force stb_image to allocate a // huge block of memory and spend disproportionate time decoding it. By // default this is set to (1 << 24), which is 16777216, but that's still // very big. #ifndef STBI_NO_STDIO #include #endif // STBI_NO_STDIO #define STBI_VERSION 1 enum { STBI_default = 0, // only used for desired_channels STBI_grey = 1, STBI_grey_alpha = 2, STBI_rgb = 3, STBI_rgb_alpha = 4 }; #include typedef unsigned char stbi_uc; typedef unsigned short stbi_us; #ifdef __cplusplus extern "C" { #endif #ifndef STBIDEF #ifdef STB_IMAGE_STATIC #define STBIDEF static #else #define STBIDEF extern #endif #endif ////////////////////////////////////////////////////////////////////////////// // // PRIMARY API - works on images of any type // // // load image by filename, open file, or memory buffer // typedef struct { int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. 
return number of bytes actually read void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative int (*eof) (void *user); // returns nonzero if we are at end of file/data } stbi_io_callbacks; //////////////////////////////////// // // 8-bits-per-channel interface // STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); #ifndef STBI_NO_STDIO STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); // for stbi_load_from_file, file pointer is left pointing immediately after image #endif #ifndef STBI_NO_GIF STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); #endif #ifdef STBI_WINDOWS_UTF8 STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); #endif //////////////////////////////////// // // 16-bits-per-channel interface // STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); #ifndef STBI_NO_STDIO STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); #endif //////////////////////////////////// // // float-per-channel interface // #ifndef STBI_NO_LINEAR STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); #ifndef STBI_NO_STDIO STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); #endif #endif #ifndef STBI_NO_HDR STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); STBIDEF void stbi_hdr_to_ldr_scale(float scale); #endif // STBI_NO_HDR #ifndef STBI_NO_LINEAR STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); STBIDEF void stbi_ldr_to_hdr_scale(float scale); #endif // STBI_NO_LINEAR // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); #ifndef STBI_NO_STDIO STBIDEF int stbi_is_hdr (char const *filename); STBIDEF int stbi_is_hdr_from_file(FILE *f); #endif // STBI_NO_STDIO // get a VERY brief reason for failure // on most compilers (and ALL modern mainstream compilers) this is threadsafe STBIDEF const char *stbi_failure_reason (void); // free the loaded image -- this is just free() STBIDEF void stbi_image_free (void *retval_from_stbi_load); // get image dimensions & components without fully decoding STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); STBIDEF int 
stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); #ifndef STBI_NO_STDIO STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); STBIDEF int stbi_is_16_bit (char const *filename); STBIDEF int stbi_is_16_bit_from_file(FILE *f); #endif // for image formats that explicitly notate that they have premultiplied alpha, // we just return the colors as stored in the file. set this flag to force // unpremultiplication. results are undefined if the unpremultiply overflow. STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); // indicate whether we should process iphone images back to canonical format, // or just pass them through "as-is" STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); // flip the image vertically, so the first pixel in the output array is the bottom left STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); // as above, but only applies to images loaded on the thread that calls the function // this function is only available if your compiler supports thread-local variables; // calling it will fail to link if your compiler doesn't STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply); STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert); STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip); // ZLIB client - used by PNG, available for other purposes STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); #ifdef __cplusplus } #endif // // //// end header file ///////////////////////////////////////////////////// #endif // STBI_INCLUDE_STB_IMAGE_H #ifdef STB_IMAGE_IMPLEMENTATION #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ || defined(STBI_ONLY_ZLIB) #ifndef STBI_ONLY_JPEG #define STBI_NO_JPEG #endif #ifndef STBI_ONLY_PNG #define STBI_NO_PNG #endif #ifndef STBI_ONLY_BMP #define STBI_NO_BMP #endif #ifndef STBI_ONLY_PSD #define STBI_NO_PSD #endif #ifndef STBI_ONLY_TGA #define STBI_NO_TGA #endif #ifndef STBI_ONLY_GIF #define STBI_NO_GIF #endif #ifndef STBI_ONLY_HDR #define STBI_NO_HDR #endif #ifndef STBI_ONLY_PIC #define STBI_NO_PIC #endif #ifndef STBI_ONLY_PNM #define STBI_NO_PNM #endif #endif #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) #define STBI_NO_ZLIB #endif #include #include // ptrdiff_t on osx #include #include #include #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) #include // ldexp, pow #endif #ifndef STBI_NO_STDIO #include #endif 
#ifndef STBI_ASSERT #include #define STBI_ASSERT(x) assert(x) #endif #ifdef __cplusplus #define STBI_EXTERN extern "C" #else #define STBI_EXTERN extern #endif #ifndef _MSC_VER #ifdef __cplusplus #define stbi_inline inline #else #define stbi_inline #endif #else #define stbi_inline __forceinline #endif #ifndef STBI_NO_THREAD_LOCALS #if defined(__cplusplus) && __cplusplus >= 201103L #define STBI_THREAD_LOCAL thread_local #elif defined(__GNUC__) && __GNUC__ < 5 #define STBI_THREAD_LOCAL __thread #elif defined(_MSC_VER) #define STBI_THREAD_LOCAL __declspec(thread) #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__) #define STBI_THREAD_LOCAL _Thread_local #endif #ifndef STBI_THREAD_LOCAL #if defined(__GNUC__) #define STBI_THREAD_LOCAL __thread #endif #endif #endif #if defined(_MSC_VER) || defined(__SYMBIAN32__) typedef unsigned short stbi__uint16; typedef signed short stbi__int16; typedef unsigned int stbi__uint32; typedef signed int stbi__int32; #else #include typedef uint16_t stbi__uint16; typedef int16_t stbi__int16; typedef uint32_t stbi__uint32; typedef int32_t stbi__int32; #endif // should produce compiler error if size is wrong typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #ifdef _MSC_VER #define STBI_NOTUSED(v) (void)(v) #else #define STBI_NOTUSED(v) (void)sizeof(v) #endif #ifdef _MSC_VER #define STBI_HAS_LROTL #endif #ifdef STBI_HAS_LROTL #define stbi_lrot(x,y) _lrotl(x,y) #else #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) #endif #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) // ok #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) // ok #else #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." #endif #ifndef STBI_MALLOC #define STBI_MALLOC(sz) malloc(sz) #define STBI_REALLOC(p,newsz) realloc(p,newsz) #define STBI_FREE(p) free(p) #endif #ifndef STBI_REALLOC_SIZED #define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) #endif // x86/x64 detection #if defined(__x86_64__) || defined(_M_X64) #define STBI__X64_TARGET #elif defined(__i386) || defined(_M_IX86) #define STBI__X86_TARGET #endif #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) // gcc doesn't support sse2 intrinsics unless you compile with -msse2, // which in turn means it gets to use SSE2 everywhere. This is unfortunate, // but previous attempts to provide the SSE2 functions with runtime // detection caused numerous issues. The way architecture extensions are // exposed in GCC/Clang is, sadly, not really suited for one-file libs. // New behavior: if compiled with -msse2, we use SSE2 without any // detection; if not, we don't use it at all. #define STBI_NO_SIMD #endif #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET // // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not // simultaneously enabling "-mstackrealign". // // See https://github.com/nothings/stb/issues/81 for more information. // // So default to no SSE2 on 32-bit MinGW. 
If you've read this far and added // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. #define STBI_NO_SIMD #endif #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) #define STBI_SSE2 #include #ifdef _MSC_VER #if _MSC_VER >= 1400 // not VC6 #include // __cpuid static int stbi__cpuid3(void) { int info[4]; __cpuid(info,1); return info[3]; } #else static int stbi__cpuid3(void) { int res; __asm { mov eax,1 cpuid mov res,edx } return res; } #endif #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) static int stbi__sse2_available(void) { int info3 = stbi__cpuid3(); return ((info3 >> 26) & 1) != 0; } #endif #else // assume GCC-style if not VC++ #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) static int stbi__sse2_available(void) { // If we're even attempting to compile this on GCC/Clang, that means // -msse2 is on, which means the compiler is allowed to use SSE2 // instructions at will, and so are we. return 1; } #endif #endif #endif // ARM NEON #if defined(STBI_NO_SIMD) && defined(STBI_NEON) #undef STBI_NEON #endif #ifdef STBI_NEON #include #ifdef _MSC_VER #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name #else #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) #endif #endif #ifndef STBI_SIMD_ALIGN #define STBI_SIMD_ALIGN(type, name) type name #endif #ifndef STBI_MAX_DIMENSIONS #define STBI_MAX_DIMENSIONS (1 << 24) #endif /////////////////////////////////////////////// // // stbi__context struct and start_xxx functions // stbi__context structure is our basic context used by all images, so it // contains all the IO context, plus some basic image information typedef struct { stbi__uint32 img_x, img_y; int img_n, img_out_n; stbi_io_callbacks io; void *io_user_data; int read_from_callbacks; int buflen; stbi_uc buffer_start[128]; int callback_already_read; stbi_uc *img_buffer, *img_buffer_end; stbi_uc *img_buffer_original, *img_buffer_original_end; } stbi__context; static void stbi__refill_buffer(stbi__context *s); // initialize a memory-decode context static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) { s->io.read = NULL; s->read_from_callbacks = 0; s->callback_already_read = 0; s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; } // initialize a callback-based context static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) { s->io = *c; s->io_user_data = user; s->buflen = sizeof(s->buffer_start); s->read_from_callbacks = 1; s->callback_already_read = 0; s->img_buffer = s->img_buffer_original = s->buffer_start; stbi__refill_buffer(s); s->img_buffer_original_end = s->img_buffer_end; } #ifndef STBI_NO_STDIO static int stbi__stdio_read(void *user, char *data, int size) { return (int) fread(data,1,size,(FILE*) user); } static void stbi__stdio_skip(void *user, int n) { int ch; fseek((FILE*) user, n, SEEK_CUR); ch = fgetc((FILE*) user); /* have to read a byte to reset feof()'s flag */ if (ch != EOF) { ungetc(ch, (FILE *) user); /* push byte back onto stream if valid. 
*/ } } static int stbi__stdio_eof(void *user) { return feof((FILE*) user) || ferror((FILE *) user); } static stbi_io_callbacks stbi__stdio_callbacks = { stbi__stdio_read, stbi__stdio_skip, stbi__stdio_eof, }; static void stbi__start_file(stbi__context *s, FILE *f) { stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); } //static void stop_file(stbi__context *s) { } #endif // !STBI_NO_STDIO static void stbi__rewind(stbi__context *s) { // conceptually rewind SHOULD rewind to the beginning of the stream, // but we just rewind to the beginning of the initial buffer, because // we only use it after doing 'test', which only ever looks at at most 92 bytes s->img_buffer = s->img_buffer_original; s->img_buffer_end = s->img_buffer_original_end; } enum { STBI_ORDER_RGB, STBI_ORDER_BGR }; typedef struct { int bits_per_channel; int num_channels; int channel_order; } stbi__result_info; #ifndef STBI_NO_JPEG static int stbi__jpeg_test(stbi__context *s); static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PNG static int stbi__png_test(stbi__context *s); static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); static int stbi__png_is16(stbi__context *s); #endif #ifndef STBI_NO_BMP static int stbi__bmp_test(stbi__context *s); static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_TGA static int stbi__tga_test(stbi__context *s); static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PSD static int stbi__psd_test(stbi__context *s); static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); static int stbi__psd_is16(stbi__context *s); #endif #ifndef STBI_NO_HDR static int stbi__hdr_test(stbi__context *s); static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PIC static int stbi__pic_test(stbi__context *s); static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_GIF static int stbi__gif_test(stbi__context *s); static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PNM static int stbi__pnm_test(stbi__context *s); static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); static int stbi__pnm_is16(stbi__context *s); #endif static #ifdef STBI_THREAD_LOCAL STBI_THREAD_LOCAL #endif const char *stbi__g_failure_reason; STBIDEF 
const char *stbi_failure_reason(void) { return stbi__g_failure_reason; } #ifndef STBI_NO_FAILURE_STRINGS static int stbi__err(const char *str) { stbi__g_failure_reason = str; return 0; } #endif static void *stbi__malloc(size_t size) { return STBI_MALLOC(size); } // stb_image uses ints pervasively, including for offset calculations. // therefore the largest decoded image size we can support with the // current code, even on 64-bit targets, is INT_MAX. this is not a // significant limitation for the intended use case. // // we do, however, need to make sure our size calculations don't // overflow. hence a few helper functions for size calculations that // multiply integers together, making sure that they're non-negative // and no overflow occurs. // return 1 if the sum is valid, 0 on overflow. // negative terms are considered invalid. static int stbi__addsizes_valid(int a, int b) { if (b < 0) return 0; // now 0 <= b <= INT_MAX, hence also // 0 <= INT_MAX - b <= INTMAX. // And "a + b <= INT_MAX" (which might overflow) is the // same as a <= INT_MAX - b (no overflow) return a <= INT_MAX - b; } // returns 1 if the product is valid, 0 on overflow. // negative factors are considered invalid. static int stbi__mul2sizes_valid(int a, int b) { if (a < 0 || b < 0) return 0; if (b == 0) return 1; // mul-by-0 is always safe // portable way to check for no overflows in a*b return a <= INT_MAX/b; } #if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) // returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow static int stbi__mad2sizes_valid(int a, int b, int add) { return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); } #endif // returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow static int stbi__mad3sizes_valid(int a, int b, int c, int add) { return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && stbi__addsizes_valid(a*b*c, add); } // returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) { return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); } #endif #if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) // mallocs with size overflow checking static void *stbi__malloc_mad2(int a, int b, int add) { if (!stbi__mad2sizes_valid(a, b, add)) return NULL; return stbi__malloc(a*b + add); } #endif static void *stbi__malloc_mad3(int a, int b, int c, int add) { if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; return stbi__malloc(a*b*c + add); } #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) { if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; return stbi__malloc(a*b*c*d + add); } #endif // returns 1 if the sum of two signed ints is valid (between -2^31 and 2^31-1 inclusive), 0 on overflow. static int stbi__addints_valid(int a, int b) { if ((a >= 0) != (b >= 0)) return 1; // a and b have different signs, so no overflow if (a < 0 && b < 0) return a >= INT_MIN - b; // same as a + b >= INT_MIN; INT_MIN - b cannot overflow since b < 0. return a <= INT_MAX - b; } // returns 1 if the product of two signed shorts is valid, 0 on overflow. 
static int stbi__mul2shorts_valid(short a, short b) { if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid if (b < 0) return a <= SHRT_MIN / b; // same as a * b >= SHRT_MIN return a >= SHRT_MIN / b; } // stbi__err - error // stbi__errpf - error returning pointer to float // stbi__errpuc - error returning pointer to unsigned char #ifdef STBI_NO_FAILURE_STRINGS #define stbi__err(x,y) 0 #elif defined(STBI_FAILURE_USERMSG) #define stbi__err(x,y) stbi__err(y) #else #define stbi__err(x,y) stbi__err(x) #endif #define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) #define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) STBIDEF void stbi_image_free(void *retval_from_stbi_load) { STBI_FREE(retval_from_stbi_load); } #ifndef STBI_NO_LINEAR static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); #endif #ifndef STBI_NO_HDR static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); #endif static int stbi__vertically_flip_on_load_global = 0; STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) { stbi__vertically_flip_on_load_global = flag_true_if_should_flip; } #ifndef STBI_THREAD_LOCAL #define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global #else static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set; STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) { stbi__vertically_flip_on_load_local = flag_true_if_should_flip; stbi__vertically_flip_on_load_set = 1; } #define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set \ ? stbi__vertically_flip_on_load_local \ : stbi__vertically_flip_on_load_global) #endif // STBI_THREAD_LOCAL static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) { memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order ri->num_channels = 0; // test the formats with a very explicit header first (at least a FOURCC // or distinctive magic number first) #ifndef STBI_NO_PNG if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_BMP if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_GIF if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_PSD if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); #else STBI_NOTUSED(bpc); #endif #ifndef STBI_NO_PIC if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); #endif // then the formats that can end up attempting to load with just 1 or 2 // bytes matching expectations; these are prone to false positives, so // try them later #ifndef STBI_NO_JPEG if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_PNM if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_HDR if (stbi__hdr_test(s)) { float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); } #endif #ifndef STBI_NO_TGA // test tga last because it's a crappy test! 
if (stbi__tga_test(s)) return stbi__tga_load(s,x,y,comp,req_comp, ri); #endif return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); } static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) { int i; int img_len = w * h * channels; stbi_uc *reduced; reduced = (stbi_uc *) stbi__malloc(img_len); if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); for (i = 0; i < img_len; ++i) reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling STBI_FREE(orig); return reduced; } static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) { int i; int img_len = w * h * channels; stbi__uint16 *enlarged; enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); for (i = 0; i < img_len; ++i) enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff STBI_FREE(orig); return enlarged; } static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) { int row; size_t bytes_per_row = (size_t)w * bytes_per_pixel; stbi_uc temp[2048]; stbi_uc *bytes = (stbi_uc *)image; for (row = 0; row < (h>>1); row++) { stbi_uc *row0 = bytes + row*bytes_per_row; stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; // swap row0 with row1 size_t bytes_left = bytes_per_row; while (bytes_left) { size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); memcpy(temp, row0, bytes_copy); memcpy(row0, row1, bytes_copy); memcpy(row1, temp, bytes_copy); row0 += bytes_copy; row1 += bytes_copy; bytes_left -= bytes_copy; } } } #ifndef STBI_NO_GIF static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) { int slice; int slice_size = w * h * bytes_per_pixel; stbi_uc *bytes = (stbi_uc *)image; for (slice = 0; slice < z; ++slice) { stbi__vertical_flip(bytes, w, h, bytes_per_pixel); bytes += slice_size; } } #endif static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) { stbi__result_info ri; void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); if (result == NULL) return NULL; // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); if (ri.bits_per_channel != 8) { result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); ri.bits_per_channel = 8; } // @TODO: move stbi__convert_format to here if (stbi__vertically_flip_on_load) { int channels = req_comp ? req_comp : *comp; stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); } return (unsigned char *) result; } static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) { stbi__result_info ri; void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); if (result == NULL) return NULL; // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); if (ri.bits_per_channel != 16) { result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? 
*comp : req_comp); ri.bits_per_channel = 16; } // @TODO: move stbi__convert_format16 to here // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision if (stbi__vertically_flip_on_load) { int channels = req_comp ? req_comp : *comp; stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); } return (stbi__uint16 *) result; } #if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR) static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) { if (stbi__vertically_flip_on_load && result != NULL) { int channels = req_comp ? req_comp : *comp; stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); } } #endif #ifndef STBI_NO_STDIO #if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); #endif #if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) { return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); } #endif static FILE *stbi__fopen(char const *filename, char const *mode) { FILE *f; #if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) wchar_t wMode[64]; wchar_t wFilename[1024]; if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) return 0; if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) return 0; #if defined(_MSC_VER) && _MSC_VER >= 1400 if (0 != _wfopen_s(&f, wFilename, wMode)) f = 0; #else f = _wfopen(wFilename, wMode); #endif #elif defined(_MSC_VER) && _MSC_VER >= 1400 if (0 != fopen_s(&f, filename, mode)) f=0; #else f = fopen(filename, mode); #endif return f; } STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) { FILE *f = stbi__fopen(filename, "rb"); unsigned char *result; if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); result = stbi_load_from_file(f,x,y,comp,req_comp); fclose(f); return result; } STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) { unsigned char *result; stbi__context s; stbi__start_file(&s,f); result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); if (result) { // need to 'unget' all the characters in the IO buffer fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); } return result; } STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) { stbi__uint16 *result; stbi__context s; stbi__start_file(&s,f); result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); if (result) { // need to 'unget' all the characters in the IO buffer fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); } return result; } STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) { FILE *f = stbi__fopen(filename, "rb"); stbi__uint16 *result; if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); result = stbi_load_from_file_16(f,x,y,comp,req_comp); fclose(f); return result; } #endif //!STBI_NO_STDIO STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int 
*channels_in_file, int desired_channels) { stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); } STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); } STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); } STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); } #ifndef STBI_NO_GIF STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) { unsigned char *result; stbi__context s; stbi__start_mem(&s,buffer,len); result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); if (stbi__vertically_flip_on_load) { stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); } return result; } #endif #ifndef STBI_NO_LINEAR static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) { unsigned char *data; #ifndef STBI_NO_HDR if (stbi__hdr_test(s)) { stbi__result_info ri; float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); if (hdr_data) stbi__float_postprocess(hdr_data,x,y,comp,req_comp); return hdr_data; } #endif data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); if (data) return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); } STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__loadf_main(&s,x,y,comp,req_comp); } STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); return stbi__loadf_main(&s,x,y,comp,req_comp); } #ifndef STBI_NO_STDIO STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) { float *result; FILE *f = stbi__fopen(filename, "rb"); if (!f) return stbi__errpf("can't fopen", "Unable to open file"); result = stbi_loadf_from_file(f,x,y,comp,req_comp); fclose(f); return result; } STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_file(&s,f); return stbi__loadf_main(&s,x,y,comp,req_comp); } #endif // !STBI_NO_STDIO #endif // !STBI_NO_LINEAR // these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is // defined, for API simplicity; if STBI_NO_LINEAR is defined, it always // reports false! 
STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) { #ifndef STBI_NO_HDR stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__hdr_test(&s); #else STBI_NOTUSED(buffer); STBI_NOTUSED(len); return 0; #endif } #ifndef STBI_NO_STDIO STBIDEF int stbi_is_hdr (char const *filename) { FILE *f = stbi__fopen(filename, "rb"); int result=0; if (f) { result = stbi_is_hdr_from_file(f); fclose(f); } return result; } STBIDEF int stbi_is_hdr_from_file(FILE *f) { #ifndef STBI_NO_HDR long pos = ftell(f); int res; stbi__context s; stbi__start_file(&s,f); res = stbi__hdr_test(&s); fseek(f, pos, SEEK_SET); return res; #else STBI_NOTUSED(f); return 0; #endif } #endif // !STBI_NO_STDIO STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) { #ifndef STBI_NO_HDR stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); return stbi__hdr_test(&s); #else STBI_NOTUSED(clbk); STBI_NOTUSED(user); return 0; #endif } #ifndef STBI_NO_LINEAR static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } #endif static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } ////////////////////////////////////////////////////////////////////////////// // // Common code used by all image loaders // enum { STBI__SCAN_load=0, STBI__SCAN_type, STBI__SCAN_header }; static void stbi__refill_buffer(stbi__context *s) { int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original); if (n == 0) { // at end of file, treat same as if from memory, but need to handle case // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file s->read_from_callbacks = 0; s->img_buffer = s->buffer_start; s->img_buffer_end = s->buffer_start+1; *s->img_buffer = 0; } else { s->img_buffer = s->buffer_start; s->img_buffer_end = s->buffer_start + n; } } stbi_inline static stbi_uc stbi__get8(stbi__context *s) { if (s->img_buffer < s->img_buffer_end) return *s->img_buffer++; if (s->read_from_callbacks) { stbi__refill_buffer(s); return *s->img_buffer++; } return 0; } #if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) // nothing #else stbi_inline static int stbi__at_eof(stbi__context *s) { if (s->io.read) { if (!(s->io.eof)(s->io_user_data)) return 0; // if feof() is true, check if buffer = end // special case: we've only got the special 0 character at the end if (s->read_from_callbacks == 0) return 1; } return s->img_buffer >= s->img_buffer_end; } #endif #if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) // nothing #else static void stbi__skip(stbi__context *s, int n) { if (n == 0) return; // already there! 
if (n < 0) { s->img_buffer = s->img_buffer_end; return; } if (s->io.read) { int blen = (int) (s->img_buffer_end - s->img_buffer); if (blen < n) { s->img_buffer = s->img_buffer_end; (s->io.skip)(s->io_user_data, n - blen); return; } } s->img_buffer += n; } #endif #if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM) // nothing #else static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) { if (s->io.read) { int blen = (int) (s->img_buffer_end - s->img_buffer); if (blen < n) { int res, count; memcpy(buffer, s->img_buffer, blen); count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); res = (count == (n-blen)); s->img_buffer = s->img_buffer_end; return res; } } if (s->img_buffer+n <= s->img_buffer_end) { memcpy(buffer, s->img_buffer, n); s->img_buffer += n; return 1; } else return 0; } #endif #if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) // nothing #else static int stbi__get16be(stbi__context *s) { int z = stbi__get8(s); return (z << 8) + stbi__get8(s); } #endif #if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) // nothing #else static stbi__uint32 stbi__get32be(stbi__context *s) { stbi__uint32 z = stbi__get16be(s); return (z << 16) + stbi__get16be(s); } #endif #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) // nothing #else static int stbi__get16le(stbi__context *s) { int z = stbi__get8(s); return z + (stbi__get8(s) << 8); } #endif #ifndef STBI_NO_BMP static stbi__uint32 stbi__get32le(stbi__context *s) { stbi__uint32 z = stbi__get16le(s); z += (stbi__uint32)stbi__get16le(s) << 16; return z; } #endif #define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings #if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) // nothing #else ////////////////////////////////////////////////////////////////////////////// // // generic converter from built-in img_n to req_comp // individual types do this automatically as much as possible (e.g. jpeg // does all cases internally since it needs to colorspace convert anyway, // and it never has alpha, so very few cases ). 
png can automatically // interleave an alpha=255 channel, but falls back to this for other cases // // assume data buffer is malloced, so malloc a new one and free that one // only failure mode is malloc failing static stbi_uc stbi__compute_y(int r, int g, int b) { return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); } #endif #if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) // nothing #else static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) { int i,j; unsigned char *good; if (req_comp == img_n) return data; STBI_ASSERT(req_comp >= 1 && req_comp <= 4); good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); if (good == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } for (j=0; j < (int) y; ++j) { unsigned char *src = data + j * x * img_n ; unsigned char *dest = good + j * x * req_comp; #define STBI__COMBO(a,b) ((a)*8+(b)) #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) // convert source image with img_n components to one with req_comp components; // avoid switch per pixel, so use switch per scanline and massive macros switch (STBI__COMBO(img_n, req_comp)) { STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255; } break; STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255; } break; STBI__CASE(2,1) { dest[0]=src[0]; } break; STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255; } break; STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255; } break; STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break; STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion"); } #undef STBI__CASE } STBI_FREE(data); return good; } #endif #if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) // nothing #else static stbi__uint16 stbi__compute_y_16(int r, int g, int b) { return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); } #endif #if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) // nothing #else static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) { int i,j; stbi__uint16 *good; if (req_comp == img_n) return data; STBI_ASSERT(req_comp >= 1 && req_comp <= 4); good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); if (good == NULL) { STBI_FREE(data); return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); } for (j=0; j < (int) y; ++j) { stbi__uint16 *src = data + j * x * img_n ; stbi__uint16 *dest = good + j * x * req_comp; #define STBI__COMBO(a,b) ((a)*8+(b)) #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) // convert source image with img_n components to one with req_comp components; // avoid switch per pixel, so use switch per scanline and massive macros switch (STBI__COMBO(img_n, req_comp)) { STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break; 
STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break; STBI__CASE(2,1) { dest[0]=src[0]; } break; STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break; STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break; STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break; STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion"); } #undef STBI__CASE } STBI_FREE(data); return good; } #endif #ifndef STBI_NO_LINEAR static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) { int i,k,n; float *output; if (!data) return NULL; output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } // compute number of non-alpha components if (comp & 1) n = comp; else n = comp-1; for (i=0; i < x*y; ++i) { for (k=0; k < n; ++k) { output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); } } if (n < comp) { for (i=0; i < x*y; ++i) { output[i*comp + n] = data[i*comp + n]/255.0f; } } STBI_FREE(data); return output; } #endif #ifndef STBI_NO_HDR #define stbi__float2int(x) ((int) (x)) static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) { int i,k,n; stbi_uc *output; if (!data) return NULL; output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } // compute number of non-alpha components if (comp & 1) n = comp; else n = comp-1; for (i=0; i < x*y; ++i) { for (k=0; k < n; ++k) { float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; if (z < 0) z = 0; if (z > 255) z = 255; output[i*comp + k] = (stbi_uc) stbi__float2int(z); } if (k < comp) { float z = data[i*comp+k] * 255 + 0.5f; if (z < 0) z = 0; if (z > 255) z = 255; output[i*comp + k] = (stbi_uc) stbi__float2int(z); } } STBI_FREE(data); return output; } #endif ////////////////////////////////////////////////////////////////////////////// // // "baseline" JPEG/JFIF decoder // // simple implementation // - doesn't support delayed output of y-dimension // - simple interface (only one output format: 8-bit interleaved RGB) // - doesn't try to recover corrupt jpegs // - doesn't allow partial loading, loading multiple at once // - still fast on x86 (copying globals into locals doesn't help x86) // - allocates lots of intermediate memory (full size of all components) // - non-interleaved case requires this anyway // - allows good upsampling (see next) // high-quality // - upsampled channels are bilinearly interpolated, even across blocks // - quality integer IDCT derived from IJG's 'slow' // performance // - fast huffman; reasonable integer IDCT // - some SIMD kernels for common paths on targets with SSE2/NEON // - uses a lot of intermediate memory, could cache poorly #ifndef STBI_NO_JPEG // huffman decoding acceleration #define FAST_BITS 9 // larger handles more cases; smaller 
stomps less cache typedef struct { stbi_uc fast[1 << FAST_BITS]; // weirdly, repacking this into AoS is a 10% speed loss, instead of a win stbi__uint16 code[256]; stbi_uc values[256]; stbi_uc size[257]; unsigned int maxcode[18]; int delta[17]; // old 'firstsymbol' - old 'firstcode' } stbi__huffman; typedef struct { stbi__context *s; stbi__huffman huff_dc[4]; stbi__huffman huff_ac[4]; stbi__uint16 dequant[4][64]; stbi__int16 fast_ac[4][1 << FAST_BITS]; // sizes for components, interleaved MCUs int img_h_max, img_v_max; int img_mcu_x, img_mcu_y; int img_mcu_w, img_mcu_h; // definition of jpeg image component struct { int id; int h,v; int tq; int hd,ha; int dc_pred; int x,y,w2,h2; stbi_uc *data; void *raw_data, *raw_coeff; stbi_uc *linebuf; short *coeff; // progressive only int coeff_w, coeff_h; // number of 8x8 coefficient blocks } img_comp[4]; stbi__uint32 code_buffer; // jpeg entropy-coded buffer int code_bits; // number of valid bits unsigned char marker; // marker seen while filling entropy buffer int nomore; // flag if we saw a marker so must stop int progressive; int spec_start; int spec_end; int succ_high; int succ_low; int eob_run; int jfif; int app14_color_transform; // Adobe APP14 tag int rgb; int scan_n, order[4]; int restart_interval, todo; // kernels void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); } stbi__jpeg; static int stbi__build_huffman(stbi__huffman *h, int *count) { int i,j,k=0; unsigned int code; // build size list for each symbol (from JPEG spec) for (i=0; i < 16; ++i) { for (j=0; j < count[i]; ++j) { h->size[k++] = (stbi_uc) (i+1); if(k >= 257) return stbi__err("bad size list","Corrupt JPEG"); } } h->size[k] = 0; // compute actual symbols (from jpeg spec) code = 0; k = 0; for(j=1; j <= 16; ++j) { // compute delta to add to code to compute symbol id h->delta[j] = k - code; if (h->size[k] == j) { while (h->size[k] == j) h->code[k++] = (stbi__uint16) (code++); if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); } // compute largest code + 1 for this size, preshifted as needed later h->maxcode[j] = code << (16-j); code <<= 1; } h->maxcode[j] = 0xffffffff; // build non-spec acceleration table; 255 is flag for not-accelerated memset(h->fast, 255, 1 << FAST_BITS); for (i=0; i < k; ++i) { int s = h->size[i]; if (s <= FAST_BITS) { int c = h->code[i] << (FAST_BITS-s); int m = 1 << (FAST_BITS-s); for (j=0; j < m; ++j) { h->fast[c+j] = (stbi_uc) i; } } } return 1; } // build a table that decodes both magnitude and value of small ACs in // one go. static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) { int i; for (i=0; i < (1 << FAST_BITS); ++i) { stbi_uc fast = h->fast[i]; fast_ac[i] = 0; if (fast < 255) { int rs = h->values[fast]; int run = (rs >> 4) & 15; int magbits = rs & 15; int len = h->size[fast]; if (magbits && len + magbits <= FAST_BITS) { // magnitude code followed by receive_extend code int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); int m = 1 << (magbits - 1); if (k < m) k += (~0U << magbits) + 1; // if the result is small enough, we can fit it in fast_ac table if (k >= -128 && k <= 127) fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); } } } } static void stbi__grow_buffer_unsafe(stbi__jpeg *j) { do { unsigned int b = j->nomore ? 
0 : stbi__get8(j->s); if (b == 0xff) { int c = stbi__get8(j->s); while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes if (c != 0) { j->marker = (unsigned char) c; j->nomore = 1; return; } } j->code_buffer |= b << (24 - j->code_bits); j->code_bits += 8; } while (j->code_bits <= 24); } // (1 << n) - 1 static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; // decode a jpeg huffman value from the bitstream stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) { unsigned int temp; int c,k; if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); // look at the top FAST_BITS and determine what symbol ID it is, // if the code is <= FAST_BITS c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); k = h->fast[c]; if (k < 255) { int s = h->size[k]; if (s > j->code_bits) return -1; j->code_buffer <<= s; j->code_bits -= s; return h->values[k]; } // naive test is to shift the code_buffer down so k bits are // valid, then test against maxcode. To speed this up, we've // preshifted maxcode left so that it has (16-k) 0s at the // end; in other words, regardless of the number of bits, it // wants to be compared against something shifted to have 16; // that way we don't need to shift inside the loop. temp = j->code_buffer >> 16; for (k=FAST_BITS+1 ; ; ++k) if (temp < h->maxcode[k]) break; if (k == 17) { // error! code not found j->code_bits -= 16; return -1; } if (k > j->code_bits) return -1; // convert the huffman code to the symbol id c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; if(c < 0 || c >= 256) // symbol id out of bounds! return -1; STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); // convert the id to a symbol j->code_bits -= k; j->code_buffer <<= k; return h->values[c]; } // bias[n] = (-1<<n) + 1 static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767}; // combined JPEG 'receive' and JPEG 'extend', since baseline // always extends everything it receives. stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n) { unsigned int k; int sgn; if (j->code_bits < n) stbi__grow_buffer_unsafe(j); if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s instead of continuing sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative) k = stbi_lrot(j->code_buffer, n); j->code_buffer = k & ~stbi__bmask[n]; k &= stbi__bmask[n]; j->code_bits -= n; return k + (stbi__jbias[n] & (sgn - 1)); } // get some unsigned bits stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) { unsigned int k; if (j->code_bits < n) stbi__grow_buffer_unsafe(j); if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s instead of continuing k = stbi_lrot(j->code_buffer, n); j->code_buffer = k & ~stbi__bmask[n]; k &= stbi__bmask[n]; j->code_bits -= n; return k; } stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) { unsigned int k; if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s instead of continuing k = j->code_buffer; j->code_buffer <<= 1; --j->code_bits; return k & 0x80000000; } // given a value that's at position X in the zigzag stream, // where does it appear in the 8x8 matrix coded as row-major?
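// a worked example (illustrative note, not part of upstream stb_image):
// zigzag index 4 maps to natural index 9, i.e. row 1, col 1 of the 8x8
// block, so a decoder stores the fifth coefficient v of a block with
//
//    data[stbi__jpeg_dezigzag[4]] = v;   // lands in data[9]
//
// which is exactly how the block decoders below write AC coefficients.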
static const stbi_uc stbi__jpeg_dezigzag[64+15] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, // let corrupt input sample past end 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }; // decode one 64-entry block-- static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) { int diff,dc,k; int t; if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); t = stbi__jpeg_huff_decode(j, hdc); if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG"); // 0 all the ac values now so we can do it 32-bits at a time memset(data,0,64*sizeof(data[0])); diff = t ? stbi__extend_receive(j, t) : 0; if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG"); dc = j->img_comp[b].dc_pred + diff; j->img_comp[b].dc_pred = dc; if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); data[0] = (short) (dc * dequant[0]); // decode AC components, see JPEG spec k = 1; do { unsigned int zig; int c,r,s; if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); r = fac[c]; if (r) { // fast-AC path k += (r >> 4) & 15; // run s = r & 15; // combined length if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); j->code_buffer <<= s; j->code_bits -= s; // decode into unzigzag'd location zig = stbi__jpeg_dezigzag[k++]; data[zig] = (short) ((r >> 8) * dequant[zig]); } else { int rs = stbi__jpeg_huff_decode(j, hac); if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); s = rs & 15; r = rs >> 4; if (s == 0) { if (rs != 0xf0) break; // end block k += 16; } else { k += r; // decode into unzigzag'd location zig = stbi__jpeg_dezigzag[k++]; data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); } } } while (k < 64); return 1; } static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) { int diff,dc; int t; if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); if (j->succ_high == 0) { // first scan for DC coefficient, must be first memset(data,0,64*sizeof(data[0])); // 0 all the ac values now t = stbi__jpeg_huff_decode(j, hdc); if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); diff = t ? 
stbi__extend_receive(j, t) : 0; if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG"); dc = j->img_comp[b].dc_pred + diff; j->img_comp[b].dc_pred = dc; if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); data[0] = (short) (dc * (1 << j->succ_low)); } else { // refinement scan for DC coefficient if (stbi__jpeg_get_bit(j)) data[0] += (short) (1 << j->succ_low); } return 1; } // @OPTIMIZE: store non-zigzagged during the decode passes, // and only de-zigzag when dequantizing static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) { int k; if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); if (j->succ_high == 0) { int shift = j->succ_low; if (j->eob_run) { --j->eob_run; return 1; } k = j->spec_start; do { unsigned int zig; int c,r,s; if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); r = fac[c]; if (r) { // fast-AC path k += (r >> 4) & 15; // run s = r & 15; // combined length if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); j->code_buffer <<= s; j->code_bits -= s; zig = stbi__jpeg_dezigzag[k++]; data[zig] = (short) ((r >> 8) * (1 << shift)); } else { int rs = stbi__jpeg_huff_decode(j, hac); if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); s = rs & 15; r = rs >> 4; if (s == 0) { if (r < 15) { j->eob_run = (1 << r); if (r) j->eob_run += stbi__jpeg_get_bits(j, r); --j->eob_run; break; } k += 16; } else { k += r; zig = stbi__jpeg_dezigzag[k++]; data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift)); } } } while (k <= j->spec_end); } else { // refinement scan for these AC coefficients short bit = (short) (1 << j->succ_low); if (j->eob_run) { --j->eob_run; for (k = j->spec_start; k <= j->spec_end; ++k) { short *p = &data[stbi__jpeg_dezigzag[k]]; if (*p != 0) if (stbi__jpeg_get_bit(j)) if ((*p & bit)==0) { if (*p > 0) *p += bit; else *p -= bit; } } } else { k = j->spec_start; do { int r,s; int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); s = rs & 15; r = rs >> 4; if (s == 0) { if (r < 15) { j->eob_run = (1 << r) - 1; if (r) j->eob_run += stbi__jpeg_get_bits(j, r); r = 64; // force end of block } else { // r=15 s=0 should write 16 0s, so we just do // a run of 15 0s and then write s (which is 0), // so we don't have to do anything special here } } else { if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); // sign bit if (stbi__jpeg_get_bit(j)) s = bit; else s = -bit; } // advance by r while (k <= j->spec_end) { short *p = &data[stbi__jpeg_dezigzag[k++]]; if (*p != 0) { if (stbi__jpeg_get_bit(j)) if ((*p & bit)==0) { if (*p > 0) *p += bit; else *p -= bit; } } else { if (r == 0) { *p = (short) s; break; } --r; } } } while (k <= j->spec_end); } } return 1; } // take a -128..127 value and stbi__clamp it and convert to 0..255 stbi_inline static stbi_uc stbi__clamp(int x) { // trick to use a single test to catch both cases if ((unsigned int) x > 255) { if (x < 0) return 0; if (x > 255) return 255; } return (stbi_uc) x; } #define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) #define stbi__fsh(x) ((x) * 4096) // derived from jidctint -- DCT_ISLOW #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ int 
t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ p2 = s2; \ p3 = s6; \ p1 = (p2+p3) * stbi__f2f(0.5411961f); \ t2 = p1 + p3*stbi__f2f(-1.847759065f); \ t3 = p1 + p2*stbi__f2f( 0.765366865f); \ p2 = s0; \ p3 = s4; \ t0 = stbi__fsh(p2+p3); \ t1 = stbi__fsh(p2-p3); \ x0 = t0+t3; \ x3 = t0-t3; \ x1 = t1+t2; \ x2 = t1-t2; \ t0 = s7; \ t1 = s5; \ t2 = s3; \ t3 = s1; \ p3 = t0+t2; \ p4 = t1+t3; \ p1 = t0+t3; \ p2 = t1+t2; \ p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ t0 = t0*stbi__f2f( 0.298631336f); \ t1 = t1*stbi__f2f( 2.053119869f); \ t2 = t2*stbi__f2f( 3.072711026f); \ t3 = t3*stbi__f2f( 1.501321110f); \ p1 = p5 + p1*stbi__f2f(-0.899976223f); \ p2 = p5 + p2*stbi__f2f(-2.562915447f); \ p3 = p3*stbi__f2f(-1.961570560f); \ p4 = p4*stbi__f2f(-0.390180644f); \ t3 += p1+p4; \ t2 += p2+p3; \ t1 += p2+p4; \ t0 += p1+p3; static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) { int i,val[64],*v=val; stbi_uc *o; short *d = data; // columns for (i=0; i < 8; ++i,++d, ++v) { // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 && d[40]==0 && d[48]==0 && d[56]==0) { // no shortcut 0 seconds // (1|2|3|4|5|6|7)==0 0 seconds // all separate -0.047 seconds // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds int dcterm = d[0]*4; v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; } else { STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) // constants scaled things up by 1<<12; let's bring them back // down, but keep 2 extra bits of precision x0 += 512; x1 += 512; x2 += 512; x3 += 512; v[ 0] = (x0+t3) >> 10; v[56] = (x0-t3) >> 10; v[ 8] = (x1+t2) >> 10; v[48] = (x1-t2) >> 10; v[16] = (x2+t1) >> 10; v[40] = (x2-t1) >> 10; v[24] = (x3+t0) >> 10; v[32] = (x3-t0) >> 10; } } for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { // no fast case since the first 1D IDCT spread components out STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) // constants scaled things up by 1<<12, plus we had 1<<2 from first // loop, plus horizontal and vertical each scale by sqrt(8) so together // we've got an extra 1<<3, so 1<<17 total we need to remove. // so we want to round that, which means adding 0.5 * 1<<17, // aka 65536. Also, we'll end up with -128 to 127 that we want // to encode as 0..255 by adding 128, so we'll add that before the shift x0 += 65536 + (128<<17); x1 += 65536 + (128<<17); x2 += 65536 + (128<<17); x3 += 65536 + (128<<17); // tried computing the shifts into temps, or'ing the temps to see // if any were out of range, but that was slower o[0] = stbi__clamp((x0+t3) >> 17); o[7] = stbi__clamp((x0-t3) >> 17); o[1] = stbi__clamp((x1+t2) >> 17); o[6] = stbi__clamp((x1-t2) >> 17); o[2] = stbi__clamp((x2+t1) >> 17); o[5] = stbi__clamp((x2-t1) >> 17); o[3] = stbi__clamp((x3+t0) >> 17); o[4] = stbi__clamp((x3-t0) >> 17); } } #ifdef STBI_SSE2 // sse2 integer IDCT. not the fastest possible implementation but it // produces bit-identical results to the generic C version so it's // fully "transparent". static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) { // This is constructed to match our regular (generic) integer IDCT exactly. 
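   // a quick fixed-point sanity check (illustrative note, not upstream
   // stb_image commentary): stbi__f2f(0.5411961f) == (int)(0.5411961 *
   // 4096 + 0.5) == 2217, i.e. the constants live in 4.12 fixed point.
   // The column pass rounds with +512 and shifts by 10, keeping 2 extra
   // fraction bits; the row pass shifts by 17 == 12 + 2 + 3 (the 3 from
   // the sqrt(8)*sqrt(8) scaling of the two 1D passes), which is where
   // bias_0 == 512 and bias_1 == 65536 + (128<<17) below come from.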
__m128i row0, row1, row2, row3, row4, row5, row6, row7; __m128i tmp; // dot product constant: even elems=x, odd elems=y #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) // out(1) = c1[even]*x + c1[odd]*y #define dct_rot(out0,out1, x,y,c0,c1) \ __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) // out = in << 12 (in 16-bit, out 32-bit) #define dct_widen(out, in) \ __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) // wide add #define dct_wadd(out, a, b) \ __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ __m128i out##_h = _mm_add_epi32(a##_h, b##_h) // wide sub #define dct_wsub(out, a, b) \ __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) // butterfly a/b, add bias, then shift by "s" and pack #define dct_bfly32o(out0, out1, a,b,bias,s) \ { \ __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ dct_wadd(sum, abiased, b); \ dct_wsub(dif, abiased, b); \ out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ } // 8-bit interleave step (for transposes) #define dct_interleave8(a, b) \ tmp = a; \ a = _mm_unpacklo_epi8(a, b); \ b = _mm_unpackhi_epi8(tmp, b) // 16-bit interleave step (for transposes) #define dct_interleave16(a, b) \ tmp = a; \ a = _mm_unpacklo_epi16(a, b); \ b = _mm_unpackhi_epi16(tmp, b) #define dct_pass(bias,shift) \ { \ /* even part */ \ dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ __m128i sum04 = _mm_add_epi16(row0, row4); \ __m128i dif04 = _mm_sub_epi16(row0, row4); \ dct_widen(t0e, sum04); \ dct_widen(t1e, dif04); \ dct_wadd(x0, t0e, t3e); \ dct_wsub(x3, t0e, t3e); \ dct_wadd(x1, t1e, t2e); \ dct_wsub(x2, t1e, t2e); \ /* odd part */ \ dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ __m128i sum17 = _mm_add_epi16(row1, row7); \ __m128i sum35 = _mm_add_epi16(row3, row5); \ dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ dct_wadd(x4, y0o, y4o); \ dct_wadd(x5, y1o, y5o); \ dct_wadd(x6, y2o, y5o); \ dct_wadd(x7, y3o, y4o); \ dct_bfly32o(row0,row7, x0,x7,bias,shift); \ dct_bfly32o(row1,row6, x1,x6,bias,shift); \ dct_bfly32o(row2,row5, x2,x5,bias,shift); \ dct_bfly32o(row3,row4, x3,x4,bias,shift); \ } __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 
1.501321110f)); // rounding biases in column/row passes, see stbi__idct_block for explanation. __m128i bias_0 = _mm_set1_epi32(512); __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); // load row0 = _mm_load_si128((const __m128i *) (data + 0*8)); row1 = _mm_load_si128((const __m128i *) (data + 1*8)); row2 = _mm_load_si128((const __m128i *) (data + 2*8)); row3 = _mm_load_si128((const __m128i *) (data + 3*8)); row4 = _mm_load_si128((const __m128i *) (data + 4*8)); row5 = _mm_load_si128((const __m128i *) (data + 5*8)); row6 = _mm_load_si128((const __m128i *) (data + 6*8)); row7 = _mm_load_si128((const __m128i *) (data + 7*8)); // column pass dct_pass(bias_0, 10); { // 16bit 8x8 transpose pass 1 dct_interleave16(row0, row4); dct_interleave16(row1, row5); dct_interleave16(row2, row6); dct_interleave16(row3, row7); // transpose pass 2 dct_interleave16(row0, row2); dct_interleave16(row1, row3); dct_interleave16(row4, row6); dct_interleave16(row5, row7); // transpose pass 3 dct_interleave16(row0, row1); dct_interleave16(row2, row3); dct_interleave16(row4, row5); dct_interleave16(row6, row7); } // row pass dct_pass(bias_1, 17); { // pack __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 __m128i p1 = _mm_packus_epi16(row2, row3); __m128i p2 = _mm_packus_epi16(row4, row5); __m128i p3 = _mm_packus_epi16(row6, row7); // 8bit 8x8 transpose pass 1 dct_interleave8(p0, p2); // a0e0a1e1... dct_interleave8(p1, p3); // c0g0c1g1... // transpose pass 2 dct_interleave8(p0, p1); // a0c0e0g0... dct_interleave8(p2, p3); // b0d0f0h0... // transpose pass 3 dct_interleave8(p0, p2); // a0b0c0d0... dct_interleave8(p1, p3); // a4b4c4d4... // store _mm_storel_epi64((__m128i *) out, p0); out += out_stride; _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; _mm_storel_epi64((__m128i *) out, p2); out += out_stride; _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; _mm_storel_epi64((__m128i *) out, p1); out += out_stride; _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; _mm_storel_epi64((__m128i *) out, p3); out += out_stride; _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); } #undef dct_const #undef dct_rot #undef dct_widen #undef dct_wadd #undef dct_wsub #undef dct_bfly32o #undef dct_interleave8 #undef dct_interleave16 #undef dct_pass } #endif // STBI_SSE2 #ifdef STBI_NEON // NEON integer IDCT. should produce bit-identical // results to the generic C version. 
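// dispatch sketch (hypothetical caller, not upstream stb_image code):
// like the SSE2 variant, this is a drop-in replacement for
// stbi__idct_block and is only ever reached through the function
// pointer wired up in stbi__setup_jpeg, roughly:
//
//    STBI_SIMD_ALIGN(short, coeffs[64]);   // hypothetical buffer name
//    /* decode + dequantize 64 coefficients into coeffs */
//    z->idct_block_kernel(dst, dst_stride, coeffs);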
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) { int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); #define dct_long_mul(out, inq, coeff) \ int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) #define dct_long_mac(out, acc, inq, coeff) \ int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \ int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) #define dct_widen(out, inq) \ int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) // wide add #define dct_wadd(out, a, b) \ int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ int32x4_t out##_h = vaddq_s32(a##_h, b##_h) // wide sub #define dct_wsub(out, a, b) \ int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ int32x4_t out##_h = vsubq_s32(a##_h, b##_h) // butterfly a/b, then shift using "shiftop" by "s" and pack #define dct_bfly32o(out0,out1, a,b,shiftop,s) \ { \ dct_wadd(sum, a, b); \ dct_wsub(dif, a, b); \ out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ } #define dct_pass(shiftop, shift) \ { \ /* even part */ \ int16x8_t sum26 = vaddq_s16(row2, row6); \ dct_long_mul(p1e, sum26, rot0_0); \ dct_long_mac(t2e, p1e, row6, rot0_1); \ dct_long_mac(t3e, p1e, row2, rot0_2); \ int16x8_t sum04 = vaddq_s16(row0, row4); \ int16x8_t dif04 = vsubq_s16(row0, row4); \ dct_widen(t0e, sum04); \ dct_widen(t1e, dif04); \ dct_wadd(x0, t0e, t3e); \ dct_wsub(x3, t0e, t3e); \ dct_wadd(x1, t1e, t2e); \ dct_wsub(x2, t1e, t2e); \ /* odd part */ \ int16x8_t sum15 = vaddq_s16(row1, row5); \ int16x8_t sum17 = vaddq_s16(row1, row7); \ int16x8_t sum35 = vaddq_s16(row3, row5); \ int16x8_t sum37 = vaddq_s16(row3, row7); \ int16x8_t sumodd = vaddq_s16(sum17, sum35); \ dct_long_mul(p5o, sumodd, rot1_0); \ dct_long_mac(p1o, p5o, sum17, rot1_1); \ dct_long_mac(p2o, p5o, sum35, rot1_2); \ dct_long_mul(p3o, sum37, rot2_0); \ dct_long_mul(p4o, sum15, rot2_1); \ dct_wadd(sump13o, p1o, p3o); \ dct_wadd(sump24o, p2o, p4o); \ dct_wadd(sump23o, p2o, p3o); \ dct_wadd(sump14o, p1o, p4o); \ dct_long_mac(x4, sump13o, row7, rot3_0); \ dct_long_mac(x5, sump24o, row5, rot3_1); \ dct_long_mac(x6, sump23o, row3, rot3_2); \ dct_long_mac(x7, sump14o, row1, rot3_3); \ dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ } // load row0 = vld1q_s16(data + 0*8); row1 = vld1q_s16(data + 1*8); row2 = vld1q_s16(data + 2*8); row3 = vld1q_s16(data + 3*8); row4 = vld1q_s16(data + 4*8); row5 = vld1q_s16(data + 5*8); row6 = vld1q_s16(data + 6*8); row7 = vld1q_s16(data + 7*8); // add DC bias row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); // column pass 
dct_pass(vrshrn_n_s32, 10); // 16bit 8x8 transpose { // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. // whether compilers actually get this is another story, sadly. #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } // pass 1 dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 dct_trn16(row2, row3); dct_trn16(row4, row5); dct_trn16(row6, row7); // pass 2 dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 dct_trn32(row1, row3); dct_trn32(row4, row6); dct_trn32(row5, row7); // pass 3 dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 dct_trn64(row1, row5); dct_trn64(row2, row6); dct_trn64(row3, row7); #undef dct_trn16 #undef dct_trn32 #undef dct_trn64 } // row pass // vrshrn_n_s32 only supports shifts up to 16, we need // 17. so do a non-rounding shift of 16 first then follow // up with a rounding shift by 1. dct_pass(vshrn_n_s32, 16); { // pack and round uint8x8_t p0 = vqrshrun_n_s16(row0, 1); uint8x8_t p1 = vqrshrun_n_s16(row1, 1); uint8x8_t p2 = vqrshrun_n_s16(row2, 1); uint8x8_t p3 = vqrshrun_n_s16(row3, 1); uint8x8_t p4 = vqrshrun_n_s16(row4, 1); uint8x8_t p5 = vqrshrun_n_s16(row5, 1); uint8x8_t p6 = vqrshrun_n_s16(row6, 1); uint8x8_t p7 = vqrshrun_n_s16(row7, 1); // again, these can translate into one instruction, but often don't. #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } // sadly can't use interleaved stores here since we only write // 8 bytes to each scan line! // 8x8 8-bit transpose pass 1 dct_trn8_8(p0, p1); dct_trn8_8(p2, p3); dct_trn8_8(p4, p5); dct_trn8_8(p6, p7); // pass 2 dct_trn8_16(p0, p2); dct_trn8_16(p1, p3); dct_trn8_16(p4, p6); dct_trn8_16(p5, p7); // pass 3 dct_trn8_32(p0, p4); dct_trn8_32(p1, p5); dct_trn8_32(p2, p6); dct_trn8_32(p3, p7); // store vst1_u8(out, p0); out += out_stride; vst1_u8(out, p1); out += out_stride; vst1_u8(out, p2); out += out_stride; vst1_u8(out, p3); out += out_stride; vst1_u8(out, p4); out += out_stride; vst1_u8(out, p5); out += out_stride; vst1_u8(out, p6); out += out_stride; vst1_u8(out, p7); #undef dct_trn8_8 #undef dct_trn8_16 #undef dct_trn8_32 } #undef dct_long_mul #undef dct_long_mac #undef dct_widen #undef dct_wadd #undef dct_wsub #undef dct_bfly32o #undef dct_pass } #endif // STBI_NEON #define STBI__MARKER_none 0xff // if there's a pending marker from the entropy stream, return that // otherwise, fetch from the stream and get a marker. 
if there's no // marker, return 0xff, which is never a valid marker value static stbi_uc stbi__get_marker(stbi__jpeg *j) { stbi_uc x; if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } x = stbi__get8(j->s); if (x != 0xff) return STBI__MARKER_none; while (x == 0xff) x = stbi__get8(j->s); // consume repeated 0xff fill bytes return x; } // in each scan, we'll have scan_n components, and the order // of the components is specified by order[] #define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) // after a restart interval, stbi__jpeg_reset the entropy decoder and // the dc prediction static void stbi__jpeg_reset(stbi__jpeg *j) { j->code_bits = 0; j->code_buffer = 0; j->nomore = 0; j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; j->marker = STBI__MARKER_none; j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; j->eob_run = 0; // no more than 1<<31 MCUs if no restart_interval? that's plenty safe, // since we don't even allow 1<<30 pixels } static int stbi__parse_entropy_coded_data(stbi__jpeg *z) { stbi__jpeg_reset(z); if (!z->progressive) { if (z->scan_n == 1) { int i,j; STBI_SIMD_ALIGN(short, data[64]); int n = z->order[0]; // non-interleaved data, we just need to process one block at a time, // in trivial scanline order // number of blocks to do just depends on how many actual "pixels" this // component has, independent of interleaved MCU blocking and such int w = (z->img_comp[n].x+7) >> 3; int h = (z->img_comp[n].y+7) >> 3; for (j=0; j < h; ++j) { for (i=0; i < w; ++i) { int ha = z->img_comp[n].ha; if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); // every data block is an MCU, so countdown the restart interval if (--z->todo <= 0) { if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); // if it's NOT a restart, then just bail, so we get corrupt data // rather than no data if (!STBI__RESTART(z->marker)) return 1; stbi__jpeg_reset(z); } } } return 1; } else { // interleaved int i,j,k,x,y; STBI_SIMD_ALIGN(short, data[64]); for (j=0; j < z->img_mcu_y; ++j) { for (i=0; i < z->img_mcu_x; ++i) { // scan an interleaved mcu...
process scan_n components in order for (k=0; k < z->scan_n; ++k) { int n = z->order[k]; // scan out an mcu's worth of this component; that's just determined // by the basic H and V specified for the component for (y=0; y < z->img_comp[n].v; ++y) { for (x=0; x < z->img_comp[n].h; ++x) { int x2 = (i*z->img_comp[n].h + x)*8; int y2 = (j*z->img_comp[n].v + y)*8; int ha = z->img_comp[n].ha; if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); } } } // after all interleaved components, that's an interleaved MCU, // so now count down the restart interval if (--z->todo <= 0) { if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); if (!STBI__RESTART(z->marker)) return 1; stbi__jpeg_reset(z); } } } return 1; } } else { if (z->scan_n == 1) { int i,j; int n = z->order[0]; // non-interleaved data, we just need to process one block at a time, // in trivial scanline order // number of blocks to do just depends on how many actual "pixels" this // component has, independent of interleaved MCU blocking and such int w = (z->img_comp[n].x+7) >> 3; int h = (z->img_comp[n].y+7) >> 3; for (j=0; j < h; ++j) { for (i=0; i < w; ++i) { short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); if (z->spec_start == 0) { if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) return 0; } else { int ha = z->img_comp[n].ha; if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) return 0; } // every data block is an MCU, so countdown the restart interval if (--z->todo <= 0) { if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); if (!STBI__RESTART(z->marker)) return 1; stbi__jpeg_reset(z); } } } return 1; } else { // interleaved int i,j,k,x,y; for (j=0; j < z->img_mcu_y; ++j) { for (i=0; i < z->img_mcu_x; ++i) { // scan an interleaved mcu... 
process scan_n components in order for (k=0; k < z->scan_n; ++k) { int n = z->order[k]; // scan out an mcu's worth of this component; that's just determined // by the basic H and V specified for the component for (y=0; y < z->img_comp[n].v; ++y) { for (x=0; x < z->img_comp[n].h; ++x) { int x2 = (i*z->img_comp[n].h + x); int y2 = (j*z->img_comp[n].v + y); short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) return 0; } } } // after all interleaved components, that's an interleaved MCU, // so now count down the restart interval if (--z->todo <= 0) { if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); if (!STBI__RESTART(z->marker)) return 1; stbi__jpeg_reset(z); } } } return 1; } } } static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) { int i; for (i=0; i < 64; ++i) data[i] *= dequant[i]; } static void stbi__jpeg_finish(stbi__jpeg *z) { if (z->progressive) { // dequantize and idct the data int i,j,n; for (n=0; n < z->s->img_n; ++n) { int w = (z->img_comp[n].x+7) >> 3; int h = (z->img_comp[n].y+7) >> 3; for (j=0; j < h; ++j) { for (i=0; i < w; ++i) { short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); } } } } } static int stbi__process_marker(stbi__jpeg *z, int m) { int L; switch (m) { case STBI__MARKER_none: // no marker found return stbi__err("expected marker","Corrupt JPEG"); case 0xDD: // DRI - specify restart interval if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); z->restart_interval = stbi__get16be(z->s); return 1; case 0xDB: // DQT - define quantization table L = stbi__get16be(z->s)-2; while (L > 0) { int q = stbi__get8(z->s); int p = q >> 4, sixteen = (p != 0); int t = q & 15,i; if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); for (i=0; i < 64; ++i) z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); L -= (sixteen ? 129 : 65); } return L==0; case 0xC4: // DHT - define huffman table L = stbi__get16be(z->s)-2; while (L > 0) { stbi_uc *v; int sizes[16],i,n=0; int q = stbi__get8(z->s); int tc = q >> 4; int th = q & 15; if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); for (i=0; i < 16; ++i) { sizes[i] = stbi__get8(z->s); n += sizes[i]; } if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values! 
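      // byte accounting for this segment (illustrative note): each table
      // costs one tc/th byte plus sixteen per-length counts (the 17
      // subtracted next) plus its n symbol values (subtracted after they
      // are read); the segment is well-formed only if these charges
      // consume L exactly, hence the final return L==0.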
L -= 17; if (tc == 0) { if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; v = z->huff_dc[th].values; } else { if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; v = z->huff_ac[th].values; } for (i=0; i < n; ++i) v[i] = stbi__get8(z->s); if (tc != 0) stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); L -= n; } return L==0; } // check for comment block or APP blocks if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { L = stbi__get16be(z->s); if (L < 2) { if (m == 0xFE) return stbi__err("bad COM len","Corrupt JPEG"); else return stbi__err("bad APP len","Corrupt JPEG"); } L -= 2; if (m == 0xE0 && L >= 5) { // JFIF APP0 segment static const unsigned char tag[5] = {'J','F','I','F','\0'}; int ok = 1; int i; for (i=0; i < 5; ++i) if (stbi__get8(z->s) != tag[i]) ok = 0; L -= 5; if (ok) z->jfif = 1; } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; int ok = 1; int i; for (i=0; i < 6; ++i) if (stbi__get8(z->s) != tag[i]) ok = 0; L -= 6; if (ok) { stbi__get8(z->s); // version stbi__get16be(z->s); // flags0 stbi__get16be(z->s); // flags1 z->app14_color_transform = stbi__get8(z->s); // color transform L -= 6; } } stbi__skip(z->s, L); return 1; } return stbi__err("unknown marker","Corrupt JPEG"); } // after we see SOS static int stbi__process_scan_header(stbi__jpeg *z) { int i; int Ls = stbi__get16be(z->s); z->scan_n = stbi__get8(z->s); if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); for (i=0; i < z->scan_n; ++i) { int id = stbi__get8(z->s), which; int q = stbi__get8(z->s); for (which = 0; which < z->s->img_n; ++which) if (z->img_comp[which].id == id) break; if (which == z->s->img_n) return 0; // no match z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); z->order[i] = which; } { int aa; z->spec_start = stbi__get8(z->s); z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 aa = stbi__get8(z->s); z->succ_high = (aa >> 4); z->succ_low = (aa & 15); if (z->progressive) { if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) return stbi__err("bad SOS", "Corrupt JPEG"); } else { if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); z->spec_end = 63; } } return 1; } static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) { int i; for (i=0; i < ncomp; ++i) { if (z->img_comp[i].raw_data) { STBI_FREE(z->img_comp[i].raw_data); z->img_comp[i].raw_data = NULL; z->img_comp[i].data = NULL; } if (z->img_comp[i].raw_coeff) { STBI_FREE(z->img_comp[i].raw_coeff); z->img_comp[i].raw_coeff = 0; z->img_comp[i].coeff = 0; } if (z->img_comp[i].linebuf) { STBI_FREE(z->img_comp[i].linebuf); z->img_comp[i].linebuf = NULL; } } return why; } static int stbi__process_frame_header(stbi__jpeg *z, int scan) { stbi__context *s = z->s; int Lf,p,i,q, h_max=1,v_max=1,c; Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header 
height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); c = stbi__get8(s); if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); s->img_n = c; for (i=0; i < c; ++i) { z->img_comp[i].data = NULL; z->img_comp[i].linebuf = NULL; } if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); z->rgb = 0; for (i=0; i < s->img_n; ++i) { static const unsigned char rgb[3] = { 'R', 'G', 'B' }; z->img_comp[i].id = stbi__get8(s); if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) ++z->rgb; q = stbi__get8(s); z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); } if (scan != STBI__SCAN_load) return 1; if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); for (i=0; i < s->img_n; ++i) { if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; } // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios // and I've never seen a non-corrupted JPEG file actually use them for (i=0; i < s->img_n; ++i) { if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG"); if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG"); } // compute interleaved mcu info z->img_h_max = h_max; z->img_v_max = v_max; z->img_mcu_w = h_max * 8; z->img_mcu_h = v_max * 8; // these sizes can't be more than 17 bits z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; for (i=0; i < s->img_n; ++i) { // number of effective pixels (e.g. for non-interleaved MCU) z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; // to simplify generation, we'll allocate enough memory to decode // the bogus oversized data from using interleaved MCUs and their // big blocks (e.g. 
a 16x16 iMCU on an image of width 33); we won't // discard the extra data until colorspace conversion // // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) // so these muls can't overflow with 32-bit ints (which we require) z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; z->img_comp[i].coeff = 0; z->img_comp[i].raw_coeff = 0; z->img_comp[i].linebuf = NULL; z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); if (z->img_comp[i].raw_data == NULL) return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); // align blocks for idct using mmx/sse z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); if (z->progressive) { // w2, h2 are multiples of 8 (see above) z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); if (z->img_comp[i].raw_coeff == NULL) return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); } } return 1; } // use comparisons since in some cases we handle more than one case (e.g. SOF) #define stbi__DNL(x) ((x) == 0xdc) #define stbi__SOI(x) ((x) == 0xd8) #define stbi__EOI(x) ((x) == 0xd9) #define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) #define stbi__SOS(x) ((x) == 0xda) #define stbi__SOF_progressive(x) ((x) == 0xc2) static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) { int m; z->jfif = 0; z->app14_color_transform = -1; // valid values are 0,1,2 z->marker = STBI__MARKER_none; // initialize cached marker to empty m = stbi__get_marker(z); if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); if (scan == STBI__SCAN_type) return 1; m = stbi__get_marker(z); while (!stbi__SOF(m)) { if (!stbi__process_marker(z,m)) return 0; m = stbi__get_marker(z); while (m == STBI__MARKER_none) { // some files have extra padding after their blocks, so ok, we'll scan if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); m = stbi__get_marker(z); } } z->progressive = stbi__SOF_progressive(m); if (!stbi__process_frame_header(z, scan)) return 0; return 1; } static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) { // some JPEGs have junk at end, skip over it but if we find what looks // like a valid marker, resume there while (!stbi__at_eof(j->s)) { int x = stbi__get8(j->s); while (x == 255) { // might be a marker if (stbi__at_eof(j->s)) return STBI__MARKER_none; x = stbi__get8(j->s); if (x != 0x00 && x != 0xff) { // not a stuffed zero or lead-in to another marker, looks // like an actual marker, return it return x; } // stuffed zero has x=0 now which ends the loop, meaning we go // back to regular scan loop. // repeated 0xff keeps trying to read the next byte of the marker. 
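      // worked example (illustrative note): for trailing bytes
      //    .. 0x12 0xff 0xd9 ..
      // the scan consumes 0x12, sees 0xff, reads 0xd9 (neither 0x00 nor
      // 0xff) and returns 0xd9, so the EOI marker is processed normally;
      // a stuffed pair 0xff 0x00 instead falls back to the outer scan.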
} } return STBI__MARKER_none; } // decode image to YCbCr format static int stbi__decode_jpeg_image(stbi__jpeg *j) { int m; for (m = 0; m < 4; m++) { j->img_comp[m].raw_data = NULL; j->img_comp[m].raw_coeff = NULL; } j->restart_interval = 0; if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; m = stbi__get_marker(j); while (!stbi__EOI(m)) { if (stbi__SOS(m)) { if (!stbi__process_scan_header(j)) return 0; if (!stbi__parse_entropy_coded_data(j)) return 0; if (j->marker == STBI__MARKER_none ) { j->marker = stbi__skip_jpeg_junk_at_end(j); // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 } m = stbi__get_marker(j); if (STBI__RESTART(m)) m = stbi__get_marker(j); } else if (stbi__DNL(m)) { int Ld = stbi__get16be(j->s); stbi__uint32 NL = stbi__get16be(j->s); if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); m = stbi__get_marker(j); } else { if (!stbi__process_marker(j, m)) return 1; m = stbi__get_marker(j); } } if (j->progressive) stbi__jpeg_finish(j); return 1; } // static jfif-centered resampling (across block boundaries) typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, int w, int hs); #define stbi__div4(x) ((stbi_uc) ((x) >> 2)) static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { STBI_NOTUSED(out); STBI_NOTUSED(in_far); STBI_NOTUSED(w); STBI_NOTUSED(hs); return in_near; } static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { // need to generate two samples vertically for every one in input int i; STBI_NOTUSED(hs); for (i=0; i < w; ++i) out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); return out; } static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { // need to generate two samples horizontally for every one in input int i; stbi_uc *input = in_near; if (w == 1) { // if only one sample, can't do any interpolation out[0] = out[1] = input[0]; return out; } out[0] = input[0]; out[1] = stbi__div4(input[0]*3 + input[1] + 2); for (i=1; i < w-1; ++i) { int n = 3*input[i]+2; out[i*2+0] = stbi__div4(n+input[i-1]); out[i*2+1] = stbi__div4(n+input[i+1]); } out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); out[i*2+1] = input[w-1]; STBI_NOTUSED(in_far); STBI_NOTUSED(hs); return out; } #define stbi__div16(x) ((stbi_uc) ((x) >> 4)) static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { // need to generate 2x2 samples for every one in input int i,t0,t1; if (w == 1) { out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); return out; } t1 = 3*in_near[0] + in_far[0]; out[0] = stbi__div4(t1+2); for (i=1; i < w; ++i) { t0 = t1; t1 = 3*in_near[i]+in_far[i]; out[i*2-1] = stbi__div16(3*t0 + t1 + 8); out[i*2 ] = stbi__div16(3*t1 + t0 + 8); } out[w*2-1] = stbi__div4(t1+2); STBI_NOTUSED(hs); return out; } #if defined(STBI_SSE2) || defined(STBI_NEON) static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { // need to generate 2x2 samples for every one in input int i=0,t0,t1; if (w == 1) { out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); return out; } t1 = 3*in_near[0] + in_far[0]; // process groups of 8 pixels for as long as we can. // note we can't handle the last pixel in a row in this loop // because we need to handle the filter boundary conditions. 
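   // the vector code below leans on the identity 3*near + far ==
   // 4*near + (far - near), turning the vertical filter into a shift, a
   // subtract and an add per vector; e.g. near=100, far=40 gives
   // 3*100 + 40 == 340 == (100<<2) + (40 - 100).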
for (; i < ((w-1) & ~7); i += 8) { #if defined(STBI_SSE2) // load and perform the vertical filtering pass // this uses 3*x + y = 4*x + (y - x) __m128i zero = _mm_setzero_si128(); __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); __m128i farw = _mm_unpacklo_epi8(farb, zero); __m128i nearw = _mm_unpacklo_epi8(nearb, zero); __m128i diff = _mm_sub_epi16(farw, nearw); __m128i nears = _mm_slli_epi16(nearw, 2); __m128i curr = _mm_add_epi16(nears, diff); // current row // horizontal filter works the same based on shifted vers of current // row. "prev" is current row shifted right by 1 pixel; we need to // insert the previous pixel value (from t1). // "next" is current row shifted left by 1 pixel, with first pixel // of next block of 8 pixels added in. __m128i prv0 = _mm_slli_si128(curr, 2); __m128i nxt0 = _mm_srli_si128(curr, 2); __m128i prev = _mm_insert_epi16(prv0, t1, 0); __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); // horizontal filter, polyphase implementation since it's convenient: // even pixels = 3*cur + prev = cur*4 + (prev - cur) // odd pixels = 3*cur + next = cur*4 + (next - cur) // note the shared term. __m128i bias = _mm_set1_epi16(8); __m128i curs = _mm_slli_epi16(curr, 2); __m128i prvd = _mm_sub_epi16(prev, curr); __m128i nxtd = _mm_sub_epi16(next, curr); __m128i curb = _mm_add_epi16(curs, bias); __m128i even = _mm_add_epi16(prvd, curb); __m128i odd = _mm_add_epi16(nxtd, curb); // interleave even and odd pixels, then undo scaling. __m128i int0 = _mm_unpacklo_epi16(even, odd); __m128i int1 = _mm_unpackhi_epi16(even, odd); __m128i de0 = _mm_srli_epi16(int0, 4); __m128i de1 = _mm_srli_epi16(int1, 4); // pack and write output __m128i outv = _mm_packus_epi16(de0, de1); _mm_storeu_si128((__m128i *) (out + i*2), outv); #elif defined(STBI_NEON) // load and perform the vertical filtering pass // this uses 3*x + y = 4*x + (y - x) uint8x8_t farb = vld1_u8(in_far + i); uint8x8_t nearb = vld1_u8(in_near + i); int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); int16x8_t curr = vaddq_s16(nears, diff); // current row // horizontal filter works the same based on shifted vers of current // row. "prev" is current row shifted right by 1 pixel; we need to // insert the previous pixel value (from t1). // "next" is current row shifted left by 1 pixel, with first pixel // of next block of 8 pixels added in. int16x8_t prv0 = vextq_s16(curr, curr, 7); int16x8_t nxt0 = vextq_s16(curr, curr, 1); int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); // horizontal filter, polyphase implementation since it's convenient: // even pixels = 3*cur + prev = cur*4 + (prev - cur) // odd pixels = 3*cur + next = cur*4 + (next - cur) // note the shared term. 
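      // rounding note (illustrative): the scalar fallback computes
      // stbi__div16(3*t0 + t1 + 8), i.e. (x + 8) >> 4, and the
      // vqrshrun_n_s16(.., 4) narrowing below performs the same
      // round-to-nearest divide by 16 (plus unsigned saturation), so the
      // two paths should agree on in-range inputs.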
int16x8_t curs = vshlq_n_s16(curr, 2); int16x8_t prvd = vsubq_s16(prev, curr); int16x8_t nxtd = vsubq_s16(next, curr); int16x8_t even = vaddq_s16(curs, prvd); int16x8_t odd = vaddq_s16(curs, nxtd); // undo scaling and round, then store with even/odd phases interleaved uint8x8x2_t o; o.val[0] = vqrshrun_n_s16(even, 4); o.val[1] = vqrshrun_n_s16(odd, 4); vst2_u8(out + i*2, o); #endif // "previous" value for next iter t1 = 3*in_near[i+7] + in_far[i+7]; } t0 = t1; t1 = 3*in_near[i] + in_far[i]; out[i*2] = stbi__div16(3*t1 + t0 + 8); for (++i; i < w; ++i) { t0 = t1; t1 = 3*in_near[i]+in_far[i]; out[i*2-1] = stbi__div16(3*t0 + t1 + 8); out[i*2 ] = stbi__div16(3*t1 + t0 + 8); } out[w*2-1] = stbi__div4(t1+2); STBI_NOTUSED(hs); return out; } #endif static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { // resample with nearest-neighbor int i,j; STBI_NOTUSED(in_far); for (i=0; i < w; ++i) for (j=0; j < hs; ++j) out[i*hs+j] = in_near[i]; return out; } // this is a reduced-precision calculation of YCbCr-to-RGB introduced // to make sure the code produces the same results in both SIMD and scalar #define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) { int i; for (i=0; i < count; ++i) { int y_fixed = (y[i] << 20) + (1<<19); // rounding int r,g,b; int cr = pcr[i] - 128; int cb = pcb[i] - 128; r = y_fixed + cr* stbi__float2fixed(1.40200f); g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); b = y_fixed + cb* stbi__float2fixed(1.77200f); r >>= 20; g >>= 20; b >>= 20; if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } out[0] = (stbi_uc)r; out[1] = (stbi_uc)g; out[2] = (stbi_uc)b; out[3] = 255; out += step; } } #if defined(STBI_SSE2) || defined(STBI_NEON) static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) { int i = 0; #ifdef STBI_SSE2 // step == 3 is pretty ugly on the final interleave, and i'm not convinced // it's useful in practice (you wouldn't use it for textures, for example). // so just accelerate step == 4 case. if (step == 4) { // this is a fairly straightforward implementation and not super-optimized. 
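      // constant sanity check (illustrative note): these are the JFIF
      // YCbCr weights in signed fixed point, e.g. (short)(1.40200f *
      // 4096.0f + 0.5f) == 5743 for the Cr contribution to R, matching
      // the stbi__float2fixed scaling in the scalar row function above.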
__m128i signflip = _mm_set1_epi8(-0x80); __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); __m128i xw = _mm_set1_epi16(255); // alpha channel for (; i+7 < count; i += 8) { // load __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 // unpack to short (and left-shift cr, cb by 8) __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); // color transform __m128i yws = _mm_srli_epi16(yw, 4); __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); __m128i rws = _mm_add_epi16(cr0, yws); __m128i gwt = _mm_add_epi16(cb0, yws); __m128i bws = _mm_add_epi16(yws, cb1); __m128i gws = _mm_add_epi16(gwt, cr1); // descale __m128i rw = _mm_srai_epi16(rws, 4); __m128i bw = _mm_srai_epi16(bws, 4); __m128i gw = _mm_srai_epi16(gws, 4); // back to byte, set up for transpose __m128i brb = _mm_packus_epi16(rw, bw); __m128i gxb = _mm_packus_epi16(gw, xw); // transpose to interleave channels __m128i t0 = _mm_unpacklo_epi8(brb, gxb); __m128i t1 = _mm_unpackhi_epi8(brb, gxb); __m128i o0 = _mm_unpacklo_epi16(t0, t1); __m128i o1 = _mm_unpackhi_epi16(t0, t1); // store _mm_storeu_si128((__m128i *) (out + 0), o0); _mm_storeu_si128((__m128i *) (out + 16), o1); out += 32; } } #endif #ifdef STBI_NEON // in this version, step=3 support would be easy to add. but is there demand? if (step == 4) { // this is a fairly straightforward implementation and not super-optimized. 
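      // explanatory note (not from upstream stb_image): vqdmulhq_s16(a,b)
      // computes the *doubling* high half, (2*a*b) >> 16, which is why the
      // chroma values are widened with vshll_n_s8(..., 7) (cr<<7) instead of
      // the <<8 used on the SSE2 path: 2 * coef*4096 * (cr<<7) >> 16 is
      // again coef*cr in 4.4 fixed point, matching the luma widened by
      // vshll_n_u8(..., 4). vqrshrun_n_s16(..., 4) then rounds, narrows and
      // saturates to 0..255 in a single step.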
uint8x8_t signflip = vdup_n_u8(0x80); int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); for (; i+7 < count; i += 8) { // load uint8x8_t y_bytes = vld1_u8(y + i); uint8x8_t cr_bytes = vld1_u8(pcr + i); uint8x8_t cb_bytes = vld1_u8(pcb + i); int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); // expand to s16 int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); int16x8_t crw = vshll_n_s8(cr_biased, 7); int16x8_t cbw = vshll_n_s8(cb_biased, 7); // color transform int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); int16x8_t rws = vaddq_s16(yws, cr0); int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); int16x8_t bws = vaddq_s16(yws, cb1); // undo scaling, round, convert to byte uint8x8x4_t o; o.val[0] = vqrshrun_n_s16(rws, 4); o.val[1] = vqrshrun_n_s16(gws, 4); o.val[2] = vqrshrun_n_s16(bws, 4); o.val[3] = vdup_n_u8(255); // store, interleaving r/g/b/a vst4_u8(out, o); out += 8*4; } } #endif for (; i < count; ++i) { int y_fixed = (y[i] << 20) + (1<<19); // rounding int r,g,b; int cr = pcr[i] - 128; int cb = pcb[i] - 128; r = y_fixed + cr* stbi__float2fixed(1.40200f); g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); b = y_fixed + cb* stbi__float2fixed(1.77200f); r >>= 20; g >>= 20; b >>= 20; if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } out[0] = (stbi_uc)r; out[1] = (stbi_uc)g; out[2] = (stbi_uc)b; out[3] = 255; out += step; } } #endif // set up the kernels static void stbi__setup_jpeg(stbi__jpeg *j) { j->idct_block_kernel = stbi__idct_block; j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; #ifdef STBI_SSE2 if (stbi__sse2_available()) { j->idct_block_kernel = stbi__idct_simd; j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; } #endif #ifdef STBI_NEON j->idct_block_kernel = stbi__idct_simd; j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; #endif } // clean up the temporary component buffers static void stbi__cleanup_jpeg(stbi__jpeg *j) { stbi__free_jpeg_components(j, j->s->img_n, 0); } typedef struct { resample_row_func resample; stbi_uc *line0,*line1; int hs,vs; // expansion factor in each axis int w_lores; // horizontal pixels pre-expansion int ystep; // how far through vertical expansion we are int ypos; // which pre-expansion row we're on } stbi__resample; // fast 0..255 * 0..255 => 0..255 rounded multiplication static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) { unsigned int t = x*y + 128; return (stbi_uc) ((t + (t >>8)) >> 8); } static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) { int n, decode_n, is_rgb; z->s->img_n = 0; // make stbi__cleanup_jpeg safe // validate req_comp if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); // load a jpeg image from whichever source, but leave in YCbCr format if 
(!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } // determine actual number of components to generate n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); if (z->s->img_n == 3 && n < 3 && !is_rgb) decode_n = 1; else decode_n = z->s->img_n; // nothing to do if no components requested; check this now to avoid // accessing uninitialized coutput[0] later if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; } // resample and color-convert { int k; unsigned int i,j; stbi_uc *output; stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL }; stbi__resample res_comp[4]; for (k=0; k < decode_n; ++k) { stbi__resample *r = &res_comp[k]; // allocate line buffer big enough for upsampling off the edges // with upsample factor of 4 z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } r->hs = z->img_h_max / z->img_comp[k].h; r->vs = z->img_v_max / z->img_comp[k].v; r->ystep = r->vs >> 1; r->w_lores = (z->s->img_x + r->hs-1) / r->hs; r->ypos = 0; r->line0 = r->line1 = z->img_comp[k].data; if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; else r->resample = stbi__resample_row_generic; } // can't error after this so, this is safe output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } // now go ahead and resample for (j=0; j < z->s->img_y; ++j) { stbi_uc *out = output + n * z->s->img_x * j; for (k=0; k < decode_n; ++k) { stbi__resample *r = &res_comp[k]; int y_bot = r->ystep >= (r->vs >> 1); coutput[k] = r->resample(z->img_comp[k].linebuf, y_bot ? r->line1 : r->line0, y_bot ? r->line0 : r->line1, r->w_lores, r->hs); if (++r->ystep >= r->vs) { r->ystep = 0; r->line0 = r->line1; if (++r->ypos < z->img_comp[k].y) r->line1 += z->img_comp[k].w2; } } if (n >= 3) { stbi_uc *y = coutput[0]; if (z->s->img_n == 3) { if (is_rgb) { for (i=0; i < z->s->img_x; ++i) { out[0] = y[i]; out[1] = coutput[1][i]; out[2] = coutput[2][i]; out[3] = 255; out += n; } } else { z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); } } else if (z->s->img_n == 4) { if (z->app14_color_transform == 0) { // CMYK for (i=0; i < z->s->img_x; ++i) { stbi_uc m = coutput[3][i]; out[0] = stbi__blinn_8x8(coutput[0][i], m); out[1] = stbi__blinn_8x8(coutput[1][i], m); out[2] = stbi__blinn_8x8(coutput[2][i], m); out[3] = 255; out += n; } } else if (z->app14_color_transform == 2) { // YCCK z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); for (i=0; i < z->s->img_x; ++i) { stbi_uc m = coutput[3][i]; out[0] = stbi__blinn_8x8(255 - out[0], m); out[1] = stbi__blinn_8x8(255 - out[1], m); out[2] = stbi__blinn_8x8(255 - out[2], m); out += n; } } else { // YCbCr + alpha? 
Ignore the fourth channel for now z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); } } else for (i=0; i < z->s->img_x; ++i) { out[0] = out[1] = out[2] = y[i]; out[3] = 255; // not used if n==3 out += n; } } else { if (is_rgb) { if (n == 1) for (i=0; i < z->s->img_x; ++i) *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); else { for (i=0; i < z->s->img_x; ++i, out += 2) { out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); out[1] = 255; } } } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { for (i=0; i < z->s->img_x; ++i) { stbi_uc m = coutput[3][i]; stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); out[0] = stbi__compute_y(r, g, b); out[1] = 255; out += n; } } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { for (i=0; i < z->s->img_x; ++i) { out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); out[1] = 255; out += n; } } else { stbi_uc *y = coutput[0]; if (n == 1) for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; else for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; } } } } stbi__cleanup_jpeg(z); *out_x = z->s->img_x; *out_y = z->s->img_y; if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output return output; } } static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { unsigned char* result; stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); if (!j) return stbi__errpuc("outofmem", "Out of memory"); memset(j, 0, sizeof(stbi__jpeg)); STBI_NOTUSED(ri); j->s = s; stbi__setup_jpeg(j); result = load_jpeg_image(j, x,y,comp,req_comp); STBI_FREE(j); return result; } static int stbi__jpeg_test(stbi__context *s) { int r; stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); if (!j) return stbi__err("outofmem", "Out of memory"); memset(j, 0, sizeof(stbi__jpeg)); j->s = s; stbi__setup_jpeg(j); r = stbi__decode_jpeg_header(j, STBI__SCAN_type); stbi__rewind(s); STBI_FREE(j); return r; } static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) { if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { stbi__rewind( j->s ); return 0; } if (x) *x = j->s->img_x; if (y) *y = j->s->img_y; if (comp) *comp = j->s->img_n >= 3 ? 
3 : 1;
   return 1;
}

static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
{
   int result;
   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
   if (!j) return stbi__err("outofmem", "Out of memory");
   memset(j, 0, sizeof(stbi__jpeg));
   j->s = s;
   result = stbi__jpeg_info_raw(j, x, y, comp);
   STBI_FREE(j);
   return result;
}
#endif

// public domain zlib decode v0.2 Sean Barrett 2006-11-18
//    simple implementation
//      - all input must be provided in an upfront buffer
//      - all output is written to a single output buffer (can malloc/realloc)
//    performance
//      - fast huffman

#ifndef STBI_NO_ZLIB

// fast-way is faster to check than jpeg huffman, but slow way is slower
#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet

// zlib-style huffman encoding
// (jpeg packs from the left, zlib from the right, so they can't share code)
typedef struct
{
   stbi__uint16 fast[1 << STBI__ZFAST_BITS];
   stbi__uint16 firstcode[16];
   int maxcode[17];
   stbi__uint16 firstsymbol[16];
   stbi_uc size[STBI__ZNSYMS];
   stbi__uint16 value[STBI__ZNSYMS];
} stbi__zhuffman;

stbi_inline static int stbi__bitreverse16(int n)
{
   n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
   n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
   n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
   n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
   return n;
}

stbi_inline static int stbi__bit_reverse(int v, int bits)
{
   STBI_ASSERT(bits <= 16);
   // to bit reverse n bits, reverse 16 and shift
   // e.g. 11 bits, bit reverse and shift away 5
   return stbi__bitreverse16(v) >> (16-bits);
}

static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int i,k=0;
   int code, next_code[16], sizes[17];

   // DEFLATE spec for generating codes
   memset(sizes, 0, sizeof(sizes));
   memset(z->fast, 0, sizeof(z->fast));
   for (i=0; i < num; ++i)
      ++sizes[sizelist[i]];
   sizes[0] = 0;
   for (i=1; i < 16; ++i)
      if (sizes[i] > (1 << i))
         return stbi__err("bad sizes", "Corrupt PNG");
   code = 0;
   for (i=1; i < 16; ++i) {
      next_code[i] = code;
      z->firstcode[i] = (stbi__uint16) code;
      z->firstsymbol[i] = (stbi__uint16) k;
      code = (code + sizes[i]);
      if (sizes[i])
         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[i] = code << (16-i); // preshift for inner loop
      code <<= 1;
      k += sizes[i];
   }
   z->maxcode[16] = 0x10000; // sentinel
   for (i=0; i < num; ++i) {
      int s = sizelist[i];
      if (s) {
         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
         z->size [c] = (stbi_uc     ) s;
         z->value[c] = (stbi__uint16) i;
         if (s <= STBI__ZFAST_BITS) {
            int j = stbi__bit_reverse(next_code[s],s);
            while (j < (1 << STBI__ZFAST_BITS)) {
               z->fast[j] = fastv;
               j += (1 << s);
            }
         }
         ++next_code[s];
      }
   }
   return 1;
}

// zlib-from-memory implementation for PNG reading
//    because PNG allows splitting the zlib stream arbitrarily,
//    and it's annoying structurally to have PNG call ZLIB call PNG,
//    we require PNG read all the IDATs and combine them into a single
//    memory buffer

typedef struct
{
   stbi_uc *zbuffer, *zbuffer_end;
   int num_bits;
   stbi__uint32 code_buffer;

   char *zout;
   char *zout_start;
   char *zout_end;
   int   z_expandable;

   stbi__zhuffman z_length, z_distance;
} stbi__zbuf;

stbi_inline static int stbi__zeof(stbi__zbuf *z)
{
   return (z->zbuffer >= z->zbuffer_end);
}

stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
{
   return stbi__zeof(z) ?
0 : *z->zbuffer++; } static void stbi__fill_bits(stbi__zbuf *z) { do { if (z->code_buffer >= (1U << z->num_bits)) { z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */ return; } z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; z->num_bits += 8; } while (z->num_bits <= 24); } stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) { unsigned int k; if (z->num_bits < n) stbi__fill_bits(z); k = z->code_buffer & ((1 << n) - 1); z->code_buffer >>= n; z->num_bits -= n; return k; } static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) { int b,s,k; // not resolved by fast table, so compute it the slow way // use jpeg approach, which requires MSbits at top k = stbi__bit_reverse(a->code_buffer, 16); for (s=STBI__ZFAST_BITS+1; ; ++s) if (k < z->maxcode[s]) break; if (s >= 16) return -1; // invalid code! // code size is s, so: b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere! if (z->size[b] != s) return -1; // was originally an assert, but report failure instead. a->code_buffer >>= s; a->num_bits -= s; return z->value[b]; } stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) { int b,s; if (a->num_bits < 16) { if (stbi__zeof(a)) { return -1; /* report error for unexpected end of data. */ } stbi__fill_bits(a); } b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; if (b) { s = b >> 9; a->code_buffer >>= s; a->num_bits -= s; return b & 511; } return stbi__zhuffman_decode_slowpath(a, z); } static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes { char *q; unsigned int cur, limit, old_limit; z->zout = zout; if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); cur = (unsigned int) (z->zout - z->zout_start); limit = old_limit = (unsigned) (z->zout_end - z->zout_start); if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory"); while (cur + n > limit) { if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory"); limit *= 2; } q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); STBI_NOTUSED(old_limit); if (q == NULL) return stbi__err("outofmem", "Out of memory"); z->zout_start = q; z->zout = q + cur; z->zout_end = q + limit; return 1; } static const int stbi__zlength_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 }; static const int stbi__zlength_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; static const int stbi__zdist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; static int stbi__parse_huffman_block(stbi__zbuf *a) { char *zout = a->zout; for(;;) { int z = stbi__zhuffman_decode(a, &a->z_length); if (z < 256) { if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes if (zout >= a->zout_end) { if (!stbi__zexpand(a, zout, 1)) return 0; zout = a->zout; } *zout++ = (char) z; } else { stbi_uc *p; int len,dist; if (z == 256) { a->zout = zout; return 1; } if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data z -= 257; len = stbi__zlength_base[z]; if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); z = 
stbi__zhuffman_decode(a, &a->z_distance); if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data dist = stbi__zdist_base[z]; if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); if (zout + len > a->zout_end) { if (!stbi__zexpand(a, zout, len)) return 0; zout = a->zout; } p = (stbi_uc *) (zout - dist); if (dist == 1) { // run of one byte; common in images. stbi_uc v = *p; if (len) { do *zout++ = v; while (--len); } } else { if (len) { do *zout++ = *p++; while (--len); } } } } } static int stbi__compute_huffman_codes(stbi__zbuf *a) { static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; stbi__zhuffman z_codelength; stbi_uc lencodes[286+32+137];//padding for maximum single op stbi_uc codelength_sizes[19]; int i,n; int hlit = stbi__zreceive(a,5) + 257; int hdist = stbi__zreceive(a,5) + 1; int hclen = stbi__zreceive(a,4) + 4; int ntot = hlit + hdist; memset(codelength_sizes, 0, sizeof(codelength_sizes)); for (i=0; i < hclen; ++i) { int s = stbi__zreceive(a,3); codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; } if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; n = 0; while (n < ntot) { int c = stbi__zhuffman_decode(a, &z_codelength); if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); if (c < 16) lencodes[n++] = (stbi_uc) c; else { stbi_uc fill = 0; if (c == 16) { c = stbi__zreceive(a,2)+3; if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); fill = lencodes[n-1]; } else if (c == 17) { c = stbi__zreceive(a,3)+3; } else if (c == 18) { c = stbi__zreceive(a,7)+11; } else { return stbi__err("bad codelengths", "Corrupt PNG"); } if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); memset(lencodes+n, fill, c); n += c; } } if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; return 1; } static int stbi__parse_uncompressed_block(stbi__zbuf *a) { stbi_uc header[4]; int len,nlen,k; if (a->num_bits & 7) stbi__zreceive(a, a->num_bits & 7); // discard // drain the bit-packed data into header k = 0; while (a->num_bits > 0) { header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check a->code_buffer >>= 8; a->num_bits -= 8; } if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG"); // now fill header the normal way while (k < 4) header[k++] = stbi__zget8(a); len = header[1] * 256 + header[0]; nlen = header[3] * 256 + header[2]; if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); if (a->zout + len > a->zout_end) if (!stbi__zexpand(a, a->zout, len)) return 0; memcpy(a->zout, a->zbuffer, len); a->zbuffer += len; a->zout += len; return 1; } static int stbi__parse_zlib_header(stbi__zbuf *a) { int cmf = stbi__zget8(a); int cm = cmf & 15; /* int cinfo = cmf >> 4; */ int flg = stbi__zget8(a); if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png if (cm != 8) return stbi__err("bad 
compression","Corrupt PNG"); // DEFLATE required for png // window = 1 << (8 + cinfo)... but who cares, we fully buffer output return 1; } static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] = { 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 }; static const stbi_uc stbi__zdefault_distance[32] = { 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 }; /* Init algorithm: { int i; // use <= to match clearly with spec for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; } */ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) { int final, type; if (parse_header) if (!stbi__parse_zlib_header(a)) return 0; a->num_bits = 0; a->code_buffer = 0; do { final = stbi__zreceive(a,1); type = stbi__zreceive(a,2); if (type == 0) { if (!stbi__parse_uncompressed_block(a)) return 0; } else if (type == 3) { return 0; } else { if (type == 1) { // use fixed code lengths if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , STBI__ZNSYMS)) return 0; if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; } else { if (!stbi__compute_huffman_codes(a)) return 0; } if (!stbi__parse_huffman_block(a)) return 0; } } while (!final); return 1; } static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header) { a->zout_start = obuf; a->zout = obuf; a->zout_end = obuf + olen; a->z_expandable = exp; return stbi__parse_zlib(a, parse_header); } STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) { stbi__zbuf a; char *p = (char *) stbi__malloc(initial_size); if (p == NULL) return NULL; a.zbuffer = (stbi_uc *) buffer; a.zbuffer_end = (stbi_uc *) buffer + len; if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { if (outlen) *outlen = (int) (a.zout - a.zout_start); return a.zout_start; } else { STBI_FREE(a.zout_start); return NULL; } } STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) { return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); } STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) { stbi__zbuf a; char *p = (char *) stbi__malloc(initial_size); if (p == NULL) return NULL; a.zbuffer = (stbi_uc *) buffer; a.zbuffer_end = (stbi_uc *) buffer + len; if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { if (outlen) *outlen = (int) (a.zout - a.zout_start); return a.zout_start; } else { STBI_FREE(a.zout_start); return NULL; } } STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) { stbi__zbuf a; a.zbuffer = (stbi_uc *) ibuffer; a.zbuffer_end = (stbi_uc *) ibuffer + ilen; if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) return (int) (a.zout - a.zout_start); else return -1; } 
STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) { stbi__zbuf a; char *p = (char *) stbi__malloc(16384); if (p == NULL) return NULL; a.zbuffer = (stbi_uc *) buffer; a.zbuffer_end = (stbi_uc *) buffer+len; if (stbi__do_zlib(&a, p, 16384, 1, 0)) { if (outlen) *outlen = (int) (a.zout - a.zout_start); return a.zout_start; } else { STBI_FREE(a.zout_start); return NULL; } } STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) { stbi__zbuf a; a.zbuffer = (stbi_uc *) ibuffer; a.zbuffer_end = (stbi_uc *) ibuffer + ilen; if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) return (int) (a.zout - a.zout_start); else return -1; } #endif // public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 // simple implementation // - only 8-bit samples // - no CRC checking // - allocates lots of intermediate memory // - avoids problem of streaming data between subsystems // - avoids explicit window management // performance // - uses stb_zlib, a PD zlib implementation with fast huffman decoding #ifndef STBI_NO_PNG typedef struct { stbi__uint32 length; stbi__uint32 type; } stbi__pngchunk; static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) { stbi__pngchunk c; c.length = stbi__get32be(s); c.type = stbi__get32be(s); return c; } static int stbi__check_png_header(stbi__context *s) { static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; int i; for (i=0; i < 8; ++i) if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); return 1; } typedef struct { stbi__context *s; stbi_uc *idata, *expanded, *out; int depth; } stbi__png; enum { STBI__F_none=0, STBI__F_sub=1, STBI__F_up=2, STBI__F_avg=3, STBI__F_paeth=4, // synthetic filters used for first scanline to avoid needing a dummy row of 0s STBI__F_avg_first, STBI__F_paeth_first }; static stbi_uc first_row_filter[5] = { STBI__F_none, STBI__F_sub, STBI__F_none, STBI__F_avg_first, STBI__F_paeth_first }; static int stbi__paeth(int a, int b, int c) { int p = a + b - c; int pa = abs(p-a); int pb = abs(p-b); int pc = abs(p-c); if (pa <= pb && pa <= pc) return a; if (pb <= pc) return b; return c; } static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; // create the png data from post-deflated data static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) { int bytes = (depth == 16? 2 : 1); stbi__context *s = a->s; stbi__uint32 i,j,stride = x*out_n*bytes; stbi__uint32 img_len, img_width_bytes; int k; int img_n = s->img_n; // copy it into a local for later int output_bytes = out_n*bytes; int filter_bytes = img_n*bytes; int width = x; STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into if (!a->out) return stbi__err("outofmem", "Out of memory"); if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); img_width_bytes = (((img_n * x * depth) + 7) >> 3); img_len = (img_width_bytes + 1) * y; // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), // so just check for raw_len < img_len always. 
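   // layout refresher (explanatory note, not from upstream stb_image): each
   // scanline in `raw` is one filter-type byte followed by
   // ceil(img_n * x * depth / 8) packed sample bytes, hence
   // img_len = (img_width_bytes + 1) * y. e.g. a 5-pixel-wide 4-bit grayscale
   // image has img_width_bytes = (5*4+7)>>3 = 3, so each raw scanline
   // occupies 1 + 3 = 4 bytes.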
if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); for (j=0; j < y; ++j) { stbi_uc *cur = a->out + stride*j; stbi_uc *prior; int filter = *raw++; if (filter > 4) return stbi__err("invalid filter","Corrupt PNG"); if (depth < 8) { if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG"); cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place filter_bytes = 1; width = img_width_bytes; } prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above // if first row, use special filter that doesn't sample previous row if (j == 0) filter = first_row_filter[filter]; // handle first byte explicitly for (k=0; k < filter_bytes; ++k) { switch (filter) { case STBI__F_none : cur[k] = raw[k]; break; case STBI__F_sub : cur[k] = raw[k]; break; case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; case STBI__F_avg_first : cur[k] = raw[k]; break; case STBI__F_paeth_first: cur[k] = raw[k]; break; } } if (depth == 8) { if (img_n != out_n) cur[img_n] = 255; // first pixel raw += img_n; cur += out_n; prior += out_n; } else if (depth == 16) { if (img_n != out_n) { cur[filter_bytes] = 255; // first pixel top byte cur[filter_bytes+1] = 255; // first pixel bottom byte } raw += filter_bytes; cur += output_bytes; prior += output_bytes; } else { raw += 1; cur += 1; prior += 1; } // this is a little gross, so that we don't switch per-pixel or per-component if (depth < 8 || img_n == out_n) { int nk = (width - 1)*filter_bytes; #define STBI__CASE(f) \ case f: \ for (k=0; k < nk; ++k) switch (filter) { // "none" filter turns into a memcpy here; make that explicit. 
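               // macro refresher (explanatory note, not from upstream
               // stb_image): STBI__CASE(STBI__F_sub) { ... } break;
               // expands to
               //    case STBI__F_sub:
               //       for (k=0; k < nk; ++k) { ... }
               //       break;
               // i.e. one tight per-byte loop per filter type rather than
               // a per-byte switch.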
               case STBI__F_none: memcpy(cur, raw, nk); break;
               STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
               STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
               STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
               STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
               STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
               STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
            }
            #undef STBI__CASE
            raw += nk;
         } else {
            STBI_ASSERT(img_n+1 == out_n);
            #define STBI__CASE(f) \
                case f: \
                   for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
                      for (k=0; k < filter_bytes; ++k)
            switch (filter) {
               STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
               STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
               STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
               STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
               STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
               STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
               STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
            }
            #undef STBI__CASE

            // the loop above sets the high byte of the pixels' alpha, but for
            // 16 bit png files we also need the low byte set. we'll do that here.
            if (depth == 16) {
               cur = a->out + stride*j; // start at the beginning of the row again
               for (i=0; i < x; ++i,cur+=output_bytes) {
                  cur[filter_bytes+1] = 255;
               }
            }
         }
      }

   // we make a separate pass to expand bits to pixels; for performance,
   // this could run two scanlines behind the above code, so it won't
   // interfere with filtering but will still be in the cache.
   if (depth < 8) {
      for (j=0; j < y; ++j) {
         stbi_uc *cur = a->out + stride*j;
         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
         // unpack 1/2/4-bit into an 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
         // png guarantees byte alignment; if width is not a multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
         // note that the final byte might overshoot and write more data than desired.
         // we can allocate enough data that this never writes out of memory, but it
         // could also overwrite the next scanline. can it overwrite non-empty data
         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
// so we need to explicitly clamp the final ones if (depth == 4) { for (k=x*img_n; k >= 2; k-=2, ++in) { *cur++ = scale * ((*in >> 4) ); *cur++ = scale * ((*in ) & 0x0f); } if (k > 0) *cur++ = scale * ((*in >> 4) ); } else if (depth == 2) { for (k=x*img_n; k >= 4; k-=4, ++in) { *cur++ = scale * ((*in >> 6) ); *cur++ = scale * ((*in >> 4) & 0x03); *cur++ = scale * ((*in >> 2) & 0x03); *cur++ = scale * ((*in ) & 0x03); } if (k > 0) *cur++ = scale * ((*in >> 6) ); if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); } else if (depth == 1) { for (k=x*img_n; k >= 8; k-=8, ++in) { *cur++ = scale * ((*in >> 7) ); *cur++ = scale * ((*in >> 6) & 0x01); *cur++ = scale * ((*in >> 5) & 0x01); *cur++ = scale * ((*in >> 4) & 0x01); *cur++ = scale * ((*in >> 3) & 0x01); *cur++ = scale * ((*in >> 2) & 0x01); *cur++ = scale * ((*in >> 1) & 0x01); *cur++ = scale * ((*in ) & 0x01); } if (k > 0) *cur++ = scale * ((*in >> 7) ); if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); } if (img_n != out_n) { int q; // insert alpha = 255 cur = a->out + stride*j; if (img_n == 1) { for (q=x-1; q >= 0; --q) { cur[q*2+1] = 255; cur[q*2+0] = cur[q]; } } else { STBI_ASSERT(img_n == 3); for (q=x-1; q >= 0; --q) { cur[q*4+3] = 255; cur[q*4+2] = cur[q*3+2]; cur[q*4+1] = cur[q*3+1]; cur[q*4+0] = cur[q*3+0]; } } } } } else if (depth == 16) { // force the image data from big-endian to platform-native. // this is done in a separate pass due to the decoding relying // on the data being untouched, but could probably be done // per-line during decode if care is taken. stbi_uc *cur = a->out; stbi__uint16 *cur16 = (stbi__uint16*)cur; for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { *cur16 = (cur[0] << 8) | cur[1]; } } return 1; } static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) { int bytes = (depth == 16 ? 
2 : 1); int out_bytes = out_n * bytes; stbi_uc *final; int p; if (!interlaced) return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); // de-interlacing final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); if (!final) return stbi__err("outofmem", "Out of memory"); for (p=0; p < 7; ++p) { int xorig[] = { 0,4,0,2,0,1,0 }; int yorig[] = { 0,0,4,0,2,0,1 }; int xspc[] = { 8,8,4,4,2,2,1 }; int yspc[] = { 8,8,8,4,4,2,2 }; int i,j,x,y; // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; if (x && y) { stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { STBI_FREE(final); return 0; } for (j=0; j < y; ++j) { for (i=0; i < x; ++i) { int out_y = j*yspc[p]+yorig[p]; int out_x = i*xspc[p]+xorig[p]; memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, a->out + (j*x+i)*out_bytes, out_bytes); } } STBI_FREE(a->out); image_data += img_len; image_data_len -= img_len; } } a->out = final; return 1; } static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) { stbi__context *s = z->s; stbi__uint32 i, pixel_count = s->img_x * s->img_y; stbi_uc *p = z->out; // compute color-based transparency, assuming we've // already got 255 as the alpha value in the output STBI_ASSERT(out_n == 2 || out_n == 4); if (out_n == 2) { for (i=0; i < pixel_count; ++i) { p[1] = (p[0] == tc[0] ? 0 : 255); p += 2; } } else { for (i=0; i < pixel_count; ++i) { if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) p[3] = 0; p += 4; } } return 1; } static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) { stbi__context *s = z->s; stbi__uint32 i, pixel_count = s->img_x * s->img_y; stbi__uint16 *p = (stbi__uint16*) z->out; // compute color-based transparency, assuming we've // already got 65535 as the alpha value in the output STBI_ASSERT(out_n == 2 || out_n == 4); if (out_n == 2) { for (i = 0; i < pixel_count; ++i) { p[1] = (p[0] == tc[0] ? 
0 : 65535);
         p += 2;
      }
   } else {
      for (i = 0; i < pixel_count; ++i) {
         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
            p[3] = 0;
         p += 4;
      }
   }
   return 1;
}

static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
{
   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
   stbi_uc *p, *temp_out, *orig = a->out;

   p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
   if (p == NULL) return stbi__err("outofmem", "Out of memory");

   // between here and free(out) below, exiting would leak
   temp_out = p;

   if (pal_img_n == 3) {
      for (i=0; i < pixel_count; ++i) {
         int n = orig[i]*4;
         p[0] = palette[n  ];
         p[1] = palette[n+1];
         p[2] = palette[n+2];
         p += 3;
      }
   } else {
      for (i=0; i < pixel_count; ++i) {
         int n = orig[i]*4;
         p[0] = palette[n  ];
         p[1] = palette[n+1];
         p[2] = palette[n+2];
         p[3] = palette[n+3];
         p += 4;
      }
   }
   STBI_FREE(a->out);
   a->out = temp_out;

   STBI_NOTUSED(len);

   return 1;
}

static int stbi__unpremultiply_on_load_global = 0;
static int stbi__de_iphone_flag_global = 0;

STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
}

STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag_global = flag_true_if_should_convert;
}

#ifndef STBI_THREAD_LOCAL
#define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global
#define stbi__de_iphone_flag stbi__de_iphone_flag_global
#else
static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;

STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
   stbi__unpremultiply_on_load_set = 1;
}

STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag_local = flag_true_if_should_convert;
   stbi__de_iphone_flag_set = 1;
}

#define stbi__unpremultiply_on_load (stbi__unpremultiply_on_load_set \
                                     ? stbi__unpremultiply_on_load_local \
                                     : stbi__unpremultiply_on_load_global)
#define stbi__de_iphone_flag (stbi__de_iphone_flag_set \
                              ?
stbi__de_iphone_flag_local \ : stbi__de_iphone_flag_global) #endif // STBI_THREAD_LOCAL static void stbi__de_iphone(stbi__png *z) { stbi__context *s = z->s; stbi__uint32 i, pixel_count = s->img_x * s->img_y; stbi_uc *p = z->out; if (s->img_out_n == 3) { // convert bgr to rgb for (i=0; i < pixel_count; ++i) { stbi_uc t = p[0]; p[0] = p[2]; p[2] = t; p += 3; } } else { STBI_ASSERT(s->img_out_n == 4); if (stbi__unpremultiply_on_load) { // convert bgr to rgb and unpremultiply for (i=0; i < pixel_count; ++i) { stbi_uc a = p[3]; stbi_uc t = p[0]; if (a) { stbi_uc half = a / 2; p[0] = (p[2] * 255 + half) / a; p[1] = (p[1] * 255 + half) / a; p[2] = ( t * 255 + half) / a; } else { p[0] = p[2]; p[2] = t; } p += 4; } } else { // convert bgr to rgb for (i=0; i < pixel_count; ++i) { stbi_uc t = p[0]; p[0] = p[2]; p[2] = t; p += 4; } } } } #define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) { stbi_uc palette[1024], pal_img_n=0; stbi_uc has_trans=0, tc[3]={0}; stbi__uint16 tc16[3]; stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; int first=1,k,interlace=0, color=0, is_iphone=0; stbi__context *s = z->s; z->expanded = NULL; z->idata = NULL; z->out = NULL; if (!stbi__check_png_header(s)) return 0; if (scan == STBI__SCAN_type) return 1; for (;;) { stbi__pngchunk c = stbi__get_chunk_header(s); switch (c.type) { case STBI__PNG_TYPE('C','g','B','I'): is_iphone = 1; stbi__skip(s, c.length); break; case STBI__PNG_TYPE('I','H','D','R'): { int comp,filter; if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); first = 0; if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); s->img_x = stbi__get32be(s); s->img_y = stbi__get32be(s); if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); if (!pal_img_n) { s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); } else { // if paletted, then pal_n is our final components, and // img_n is # components to decompress/filter. 
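            // (explanatory note, not from upstream stb_image: e.g. for an
            // 8-bit paletted PNG, inflate and unfiltering operate on 1 byte
            // per pixel; stbi__expand_png_palette() later widens each index
            // to its 3- or 4-byte palette entry.)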
s->img_n = 1; if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); } // even with SCAN_header, have to scan to see if we have a tRNS break; } case STBI__PNG_TYPE('P','L','T','E'): { if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); pal_len = c.length / 3; if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); for (i=0; i < pal_len; ++i) { palette[i*4+0] = stbi__get8(s); palette[i*4+1] = stbi__get8(s); palette[i*4+2] = stbi__get8(s); palette[i*4+3] = 255; } break; } case STBI__PNG_TYPE('t','R','N','S'): { if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); if (pal_img_n) { if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); pal_img_n = 4; for (i=0; i < c.length; ++i) palette[i*4+3] = stbi__get8(s); } else { if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); has_trans = 1; // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now. if (scan == STBI__SCAN_header) { ++s->img_n; return 1; } if (z->depth == 16) { for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is } else { for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger } } break; } case STBI__PNG_TYPE('I','D','A','T'): { if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); if (scan == STBI__SCAN_header) { // header scan definitely stops at first IDAT if (pal_img_n) s->img_n = pal_img_n; return 1; } if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes"); if ((int)(ioff + c.length) < (int)ioff) return 0; if (ioff + c.length > idata_limit) { stbi__uint32 idata_limit_old = idata_limit; stbi_uc *p; if (idata_limit == 0) idata_limit = c.length > 4096 ? 
c.length : 4096; while (ioff + c.length > idata_limit) idata_limit *= 2; STBI_NOTUSED(idata_limit_old); p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); z->idata = p; } if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); ioff += c.length; break; } case STBI__PNG_TYPE('I','E','N','D'): { stbi__uint32 raw_len, bpl; if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if (scan != STBI__SCAN_load) return 1; if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); // initial guess for decoded data size to avoid unnecessary reallocs bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); if (z->expanded == NULL) return 0; // zlib should set error STBI_FREE(z->idata); z->idata = NULL; if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) s->img_out_n = s->img_n+1; else s->img_out_n = s->img_n; if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; if (has_trans) { if (z->depth == 16) { if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; } else { if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; } } if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) stbi__de_iphone(z); if (pal_img_n) { // pal_img_n == 3 or 4 s->img_n = pal_img_n; // record the actual colors we had s->img_out_n = pal_img_n; if (req_comp >= 3) s->img_out_n = req_comp; if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) return 0; } else if (has_trans) { // non-paletted image with tRNS -> source image has (constant) alpha ++s->img_n; } STBI_FREE(z->expanded); z->expanded = NULL; // end of PNG chunk, read and skip CRC stbi__get32be(s); return 1; } default: // if critical, fail if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if ((c.type & (1 << 29)) == 0) { #ifndef STBI_NO_FAILURE_STRINGS // not threadsafe static char invalid_chunk[] = "XXXX PNG chunk not known"; invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); #endif return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); } stbi__skip(s, c.length); break; } // end of PNG chunk, read and skip CRC stbi__get32be(s); } } static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) { void *result=NULL; if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { if (p->depth <= 8) ri->bits_per_channel = 8; else if (p->depth == 16) ri->bits_per_channel = 16; else return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth"); result = p->out; p->out = NULL; if (req_comp && req_comp != p->s->img_out_n) { if (ri->bits_per_channel == 8) result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); else result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); p->s->img_out_n = req_comp; if (result == NULL) return result; } *x = p->s->img_x; *y = p->s->img_y; if (n) *n = 
p->s->img_n;
   }

   STBI_FREE(p->out);      p->out      = NULL;
   STBI_FREE(p->expanded); p->expanded = NULL;
   STBI_FREE(p->idata);    p->idata    = NULL;

   return result;
}

static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi__png p;
   p.s = s;
   return stbi__do_png(&p, x,y,comp,req_comp, ri);
}

static int stbi__png_test(stbi__context *s)
{
   int r;
   r = stbi__check_png_header(s);
   stbi__rewind(s);
   return r;
}

static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
   if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
      stbi__rewind( p->s );
      return 0;
   }
   if (x) *x = p->s->img_x;
   if (y) *y = p->s->img_y;
   if (comp) *comp = p->s->img_n;
   return 1;
}

static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__png p;
   p.s = s;
   return stbi__png_info_raw(&p, x, y, comp);
}

static int stbi__png_is16(stbi__context *s)
{
   stbi__png p;
   p.s = s;
   if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
      return 0;
   if (p.depth != 16) {
      stbi__rewind(p.s);
      return 0;
   }
   return 1;
}
#endif

// Microsoft/Windows BMP image

#ifndef STBI_NO_BMP
static int stbi__bmp_test_raw(stbi__context *s)
{
   int r;
   int sz;
   if (stbi__get8(s) != 'B') return 0;
   if (stbi__get8(s) != 'M') return 0;
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   stbi__get32le(s); // discard data offset
   sz = stbi__get32le(s);
   r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
   return r;
}

static int stbi__bmp_test(stbi__context *s)
{
   int r = stbi__bmp_test_raw(s);
   stbi__rewind(s);
   return r;
}

// returns 0..31 for the highest set bit
static int stbi__high_bit(unsigned int z)
{
   int n=0;
   if (z == 0) return -1;
   if (z >= 0x10000) { n += 16; z >>= 16; }
   if (z >= 0x00100) { n +=  8; z >>=  8; }
   if (z >= 0x00010) { n +=  4; z >>=  4; }
   if (z >= 0x00004) { n +=  2; z >>=  2; }
   if (z >= 0x00002) { n +=  1;/* >>=  1;*/ }
   return n;
}

static int stbi__bitcount(unsigned int a)
{
   a = (a & 0x55555555) + ((a >>  1) & 0x55555555); // max 2
   a = (a & 0x33333333) + ((a >>  2) & 0x33333333); // max 4
   a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
   a = (a + (a >> 8)); // max 16 per 8 bits
   a = (a + (a >> 16)); // max 32 per 8 bits
   return a & 0xff;
}

// extract an arbitrarily-aligned N-bit value (N=bits)
// from v, and then make it 8-bits long and fractionally
// extend it to full range.
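// e.g. with bits=5 (a 565-style field): mul_table[5] = 0x21 and
// shift_table[5] = 2, so v becomes ((v<<5)|v) >> 2, which maps 0..31 onto
// 0..255 exactly (31*0x21 >> 2 == 255). (worked example, not from upstream
// stb_image.)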
static int stbi__shiftsigned(unsigned int v, int shift, int bits) { static unsigned int mul_table[9] = { 0, 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, }; static unsigned int shift_table[9] = { 0, 0,0,1,0,2,4,6,0, }; if (shift < 0) v <<= -shift; else v >>= shift; STBI_ASSERT(v < 256); v >>= (8-bits); STBI_ASSERT(bits >= 0 && bits <= 8); return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; } typedef struct { int bpp, offset, hsz; unsigned int mr,mg,mb,ma, all_a; int extra_read; } stbi__bmp_data; static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress) { // BI_BITFIELDS specifies masks explicitly, don't override if (compress == 3) return 1; if (compress == 0) { if (info->bpp == 16) { info->mr = 31u << 10; info->mg = 31u << 5; info->mb = 31u << 0; } else if (info->bpp == 32) { info->mr = 0xffu << 16; info->mg = 0xffu << 8; info->mb = 0xffu << 0; info->ma = 0xffu << 24; info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 } else { // otherwise, use defaults, which is all-0 info->mr = info->mg = info->mb = info->ma = 0; } return 1; } return 0; // error } static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) { int hsz; if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); stbi__get32le(s); // discard filesize stbi__get16le(s); // discard reserved stbi__get16le(s); // discard reserved info->offset = stbi__get32le(s); info->hsz = hsz = stbi__get32le(s); info->mr = info->mg = info->mb = info->ma = 0; info->extra_read = 14; if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP"); if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); if (hsz == 12) { s->img_x = stbi__get16le(s); s->img_y = stbi__get16le(s); } else { s->img_x = stbi__get32le(s); s->img_y = stbi__get32le(s); } if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); info->bpp = stbi__get16le(s); if (hsz != 12) { int compress = stbi__get32le(s); if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel stbi__get32le(s); // discard sizeof stbi__get32le(s); // discard hres stbi__get32le(s); // discard vres stbi__get32le(s); // discard colorsused stbi__get32le(s); // discard max important if (hsz == 40 || hsz == 56) { if (hsz == 56) { stbi__get32le(s); stbi__get32le(s); stbi__get32le(s); stbi__get32le(s); } if (info->bpp == 16 || info->bpp == 32) { if (compress == 0) { stbi__bmp_set_mask_defaults(info, compress); } else if (compress == 3) { info->mr = stbi__get32le(s); info->mg = stbi__get32le(s); info->mb = stbi__get32le(s); info->extra_read += 12; // not documented, but generated by photoshop and handled by mspaint if (info->mr == info->mg && info->mg == info->mb) { // ?!?!? 
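                  // (explanatory note, not from upstream stb_image: all three
                  // masks being identical would decode every channel from the
                  // same bits, so the header is rejected as nonsensical.)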
return stbi__errpuc("bad BMP", "bad BMP"); } } else return stbi__errpuc("bad BMP", "bad BMP"); } } else { // V4/V5 header int i; if (hsz != 108 && hsz != 124) return stbi__errpuc("bad BMP", "bad BMP"); info->mr = stbi__get32le(s); info->mg = stbi__get32le(s); info->mb = stbi__get32le(s); info->ma = stbi__get32le(s); if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs stbi__bmp_set_mask_defaults(info, compress); stbi__get32le(s); // discard color space for (i=0; i < 12; ++i) stbi__get32le(s); // discard color space parameters if (hsz == 124) { stbi__get32le(s); // discard rendering intent stbi__get32le(s); // discard offset of profile data stbi__get32le(s); // discard size of profile data stbi__get32le(s); // discard reserved } } } return (void *) 1; } static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { stbi_uc *out; unsigned int mr=0,mg=0,mb=0,ma=0, all_a; stbi_uc pal[256][4]; int psize=0,i,j,width; int flip_vertically, pad, target; stbi__bmp_data info; STBI_NOTUSED(ri); info.all_a = 255; if (stbi__bmp_parse_header(s, &info) == NULL) return NULL; // error code already set flip_vertically = ((int) s->img_y) > 0; s->img_y = abs((int) s->img_y); if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); mr = info.mr; mg = info.mg; mb = info.mb; ma = info.ma; all_a = info.all_a; if (info.hsz == 12) { if (info.bpp < 24) psize = (info.offset - info.extra_read - 24) / 3; } else { if (info.bpp < 16) psize = (info.offset - info.extra_read - info.hsz) >> 2; } if (psize == 0) { // accept some number of extra bytes after the header, but if the offset points either to before // the header ends or implies a large amount of extra data, reject the file as malformed int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original); int header_limit = 1024; // max we actually read is below 256 bytes currently. int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size. if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) { return stbi__errpuc("bad header", "Corrupt BMP"); } // we established that bytes_read_so_far is positive and sensible. // the first half of this test rejects offsets that are either too small positives, or // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn // ensures the number computed in the second half of the test can't overflow. if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) { return stbi__errpuc("bad offset", "Corrupt BMP"); } else { stbi__skip(s, info.offset - bytes_read_so_far); } } if (info.bpp == 24 && ma == 0xff000000) s->img_n = 3; else s->img_n = ma ? 
4 : 3; if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 target = req_comp; else target = s->img_n; // if they want monochrome, we'll post-convert // sanity-check size if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) return stbi__errpuc("too large", "Corrupt BMP"); out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); if (!out) return stbi__errpuc("outofmem", "Out of memory"); if (info.bpp < 16) { int z=0; if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } for (i=0; i < psize; ++i) { pal[i][2] = stbi__get8(s); pal[i][1] = stbi__get8(s); pal[i][0] = stbi__get8(s); if (info.hsz != 12) stbi__get8(s); pal[i][3] = 255; } stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); if (info.bpp == 1) width = (s->img_x + 7) >> 3; else if (info.bpp == 4) width = (s->img_x + 1) >> 1; else if (info.bpp == 8) width = s->img_x; else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } pad = (-width)&3; if (info.bpp == 1) { for (j=0; j < (int) s->img_y; ++j) { int bit_offset = 7, v = stbi__get8(s); for (i=0; i < (int) s->img_x; ++i) { int color = (v>>bit_offset)&0x1; out[z++] = pal[color][0]; out[z++] = pal[color][1]; out[z++] = pal[color][2]; if (target == 4) out[z++] = 255; if (i+1 == (int) s->img_x) break; if((--bit_offset) < 0) { bit_offset = 7; v = stbi__get8(s); } } stbi__skip(s, pad); } } else { for (j=0; j < (int) s->img_y; ++j) { for (i=0; i < (int) s->img_x; i += 2) { int v=stbi__get8(s),v2=0; if (info.bpp == 4) { v2 = v & 15; v >>= 4; } out[z++] = pal[v][0]; out[z++] = pal[v][1]; out[z++] = pal[v][2]; if (target == 4) out[z++] = 255; if (i+1 == (int) s->img_x) break; v = (info.bpp == 8) ? stbi__get8(s) : v2; out[z++] = pal[v][0]; out[z++] = pal[v][1]; out[z++] = pal[v][2]; if (target == 4) out[z++] = 255; } stbi__skip(s, pad); } } } else { int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; int z = 0; int easy=0; stbi__skip(s, info.offset - info.extra_read - info.hsz); if (info.bpp == 24) width = 3 * s->img_x; else if (info.bpp == 16) width = 2*s->img_x; else /* bpp = 32 and pad = 0 */ width=0; pad = (-width) & 3; if (info.bpp == 24) { easy = 1; } else if (info.bpp == 32) { if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) easy = 2; } if (!easy) { if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } // right shift amt to put high bit in position #7 rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } } for (j=0; j < (int) s->img_y; ++j) { if (easy) { for (i=0; i < (int) s->img_x; ++i) { unsigned char a; out[z+2] = stbi__get8(s); out[z+1] = stbi__get8(s); out[z+0] = stbi__get8(s); z += 3; a = (easy == 2 ? stbi__get8(s) : 255); all_a |= a; if (target == 4) out[z++] = a; } } else { int bpp = info.bpp; for (i=0; i < (int) s->img_x; ++i) { stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); unsigned int a; out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); a = (ma ? 
stbi__shiftsigned(v & ma, ashift, acount) : 255); all_a |= a; if (target == 4) out[z++] = STBI__BYTECAST(a); } } stbi__skip(s, pad); } } // if alpha channel is all 0s, replace with all 255s if (target == 4 && all_a == 0) for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) out[i] = 255; if (flip_vertically) { stbi_uc t; for (j=0; j < (int) s->img_y>>1; ++j) { stbi_uc *p1 = out + j *s->img_x*target; stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; for (i=0; i < (int) s->img_x*target; ++i) { t = p1[i]; p1[i] = p2[i]; p2[i] = t; } } } if (req_comp && req_comp != target) { out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); if (out == NULL) return out; // stbi__convert_format frees input on failure } *x = s->img_x; *y = s->img_y; if (comp) *comp = s->img_n; return out; } #endif // Targa Truevision - TGA // by Jonathan Dummer #ifndef STBI_NO_TGA // returns STBI_rgb or whatever, 0 on error static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) { // only RGB or RGBA (incl. 16bit) or grey allowed if (is_rgb16) *is_rgb16 = 0; switch(bits_per_pixel) { case 8: return STBI_grey; case 16: if(is_grey) return STBI_grey_alpha; // fallthrough case 15: if(is_rgb16) *is_rgb16 = 1; return STBI_rgb; case 24: // fallthrough case 32: return bits_per_pixel/8; default: return 0; } } static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) { int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; int sz, tga_colormap_type; stbi__get8(s); // discard Offset tga_colormap_type = stbi__get8(s); // colormap type if( tga_colormap_type > 1 ) { stbi__rewind(s); return 0; // only RGB or indexed allowed } tga_image_type = stbi__get8(s); // image type if ( tga_colormap_type == 1 ) { // colormapped (paletted) image if (tga_image_type != 1 && tga_image_type != 9) { stbi__rewind(s); return 0; } stbi__skip(s,4); // skip index of first colormap entry and number of entries sz = stbi__get8(s); // check bits per palette color entry if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { stbi__rewind(s); return 0; } stbi__skip(s,4); // skip image x and y origin tga_colormap_bpp = sz; } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { stbi__rewind(s); return 0; // only RGB or grey allowed, +/- RLE } stbi__skip(s,9); // skip colormap specification and image x/y origin tga_colormap_bpp = 0; } tga_w = stbi__get16le(s); if( tga_w < 1 ) { stbi__rewind(s); return 0; // test width } tga_h = stbi__get16le(s); if( tga_h < 1 ) { stbi__rewind(s); return 0; // test height } tga_bits_per_pixel = stbi__get8(s); // bits per pixel stbi__get8(s); // ignore alpha bits if (tga_colormap_bpp != 0) { if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { // when using a colormap, tga_bits_per_pixel is the size of the indexes // I don't think anything but 8 or 16bit indexes makes sense stbi__rewind(s); return 0; } tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); } else { tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); } if(!tga_comp) { stbi__rewind(s); return 0; } if (x) *x = tga_w; if (y) *y = tga_h; if (comp) *comp = tga_comp; return 1; // seems to have passed everything } static int stbi__tga_test(stbi__context *s) { int res = 0; int sz, tga_color_type; stbi__get8(s); // discard Offset tga_color_type = stbi__get8(s); // color type if ( tga_color_type > 1 ) 
goto errorEnd; // only RGB or indexed allowed sz = stbi__get8(s); // image type if ( tga_color_type == 1 ) { // colormapped (paletted) image if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 stbi__skip(s,4); // skip index of first colormap entry and number of entries sz = stbi__get8(s); // check bits per palette color entry if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; stbi__skip(s,4); // skip image x and y origin } else { // "normal" image w/o colormap if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE stbi__skip(s,9); // skip colormap specification and image x/y origin } if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height sz = stbi__get8(s); // bits per pixel if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; res = 1; // if we got this far, everything's good and we can return 1 instead of 0 errorEnd: stbi__rewind(s); return res; } // read 16bit value and convert to 24bit RGB static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) { stbi__uint16 px = (stbi__uint16)stbi__get16le(s); stbi__uint16 fiveBitMask = 31; // we have 3 channels with 5bits each int r = (px >> 10) & fiveBitMask; int g = (px >> 5) & fiveBitMask; int b = px & fiveBitMask; // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later out[0] = (stbi_uc)((r * 255)/31); out[1] = (stbi_uc)((g * 255)/31); out[2] = (stbi_uc)((b * 255)/31); // some people claim that the most significant bit might be used for alpha // (possibly if an alpha-bit is set in the "image descriptor byte") // but that only made 16bit test images completely translucent.. // so let's treat all 15 and 16bit TGAs as RGB with no alpha. } static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { // read in the TGA header stuff int tga_offset = stbi__get8(s); int tga_indexed = stbi__get8(s); int tga_image_type = stbi__get8(s); int tga_is_RLE = 0; int tga_palette_start = stbi__get16le(s); int tga_palette_len = stbi__get16le(s); int tga_palette_bits = stbi__get8(s); int tga_x_origin = stbi__get16le(s); int tga_y_origin = stbi__get16le(s); int tga_width = stbi__get16le(s); int tga_height = stbi__get16le(s); int tga_bits_per_pixel = stbi__get8(s); int tga_comp, tga_rgb16=0; int tga_inverted = stbi__get8(s); // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) 
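/* For reference, the reads above walk the fixed 18-byte TGA header in order
   (multi-byte fields are little-endian); a sketch of the layout as parsed here:

      offset  size  field
           0     1  ID length            (tga_offset: bytes skipped before pixel data)
           1     1  colormap type        (tga_indexed)
           2     1  image type           (tga_image_type; +8 means RLE-compressed)
           3     2  first colormap index (tga_palette_start)
           5     2  colormap length      (tga_palette_len)
           7     1  colormap entry bits  (tga_palette_bits)
           8     2  x origin             (tga_x_origin)
          10     2  y origin             (tga_y_origin)
          12     2  width                (tga_width)
          14     2  height               (tga_height)
          16     1  bits per pixel       (tga_bits_per_pixel)
          17     1  image descriptor     (tga_inverted; bit 5 set = top-to-bottom) */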
// image data unsigned char *tga_data; unsigned char *tga_palette = NULL; int i, j; unsigned char raw_data[4] = {0}; int RLE_count = 0; int RLE_repeating = 0; int read_next_pixel = 1; STBI_NOTUSED(ri); STBI_NOTUSED(tga_x_origin); // @TODO STBI_NOTUSED(tga_y_origin); // @TODO if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); // do a tiny bit of processing if ( tga_image_type >= 8 ) { tga_image_type -= 8; tga_is_RLE = 1; } tga_inverted = 1 - ((tga_inverted >> 5) & 1); // If I'm paletted, then I'll use the number of bits from the palette if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); // tga info *x = tga_width; *y = tga_height; if (comp) *comp = tga_comp; if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) return stbi__errpuc("too large", "Corrupt TGA"); tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); // skip to the data's starting position (offset usually = 0) stbi__skip(s, tga_offset ); if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { for (i=0; i < tga_height; ++i) { int row = tga_inverted ? tga_height -i - 1 : i; stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; stbi__getn(s, tga_row, tga_width * tga_comp); } } else { // do I need to load a palette? if ( tga_indexed) { if (tga_palette_len == 0) { /* you have to have at least one entry! */ STBI_FREE(tga_data); return stbi__errpuc("bad palette", "Corrupt TGA"); } // any data to skip? (offset usually = 0) stbi__skip(s, tga_palette_start ); // load the palette tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); if (!tga_palette) { STBI_FREE(tga_data); return stbi__errpuc("outofmem", "Out of memory"); } if (tga_rgb16) { stbi_uc *pal_entry = tga_palette; STBI_ASSERT(tga_comp == STBI_rgb); for (i=0; i < tga_palette_len; ++i) { stbi__tga_read_rgb16(s, pal_entry); pal_entry += tga_comp; } } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { STBI_FREE(tga_data); STBI_FREE(tga_palette); return stbi__errpuc("bad palette", "Corrupt TGA"); } } // load the data for (i=0; i < tga_width * tga_height; ++i) { // if I'm in RLE mode, do I need to get a RLE chunk? if ( tga_is_RLE ) { if ( RLE_count == 0 ) { // yep, get the next byte as a RLE command int RLE_cmd = stbi__get8(s); RLE_count = 1 + (RLE_cmd & 127); RLE_repeating = RLE_cmd >> 7; read_next_pixel = 1; } else if ( !RLE_repeating ) { read_next_pixel = 1; } } else { read_next_pixel = 1; } // OK, if I need to read a pixel, do it now if ( read_next_pixel ) { // load however much data we did have if ( tga_indexed ) { // read in index, then perform the lookup int pal_idx = (tga_bits_per_pixel == 8) ?
stbi__get8(s) : stbi__get16le(s); if ( pal_idx >= tga_palette_len ) { // invalid index pal_idx = 0; } pal_idx *= tga_comp; for (j = 0; j < tga_comp; ++j) { raw_data[j] = tga_palette[pal_idx+j]; } } else if(tga_rgb16) { STBI_ASSERT(tga_comp == STBI_rgb); stbi__tga_read_rgb16(s, raw_data); } else { // read in the data raw for (j = 0; j < tga_comp; ++j) { raw_data[j] = stbi__get8(s); } } // clear the reading flag for the next pixel read_next_pixel = 0; } // end of reading a pixel // copy data for (j = 0; j < tga_comp; ++j) tga_data[i*tga_comp+j] = raw_data[j]; // in case we're in RLE mode, keep counting down --RLE_count; } // do I need to invert the image? if ( tga_inverted ) { for (j = 0; j*2 < tga_height; ++j) { int index1 = j * tga_width * tga_comp; int index2 = (tga_height - 1 - j) * tga_width * tga_comp; for (i = tga_width * tga_comp; i > 0; --i) { unsigned char temp = tga_data[index1]; tga_data[index1] = tga_data[index2]; tga_data[index2] = temp; ++index1; ++index2; } } } // clear my palette, if I had one if ( tga_palette != NULL ) { STBI_FREE( tga_palette ); } } // swap RGB - if the source data was RGB16, it already is in the right order if (tga_comp >= 3 && !tga_rgb16) { unsigned char* tga_pixel = tga_data; for (i=0; i < tga_width * tga_height; ++i) { unsigned char temp = tga_pixel[0]; tga_pixel[0] = tga_pixel[2]; tga_pixel[2] = temp; tga_pixel += tga_comp; } } // convert to target component count if (req_comp && req_comp != tga_comp) tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); // the things I do to get rid of an error message, and yet keep // Microsoft's C compilers happy... [8^( tga_palette_start = tga_palette_len = tga_palette_bits = tga_x_origin = tga_y_origin = 0; STBI_NOTUSED(tga_palette_start); // OK, done return tga_data; } #endif // ************************************************************************************************* // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB #ifndef STBI_NO_PSD static int stbi__psd_test(stbi__context *s) { int r = (stbi__get32be(s) == 0x38425053); stbi__rewind(s); return r; } static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) { int count, nleft, len; count = 0; while ((nleft = pixelCount - count) > 0) { len = stbi__get8(s); if (len == 128) { // No-op. } else if (len < 128) { // Copy next len+1 bytes literally. len++; if (len > nleft) return 0; // corrupt data count += len; while (len) { *p = stbi__get8(s); p += 4; len--; } } else if (len > 128) { stbi_uc val; // Next -len+1 bytes in the dest are replicated from next source byte. // (Interpret len as a negative 8-bit int.) len = 257 - len; if (len > nleft) return 0; // corrupt data val = stbi__get8(s); count += len; while (len) { *p = val; p += 4; len--; } } } return 1; } static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) { int pixelCount; int channelCount, compression; int channel, i; int bitdepth; int w,h; stbi_uc *out; STBI_NOTUSED(ri); // Check identifier if (stbi__get32be(s) != 0x38425053) // "8BPS" return stbi__errpuc("not PSD", "Corrupt PSD image"); // Check file type version. if (stbi__get16be(s) != 1) return stbi__errpuc("wrong version", "Unsupported version of PSD image"); // Skip 6 reserved bytes. stbi__skip(s, 6 ); // Read the number of channels (R, G, B, A, etc). 
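/* For orientation, the PSD header consumed here is a fixed 26 bytes, all
   fields big-endian:

      4 bytes  signature "8BPS"
      2 bytes  version        (must be 1)
      6 bytes  reserved       (skipped above)
      2 bytes  channel count  (read next)
      4 bytes  height, then 4 bytes width
      2 bytes  bit depth      (8 or 16 accepted below)
      2 bytes  color mode     (3 = RGB is the only mode accepted below) */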
channelCount = stbi__get16be(s); if (channelCount < 0 || channelCount > 16) return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); // Read the rows and columns of the image. h = stbi__get32be(s); w = stbi__get32be(s); if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); // Make sure the depth is 8 bits. bitdepth = stbi__get16be(s); if (bitdepth != 8 && bitdepth != 16) return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); // Make sure the color mode is RGB. // Valid options are: // 0: Bitmap // 1: Grayscale // 2: Indexed color // 3: RGB color // 4: CMYK color // 7: Multichannel // 8: Duotone // 9: Lab color if (stbi__get16be(s) != 3) return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) stbi__skip(s,stbi__get32be(s) ); // Skip the image resources. (resolution, pen tool paths, etc) stbi__skip(s, stbi__get32be(s) ); // Skip the reserved data. stbi__skip(s, stbi__get32be(s) ); // Find out if the data is compressed. // Known values: // 0: no compression // 1: RLE compressed compression = stbi__get16be(s); if (compression > 1) return stbi__errpuc("bad compression", "PSD has an unknown compression format"); // Check size if (!stbi__mad3sizes_valid(4, w, h, 0)) return stbi__errpuc("too large", "Corrupt PSD"); // Create the destination image. if (!compression && bitdepth == 16 && bpc == 16) { out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); ri->bits_per_channel = 16; } else out = (stbi_uc *) stbi__malloc(4 * w*h); if (!out) return stbi__errpuc("outofmem", "Out of memory"); pixelCount = w*h; // Initialize the data to zero. //memset( out, 0, pixelCount * 4 ); // Finally, the image data. if (compression) { // RLE as used by .PSD and .TIFF // Loop until you get the number of unpacked bytes you are expecting: // Read the next source byte into n. // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. // Else if n is 128, noop. // Endloop // The RLE-compressed data is preceded by a 2-byte data count for each row in the data, // which we're going to just skip. stbi__skip(s, h * channelCount * 2 ); // Read the RLE data by channel. for (channel = 0; channel < 4; channel++) { stbi_uc *p; p = out+channel; if (channel >= channelCount) { // Fill this channel with default data. for (i = 0; i < pixelCount; i++, p += 4) *p = (channel == 3 ? 255 : 0); } else { // Read the RLE data. if (!stbi__psd_decode_rle(s, p, pixelCount)) { STBI_FREE(out); return stbi__errpuc("corrupt", "bad RLE data"); } } } } else { // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. // Read the data by channel. for (channel = 0; channel < 4; channel++) { if (channel >= channelCount) { // Fill this channel with default data. if (bitdepth == 16 && bpc == 16) { stbi__uint16 *q = ((stbi__uint16 *) out) + channel; stbi__uint16 val = channel == 3 ? 65535 : 0; for (i = 0; i < pixelCount; i++, q += 4) *q = val; } else { stbi_uc *p = out+channel; stbi_uc val = channel == 3 ? 
255 : 0; for (i = 0; i < pixelCount; i++, p += 4) *p = val; } } else { if (ri->bits_per_channel == 16) { // output bpc stbi__uint16 *q = ((stbi__uint16 *) out) + channel; for (i = 0; i < pixelCount; i++, q += 4) *q = (stbi__uint16) stbi__get16be(s); } else { stbi_uc *p = out+channel; if (bitdepth == 16) { // input bpc for (i = 0; i < pixelCount; i++, p += 4) *p = (stbi_uc) (stbi__get16be(s) >> 8); } else { for (i = 0; i < pixelCount; i++, p += 4) *p = stbi__get8(s); } } } } } // remove weird white matte from PSD if (channelCount >= 4) { if (ri->bits_per_channel == 16) { for (i=0; i < w*h; ++i) { stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; if (pixel[3] != 0 && pixel[3] != 65535) { float a = pixel[3] / 65535.0f; float ra = 1.0f / a; float inv_a = 65535.0f * (1 - ra); pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); } } } else { for (i=0; i < w*h; ++i) { unsigned char *pixel = out + 4*i; if (pixel[3] != 0 && pixel[3] != 255) { float a = pixel[3] / 255.0f; float ra = 1.0f / a; float inv_a = 255.0f * (1 - ra); pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); } } } } // convert to desired output format if (req_comp && req_comp != 4) { if (ri->bits_per_channel == 16) out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); else out = stbi__convert_format(out, 4, req_comp, w, h); if (out == NULL) return out; // stbi__convert_format frees input on failure } if (comp) *comp = 4; *y = h; *x = w; return out; } #endif // ************************************************************************************************* // Softimage PIC loader // by Tom Seddon // // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ #ifndef STBI_NO_PIC static int stbi__pic_is4(stbi__context *s,const char *str) { int i; for (i=0; i<4; ++i) if (stbi__get8(s) != (stbi_uc)str[i]) return 0; return 1; } static int stbi__pic_test_core(stbi__context *s) { int i; if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) return 0; for(i=0;i<84;++i) stbi__get8(s); if (!stbi__pic_is4(s,"PICT")) return 0; return 1; } typedef struct { stbi_uc size,type,channel; } stbi__pic_packet; static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) { int mask=0x80, i; for (i=0; i<4; ++i, mask>>=1) { if (channel & mask) { if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); dest[i]=stbi__get8(s); } } return dest; } static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) { int mask=0x80,i; for (i=0;i<4; ++i, mask>>=1) if (channel&mask) dest[i]=src[i]; } static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) { int act_comp=0,num_packets=0,y,chained; stbi__pic_packet packets[10]; // this will (should...) cater for even some bizarre stuff like having data // for the same channel in multiple packets. 
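/* Each PIC packet record read by the loop below is 4 bytes:
      chained - nonzero if another packet record follows this one
      size    - bits per channel field; only 8 is accepted here
      type    - compression: 0 = uncompressed, 1 = pure RLE, 2 = mixed RLE
      channel - bitmask of RGBA channels carried: 0x80=R, 0x40=G, 0x20=B, 0x10=A
   (hence the act_comp & 0x10 alpha test after the loop). */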
do { stbi__pic_packet *packet; if (num_packets==sizeof(packets)/sizeof(packets[0])) return stbi__errpuc("bad format","too many packets"); packet = &packets[num_packets++]; chained = stbi__get8(s); packet->size = stbi__get8(s); packet->type = stbi__get8(s); packet->channel = stbi__get8(s); act_comp |= packet->channel; if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)"); if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp"); } while (chained); *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? for(y=0; y<height; ++y) { int packet_idx; for(packet_idx=0; packet_idx < num_packets; ++packet_idx) { stbi__pic_packet *packet = &packets[packet_idx]; stbi_uc *dest = result+y*width*4; switch (packet->type) { default: return stbi__errpuc("bad format","packet has bad compression type"); case 0: {//uncompressed int x; for(x=0;x<width;++x, dest+=4) if (!stbi__readval(s,packet->channel,dest)) return 0; break; } case 1://Pure RLE { int left=width, i; while (left>0) { stbi_uc count,value[4]; count=stbi__get8(s); if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); if (count > left) count = (stbi_uc) left; if (!stbi__readval(s,packet->channel,value)) return 0; for(i=0; i<count; ++i,dest+=4) stbi__copyval(packet->channel,dest,value); left -= count; } } break; case 2: {//Mixed RLE int left=width; while (left>0) { int count = stbi__get8(s), i; if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); if (count >= 128) { // Repeated stbi_uc value[4]; if (count==128) count = stbi__get16be(s); else count -= 127; if (count > left) return stbi__errpuc("bad file","scanline overrun"); if (!stbi__readval(s,packet->channel,value)) return 0; for(i=0;i<count;++i, dest+=4) stbi__copyval(packet->channel,dest,value); } else { // Raw ++count; if (count>left) return stbi__errpuc("bad file","scanline overrun"); for(i=0;i<count;++i, dest+=4) if (!stbi__readval(s,packet->channel,dest)) return 0; } left-=count; } break; } } } } return result; } static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) { stbi_uc *result; int i, x,y, internal_comp; STBI_NOTUSED(ri); if (!comp) comp = &internal_comp; for (i=0; i<92; ++i) stbi__get8(s); x = stbi__get16be(s); y = stbi__get16be(s); if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); stbi__get32be(s); //skip `ratio' stbi__get16be(s); //skip `fields' stbi__get16be(s); //skip `pad' // intermediate buffer is RGBA result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); if (!result) return stbi__errpuc("outofmem", "Out of memory"); memset(result, 0xff, x*y*4); if (!stbi__pic_load_core(s,x,y,comp, result)) { STBI_FREE(result); result=0; } *px = x; *py = y; if (req_comp == 0) req_comp = *comp; result=stbi__convert_format(result,4,req_comp,x,y); return result; } static int stbi__pic_test(stbi__context *s) { int r = stbi__pic_test_core(s); stbi__rewind(s); return r; } #endif // ************************************************************************************************* // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb #ifndef STBI_NO_GIF typedef struct { stbi__int16 prefix; stbi_uc first; stbi_uc suffix; } stbi__gif_lzw; typedef struct { int w,h; stbi_uc *out; // output buffer (always 4 components) stbi_uc *background; // The current "background" as far as a gif is concerned stbi_uc *history; int flags, bgindex, ratio, transparent, eflags; stbi_uc pal[256][4]; stbi_uc lpal[256][4]; stbi__gif_lzw codes[8192]; stbi_uc *color_table; int
parse, step; int lflags; int start_x, start_y; int max_x, max_y; int cur_x, cur_y; int line_size; int delay; } stbi__gif; static int stbi__gif_test_raw(stbi__context *s) { int sz; if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; sz = stbi__get8(s); if (sz != '9' && sz != '7') return 0; if (stbi__get8(s) != 'a') return 0; return 1; } static int stbi__gif_test(stbi__context *s) { int r = stbi__gif_test_raw(s); stbi__rewind(s); return r; } static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) { int i; for (i=0; i < num_entries; ++i) { pal[i][2] = stbi__get8(s); pal[i][1] = stbi__get8(s); pal[i][0] = stbi__get8(s); pal[i][3] = transp == i ? 0 : 255; } } static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) { stbi_uc version; if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return stbi__err("not GIF", "Corrupt GIF"); version = stbi__get8(s); if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); stbi__g_failure_reason = ""; g->w = stbi__get16le(s); g->h = stbi__get16le(s); g->flags = stbi__get8(s); g->bgindex = stbi__get8(s); g->ratio = stbi__get8(s); g->transparent = -1; if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments if (is_info) return 1; if (g->flags & 0x80) stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); return 1; } static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) { stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); if (!g) return stbi__err("outofmem", "Out of memory"); if (!stbi__gif_header(s, g, comp, 1)) { STBI_FREE(g); stbi__rewind( s ); return 0; } if (x) *x = g->w; if (y) *y = g->h; STBI_FREE(g); return 1; } static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) { stbi_uc *p, *c; int idx; // recurse to decode the prefixes, since the linked-list is backwards, // and working backwards through an interleaved image would be nasty if (g->codes[code].prefix >= 0) stbi__out_gif_code(g, g->codes[code].prefix); if (g->cur_y >= g->max_y) return; idx = g->cur_x + g->cur_y; p = &g->out[idx]; g->history[idx / 4] = 1; c = &g->color_table[g->codes[code].suffix * 4]; if (c[3] > 128) { // don't render transparent pixels; p[0] = c[2]; p[1] = c[1]; p[2] = c[0]; p[3] = c[3]; } g->cur_x += 4; if (g->cur_x >= g->max_x) { g->cur_x = g->start_x; g->cur_y += g->step; while (g->cur_y >= g->max_y && g->parse > 0) { g->step = (1 << g->parse) * g->line_size; g->cur_y = g->start_y + (g->step >> 1); --g->parse; } } } static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) { stbi_uc lzw_cs; stbi__int32 len, init_code; stbi__uint32 first; stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; stbi__gif_lzw *p; lzw_cs = stbi__get8(s); if (lzw_cs > 12) return NULL; clear = 1 << lzw_cs; first = 1; codesize = lzw_cs + 1; codemask = (1 << codesize) - 1; bits = 0; valid_bits = 0; for (init_code = 0; init_code < clear; init_code++) { g->codes[init_code].prefix = -1; g->codes[init_code].first = (stbi_uc) init_code; g->codes[init_code].suffix = (stbi_uc) init_code; } // support no starting clear code avail = clear+2; 
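/* Quick LZW refresher for the decode loop below: codes 0..clear-1 are literal
   palette indices, `clear` resets the table, and `clear`+1 terminates the
   stream. Every other code appends one table entry whose prefix is the
   previous code and whose suffix is the first pixel of the current code;
   `codesize` grows by one bit each time `avail` reaches a power of two, up to
   the 12-bit GIF maximum. */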
oldcode = -1; len = 0; for(;;) { if (valid_bits < codesize) { if (len == 0) { len = stbi__get8(s); // start new block if (len == 0) return g->out; } --len; bits |= (stbi__int32) stbi__get8(s) << valid_bits; valid_bits += 8; } else { stbi__int32 code = bits & codemask; bits >>= codesize; valid_bits -= codesize; // @OPTIMIZE: is there some way we can accelerate the non-clear path? if (code == clear) { // clear code codesize = lzw_cs + 1; codemask = (1 << codesize) - 1; avail = clear + 2; oldcode = -1; first = 0; } else if (code == clear + 1) { // end of stream code stbi__skip(s, len); while ((len = stbi__get8(s)) > 0) stbi__skip(s,len); return g->out; } else if (code <= avail) { if (first) { return stbi__errpuc("no clear code", "Corrupt GIF"); } if (oldcode >= 0) { p = &g->codes[avail++]; if (avail > 8192) { return stbi__errpuc("too many codes", "Corrupt GIF"); } p->prefix = (stbi__int16) oldcode; p->first = g->codes[oldcode].first; p->suffix = (code == avail) ? p->first : g->codes[code].first; } else if (code == avail) return stbi__errpuc("illegal code in raster", "Corrupt GIF"); stbi__out_gif_code(g, (stbi__uint16) code); if ((avail & codemask) == 0 && avail <= 0x0FFF) { codesize++; codemask = (1 << codesize) - 1; } oldcode = code; } else { return stbi__errpuc("illegal code in raster", "Corrupt GIF"); } } } } // this function is designed to support animated gifs, although stb_image doesn't support it // two back is the image from two frames ago, used for a very specific disposal format static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) { int dispose; int first_frame; int pi; int pcount; STBI_NOTUSED(req_comp); // on first frame, any non-written pixels get the background colour (non-transparent) first_frame = 0; if (g->out == 0) { if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header if (!stbi__mad3sizes_valid(4, g->w, g->h, 0)) return stbi__errpuc("too large", "GIF image is too large"); pcount = g->w * g->h; g->out = (stbi_uc *) stbi__malloc(4 * pcount); g->background = (stbi_uc *) stbi__malloc(4 * pcount); g->history = (stbi_uc *) stbi__malloc(pcount); if (!g->out || !g->background || !g->history) return stbi__errpuc("outofmem", "Out of memory"); // image is treated as "transparent" at the start - ie, nothing overwrites the current background; // background colour is only used for pixels that are not rendered first frame, after that "background" // color refers to the color that was there the previous frame. memset(g->out, 0x00, 4 * pcount); memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent) memset(g->history, 0x00, pcount); // pixels that were affected previous frame first_frame = 1; } else { // second frame - how do we dispose of the previous one? 
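/* GIF89a disposal methods, stored in bits 2..4 of the Graphic Control
   Extension flags (hence the >> 2 below):
      0 - not specified (treated like "do not dispose")
      1 - do not dispose (leave the previous frame in place)
      2 - restore to background (pixels drawn last frame get the background)
      3 - restore to previous (pixels drawn last frame revert to the frame
          before that, which is what `two_back` is for) */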
dispose = (g->eflags & 0x1C) >> 2; pcount = g->w * g->h; if ((dispose == 3) && (two_back == 0)) { dispose = 2; // if I don't have an image to revert back to, default to the old background } if (dispose == 3) { // use previous graphic for (pi = 0; pi < pcount; ++pi) { if (g->history[pi]) { memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); } } } else if (dispose == 2) { // restore what was changed last frame to background before that frame; for (pi = 0; pi < pcount; ++pi) { if (g->history[pi]) { memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); } } } else { // This is a non-disposal case either way, so just // leave the pixels as is, and they will become the new background // 1: do not dispose // 0: not specified. } // background is what out is after the undoing of the previous frame; memcpy( g->background, g->out, 4 * g->w * g->h ); } // clear my history; memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame for (;;) { int tag = stbi__get8(s); switch (tag) { case 0x2C: /* Image Descriptor */ { stbi__int32 x, y, w, h; stbi_uc *o; x = stbi__get16le(s); y = stbi__get16le(s); w = stbi__get16le(s); h = stbi__get16le(s); if (((x + w) > (g->w)) || ((y + h) > (g->h))) return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); g->line_size = g->w * 4; g->start_x = x * 4; g->start_y = y * g->line_size; g->max_x = g->start_x + w * 4; g->max_y = g->start_y + h * g->line_size; g->cur_x = g->start_x; g->cur_y = g->start_y; // if the width of the specified rectangle is 0, that means // we may not see *any* pixels or the image is malformed; // to make sure this is caught, move the current y down to // max_y (which is what out_gif_code checks). if (w == 0) g->cur_y = g->max_y; g->lflags = stbi__get8(s); if (g->lflags & 0x40) { g->step = 8 * g->line_size; // first interlaced spacing g->parse = 3; } else { g->step = g->line_size; g->parse = 0; } if (g->lflags & 0x80) { stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1); g->color_table = (stbi_uc *) g->lpal; } else if (g->flags & 0x80) { g->color_table = (stbi_uc *) g->pal; } else return stbi__errpuc("missing color table", "Corrupt GIF"); o = stbi__process_gif_raster(s, g); if (!o) return NULL; // if this was the first frame, pcount = g->w * g->h; if (first_frame && (g->bgindex > 0)) { // if first frame, any pixel not drawn to gets the background color for (pi = 0; pi < pcount; ++pi) { if (g->history[pi] == 0) { g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); } } } return o; } case 0x21: // Comment Extension. { int len; int ext = stbi__get8(s); if (ext == 0xF9) { // Graphic Control Extension. len = stbi__get8(s); if (len == 4) { g->eflags = stbi__get8(s); g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
// unset old transparent if (g->transparent >= 0) { g->pal[g->transparent][3] = 255; } if (g->eflags & 0x01) { g->transparent = stbi__get8(s); if (g->transparent >= 0) { g->pal[g->transparent][3] = 0; } } else { // don't need transparent stbi__skip(s, 1); g->transparent = -1; } } else { stbi__skip(s, len); break; } } while ((len = stbi__get8(s)) != 0) { stbi__skip(s, len); } break; } case 0x3B: // gif stream termination code return (stbi_uc *) s; // using '1' causes warning on some compilers default: return stbi__errpuc("unknown code", "Corrupt GIF"); } } } static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays) { STBI_FREE(g->out); STBI_FREE(g->history); STBI_FREE(g->background); if (out) STBI_FREE(out); if (delays && *delays) STBI_FREE(*delays); return stbi__errpuc("outofmem", "Out of memory"); } static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) { if (stbi__gif_test(s)) { int layers = 0; stbi_uc *u = 0; stbi_uc *out = 0; stbi_uc *two_back = 0; stbi__gif g; int stride; int out_size = 0; int delays_size = 0; STBI_NOTUSED(out_size); STBI_NOTUSED(delays_size); memset(&g, 0, sizeof(g)); if (delays) { *delays = 0; } do { u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); if (u == (stbi_uc *) s) u = 0; // end of animated gif marker if (u) { *x = g.w; *y = g.h; ++layers; stride = g.w * g.h * 4; if (out) { void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride ); if (!tmp) return stbi__load_gif_main_outofmem(&g, out, delays); else { out = (stbi_uc*) tmp; out_size = layers * stride; } if (delays) { int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers ); if (!new_delays) return stbi__load_gif_main_outofmem(&g, out, delays); *delays = new_delays; delays_size = layers * sizeof(int); } } else { out = (stbi_uc*)stbi__malloc( layers * stride ); if (!out) return stbi__load_gif_main_outofmem(&g, out, delays); out_size = layers * stride; if (delays) { *delays = (int*) stbi__malloc( layers * sizeof(int) ); if (!*delays) return stbi__load_gif_main_outofmem(&g, out, delays); delays_size = layers * sizeof(int); } } memcpy( out + ((layers - 1) * stride), u, stride ); if (layers >= 2) { two_back = out + (layers - 2) * stride; // frame two before the one decoded next } if (delays) { (*delays)[layers - 1U] = g.delay; } } } while (u != 0); // free temp buffer; STBI_FREE(g.out); STBI_FREE(g.history); STBI_FREE(g.background); // do the final conversion after loading everything; if (req_comp && req_comp != 4) out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); *z = layers; return out; } else { return stbi__errpuc("not GIF", "Image was not a GIF type."); } } static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { stbi_uc *u = 0; stbi__gif g; memset(&g, 0, sizeof(g)); STBI_NOTUSED(ri); u = stbi__gif_load_next(s, &g, comp, req_comp, 0); if (u == (stbi_uc *) s) u = 0; // end of animated gif marker if (u) { *x = g.w; *y = g.h; // moved conversion to after successful load so that the same // can be done for multiple frames. if (req_comp && req_comp != 4) u = stbi__convert_format(u, 4, req_comp, g.w, g.h); } else if (g.out) { // if there was an error and we allocated an image buffer, free it!
STBI_FREE(g.out); } // free buffers needed for multiple frame loading; STBI_FREE(g.history); STBI_FREE(g.background); return u; } static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) { return stbi__gif_info_raw(s,x,y,comp); } #endif // ************************************************************************************************* // Radiance RGBE HDR loader // originally by Nicolas Schulz #ifndef STBI_NO_HDR static int stbi__hdr_test_core(stbi__context *s, const char *signature) { int i; for (i=0; signature[i]; ++i) if (stbi__get8(s) != signature[i]) return 0; stbi__rewind(s); return 1; } static int stbi__hdr_test(stbi__context* s) { int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); stbi__rewind(s); if(!r) { r = stbi__hdr_test_core(s, "#?RGBE\n"); stbi__rewind(s); } return r; } #define STBI__HDR_BUFLEN 1024 static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) { int len=0; char c = '\0'; c = (char) stbi__get8(z); while (!stbi__at_eof(z) && c != '\n') { buffer[len++] = c; if (len == STBI__HDR_BUFLEN-1) { // flush to end of line while (!stbi__at_eof(z) && stbi__get8(z) != '\n') ; break; } c = (char) stbi__get8(z); } buffer[len] = 0; return buffer; } static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) { if ( input[3] != 0 ) { float f1; // Exponent f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); if (req_comp <= 2) output[0] = (input[0] + input[1] + input[2]) * f1 / 3; else { output[0] = input[0] * f1; output[1] = input[1] * f1; output[2] = input[2] * f1; } if (req_comp == 2) output[1] = 1; if (req_comp == 4) output[3] = 1; } else { switch (req_comp) { case 4: output[3] = 1; /* fallthrough */ case 3: output[0] = output[1] = output[2] = 0; break; case 2: output[1] = 1; /* fallthrough */ case 1: output[0] = 0; break; } } } static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { char buffer[STBI__HDR_BUFLEN]; char *token; int valid = 0; int width, height; stbi_uc *scanline; float *hdr_data; int len; unsigned char count, value; int i, j, k, c1,c2, z; const char *headerToken; STBI_NOTUSED(ri); // Check identifier headerToken = stbi__hdr_gettoken(s,buffer); if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) return stbi__errpf("not HDR", "Corrupt HDR image"); // Parse header for(;;) { token = stbi__hdr_gettoken(s,buffer); if (token[0] == 0) break; if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; } if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); // Parse width and height // can't use sscanf() if we're not using stdio! 
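/* Two notes on the Radiance format handled here: stbi__hdr_convert above
   decodes an RGBE pixel as component * 2^(E - 128 - 8), e.g. (180,90,45,136)
   yields (180.0, 90.0, 45.0) since 2^(136-136) = 1; and the header parsed
   below must end with a resolution string of the standard "-Y <height> +X
   <width>" form (rows top-to-bottom, columns left-to-right) -- any other
   axis order is rejected as an unsupported data layout. */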
token = stbi__hdr_gettoken(s,buffer); if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); token += 3; height = (int) strtol(token, &token, 10); while (*token == ' ') ++token; if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); token += 3; width = (int) strtol(token, NULL, 10); if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); *x = width; *y = height; if (comp) *comp = 3; if (req_comp == 0) req_comp = 3; if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) return stbi__errpf("too large", "HDR image is too large"); // Read data hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); if (!hdr_data) return stbi__errpf("outofmem", "Out of memory"); // Load image data // image data is stored as some number of scan lines if ( width < 8 || width >= 32768) { // Read flat data for (j=0; j < height; ++j) { for (i=0; i < width; ++i) { stbi_uc rgbe[4]; main_decode_loop: stbi__getn(s, rgbe, 4); stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); } } } else { // Read RLE-encoded data scanline = NULL; for (j = 0; j < height; ++j) { c1 = stbi__get8(s); c2 = stbi__get8(s); len = stbi__get8(s); if (c1 != 2 || c2 != 2 || (len & 0x80)) { // not run-length encoded, so we have to actually use THIS data as a decoded // pixel (note this can't be a valid pixel--one of RGB must be >= 128) stbi_uc rgbe[4]; rgbe[0] = (stbi_uc) c1; rgbe[1] = (stbi_uc) c2; rgbe[2] = (stbi_uc) len; rgbe[3] = (stbi_uc) stbi__get8(s); stbi__hdr_convert(hdr_data, rgbe, req_comp); i = 1; j = 0; STBI_FREE(scanline); goto main_decode_loop; // yes, this makes no sense } len <<= 8; len |= stbi__get8(s); if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } if (scanline == NULL) { scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0); if (!scanline) { STBI_FREE(hdr_data); return stbi__errpf("outofmem", "Out of memory"); } } for (k = 0; k < 4; ++k) { int nleft; i = 0; while ((nleft = width - i) > 0) { count = stbi__get8(s); if (count > 128) { // Run value = stbi__get8(s); count -= 128; if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } for (z = 0; z < count; ++z) scanline[i++ * 4 + k] = value; } else { // Dump if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } for (z = 0; z < count; ++z) scanline[i++ * 4 + k] = stbi__get8(s); } } } for (i=0; i < width; ++i) stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); } if (scanline) STBI_FREE(scanline); } return hdr_data; } static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) { char buffer[STBI__HDR_BUFLEN]; char *token; int valid = 0; int dummy; if (!x) x = &dummy; if (!y) y = &dummy; if (!comp) comp = &dummy; if (stbi__hdr_test(s) == 0) { stbi__rewind( s ); return 0; } for(;;) { token = stbi__hdr_gettoken(s,buffer); if (token[0] == 0) break; if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; } if (!valid) { stbi__rewind( s ); return 0; } token = stbi__hdr_gettoken(s,buffer); if (strncmp(token, "-Y ", 3)) { stbi__rewind( s ); return 0; } token += 3; *y = (int) strtol(token, &token, 10); while
(*token == ' ') ++token; if (strncmp(token, "+X ", 3)) { stbi__rewind( s ); return 0; } token += 3; *x = (int) strtol(token, NULL, 10); *comp = 3; return 1; } #endif // STBI_NO_HDR #ifndef STBI_NO_BMP static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) { void *p; stbi__bmp_data info; info.all_a = 255; p = stbi__bmp_parse_header(s, &info); if (p == NULL) { stbi__rewind( s ); return 0; } if (x) *x = s->img_x; if (y) *y = s->img_y; if (comp) { if (info.bpp == 24 && info.ma == 0xff000000) *comp = 3; else *comp = info.ma ? 4 : 3; } return 1; } #endif #ifndef STBI_NO_PSD static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) { int channelCount, dummy, depth; if (!x) x = &dummy; if (!y) y = &dummy; if (!comp) comp = &dummy; if (stbi__get32be(s) != 0x38425053) { stbi__rewind( s ); return 0; } if (stbi__get16be(s) != 1) { stbi__rewind( s ); return 0; } stbi__skip(s, 6); channelCount = stbi__get16be(s); if (channelCount < 0 || channelCount > 16) { stbi__rewind( s ); return 0; } *y = stbi__get32be(s); *x = stbi__get32be(s); depth = stbi__get16be(s); if (depth != 8 && depth != 16) { stbi__rewind( s ); return 0; } if (stbi__get16be(s) != 3) { stbi__rewind( s ); return 0; } *comp = 4; return 1; } static int stbi__psd_is16(stbi__context *s) { int channelCount, depth; if (stbi__get32be(s) != 0x38425053) { stbi__rewind( s ); return 0; } if (stbi__get16be(s) != 1) { stbi__rewind( s ); return 0; } stbi__skip(s, 6); channelCount = stbi__get16be(s); if (channelCount < 0 || channelCount > 16) { stbi__rewind( s ); return 0; } STBI_NOTUSED(stbi__get32be(s)); STBI_NOTUSED(stbi__get32be(s)); depth = stbi__get16be(s); if (depth != 16) { stbi__rewind( s ); return 0; } return 1; } #endif #ifndef STBI_NO_PIC static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) { int act_comp=0,num_packets=0,chained,dummy; stbi__pic_packet packets[10]; if (!x) x = &dummy; if (!y) y = &dummy; if (!comp) comp = &dummy; if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { stbi__rewind(s); return 0; } stbi__skip(s, 88); *x = stbi__get16be(s); *y = stbi__get16be(s); if (stbi__at_eof(s)) { stbi__rewind( s); return 0; } if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { stbi__rewind( s ); return 0; } stbi__skip(s, 8); do { stbi__pic_packet *packet; if (num_packets==sizeof(packets)/sizeof(packets[0])) return 0; packet = &packets[num_packets++]; chained = stbi__get8(s); packet->size = stbi__get8(s); packet->type = stbi__get8(s); packet->channel = stbi__get8(s); act_comp |= packet->channel; if (stbi__at_eof(s)) { stbi__rewind( s ); return 0; } if (packet->size != 8) { stbi__rewind( s ); return 0; } } while (chained); *comp = (act_comp & 0x10 ? 
4 : 3); return 1; } #endif // ************************************************************************************************* // Portable Gray Map and Portable Pixel Map loader // by Ken Miller // // PGM: http://netpbm.sourceforge.net/doc/pgm.html // PPM: http://netpbm.sourceforge.net/doc/ppm.html // // Known limitations: // Does not support comments in the header section // Does not support ASCII image data (formats P2 and P3) #ifndef STBI_NO_PNM static int stbi__pnm_test(stbi__context *s) { char p, t; p = (char) stbi__get8(s); t = (char) stbi__get8(s); if (p != 'P' || (t != '5' && t != '6')) { stbi__rewind( s ); return 0; } return 1; } static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { stbi_uc *out; STBI_NOTUSED(ri); ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n); if (ri->bits_per_channel == 0) return 0; if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); *x = s->img_x; *y = s->img_y; if (comp) *comp = s->img_n; if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0)) return stbi__errpuc("too large", "PNM too large"); out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0); if (!out) return stbi__errpuc("outofmem", "Out of memory"); if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) { STBI_FREE(out); return stbi__errpuc("bad PNM", "PNM file truncated"); } if (req_comp && req_comp != s->img_n) { if (ri->bits_per_channel == 16) { out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y); } else { out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); } if (out == NULL) return out; // stbi__convert_format frees input on failure } return out; } static int stbi__pnm_isspace(char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; } static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) { for (;;) { while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) *c = (char) stbi__get8(s); if (stbi__at_eof(s) || *c != '#') break; while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) *c = (char) stbi__get8(s); } } static int stbi__pnm_isdigit(char c) { return c >= '0' && c <= '9'; } static int stbi__pnm_getinteger(stbi__context *s, char *c) { int value = 0; while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { value = value*10 + (*c - '0'); *c = (char) stbi__get8(s); if((value > 214748364) || (value == 214748364 && *c > '7')) return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int"); } return value; } static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) { int maxv, dummy; char c, p, t; if (!x) x = &dummy; if (!y) y = &dummy; if (!comp) comp = &dummy; stbi__rewind(s); // Get identifier p = (char) stbi__get8(s); t = (char) stbi__get8(s); if (p != 'P' || (t != '5' && t != '6')) { stbi__rewind(s); return 0; } *comp = (t == '6') ? 
3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm c = (char) stbi__get8(s); stbi__pnm_skip_whitespace(s, &c); *x = stbi__pnm_getinteger(s, &c); // read width if(*x == 0) return stbi__err("invalid width", "PPM image header had zero or overflowing width"); stbi__pnm_skip_whitespace(s, &c); *y = stbi__pnm_getinteger(s, &c); // read height if (*y == 0) return stbi__err("invalid height", "PPM image header had zero or overflowing height"); stbi__pnm_skip_whitespace(s, &c); maxv = stbi__pnm_getinteger(s, &c); // read max value if (maxv > 65535) return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images"); else if (maxv > 255) return 16; else return 8; } static int stbi__pnm_is16(stbi__context *s) { if (stbi__pnm_info(s, NULL, NULL, NULL) == 16) return 1; return 0; } #endif static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) { #ifndef STBI_NO_JPEG if (stbi__jpeg_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_PNG if (stbi__png_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_GIF if (stbi__gif_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_BMP if (stbi__bmp_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_PSD if (stbi__psd_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_PIC if (stbi__pic_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_PNM if (stbi__pnm_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_HDR if (stbi__hdr_info(s, x, y, comp)) return 1; #endif // test tga last because it's a crappy test! #ifndef STBI_NO_TGA if (stbi__tga_info(s, x, y, comp)) return 1; #endif return stbi__err("unknown image type", "Image not of any known type, or corrupt"); } static int stbi__is_16_main(stbi__context *s) { #ifndef STBI_NO_PNG if (stbi__png_is16(s)) return 1; #endif #ifndef STBI_NO_PSD if (stbi__psd_is16(s)) return 1; #endif #ifndef STBI_NO_PNM if (stbi__pnm_is16(s)) return 1; #endif return 0; } #ifndef STBI_NO_STDIO STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) { FILE *f = stbi__fopen(filename, "rb"); int result; if (!f) return stbi__err("can't fopen", "Unable to open file"); result = stbi_info_from_file(f, x, y, comp); fclose(f); return result; } STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) { int r; stbi__context s; long pos = ftell(f); stbi__start_file(&s, f); r = stbi__info_main(&s,x,y,comp); fseek(f,pos,SEEK_SET); return r; } STBIDEF int stbi_is_16_bit(char const *filename) { FILE *f = stbi__fopen(filename, "rb"); int result; if (!f) return stbi__err("can't fopen", "Unable to open file"); result = stbi_is_16_bit_from_file(f); fclose(f); return result; } STBIDEF int stbi_is_16_bit_from_file(FILE *f) { int r; stbi__context s; long pos = ftell(f); stbi__start_file(&s, f); r = stbi__is_16_main(&s); fseek(f,pos,SEEK_SET); return r; } #endif // !STBI_NO_STDIO STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) { stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__info_main(&s,x,y,comp); } STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); return stbi__info_main(&s,x,y,comp); } STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) { stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__is_16_main(&s); } STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
return stbi__is_16_main(&s); } #endif // STB_IMAGE_IMPLEMENTATION /* revision history: 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs 2.19 (2018-02-11) fix warning 2.18 (2018-01-30) fix warnings 2.17 (2018-01-29) change stbi__shiftsigned to avoid clang -O2 bug 1-bit BMP *_is_16_bit api avoid warnings 2.16 (2017-07-23) all functions have 16-bit variants; STBI_NO_STDIO works again; compilation fixes; fix rounding in unpremultiply; optimize vertical flip; disable raw_len validation; documentation fixes 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; warning fixes; disable run-time SSE detection on gcc; uniform handling of optional "return" values; thread-safe initialization of zlib tables 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11 (2016-04-02) allocate large structures on the stack remove white matting for transparent PSD fix reported channel count for PNG & BMP re-enable SSE2 in non-gcc 64-bit support RGB-formatted JPEG read 16-bit PNGs (only as 8-bit) 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED 2.09 (2016-01-16) allow comments in PNM files 16-bit-per-pixel TGA (not bit-per-component) info() for TGA could break due to .hdr handling info() for BMP shares code instead of sloppy parse can use STBI_REALLOC_SIZED if allocator doesn't support realloc code cleanup 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA 2.07 (2015-09-13) fix compiler warnings partial animated GIF support limited 16-bpc PSD support #ifdef unused functions bug with < 92 byte PIC,PNM,HDR,TGA 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit 2.03 (2015-04-12) extra corruption checking (mmozeiko) stbi_set_flip_vertically_on_load (nguillemot) fix NEON support; fix mingw support 2.02 (2015-01-19) fix incorrect assert, fix warning 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) progressive JPEG (stb) PGM/PPM support (Ken Miller) STBI_MALLOC,STBI_REALLOC,STBI_FREE GIF bugfix -- seemingly never worked STBI_NO_*, STBI_ONLY_* 1.48 (2014-12-14) fix incorrectly-named assert() 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) optimize PNG (ryg) fix bug in interlaced PNG with user-specified channel count (stb) 1.46 (2014-08-26) fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG 1.45 (2014-08-16) fix MSVC-ARM internal compiler error by wrapping malloc 1.44 (2014-08-07) various warning fixes from Ronny Chevalier 1.43 (2014-07-15) fix MSVC-only compiler problem in code changed in 1.42 1.42 (2014-07-09) don't define _CRT_SECURE_NO_WARNINGS (affects user code) fixes to stbi__cleanup_jpeg path added STBI_ASSERT to avoid requiring assert.h 1.41 (2014-06-25) fix search&replace from 1.36 that messed up comments/error messages 1.40 (2014-06-22) fix gcc struct-initialization warning 1.39 (2014-06-15) fix to TGA optimization when req_comp != number of components in TGA; fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) add support for BMP version 5 (more ignored fields) 1.38 (2014-06-06) suppress MSVC
warnings on integer casts truncating values fix accidental rename of 'skip' field of I/O 1.37 (2014-06-04) remove duplicate typedef 1.36 (2014-06-03) convert to header file single-file library if de-iphone isn't set, load iphone images color-swapped instead of returning NULL 1.35 (2014-05-27) various warnings fix broken STBI_SIMD path fix bug where stbi_load_from_file no longer left file pointer in correct place fix broken non-easy path for 32-bit BMP (possibly never used) TGA optimization by Arseny Kapoulkine 1.34 (unknown) use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case 1.33 (2011-07-14) make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements 1.32 (2011-07-13) support for "info" function for all supported filetypes (SpartanJ) 1.31 (2011-06-20) a few more leak fixes, bug in PNG handling (SpartanJ) 1.30 (2011-06-11) added ability to load files via callbacks to accommodate custom input streams (Ben Wenger) removed deprecated format-specific test/load functions removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) fix inefficiency in decoding 32-bit BMP (David Woo) 1.29 (2010-08-16) various warning fixes from Aurelien Pocheville 1.28 (2010-08-01) fix bug in GIF palette transparency (SpartanJ) 1.27 (2010-08-01) cast-to-stbi_uc to fix warnings 1.26 (2010-07-24) fix bug in file buffering for PNG reported by SpartanJ 1.25 (2010-07-17) refix trans_data warning (Won Chun) 1.24 (2010-07-12) perf improvements reading from files on platforms with lock-heavy fgetc() minor perf improvements for jpeg deprecated type-specific functions so we'll get feedback if they're needed attempt to fix trans_data warning (Won Chun) 1.23 fixed bug in iPhone support 1.22 (2010-07-10) removed image *writing* support stbi_info support from Jetro Lauha GIF support from Jean-Marc Lienher iPhone PNG-extensions from James Brown warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez Žemva) 1.21 fix use of 'stbi_uc' in header (reported by jon blow) 1.20 added support for Softimage PIC, by Tom Seddon 1.19 bug in interlaced PNG corruption check (found by ryg) 1.18 (2008-08-02) fix a threading bug (local mutable static) 1.17 support interlaced PNG 1.16 major bugfix - stbi__convert_format converted one too many pixels 1.15 initialize some fields for thread safety 1.14 fix threadsafe conversion bug header-file-only version (#define STBI_HEADER_FILE_ONLY before including) 1.13 threadsafe 1.12 const qualifiers in the API 1.11 Support installable IDCT, colorspace conversion routines 1.10 Fixes for 64-bit (don't use "unsigned long") optimized upsampling by Fabian "ryg" Giesen 1.09 Fix format-conversion for PSD code (bad global variables!) 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz 1.07 attempt to fix C++ warning/errors again 1.06 attempt to fix C++ warning/errors again 1.05 fix TGA loading to return correct *comp and use good luminance calc 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR 1.02 support for (subset of) HDR files, float interface for preferred access to them 1.01 fix bug: possible bug in handling right-side up bmps...
not sure fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all 1.00 interface to zlib that skips zlib header 0.99 correct handling of alpha in palette 0.98 TGA loader by lonesock; dynamically add loaders (untested) 0.97 jpeg errors on too large a file; also catch another malloc failure 0.96 fix detection of invalid v value - particleman@mollyrocket forum 0.95 during header scan, seek to markers in case of padding 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same 0.93 handle jpegtran output; verbose errors 0.92 read 4,8,16,24,32-bit BMP files of several formats 0.91 output 24-bit Windows 3.0 BMP files 0.90 fix a few more warnings; bump version number to approach 1.0 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd 0.60 fix compiling as c++ 0.59 fix warnings: merge Dave Moore's -Wall fixes 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available 0.56 fix bug: zlib uncompressed mode len vs. nlen 0.55 fix bug: restart_interval not initialized to 0 0.54 allow NULL for 'int *comp' 0.53 fix bug in png 3->4; speedup png decoding 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments 0.51 obey req_comp requests, 1-component jpegs return as 1-component, on 'test' only check type, not whether we support this variant 0.50 (2006-11-19) first released version */ /* ------------------------------------------------------------------------------ This software is available under 2 licenses -- choose whichever you prefer. ------------------------------------------------------------------------------ ALTERNATIVE A - MIT License Copyright (c) 2017 Sean Barrett Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------ ALTERNATIVE B - Public Domain (www.unlicense.org) This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. 
We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------ */ ggml-org-ggml-7ec8045/examples/stb_image_write.h000066400000000000000000002130651506673203700216650ustar00rootroot00000000000000/* stb_image_write - v1.16 - public domain - http://nothings.org/stb writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 no warranty implied; use at your own risk Before #including, #define STB_IMAGE_WRITE_IMPLEMENTATION in the file that you want to have the implementation. Will probably not work correctly with strict-aliasing optimizations. ABOUT: This header file is a library for writing images to C stdio or a callback. The PNG output is not optimal; it is 20-50% larger than the file written by a decent optimizing implementation; though providing a custom zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. This library is designed for source code compactness and simplicity, not optimal image file size or run-time performance. BUILDING: You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace malloc,realloc,free. You can #define STBIW_MEMMOVE() to replace memmove(). You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function for PNG compression (instead of the builtin one); it must have the following signature: unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); The returned data will be freed with STBIW_FREE() (free() by default), so it must be heap allocated with STBIW_MALLOC() (malloc() by default). UNICODE: If compiling for Windows and you wish to use Unicode filenames, compile with #define STBIW_WINDOWS_UTF8 and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert Windows wchar_t filenames to utf8. USAGE: There are five functions, one for each image file format: int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically There are also five equivalent functions that use an arbitrary write function.
You are expected to open/close your file-equivalent before and after calling these: int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); where the callback is: void stbi_write_func(void *context, void *data, int size); You can configure it with these global variables: int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode You can define STBI_WRITE_NO_STDIO to disable the file variant of these functions, so the library will not use stdio.h at all. However, this will also disable HDR writing, because it requires stdio for formatted output. Each function returns 0 on failure and non-0 on success. The functions create an image file defined by the parameters. The image is a rectangle of pixels stored from left-to-right, top-to-bottom. Each pixel contains 'comp' channels of data stored interleaved with 8-bits per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. The *data pointer points to the first byte of the top-left-most pixel. For PNG, "stride_in_bytes" is the distance in bytes from the first byte of a row of pixels to the first byte of the next row of pixels. PNG creates output files with the same number of components as the input. The BMP format expands Y to RGB in the file format and does not output alpha. PNG supports writing rectangles of data even when the bytes storing rows of data are not consecutive in memory (e.g. sub-rectangles of a larger image), by supplying the stride between the beginning of adjacent rows. The other formats do not. (Thus you cannot write a native-format BMP through the BMP writer, both because it is in BGR order and because it may have padding at the end of the line.) PNG allows you to set the deflate compression level by setting the global variable 'stbi_write_png_compression_level' (it defaults to 8). HDR expects linear float data. Since the format is always 32-bit rgb(e) data, alpha (if provided) is discarded, and for monochrome data it is replicated across all three channels. TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed data, set the global variable 'stbi_write_tga_with_rle' to 0. JPEG ignores alpha channels in input data; quality is between 1 and 100. Higher quality looks better but results in a bigger image. Output is baseline JPEG only (no progressive JPEG).
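 EXAMPLE:
    A minimal usage sketch of the stdio variant; the output name "checker.png"
    and the checkerboard fill below are arbitrary, illustrative choices:

       #define STB_IMAGE_WRITE_IMPLEMENTATION
       #include "stb_image_write.h"

       int main(void)
       {
          unsigned char pixels[8*8*4];
          int i;
          for (i = 0; i < 8*8; ++i) {
             // alternate black/white squares: column index is i%8, row is i/8
             unsigned char v = (((i % 8) ^ (i / 8)) & 1) ? 255 : 0;
             pixels[i*4+0] = v;   // R
             pixels[i*4+1] = v;   // G
             pixels[i*4+2] = v;   // B
             pixels[i*4+3] = 255; // opaque alpha
          }
          // rows are tightly packed here, so stride_in_bytes = width * comp
          return stbi_write_png("checker.png", 8, 8, 4, pixels, 8*4) ? 0 : 1;
       }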
CREDITS: Sean Barrett - PNG/BMP/TGA Baldur Karlsson - HDR Jean-Sebastien Guay - TGA monochrome Tim Kelsey - misc enhancements Alan Hickman - TGA RLE Emmanuel Julien - initial file IO callback implementation Jon Olick - original jo_jpeg.cpp code Daniel Gibson - integrate JPEG, allow external zlib Aarni Koskela - allow choosing PNG filter bugfixes: github:Chribba Guillaume Chereau github:jry2 github:romigrou Sergio Gonzalez Jonas Karlsson Filip Wasil Thatcher Ulrich github:poppolopoppo Patrick Boettcher github:xeekworx Cap Petschulat Simon Rodriguez Ivan Tikhonov github:ignotion Adam Schackart Andrew Kensler LICENSE See end of file for license information. */ #ifndef INCLUDE_STB_IMAGE_WRITE_H #define INCLUDE_STB_IMAGE_WRITE_H #include <stdlib.h> // if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' #ifndef STBIWDEF #ifdef STB_IMAGE_WRITE_STATIC #define STBIWDEF static #else #ifdef __cplusplus #define STBIWDEF extern "C" #else #define STBIWDEF extern #endif #endif #endif #ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations STBIWDEF int stbi_write_tga_with_rle; STBIWDEF int stbi_write_png_compression_level; STBIWDEF int stbi_write_force_png_filter; #endif #ifndef STBI_WRITE_NO_STDIO STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); #ifdef STBIW_WINDOWS_UTF8 STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); #endif #endif typedef void stbi_write_func(void *context, void *data, int size); STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); #endif//INCLUDE_STB_IMAGE_WRITE_H #ifdef STB_IMAGE_WRITE_IMPLEMENTATION #ifdef _WIN32 #ifndef _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS #endif #ifndef _CRT_NONSTDC_NO_DEPRECATE #define _CRT_NONSTDC_NO_DEPRECATE #endif #endif #ifndef STBI_WRITE_NO_STDIO #include <stdio.h> #endif // STBI_WRITE_NO_STDIO #include <stdarg.h> #include <stdlib.h> #include <string.h> #include <math.h> #if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) // ok #elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) // ok #else #error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)."
#endif #ifndef STBIW_MALLOC #define STBIW_MALLOC(sz) malloc(sz) #define STBIW_REALLOC(p,newsz) realloc(p,newsz) #define STBIW_FREE(p) free(p) #endif #ifndef STBIW_REALLOC_SIZED #define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) #endif #ifndef STBIW_MEMMOVE #define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) #endif #ifndef STBIW_ASSERT #include <assert.h> #define STBIW_ASSERT(x) assert(x) #endif #define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) #ifdef STB_IMAGE_WRITE_STATIC static int stbi_write_png_compression_level = 8; static int stbi_write_tga_with_rle = 1; static int stbi_write_force_png_filter = -1; #else int stbi_write_png_compression_level = 8; int stbi_write_tga_with_rle = 1; int stbi_write_force_png_filter = -1; #endif static int stbi__flip_vertically_on_write = 0; STBIWDEF void stbi_flip_vertically_on_write(int flag) { stbi__flip_vertically_on_write = flag; } typedef struct { stbi_write_func *func; void *context; unsigned char buffer[64]; int buf_used; } stbi__write_context; // initialize a callback-based context static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) { s->func = c; s->context = context; } #ifndef STBI_WRITE_NO_STDIO static void stbi__stdio_write(void *context, void *data, int size) { fwrite(data,1,size,(FILE*) context); } #if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8) #ifdef __cplusplus #define STBIW_EXTERN extern "C" #else #define STBIW_EXTERN extern #endif STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) { return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); } #endif static FILE *stbiw__fopen(char const *filename, char const *mode) { FILE *f; #if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8) wchar_t wMode[64]; wchar_t wFilename[1024]; if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) return 0; if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) return 0; #if defined(_MSC_VER) && _MSC_VER >= 1400 if (0 != _wfopen_s(&f, wFilename, wMode)) f = 0; #else f = _wfopen(wFilename, wMode); #endif #elif defined(_MSC_VER) && _MSC_VER >= 1400 if (0 != fopen_s(&f, filename, mode)) f=0; #else f = fopen(filename, mode); #endif return f; } static int stbi__start_write_file(stbi__write_context *s, const char *filename) { FILE *f = stbiw__fopen(filename, "wb"); stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); return f != NULL; } static void stbi__end_write_file(stbi__write_context *s) { fclose((FILE *)s->context); } #endif // !STBI_WRITE_NO_STDIO typedef unsigned int stbiw_uint32; typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ?
1 : -1]; static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) { while (*fmt) { switch (*fmt++) { case ' ': break; case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); s->func(s->context,&x,1); break; } case '2': { int x = va_arg(v,int); unsigned char b[2]; b[0] = STBIW_UCHAR(x); b[1] = STBIW_UCHAR(x>>8); s->func(s->context,b,2); break; } case '4': { stbiw_uint32 x = va_arg(v,int); unsigned char b[4]; b[0]=STBIW_UCHAR(x); b[1]=STBIW_UCHAR(x>>8); b[2]=STBIW_UCHAR(x>>16); b[3]=STBIW_UCHAR(x>>24); s->func(s->context,b,4); break; } default: STBIW_ASSERT(0); return; } } } static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) { va_list v; va_start(v, fmt); stbiw__writefv(s, fmt, v); va_end(v); } static void stbiw__write_flush(stbi__write_context *s) { if (s->buf_used) { s->func(s->context, &s->buffer, s->buf_used); s->buf_used = 0; } } static void stbiw__putc(stbi__write_context *s, unsigned char c) { s->func(s->context, &c, 1); } static void stbiw__write1(stbi__write_context *s, unsigned char a) { if ((size_t)s->buf_used + 1 > sizeof(s->buffer)) stbiw__write_flush(s); s->buffer[s->buf_used++] = a; } static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) { int n; if ((size_t)s->buf_used + 3 > sizeof(s->buffer)) stbiw__write_flush(s); n = s->buf_used; s->buf_used = n+3; s->buffer[n+0] = a; s->buffer[n+1] = b; s->buffer[n+2] = c; } static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) { unsigned char bg[3] = { 255, 0, 255}, px[3]; int k; if (write_alpha < 0) stbiw__write1(s, d[comp - 1]); switch (comp) { case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case case 1: if (expand_mono) stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp else stbiw__write1(s, d[0]); // monochrome TGA break; case 4: if (!write_alpha) { // composite against pink background for (k = 0; k < 3; ++k) px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); break; } /* FALLTHROUGH */ case 3: stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); break; } if (write_alpha > 0) stbiw__write1(s, d[comp - 1]); } static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) { stbiw_uint32 zero = 0; int i,j, j_end; if (y <= 0) return; if (stbi__flip_vertically_on_write) vdir *= -1; if (vdir < 0) { j_end = -1; j = y-1; } else { j_end = y; j = 0; } for (; j != j_end; j += vdir) { for (i=0; i < x; ++i) { unsigned char *d = (unsigned char *) data + (j*x+i)*comp; stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); } stbiw__write_flush(s); s->func(s->context, &zero, scanline_pad); } } static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) 
{ if (y < 0 || x < 0) { return 0; } else { va_list v; va_start(v, fmt); stbiw__writefv(s, fmt, v); va_end(v); stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); return 1; } } static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) { if (comp != 4) { // write RGB bitmap int pad = (-x*3) & 3; return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, "11 4 22 4" "4 44 22 444444", 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header } else { // RGBA bitmaps need a v4 header // use BI_BITFIELDS mode with 32bpp and alpha mask // (straight BI_RGB with alpha mask doesn't work in most readers) return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *)data,1,0, "11 4 22 4" "4 44 22 444444 4444 4 444 444 444 444", 'B', 'M', 14+108+x*y*4, 0, 0, 14+108, // file header 108, x,y, 1,32, 3,0,0,0,0,0, 0xff0000,0xff00,0xff,0xff000000u, 0, 0,0,0, 0,0,0, 0,0,0, 0,0,0); // bitmap V4 header } } STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) { stbi__write_context s = { 0 }; stbi__start_write_callbacks(&s, func, context); return stbi_write_bmp_core(&s, x, y, comp, data); } #ifndef STBI_WRITE_NO_STDIO STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) { stbi__write_context s = { 0 }; if (stbi__start_write_file(&s,filename)) { int r = stbi_write_bmp_core(&s, x, y, comp, data); stbi__end_write_file(&s); return r; } else return 0; } #endif //!STBI_WRITE_NO_STDIO static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) { int has_alpha = (comp == 2 || comp == 4); int colorbytes = has_alpha ? comp-1 : comp; int format = colorbytes < 2 ? 
3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 if (y < 0 || x < 0) return 0; if (!stbi_write_tga_with_rle) { return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); } else { int i,j,k; int jend, jdir; stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); if (stbi__flip_vertically_on_write) { j = 0; jend = y; jdir = 1; } else { j = y-1; jend = -1; jdir = -1; } for (; j != jend; j += jdir) { unsigned char *row = (unsigned char *) data + j * x * comp; int len; for (i = 0; i < x; i += len) { unsigned char *begin = row + i * comp; int diff = 1; len = 1; if (i < x - 1) { ++len; diff = memcmp(begin, row + (i + 1) * comp, comp); if (diff) { const unsigned char *prev = begin; for (k = i + 2; k < x && len < 128; ++k) { if (memcmp(prev, row + k * comp, comp)) { prev += comp; ++len; } else { --len; break; } } } else { for (k = i + 2; k < x && len < 128; ++k) { if (!memcmp(begin, row + k * comp, comp)) { ++len; } else { break; } } } } if (diff) { unsigned char header = STBIW_UCHAR(len - 1); stbiw__write1(s, header); for (k = 0; k < len; ++k) { stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); } } else { unsigned char header = STBIW_UCHAR(len - 129); stbiw__write1(s, header); stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); } } } stbiw__write_flush(s); } return 1; } STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) { stbi__write_context s = { 0 }; stbi__start_write_callbacks(&s, func, context); return stbi_write_tga_core(&s, x, y, comp, (void *) data); } #ifndef STBI_WRITE_NO_STDIO STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) { stbi__write_context s = { 0 }; if (stbi__start_write_file(&s,filename)) { int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); stbi__end_write_file(&s); return r; } else return 0; } #endif // ************************************************************************************************* // Radiance RGBE HDR writer // by Baldur Karlsson #define stbiw__max(a, b) ((a) > (b) ? 
(a) : (b)) #ifndef STBI_WRITE_NO_STDIO static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) { int exponent; float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); if (maxcomp < 1e-32f) { rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; } else { float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; rgbe[0] = (unsigned char)(linear[0] * normalize); rgbe[1] = (unsigned char)(linear[1] * normalize); rgbe[2] = (unsigned char)(linear[2] * normalize); rgbe[3] = (unsigned char)(exponent + 128); } } static void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) { unsigned char lengthbyte = STBIW_UCHAR(length+128); STBIW_ASSERT(length+128 <= 255); s->func(s->context, &lengthbyte, 1); s->func(s->context, &databyte, 1); } static void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) { unsigned char lengthbyte = STBIW_UCHAR(length); STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code s->func(s->context, &lengthbyte, 1); s->func(s->context, data, length); } static void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) { unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; unsigned char rgbe[4]; float linear[3]; int x; scanlineheader[2] = (width&0xff00)>>8; scanlineheader[3] = (width&0x00ff); /* skip RLE for images too small or large */ if (width < 8 || width >= 32768) { for (x=0; x < width; x++) { switch (ncomp) { case 4: /* fallthrough */ case 3: linear[2] = scanline[x*ncomp + 2]; linear[1] = scanline[x*ncomp + 1]; linear[0] = scanline[x*ncomp + 0]; break; default: linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; break; } stbiw__linear_to_rgbe(rgbe, linear); s->func(s->context, rgbe, 4); } } else { int c,r; /* encode into scratch buffer */ for (x=0; x < width; x++) { switch(ncomp) { case 4: /* fallthrough */ case 3: linear[2] = scanline[x*ncomp + 2]; linear[1] = scanline[x*ncomp + 1]; linear[0] = scanline[x*ncomp + 0]; break; default: linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; break; } stbiw__linear_to_rgbe(rgbe, linear); scratch[x + width*0] = rgbe[0]; scratch[x + width*1] = rgbe[1]; scratch[x + width*2] = rgbe[2]; scratch[x + width*3] = rgbe[3]; } s->func(s->context, scanlineheader, 4); /* RLE each component separately */ for (c=0; c < 4; c++) { unsigned char *comp = &scratch[width*c]; x = 0; while (x < width) { // find first run r = x; while (r+2 < width) { if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) break; ++r; } if (r+2 >= width) r = width; // dump up to first run while (x < r) { int len = r-x; if (len > 128) len = 128; stbiw__write_dump_data(s, len, &comp[x]); x += len; } // if there's a run, output it if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd // find next byte after run while (r < width && comp[r] == comp[x]) ++r; // output run up to r while (x < r) { int len = r-x; if (len > 127) len = 127; stbiw__write_run_data(s, len, comp[x]); x += len; } } } } } } static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) { if (y <= 0 || x <= 0 || data == NULL) return 0; else { // Each component is stored separately. Allocate scratch space for full output scanline. 
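// Format note: each run-length coded scanline below is the 4-byte header
// {2, 2, width>>8, width&0xff} followed by the R, G, B and E byte planes,
// each RLE-coded independently by stbiw__write_hdr_scanline(). An RGBE pixel
// decodes as component = mantissa * 2^(E - 136), i.e. the three 8-bit
// mantissas share the exponent byte E produced by stbiw__linear_to_rgbe().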
unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); int i, len; char buffer[128]; char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; s->func(s->context, header, sizeof(header)-1); #ifdef __STDC_LIB_EXT1__ len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); #else len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); #endif s->func(s->context, buffer, len); for(i=0; i < y; i++) stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)); STBIW_FREE(scratch); return 1; } } STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) { stbi__write_context s = { 0 }; stbi__start_write_callbacks(&s, func, context); return stbi_write_hdr_core(&s, x, y, comp, (float *) data); } STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) { stbi__write_context s = { 0 }; if (stbi__start_write_file(&s,filename)) { int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); stbi__end_write_file(&s); return r; } else return 0; } #endif // STBI_WRITE_NO_STDIO ////////////////////////////////////////////////////////////////////////////// // // PNG writer // #ifndef STBIW_ZLIB_COMPRESS // stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() #define stbiw__sbraw(a) ((int *) (void *) (a) - 2) #define stbiw__sbm(a) stbiw__sbraw(a)[0] #define stbiw__sbn(a) stbiw__sbraw(a)[1] #define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) #define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) #define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) #define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) #define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) #define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) { int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? 
(stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); STBIW_ASSERT(p); if (p) { if (!*arr) ((int *) p)[1] = 0; *arr = (void *) ((int *) p + 2); stbiw__sbm(*arr) = m; } return *arr; } static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) { while (*bitcount >= 8) { stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); *bitbuffer >>= 8; *bitcount -= 8; } return data; } static int stbiw__zlib_bitrev(int code, int codebits) { int res=0; while (codebits--) { res = (res << 1) | (code & 1); code >>= 1; } return res; } static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) { int i; for (i=0; i < limit && i < 258; ++i) if (a[i] != b[i]) break; return i; } static unsigned int stbiw__zhash(unsigned char *data) { stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); hash ^= hash << 3; hash += hash >> 5; hash ^= hash << 4; hash += hash >> 17; hash ^= hash << 25; hash += hash >> 6; return hash; } #define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) #define stbiw__zlib_add(code,codebits) \ (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) #define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) // default huffman tables #define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) #define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) #define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) #define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) #define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) #define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) #define stbiw__ZHASH 16384 #endif // STBIW_ZLIB_COMPRESS STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) { #ifdef STBIW_ZLIB_COMPRESS // user provided a zlib compress implementation, use that return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); #else // use builtin static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; unsigned int bitbuf=0; int i,j, bitcount=0; unsigned char *out = NULL; unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(unsigned char**)); if (hash_table == NULL) return NULL; if (quality < 5) quality = 5; stbiw__sbpush(out, 0x78); // DEFLATE 32K window stbiw__sbpush(out, 0x5e); // FLEVEL = 1 stbiw__zlib_add(1,1); // BFINAL = 1 stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman for (i=0; i < stbiw__ZHASH; ++i) hash_table[i] = NULL; i=0; while (i < data_len-3) { // hash next 3 bytes of data to be compressed int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; unsigned char *bestloc = 0; unsigned char **hlist = hash_table[h]; int n = stbiw__sbcount(hlist); for (j=0; j < n; ++j) { if (hlist[j]-data > i-32768) { // if entry lies within window int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); if (d >= best) { best=d; bestloc=hlist[j]; } } } // when hash 
table entry is too long, delete half the entries if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); stbiw__sbn(hash_table[h]) = quality; } stbiw__sbpush(hash_table[h],data+i); if (bestloc) { // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); hlist = hash_table[h]; n = stbiw__sbcount(hlist); for (j=0; j < n; ++j) { if (hlist[j]-data > i-32767) { int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); if (e > best) { // if next match is better, bail on current match bestloc = NULL; break; } } } } if (bestloc) { int d = (int) (data+i - bestloc); // distance back STBIW_ASSERT(d <= 32767 && best <= 258); for (j=0; best > lengthc[j+1]-1; ++j); stbiw__zlib_huff(j+257); if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); for (j=0; d > distc[j+1]-1; ++j); stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); i += best; } else { stbiw__zlib_huffb(data[i]); ++i; } } // write out final bytes for (;i < data_len; ++i) stbiw__zlib_huffb(data[i]); stbiw__zlib_huff(256); // end of block // pad with 0 bits to byte boundary while (bitcount) stbiw__zlib_add(0,1); for (i=0; i < stbiw__ZHASH; ++i) (void) stbiw__sbfree(hash_table[i]); STBIW_FREE(hash_table); // store uncompressed instead if compression was worse if (stbiw__sbn(out) > data_len + 2 + ((data_len+32766)/32767)*5) { stbiw__sbn(out) = 2; // truncate to DEFLATE 32K window and FLEVEL = 1 for (j = 0; j < data_len;) { int blocklen = data_len - j; if (blocklen > 32767) blocklen = 32767; stbiw__sbpush(out, data_len - j == blocklen); // BFINAL = ?, BTYPE = 0 -- no compression stbiw__sbpush(out, STBIW_UCHAR(blocklen)); // LEN stbiw__sbpush(out, STBIW_UCHAR(blocklen >> 8)); stbiw__sbpush(out, STBIW_UCHAR(~blocklen)); // NLEN stbiw__sbpush(out, STBIW_UCHAR(~blocklen >> 8)); memcpy(out+stbiw__sbn(out), data+j, blocklen); stbiw__sbn(out) += blocklen; j += blocklen; } } { // compute adler32 on input unsigned int s1=1, s2=0; int blocklen = (int) (data_len % 5552); j=0; while (j < data_len) { for (i=0; i < blocklen; ++i) { s1 += data[j+i]; s2 += s1; } s1 %= 65521; s2 %= 65521; j += blocklen; blocklen = 5552; } stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); stbiw__sbpush(out, STBIW_UCHAR(s2)); stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); stbiw__sbpush(out, STBIW_UCHAR(s1)); } *out_len = stbiw__sbn(out); // make returned pointer freeable STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); return (unsigned char *) stbiw__sbraw(out); #endif // STBIW_ZLIB_COMPRESS } static unsigned int stbiw__crc32(unsigned char *buffer, int len) { #ifdef STBIW_CRC32 return STBIW_CRC32(buffer, len); #else static unsigned int crc_table[256] = { 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 
0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D }; unsigned int crc = ~0u; int i; for (i=0; i < len; ++i) crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; return ~crc; #endif } #define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) #define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); #define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) static void stbiw__wpcrc(unsigned char **data, int len) { unsigned int crc = stbiw__crc32(*data - len - 4, len+4); stbiw__wp32(*data, crc); } static unsigned char stbiw__paeth(int a, int b, int c) { int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); if (pb <= pc) return STBIW_UCHAR(b); return STBIW_UCHAR(c); } // @OPTIMIZE: provide an option that always forces left-predict or paeth predict static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) { static int mapping[] = { 0,1,2,3,4 }; static int firstmap[] = { 0,1,0,5,6 }; int *mymap = (y != 0) ? 
mapping : firstmap; int i; int type = mymap[filter_type]; unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; if (type==0) { memcpy(line_buffer, z, width*n); return; } // first loop isn't optimized since it's just one pixel for (i = 0; i < n; ++i) { switch (type) { case 1: line_buffer[i] = z[i]; break; case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; case 5: line_buffer[i] = z[i]; break; case 6: line_buffer[i] = z[i]; break; } } switch (type) { case 1: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-n]; break; case 2: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-signed_stride]; break; case 3: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; case 4: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; case 5: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - (z[i-n]>>1); break; case 6: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; } } STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) { int force_filter = stbi_write_force_png_filter; int ctype[5] = { -1, 0, 4, 2, 6 }; unsigned char sig[8] = { 137,80,78,71,13,10,26,10 }; unsigned char *out,*o, *filt, *zlib; signed char *line_buffer; int j,zlen; if (stride_bytes == 0) stride_bytes = x * n; if (force_filter >= 5) { force_filter = -1; } filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } for (j=0; j < y; ++j) { int filter_type; if (force_filter > -1) { filter_type = force_filter; stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer); } else { // Estimate the best filter by running through all of them: int best_filter = 0, best_filter_val = 0x7fffffff, est, i; for (filter_type = 0; filter_type < 5; filter_type++) { stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer); // Estimate the entropy of the line using this filter; the less, the better. 
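// This is the minimum-sum-of-absolute-differences heuristic suggested by the
// PNG specification: treat each filtered byte as signed, sum the magnitudes,
// and keep the filter whose output stays closest to zero, since small
// residuals tend to compress best under DEFLATE.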
est = 0; for (i = 0; i < x*n; ++i) { est += abs((signed char) line_buffer[i]); } if (est < best_filter_val) { best_filter_val = est; best_filter = filter_type; } } if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer); filter_type = best_filter; } } // when we get here, filter_type contains the filter type, and line_buffer contains the data filt[j*(x*n+1)] = (unsigned char) filter_type; STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); } STBIW_FREE(line_buffer); zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); STBIW_FREE(filt); if (!zlib) return 0; // each tag requires 12 bytes of overhead out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); if (!out) return 0; *out_len = 8 + 12+13 + 12+zlen + 12; o=out; STBIW_MEMMOVE(o,sig,8); o+= 8; stbiw__wp32(o, 13); // header length stbiw__wptag(o, "IHDR"); stbiw__wp32(o, x); stbiw__wp32(o, y); *o++ = 8; *o++ = STBIW_UCHAR(ctype[n]); *o++ = 0; *o++ = 0; *o++ = 0; stbiw__wpcrc(&o,13); stbiw__wp32(o, zlen); stbiw__wptag(o, "IDAT"); STBIW_MEMMOVE(o, zlib, zlen); o += zlen; STBIW_FREE(zlib); stbiw__wpcrc(&o, zlen); stbiw__wp32(o,0); stbiw__wptag(o, "IEND"); stbiw__wpcrc(&o,0); STBIW_ASSERT(o == out + *out_len); return out; } #ifndef STBI_WRITE_NO_STDIO STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) { FILE *f; int len; unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); if (png == NULL) return 0; f = stbiw__fopen(filename, "wb"); if (!f) { STBIW_FREE(png); return 0; } fwrite(png, 1, len, f); fclose(f); STBIW_FREE(png); return 1; } #endif STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) { int len; unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); if (png == NULL) return 0; func(context, png, len); STBIW_FREE(png); return 1; } /* *************************************************************************** * * JPEG writer * * This is based on Jon Olick's jo_jpeg.cpp: * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html */ static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { int bitBuf = *bitBufP, bitCnt = *bitCntP; bitCnt += bs[1]; bitBuf |= bs[0] << (24 - bitCnt); while(bitCnt >= 8) { unsigned char c = (bitBuf >> 16) & 255; stbiw__putc(s, c); if(c == 255) { stbiw__putc(s, 0); } bitBuf <<= 8; bitCnt -= 8; } *bitBufP = bitBuf; *bitCntP = bitCnt; } static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; float z1, z2, z3, z4, z5, z11, z13; float tmp0 = d0 + d7; float tmp7 = d0 - d7; float tmp1 = d1 + d6; float tmp6 = d1 - d6; float tmp2 = d2 + d5; float tmp5 = d2 - d5; float tmp3 = d3 + d4; float tmp4 = d3 - d4; // Even part float tmp10 = tmp0 + tmp3; // phase 2 float tmp13 = tmp0 - tmp3; float tmp11 = tmp1 + tmp2; float tmp12 = 
tmp1 - tmp2; d0 = tmp10 + tmp11; // phase 3 d4 = tmp10 - tmp11; z1 = (tmp12 + tmp13) * 0.707106781f; // c4 d2 = tmp13 + z1; // phase 5 d6 = tmp13 - z1; // Odd part tmp10 = tmp4 + tmp5; // phase 2 tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; // The rotator is modified from fig 4-8 to avoid extra negations. z5 = (tmp10 - tmp12) * 0.382683433f; // c6 z2 = tmp10 * 0.541196100f + z5; // c2-c6 z4 = tmp12 * 1.306562965f + z5; // c2+c6 z3 = tmp11 * 0.707106781f; // c4 z11 = tmp7 + z3; // phase 5 z13 = tmp7 - z3; *d5p = z13 + z2; // phase 6 *d3p = z13 - z2; *d1p = z11 + z4; *d7p = z11 - z4; *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; } static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { int tmp1 = val < 0 ? -val : val; val = val < 0 ? val-1 : val; bits[1] = 1; while(tmp1 >>= 1) { ++bits[1]; } bits[0] = val & ((1<<bits[1])-1); } static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt, float *CDU, int du_stride, float *fdtbl, int DC, const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) { const unsigned short EOB[2] = { HTAC[0x00][0], HTAC[0x00][1] }; const unsigned short M16zeroes[2] = { HTAC[0xF0][0], HTAC[0xF0][1] }; int dataOff, i, j, n, diff, end0pos, x, y; int DU[64]; // DCT rows for(dataOff=0, n=du_stride*8; dataOff<n; dataOff+=du_stride) { stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+1], &CDU[dataOff+2], &CDU[dataOff+3], &CDU[dataOff+4], &CDU[dataOff+5], &CDU[dataOff+6], &CDU[dataOff+7]); } // DCT columns for(dataOff=0; dataOff<8; ++dataOff) { stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+du_stride], &CDU[dataOff+du_stride*2], &CDU[dataOff+du_stride*3], &CDU[dataOff+du_stride*4], &CDU[dataOff+du_stride*5], &CDU[dataOff+du_stride*6], &CDU[dataOff+du_stride*7]); } // Quantize/descale/zigzag the coefficients for(y = 0, j=0; y < 8; ++y) { for(x = 0; x < 8; ++x,++j) { float v; i = y*du_stride+x; v = CDU[i]*fdtbl[j]; DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? v - 0.5f : v + 0.5f); } } // Encode DC diff = DU[0] - DC; if (diff == 0) { stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]); } else { unsigned short bits[2]; stbiw__jpg_calcBits(diff, bits); stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]); stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); } // Encode ACs end0pos = 63; for(; (end0pos>0)&&(DU[end0pos]==0); --end0pos) { } // end0pos = first element in reverse order !=0 if(end0pos == 0) { stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); return DU[0]; } for(i = 1; i <= end0pos; ++i) { int startpos = i; int nrzeroes; unsigned short bits[2]; for (; DU[i]==0 && i<=end0pos; ++i) { } nrzeroes = i-startpos; if ( nrzeroes >= 16 ) { int lng = nrzeroes>>4; int nrmarker; for (nrmarker=1; nrmarker <= lng; ++nrmarker) stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); nrzeroes &= 15; } stbiw__jpg_calcBits(DU[i], bits); stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); } if(end0pos != 63) { stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); } return DU[0]; } static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { // Constants that don't pollute global namespace static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; static const unsigned char std_ac_luminance_values[] = { 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa }; static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; static const unsigned char std_ac_chrominance_values[] = { 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,
0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa }; // Huffman tables static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; static const unsigned short YAC_HT[256][2] = { {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} }; static const unsigned short UVAC_HT[256][2] = { {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 
{26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} }; static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; int row, col, i, k, subsample; float fdtbl_Y[64], fdtbl_UV[64]; unsigned char YTable[64], UVTable[64]; if(!data || !width || !height || comp > 4 || comp < 1) { return 0; } quality = quality ? quality : 90; subsample = quality <= 90 ? 1 : 0; quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; quality = quality < 50 ? 5000 / quality : 200 - quality * 2; for(i = 0; i < 64; ++i) { int uvti, yti = (YQT[i]*quality+50)/100; YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti); uvti = (UVQT[i]*quality+50)/100; UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 
255 : uvti); } for(row = 0, k = 0; row < 8; ++row) { for(col = 0; col < 8; ++col, ++k) { fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); } } // Write Headers { static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), 3,1,(unsigned char)(subsample?0x22:0x11),0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; s->func(s->context, (void*)head0, sizeof(head0)); s->func(s->context, (void*)YTable, sizeof(YTable)); stbiw__putc(s, 1); s->func(s->context, UVTable, sizeof(UVTable)); s->func(s->context, (void*)head1, sizeof(head1)); s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); stbiw__putc(s, 0x10); // HTYACinfo s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); stbiw__putc(s, 1); // HTUDCinfo s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); stbiw__putc(s, 0x11); // HTUACinfo s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); s->func(s->context, (void*)head2, sizeof(head2)); } // Encode 8x8 macroblocks { static const unsigned short fillBits[] = {0x7F, 7}; int DCY=0, DCU=0, DCV=0; int bitBuf=0, bitCnt=0; // comp == 2 is grey+alpha (alpha is ignored) int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; const unsigned char *dataR = (const unsigned char *)data; const unsigned char *dataG = dataR + ofsG; const unsigned char *dataB = dataR + ofsB; int x, y, pos; if(subsample) { for(y = 0; y < height; y += 16) { for(x = 0; x < width; x += 16) { float Y[256], U[256], V[256]; for(row = y, pos = 0; row < y+16; ++row) { // row >= height => use last input row int clamped_row = (row < height) ? row : height - 1; int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp; for(col = x; col < x+16; ++col, ++pos) { // if col >= width => use pixel from last input column int p = base_p + ((col < width) ? 
col : (width-1))*comp; float r = dataR[p], g = dataG[p], b = dataB[p]; Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128; U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b; V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b; } } DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+0, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+8, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+128, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+136, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); // subsample U,V { float subU[64], subV[64]; int yy, xx; for(yy = 0, pos = 0; yy < 8; ++yy) { for(xx = 0; xx < 8; ++xx, ++pos) { int j = yy*32+xx*2; subU[pos] = (U[j+0] + U[j+1] + U[j+16] + U[j+17]) * 0.25f; subV[pos] = (V[j+0] + V[j+1] + V[j+16] + V[j+17]) * 0.25f; } } DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); } } } } else { for(y = 0; y < height; y += 8) { for(x = 0; x < width; x += 8) { float Y[64], U[64], V[64]; for(row = y, pos = 0; row < y+8; ++row) { // row >= height => use last input row int clamped_row = (row < height) ? row : height - 1; int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp; for(col = x; col < x+8; ++col, ++pos) { // if col >= width => use pixel from last input column int p = base_p + ((col < width) ? col : (width-1))*comp; float r = dataR[p], g = dataG[p], b = dataB[p]; Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128; U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b; V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b; } } DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, YDC_HT, YAC_HT); DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); } } } // Do the bit alignment of the EOI marker stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); } // EOI stbiw__putc(s, 0xFF); stbiw__putc(s, 0xD9); return 1; } STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) { stbi__write_context s = { 0 }; stbi__start_write_callbacks(&s, func, context); return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); } #ifndef STBI_WRITE_NO_STDIO STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) { stbi__write_context s = { 0 }; if (stbi__start_write_file(&s,filename)) { int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); stbi__end_write_file(&s); return r; } else return 0; } #endif #endif // STB_IMAGE_WRITE_IMPLEMENTATION /* Revision history 1.16 (2021-07-11) make Deflate code emit uncompressed blocks when it would otherwise expand support writing BMPs with alpha channel 1.15 (2020-07-13) unknown 1.14 (2020-02-02) updated JPEG writer to downsample chroma channels 1.13 1.12 1.11 (2019-08-11) 1.10 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs 1.09 (2018-02-11) fix typo in zlib quality API, improve STB_I_W_STATIC in C++ 1.08 (2018-01-29) add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter 1.07 (2017-07-24) doc fix 1.06 (2017-07-23) writing JPEG (using Jon Olick's code) 1.05 ??? 1.04 (2017-03-03) monochrome BMP expansion 1.03 ??? 
      1.02 (2016-04-02)
             avoid allocating large structures on the stack
      1.01 (2016-01-16)
             STBIW_REALLOC_SIZED: support allocators with no realloc
             avoid race-condition in crc initialization
             minor compile issues
      1.00 (2015-09-14)
             installable file IO function
      0.99 (2015-09-13)
             warning fixes; TGA rle support
      0.98 (2015-04-08)
             added STBIW_MALLOC, STBIW_ASSERT etc
      0.97 (2015-01-18)
             fixed HDR asserts, rewrote HDR rle logic
      0.96 (2015-01-17)
             add HDR output
             fix monochrome BMP
      0.95 (2014-08-17)
             add monochrome TGA output
      0.94 (2014-05-31)
             rename private functions to avoid conflicts with stb_image.h
      0.93 (2014-05-27)
             warning fixes
      0.92 (2010-08-01)
             casts to unsigned char to fix warnings
      0.91 (2010-07-17)
             first public release
      0.90   first internal release
*/

/*
------------------------------------------------------------------------------
This software is available under 2 licenses -- choose whichever you prefer.
------------------------------------------------------------------------------
ALTERNATIVE A - MIT License
Copyright (c) 2017 Sean Barrett
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
------------------------------------------------------------------------------
ALTERNATIVE B - Public Domain (www.unlicense.org)
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
software, either in source code form or as a compiled binary, for any purpose,
commercial or non-commercial, and by any means.
In jurisdictions that recognize copyright laws, the author or authors of this
software dedicate any and all copyright interest in the software to the public
domain. We make this dedication for the benefit of the public at large and to
the detriment of our heirs and successors. We intend this dedication to be an
overt act of relinquishment in perpetuity of all present and future rights to
this software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
------------------------------------------------------------------------------
*/
ggml-org-ggml-7ec8045/examples/test-cmake/000077500000000000000000000000001506673203700203765ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/test-cmake/CMakeLists.txt000066400000000000000000000003661506673203700231430ustar00rootroot00000000000000
cmake_minimum_required(VERSION 3.14)
project(ggml-simple)

set(CMAKE_CXX_STANDARD 17)

find_package(ggml CONFIG REQUIRED)

set(TEST_TARGET test-cmake)
add_executable(test-cmake test-cmake.cpp)
target_link_libraries(test-cmake PRIVATE ggml::ggml)
ggml-org-ggml-7ec8045/examples/test-cmake/README.md000066400000000000000000000001311506673203700216500ustar00rootroot00000000000000
## cmake-test

This directory can be built as a separate project with an installed ggml.
ggml-org-ggml-7ec8045/examples/test-cmake/test-cmake.cpp000066400000000000000000000001311506673203700231320ustar00rootroot00000000000000
#include "ggml-backend.h"

int main(void) {
    ggml_backend_load_all();
    return 0;
}
ggml-org-ggml-7ec8045/examples/yolo/000077500000000000000000000000001506673203700173235ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/yolo/CMakeLists.txt000066400000000000000000000002461506673203700220650ustar00rootroot00000000000000
#
# yolov3-tiny

set(TEST_TARGET yolov3-tiny)
add_executable(${TEST_TARGET} yolov3-tiny.cpp yolo-image.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml common)
ggml-org-ggml-7ec8045/examples/yolo/README.md000066400000000000000000000040401506673203700206000ustar00rootroot00000000000000
This example shows how to implement YOLO object detection with ggml using a pretrained model.

# YOLOv3-tiny

Download the model weights:

```bash
$ wget https://pjreddie.com/media/files/yolov3-tiny.weights
$ sha1sum yolov3-tiny.weights
40f3c11883bef62fd850213bc14266632ed4414f  yolov3-tiny.weights
```

Convert the weights to GGUF format:

```bash
$ ./convert-yolov3-tiny.py yolov3-tiny.weights
yolov3-tiny.weights converted to yolov3-tiny.gguf
```

Alternatively, you can download the converted model from [HuggingFace](https://huggingface.co/rgerganov/yolo-gguf/resolve/main/yolov3-tiny.gguf)

Object detection:

```bash
$ wget https://raw.githubusercontent.com/pjreddie/darknet/master/data/dog.jpg
$ ./yolov3-tiny -m yolov3-tiny.gguf -i dog.jpg
load_model: using CUDA backend
ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
  Device 0: NVIDIA T1200 Laptop GPU, compute capability 7.5, VMM: yes
Layer 0 output shape: 416 x 416 x 16 x 1
Layer 1 output shape: 208 x 208 x 16 x 1
Layer 2 output shape: 208 x 208 x 32 x 1
Layer 3 output shape: 104 x 104 x 32 x 1
Layer 4 output shape: 104 x 104 x 64 x 1
Layer 5 output shape: 52 x 52 x 64 x 1
Layer 6 output shape: 52 x 52 x 128 x 1
Layer 7 output shape: 26 x 26 x 128 x 1
Layer 8 output shape: 26 x 26 x 256 x 1
Layer 9 output shape: 13 x 13 x 256 x 1
Layer 10 output shape: 13 x 13 x 512 x 1
Layer 11 output shape: 13 x 13 x 512 x 1
Layer 12 output shape: 13 x 13 x 1024 x 1
Layer 13 output shape: 13 x 13 x 256 x 1
Layer 14 output shape: 13 x 13 x 512 x 1
Layer 15 output shape: 13 x 13 x 255 x 1
Layer 18 output shape: 13 x 13 x 128 x 1
Layer 19 output shape: 26 x 26 x 128 x 1
Layer 20 output shape: 26 x 26 x 384 x 1
Layer 21 output shape: 26 x 26 x 256 x 1
Layer 22 output shape: 26 x 26 x 255 x 1
dog: 57%
car: 52%
truck: 56%
car: 62%
bicycle: 59%
Detected objects saved in 'predictions.jpg' (time: 0.057000 sec.)
```
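The converter that follows reads the darknet weight file sequentially and starts by skipping a 20-byte header (`f.read(20)`). A minimal sketch of what that header holds, assuming darknet's usual layout of three `int32` version fields followed by an `int64` count of images seen during training (the script name is illustrative; the converter never needs these values, which is why it simply skips them):

```python
#!/usr/bin/env python3
# inspect_darknet_header.py - hypothetical helper, not part of this example.
# Prints the 20-byte header that convert-yolov3-tiny.py skips with f.read(20).
# Assumed layout (darknet weight format): int32 major, minor, revision,
# then an int64 counter of images seen during training.
import sys
import numpy as np

with open(sys.argv[1], 'rb') as f:
    major, minor, revision = np.fromfile(f, dtype=np.int32, count=3)
    seen = int(np.fromfile(f, dtype=np.int64, count=1)[0])

print("darknet weights v{}.{}.{}, seen {} images".format(major, minor, revision, seen))
```

Everything after those 20 bytes is raw `float32` tensor data, read in the fixed order that `save_conv2d_layer` below expects (biases first, then the batch-norm parameters if present, then the convolution weights).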
ggml-org-ggml-7ec8045/examples/yolo/convert-yolov3-tiny.py000077500000000000000000000046471506673203700236050ustar00rootroot00000000000000
#!/usr/bin/env python3
import sys
import gguf
import numpy as np

def save_conv2d_layer(f, gguf_writer, prefix, inp_c, filters, size, batch_normalize=True):
    biases = np.fromfile(f, dtype=np.float32, count=filters)
    gguf_writer.add_tensor(prefix + "_biases", biases, raw_shape=(1, filters, 1, 1))

    if batch_normalize:
        scales = np.fromfile(f, dtype=np.float32, count=filters)
        gguf_writer.add_tensor(prefix + "_scales", scales, raw_shape=(1, filters, 1, 1))
        rolling_mean = np.fromfile(f, dtype=np.float32, count=filters)
        gguf_writer.add_tensor(prefix + "_rolling_mean", rolling_mean, raw_shape=(1, filters, 1, 1))
        rolling_variance = np.fromfile(f, dtype=np.float32, count=filters)
        gguf_writer.add_tensor(prefix + "_rolling_variance", rolling_variance, raw_shape=(1, filters, 1, 1))

    weights_count = filters * inp_c * size * size
    l0_weights = np.fromfile(f, dtype=np.float32, count=weights_count)
    ## ggml doesn't support f32 convolution yet, use f16 instead
    l0_weights = l0_weights.astype(np.float16)
    gguf_writer.add_tensor(prefix + "_weights", l0_weights, raw_shape=(filters, inp_c, size, size))

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: %s <yolov3-tiny.weights>" % sys.argv[0])
        sys.exit(1)
    outfile = 'yolov3-tiny.gguf'
    gguf_writer = gguf.GGUFWriter(outfile, 'yolov3-tiny')

    f = open(sys.argv[1], 'rb')
    f.read(20) # skip header
    save_conv2d_layer(f, gguf_writer, "l0", 3, 16, 3)
    save_conv2d_layer(f, gguf_writer, "l1", 16, 32, 3)
    save_conv2d_layer(f, gguf_writer, "l2", 32, 64, 3)
    save_conv2d_layer(f, gguf_writer, "l3", 64, 128, 3)
    save_conv2d_layer(f, gguf_writer, "l4", 128, 256, 3)
    save_conv2d_layer(f, gguf_writer, "l5", 256, 512, 3)
    save_conv2d_layer(f, gguf_writer, "l6", 512, 1024, 3)
    save_conv2d_layer(f, gguf_writer, "l7", 1024, 256, 1)
    save_conv2d_layer(f, gguf_writer, "l8", 256, 512, 3)
    save_conv2d_layer(f, gguf_writer, "l9", 512, 255, 1, batch_normalize=False)
    save_conv2d_layer(f, gguf_writer, "l10", 256, 128, 1)
    save_conv2d_layer(f, gguf_writer, "l11", 384, 256, 3)
    save_conv2d_layer(f, gguf_writer, "l12", 256, 255, 1, batch_normalize=False)
    f.close()

    gguf_writer.write_header_to_file()
    gguf_writer.write_kv_data_to_file()
    gguf_writer.write_tensors_to_file()
    gguf_writer.close()
    print("{} converted to {}".format(sys.argv[1], outfile))
ggml-org-ggml-7ec8045/examples/yolo/data/000077500000000000000000000000001506673203700202345ustar00rootroot00000000000000ggml-org-ggml-7ec8045/examples/yolo/data/coco.names000066400000000000000000000011611506673203700222030ustar00rootroot00000000000000
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
ggml-org-ggml-7ec8045/examples/yolo/data/labels/000077500000000000000000000000001506673203700214765ustar00rootroot00000000000000
[examples/yolo/data/labels/*.png: binary PNG data omitted -- small per-character glyph bitmaps (100_0.png, 100_1.png, ... 120_4.png, one image per ASCII code at eight sizes) used when drawing detection labels]
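One quick way to sanity-check the conversion is to read the generated file back and compare the tensor list against the `save_conv2d_layer` calls above. A minimal sketch, assuming the same `gguf` Python package used by the converter also provides `GGUFReader` (the script name is illustrative):

```python
#!/usr/bin/env python3
# check_gguf.py - hypothetical sanity check, not part of this example.
# Lists every tensor in the converted file; expect names like l0_weights
# (stored as F16) and l0_biases (F32) with the shapes passed via raw_shape.
from gguf import GGUFReader

reader = GGUFReader('yolov3-tiny.gguf')
for tensor in reader.tensors:
    print(tensor.name, tensor.tensor_type.name, list(tensor.shape))
```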
cHRMz&u0`:pQ<bKGD̿tIME  %5IDATc΋g(u16޽b4Bkz'btWL F pWF%tEXtdate:create2016-11-05T07:32:28-07:00tA%tEXtdate:modify2016-11-05T07:32:28-07:00k)!tEXtlabeldn IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/100_1.png000066400000000000000000000005711506673203700227270ustar00rootroot00000000000000PNG  IHDR >IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  &׉TnIDATc `fgxk8,wAy?kb`}dXx`?vu ^#D΄ijZ^5*07BP]?x6w7%tEXtdate:create2016-11-05T07:32:38-07:00A%tEXtdate:modify2016-11-05T07:32:38-07:00tEXtlabeldn IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/100_2.png000066400000000000000000000007031506673203700227250ustar00rootroot00000000000000PNG  IHDR gAMA a cHRMz&u0`:pQ<bKGD̿tIME  /UDIDAT(ci$ دC[% 0Ckd)p sy #/L*fkabb0@%zvĠb7ab3V|m2@|nj`y[q3B?o e.F {6~#r~nm %tEXtdate:create2016-11-05T07:32:47-07:00*S8%tEXtdate:modify2016-11-05T07:32:47-07:00[OtEXtlabeldn IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/100_3.png000066400000000000000000000007741506673203700227360ustar00rootroot00000000000000PNG  IHDR+G'gAMA a cHRMz&u0`:pQ<bKGD̿tIME  9ZIDAT8cit nQ 1uΑ@@y.<,̀mH"|au< pO w yA$ 1/1!!d.d |.қЂ`R(vx_",x?$& $?AH2o" qu)[!% T2b$L<%tEXtdate:create2016-11-05T07:32:57-07:008m%tEXtdate:modify2016-11-05T07:32:57-07:00tEXtlabeldn IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/100_4.png000066400000000000000000000011011506673203700227200ustar00rootroot00000000000000PNG  IHDR5s0gAMA a cHRMz&u0`:pQ<bKGD̿tIME !K6IDAT8c uB07Jn5v|,⮓>`~|aHb@ȿhxA #AYVMiҌ??U>xtgr :)c@{ϊ$,]H}`Hu`=ҥH`Hҝ`cұHK+usd1!IqD!P̫˞gG 1-^B~k dXҀaFh2' Db*GM-fҐeuߠ3ɍ;lv27u} -JVCjb%tEXtdate:create2016-11-05T07:33:26-07:00Q%tEXtdate:modify2016-11-05T07:33:26-07:00BtEXtlabeldn IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/100_7.png000066400000000000000000000013611506673203700227330ustar00rootroot00000000000000PNG  IHDR0T gAMA a cHRMz&u0`:pQ<bKGD̿tIME !$ 9IDATXO(ag{Hi?sO?9 khS.r$9E.Qāh5Qʟly=}~>mۃvbHteSl5y_Ho!1|"w- LsH8$n- 1kC:W6R?ɓB*@vs/d vư,  Km\Hg9H|^1DCcEpHo~ҕdhFDRf4zHj=3x(n&ij@3 8@) Z n[']9-8]jZo i| |vLFiӆb~wH^|eBO!&;oBنyk!Oo OsZwIzk^@|ܾ=pf=~. 
~ ,*IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  &׉TyIDATc 7׌q'!aZr6 2C} f2{=~0t6H_ƿr1Lީ10r20D&G|U}㰊`Q|5HP?`Z!+`t:p&E.F6Q~Ԅ;˺Q58l`W[%tEXtdate:create2016-11-05T07:33:17-07:00q]%tEXtdate:modify2016-11-05T07:33:17-07:00,tEXtlabele^IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/101_6.png000066400000000000000000000013261506673203700227340ustar00rootroot00000000000000PNG  IHDR)Jk"gAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDATHcO,`U9rT%UU='ʧ`91_'+2 ݪ?8Tf@ |̀?Py]+ EK$&*J0[`[r.LP"UebJϐ6,NUD EP,J{HZFLWVhG P Pgt* %2 ҀJ+J*u*TGyJ3ʉUAUbr ,mPI DOcJg CBEe\vߑn愉)2Wo L4n&<}y/H 'HQ'‹ *8>Y8 cQUU*LT^?F+JsL8a%S7FZ"FU*, Ԥ %tEXtdate:create2016-11-05T07:33:26-07:00Q%tEXtdate:modify2016-11-05T07:33:26-07:00BtEXtlabele^IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/101_7.png000066400000000000000000000014441506673203700227360ustar00rootroot00000000000000PNG  IHDR/T71rgAMA a cHRMz&u0`:pQ<bKGD̿tIME !$ 9IDATHO(eQ>"zi^(S IV,0a!HDBd#Ȕ$"RnۘfDH4<=>wJ| oxuXMv\d^1y{f $`b/$*kZSv*8HC@3* ^;ArtZ~Nj[t+]]J;vyQ$us7hOȻJͶ 1T{,+G7/FX\/oJS׶tA;.I n"o͠' }e߯o˾U_}.Dfx|І}1ަO`/] ]KE8Ǒ£ܝq!@jMy &aVGU]v};BӏT- ?ȣ>BL۱_όȨ#2%4ԕ~bKt>?駊40xf5y*R{N$Zh07~u\wrŖϡ>_kGH7`xÿ+"FqM%tEXtdate:create2016-11-05T07:33:36-07:00i#Q`%tEXtdate:modify2016-11-05T07:33:36-07:00~tEXtlabele^IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/102_0.png000066400000000000000000000004611506673203700227260ustar00rootroot00000000000000PNG  IHDR y9JgAMA a cHRMz&u0`:pQ<bKGD̿tIME  %&IDATc??A@*N*6'u5%tEXtdate:create2016-11-05T07:32:28-07:00tA%tEXtdate:modify2016-11-05T07:32:28-07:00k)!tEXtlabelf9&IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/102_1.png000066400000000000000000000005241506673203700227270ustar00rootroot00000000000000PNG  IHDR 7zgAMA a cHRMz&u0`:pQ<bKGD̿tIME  &׉TIIDATc~SNpZuWĪgFcgs32F%tEXtdate:create2016-11-05T07:32:38-07:00A%tEXtdate:modify2016-11-05T07:32:38-07:00tEXtlabelf9&IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/102_2.png000066400000000000000000000005421506673203700227300ustar00rootroot00000000000000PNG  IHDR pvgAMA a cHRMz&u0`:pQ<bKGD̿tIME  /UDWIDATc$YD#g3:``04s(9+0m ~gBgwABWCb@wp_ b -|F8&8!Qh%tEXtdate:create2016-11-05T07:33:17-07:00q]%tEXtdate:modify2016-11-05T07:33:17-07:00,tEXtlabelf9&IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/102_6.png000066400000000000000000000006461506673203700227410ustar00rootroot00000000000000PNG  IHDRJkgAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDAT8c~o6b㒶IIblG[]&V\2Zd2ɂ Όp/P_pSdB%\ʸG9)iÐ i2/A*ˀ2A!36*3*C 0*3TeUI0%tEXtdate:create2016-11-05T07:33:26-07:00Q%tEXtdate:modify2016-11-05T07:33:26-07:00BtEXtlabelf9&IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/102_7.png000066400000000000000000000006721506673203700227410ustar00rootroot00000000000000PNG  IHDRTtgAMA a cHRMz&u0`:pQ<bKGD̿tIME !$ 9IDATHcnOc4 k 3 K^ fdit, Hb't1>"pqfw.}pYk`/ˑz/#L&6Lz>ViaMҰdMLlҿaH&@LZ(~pC\zGɨ4qҸ4դ&(,%tEXtdate:create2016-11-05T07:33:36-07:00i#Q`%tEXtdate:modify2016-11-05T07:33:36-07:00~tEXtlabelf9&IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/103_0.png000066400000000000000000000005151506673203700227270ustar00rootroot00000000000000PNG  IHDR tgAMA a cHRMz&u0`:pQ<bKGD̿tIME  %BIDATc tOK^F (55c< /\[9tO%tEXtdate:create2016-11-05T07:32:28-07:00tA%tEXtdate:modify2016-11-05T07:32:28-07:00k)!tEXtlabelgN?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/103_1.png000066400000000000000000000006371506673203700227350ustar00rootroot00000000000000PNG  IHDR >IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  &׉TIDATc [d{7WQ{?7Cx ?:&@x q !_-Lq00msvv4 X,\00 dt eаd u $D5Beq 
θ=~JS%tEXtdate:create2016-11-05T07:32:47-07:00*S8%tEXtdate:modify2016-11-05T07:32:47-07:00[OtEXtlabelgN?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/103_3.png000066400000000000000000000011121506673203700227240ustar00rootroot00000000000000PNG  IHDR+0|gAMA a cHRMz&u0`:pQ<bKGD̿tIME  9Z?IDAT8c 0{Le @p?&sY \d*$j`1".$.km``2r *V#1mn5@,1'eZ2@̃pMf}^ThT#d.50ߊH2"kɀ9@ 2ʙtu H%~]1D>)^ձ*Q `ÚvR2 Pǁ+LF( i*qĩlqygTlPHP1{5gBO` Fo! &<@a~KXz#%tEXtdate:create2016-11-05T07:32:57-07:008m%tEXtdate:modify2016-11-05T07:32:57-07:00tEXtlabelgN?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/103_4.png000066400000000000000000000012571506673203700227370ustar00rootroot00000000000000PNG  IHDR5s0gAMA a cHRMz&u0`:pQ<bKGD̿tIME !KIDAT8c0JMS8Tb6'$: P{g0!+ѥATi @E&Dc˿VBS<P\A~ ýzYze`NRHCs!IbDKGh9$i [%UAlmiU$is[EZI:f${`N$Nd`?Bwp0 DB0 *-A!d?WФ%VT~(,KO_$Ye)\N3JݥNIw`F\$Xz.iHW Ґy@|P`iX`_yri[ u9h[#I${?40gb]ڡRPV2w;%tEXtdate:create2016-11-05T07:33:07-07:00A]7%tEXtdate:modify2016-11-05T07:33:07-07:000tEXtlabelgN?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/103_5.png000066400000000000000000000014151506673203700227340ustar00rootroot00000000000000PNG  IHDR#@ґqgAMA a cHRMz&u0`:pQ<bKGD̿tIME !v/hIDATHcO0UC75:|%y"ån3O0|AR3 ؾ a}π `Ie@jfc(a`BUs *X!$Aja͆;+"3Lr#=p%hwV#3T[8 CV‘Dd5 K453pEVsnd5!45j~CҖ&3Q^(y‚&@QSˠ5j>BJ.HJa59jl`j޽jASc/AVS;C(9;05S~N~Us.4)y Uě:M`!?P? ?jP 4 ?6ɡ` :%<vfJ3 OXwd%tEXtdate:create2016-11-05T07:33:17-07:00q]%tEXtdate:modify2016-11-05T07:33:17-07:00,tEXtlabelgN?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/103_6.png000066400000000000000000000015321506673203700227350ustar00rootroot00000000000000PNG  IHDR)Jk"gAMA a cHRMz&u0`:pQ<bKGD̿tIME !OIDATH_HQ;-K+*U  ^$`iT`NE͗$"(BC/ѲdA:isw9weio EYEMRo`׵e2R%dT~V)-z,x[u94ViCvTxG&㕪"2; `hR}3-2N _O9vVG'6H;-{v4]~<$g-(YU[t.s_!oVYe}8Sl:8@bjNl2H[S=>L fR"EpJ%gkάD2/JdE9X\,ta=?MopmMZ{'H[arMP)Ne de Iє7+s|M*mx/6~O)3VX'N %tEXtdate:create2016-11-05T07:33:36-07:00i#Q`%tEXtdate:modify2016-11-05T07:33:36-07:00~tEXtlabelgN?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/104_0.png000066400000000000000000000004731506673203700227330ustar00rootroot00000000000000PNG  IHDR tgAMA a cHRMz&u0`:pQ<bKGD̿tIME  %0IDATcN t'[_ -:] ? iiݙIgAMA a cHRMz&u0`:pQ<bKGD̿tIME  &׉TNIDATc Swؓ yo )9ˈ.UP0Fé+&PC!1l@=V;lʏqP Sojl(0po`gJؕG y-EV!P"3\9/b<.`.ʟ¥(\%aRʅ0Tɕb*wT+T&Sd:0AFL2Áb&zD:m%"%tEXtdate:create2016-11-05T07:32:57-07:008m%tEXtdate:modify2016-11-05T07:32:57-07:00tEXtlabelihIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/105_4.png000066400000000000000000000005401506673203700227330ustar00rootroot00000000000000PNG  IHDR 5N'gAMA a cHRMz&u0`:pQ<bKGD̿tIME !KUIDAT(c Si] U>`q04 fKp| [d(ZaN}6ܠ0%tEXtdate:create2016-11-05T07:33:07-07:00A]7%tEXtdate:modify2016-11-05T07:33:07-07:000tEXtlabelihIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/105_5.png000066400000000000000000000005501506673203700227350ustar00rootroot00000000000000PNG  IHDR @FgAMA a cHRMz&u0`:pQ<bKGD̿tIME !v/h]IDAT(c ~Ki/@`-w̓Mg+`,(w1%̛5c$bBa0h>CY=XSyLO!gKn%tEXtdate:create2016-11-05T07:33:17-07:00q]%tEXtdate:modify2016-11-05T07:33:17-07:00,tEXtlabelihIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/105_6.png000066400000000000000000000005731506673203700227430ustar00rootroot00000000000000PNG  IHDRJA(QIgAMA a cHRMz&u0`:pQ<bKGD̿tIME !pIDAT8c YVAdIo@M [B? 
|y(oBP\0?˲="tsP|vG }r>o;=%tEXtdate:create2016-11-05T07:33:27-07:00ZJ%tEXtdate:modify2016-11-05T07:33:27-07:00rtEXtlabelihIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/105_7.png000066400000000000000000000006071506673203700227420ustar00rootroot00000000000000PNG  IHDRTAgAMA a cHRMz&u0`:pQ<bKGD̿tIME !%W|IDAT8c֨.!ˎgE :T T!*r& ᆊ\CED`vM ?K Fšਮ}@EfC_;,4~ nz攑+")%tEXtdate:create2016-11-05T07:33:37-07:00TZ%tEXtdate:modify2016-11-05T07:33:37-07:00 htEXtlabelihIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/106_0.png000066400000000000000000000004451506673203700227340ustar00rootroot00000000000000PNG  IHDR }swgAMA a cHRMz&u0`:pQ<bKGD̿tIME  fpIDATc s@b?A2"ȷ%tEXtdate:create2016-11-05T07:32:29-07:00J)%tEXtdate:modify2016-11-05T07:32:29-07:00^tEXtlabelj0aC IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/106_1.png000066400000000000000000000004631506673203700227350ustar00rootroot00000000000000PNG  IHDRqgAMA a cHRMz&u0`:pQ<bKGD̿tIME  &׉T(IDATc C,  ޢ *fӘBo_t_B%tEXtdate:create2016-11-05T07:32:38-07:00A%tEXtdate:modify2016-11-05T07:32:38-07:00tEXtlabelj0aC IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/106_2.png000066400000000000000000000004771506673203700227430ustar00rootroot00000000000000PNG  IHDR  f gAMA a cHRMz&u0`:pQ<bKGD̿tIME  0#]I4IDATc p'O F  bc4Pʀqc%tEXtdate:create2016-11-05T07:32:48-07:00H%tEXtdate:modify2016-11-05T07:32:48-07:00FtEXtlabelj0aC IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/106_3.png000066400000000000000000000005171506673203700227370ustar00rootroot00000000000000PNG  IHDR +gAMA a cHRMz&u0`:pQ<bKGD̿tIME  9ZDIDATc Hn<, 5%<?@Y!zmn ty֣, fXlh%tEXtdate:create2016-11-05T07:32:57-07:008m%tEXtdate:modify2016-11-05T07:32:57-07:00tEXtlabelj0aC IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/106_4.png000066400000000000000000000005341506673203700227370ustar00rootroot00000000000000PNG  IHDR 5PkU^gAMA a cHRMz&u0`:pQ<bKGD̿tIME !KQIDAT(cP9f&߀r8100\p0@C8u`1ę 59x?lP w G9C+oy5{%tEXtdate:create2016-11-05T07:33:07-07:00A]7%tEXtdate:modify2016-11-05T07:33:07-07:000tEXtlabelj0aC IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/106_5.png000066400000000000000000000005531506673203700227410ustar00rootroot00000000000000PNG  IHDR @FgAMA a cHRMz&u0`:pQ<bKGD̿tIME !v/h`IDAT(c Yr?0/O fy `06&AgqCmbpPwc>|C\v}7ʣGla!\ %tEXtdate:create2016-11-05T07:33:17-07:00q]%tEXtdate:modify2016-11-05T07:33:17-07:00,tEXtlabelj0aC IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/106_6.png000066400000000000000000000005661506673203700227460ustar00rootroot00000000000000PNG  IHDRJA(QIgAMA a cHRMz&u0`:pQ<bKGD̿tIME !kIDAT8c HoOgd5PMV03@F Ư`   p߽koQ*WN%01Q(PƕJwA%tEXtdate:create2016-11-05T07:33:27-07:00ZJ%tEXtdate:modify2016-11-05T07:33:27-07:00rtEXtlabelj0aC IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/106_7.png000066400000000000000000000006061506673203700227420ustar00rootroot00000000000000PNG  IHDRT8gAMA a cHRMz&u0`:pQ<bKGD̿tIME !%W{IDAT8c(Ϯ I ~1@AL"LH  Fm =B]o: ZpRNH vFNH Pp.t8BBBB؅0!l Mظ%tEXtdate:create2016-11-05T07:33:37-07:00TZ%tEXtdate:modify2016-11-05T07:33:37-07:00 htEXtlabelj0aC IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/107_0.png000066400000000000000000000004721506673203700227350ustar00rootroot00000000000000PNG  IHDR tgAMA a cHRMz&u0`:pQ<bKGD̿tIME  fp/IDATcخ0QV"Ң!;.BM}?=ڗ%tEXtdate:create2016-11-05T07:32:29-07:00J)%tEXtdate:modify2016-11-05T07:32:29-07:00^tEXtlabelkGfsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/107_1.png000066400000000000000000000005461506673203700227400ustar00rootroot00000000000000PNG  IHDR S"gAMA a cHRMz&u0`:pQ<bKGD̿tIME  
&׉T[IDATcdbà*͉A5TY;kVpBdM{)`n%^p{631#\0AʣF%%tEXtdate:create2016-11-05T07:32:38-07:00A%tEXtdate:modify2016-11-05T07:32:38-07:00tEXtlabelkGfsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/107_2.png000066400000000000000000000006321506673203700227350ustar00rootroot00000000000000PNG  IHDR  .gAMA a cHRMz&u0`:pQ<bKGD̿tIME  0#]IIDAT(c ^030 2 WDH00GRsVA9 9dwNv300Aʀe{+00pFqMK(n> yn~@ >(( Yw%tEXtdate:create2016-11-05T07:32:48-07:00H%tEXtdate:modify2016-11-05T07:32:48-07:00FtEXtlabelkGfsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/107_3.png000066400000000000000000000006761506673203700227460ustar00rootroot00000000000000PNG  IHDR+Y;'gAMA a cHRMz&u0`:pQ<bKGD̿tIME  :ÈIDAT(cxW @yTA c+. g@q4C^YH)Es2 ́@B%`w|@e _HG"@;0a{-z>1(X0{;ȫ 0aͼ#@Σ@XB˻əa%tEXtdate:create2016-11-05T07:32:57-07:008m%tEXtdate:modify2016-11-05T07:32:57-07:00tEXtlabelkGfsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/107_4.png000066400000000000000000000007521506673203700227420ustar00rootroot00000000000000PNG  IHDR5gAMA a cHRMz&u0`:pQ<bKGD̿tIME !KIDAT8c c @ {^bsakXa1a[žf0c 4\=u`q,g L_.`R0MXSL>&O L"w$v-31_=X6"9__Hǖ&ٜǰgr@--]_`KgY\/yqk9sMYf*HZ3Vg 13!+>c('yeb|i&Pohh˷ jN:Js~.C-+`/\O,RJ>`o)Jn`+ؑw(YQT{lĕ4`N+:`u1%-ګQ˕x529u 3C%tEXtdate:create2016-11-05T07:33:27-07:00ZJ%tEXtdate:modify2016-11-05T07:33:27-07:00rtEXtlabelkGfsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/107_7.png000066400000000000000000000011411506673203700227360ustar00rootroot00000000000000PNG  IHDR+T>BgAMA a cHRMz&u0`:pQ<bKGD̿tIME !%WVIDATHO(aw?hvaAqvM’$ZBNqEePQ+{\ˣ#SKZk֣͌d5,sJӤUHw&w[!}Xusg<hCb4pH6LL^=^Lv6u'jEebgOJXԚcUbeT0՘bַK<y3pSX9pnޯB쯗lމ'SJ[rurʩ.XsC쿐u-9zO(}=Zk>0Y%tEXtdate:create2016-11-05T07:33:37-07:00TZ%tEXtdate:modify2016-11-05T07:33:37-07:00 htEXtlabelkGfsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/108_0.png000066400000000000000000000004351506673203700227350ustar00rootroot00000000000000PNG  IHDR hgAMA a cHRMz&u0`:pQ<bKGD̿tIME  fpIDATcXnۥ OO%tEXtdate:create2016-11-05T07:32:29-07:00J)%tEXtdate:modify2016-11-05T07:32:29-07:00^tEXtlabell8IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/108_1.png000066400000000000000000000004401506673203700227320ustar00rootroot00000000000000PNG  IHDRqgAMA a cHRMz&u0`:pQ<bKGD̿tIME  &׉TIDATc CKb9%tEXtdate:create2016-11-05T07:32:38-07:00A%tEXtdate:modify2016-11-05T07:32:38-07:00tEXtlabell8IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/108_2.png000066400000000000000000000004501506673203700227340ustar00rootroot00000000000000PNG  IHDR =gAMA a cHRMz&u0`:pQ<bKGD̿tIME  0#]IIDATc| ȥ%tEXtdate:create2016-11-05T07:32:48-07:00H%tEXtdate:modify2016-11-05T07:32:48-07:00FtEXtlabell8IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/108_3.png000066400000000000000000000004521506673203700227370ustar00rootroot00000000000000PNG  IHDR +gAMA a cHRMz&u0`:pQ<bKGD̿tIME  :ÈIDATc8ϰ~HczI%tEXtdate:create2016-11-05T07:32:58-07:00H%tEXtdate:modify2016-11-05T07:32:58-07:00a8tEXtlabell8IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/108_4.png000066400000000000000000000004521506673203700227400ustar00rootroot00000000000000PNG  IHDR 5N'gAMA a cHRMz&u0`:pQ<bKGD̿tIME !KIDAT(cX 7X? 66Omd>%tEXtdate:create2016-11-05T07:33:07-07:00A]7%tEXtdate:modify2016-11-05T07:33:07-07:000tEXtlabell8IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/108_5.png000066400000000000000000000004541506673203700227430ustar00rootroot00000000000000PNG  IHDR @FgAMA a cHRMz&u0`:pQ<bKGD̿tIME !v/h!IDAT(c . 
`P= d0x.Şy%tEXtdate:create2016-11-05T07:33:17-07:00q]%tEXtdate:modify2016-11-05T07:33:17-07:00,tEXtlabell8IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/108_6.png000066400000000000000000000004561506673203700227460ustar00rootroot00000000000000PNG  IHDRJ:wgAMA a cHRMz&u0`:pQ<bKGD̿tIME !#IDAT8cxy> {;F8H)ch7 %tEXtdate:create2016-11-05T07:33:27-07:00ZJ%tEXtdate:modify2016-11-05T07:33:27-07:00rtEXtlabell8IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/108_7.png000066400000000000000000000004611506673203700227430ustar00rootroot00000000000000PNG  IHDRT8gAMA a cHRMz&u0`:pQ<bKGD̿tIME !%W&IDAT8cxq ;B1^}%tEXtdate:create2016-11-05T07:33:37-07:00TZ%tEXtdate:modify2016-11-05T07:33:37-07:00 htEXtlabell8IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/109_0.png000066400000000000000000000005011506673203700227300ustar00rootroot00000000000000PNG  IHDR cCgAMA a cHRMz&u0`:pQ<bKGD̿tIME  fp6IDATc ؙM" M0\-pt@3I\hZW+%tEXtdate:create2016-11-05T07:32:29-07:00J)%tEXtdate:modify2016-11-05T07:32:29-07:00^tEXtlabelm֮IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/109_1.png000066400000000000000000000005631506673203700227410ustar00rootroot00000000000000PNG  IHDRgAMA a cHRMz&u0`:pQ<bKGD̿tIME  &׉ThIDATc !fϰ|!H/ERLbLB, FR w b6f`py?#|pH2tAjaO@ʝ" 30 Q%tEXtdate:create2016-11-05T07:32:38-07:00A%tEXtdate:modify2016-11-05T07:32:38-07:00tEXtlabelm֮IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/109_2.png000066400000000000000000000006521506673203700227410ustar00rootroot00000000000000PNG  IHDR \gAMA a cHRMz&u0`:pQ<bKGD̿tIME  0#]IIDAT(c0 _{ۗom"w^i y DVV0`Qa00݂*v,dH2l`r/@O &Иd\: ke[dar"ˁo`V6y!)7 UgewC`%tEXtdate:create2016-11-05T07:32:48-07:00H%tEXtdate:modify2016-11-05T07:32:48-07:00FtEXtlabelm֮IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/109_3.png000066400000000000000000000007331506673203700227420ustar00rootroot00000000000000PNG  IHDR#+ngAMA a cHRMz&u0`:pQ<bKGD̿tIME  :ÈIDAT8cO0|j~n4m(–9`jf %` $5``1po(m?a#\ kڲK#B 3wLcqrjv-h^ l1j x 5K t('Eq.@9]@0\ +,:Aj^C9(jDa RQ5!%͏Eb%tEXtdate:create2016-11-05T07:32:58-07:00H%tEXtdate:modify2016-11-05T07:32:58-07:00a8tEXtlabelm֮IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/109_4.png000066400000000000000000000010201506673203700227310ustar00rootroot00000000000000PNG  IHDR+5MgAMA a cHRMz&u0`:pQ<bKGD̿tIME !KIDATHcO<`U;j_=@j3A&VI 2Īq1ngObg 0>;c@3YPԦ2 (|HjX $!e`{ 7*[%}dl~u*mAFe%D/$8>"qr < P.g[ #* "2,w߈ 6,ߌ6 6, htGՎPjG" ug#%tEXtdate:create2016-11-05T07:33:07-07:00A]7%tEXtdate:modify2016-11-05T07:33:07-07:000tEXtlabelm֮IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/109_5.png000066400000000000000000000011131506673203700227350ustar00rootroot00000000000000PNG  IHDR3@|gAMA a cHRMz&u0`:pQ<bKGD̿tIME !v/h@IDATHcO:`3gTVe#ȫ\L(+R_p=Go`D\Ai L~9l+R4=PPߠ'&?] 
E F8@$ED LESkH{ sB3{~k\Ec&5p(z#LY1ezX?"@D"%^\d=i"6Ih/Dh6Td=HBWiD$t)z!b 虇Ewz3'3G%tEXtdate:create2016-11-05T07:33:17-07:00q]%tEXtdate:modify2016-11-05T07:33:17-07:00,tEXtlabelm֮IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/109_6.png000066400000000000000000000011641506673203700227440ustar00rootroot00000000000000PNG  IHDR<JhrgAMA a cHRMz&u0`:pQ<bKGD̿tIME !iIDATXcO`Px4sQTx+wwjև ECTͰ~F5'JQͣG5l)G5j  5%tEXtdate:create2016-11-05T07:33:27-07:00ZJ%tEXtdate:modify2016-11-05T07:33:27-07:00rtEXtlabelm֮IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/109_7.png000066400000000000000000000012661506673203700227500ustar00rootroot00000000000000PNG  IHDRETlgAMA a cHRMz&u0`:pQ<bKGD̿tIME !%WIDATXcO 0jʨ)2jʨ)T6T"#o=T%84|'>E5o11 ;d &À xBȅ&Ź)u w R밚rOS)IXd?`1V jߖ*0Ӕ%6ؕ2T!ci&d,,bBRo ŃRmXC) 0=Di!e>-Bl?@Dh݂y,nb il6dpD,$HG3SNE U74E-[&el+)h,!`NI` M9L+?v4#r MWz4RM&s]F)IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  'JIDATc g*lʋabc``1/ CRcŐŁx )^ ΐ ᕓţ`6lD%tEXtdate:create2016-11-05T07:32:39-07:00pJ%tEXtdate:modify2016-11-05T07:32:39-07:00 tEXtlabeln7 IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/110_2.png000066400000000000000000000005721506673203700227320ustar00rootroot00000000000000PNG  IHDR :w-gAMA a cHRMz&u0`:pQ<bKGD̿tIME  0#]IoIDAT(cУE-;A~300~ VÅv0@IGTۄI +&| 7 Lb*L "T  Z$B)\} %tEXtdate:create2016-11-05T07:32:48-07:00H%tEXtdate:modify2016-11-05T07:32:48-07:00FtEXtlabeln7 IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/110_3.png000066400000000000000000000006361506673203700227340ustar00rootroot00000000000000PNG  IHDR+LgAMA a cHRMz&u0`:pQ<bKGD̿tIME  :ÈIDAT8c0^{7L|1X@@ZAp_Ȁl 9Z@pq?`Sׂ|2ΰ`-ėx'a;Ap0oaQ]OWC|*$%tEXtdate:create2016-11-05T07:32:58-07:00H%tEXtdate:modify2016-11-05T07:32:58-07:00a8tEXtlabeln7 IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/110_4.png000066400000000000000000000006741506673203700227370ustar00rootroot00000000000000PNG  IHDR5gAMA a cHRMz&u0`:pQ<bKGD̿tIME !DIDAT8c0~ρyJAc{7d1 ;Wd@ᨲG8P̏db`UENg =dgHCEN(TD ؀6e \!27‹h@ `[ipv";L4CZp}]jĤBբ[ tuGP=@U -EB%^AOIPC%+@W]]TfKǮn)ꖏUGEuF utv6 %tEXtdate:create2016-11-05T07:33:27-07:00ZJ%tEXtdate:modify2016-11-05T07:33:27-07:00rtEXtlabeln7 IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/110_7.png000066400000000000000000000010401506673203700227260ustar00rootroot00000000000000PNG  IHDR.TkZLgAMA a cHRMz&u0`:pQ<bKGD̿tIME !%WIDATHcO`U>|Trʕܘg-#0^$ДCe&7p2O8_fwq(ˀ h}Ū| vMV?b*ЌZa*#Np +O@2<5JN0$~cQ-]R(GO SR(_&\joЕITm`rS1c*MT^&׏|&v<ʷc*G L(NǣG*LQH9F ޡS<%tEXtdate:create2016-11-05T07:33:37-07:00TZ%tEXtdate:modify2016-11-05T07:33:37-07:00 htEXtlabeln7 IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/111_0.png000066400000000000000000000005001506673203700227200ustar00rootroot00000000000000PNG  IHDR ggAMA a cHRMz&u0`:pQ<bKGD̿tIME  fp5IDATc X=|w2 /2(>9[J9TJ`%tEXtdate:create2016-11-05T07:32:29-07:00J)%tEXtdate:modify2016-11-05T07:32:29-07:00^tEXtlabelo@ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/111_1.png000066400000000000000000000006141506673203700227270ustar00rootroot00000000000000PNG  IHDR >IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  'IDATc [b87';Wu/C& A  5~1ԀxMib`$h6208|ؾҎdg3s"%tEXtdate:create2016-11-05T07:32:39-07:00pJ%tEXtdate:modify2016-11-05T07:32:39-07:00 tEXtlabelo@ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/111_2.png000066400000000000000000000007271506673203700227350ustar00rootroot00000000000000PNG  IHDR 7 jgAMA a cHRMz&u0`:pQ<bKGD̿tIME  0#]IIDAT(c0 )\E|&E&xulq /~S```bd`{ o dVxA-e` 30XÜR 4a`p 200 20 00(U."v!,CS I7 ? 
JX&7lGȣXfq{cѽAhPSE%tEXtdate:create2016-11-05T07:32:48-07:00H%tEXtdate:modify2016-11-05T07:32:48-07:00FtEXtlabelo@ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/111_3.png000066400000000000000000000010261506673203700227270ustar00rootroot00000000000000PNG  IHDR+0|gAMA a cHRMz&u0`:pQ<bKGD̿tIME  :È IDAT8c 0#so۾Q7QdΨ0KR6d sEv4D򃥚2/@< އg 2 o X'05I]GV  ;?$ bG V ?g)B+9be"d _@/H?9 f5Lb,yY -pS7'S, %(a?'✽e8o=tpcIocDitd!UQO%tEXtdate:create2016-11-05T07:32:58-07:00H%tEXtdate:modify2016-11-05T07:32:58-07:00a8tEXtlabelo@ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/111_4.png000066400000000000000000000011471506673203700227340ustar00rootroot00000000000000PNG  IHDR5ogAMA a cHRMz&u0`:pQ<bKGD̿tIME !D\IDAT8c0w,d"<dEŀ ,Hd@Ry1`tx_Ax`[!o :&AP`73< ;<?4 uASt@ 2k9'QBU$$l_T `&j4ׁ促ڨ``AL?(``y0%XXnG~ X^|4X( `^%Bz$.@g?r|o4r]O\'4QhkB<w%}kbF!d`o ;C]ڬx V -F%tEXtdate:create2016-11-05T07:33:08-07:00-%tEXtdate:modify2016-11-05T07:33:08-07:00ΕbtEXtlabelo@ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/111_5.png000066400000000000000000000012561506673203700227360ustar00rootroot00000000000000PNG  IHDR%@6gAMA a cHRMz&u0`:pQ<bKGD̿tIME !&9IDATHcO `U5jP:-) f1.Uߧ26)0'~3`#h>0`lQT}6e x"6[pa|USb aZwACD8\X LUT`r@UPC{4xATB'BTM#x ^kq90]O8'DD+YeH|g(6BG] 5Dhx=FS .=4֠*uh<6C(}փz/P5*bg` G'"BA=Kh<˙ r%X72.S3lJU01`὘J_Z3`,Е>Pb*}&$*̉J%xbqm$pa70#>ýi"3L 9lVgÄbP|##T\7T}', & =)@eJa#-́J1+ s}fVz f,I*W VzPtҭHT2E)6]PIC0|Ģ`aJ`QK+6c'XXc*=$DixC+TJ/DsXAWfh4 lFUy 0`0#"Ôoqς+ZOJ3Ś/ ҈^Lj)ߖ! E{BmLl: UL;(I'Ƌc9ğzylêz$M;C)ԟ`NuR:9_9&nE&4r%NzW&_"` )äK`3 iA(>ߒ&(i4~")iI.L"¯wZ&.Fd%$eR/o.w1bO Hq3>qyH ߱ pDJ*í.&[an%FBp%J:թնRA2[T/:гwV緸[Yj@2g|'Ƞ૸C./9<+)f_"1lNvV?qR}룎 U_ ą}E ?L@ Kn3%tEXtdate:create2016-11-05T07:33:37-07:00TZ%tEXtdate:modify2016-11-05T07:33:37-07:00 htEXtlabelo@ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/112_0.png000066400000000000000000000005021506673203700227230ustar00rootroot00000000000000PNG  IHDR tgAMA a cHRMz&u0`:pQ<bKGD̿tIME  fp7IDATc r@|-F mtRft Q#<"%tEXtdate:create2016-11-05T07:32:29-07:00J)%tEXtdate:modify2016-11-05T07:32:29-07:00^tEXtlabelpwIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/112_1.png000066400000000000000000000005761506673203700227370ustar00rootroot00000000000000PNG  IHDR >IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  'sIDATc g(g!Ysda`a`113IfPA- 2o'0OW 5%ӟu &' k n܂2"yx(bFО%tEXtdate:create2016-11-05T07:32:39-07:00pJ%tEXtdate:modify2016-11-05T07:32:39-07:00 tEXtlabelpwIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/112_2.png000066400000000000000000000007021506673203700227270ustar00rootroot00000000000000PNG  IHDR :w-gAMA a cHRMz&u0`:pQ<bKGD̿tIME  0#]IIDAT(cп f``|VH2Gus1@@9\+ Zbh``P 26<19 *t"ij.CT"򅑁*,4sD H7B@(&LA,nmw9pPBzEt%tEXtdate:create2016-11-05T07:32:48-07:00H%tEXtdate:modify2016-11-05T07:32:48-07:00FtEXtlabelpwIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/112_3.png000066400000000000000000000007731506673203700227400ustar00rootroot00000000000000PNG  IHDR+LgAMA a cHRMz&u0`:pQ<bKGD̿tIME  :ÈIDAT8c04?b x:fX*qĻB3}J&X !Π{S bs=Z`λyů}a_2 # *.^3$\<dxL#73ބWx3s!2oT03>fH)!.{9^~7 EY;5OZ=~1PcjHE%tEXtdate:create2016-11-05T07:32:58-07:00H%tEXtdate:modify2016-11-05T07:32:58-07:00a8tEXtlabelpwIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/112_4.png000066400000000000000000000010701506673203700227300ustar00rootroot00000000000000PNG  IHDR5gAMA a cHRMz&u0`:pQ<bKGD̿tIME !D-IDAT8c0~Se$ vIdYo 0=Đ-d@mhSzY^+ 溎, Bπsa_YPÚ MȲW8Y6`}G`Y) uC=mAȆ#˾#de߀eCȲY֯HH>Z$Y$k.,9ge#| X`l 
C?sF$YV4%ԠJ梥*F$ؿ)vLNv5fzXekLבeQYd~Y3\Lu4o%tEXtdate:create2016-11-05T07:33:08-07:00-%tEXtdate:modify2016-11-05T07:33:08-07:00ΕbtEXtlabelpwIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/112_5.png000066400000000000000000000012011506673203700227250ustar00rootroot00000000000000PNG  IHDR"@ZOgAMA a cHRMz&u0`:pQ<bKGD̿tIME !&9vIDATHcO0*UBe% @E.AG(J>3A?%)6%O*L%oPA//tЉDB ( 0@U+>E^Me>+5)T MI ›/!BPhF P](JVDYO%3!nJP#XTYI=ZʁEYLASd(m$%hJ!§$)1GR;+X+r]FQ"(J+aTp9jL}xh!T#KH!FˣŰC :AM*,~e{ h*Ԟa(*_`dFdHEcQc ~A.!4`T@+OPG Y X_%tEXtdate:create2016-11-05T07:33:18-07:00{9-@%tEXtdate:modify2016-11-05T07:33:18-07:00 dtEXtlabelpwIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/112_6.png000066400000000000000000000013061506673203700227340ustar00rootroot00000000000000PNG  IHDR(JIgAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDATHM(agyiiJK(J. ⦴Hrp0sV&PӬa={mg)P*<mZg\Z,$<(+l(cI}B.Hh0"I"{O'Ӌ?YNJ/9Wו?[•oၰD!4x?!ax\bl@l[7٤{}lbЏ@*X!@.c!ul'zRS`n`xzHv"HfnK7~r3xN1$Qh2gM+s3^R Rש~<)!p]ii`s5oluo? aҨP2V3~n @%tEXtdate:create2016-11-05T07:33:27-07:00ZJ%tEXtdate:modify2016-11-05T07:33:27-07:00rtEXtlabelpwIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/112_7.png000066400000000000000000000013761506673203700227440ustar00rootroot00000000000000PNG  IHDR/T71rgAMA a cHRMz&u0`:pQ<bKGD̿tIME !%WIDATH?H[A :(JYA+JA"R!ЂJ,:(-!PDyXi m1N-Kr=ZEw{w{ f{wwBm u/geM-qր:~}QͲlXSp0!kb>{ľOrgc_3gzё?ٕvMz[|[em]T6ǕU zcŨ ]t/*D90}Po(X\{S/ шcC?BkIgAMA a cHRMz&u0`:pQ<bKGD̿tIME  'wIDATc {'{˝Ļ+"Fگ7 Ӂ:y' 30\OL S\N1eNifNyI'..Pۡ\R;V%tEXtdate:create2016-11-05T07:32:39-07:00pJ%tEXtdate:modify2016-11-05T07:32:39-07:00 tEXtlabelqIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/113_2.png000066400000000000000000000007021506673203700227300ustar00rootroot00000000000000PNG  IHDR :w-gAMA a cHRMz&u0`:pQ<bKGD̿tIME  0#]IIDAT(cЋ%-so# }cc``` ` zoF0! '&_*t (2}f`PZ Re` UABP! PT(7`P@]?Yl`odɷla zH&9X&@pfj# AvEB%tEXtdate:create2016-11-05T07:32:48-07:00H%tEXtdate:modify2016-11-05T07:32:48-07:00FtEXtlabelqIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/113_3.png000066400000000000000000000007721506673203700227400ustar00rootroot00000000000000PNG  IHDR+G'gAMA a cHRMz&u0`:pQ<bKGD̿tIME  :ÈIDAT8c0J{S O%| @p?*qQ  Kg*'evv]1B ZqBB\ u|uȿp 4K} 1"D=.. b=K̃K,%rA"Oi? :TɃ e$ׂ 6[sL%/g@ fA{ FQcn,Xt @"1;%a%tEXtdate:create2016-11-05T07:32:58-07:00H%tEXtdate:modify2016-11-05T07:32:58-07:00a8tEXtlabelqIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/113_4.png000066400000000000000000000010741506673203700227350ustar00rootroot00000000000000PNG  IHDR5gAMA a cHRMz&u0`:pQ<bKGD̿tIME !D1IDAT8c0osy /7+ (x3&ً$ʾ7g@KQdP>,[dUsNC]Yv|jdL0;d!n8B H &?$`)rF>3eo#dYȲ_d]H1ue[d-@L'HH5`v!Br;rX^ d`%8 |6Thχ"ERc>j{g$i-M3P),ƔE"~YKHB%tEXtdate:create2016-11-05T07:33:08-07:00-%tEXtdate:modify2016-11-05T07:33:08-07:00ΕbtEXtlabelqIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/113_5.png000066400000000000000000000011671506673203700227410ustar00rootroot00000000000000PNG  IHDR#@ґqgAMA a cHRMz&u0`:pQ<bKGD̿tIME !&9lIDATHcO0UC/5fp\>3jNe_0ˑܵf@ 9+ʀb9ǀ87Ȁ4D$t2~RPGsKt5PDm 59d^F ~5 <#(jEQᕠ9&›o0Zz`CVc Q M O{JasiT5P@xPGQ CU!*&8ՂpBv(O=D({*Ll7-+ȯsZjPӵJA^%tEXtdate:create2016-11-05T07:33:18-07:00{9-@%tEXtdate:modify2016-11-05T07:33:18-07:00 dtEXtlabelqIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/113_6.png000066400000000000000000000012761506673203700227430ustar00rootroot00000000000000PNG  IHDR(JIgAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDATHcO$`U8pT! 5Kp+rb@ng@)\ƀ(\̀lPx:Mt-rѫ|xkYE>L Rc(<yVIt0OC]KT&dЋ̆S *<2}EWEBi$pTBcs S)\ UP EB?L( Ab<_a)3%hQ(o@W荦0GbDS PsEO=8ϐ`tC:DG(ym?!hoGP 2 RI LKZMeASXuP)m|@+]X2Ls~ǩ>}B40pT! 
Q &/6d%tEXtdate:create2016-11-05T07:33:27-07:00ZJ%tEXtdate:modify2016-11-05T07:33:27-07:00rtEXtlabelqIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/113_7.png000066400000000000000000000013701506673203700227370ustar00rootroot00000000000000PNG  IHDR/T71rgAMA a cHRMz&u0`:pQ<bKGD̿tIME !%WIDATHK(DQ3Ƙ"I#F,XĬXBV$!IPBb!ư!d$w#|1YXn{ν ѽu?'U޿XHW;?h(wZ5}|zoA&R}"-D2qmAUJXWj6  |N#xOl>PSzzh"G)["}8ϼ/0{+}y ׸xѝ.Tt>}U䣁\l̎άzY^ً["PE%tEXtdate:create2016-11-05T07:33:37-07:00TZ%tEXtdate:modify2016-11-05T07:33:37-07:00 htEXtlabelqIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/114_0.png000066400000000000000000000004531506673203700227320ustar00rootroot00000000000000PNG  IHDR IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  fp IDATc r]0 ;X#H c/kpA%tEXtdate:create2016-11-05T07:32:29-07:00J)%tEXtdate:modify2016-11-05T07:32:29-07:00^tEXtlabelr# [IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/114_1.png000066400000000000000000000004751506673203700227370ustar00rootroot00000000000000PNG  IHDR MRgAMA a cHRMz&u0`:pQ<bKGD̿tIME  '2IDATc d2KX!L% &׃ j+*$fR^bG%tEXtdate:create2016-11-05T07:32:39-07:00pJ%tEXtdate:modify2016-11-05T07:32:39-07:00 tEXtlabelr# [IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/114_2.png000066400000000000000000000005241506673203700227330ustar00rootroot00000000000000PNG  IHDR 청HgAMA a cHRMz&u0`:pQ<bKGD̿tIME  0#]IIIDATc was-` U1 PhJ0S@ȼ((](T9`'Z4U%tEXtdate:create2016-11-05T07:32:48-07:00H%tEXtdate:modify2016-11-05T07:32:48-07:00FtEXtlabelr# [IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/114_3.png000066400000000000000000000005531506673203700227360ustar00rootroot00000000000000PNG  IHDR+T%WgAMA a cHRMz&u0`:pQ<bKGD̿tIME  :È`IDAT(cm]_3K iLd HZXO! a X?lt~ |Bx?,nI}=L*V%tEXtdate:create2016-11-05T07:32:58-07:00H%tEXtdate:modify2016-11-05T07:32:58-07:00a8tEXtlabelr# [IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/114_4.png000066400000000000000000000006071506673203700227370ustar00rootroot00000000000000PNG  IHDR5dBgAMA a cHRMz&u0`:pQ<bKGD̿tIME !D|IDAT8c0 #x4\i&'n17*4bH*+b  L j ;}d(N"8=,b$ZP F Dl%tEXtdate:create2016-11-05T07:33:08-07:00-%tEXtdate:modify2016-11-05T07:33:08-07:00ΕbtEXtlabelr# [IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/114_5.png000066400000000000000000000006351506673203700227410ustar00rootroot00000000000000PNG  IHDR@%gAMA a cHRMz&u0`:pQ<bKGD̿tIME !&9IDAT8c0J ` X3dQ$` (:N%v#Kd#KȀy3Mf(@v'D?T$>.QEBqHH%QKD).Q $" ~p%tEXtdate:create2016-11-05T07:33:18-07:00{9-@%tEXtdate:modify2016-11-05T07:33:18-07:00 dtEXtlabelr# [IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/114_6.png000066400000000000000000000006711506673203700227420ustar00rootroot00000000000000PNG  IHDRJompgAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDATHc0ʍʑ+y<jЬ P $7]?Geb8*W$eE In986fa`*B3̎] n*S>FBF%_FC %zPwB{`( z"AbO5f,nnȀlbrIدXx10No Db;R|4uɽA6':L!-%tEXtdate:create2016-11-05T07:32:58-07:00H%tEXtdate:modify2016-11-05T07:32:58-07:00a8tEXtlabelsT IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/115_4.png000066400000000000000000000010501506673203700227310ustar00rootroot00000000000000PNG  IHDR5~.tgAMA a cHRMz&u0`:pQ<bKGD̿tIME !DIDAT8c0J\hU>벫$`B{8; Hb@ \6:t,Kl@K"\ 鏉`J1%^ec< NYP%Z%H>"gs%eaPh~6D#IČ5`-`a!bA% Qp, d C!/`V"7 q$/(Qξ 'L#&^ݑ(%Sx%}%tEXtdate:create2016-11-05T07:33:08-07:00-%tEXtdate:modify2016-11-05T07:33:08-07:00ΕbtEXtlabelsT IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/115_5.png000066400000000000000000000011521506673203700227350ustar00rootroot00000000000000PNG  IHDR@,*HgAMA a cHRMz&u0`:pQ<bKGD̿tIME !&9_IDAT8c0J<_kE伛KeF(Uc#4|# T䂲^$[ ~Թשc Ldd,, sb>$=rJCz|&~i߰ |i0$!A!ZE : IgZ|>HDf\N6˲L&׋Чf^bM` Z $_ndL]!n,$ 
Q+&,<~J&C<|oJ!+`>ЃÈlFM.Rؾ+BZemȴec`um<ٸ`3Βkȯ%tEXtdate:create2016-11-05T07:33:18-07:00{9-@%tEXtdate:modify2016-11-05T07:33:18-07:00 dtEXtlabelsT IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/115_6.png000066400000000000000000000012571506673203700227440ustar00rootroot00000000000000PNG  IHDR J~ gAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDATHcO0*U0h\l-{ ]B1T Yl IyMpAo/i[ \ LzÿvsMx ufeH UPZdo.*%Bt L·pŅ538̀y9"V?g2->lkcD^h}OFZ$z5EE9 c*h̰)a0+?zV!(0)\A$W68! b ~A= a QFv5aFX@]ǖC<y{H:]!jz8ρ,7/F.NS)d4*U `uMe{%tEXtdate:create2016-11-05T07:33:28-07:00*%tEXtdate:modify2016-11-05T07:33:28-07:00tEXtlabelsT IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/115_7.png000066400000000000000000000013761506673203700227470ustar00rootroot00000000000000PNG  IHDR&T˼gAMA a cHRMz&u0`:pQ<bKGD̿tIME !&ΒIDATHcO`U6lT9vVW/( ʮ`Wâl4:`_ 6UvU E(6PtI_5uD7"Dp* 50Up \$ B-)$#b%NaSv?xׂ3=Gw^1$u 8QR6e Gz,?vH17U]aLeAS}O3FU2 P.2`ZW2 W7 `ʜ) Q:.fdekUE~(k){Pv@QU A$#H$ s,vR <<0jBĞ}Zpsם=8 ˀ (%"?+pcb`4L]-- |6lTj;%tEXtdate:create2016-11-05T07:33:38-07:009*=%tEXtdate:modify2016-11-05T07:33:38-07:00HAtEXtlabelsT IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/116_0.png000066400000000000000000000004531506673203700227340ustar00rootroot00000000000000PNG  IHDR y9JgAMA a cHRMz&u0`:pQ<bKGD̿tIME  D IDATc {) CE8!v%tEXtdate:create2016-11-05T07:32:30-07:001d%tEXtdate:modify2016-11-05T07:32:30-07:00ltEXtlabeltn~nIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/116_1.png000066400000000000000000000004721506673203700227360ustar00rootroot00000000000000PNG  IHDRظgAMA a cHRMz&u0`:pQ<bKGD̿tIME  '/IDATc HA̱33@g| *& ~$tY%tEXtdate:create2016-11-05T07:32:39-07:00pJ%tEXtdate:modify2016-11-05T07:32:39-07:00 tEXtlabeltn~nIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/116_2.png000066400000000000000000000005001506673203700227270ustar00rootroot00000000000000PNG  IHDR pvgAMA a cHRMz&u0`:pQ<bKGD̿tIME  1TZy5IDATcafff``N>'18y*@z6U6p#\K/ǰJ%tEXtdate:create2016-11-05T07:32:49-07:00zlC%tEXtdate:modify2016-11-05T07:32:49-07:00 1tEXtlabeltn~nIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/116_3.png000066400000000000000000000005041506673203700227340ustar00rootroot00000000000000PNG  IHDR+gAMA a cHRMz&u0`:pQ<bKGD̿tIME  :È9IDAT(c o^ķf``(8ÿ ;{OC>E+E%%tEXtdate:create2016-11-05T07:32:58-07:00H%tEXtdate:modify2016-11-05T07:32:58-07:00a8tEXtlabeltn~nIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/116_4.png000066400000000000000000000005041506673203700227350ustar00rootroot00000000000000PNG  IHDR5i gAMA a cHRMz&u0`:pQ<bKGD̿tIME !D9IDAT(c t{=lAfA/d΀ nS$fmm1OG* Bgg%tEXtdate:create2016-11-05T07:33:08-07:00-%tEXtdate:modify2016-11-05T07:33:08-07:00ΕbtEXtlabeltn~nIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/116_5.png000066400000000000000000000005101506673203700227330ustar00rootroot00000000000000PNG  IHDR@?cgAMA a cHRMz&u0`:pQ<bKGD̿tIME !&9=IDAT8c 0 r`U:f|Zpkè耉QQj#? 
o%tEXtdate:create2016-11-05T07:33:18-07:00{9-@%tEXtdate:modify2016-11-05T07:33:18-07:00 dtEXtlabeltn~nIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/116_6.png000066400000000000000000000005141506673203700227400ustar00rootroot00000000000000PNG  IHDRJkgAMA a cHRMz&u0`:pQ<bKGD̿tIME !AIDAT8c 0 Oh0”QgQbeA"#́H3`"36*3*C 0*3TeȄ^gW %tEXtdate:create2016-11-05T07:33:28-07:00*%tEXtdate:modify2016-11-05T07:33:28-07:00tEXtlabeltn~nIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/116_7.png000066400000000000000000000005211506673203700227370ustar00rootroot00000000000000PNG  IHDRTmIgAMA a cHRMz&u0`:pQ<bKGD̿tIME !&ΒFIDATHc0J"$oX$ ܙ z0H0"̀ hEA𣒣#R+$BY~ %tEXtdate:create2016-11-05T07:33:38-07:009*=%tEXtdate:modify2016-11-05T07:33:38-07:00HAtEXtlabeltn~nIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/117_0.png000066400000000000000000000004721506673203700227360ustar00rootroot00000000000000PNG  IHDR tgAMA a cHRMz&u0`:pQ<bKGD̿tIME  D/IDATc b7#NkpR#@u"XAi)+@(Y%tEXtdate:create2016-11-05T07:32:30-07:001d%tEXtdate:modify2016-11-05T07:32:30-07:00ltEXtlabeluiNIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/117_1.png000066400000000000000000000005361506673203700227400ustar00rootroot00000000000000PNG  IHDR >IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  'SIDATc sf dr@<7v/ KfHx+6>6ی!%:##ĤC%tEXtdate:create2016-11-05T07:32:39-07:00pJ%tEXtdate:modify2016-11-05T07:32:39-07:00 tEXtlabeluiNIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/117_2.png000066400000000000000000000006021506673203700227330ustar00rootroot00000000000000PNG  IHDR :w-gAMA a cHRMz&u0`:pQ<bKGD̿tIME  1TZywIDAT(csv;<݅@BOQ4@B_B B `K30tA<B RA"uuq/o2 hH(CΏC:g;b`/ȕ%tEXtdate:create2016-11-05T07:32:49-07:00zlC%tEXtdate:modify2016-11-05T07:32:49-07:00 1tEXtlabeluiNIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/117_3.png000066400000000000000000000006451506673203700227430ustar00rootroot00000000000000PNG  IHDR+G'gAMA a cHRMz&u0`:pQ<bKGD̿tIME  ;IDAT8c0J/ n.+6L` K . $,1K,I|K1o$H ~bk`h{i_;vR_:%Q (` <?9#7#E9PQ&ǃhqdY>9] J PHe%tEXtdate:create2016-11-05T07:32:58-07:00H%tEXtdate:modify2016-11-05T07:32:58-07:00a8tEXtlabeluiNIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/117_4.png000066400000000000000000000007261506673203700227440ustar00rootroot00000000000000PNG  IHDR5gAMA a cHRMz&u0`:pQ<bKGD̿tIME !DIDAT8c0uY} F`lDV${,"k b!^ˮȆ؆H m 6GlHDt\дK|0W6X&DYX:x?UR$#GT8VHCypϟ C%tEXtdate:create2016-11-05T07:33:08-07:00-%tEXtdate:modify2016-11-05T07:33:08-07:00ΕbtEXtlabeluiNIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/117_5.png000066400000000000000000000010031506673203700227320ustar00rootroot00000000000000PNG  IHDR"@ZOgAMA a cHRMz&u0`:pQ<bKGD̿tIME !&9IDATHcO0*UB%`>0?X~T0Rr gx@\(,QE?F`Df0EK %u<%!JAăyLOA* !#2U g WDu;gOY優a`䗗`qÕogD? )E۾HJP!~?3"+/{]Of&Ks2𩸔lU2JtǸBģ%tEXtdate:create2016-11-05T07:33:18-07:00{9-@%tEXtdate:modify2016-11-05T07:33:18-07:00 dtEXtlabeluiNIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/117_6.png000066400000000000000000000010451506673203700227410ustar00rootroot00000000000000PNG  IHDR(JIgAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDATHcO$`U8pT!3@stkQ /P)2*'e*| ". 
SUx ]D\pTtBq?KPMh@k` s@L.*\S*rU+T>\a;T7-0pOb^C {AOM}Ƈ* Iy8`UFp*#)_€ H|BQU:MQ]%h `Tr?/%r*Ӟ?6^/+o8zǨQ 8a"%tEXtdate:create2016-11-05T07:33:28-07:00*%tEXtdate:modify2016-11-05T07:33:28-07:00tEXtlabeluiNIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/117_7.png000066400000000000000000000011101506673203700227330ustar00rootroot00000000000000PNG  IHDR.TkZLgAMA a cHRMz&u0`:pQ<bKGD̿tIME !&Β=IDATHcO`U>|Tr+gm,P9G*(_ W@Mpǰ|\TD8spP!,aʟ;BE`*ʱ+πpSTJ?\d շaRQ`bjʛaR‚a)r0abQ$T#) e:{@.\{S# pAg(70 E}5qd(3"є?ŧ|4VCJ83ǢBVlMǪ'5hj9v' z,WMa.&V^imXR G*9O ω+J%tEXtdate:create2016-11-05T07:33:38-07:009*=%tEXtdate:modify2016-11-05T07:33:38-07:00HAtEXtlabeluiNIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/118_0.png000066400000000000000000000004731506673203700227400ustar00rootroot00000000000000PNG  IHDR ggAMA a cHRMz&u0`:pQ<bKGD̿tIME  D0IDATc E?_2( yU<<9N"%tEXtdate:create2016-11-05T07:32:30-07:001d%tEXtdate:modify2016-11-05T07:32:30-07:00ltEXtlabelv$`BIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/118_1.png000066400000000000000000000005611506673203700227370ustar00rootroot00000000000000PNG  IHDR >IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  'fIDATc (ȰİaH509XxĐW -/ˣ a JX"x/8Rz,NG%tEXtdate:create2016-11-05T07:32:39-07:00pJ%tEXtdate:modify2016-11-05T07:32:39-07:00 tEXtlabelv$`BIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/118_2.png000066400000000000000000000006721506673203700227430ustar00rootroot00000000000000PNG  IHDR gAMA a cHRMz&u0`:pQ<bKGD̿tIME  1TZyIDAT(c XO-H> WAjDC,޷; @L]J @70L f`@%#fL *ʐB%FT200pD;+FbbA%tEXtdate:create2016-11-05T07:32:49-07:00zlC%tEXtdate:modify2016-11-05T07:32:49-07:00 1tEXtlabelv$`BIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/118_3.png000066400000000000000000000007661506673203700227500ustar00rootroot00000000000000PNG  IHDR+G'gAMA a cHRMz&u0`:pQ<bKGD̿tIME  ;IDAT8c0J/q5 n7X3%&a}KYtA)2J٢_{AqpWYy`^7%.edUڑa@Z3Vߧ]tM"_i"2C&lE +UOhk@܌t?bJ0P\Z h>`{6z#1!2  $x?$%tEXtdate:create2016-11-05T07:32:59-07:00C0%tEXtdate:modify2016-11-05T07:32:59-07:00ǛtEXtlabelv$`BIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/118_4.png000066400000000000000000000011001506673203700227300ustar00rootroot00000000000000PNG  IHDR5s0gAMA a cHRMz&u0`:pQ<bKGD̿tIME !D5IDAT8c0J3: @ I HH7XrI,2b_ G\(H@7`s @? \U w|}P٣`~+`Ϳ=$t,ę,@B`ZSJM~~3Q4H?ԗrPO@Q]$ %#@A@4&vfD|wؒK&&cOL(Z#XzRqIccx$_ үU?^ԐVY%tEXtdate:create2016-11-05T07:33:08-07:00-%tEXtdate:modify2016-11-05T07:33:08-07:00ΕbtEXtlabelv$`BIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/118_5.png000066400000000000000000000012151506673203700227400ustar00rootroot00000000000000PNG  IHDR$@WgAMA a cHRMz&u0`:pQ<bKGD̿tIME !&9IDATHcO`U4hp*bTE_D<'(6Cìk+QA7}!3B8 k`O`HAPѷhD`r8 2ކ5Esb`?TE¼;L`j{ s !1XB/"+1S Nz s:KRK9Y` l);8V`ɺ^`z$߃ Ye`ǡ+HZD  ,S=?O*Z`5*)V4?^E7@@EsPtA7!EC'/Š_OF"|4.Ut%tEXtdate:create2016-11-05T07:33:18-07:00{9-@%tEXtdate:modify2016-11-05T07:33:18-07:00 dtEXtlabelv$`BIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/118_6.png000066400000000000000000000012521506673203700227420ustar00rootroot00000000000000PNG  IHDR)Jk"gAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDATH9KA&B F4DA--*X X+xDQ<""X JPDgJSdߏ2̕te5ep uh =,sAkN.gy-jLKF}Pz^jxIp@s3TrFb:zuw4Ni<%. Z2o7,Sò &+VW䱉EjHS7"Ka2_i7J$O N,$]6q ֤#:/((mdɸ 62le^`+I0 $wvp% !CBXd6K !. 
[AS EVÕo($lL%tEXtdate:create2016-11-05T07:33:28-07:00*%tEXtdate:modify2016-11-05T07:33:28-07:00tEXtlabelv$`BIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/118_7.png000066400000000000000000000013561506673203700227500ustar00rootroot00000000000000PNG  IHDR0T gAMA a cHRMz&u0`:pQ<bKGD̿tIME !&ΒIDATX+Caø+IH)"JRK-?Iu\ .Ȕ …X_Mq[)Y6J@Q8 SgH~V/BF xX`F]ju2p] PZlL<f%36^IhjDX&lWDBK`Xy4`,L=e*Mԁ3|I~EJ[}i/|Z< `NYtfic‹.H/e /U!W6x; Z%$󅏓@|b gxUpP>Hʷ:^o RnbPſ4Y@ x3rL56@?4]%tEXtdate:create2016-11-05T07:33:38-07:009*=%tEXtdate:modify2016-11-05T07:33:38-07:00HAtEXtlabelv$`BIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/119_0.png000066400000000000000000000005161506673203700227370ustar00rootroot00000000000000PNG  IHDR cCgAMA a cHRMz&u0`:pQ<bKGD̿tIME  DCIDATc hQ}9 u{LoV]tOywth۹Cpa3+^sq%tEXtdate:create2016-11-05T07:32:30-07:001d%tEXtdate:modify2016-11-05T07:32:30-07:00ltEXtlabelwSg/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/119_1.png000066400000000000000000000006601506673203700227400ustar00rootroot00000000000000PNG  IHDRgAMA a cHRMz&u0`:pQ<bKGD̿tIME  'IDATc h$+/C" "@b؞EHwn3T?a 0| o-e V0T0igٻ܁ ` spXqI30= $7Xjظ D8,MCe0i%tEXtdate:create2016-11-05T07:32:39-07:00pJ%tEXtdate:modify2016-11-05T07:32:39-07:00 tEXtlabelwSg/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/119_2.png000066400000000000000000000010461506673203700227400ustar00rootroot00000000000000PNG  IHDR \gAMA a cHRMz&u0`:pQ<bKGD̿tIME  1TZyIDAT(c0$}hr6BWC810XC_Ul11600=** b20Brrj bsC k``a?߲ y?da`5C "ї3@k10^FS>1TPlЉ׀@94``4|z n0cI4zw178Z:hp00a-@@b@\5Cm@18HtF@`cIMbۉm`=U A &% EtAak؁X ' Y,j00XA?E a&XQVctU9}ь8}"<`Tq@V@ȓ%tEXtdate:create2016-11-05T07:32:59-07:00C0%tEXtdate:modify2016-11-05T07:32:59-07:00ǛtEXtlabelwSg/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/119_4.png000066400000000000000000000013671506673203700227500ustar00rootroot00000000000000PNG  IHDR,56VgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 
eCLIDATHcO`UUrU2 V!jt|@:/3auZ{4#vO) e;6 8HFY0e#̃BDģsO ;HŒASXPE:#@.7)9XZr#cS9X;½]_uS#!g؅ ]pn+{J :[oR\$n6(q*)+{?n4ħ)&Fu%tEXtdate:create2016-11-05T07:33:09-07:00&j%tEXtdate:modify2016-11-05T07:33:09-07:00`tEXtlabelwSg/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/119_5.png000066400000000000000000000015661506673203700227520ustar00rootroot00000000000000PNG  IHDR5@b`gAMA a cHRMz&u0`:pQ<bKGD̿tIME !&9kIDATH]HQ"(AbR* /E>d>b`RD"DDHBP"AQdQ&)Yn3sg8;>܇;~|s!<˳tQ{(h=g4A 9YPNɾWty*;n!r lhù0n3`<GڃJ٭'tϐ6e/2BR@9J5OnWA At܊IVBLBr s;$Z-Ě;ipѰr4"nhMkV2:Oݭ(lEj~DTt6 *S[{8qxg_s;wn%tEXtdate:create2016-11-05T07:33:18-07:00{9-@%tEXtdate:modify2016-11-05T07:33:18-07:00 dtEXtlabelwSg/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/119_6.png000066400000000000000000000017211506673203700227440ustar00rootroot00000000000000PNG  IHDR=J-.LgAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDATXOHQ߶i E< ֢]*Ȋu$,e2c@y PAI \ qvߝ /}3ye0|۷}۷/o&Ӣ=6V{i}`cet;#X/j!}!9"#rJ]BT2OribA.O{~nf qd[=@wM64i޵t'zp~Lҍz:YhDcD%ɆjX^!lvZof==?d6?.* ;p!8Dx=/ &bvjwQ,ys_DTa`ff\U /+Jmm3NzxT-܁r9,JӼ)x7'~GR(~ eqG*pytR΁>*cp ^bn, kue;8K5 <v 4G:^ᱴ o«zHk]<| > fSezo4j\G mZ.- N.:JW6R.o]lUF..6O4]D,7OakՆKjR{2pRx;p-UX'{bQ8fK=`[ TG{|!۾/h3%tEXtdate:create2016-11-05T07:33:28-07:00*%tEXtdate:modify2016-11-05T07:33:28-07:00tEXtlabelwSg/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/119_7.png000066400000000000000000000020761506673203700227510ustar00rootroot00000000000000PNG  IHDRGTugAMA a cHRMz&u0`:pQ<bKGD̿tIME !&Β3IDATXkHQǯ>+2,BHI J"3K%T=( E h)KFPkP2TR2IMC|v3sl`\+ f{l=r0,OER#ˬ\LP,zC8B\:cwMӌj2xX0Ci'`|fHհ_ K@n4e$ҸL('L-[0lAݴaoü+GL*WF(KpE8Z@4 @<͘H -k7EiG~zj1vI'.Ni?ƯTM{)%o |_^q{3n0LJbjaY au,>YURy1'%GZx|TG㉟+|EZv8z,kuz{êgbOGٓV=pK7&8u7&1A#WjO|h%ˆP4OYa{l=/ "v%tEXtdate:create2016-11-05T07:33:38-07:009*=%tEXtdate:modify2016-11-05T07:33:38-07:00HAtEXtlabelwSg/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/120_0.png000066400000000000000000000004741506673203700227320ustar00rootroot00000000000000PNG  IHDR ggAMA a cHRMz&u0`:pQ<bKGD̿tIME  D1IDATc j"s4ğo8 j"kU~U;L{-%tEXtdate:create2016-11-05T07:32:30-07:001d%tEXtdate:modify2016-11-05T07:32:30-07:00ltEXtlabelx2EIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/120_1.png000066400000000000000000000005741506673203700227340ustar00rootroot00000000000000PNG  IHDR S"gAMA a cHRMz&u0`:pQ<bKGD̿tIME  (01SqIDATc(t1H?׊!RG`e0̇ 3 =g C^0`A+_z @PQP9ڌò%tEXtdate:create2016-11-05T07:32:39-07:00pJ%tEXtdate:modify2016-11-05T07:32:39-07:00 tEXtlabelx2EIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/120_2.png000066400000000000000000000007041506673203700227300ustar00rootroot00000000000000PNG  IHDR :w-gAMA a cHRMz&u0`:pQ<bKGD̿tIME  1TZyIDAT(c?@`U׹Z2'Ps.?nVO= x:#|B(b9$B h'?O 1p~Sݵ*t z{`o 4d` Jg`|pfD{V%tEXtdate:create2016-11-05T07:32:49-07:00zlC%tEXtdate:modify2016-11-05T07:32:49-07:00 1tEXtlabelx2EIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/120_3.png000066400000000000000000000007751506673203700227410ustar00rootroot00000000000000PNG  IHDR+G'gAMA a cHRMz&u0`:pQ<bKGD̿tIME  ;IDAT8c0J q!{&dr6%,;0~y % vcÑ- 1,HʽGqUPH~/`:w2@A A~a.E`6HϿutbHΣI7g"@,@2YP mH@i_%p> 䉽zg@%@-e\l`=@v&-O<_tK>CJN\Za-p%tEXtdate:create2016-11-05T07:32:59-07:00C0%tEXtdate:modify2016-11-05T07:32:59-07:00ǛtEXtlabelx2EIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/120_4.png000066400000000000000000000011041506673203700227250ustar00rootroot00000000000000PNG  IHDR5gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! eCL9IDAT8c0A\*U0݌ 7  D{nYVO@ؤHjC Fv_[٧'bIE>֏9 q l vAFh!^ /b ɽP\tU6"kn]XduUH[DD]ֱ L>~osnC#l?X2Ĝ f!^(~ xe-;x. 
?VUW\a KO~VeD}J[qQ$H%tEXtdate:create2016-11-05T07:33:09-07:00&j%tEXtdate:modify2016-11-05T07:33:09-07:00`tEXtlabelx2EIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/120_5.png000066400000000000000000000011721506673203700227330ustar00rootroot00000000000000PNG  IHDR"@ZOgAMA a cHRMz&u0`:pQ<bKGD̿tIME !&9oIDATHcO0*U2J Q:7)@Lk1!E>5E0%~[^Bu0AUU/>ʀ)!hk!*04+XB/U0Cfе@4wuAXB XJLb| p{-`9+CaJp ׸2KI¹K+9„P"J`IN0dUySF7B`t%Du9ДAKAd U2"yL!JB?!u%B%0%!"aJA$]70!NZ-ü"QR̃љhU29I %tEXtdate:create2016-11-05T07:33:18-07:00{9-@%tEXtdate:modify2016-11-05T07:33:18-07:00 dtEXtlabelx2EIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/120_6.png000066400000000000000000000013021506673203700227270ustar00rootroot00000000000000PNG  IHDR(JIgAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDATHcO$`U8pT!r` ?*0@@&'PiHVf1E(Fqc%TTn, PPMh|.#xAe?x< KsC, QʝAXcfTV9hhYмpW58)$' ~G:hP+La MO}^ (k}v {\ ϳAT@UFPCjI3ʕ :e =Ŧp/#D*]a*|/ lr^C91 1-P5t+2\ P/TOъJP*\paX|YD,Q SPxKЅ 2g1ʰ * ڠb/! 2CxZcU鋞Q Bkz\*.%tEXtdate:create2016-11-05T07:33:28-07:00*%tEXtdate:modify2016-11-05T07:33:28-07:00tEXtlabelx2EIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/120_7.png000066400000000000000000000014051506673203700227340ustar00rootroot00000000000000PNG  IHDR.TkZLgAMA a cHRMz&u0`:pQ<bKGD̿tIME !&ΒIDATHK(DQo3a&ȳ&yll,'RJY` ïsTN[j<0>{&4JB8 |ΈN6dIGE @CY%|Y>oq7SLhq[1}ťkEqH_2,~Ǚxn?!}i)C\ʶvT-jGSérTp]sNE~:}r VuBO `~$$P6̞ |"d7C] ݂ZOk0#`n6G%A\/ɫ`g#B!H "bG +1+[Fo1 @,;Ȳ@b1@[n+5Ab<_o1n>cCDG`0H4b֫費A]@T(FS%̴XRkv(8-]EA%}CPSXe `b8]H?v`q$cֺ,X;p$cֺ,X첐|1HRvY˚$ r+TZ%tEXtdate:create2016-11-05T07:33:09-07:00&j%tEXtdate:modify2016-11-05T07:33:09-07:00`tEXtlabelyIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/121_5.png000066400000000000000000000013261506673203700227350ustar00rootroot00000000000000PNG  IHDR"@ZOgAMA a cHRMz&u0`:pQ<bKGD̿tIME !! 6IDATHO(am?i$"9:99,_Ie` Q\)qUJN.(Jqp˶&3?qq}y~}fȐ I3Zvo ~A7G8Fj$@ga(.MU|4&'%>Kxh޵=%qBۗk#u1,-uXt9vw[E<,~M !d*Z٤w4p ^mNv=s0Br;Cw 2QE*kʉ)%tEXtdate:create2016-11-05T07:33:19-07:00N&%tEXtdate:modify2016-11-05T07:33:19-07:00HtEXtlabelyIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/121_6.png000066400000000000000000000014361506673203700227400ustar00rootroot00000000000000PNG  IHDR(JIgAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDATH]HSa9u)2J3PZ^B aUtE "(FA!Cb ]FD*#EC"Qx1 T#L{b{;;ٞtf{<=elYYgHR5Z1ņES7!4IA;wkzrjy4\C ] ș8Ůt7%QaPjH?i H!Jtw4RuPz8%@򢧄xE;|nEv[|$K 6Iα!xx]_+x.o]- ~$e #^'*ONLVن2J<%tEXtdate:create2016-11-05T07:33:28-07:00*%tEXtdate:modify2016-11-05T07:33:28-07:00tEXtlabelyIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/121_7.png000066400000000000000000000015421506673203700227370ustar00rootroot00000000000000PNG  IHDR.TkZLgAMA a cHRMz&u0`:pQ<bKGD̿tIME !&ΒWIDATHOHaTdiY$졢EEBy萡VR^,:ԡB A菇CHAHb:Oȼ73;n]v Lk%ȋ78-MOȪѓ 1+>ZΨ9c`>FI7 7g,w9d})EɀŻV8~b}3Q}8==1Keg6;7lZUg*x9Ms9do:XI2z?Kd 96:Om7";RzM}ؿڽn=MS#8YgcޛBǁ֙Fj!Z7En<_J=SD%wPJn5y܄޺|zU.ϴyb8[zD,x7b[kt҉h5 >D]Ptꌆqx;>,]>( 8A\|T-%tEXtdate:create2016-11-05T07:32:30-07:001d%tEXtdate:modify2016-11-05T07:32:30-07:00ltEXtlabelz-SiIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/122_1.png000066400000000000000000000005401506673203700227270ustar00rootroot00000000000000PNG  IHDR S"gAMA a cHRMz&u0`:pQ<bKGD̿tIME  (01SUIDATc(3@|'A63H1L`qf'96= a l@|UZM/  h%tEXtdate:create2016-11-05T07:32:40-07:00}%tEXtdate:modify2016-11-05T07:32:40-07:00tEXtlabelz-SiIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/122_2.png000066400000000000000000000006041506673203700227310ustar00rootroot00000000000000PNG  IHDR  .gAMA a cHRMz&u0`:pQ<bKGD̿tIME  1TZyyIDAT(co`9.VGl >ɏ מ!V"'Y" C%!``ກ쯇 PkbH3 6 
"]B%tEXtdate:create2016-11-05T07:32:49-07:00zlC%tEXtdate:modify2016-11-05T07:32:49-07:00 1tEXtlabelz-SiIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/122_3.png000066400000000000000000000006451506673203700227370ustar00rootroot00000000000000PNG  IHDR+Y;'gAMA a cHRMz&u0`:pQ<bKGD̿tIME  ;IDAT(c0 c^̀ c^UuԐ0 ~nf(.t `8( ]xP4wnq} (j](u =L3geF&E *~.8,{ll9 D:,۲=%tEXtdate:create2016-11-05T07:32:59-07:00C0%tEXtdate:modify2016-11-05T07:32:59-07:00ǛtEXtlabelz-SiIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/122_4.png000066400000000000000000000007121506673203700227330ustar00rootroot00000000000000PNG  IHDR5gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! eCLIDAT8c0 9oLqp&Wf )ܹI,` Iq``  )Xa I`b ``& )ї&'Y@RHJ=6l.lib'#H.K1<8 ǒ`C1{"S+n}u~yA'%tEXtdate:create2016-11-05T07:33:09-07:00&j%tEXtdate:modify2016-11-05T07:33:09-07:00`tEXtlabelz-SiIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/122_5.png000066400000000000000000000007641506673203700227430ustar00rootroot00000000000000PNG  IHDR!@'ALgAMA a cHRMz&u0`:pQ<bKGD̿tIME !! 6IDATHcO0UAspT'd,Bxr(b1Na+d D_Tc 0]Q3_HébV17 g>+K?Gƽ8G?ĎT\+PKoSqhQ3,`?pVr?.;Rq(~RIb2*x W#?@@kqB*X@HB`THV\3T%tEXtdate:create2016-11-05T07:33:19-07:00N&%tEXtdate:modify2016-11-05T07:33:19-07:00HtEXtlabelz-SiIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/122_6.png000066400000000000000000000010351506673203700227340ustar00rootroot00000000000000PNG  IHDR%JWPgAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDATHcO `U5jTnU ebTu'B_"TqOPOx(/TF-s}^"їxT_lwAK!QSjN|(?UG QY ϦD+ޮOp9(ֈcPמ(ֈ%(ֈAFJF`88jd kFlY:;ֈ 8؀j_ü/t i%tEXtdate:create2016-11-05T07:33:38-07:009*=%tEXtdate:modify2016-11-05T07:33:38-07:00HAtEXtlabelz-SiIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/123_0.png000066400000000000000000000005001506673203700227230ustar00rootroot00000000000000PNG  IHDR y9JgAMA a cHRMz&u0`:pQ<bKGD̿tIME  $9x5IDATc WmQ2< 9s % $*GT!}5*#BF%tEXtdate:create2016-11-05T07:32:36-07:00:^%tEXtdate:modify2016-11-05T07:32:36-07:00tEXtlabel{ZcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/123_1.png000066400000000000000000000006001506673203700227250ustar00rootroot00000000000000PNG  IHDR 39gAMA a cHRMz&u0`:pQ<bKGD̿tIME  -@[%uIDATc @9W%ʮf`+mX3,gXpWfjڕ`x L?)psl6ˑang`*$s3_ȿ ]R%tEXtdate:create2016-11-05T07:32:45-07:00)%tEXtdate:modify2016-11-05T07:32:45-07:00̑ftEXtlabel{ZcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/123_2.png000066400000000000000000000006761506673203700227430ustar00rootroot00000000000000PNG  IHDR G}ugAMA a cHRMz&u0`:pQ<bKGD̿tIME  79ܦIDATc ? 11?@_  w20$#Є̏g`h'Ё``%/24)g? }YAO o0 Efd~ C2A 1ɬp8M ml&h9%tEXtdate:create2016-11-05T07:32:55-07:00qf)D%tEXtdate:modify2016-11-05T07:32:55-07:00;tEXtlabel{ZcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/123_3.png000066400000000000000000000007671506673203700227450ustar00rootroot00000000000000PNG  IHDR+gAMA a cHRMz&u0`:pQ<bKGD̿tIME !lgIDAT(c }wV`dX$vY j3` H%Auؑ joP-@h,bT%ܒ&f@ ["v( .,ӚYpS]Ltw 1+Hq$F_TL<*G(օ0X.XP, MK, M(f&6(Ul+æD{&Fň!ά $D5cK%tEXtdate:create2016-11-05T07:33:05-07:00DL%tEXtdate:modify2016-11-05T07:33:05-07:00tEXtlabel{ZcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/123_4.png000066400000000000000000000010471506673203700227360ustar00rootroot00000000000000PNG  IHDR5~.tgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 
UyIDAT8c0!gE@spI$H$ܰH؃$1%C C%8p#KH$X?txj>*?6 c7i 2 o%?Hׂ$ , o,cEbH‚*qNm Z2LP܇)q,{C'$;-G$El%.FD* ]|0uC%tEXtdate:create2016-11-05T07:33:14-07:00G4%tEXtdate:modify2016-11-05T07:33:14-07:00tEXtlabel{ZcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/123_5.png000066400000000000000000000011311506673203700227310ustar00rootroot00000000000000PNG  IHDR@#gAMA a cHRMz&u0`:pQ<bKGD̿tIME !оNIDAT8+DQzLb?lllԔXd4++YHGD)AR~ %+KV &uSz vk@=}/--.scǵFW5tv h QYVAs>֠ymr&#z[v¶ }?8!u SxY@;7Gʾ綇Sv &~aGy6A]fAw z'^֩j}zBK[^ߨ|SȄJN/tD/,MaRUXOk /n%)j,-se%tEXtdate:create2016-11-05T07:33:24-07:002@%tEXtdate:modify2016-11-05T07:33:24-07:00CKktEXtlabel{ZcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/123_6.png000066400000000000000000000012251506673203700227360ustar00rootroot00000000000000PNG  IHDR!Jb*gAMA a cHRMz&u0`:pQ<bKGD̿tIME !" IDATHO(Q[ݓOQkS)\\v/Anprكl({mvwV5GLy]-{}\ğR4iڣZ|BFSIzXw/q"81^h9y,0n;GP_~Yr(vIq"E 4+ qŊ3ϤGabA?ORzكJޜCElЪը.fJ&‹Y7鍿9!D)n.6mr؈"X$l+g6B hRTb_A k|!E E m{V\EO(b0QRćt(K.ŷkeS0 sup[M( H˕%tEXtdate:create2016-11-05T07:33:34-07:00@I%tEXtdate:modify2016-11-05T07:33:34-07:00tEXtlabel{ZcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/123_7.png000066400000000000000000000013101506673203700227320ustar00rootroot00000000000000PNG  IHDR&T˼gAMA a cHRMz&u0`:pQ<bKGD̿tIME !,.G$ IDATH+ak iܦDpANv9P+F[(sYVA}?Zg0syϫ<Ij=}]-c̩rLelQXNbaFb*K;M Mol(+`ChO/͡,leY`(;`'(vS`q{@ٸupaQEm3i"j$>]&\q E^YWe7 5SXBr~̪ h1y|WRsCY&N)X@YVMLyY(K;G3cO6J[eY99`sk(QR/!,&;LhHeh268H/nS O Uf. oBK[a%tEXtdate:create2016-11-05T07:33:44-07:00yIP%tEXtdate:modify2016-11-05T07:33:44-07:00$tEXtlabel{ZcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/124_0.png000066400000000000000000000004361506673203700227340ustar00rootroot00000000000000PNG  IHDR }swgAMA a cHRMz&u0`:pQ<bKGD̿tIME  $9xIDATc?I"v ~%tEXtdate:create2016-11-05T07:32:36-07:00:^%tEXtdate:modify2016-11-05T07:32:36-07:00tEXtlabel|ĵ\IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/124_1.png000066400000000000000000000004401506673203700227300ustar00rootroot00000000000000PNG  IHDRqgAMA a cHRMz&u0`:pQ<bKGD̿tIME  -@[%IDATcW _/}%tEXtdate:create2016-11-05T07:32:45-07:00)%tEXtdate:modify2016-11-05T07:32:45-07:00̑ftEXtlabel|ĵ\IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/124_2.png000066400000000000000000000004371506673203700227370ustar00rootroot00000000000000PNG  IHDR =gAMA a cHRMz&u0`:pQ<bKGD̿tIME  79ܦIDATcg #ElQ%tEXtdate:create2016-11-05T07:32:55-07:00qf)D%tEXtdate:modify2016-11-05T07:32:55-07:00;tEXtlabel|ĵ\IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/124_3.png000066400000000000000000000004461506673203700227400ustar00rootroot00000000000000PNG  IHDR +gAMA a cHRMz&u0`:pQ<bKGD̿tIME !lgIDATc误_GY'%tEXtdate:create2016-11-05T07:33:05-07:00DL%tEXtdate:modify2016-11-05T07:33:05-07:00tEXtlabel|ĵ\IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/124_4.png000066400000000000000000000004451506673203700227400ustar00rootroot00000000000000PNG  IHDR 5N'gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 
UyIDAT(cPg``H Q6gS^ q%tEXtdate:create2016-11-05T07:33:15-07:00L%tEXtdate:modify2016-11-05T07:33:15-07:00k<tEXtlabel|ĵ\IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/124_5.png000066400000000000000000000004531506673203700227400ustar00rootroot00000000000000PNG  IHDR @FgAMA a cHRMz&u0`:pQ<bKGD̿tIME !о IDAT(c^{C>TFyQN%tEXtdate:create2016-11-05T07:33:34-07:00@I%tEXtdate:modify2016-11-05T07:33:34-07:00tEXtlabel|ĵ\IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/124_7.png000066400000000000000000000004551506673203700227440ustar00rootroot00000000000000PNG  IHDRTAgAMA a cHRMz&u0`:pQ<bKGD̿tIME !,.G$ "IDAT8c_=,"M>W>>%tEXtdate:create2016-11-05T07:33:44-07:00yIP%tEXtdate:modify2016-11-05T07:33:44-07:00$tEXtlabel|ĵ\IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/125_0.png000066400000000000000000000004761506673203700227410ustar00rootroot00000000000000PNG  IHDR y9JgAMA a cHRMz&u0`:pQ<bKGD̿tIME  $9x3IDATc ;AS `YdfuяE! D.X=]#Xvn!W ]CV,"X)XlX5vمN.I*Д׼dAtmL!? :qnm'َee*$政,Jd] yՔ%tEXtdate:create2016-11-05T07:33:24-07:002@%tEXtdate:modify2016-11-05T07:33:24-07:00CKktEXtlabel}IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/125_6.png000066400000000000000000000012171506673203700227410ustar00rootroot00000000000000PNG  IHDR!Jb*gAMA a cHRMz&u0`:pQ<bKGD̿tIME !" IDATHO(aVӖh;Er`7)7WH9a%9r9Hю+F)Qi~=?weJy吏y}I!O@q8w})I($T) 'rxlD3YL dd""E- E(8P|Tr ;寸@RoW,0{Er1uoTE=TW]`MdƭW/23`o9qtm1 Q~H`V~}scS^\=_% )f wrh{gB1w5x4g@ 5hL Q`K] )V,OU>|TQhו)`%tEXtdate:create2016-11-05T07:33:25-07:00aKc%tEXtdate:modify2016-11-05T07:33:25-07:00<tEXtlabel~*pIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/126_6.png000066400000000000000000000010141506673203700227350ustar00rootroot00000000000000PNG  IHDR6JgAMA a cHRMz&u0`:pQ<bKGD̿tIME !" IDATHcO`6mTۨQmCXǨ~*tUcV/Y ב`@V۾tVd@v 0ZM|AHa4&0`rv6\ 8a_a 3-4pƗΘ+TUOb2ytMHwHIt rT2stnLO4dgh؍5Mj6mTۨQmSwc:%tEXtdate:create2016-11-05T07:33:34-07:00@I%tEXtdate:modify2016-11-05T07:33:34-07:00tEXtlabel~*pIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/126_7.png000066400000000000000000000010611506673203700227400ustar00rootroot00000000000000PNG  IHDR>TۤgAMA a cHRMz&u0`:pQ<bKGD̿tIME !,.G$ &IDATXcO`>}TQGҶk)/r]yU}˖=ƧM"C;?b5!ĀS & S-O-B 8UdC$c 'x џW a18Q0ҍ`hp.`ū;4K!E4C:vX"z9#fCȉ`'lf?^‹i~?ЌRxxCFd8?`QGj>}T!qj#%tEXtdate:create2016-11-05T07:33:44-07:00yIP%tEXtdate:modify2016-11-05T07:33:44-07:00$tEXtlabel~*pIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/32_0.png000066400000000000000000000004301506673203700226440ustar00rootroot00000000000000PNG  IHDR t);gAMA a cHRMz&u0`:pQ<bKGD݊tIME  $9x IDATcÀw2 .Q%tEXtdate:create2016-11-05T07:32:36-07:00:^%tEXtdate:modify2016-11-05T07:32:36-07:00tEXtlabel hIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/32_1.png000066400000000000000000000004311506673203700226460ustar00rootroot00000000000000PNG  IHDR ]0gAMA a cHRMz&u0`:pQ<bKGD݊tIME  .RtfIDATc2&ka'H%tEXtdate:create2016-11-05T07:32:46-07:00$3G%tEXtdate:modify2016-11-05T07:32:46-07:00ytEXtlabel hIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/32_2.png000066400000000000000000000004311506673203700226470ustar00rootroot00000000000000PNG  IHDR  t:gAMA a cHRMz&u0`:pQ<bKGD݊tIME  79ܦIDATc5?a}%tEXtdate:create2016-11-05T07:32:55-07:00qf)D%tEXtdate:modify2016-11-05T07:32:55-07:00;tEXtlabel hIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/32_3.png000066400000000000000000000004321506673203700226510ustar00rootroot00000000000000PNG  IHDR+]gAMA a cHRMz&u0`:pQ<bKGD݊tIME !lgIDATc&uy/%tEXtdate:create2016-11-05T07:33:05-07:00DL%tEXtdate:modify2016-11-05T07:33:05-07:00tEXtlabel 
hIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/32_4.png000066400000000000000000000004321506673203700226520ustar00rootroot00000000000000PNG  IHDR55M0gAMA a cHRMz&u0`:pQ<bKGD݊tIME ! UyIDATc0-ˎX,%tEXtdate:create2016-11-05T07:33:15-07:00L%tEXtdate:modify2016-11-05T07:33:15-07:00k<tEXtlabel hIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/32_5.png000066400000000000000000000004351506673203700226560ustar00rootroot00000000000000PNG  IHDR@1gAMA a cHRMz&u0`:pQ<bKGD݊tIME !x(IDATcQ\A%tEXtdate:create2016-11-05T07:33:25-07:00aKc%tEXtdate:modify2016-11-05T07:33:25-07:00<tEXtlabel hIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/32_6.png000066400000000000000000000004351506673203700226570ustar00rootroot00000000000000PNG  IHDRJkgAMA a cHRMz&u0`:pQ<bKGD݊tIME !#9IDATc?Q~&?ׯ%tEXtdate:create2016-11-05T07:33:35-07:00XK%tEXtdate:modify2016-11-05T07:33:35-07:00)AtEXtlabel hIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/32_7.png000066400000000000000000000004351506673203700226600ustar00rootroot00000000000000PNG  IHDR$TYgAMA a cHRMz&u0`:pQ<bKGD݊tIME !-Y@IDATcFF靀LY%tEXtdate:create2016-11-05T07:33:45-07:00RB%tEXtdate:modify2016-11-05T07:33:45-07:00#SXtEXtlabel hIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/33_0.png000066400000000000000000000004541506673203700226530ustar00rootroot00000000000000PNG  IHDR }swgAMA a cHRMz&u0`:pQ<bKGD̿tIME  !Ii!IDATcp?6 r%Y@ #.8%tEXtdate:create2016-11-05T07:32:33-07:00%tEXtdate:modify2016-11-05T07:32:33-07:00EtEXtlabel!oIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/33_1.png000066400000000000000000000004711506673203700226530ustar00rootroot00000000000000PNG  IHDRqgAMA a cHRMz&u0`:pQ<bKGD̿tIME  *?.IDATc Szen~Ho) 10Rt!bPr%tEXtdate:create2016-11-05T07:32:42-07:00xkT%tEXtdate:modify2016-11-05T07:32:42-07:00 6tEXtlabel!oIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/33_2.png000066400000000000000000000005041506673203700226510ustar00rootroot00000000000000PNG  IHDR  f gAMA a cHRMz&u0`:pQ<bKGD̿tIME  4$09IDATc F ܩ8?jAm@F=7bG*yj%tEXtdate:create2016-11-05T07:32:52-07:00%tEXtdate:modify2016-11-05T07:32:52-07:00ŜvtEXtlabel!oIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/33_3.png000066400000000000000000000005231506673203700226530ustar00rootroot00000000000000PNG  IHDR +k-gAMA a cHRMz&u0`:pQ<bKGD̿tIME !)HIDAT(c ~~!lخOP?ŧ\v PSea1s`DR %tEXtdate:create2016-11-05T07:33:02-07:00r%tEXtdate:modify2016-11-05T07:33:02-07:00b,tEXtlabel!oIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/33_4.png000066400000000000000000000005341506673203700226560ustar00rootroot00000000000000PNG  IHDR 5>`gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 
)QIDAT(c Py@ ~7xA`;+ lJ Cy ` f +<4e%tEXtdate:create2016-11-05T07:33:12-07:00Ir%tEXtdate:modify2016-11-05T07:33:12-07:00ʲtEXtlabel!oIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/33_5.png000066400000000000000000000005551506673203700226620ustar00rootroot00000000000000PNG  IHDR@r/gAMA a cHRMz&u0`:pQ<bKGD̿tIME !qBbIDAT(c ~?'d9ŕ@_d0n& po]qL``P53< \yj`j}#tkKTo%tEXtdate:create2016-11-05T07:33:21-07:00`.op%tEXtdate:modify2016-11-05T07:33:21-07:00stEXtlabel!oIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/33_6.png000066400000000000000000000005761506673203700226660ustar00rootroot00000000000000PNG  IHDRJxOgAMA a cHRMz&u0`:pQ<bKGD̿tIME !EsIDAT8c0E3@ȨȰyuNE"O.Y\Ng%]k"PD" ga"J00"?>{~6QD9N @`%tEXtdate:create2016-11-05T07:33:31-07:00o%tEXtdate:modify2016-11-05T07:33:31-07:00RtEXtlabel!oIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/33_7.png000066400000000000000000000006111506673203700226550ustar00rootroot00000000000000PNG  IHDRTHgAMA a cHRMz&u0`:pQ<bKGD̿tIME !)^-Є~IDAT8c0`t  NfQQQa$ IԸE( I$A b ;\!( Gw!1A !"&rG[]YR "vvvS#M%tEXtdate:create2016-11-05T07:33:41-07:00Af%tEXtdate:modify2016-11-05T07:33:41-07:00KtEXtlabel!oIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/34_0.png000066400000000000000000000004471506673203700226560ustar00rootroot00000000000000PNG  IHDR IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  !IiIDATc ݋7=G3@ĉ  2)//W%tEXtdate:create2016-11-05T07:32:33-07:00%tEXtdate:modify2016-11-05T07:32:33-07:00EtEXtlabel"HfIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/34_1.png000066400000000000000000000004541506673203700226550ustar00rootroot00000000000000PNG  IHDRظgAMA a cHRMz&u0`:pQ<bKGD̿tIME  *?!IDATc 3dɸm< }@Mk%tEXtdate:create2016-11-05T07:32:42-07:00xkT%tEXtdate:modify2016-11-05T07:32:42-07:00 6tEXtlabel"HfIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/34_2.png000066400000000000000000000004611506673203700226540ustar00rootroot00000000000000PNG  IHDR gAMA a cHRMz&u0`:pQ<bKGD̿tIME  4$0&IDATc 3(ӊ}al&4ٕ%tEXtdate:create2016-11-05T07:32:52-07:00%tEXtdate:modify2016-11-05T07:32:52-07:00ŜvtEXtlabel"HfIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/34_3.png000066400000000000000000000004741506673203700226610ustar00rootroot00000000000000PNG  IHDR+gAMA a cHRMz&u0`:pQ<bKGD̿tIME !)1IDAT(c wWիW30((H7/%tEXtdate:create2016-11-05T07:33:02-07:00r%tEXtdate:modify2016-11-05T07:33:02-07:00b,tEXtlabel"HfIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/34_4.png000066400000000000000000000004761506673203700226640ustar00rootroot00000000000000PNG  IHDR5Ώ;gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 
)3IDAT8c {}@C *V dXQ1%<*6*6b]vY^%tEXtdate:create2016-11-05T07:33:12-07:00Ir%tEXtdate:modify2016-11-05T07:33:12-07:00ʲtEXtlabel"HfIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/34_5.png000066400000000000000000000005001506673203700226510ustar00rootroot00000000000000PNG  IHDR@; gAMA a cHRMz&u0`:pQ<bKGD̿tIME !qB5IDAT8c0PE8YA 7p:lփΣ4e 09(wg+=%tEXtdate:create2016-11-05T07:33:21-07:00`.op%tEXtdate:modify2016-11-05T07:33:21-07:00stEXtlabel"HfIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/34_6.png000066400000000000000000000005001506673203700226520ustar00rootroot00000000000000PNG  IHDRJkgAMA a cHRMz&u0`:pQ<bKGD̿tIME !E5IDAT8c 0K&Wؾ&Hƛ~8*3*3*3*Cg0N- |%tEXtdate:create2016-11-05T07:33:31-07:00o%tEXtdate:modify2016-11-05T07:33:31-07:00RtEXtlabel"HfIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/34_7.png000066400000000000000000000005121506673203700226560ustar00rootroot00000000000000PNG  IHDRT[cwgAMA a cHRMz&u0`:pQ<bKGD̿tIME !)^-Є?IDATHc0 lU89;sYcps& GeGeR_r,c O5 %tEXtdate:create2016-11-05T07:33:41-07:00Af%tEXtdate:modify2016-11-05T07:33:41-07:00KtEXtlabel"HfIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/35_0.png000066400000000000000000000005221506673203700226510ustar00rootroot00000000000000PNG  IHDR cCgAMA a cHRMz&u0`:pQ<bKGD̿tIME  !IiGIDATc @|W w/Xt< 'Xtw' ;b¼ @d~؜0;>0$s1ZX%tEXtdate:create2016-11-05T07:32:33-07:00%tEXtdate:modify2016-11-05T07:32:33-07:00EtEXtlabel#?a9IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/35_1.png000066400000000000000000000006751506673203700226630ustar00rootroot00000000000000PNG  IHDR4gAMA a cHRMz&u0`:pQ<bKGD̿tIME  +8IDATc 6# MbhC+!+4^`pߋa/L(^Aޞ>,`2p Av&aBLbZ,Πl GHB ?bC <?1`2p+w%tEXtdate:create2016-11-05T07:32:43-07:00%tEXtdate:modify2016-11-05T07:32:43-07:00A\tEXtlabel#?a9IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/35_2.png000066400000000000000000000010271506673203700226540ustar00rootroot00000000000000PNG  IHDR )7gAMA a cHRMz&u0`:pQ<bKGD̿tIME  4$0 IDAT(Ւ?(70]2.)?%%np $HJg`ҍ )-tYRnA.J7|3ߨV~:PWӤfizN5RL km0N|3{X/%t #O"*LX3Xvo w>3d5-Ú1ulb6RIu0 G`BBAXljG=_O 5XWޅrMf++žWPS6LgyV%tEXtdate:create2016-11-05T07:32:52-07:00%tEXtdate:modify2016-11-05T07:32:52-07:00ŜvtEXtlabel#?a9IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/35_3.png000066400000000000000000000011741506673203700226600ustar00rootroot00000000000000PNG  IHDR!+gAMA a cHRMz&u0`:pQ<bKGD̿tIME !)qIDAT8+DQƟ,WR6&5%5 e&YXXL4 R d#&4isN%e]9=t8?~i䚁`rq &pDfLrJ`p5@[Т〝K;9ƭ@p@@^5²f@?XO[84ƅPn . 
LݥmMcf}@çPPUSvN \Z>ܸ%yL­zڅF1Ʈ[a?#nttfg0I16;BR1$2nv^.G1mv$D "<(;CO2%4,F(6Fl|4)d!jY`A|5s.Σlnu0c9ޥ !ܲb0KN^q`NN3!N#Yqa0U}<.be; ƭyϬ:"[T^XbSf⣃խz3t `Q|܁T wǾSq7Uq']`d6 gF!nP\ ~oRWT N9/6Uiem8UV̈́+Sh:c/R:kpq@r* W67!VQ,ڇU .|qƪQڎ1!yU7Xhw3Z$qI^|kWW"%tEXtdate:create2016-11-05T07:33:12-07:00Ir%tEXtdate:modify2016-11-05T07:33:12-07:00ʲtEXtlabel#?a9IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/35_5.png000066400000000000000000000014761506673203700226670ustar00rootroot00000000000000PNG  IHDR2@}{gAMA a cHRMz&u0`:pQ<bKGD̿tIME !K3IDATHMHTQǟ&?(̅BHc… Bl.Z C0x&s[E)"$k wN0DG7B(A})3rG3U7<ǜL1sw9NgVWe=]iЙ !z/3(tG(eQW^"4ŰCPdj)tna +k EtyiFU(Tez?+"H֕v(aU@Q?*k4I#*t4)d+zf+m5KU[_qm/e9"bP WٿȨBǠϺbkFW)'7A]YAPİˬ$RpJr {_c+_ʊE*^}=dVn1ҶVLj7ʄ|6H@u_ e!kTe'BUB}PBυו& |aXߨ 2 僮 #2AaTW(_.*))cuel"Aѕ C3]]V"lbAѕթr_]Zm~%tEXtdate:create2016-11-05T07:33:22-07:00Qu%tEXtdate:modify2016-11-05T07:33:22-07:00 QtEXtlabel#?a9IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/35_6.png000066400000000000000000000016111506673203700226570ustar00rootroot00000000000000PNG  IHDR9J$ţ6gAMA a cHRMz&u0`:pQ<bKGD̿tIME !E~IDATXMHTQǟ̠҇FFTiQ*((!E- 2 L .FAj!} AYh#jB"X87Q,`WV%nN7%70703;ɯ/k>7~1 k\la~w-nVFRv=\Gɕ x~o1e#✏‡U-'jm\Q1Y Gԡ^DJ긄ISLAnW(ȤH$]qǶM k61TlH6[yg:Z+|{I֎{W=gr _#;ZGdΰqd%X~ ̤ ִԡ-"R NbG5d!2;8ZJ글{D7AUnnf6r"(Nӱ]oc㾵ܰ -|sZQ?᪷LvN.L}yTR8i Oq2OEVKǘ Qk{gx cvYNv\Ǥ'x@bz*ah;FTyGޑwyR_lGh%tEXtdate:create2016-11-05T07:33:41-07:00Af%tEXtdate:modify2016-11-05T07:33:41-07:00KtEXtlabel#?a9IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/36_0.png000066400000000000000000000005301506673203700226510ustar00rootroot00000000000000PNG  IHDR tgAMA a cHRMz&u0`:pQ<bKGD̿tIME  !IiMIDATcm(J}a@_u )  BK3 >/21zV0@9f M%tEXtdate:create2016-11-05T07:32:33-07:00%tEXtdate:modify2016-11-05T07:32:33-07:00EtEXtlabel$/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/36_1.png000066400000000000000000000006711506673203700226600ustar00rootroot00000000000000PNG  IHDR >IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  +8IDATc;C?Kk3@2WG'$pazn20򠦼ccP120(pD1w7P.͌ )p  kAXAz Q a>f`}܀2{ޅ=vH<8 I7P&%tEXtdate:create2016-11-05T07:32:43-07:00%tEXtdate:modify2016-11-05T07:32:43-07:00A\tEXtlabel$/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/36_2.png000066400000000000000000000010051506673203700226510ustar00rootroot00000000000000PNG  IHDR gAMA a cHRMz&u0`:pQ<bKGD̿tIME  4$0IDAT(c?(U]!+  &8A~9 GXCv>f*X) !49YLPtS,ş{,r% Pe"H_԰ <~@g!b _1tA}&~ןasC P[ fjQG%tEXtdate:create2016-11-05T07:32:52-07:00%tEXtdate:modify2016-11-05T07:32:52-07:00ŜvtEXtlabel$/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/36_3.png000066400000000000000000000011271506673203700226570ustar00rootroot00000000000000PNG  IHDR+G'gAMA a cHRMz&u0`:pQ<bKGD̿tIME !)LIDAT8c }ɉ!?tG" x E>"$/7<'*c-#PATb#H\ ĹӸJ^hID"DNIXcvLb`hZCz 2B>yhpp4|G ݏP)3h("> UU0$ R%eɴbJIxa0I0Hg K1#A$>)x* Y -Db![r2 BV:ԃ|QwϿ!|E ],QfhF55/1Ab3ͩ_6%tEXtdate:create2016-11-05T07:33:02-07:00r%tEXtdate:modify2016-11-05T07:33:02-07:00b,tEXtlabel$/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/36_4.png000066400000000000000000000012571506673203700226640ustar00rootroot00000000000000PNG  IHDR5s0gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! )IDAT8c W 3MRMz&# -E~g̀&"I3Ge`>nH2ȥ@`|rOJ0JG؝ A K ?.j7ҋ!NK]+D>˄V%Ya2DLѥ{31'g%)+! 5Jo*Dv`iإhCXzBkdiQ! 
-H _HV#!3 Dr_ ɢ]* 11JucMfeՂ_Qt5X$g>tHܺO(eM5ۿ[9$moczK9%tEXtdate:create2016-11-05T07:33:22-07:00Qu%tEXtdate:modify2016-11-05T07:33:22-07:00 QtEXtlabel$/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/36_6.png000066400000000000000000000015471506673203700226700ustar00rootroot00000000000000PNG  IHDR(JIgAMA a cHRMz&u0`:pQ<bKGD̿tIME !E\IDATH_LQ2Zv6b7bؔ%5SKц$J1 B'-ڲxky-0lFL3~ϽM<~9{@F{%1Bő:(U [@*v5x*|ՠö ^M-VBAeSWſ/%| ]Zq߳2;KYvk), ZLWZ;a= aeId lEfn1^1ߙa?F89ɚ'=s]HjAH0xbLnhwe[ ]`5\W,u:J8~:$m_1uBYR K(N@x^1e1wFN g:.zGx+)=<f4c fRn*J,` &rStyhqgs?;[\R|3<=}`bpݚ"\t\3VLmΤ~ubZXzH7X 1tlYȣp̣ Wy)lHY9ݾ89nawYsY#,oS/ovx`^}Z{۠Mn=cЮ|Y;mA+_=VK7$)kgWi?3Cs+i7`6<%?E ^;bx>!Av1h,&En'Rt|Q=n2ۛ6<\CU3N5wa\ D9&3<oDN#|I#-%,.:Ά%[}j|^]~coF^%tEXtdate:create2016-11-05T07:33:41-07:00Af%tEXtdate:modify2016-11-05T07:33:41-07:00KtEXtlabel$/IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/37_0.png000066400000000000000000000005371506673203700226610ustar00rootroot00000000000000PNG  IHDR cCgAMA a cHRMz&u0`:pQ<bKGD̿tIME  !IiTIDATcb`x>gg ڸ ^ҢrEzOf֝t ļ^O3xZX\ {%tEXtdate:create2016-11-05T07:32:33-07:00%tEXtdate:modify2016-11-05T07:32:33-07:00EtEXtlabel% IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/37_1.png000066400000000000000000000007531506673203700226620ustar00rootroot00000000000000PNG  IHDRgAMA a cHRMz&u0`:pQ<bKGD̿tIME  +8IDATc ,bbW_Ͱ\*7˷z2|Ef0ds3x½n^o #[Ȝ8˅?ߙ zb U7CIIp/R$ofXR 0[2tĂ݁*- ۠bX ='BcWgUt9E%tEXtdate:create2016-11-05T07:32:43-07:00%tEXtdate:modify2016-11-05T07:32:43-07:00A\tEXtlabel% IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/37_2.png000066400000000000000000000012221506673203700226530ustar00rootroot00000000000000PNG  IHDR \gAMA a cHRMz&u0`:pQ<bKGD̿tIME  4$0IDAT(ՒO(aǿ?mֆ$ȟZOjKsT`M+)).JE$YqܦaNf[x~M=y>}^ NHelXC% *E`kPx{ ; TD04¡Ib-OMK qeLDAFC5Dǀhci+ވSH`(D&g+r뷉NP%q3) ``k0ڹ,3FqGVh+ni)sɴA & ]Rֆ-6 P'a00w|HEd2Z}r`"[%N=Uɩ?RU3_ix_ :b}eZ%tEXtdate:create2016-11-05T07:32:52-07:00%tEXtdate:modify2016-11-05T07:32:52-07:00ŜvtEXtlabel% IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/37_3.png000066400000000000000000000015321506673203700226600ustar00rootroot00000000000000PNG  IHDR$+5=ugAMA a cHRMz&u0`:pQ<bKGD̿tIME !)OIDAT8OHTQ?1,,Т?F]H,bMDAS(B4 Er ?.BBФp"4;}`YHHyr ?(!#$e%tR҂>fRϯjͳi}rn1< ׷D9XSζ2efpu;]s@+Cy+lyJq);|2װw7Se\(#oTYod^঩7;~Ky!0L2.7X3[r :,U)`[,`-sI9,Th %\$}W+*e'%r7xUy%0QK,K$rW1ۘXTC,S5z?|YA2p.r6nϟ)h"ʛ U/_FʊXrQ@)%tEXtdate:create2016-11-05T07:33:12-07:00Ir%tEXtdate:modify2016-11-05T07:33:12-07:00ʲtEXtlabel% IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/37_5.png000066400000000000000000000022051506673203700226600ustar00rootroot00000000000000PNG  IHDR4@p gAMA a cHRMz&u0`:pQ<bKGD̿tIME !KzIDATHYHTQǯ;nieQX-H/Y!փ䤥PbPR(XQXR{82wOu`03"قXW;~Apl uzQ] 0+qT8| GN{ma6ͲمX^"׀%K%5#<ĘјWGr8->纾-H-DU=F{&R7=S=iʄF:eaꁹtSI[w(mBy _)Uizڂ^Fft!|1RP4*6 f0D(]iBtG-tOӄ6_.Wr -!--U"Hyׂ5$Fj&PG/RrBPm(feJ>QCok>}VܣGbI9O鹣 A:hI STQ4bס 7qP.TGhF_.J D\æBPz>OnSPM)_$ 'ƕ+]uI >+qx%tEXtdate:create2016-11-05T07:33:22-07:00Qu%tEXtdate:modify2016-11-05T07:33:22-07:00 QtEXtlabel% IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/37_6.png000066400000000000000000000023531506673203700226650ustar00rootroot00000000000000PNG  IHDR=J-.LgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 
'h IDATXYHTQϸ5NiVXXj9P=ddQa -neD!eXZJIڃFQiP˝=wsgΝ z's!"TNx_;MqZL/%ژ\%31B#F-Y\ ~ wS% m}2kܴ4 t A~hzz{ /=$IHNEK )#qcQ2M?\oQ n)fTHninܴ%XCb4kz;5TL!GWV ]z'\, &3:̛N 4)<Ԍ.jыw*rٔ Z:TX]LM60k4){i:j0V_hx&EKs` NY>Zꉦi8J"@l,jVP4!AiÅVV۬ΡhZj#rfb:;Mz9#MQmsk%@G+/=]^XWWv_V(4UA< .#Ʊ+ z(#3,TXj\œ;]LW2A(mVcd i.;| _@/Huh;|gSAox!C)LK=7ozMҜ^®5 H&N2魼 [=:ʌ=W:t3LTmH6'9&K7^wqqwF l1 ]Jb-P rP!͞ju` r!}#ʸ  ;(;,]^**O֨6<!J*fR9X4"uݠ0Vlȩm,Sp;p(`{F 3~#=ptV%tEXtdate:create2016-11-05T07:33:32-07:00lus%tEXtdate:modify2016-11-05T07:33:32-07:001tEXtlabel% IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/37_7.png000066400000000000000000000027061506673203700226700ustar00rootroot00000000000000PNG  IHDRFT[gAMA a cHRMz&u0`:pQ<bKGD̿tIME !*$>IDATXkLWLJ],"iWT@LlZRM1hc(FتY,1b4jFD464H}EZ1!yVq;{gC?h49ss/M{A1uRP7ӡ{ ຬ\z%θJjYX8 )תT`٠tR--p 3 V/:W͙KVanɰcڔXϗ>”cxA}StJX@x'Љ窒['  MaOc&ScFc!4Z k'_ 6/Ikh^>ے:ZbZւ| hDF' +ª u 9b,Yj%;cؗX[PynquV[,^|" amR4NeN:5wY9cKQ!O1Q}SUחf0^ɯ!1Dv{۶ڦ?»` |vEGsXB+`t~b;֥$6 qX97梨]587~' Hc D8CxZIvD3t8`BA7i4򻩏T&PN=&r=KY GENs5;M9S̄~w>WpphG* ^+-ne'+&WIDAT(c0a̪]po@ Էfb`z"(>@ EAVa`F``ZP$vh*ETWKќLܼ[i  1aD&PL~#FH^&﵀auu < ^ _ Op308%~20Lk>f`x4i!ľ ' u karΆI\Z”h70@1Ȱ$[,^VOMlt5$*2lm %tEXtdate:create2016-11-05T07:32:52-07:00%tEXtdate:modify2016-11-05T07:32:52-07:00ŜvtEXtlabel&O NIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/38_3.png000066400000000000000000000012721506673203700226620ustar00rootroot00000000000000PNG  IHDR+Jg`gAMA a cHRMz&u0`:pQ<bKGD̿tIME !)IDAT8c0'ɒQ%V ;t)aH`(A ѥ@F HM//P)dyI?s, 4 d0W A[(yXR8 ]z*HT@Z+ 1$,PDNb ԃ,Yӻw93X{j 5:[^G=~k0Z~p&jœic _|g҅ Y@BK>Sx V[ vK1Alxn4XzH ԡĂ| el?KPhʾh>@3?8_ C K2]-,P{Pߜ e%]LI#0K!ޓ&hjy{c&6J&SA%tEXtdate:create2016-11-05T07:33:02-07:00r%tEXtdate:modify2016-11-05T07:33:02-07:00b,tEXtlabel&O NIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/38_4.png000066400000000000000000000014671506673203700226710ustar00rootroot00000000000000PNG  IHDR&5GgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! ),IDATHcO`OZ[fWمXV0SٯlF8ۂCٿ@d 6e$GfVbS,e fcSV Q)+7,ؔŃ]8ؔe1ءؔ=bI1n2ln`9+Y؃3DV:l#kqz),1TSo8SoPE+ǩ.T-[DCٿDH KFVQĿ*7B*@^&u,u,\$$l?x@{5 K TIg!w[ǡ s3YH7* bz/! +~:ݠnAȪؠF/,.AQ bmxa1DHE3D|gFU~ XrCT<,w%U5@7!> D8P}lAd:J̀rߩ~CJ@՟`V!"HʠV0.ANXƷд̶9lĢ6Am⿌9t[.I/ ȕpAKB?!| <:Q]~#E5Q`TV |n%tEXtdate:create2016-11-05T07:33:22-07:00Qu%tEXtdate:modify2016-11-05T07:33:22-07:00 QtEXtlabel&O NIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/38_6.png000066400000000000000000000020351506673203700226630ustar00rootroot00000000000000PNG  IHDR5J>CgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 'h IDATHkHQ蚚fihV*RYJ;DBD%AQFiЃr?ZJ=(M "(R#1Z~ ik! ݛ̽wO7{gdQo}Y^/e/&JDPmTރ᱘^B&V!>craeqiW`3m yj{c 'bP^ }) @)PS-NrG@%\8sr! -k)1Wgy/ŏeexCBj 4TQ!E߸55XnbA. o*}SmF1v*Ƙ9LuQO)PXz tQ߹kmC06lTEW ±bwd8Tlօ+zX>}L:id7[YPiuJdF3mTQFim@>BZG8fȏ"fO;!;Pa VoX%U2JȈ5zź qGK{"h7?ggx;YHz>{mp2e:pǬ*:۽QSb.\Vz,f 25pTKY Kr)abKZXGEIO`LbV#u+bLf#|:Kѫdys|ÿ@Rq C=7rT{~{/%tEXtdate:create2016-11-05T07:33:32-07:00lus%tEXtdate:modify2016-11-05T07:33:32-07:001tEXtlabel&O NIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/38_7.png000066400000000000000000000022071506673203700226650ustar00rootroot00000000000000PNG  IHDR=T`gAMA a cHRMz&u0`:pQ<bKGD̿tIME !*$>|IDATXkHQΙgSLR D%1-+? 
a*M$bb`D 0 H3MLh?L#l^I{wO}.!7'nkkOtF-rGxVLĄI1(tW m/to8UC #yt y\=ڜ`Hz7{IHt6錶-5<F:4Ar8Fi Iqt+i`jI(G?&v$ 8 o\5G;3PՓηNm')'8ӵp둞UQKγ#ǹUaƸ;Fꇘ 4tPrSUil bm=VqdS?Ê5pgCk5œxxՎ?mc?įJ~icHnha2]g8D_gIk_1Askcؒ{es*>zhDH$R%;ŔNs&[NSZWu;ur͞Tz'4crdqg/wm"m=j< cO!;`5vl\OY->Astx"қ:M~oX[2KHSEm#dދ\m2WQ)ɝÑ ,XtxY~*ATŧ+7$9L=Q;ܿO"5)gc)DdZYY{C5~Ȕ5v% /k\fkD6gŦvnWFA"jP1IC-?g!.ؚ"c*.kd @`r@H5?jsMjEoʽ=z>>/o0Iʇ%tEXtdate:create2016-11-05T07:33:42-07:00|j%tEXtdate:modify2016-11-05T07:33:42-07:00tEXtlabel&O NIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/39_0.png000066400000000000000000000004541506673203700226610ustar00rootroot00000000000000PNG  IHDR }swgAMA a cHRMz&u0`:pQ<bKGD̿tIME  "8M!IDATc63<] ?!EC)hCX%tEXtdate:create2016-11-05T07:32:33-07:00%tEXtdate:modify2016-11-05T07:32:33-07:00EtEXtlabel'8 ~ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/39_1.png000066400000000000000000000004771506673203700226670ustar00rootroot00000000000000PNG  IHDRظgAMA a cHRMz&u0`:pQ<bKGD̿tIME  +84IDATc`/"xQ3?T^%I%tEXtdate:create2016-11-05T07:32:43-07:00%tEXtdate:modify2016-11-05T07:32:43-07:00A\tEXtlabel'8 ~ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/39_2.png000066400000000000000000000005241506673203700226610ustar00rootroot00000000000000PNG  IHDR n1gAMA a cHRMz&u0`:pQ<bKGD̿tIME  5S7IIDATc @|"r'(3a&[A; 6 !jj{Pf C$p+~"6o=)3eS%tEXtdate:create2016-11-05T07:32:53-07:00~%tEXtdate:modify2016-11-05T07:32:53-07:00ctEXtlabel'8 ~ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/39_3.png000066400000000000000000000005601506673203700226620ustar00rootroot00000000000000PNG  IHDR +i6gAMA a cHRMz&u0`:pQ<bKGD̿tIME !)eIDAT(c@HCPuCL- Qa` JL +P 3z '! 7`K wšsV۝#`%tEXtdate:create2016-11-05T07:33:02-07:00r%tEXtdate:modify2016-11-05T07:33:02-07:00b,tEXtlabel'8 ~ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/39_4.png000066400000000000000000000006041506673203700226620ustar00rootroot00000000000000PNG  IHDR5\]gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! )yIDAT(c ?T;_8|G uDIoC\@  Ww!s%a``}J!ȿ߳S>1פ%tEXtdate:create2016-11-05T07:33:12-07:00Ir%tEXtdate:modify2016-11-05T07:33:12-07:00ʲtEXtlabel'8 ~ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/39_5.png000066400000000000000000000006421506673203700226650ustar00rootroot00000000000000PNG  IHDR@2xwgAMA a cHRMz&u0`:pQ<bKGD̿tIME !KIDAT8cp 輅.S{Y;?pDZaHCVa$$l6cω QQQ! P[%>!Y%tEXtdate:create2016-11-05T07:33:22-07:00Qu%tEXtdate:modify2016-11-05T07:33:22-07:00 QtEXtlabel'8 ~ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/39_6.png000066400000000000000000000006741506673203700226730ustar00rootroot00000000000000PNG  IHDRJf+VgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 
'h IDAT8c0 @/h'o=`t# BQXbfX)O`"bWQC( <",fMjW, 1(S 5HLF`j ĸޠ Q*H.9n, afexTpTpTp LJ%tEXtdate:create2016-11-05T07:33:32-07:00lus%tEXtdate:modify2016-11-05T07:33:32-07:001tEXtlabel'8 ~ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/39_7.png000066400000000000000000000007261506673203700226720ustar00rootroot00000000000000PNG  IHDRT3gAMA a cHRMz&u0`:pQ<bKGD̿tIME !*$>IDATHc0-¶.V 1%@8_4&,t4!?$C5[^ ~&O"q(tHC wH.t/`q}`_ ѰC,.]-+X#g?cH؁%01#H9DXC }a@o0갨$QQQQQ JD>)?Q%tEXtdate:create2016-11-05T07:33:42-07:00|j%tEXtdate:modify2016-11-05T07:33:42-07:00tEXtlabel'8 ~ IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/40_0.png000066400000000000000000000005011506673203700226420ustar00rootroot00000000000000PNG  IHDR IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  "8M6IDATc=diyD}@+Dά@?-%tEXtdate:create2016-11-05T07:32:34-07:00~+w%tEXtdate:modify2016-11-05T07:32:34-07:00`#tEXtlabel(cIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/40_1.png000066400000000000000000000006061506673203700226510ustar00rootroot00000000000000PNG  IHDRظgAMA a cHRMz&u0`:pQ<bKGD̿tIME  +8{IDATcպgY@1?apx˨XǐaT1̀0@F  E,/C=a SphaafH0>sBC+!ҼADI-}%tEXtdate:create2016-11-05T07:32:43-07:00%tEXtdate:modify2016-11-05T07:32:43-07:00A\tEXtlabel(cIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/40_2.png000066400000000000000000000007011506673203700226460ustar00rootroot00000000000000PNG  IHDR n1gAMA a cHRMz&u0`:pQ<bKGD̿tIME  5S7IDATE! p២`1 hD@h2d`1j8-& aX&wwxpr0/WD=4.䂘'km[A%:g-8]:b?;h'a#a*>SDS%8Ѱ-*?W< Y~f-IU_l * %tEXtdate:create2016-11-05T07:32:53-07:00~%tEXtdate:modify2016-11-05T07:32:53-07:00ctEXtlabel(cIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/40_3.png000066400000000000000000000010101506673203700226410ustar00rootroot00000000000000PNG  IHDR +u]gAMA a cHRMz&u0`:pQ<bKGD̿tIME !)IDAT(u?HBQA!9ɥAEZE "j%"$u; ~˽, foRXy(hf f `תךGTj6~ڌ=RIkzftE]J>u'S}z/i=j*RRJTGjR7HP6vM{czH0Co H91I=Խt H~KQGu4Ȝ?̸>Ԗ %tEXtdate:create2016-11-05T07:33:02-07:00r%tEXtdate:modify2016-11-05T07:33:02-07:00b,tEXtlabel(cIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/40_4.png000066400000000000000000000011101506673203700226430ustar00rootroot00000000000000PNG  IHDR5m48gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! )=IDAT(]ұ+aN$%Ăd\20P\e2+&܀le8lDQ)EW]~z|?gx~|kw®s>6h ͣd]:atP`P= 6cQŝK%V;cqS  U M[҂`Zr-|<7-)H(2ϘA/à0e: p& &>XS9V]\LAs pܦc>o}x|Oe %tEXtdate:create2016-11-05T07:33:12-07:00Ir%tEXtdate:modify2016-11-05T07:33:12-07:00ʲtEXtlabel(cIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/40_5.png000066400000000000000000000012011506673203700226450ustar00rootroot00000000000000PNG  IHDR@!IgAMA a cHRMz&u0`:pQ<bKGD̿tIME !KvIDAT8mO(DQ;X`7e%eXH)[ fAL1bj,LRR`afBӈ Qʈ7?y=͸[=羧tRJTf6 Sve1V!_Ѵ4ʦ轓iuBtC͑NL+5 5ئeQ6F3X yLkw4Y' .z_d8q ݑ,B%J풹ݦ"kXE6 {'}aod<,G %ۂRmGS+6L:ن-;dx 1l X7۬X+YEw >1CEY =d1Ȟ1A2KΓ?"/%Q$;{|g?l-%tEXtdate:create2016-11-05T07:33:22-07:00Qu%tEXtdate:modify2016-11-05T07:33:22-07:00 QtEXtlabel(cIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/40_6.png000066400000000000000000000012511506673203700226530ustar00rootroot00000000000000PNG  IHDRJq@hgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 'h IDAT8mM(Da4;MMYؘ(vX)?HPR2J3Ib!ʆB6&RL)jĂp3^s{:au lXX3,hO戶qb?.25ΰ] F k#M)gm`)C=k` MGub @#^h-tVV8t5  3gCk? 
b~@YIq u^@gYߡpu|w'h((-TtWVEE]Y0"XvVVH%[:D{Y fR4S ZjUܩFhz)VZ'/#V:wEo6L%tEXtdate:create2016-11-05T07:33:32-07:00lus%tEXtdate:modify2016-11-05T07:33:32-07:001tEXtlabel(cIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/40_7.png000066400000000000000000000013661506673203700226630ustar00rootroot00000000000000PNG  IHDRTRD gAMA a cHRMz&u0`:pQ<bKGD̿tIME !*$>IDATHuKHQQ$JZ.j!In ](JЃ ѢDAڈ -*CBD$KK|07濘j9w8s=(~kW#"2Y @ 2vY)mXA꯰5(A~X҈I6E&Q$O2؟ y=,Io*Vy!\5va{ $؁~k, eRh &m-s iLJz!o$#qHdDiA$|nAKbQ8璟 {J2 y/y OK!.I5̤`r'fI7ܠT<5ق3#dXE=;M.Q g܁Z,LK7պ;1l_5!c*wڤnA\$x}HF!S&%h,l5³.v,oQXyxڔW%tEXtdate:create2016-11-05T07:33:42-07:00|j%tEXtdate:modify2016-11-05T07:33:42-07:00tEXtlabel(cIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/41_0.png000066400000000000000000000005021506673203700226440ustar00rootroot00000000000000PNG  IHDR IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  "8M7IDATcOb߁ b"W$ 2%4|$/zty $-+%tEXtdate:create2016-11-05T07:32:34-07:00~+w%tEXtdate:modify2016-11-05T07:32:34-07:00`#tEXtlabel)ߴS'IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/41_1.png000066400000000000000000000006011506673203700226450ustar00rootroot00000000000000PNG  IHDRظgAMA a cHRMz&u0`:pQ<bKGD̿tIME  +8vIDATc?01,02C@ =^#$_3ap a8a20A Ɵ`w& 9!5[0&'= +A]sUe+%tEXtdate:create2016-11-05T07:32:43-07:00%tEXtdate:modify2016-11-05T07:32:43-07:00A\tEXtlabel)ߴS'IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/41_2.png000066400000000000000000000006711506673203700226550ustar00rootroot00000000000000PNG  IHDR gAMA a cHRMz&u0`:pQ<bKGD̿tIME  5S7IDATM1 qle%$^IyFe3@VxI=xO=?Oeccme3E"l:3l p۰.54* 2~|>x{ #y ن@ΒV r5}ҞIJ=}g//E+ O3C%tEXtdate:create2016-11-05T07:32:53-07:00~%tEXtdate:modify2016-11-05T07:32:53-07:00ctEXtlabel)ߴS'IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/41_3.png000066400000000000000000000010121506673203700226440ustar00rootroot00000000000000PNG  IHDR+gAMA a cHRMz&u0`:pQ<bKGD̿tIME !RIDAT(]!KQWdͲ"M(h~1Ų4X2 CPA 3 IX4d1wN{r8G!K|~ )ۅO8?, p^Ôxkd/ý4Ipn=\pCn.|eWᆽ5Ⱦ7gx~L9q x+i<KG;xE~V;-q?7~f} ְ6%tEXtdate:create2016-11-05T07:33:03-07:00y$%tEXtdate:modify2016-11-05T07:33:03-07:00tEXtlabel)ߴS'IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/41_4.png000066400000000000000000000011011506673203700226440ustar00rootroot00000000000000PNG  IHDR5;_gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! )6IDAT(eӱ+agRNYȕLJII )bl 7X$reHH aE1\6s~~->|yo?TvqJU)_ibNY+ʭd&:eUCʹdhėlK*%n1^(+#ʎdr*Y'i]ںj>٧if€cve`WuQ$g?GuD<_f ;nnc]k^)co@Gvwg%} ߠUv|};Bg4 ;An;{p/tr^*ŝdô3)\w2nogHc?Ŵv{x"^؏ ("D+~nIDATH}OHTQ Ӱ&pc &E89Df1b!RQčF+7b M)HEt ATZ%43:?ݼx7pι3sg?V2@u'JlzfvPhAoxdvjl^I߀=Q`( XA7&dc`{YfZ8X~ ʂ`4XD`#`s`f0WfvQd`òIOo`?da?j0WlVEvgH0;t(%tEXtdate:create2016-11-05T07:32:34-07:00~+w%tEXtdate:modify2016-11-05T07:32:34-07:00`#tEXtlabel*FIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/42_1.png000066400000000000000000000005401506673203700226500ustar00rootroot00000000000000PNG  IHDR 39gAMA a cHRMz&u0`:pQ<bKGD̿tIME  +8UIDATc @e? 
r}JSN*Xͷ `5E5y VU03?Gl`bRؿ%tEXtdate:create2016-11-05T07:32:43-07:00%tEXtdate:modify2016-11-05T07:32:43-07:00A\tEXtlabel*FIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/42_2.png000066400000000000000000000006201506673203700226500ustar00rootroot00000000000000PNG  IHDR G}ugAMA a cHRMz&u0`:pQ<bKGD̿tIME  5S7IDATc Sj0t?a`~7 ?"S& @fw[ ?ԼsA K `S3&c1x/Q>N>(%tEXtdate:create2016-11-05T07:32:53-07:00~%tEXtdate:modify2016-11-05T07:32:53-07:00ctEXtlabel*FIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/42_3.png000066400000000000000000000007101506673203700226510ustar00rootroot00000000000000PNG  IHDR+gAMA a cHRMz&u0`:pQ<bKGD̿tIME !RIDAT(c p20,C I_20P.Tl.[ϕ >f02섈300h00tJu P̎;^`HE~3D+k X++C$P"V`ys*Ӻ s be;``8W#o@ 辭@/Dw?'&FFɜD%tEXtdate:create2016-11-05T07:33:03-07:00y$%tEXtdate:modify2016-11-05T07:33:03-07:00tEXtlabel*FIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/42_4.png000066400000000000000000000010101506673203700226440ustar00rootroot00000000000000PNG  IHDR5~.tgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! b.4UIDAT8c0-E[[6 7$~=@ w/ƟJ|]=\A+@W30J 1f^b`z d2(X$ 300[!V8DX'?NK%ϣzJ#$;;1L@!5,@59THRs ̹Y 3owQo 3:3DHJ<^D-l 8G8B[%%tEXtdate:create2016-11-05T07:33:13-07:00y>y%tEXtdate:modify2016-11-05T07:33:13-07:00ctEXtlabel*FIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/42_5.png000066400000000000000000000010641506673203700226560ustar00rootroot00000000000000PNG  IHDR@#gAMA a cHRMz&u0`:pQ<bKGD̿tIME !K)IDAT8c0PCl K%{ (Ɏdq"Ȳ@Y]p$% R\ GᲟ$A ~cLv'?S̑7*iMHr/&s KhHz| /m0"?PeLGv܂߳e oKpᲛ :Rn {`}@t=L q?U IDATHcO`pe@02 %L,2E4\ʾr@ʊ?Дe݀p@!5117qAz@' |/IjrB@b#+Y'K_1_ARrAԚFvH!,0aA8Q}zɖޅh a9Ȫ⊬VpE'pl28)D#ejy=*Mٗ6A,@n_de` Z ڎ,*(^8sks? *<G$9 +(^,)k3/2TȊYP8@eL-;Ede )VDHE27BQeF*U6lT`Sn[%tEXtdate:create2016-11-05T07:33:42-07:00|j%tEXtdate:modify2016-11-05T07:33:42-07:00tEXtlabel*FIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/43_0.png000066400000000000000000000004551506673203700226550ustar00rootroot00000000000000PNG  IHDR 2gAMA a cHRMz&u0`:pQ<bKGD̿tIME  "8M"IDATc @| ΒGgu!ʾXh=%tEXtdate:create2016-11-05T07:32:34-07:00~+w%tEXtdate:modify2016-11-05T07:32:34-07:00`#tEXtlabel+12 IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/43_1.png000066400000000000000000000004701506673203700226530ustar00rootroot00000000000000PNG  IHDR:dgAMA a cHRMz&u0`:pQ<bKGD̿tIME  +8-IDATc Y֦60/ADF>O4eK!(;%tEXtdate:create2016-11-05T07:32:43-07:00%tEXtdate:modify2016-11-05T07:32:43-07:00A\tEXtlabel+12 IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/43_2.png000066400000000000000000000005001506673203700226460ustar00rootroot00000000000000PNG  IHDR 3WgAMA a cHRMz&u0`:pQ<bKGD̿tIME  5S75IDAT(c0C0 4^Z Pa7:h#CF覻{בP8Y Q5TVZjT)EuU*%tEXtdate:create2016-11-05T07:33:13-07:00y>y%tEXtdate:modify2016-11-05T07:33:13-07:00ctEXtlabel+12 IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/43_5.png000066400000000000000000000005231506673203700226560ustar00rootroot00000000000000PNG  IHDR)@gAMA a cHRMz&u0`:pQ<bKGD̿tIME !KHIDATHcO,`U9XT^< Qi*GUAr&3*K1U948QW !G5%tEXtdate:create2016-11-05T07:33:22-07:00Qu%tEXtdate:modify2016-11-05T07:33:22-07:00 QtEXtlabel+12 IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/43_6.png000066400000000000000000000005331506673203700226600ustar00rootroot00000000000000PNG  IHDR/JuRgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 'h PIDATHcO`U? 
ph 0{TQ7F\`RB`QG FՏUO<e%tEXtdate:create2016-11-05T07:33:32-07:00lus%tEXtdate:modify2016-11-05T07:33:32-07:001tEXtlabel+12 IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/43_7.png000066400000000000000000000005421506673203700226610ustar00rootroot00000000000000PNG  IHDR7TngAMA a cHRMz&u0`:pQ<bKGD̿tIME !*$>WIDATXcO`7osgͣF7H[HB8`JbeTߨQ}#\a0oTߨQ} Rk%tEXtdate:create2016-11-05T07:33:42-07:00|j%tEXtdate:modify2016-11-05T07:33:42-07:00tEXtlabel+12 IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/44_0.png000066400000000000000000000004501506673203700226510ustar00rootroot00000000000000PNG  IHDR IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  "8MIDATc _@d}{ $ߴe4*W%tEXtdate:create2016-11-05T07:32:34-07:00~+w%tEXtdate:modify2016-11-05T07:32:34-07:00`#tEXtlabel,ާIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/44_1.png000066400000000000000000000004751506673203700226610ustar00rootroot00000000000000PNG  IHDR)1gAMA a cHRMz&u0`:pQ<bKGD̿tIME  +82IDATc ^ A%? aX\I'>0 ywlSt>3%tEXtdate:create2016-11-05T07:32:43-07:00%tEXtdate:modify2016-11-05T07:32:43-07:00A\tEXtlabel,ާIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/44_2.png000066400000000000000000000005241506673203700226550ustar00rootroot00000000000000PNG  IHDR gAMA a cHRMz&u0`:pQ<bKGD̿tIME  5S7IIDATc # }9]Wc`8 ee`-a`8eepya/`eBdF٩%tEXtdate:create2016-11-05T07:32:53-07:00~%tEXtdate:modify2016-11-05T07:32:53-07:00ctEXtlabel,ާIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/44_3.png000066400000000000000000000005601506673203700226560ustar00rootroot00000000000000PNG  IHDR +u]gAMA a cHRMz&u0`:pQ<bKGD̿tIME !ReIDAT(c FyCݑ)0008y@^-w y Uyep} /a@< y lO` ip00܃\ht-`h%tEXtdate:create2016-11-05T07:33:03-07:00y$%tEXtdate:modify2016-11-05T07:33:03-07:00tEXtlabel,ާIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/44_4.png000066400000000000000000000006201506673203700226540ustar00rootroot00000000000000PNG  IHDR5m48gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! b.4UIDAT(cFF }&.o'B lDT U3N EkJJ@ f!9,ȗ% @w$X@ouF>&.@Z ʓ#%tEXtdate:create2016-11-05T07:33:13-07:00y>y%tEXtdate:modify2016-11-05T07:33:13-07:00ctEXtlabel,ާIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/44_5.png000066400000000000000000000006401506673203700226570ustar00rootroot00000000000000PNG  IHDR@2xwgAMA a cHRMz&u0`:pQ<bKGD̿tIME !L/IDAT8cFFFЎ}хd+dHB `!{HB`8$"L7BaH0^D: V)$? wd+:,GOnd  mF1@ ‡L BQ?:#K&%tEXtdate:create2016-11-05T07:33:22-07:00Qu%tEXtdate:modify2016-11-05T07:33:22-07:00 QtEXtlabel,ާIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/44_6.png000066400000000000000000000007031506673203700226600ustar00rootroot00000000000000PNG  IHDRJkgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 
'h IDAT8c0 N]Ѕ[@79XA'e($TC Q ]V F·7$X=^*4_E_  &(34(pZ~>D^V Qh@Q6$x];F lQ|0:Z%tEXtdate:create2016-11-05T07:33:32-07:00lus%tEXtdate:modify2016-11-05T07:33:32-07:001tEXtlabel,ާIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/44_7.png000066400000000000000000000007401506673203700226620ustar00rootroot00000000000000PNG  IHDRTRD gAMA a cHRMz&u0`:pQ<bKGD̿tIME !*$>IDATHc 0ʌʌʌʌPG'.Gab``EƐ `wt=02jP {t{´F)JX,Tf3a!O2ke~B$jYn ?2 2s0ba-DBLDf*F}KHYҋ۞` 2Y21d%>bʴsdjc8ӂ-6 ?^28NJ%tEXtdate:create2016-11-05T07:33:42-07:00|j%tEXtdate:modify2016-11-05T07:33:42-07:00tEXtlabel,ާIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/45_0.png000066400000000000000000000004411506673203700226520ustar00rootroot00000000000000PNG  IHDR y9JgAMA a cHRMz&u0`:pQ<bKGD̿tIME  "8MIDATc |@(yf?a%tEXtdate:create2016-11-05T07:32:34-07:00~+w%tEXtdate:modify2016-11-05T07:32:34-07:00`#tEXtlabel-ٗ>IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/45_1.png000066400000000000000000000004521506673203700226550ustar00rootroot00000000000000PNG  IHDRظgAMA a cHRMz&u0`:pQ<bKGD̿tIME  ,7\JIDATc Tgan3nXc7%tEXtdate:create2016-11-05T07:32:43-07:00%tEXtdate:modify2016-11-05T07:32:43-07:00A\tEXtlabel-ٗ>IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/45_2.png000066400000000000000000000004541506673203700226600ustar00rootroot00000000000000PNG  IHDR pvgAMA a cHRMz&u0`:pQ<bKGD̿tIME  5S7!IDATc&'8 03%Lf86%tEXtdate:create2016-11-05T07:32:53-07:00~%tEXtdate:modify2016-11-05T07:32:53-07:00ctEXtlabel-ٗ>IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/45_3.png000066400000000000000000000004561506673203700226630ustar00rootroot00000000000000PNG  IHDR+mBgAMA a cHRMz&u0`:pQ<bKGD̿tIME !R#IDAT(cFx ;-6S9%tEXtdate:create2016-11-05T07:33:03-07:00y$%tEXtdate:modify2016-11-05T07:33:03-07:00tEXtlabel-ٗ>IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/45_4.png000066400000000000000000000004611506673203700226600ustar00rootroot00000000000000PNG  IHDR5i gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! b.4U&IDAT(cF)?H 3 pʄGH2sQ.%tEXtdate:create2016-11-05T07:33:13-07:00y>y%tEXtdate:modify2016-11-05T07:33:13-07:00ctEXtlabel-ٗ>IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/45_5.png000066400000000000000000000004631506673203700226630ustar00rootroot00000000000000PNG  IHDR@?0gAMA a cHRMz&u0`:pQ<bKGD̿tIME !L/(IDAT8c0 Tp0*p Ng@ƴ2*84Q&%tEXtdate:create2016-11-05T07:33:23-07:00~Y%tEXtdate:modify2016-11-05T07:33:23-07:00tEXtlabel-ٗ>IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/45_6.png000066400000000000000000000004671506673203700226700ustar00rootroot00000000000000PNG  IHDRJZgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 
[binary content omitted: tar archive entries for ggml-org-ggml-7ec8045/examples/yolo/data/labels/45_7.png through 77_3.png — small PNG glyph images shipped with the YOLO example for rendering detection labels; each glyph is stored at eight sizes (filename suffixes _0 through _7). The raw PNG byte streams are not human-readable and carry no further recoverable text.]
x s]~ 8u86op?rB_jYN>X{L<_aYޢպ]qeW3f4U~F˻q߀w4Qer.O4VJ5?ĨVǂ ԌlSXcSym*}..G;QyivQ*W яAB ^G8~8ަ*;ݰ}cCaG3Җo,Չ|3":l!!x qoA7 ^In%3|d-?aŸK*6a.]jEryq~+h7v?I aeX&ǗQ'RqOmTyrrXAMؤ6~?r )ٟëH=K?97SY#Y`?=ÝZu"DqgNWĭ.iUJuxo=}1"&L 1QO%tEXtdate:create2016-11-05T07:33:10-07:00Hc'%tEXtdate:modify2016-11-05T07:33:10-07:009ۛtEXtlabelMkfIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/77_5.png000066400000000000000000000021121506673203700226610ustar00rootroot00000000000000PNG  IHDR9@^PgAMA a cHRMz&u0`:pQ<bKGD̿tIME !E?IDATH[HUALeԇ.%Tڃj=@J*Ą ).Y a=t2P*SC2L4Mss߳g<8kƽ5{$d͙j%9Q9r'B{ABq"+ j+Dd"'r8 >=jÖrϤG`G,'Os2cKs3͞M¸7۞ 2Ύ,GmvdfC+8yWȫLׄ׎.5ZeŠ#d|[^+?N>ݢVRKg똕<_m'L$h+V.Rh舙sv"(d!V+CԛI]ڳ14HcTb&/),[$v!($l|Q>2\^Y.jvvQ~@m^7.{P+})+#sѷ,BA/웂wH< chu\`*q:'?9JFsjŹsqIJ2x^7;އmC 3 DBЛH{#V荘%<0ГP5^{!!^;hՈ]遘b"+ {B6}ڬFX ZBjy (unI~?Jr)B܈񲟧D #1?Wd QD'b͞B`4N5v\OBdroZ@Ls m!v3 D,2[#|IF&8ȽrxiQ vZv#eWk. -eA2b12x+4RV]^Ȥ\dru. 7:X٩E0:sG>D@h>9I;) &qZb BIm̖؀])zcb,TH5~Da{#Zq~!Um$oJv Dw߇y{퀭G`8Y U\Іl; f/wV~>&]c{ U+HF|ESK&`uJOqL+ m,/KKu@X1l]蚄])>dB6Kk-Dms.erˈ-#0 Ծ%P",B1u$tr;K!?4ӝˍW?h υX標aBO&8/wXM]"deܢ@;dG~t8 XXvN1#qf'쁦GwxM *n( TTvi7Tћ|KÇ.·粐q?#S D?Am%tEXtdate:create2016-11-05T07:33:30-07:00 dZ%tEXtdate:modify2016-11-05T07:33:30-07:00{tEXtlabelMkfIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/77_7.png000066400000000000000000000026101506673203700226660ustar00rootroot00000000000000PNG  IHDRLT ygAMA a cHRMz&u0`:pQ<bKGD̿tIME !()*}IDATXklEXnr ZZkKD $UM RIh$FH%\D Jj\FIK҄ JmOϰgwogvgvLOw9Ξ2~SˮuNpcD?-+\{&f$VClf(v[̆j|'>[++|_[tF,12}=Iۓ!ILlߍ`.Ď[E0MǕsl˂aO9.v&U'\azی@|z-pXM`;[{-J`SӜDX٘9J]vε1cGI fXd3&aٴ 'LI4Q672d!( #ogHY VIt)E1>tIyG`sIT iQL;z/—z6Y뿇T=l1śD;!WB!OIXy<_\M,%P8-nT\ a\IvZ u:Am@|+IBOL[U=HI4͔KD1W XIt'zEqQ:.bD,_Eyd?:zC~h"+Q)60;߮;G.p i wm ^+ /+a(>,P@g^^(Od|>EhJY酽 ъ]qSM&/zaOpF_'ݙEo\Ȗfct^#;iDjI߂=}ȉ5QEH塯h{yqYQ5Lua RxE/YGw=XU22lcIޕax}w=na_yeϔ=Z1|wku4v΁T¾A-O1qڧ.cvaRZh. 1.|Fk^$ũO'`u3/ckFsOZfug˴e 0?xڪՎ`j!`ETk'zzVjtYgbv^y *L'; lm)*ytúaݰnv;p'w%tEXtdate:create2016-11-05T07:33:40-07:006mC%tEXtdate:modify2016-11-05T07:33:40-07:00qktEXtlabelMkfIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/78_0.png000066400000000000000000000005111506673203700226560ustar00rootroot00000000000000PNG  IHDR 2gAMA a cHRMz&u0`:pQ<bKGD̿tIME  >Ya>IDATc x A!"}zV,xz=ȂLR9w%tEXtdate:create2016-11-05T07:32:32-07:00rM%tEXtdate:modify2016-11-05T07:32:32-07:00tEXtlabelN bIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/78_1.png000066400000000000000000000006151506673203700226640ustar00rootroot00000000000000PNG  IHDRCgAMA a cHRMz&u0`:pQ<bKGD̿tIME  )G6IDATc{? `-vag8 `:&M n [N_e`[, d7e" +YCe^&dO5.BRM#4́()( g78%tEXtdate:create2016-11-05T07:32:41-07:00I %tEXtdate:modify2016-11-05T07:32:41-07:008޵utEXtlabelN bIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/78_2.png000066400000000000000000000007011506673203700226610ustar00rootroot00000000000000PNG  IHDR -5gAMA a cHRMz&u0`:pQ<bKGD̿tIME  3TIDAT(c0C7DTneC+. J\ ypH0$!'"1QH|Ŕ| HZĔHdsLŀ oYrw0%?P<ƔL֩Ɣa"-8|0%u!g bpHCRA+i1exab%tEXtdate:create2016-11-05T07:32:51-07:00) W%tEXtdate:modify2016-11-05T07:32:51-07:00ttEXtlabelN bIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/78_3.png000066400000000000000000000010001506673203700226530ustar00rootroot00000000000000PNG  IHDR+. ^gAMA a cHRMz&u0`:pQ<bKGD̿tIME !HIDAT8c?>;~y_X! 
?3x~/ R).U`TVp`2pV y ?`bqV |`'p_Vw qo+P|KX9..:#~_ʌ_V7} NN/%Q7%tEXtdate:create2016-11-05T07:33:00-07:00|c%tEXtdate:modify2016-11-05T07:33:00-07:00!tEXtlabelN bIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/78_4.png000066400000000000000000000010571506673203700226700ustar00rootroot00000000000000PNG  IHDR&5GgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! J$IDATH!HCSQ␅!,h`b2VfXrA6dE0̇ yVPILbAH؉ !dcM1db٦ `C"$ԭ\\[jF"ˆ.ÁLe82 Es7\2ڝ]3_e8Q7Sq.F-\1u.ոp3ng{x)abx^kb)zbC#G14a@U!jS핕=-%tEXtdate:create2016-11-05T07:33:10-07:00Hc'%tEXtdate:modify2016-11-05T07:33:10-07:009ۛtEXtlabelN bIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/78_5.png000066400000000000000000000011301506673203700226610ustar00rootroot00000000000000PNG  IHDR-@gAMA a cHRMz&u0`:pQ<bKGD̿tIME !EMIDATH=(ܝRԥ$$a,L7xncrL6&1 EJYbn ]tܹ Ο#` 3GI4Ve-%1Fo`:e,QZ"ea~yѕyznHt֋Y[#3nk;~϶v@yo+U`k+L2i[#^8Qޘ5.j7'mz塴hR>5[wldw!~];C㱇xc4^F#3h|L109h8 a4LiD Zg4(.O%tEXtdate:create2016-11-05T07:33:20-07:00Yd%tEXtdate:modify2016-11-05T07:33:20-07:00xtEXtlabelN bIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/78_6.png000066400000000000000000000012061506673203700226660ustar00rootroot00000000000000PNG  IHDR5J>CgAMA a cHRMz&u0`:pQ<bKGD̿tIME !u{IDATH1(arQn[XXXdEnP%ePʠL%1PSd"uYXnaDqb O#`"/QJ)%3w;RI((3JFQ(zft<2Jr,$z(d%[FIQҜr kҫ^j51VH ?T+ MSo3tόUFKڲsM%-_iJ2Fix?h 1Z?-]WFK[hig2ZڝVTYuGcsEFs;Fs7[gyo{'gWb]FR}Fr}F@cF}u*345gơcߙqh>ith>lx|Ԩѝ|6PuFx@}FdHQ#ŲQ941Oi,tFc-f4rF㰘8)etT2׵u͌SkB;5j/[/4R:S/6@0F%tEXtdate:create2016-11-05T07:33:40-07:006mC%tEXtdate:modify2016-11-05T07:33:40-07:00qktEXtlabelN bIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/79_0.png000066400000000000000000000005411506673203700226620ustar00rootroot00000000000000PNG  IHDR cCgAMA a cHRMz&u0`:pQ<bKGD̿tIME  >YaVIDATc wW1WY?a@ɇ3LW+p6c3Pm⪛4]jjgQi 2\lL_V[џ%tEXtdate:create2016-11-05T07:32:32-07:00rM%tEXtdate:modify2016-11-05T07:32:32-07:00tEXtlabelO{eJIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/79_1.png000066400000000000000000000007111506673203700226620ustar00rootroot00000000000000PNG  IHDRgAMA a cHRMz&u0`:pQ<bKGD̿tIME  )G6IDATc @Ĺ,-NVCY P*ÌA *!b A̿ ` 0ËĊXľ 3ăl`)3D# !b <0o"  +Ka` b`0 iܱgTC% ߧp9̮.Ïb?\%tEXtdate:create2016-11-05T07:32:41-07:00I %tEXtdate:modify2016-11-05T07:32:41-07:008޵utEXtlabelO{eJIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/79_2.png000066400000000000000000000011121506673203700226570ustar00rootroot00000000000000PNG  IHDR \gAMA a cHRMz&u0`:pQ<bKGD̿tIME  3T?IDAT(c0!{[ fhrg3PGd' @BHD*w)`ar\ /B+p< rXO#9PrX. ȪE*;XNh{TOk10HPl7Pn'F &<4)@@OL hr@ wZP^CHn&P\P$쿟 h6Cs0b?Aeu8jŋL J#H&(>4p $VHF ܍Mºy(ՀY-' "6EQ b L5o@[ NJA|? #x`JRhI98ur)VYuE=aπ$0% E!AտO׫A&#)_G S˜3's"²pF5AW(0T(V\C ,""**&wz͡%tEXtdate:create2016-11-05T07:33:00-07:00|c%tEXtdate:modify2016-11-05T07:33:00-07:00!tEXtlabelO{eJIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/79_4.png000066400000000000000000000015041506673203700226660ustar00rootroot00000000000000PNG  IHDR,56VgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! J9IDATHMHTQ7(*Y~- $H ?2)Dt!nm!n)FGD- j m 4L1Noͼ{q';7osF֛ony>olԦhQPsZHm-k75Hp0O^I+[Za)(,Kw壢]j{t=_A3/n^r֣9ZA*L lt:% 92k~dAG`b.Ks_l'π;8C<׀+PYzy.:xϱ`u\PⰛ{Pb205>ɠ KxPW<U+q7&\$3X ]aG]\e*n]{ ܇Nj_#N\*]xN'BO$^V]O93;Ko̜33 3*gٗyҤy:D#$U@$~-Lpu:bAtk%|}gU ZyUwN} /l86rkakji;Mۙ#'_ht9;ݸ7T^vҙ~0AN| VE qCز`%t;60u1TѳHO9 АTT|ؽ<E6k X|Lї0qC@]@.Z_qx!&Dt.gYj;E0~irCHbZ~!kr j^ 2.Y;#Pn<ب[/b8TSt;'16bc9ECf,o9־D}3V>Ի,@ [\b!if&SfN2f[1Kh^`o5&[p"I0lPx a7`6pؑbvn$Mjpq$Q5}otrY3U>bJ~OGUݔt4\6! 
Pbi?@C ;f9U=\67!]ݱ6* 6?ڨk7T:%-tCtکe/ݛ_ F1#czV@nվ#%tEXtdate:create2016-11-05T07:33:30-07:00 dZ%tEXtdate:modify2016-11-05T07:33:30-07:00{tEXtlabelO{eJIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/79_7.png000066400000000000000000000022171506673203700226730ustar00rootroot00000000000000PNG  IHDRFT[gAMA a cHRMz&u0`:pQ<bKGD̿tIME !()*IDATX[HTAq׻[j$Ib킁]P^Ұ$]̐IA)MB(,nd LRt.so32XzYRX7$N~y|V̤q% Del̕H9Mp*R jE?uW|QdJB{䙩lqE>|@3UP8ipjX&arye'dyEzc$OKČK0-tQz=L1cQiS/bPA> !`‡A)RN.o(Gu2V2pME5:l!j mb΀ZL1$$z3Vf[[dt& wL1L22U%38Qalcqh1#8133{qS'C]sgr7rIfgD)բ7ECk1q& _gMsg3¹9M1f<[sC|p $Ve<1E8ʜ-ns<02pZ1SD-:e}l" !fPn@XMYOOL3ɻjW zcf,]#2MH*IT $f@MbRMv<rC:޸H2 Ya/IDATc 8c 8vۗvhv =T* ӏF <ƌ3%tEXtdate:create2016-11-05T07:32:32-07:00rM%tEXtdate:modify2016-11-05T07:32:32-07:00tEXtlabelPmIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/80_1.png000066400000000000000000000005351506673203700226560ustar00rootroot00000000000000PNG  IHDR 39gAMA a cHRMz&u0`:pQ<bKGD̿tIME  )G6RIDATc Q% r_10Lw3D P& `6'P9|9 fG*ldSd Ē%tEXtdate:create2016-11-05T07:32:41-07:00I %tEXtdate:modify2016-11-05T07:32:41-07:008޵utEXtlabelPmIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/80_2.png000066400000000000000000000005761506673203700226640ustar00rootroot00000000000000PNG  IHDR >gAMA a cHRMz&u0`:pQ<bKGD̿tIME  3TsIDAT(c03@I= `[&v*tj) # *0$` o$W@-P-S9KC>6xR漇;<xJk %tEXtdate:create2016-11-05T07:32:51-07:00) W%tEXtdate:modify2016-11-05T07:32:51-07:00ttEXtlabelPmIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/80_3.png000066400000000000000000000006451506673203700226620ustar00rootroot00000000000000PNG  IHDR+]gAMA a cHRMz&u0`:pQ<bKGD̿tIME !kx~IDAT(c0`JSߣ T, Ο?wd x َ. _! bJ}0%$ `dt/^BxwH"n 9 "H+?|ob摷NtJԮL%"qcJF%P:Zxԍ%tEXtdate:create2016-11-05T07:33:10-07:00Hc'%tEXtdate:modify2016-11-05T07:33:10-07:009ۛtEXtlabelPmIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/80_5.png000066400000000000000000000007441506673203700226640ustar00rootroot00000000000000PNG  IHDR@#gAMA a cHRMz&u0`:pQ<bKGD̿tIME !EIDAT8c0P"h3 }l4jQu,'ԋOa^Yddl*[}U Jwa3D?vYP?b }U T+Vy!NÅȞdeb^jZtw[a!)!,, mH'Yѫxb_ΔbT'gٺFz=/ʎʒ'cS%tEXtdate:create2016-11-05T07:33:20-07:00Yd%tEXtdate:modify2016-11-05T07:33:20-07:00xtEXtlabelPmIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/80_6.png000066400000000000000000000010201506673203700226510ustar00rootroot00000000000000PNG  IHDR!Jb*gAMA a cHRMz&u0`:pQ<bKGD̿tIME !uIDATHcO0GE7`O] Л }T0, i+!  
`HAVhDR I7<N•aU9 *`~ơ̐T,XK1.\*TT0[pC)?*DŒgm2Sq9}hjkkRi,{ SUҺYE>y?y_"z]l9`U1b8ZGqOUO8B%tEXtdate:create2016-11-05T07:33:30-07:00 dZ%tEXtdate:modify2016-11-05T07:33:30-07:00{tEXtlabelPmIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/80_7.png000066400000000000000000000010541506673203700226610ustar00rootroot00000000000000PNG  IHDR&T˼gAMA a cHRMz&u0`:pQ<bKGD̿tIME !()*!IDATHcO`e 0~u"10d!JC_1tQДi.*{3BHʒq(OIebp*e?xoKdSv.OK>eafc%p(|6% ) Kţ0 Wq+;'QZUsّuTerzzL!D~(eDTb3c?"!X;^]p1r ə=R!)*U6l)OU6l+W>Bl` [%tEXtdate:create2016-11-05T07:33:40-07:006mC%tEXtdate:modify2016-11-05T07:33:40-07:00qktEXtlabelPmIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/81_0.png000066400000000000000000000005501506673203700226530ustar00rootroot00000000000000PNG  IHDR cCgAMA a cHRMz&u0`:pQ<bKGD̿tIME  >Ya]IDATc 5*3+1T7?ݬ?@G3LU+kpk'96gu7Hۏ*5V@@&,ZgtSkP>%tEXtdate:create2016-11-05T07:32:32-07:00rM%tEXtdate:modify2016-11-05T07:32:32-07:00tEXtlabelQj)IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/81_1.png000066400000000000000000000007441506673203700226610ustar00rootroot00000000000000PNG  IHDRgAMA a cHRMz&u0`:pQ<bKGD̿tIME  )G6IDATc @tmfV]Q P*ŐxssdB*Į20 /e`X +a`~ &s`G"A $+d18"@by o)Sfa`)Y L@CQsk˔D>@އ7V]-~{?CL &!)/2UCM%tEXtdate:create2016-11-05T07:32:41-07:00I %tEXtdate:modify2016-11-05T07:32:41-07:008޵utEXtlabelQj)IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/81_2.png000066400000000000000000000011461506673203700226570ustar00rootroot00000000000000PNG  IHDR \gAMA a cHRMz&u0`:pQ<bKGD̿tIME  3T[IDAT(c0!ce+Bhr?D`-md !wE $YJ5e%L+w HT.ȑ w=u _X #M$?J2^im  @@=@MnP (Hwɭ8(.@[ \(Pȝ W!b[3 < rbl@,I Ȝd[010| i4] ѴG Iƍ N<ʸ;C# )-ʖR=(i֊MYA;G| yD |5daݘ%tEXtdate:create2016-11-05T07:32:51-07:00) W%tEXtdate:modify2016-11-05T07:32:51-07:00ttEXtlabelQj)IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/81_3.png000066400000000000000000000013641506673203700226620ustar00rootroot00000000000000PNG  IHDR$+5=ugAMA a cHRMz&u0`:pQ<bKGD̿tIME !kx~IDAT8cO`@^4iٵ8-UbX=fb@Ng1b@Q˄IpQ%ADU:xWZe |HzBl?a:5<E#(XD"'j\*:ֳ=ˡ@W̸B0Il+bcPL1XQ#db@EH;)z "'"s.6EgA2`E f6E@2V`E@L-l"A29`Eo{&>$-`GkJ? 62uRF'%kpW% StOUsC`MpUP5`뾁x/f8B o#)ʡ Q/̺_ȊU^CDEQD ɂXh涩y@j]L5 s`TUA:Dn?a%tEXtdate:create2016-11-05T07:33:01-07:00" h %tEXtdate:modify2016-11-05T07:33:01-07:00SVбtEXtlabelQj)IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/81_4.png000066400000000000000000000016031506673203700226570ustar00rootroot00000000000000PNG  IHDR,56VgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! M`xIDATH_HSQ3E1kARjDCD!=dQ>TA/&aTHAXA$tD *D2Q0lmvܻ==}_Flxvm;-werlnVd`#Fx|68هfQ\rtx?͌jޡĘBks `ۯJ³[Wo۷5^QW!y)Ͳ"&pثQIƴPw<ńGODR#Ԍ7Vz2XЈUcӊa5eUZ}lYڂ@Ukg}TuKhS0YnD&wVU2;,(^9xGGx7R5rjq3~#k@=NpSTJFf8r Acp*ʹqe Ј&*જ + "<]RPM_>BK;]M eRvmy8=JA1%ڃYڅs&Ti1T MBu8=Usif~Q8U;'{8%eViAp$)%Lm`PT-Q97kFY(ϚVBwU)clp.KP\Tnj2U; GU9L\B9 m"Y, C7( ¬slAV`J}rhy&7ZޅsMJ*U$ͻW/5(wHL' F|YžPQjY*WMy\iHv<ʒTg +]?J, `cCQc Ac1Pמx:{OCm%tEXtdate:create2016-11-05T07:33:20-07:00Yd%tEXtdate:modify2016-11-05T07:33:20-07:00xtEXtlabelQj)IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/81_6.png000066400000000000000000000021431506673203700226610ustar00rootroot00000000000000PNG  IHDR=J-.LgAMA a cHRMz&u0`:pQ<bKGD̿tIME !uXIDATXkHSaOn2E$IZHeiVXBRƔ"dB*E0ѕF ] ,UTXy-~h{w 2Gmnb;!.^Akϴ@ Pni Tw"bAcc;qՎtb$Fbgimyg?Җ,nO`A)=T5oL=w:n9[/ڙrL|iͣFaaI+ ,ٙ;^| ]am^itl)8gnطtq\1Psկic F 1E#=APcSmދabũrld-Ls+rF;U0mDљ_4Zn]fq60E YWZ %ln[;3ehӑP1)G~c)G/nMyI9&'RY,jޥ6}k0#%^$5фc/jR~%ߧ>@=Eԩu7TF5m ݮx`"b# " j.!V~iRRt' t95TB_fFm N5d -\,w^kM 7j/J ,ZSJÄȊ!) 
RWY|AJ[FJ<Io&I _: wЎwJSc}EVL==U*baHV EG6YhSjk߄\b']tK41Ntќ(=$o ɝ.9ǚ$1.'3v.)խTև*ɘD 1;iBX6_y(ayw lDKCذߖPBM5={l33"IgdI@>~n&ao̥\c5at@*2banN>ᯪ0H<%Cf&^d^ ­Ps$BdL?SL:Uϼ&fL&ɜg>NSD $3 d@2G,%]L)@dH&3Xd*ЋG=7 _){*W-0);񗅹ArNuOH[eOIJnx>ߔC@{_:$[e D+'IT Ùu,uF:/Q M.,d8}2xh'IJ`{p_@.UrVP/3 43ECWq9 r2cJg0Li#]8JfP2ݟqi3EW,J4k:H@I+e_t5: Fh; wt/T{~ֺN=V=RSq3CP :WgѦW \Z3ګY.y+w@SYnԲ~Ƹv30Κ!_/RHTWd83o%tEXtdate:create2016-11-05T07:33:40-07:006mC%tEXtdate:modify2016-11-05T07:33:40-07:00qktEXtlabelQj)IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/82_0.png000066400000000000000000000005011506673203700226500ustar00rootroot00000000000000PNG  IHDR ggAMA a cHRMz&u0`:pQ<bKGD̿tIME  >Ya6IDATc x!cCR!RAkCc 9 KD4&%tEXtdate:create2016-11-05T07:32:32-07:00rM%tEXtdate:modify2016-11-05T07:32:32-07:00tEXtlabelRcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/82_1.png000066400000000000000000000006071506673203700226600ustar00rootroot00000000000000PNG  IHDR >IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  )G6|IDATc 3nray7W10a-wa%7b`d&g;/a b 'Y  RB)@\J2 2Q Cm2QZGB^@]ZPجDP/go^$Qg)B?BA5@<~100@_A F`iK0%tEXtdate:create2016-11-05T07:32:51-07:00) W%tEXtdate:modify2016-11-05T07:32:51-07:00ttEXtlabelRcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/82_3.png000066400000000000000000000007741506673203700226670ustar00rootroot00000000000000PNG  IHDR+LgAMA a cHRMz&u0`:pQ<bKGD̿tIME !kx~IDAT8c0?BFAg 63!{ݻ,I<bbbo0ŏض@lLNC(I"8ALX˸k8pnPs~C/#6 CAf uC#C#!)8xbd_ be db|0?q& ⿔R%tEXtdate:create2016-11-05T07:33:01-07:00" h %tEXtdate:modify2016-11-05T07:33:01-07:00SVбtEXtlabelRcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/82_4.png000066400000000000000000000010601506673203700226550ustar00rootroot00000000000000PNG  IHDR5gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! M`%IDAT8c0p_}.[Te>OA>Y44Y_- ey. c  b_%`.;/] Jms-Ps>w/Һm91A$u3i<Ġe\Ϥ{ 0kPL`bR eYz߱Ça5V#OCE.(sZ*hPFl5KHVcd5pXd?Br7%Ҩ,]dF%tEXtdate:create2016-11-05T07:33:11-07:00h%tEXtdate:modify2016-11-05T07:33:11-07:00/tEXtlabelRcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/82_5.png000066400000000000000000000012211506673203700226550ustar00rootroot00000000000000PNG  IHDR"@ZOgAMA a cHRMz&u0`:pQ<bKGD̿tIME !qBIDATH=HBQ!AJ4DPSTD$574DDCAD-QR ADAX8TЇ;a(]t;{O24$#B%clъLΔDTDND+pJ|9[sǹ+$2e35DL\CVE8_&I+L6<`(+eiL\Ҍ(+h>;eOOx,tdQ'0)Ӫb0 =E윉v3Y'}d^.!s\AB+jcIN-H(X.7rcuنr[yS dYHCBj TO_Xd$qLCBCl&2$<`>y٧_ _4'z%tEXtdate:create2016-11-05T07:33:20-07:00Yd%tEXtdate:modify2016-11-05T07:33:20-07:00xtEXtlabelRcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/82_6.png000066400000000000000000000012721506673203700226640ustar00rootroot00000000000000PNG  IHDR'JmgAMA a cHRMz&u0`:pQ<bKGD̿tIME !uIDATH?HBQg2" phh!hhh lh" $" ltP2l*'*"=Px~.wy[T{GѬےUUL}TOHI51'I+N}(g1'9\ahuRyQ3+ttə.uqvn=y8nC?v.ϱ>; iƐKLק>^ڶYV?XY<^9{{Z]EB G~r&;ATu0kO; ݚr*?tuw ~T79躆v:b8 Cp: rA1t c> t QHKp5ơ 1"G13%(ʽYn%eWvdD~!kI%tEXtdate:create2016-11-05T07:33:30-07:00 dZ%tEXtdate:modify2016-11-05T07:33:30-07:00{tEXtlabelRcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/82_7.png000066400000000000000000000013431506673203700226640ustar00rootroot00000000000000PNG  IHDR-T3\OgAMA a cHRMz&u0`:pQ<bKGD̿tIME !()*IDATHI(Q' 2HJRv! 
aaAQl!C )IJ@R!`A6)CcH|$뷸uϹ]A:GҺ͕f 6dh١thѪtѢх^$tCQ^ҟy\4!!k*XS" }VAp߮PХOay9m\A Q?o6\$&E{/%sm|W:_Tu` -|&AGJyA.ֆڸw%z3s&8o/+= ~B%tEXtdate:create2016-11-05T07:33:40-07:006mC%tEXtdate:modify2016-11-05T07:33:40-07:00qktEXtlabelRcIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/83_0.png000066400000000000000000000005201506673203700226520ustar00rootroot00000000000000PNG  IHDR tgAMA a cHRMz&u0`:pQ<bKGD̿tIME  >YaEIDATc 'KaKU?K?c3+ G =+`"D%tEXtdate:create2016-11-05T07:32:32-07:00rM%tEXtdate:modify2016-11-05T07:32:32-07:00tEXtlabelSodIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/83_1.png000066400000000000000000000006521506673203700226610ustar00rootroot00000000000000PNG  IHDR >IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  *?IDATc ?Ϫ[@o| Rk.53_v10GUe`X0<&)A|N0 <+T@zE 1sϾ8n}`g2|F]Q $%tEXtdate:create2016-11-05T07:32:42-07:00xkT%tEXtdate:modify2016-11-05T07:32:42-07:00 6tEXtlabelSodIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/83_2.png000066400000000000000000000007711506673203700226640ustar00rootroot00000000000000PNG  IHDR :w-gAMA a cHRMz&u0`:pQ<bKGD̿tIME  3TIDAT(cpigR.l_Bv32_9 [sށ@z6Xd (4loӁBSQq(]v@1-(BT@;maGMǝz`A }'̀ba] jxm )  bbh9.T9M"3Ɵd2T@Xq!+ &IAk AbA׃,D18S :A{@1U_z!$|#|f&O-BOPs6?T3Jc>D8 ʟPāz 9h?utt@=gă1- O+SAN {q ,a^A=Z9 d`. bKfS@X DdŐQƢp{S&e <:gN8b3%tEXtdate:create2016-11-05T07:33:01-07:00" h %tEXtdate:modify2016-11-05T07:33:01-07:00SVбtEXtlabelSodIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/83_4.png000066400000000000000000000012601506673203700226600ustar00rootroot00000000000000PNG  IHDR5gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! M`IDAT8c0%ss ~܂:`3%5Ed Y *`^}+ÀoeB:z! GN06a/.n$9D쟪H>++l)Y3lp6C\),aqd fU34lc^3_swؿdlDCOWE3åEO'_J^Xd>MyX8dC4?."{&km$ٟsR(8D)X)@XR*H1| Hg`58|wH>28DI88(IģDD/s/V Ubд{3I(ƥ<<11gƮ$Dre7BSlJoDN)(%9ٜTO/J-'Ȋ`*R,+_lJ&!Y׏U 0ӕp”?Įga05ٸUP5q+9Uҁ[_nTJkA MGS QUg90 M`%?&ȃ8:wD BR ݏ @,s* 8@C, gm`esMc "e~Jf"' * GVC Q%I\J&-L3`2k=lJpQ%#E 9- #%tEXtdate:create2016-11-05T07:33:21-07:00`.op%tEXtdate:modify2016-11-05T07:33:21-07:00stEXtlabelSodIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/83_6.png000066400000000000000000000015501506673203700226640ustar00rootroot00000000000000PNG  IHDR(JIgAMA a cHRMz&u0`:pQ<bKGD̿tIME !u]IDATHMHawK7P$!;.R*ZICD%OCy(*dLrZ??.>=~< hnHe]=Za.H4da8/d8VC,m p:x!>7Aӛx(MnM%3gt\G]{-7LjNY6ݓFx&PCZKp :i'rVr8^cb@3U^?»[d>`OcfuzY\32eݓE=zaA\ !dv݊ B(F*nk!@Cbc9P2½IhX t-6r,\#7W%tEXtdate:create2016-11-05T07:33:30-07:00 dZ%tEXtdate:modify2016-11-05T07:33:30-07:00{tEXtlabelSodIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/83_7.png000066400000000000000000000017031506673203700226650ustar00rootroot00000000000000PNG  IHDR.TkZLgAMA a cHRMz&u0`:pQ<bKGD̿tIME !()*IDATH_HSQ]i_ȱiL)j){T"# 3F62 Z !D^ Y=TaC%hؔQkX;s=C}ϹAOA ȓݍH09~mF,AT;&8M($"WFB K73R(笈V_@x=69nS`:ߌr8 S tXC Pz5H<Wq`KAWrƯp6orZy.M78)? 
o>F>?v=Z){p>"8{%{0wCayޖn\>+uݍAd!)_Cӕulk$՚gd:}I8GGS7tW*qNiϘk9XSJXRɋŲzkʾT~ݏ-mJqħi厦>A& _1RL%tEXtdate:create2016-11-05T07:33:40-07:006mC%tEXtdate:modify2016-11-05T07:33:40-07:00qktEXtlabelSodIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/84_0.png000066400000000000000000000004631506673203700226610ustar00rootroot00000000000000PNG  IHDR tgAMA a cHRMz&u0`:pQ<bKGD̿tIME  >Ya(IDATc ?St?l}8a@@29?C%tEXtdate:create2016-11-05T07:32:32-07:00rM%tEXtdate:modify2016-11-05T07:32:32-07:00tEXtlabelT^IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/84_1.png000066400000000000000000000004701506673203700226600ustar00rootroot00000000000000PNG  IHDR >IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  *?-IDATc ` a@Rn7yQ;%tEXtdate:create2016-11-05T07:32:42-07:00xkT%tEXtdate:modify2016-11-05T07:32:42-07:00 6tEXtlabelT^IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/84_2.png000066400000000000000000000004751506673203700226660ustar00rootroot00000000000000PNG  IHDR :w-gAMA a cHRMz&u0`:pQ<bKGD̿tIME  3T2IDAT(c=g@s69`Bݨ6 Q,D(.x,5߶%tEXtdate:create2016-11-05T07:32:51-07:00) W%tEXtdate:modify2016-11-05T07:32:51-07:00ttEXtlabelT^IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/84_3.png000066400000000000000000000004771506673203700226710ustar00rootroot00000000000000PNG  IHDR+LgAMA a cHRMz&u0`:pQ<bKGD̿tIME !kx~4IDAT8c0fSOyg@9t8%h`ޙ/M%tEXtdate:create2016-11-05T07:33:01-07:00" h %tEXtdate:modify2016-11-05T07:33:01-07:00SVбtEXtlabelT^IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/84_4.png000066400000000000000000000004771506673203700226720ustar00rootroot00000000000000PNG  IHDR5wԶgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! M`4IDAT8c0<8gHpfkPIL0*I;I-!*<%tEXtdate:create2016-11-05T07:33:11-07:00h%tEXtdate:modify2016-11-05T07:33:11-07:00/tEXtlabelT^IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/84_5.png000066400000000000000000000005131506673203700226620ustar00rootroot00000000000000PNG  IHDR!@'ALgAMA a cHRMz&u0`:pQ<bKGD̿tIME !qB@IDATHcO0PCŧx !ҸClTŨQ*`Ћ[V0bN>`o{%tEXtdate:create2016-11-05T07:33:21-07:00`.op%tEXtdate:modify2016-11-05T07:33:21-07:00stEXtlabelT^IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/84_6.png000066400000000000000000000005121506673203700226620ustar00rootroot00000000000000PNG  IHDR'JmgAMA a cHRMz&u0`:pQ<bKGD̿tIME !u?IDATHcO`K @u 582cTݨQuFՍpQuꆂ:N O%tEXtdate:create2016-11-05T07:33:30-07:00 dZ%tEXtdate:modify2016-11-05T07:33:30-07:00{tEXtlabelT^IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/84_7.png000066400000000000000000000005311506673203700226640ustar00rootroot00000000000000PNG  IHDR,TܞqgAMA a cHRMz&u0`:pQ<bKGD̿tIME !()*NIDATHcO`$&*g \ULbH ,'ݣG*UYa9IDATc 7!'΅Ջv@ xaaOs%tEXtdate:create2016-11-05T07:32:32-07:00rM%tEXtdate:modify2016-11-05T07:32:32-07:00tEXtlabelUn0IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/85_1.png000066400000000000000000000005611506673203700226620ustar00rootroot00000000000000PNG  IHDR:dgAMA a cHRMz&u0`:pQ<bKGD̿tIME  *?fIDATc .30030|x-U~ï j hA{0q0>/ Mۂe9cQOe>`}p,%tEXtdate:create2016-11-05T07:32:42-07:00xkT%tEXtdate:modify2016-11-05T07:32:42-07:00 6tEXtlabelUn0IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/85_2.png000066400000000000000000000006441506673203700226650ustar00rootroot00000000000000PNG  IHDR 3WgAMA a cHRMz&u0`:pQ<bKGD̿tIME  3TIDAT(c00/rߏ c @@!(|(d`_~O0Q^pYPѯ&@NXȘ;@æ@& H\&-LD>he@Do zFI7Om똱#t2h@/%tEXtdate:create2016-11-05T07:32:51-07:00) W%tEXtdate:modify2016-11-05T07:32:51-07:00ttEXtlabelUn0IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/85_3.png000066400000000000000000000007331506673203700226650ustar00rootroot00000000000000PNG  IHDR+N]gAMA a 
cHRMz&u0`:pQ<bKGD̿tIME !kx~IDAT8c0\ ף#Kr)Q*{ bdJ~%(rB@}$dIzu)s)d,H$,b*~E b2e/-ow̭* PW] h@[&r#$?s< 9|Z[58ee?%tEXtdate:create2016-11-05T07:33:11-07:00h%tEXtdate:modify2016-11-05T07:33:11-07:00/tEXtlabelUn0IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/85_5.png000066400000000000000000000010731506673203700226650ustar00rootroot00000000000000PNG  IHDR*@IǺgAMA a cHRMz&u0`:pQ<bKGD̿tIME !qB0IDATHcO4`r0*"pTQJi(J!=JC>+mp*=Ut" IƟ`kaSsBaS 3(bQjL Ds(+Rw&(_ c #T 7ZHC%CirL|\Y ͷow 2+_IyQJX1g[eFP1?RUtS?]Q&G gJ=QN&Yȶ"FQJW Ah'\D%tEXtdate:create2016-11-05T07:33:21-07:00`.op%tEXtdate:modify2016-11-05T07:33:21-07:00stEXtlabelUn0IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/85_6.png000066400000000000000000000011651506673203700226700ustar00rootroot00000000000000PNG  IHDR1J7gAMA a cHRMz&u0`:pQ<bKGD̿tIME !EjIDATHcO*`ЏS]  Sn ۣ:Fu1cc;t(e0!:\?tLxQ?ť#DG? Wa5LG- :.tб7;L"v!0 0dtDcLLuGL] hKoqk`Xб!E8oq92wp I,&72POt5@bB@4\6|n>n cL9ꝏFR^t劅#-ȄdG5!N& cwGu1:)kJE%tEXtdate:create2016-11-05T07:33:31-07:00o%tEXtdate:modify2016-11-05T07:33:31-07:00RtEXtlabelUn0IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/85_7.png000066400000000000000000000012441506673203700226670ustar00rootroot00000000000000PNG  IHDR8T۫gAMA a cHRMz&u0`:pQ<bKGD̿tIME !)^-ЄIDATXcO&`B~2G5j8qTa/>"i+ yOc&\c8y|^C5EӨPpyxc0,kq\LC᪢aq{P .&&p-npkzG1koV{Mc:Bf$sn`h$/IOe$O(mPBeI5R [@%i@S84 uRgGf>U{U@4/?w5 +EqF%DOg%^zKpbt;.PV+`blQ1:ewVVX*r2j$QjQFN}*W.%tEXtdate:create2016-11-05T07:33:41-07:00Af%tEXtdate:modify2016-11-05T07:33:41-07:00KtEXtlabelUn0IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/86_0.png000066400000000000000000000005301506673203700226560ustar00rootroot00000000000000PNG  IHDR 2gAMA a cHRMz&u0`:pQ<bKGD̿tIME  >YaMIDATc y!7;d}Gf߽n0kW02[o(U j2SL%tEXtdate:create2016-11-05T07:32:32-07:00rM%tEXtdate:modify2016-11-05T07:32:32-07:00tEXtlabelV?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/86_1.png000066400000000000000000000006621506673203700226650ustar00rootroot00000000000000PNG  IHDR:dgAMA a cHRMz&u0`:pQ<bKGD̿tIME  *?IDATӭ1 aX X L>RbgP2(tmfJMr1swٛޯ^=6l^Qd(T{EN^`p`ЗKaP^? = |>vMC9L_V;ļ(f]? [ riX%tEXtdate:create2016-11-05T07:32:42-07:00xkT%tEXtdate:modify2016-11-05T07:32:42-07:00 6tEXtlabelV?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/86_2.png000066400000000000000000000010211506673203700226540ustar00rootroot00000000000000PNG  IHDR >igAMA a cHRMz&u0`:pQ<bKGD̿tIME  3TIDAT(c0 IA;P3BP@?U};00hAẃB  >đ0f00<PGį T<|$~_s$\|=Pd[#B3~ &A#^600?;Cx"A[b5=4!"GC`?S%tEXtdate:create2016-11-05T07:32:51-07:00) W%tEXtdate:modify2016-11-05T07:32:51-07:00ttEXtlabelV?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/86_3.png000066400000000000000000000011451506673203700226640ustar00rootroot00000000000000PNG  IHDR+cgAMA a cHRMz&u0`:pQ<bKGD̿tIME !kx~ZIDAT8c0MdEhq e@s kv/ ( &A$dɿ @`_@6;$M P778=H Pl@?mF;A-z*{I~$!g Rۈ APYw`?D"GށBHh> v),@!%rCc0$b,E=,;H O!yIv]HA* $cb8Rd cJ(,} `)VٝIwPdec?;xQ*?eo0xM<%~ QY$dd%tEXtdate:create2016-11-05T07:33:01-07:00" h %tEXtdate:modify2016-11-05T07:33:01-07:00SVбtEXtlabelV?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/86_4.png000066400000000000000000000013041506673203700226620ustar00rootroot00000000000000PNG  IHDR#5= gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 
M`IDAT8=,aCU1$" ђN6E6b0R6>&Ab"CQ1{"xy/{t#i h1MU19{5S "gLTr?զ'1'rC70HޏYS2VF0_hwdfY WwL&i_'H3?g1d\bH%) K &9ٹ"kɓTdΥxH΁nOaf< K6.1A>ڋz&/_+YYWrmG3FjW&ccN0t!M+c3q6d4 >eH\`>_ k -\4)@䯶cA0A\ b9c:x3Bf7W棆?1C3ܮylJ$cC7^Zt5|%tEXtdate:create2016-11-05T07:33:11-07:00h%tEXtdate:modify2016-11-05T07:33:11-07:00/tEXtlabelV?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/86_5.png000066400000000000000000000014201506673203700226620ustar00rootroot00000000000000PNG  IHDR*@IǺgAMA a cHRMz&u0`:pQ<bKGD̿tIME !qBIDATHK(DQkȫke,)y5EVV,(lbce9嵱bA,l(E+ E FHw>333d}sU }Vc"X8<8iƭ \ I!nj{G.}1u-ϹN#b:Q+ kS \3XhHJ]xn$=>']-7"z9aÌ."VJc薪V$e gI)uZ;pepTa0lQaRj)G!FtGDJ#Uҏr$&]rٓQ֊a.);4!rJ5/Iqtּ0?G.O4:F3BRwe!ɺn p!GpOvpΫOV7n ;-N.e*1k5B:;U-S8^'Z+40Vik0fw3-;ψ1u)z29t" pAxTߗs;n>Mn x-{]%Ak5ppSȨV4f@0lf8+.8}l<2 zt>]xP~eyΉꌝ F䈱j:ΜO$CMs.`46\ 65oe DE@L@ԒLT/S24WML`jex'D,0^BL0`,B>%tEXtdate:create2016-11-05T07:33:31-07:00o%tEXtdate:modify2016-11-05T07:33:31-07:00RtEXtlabelV?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/86_7.png000066400000000000000000000016761506673203700227010ustar00rootroot00000000000000PNG  IHDR8T۫gAMA a cHRMz&u0`:pQ<bKGD̿tIME !)^-ЄIDATXOHqU\H-"xPtV[ !APuX+$C!:d!=ĶP;Zg}έ9ov%(o= \aV 7pTb8p] ';B3ԝn~D>({D΄/5\۱C+dx >u[tk^xpۖ$x4/$ipSpo^0Na aįXL=ɀ 9>o}kd,nqz`.8mSV? 88ੀ\RlY`ٛ>{MZ0] R&qaý€t jGы|]_ 84 vqɺ?*oF"_E{{bR79#-N岓^D(R'5({/qAwk(.~5m_|q5PxzD8s"\qEB:@/lp[iP Z|-^1s;Tf,p{L >R/8bT6T j 6CNDB| px/doJ7bX|2Cwiǀ˃UO]6$Q,o)ńTUX; zw4[%tEXtdate:create2016-11-05T07:33:41-07:00Af%tEXtdate:modify2016-11-05T07:33:41-07:00KtEXtlabelV?IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/87_0.png000066400000000000000000000005701506673203700226630ustar00rootroot00000000000000PNG  IHDR n3gAMA a cHRMz&u0`:pQ<bKGD̿tIME  >YamIDATc?/ ?a/ g6y@e?s_T,?U0g7;!*oS}? ħC9pFiFxf%tEXtdate:create2016-11-05T07:32:32-07:00rM%tEXtdate:modify2016-11-05T07:32:32-07:00tEXtlabelWh IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/87_1.png000066400000000000000000000010111506673203700226530ustar00rootroot00000000000000PNG  IHDR3gAMA a cHRMz&u0`:pQ<bKGD̿tIME  *?IDATӵϱKBQSP M 0*$hV֠""IjSY{gp(,MQqvw{Tp<'v4=%3~NsnDM²5nixMޥ^D5w.q,5Wx܂gRdÎ|V~ O!>(uD^!w,Q^bɀI"Ļfx0Íxi*m&O&Y`?{%tEXtdate:create2016-11-05T07:32:42-07:00xkT%tEXtdate:modify2016-11-05T07:32:42-07:00 6tEXtlabelWh IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/87_2.png000066400000000000000000000012651506673203700226670ustar00rootroot00000000000000PNG  IHDR V%(gAMA a cHRMz&u0`:pQ<bKGD̿tIME  4$0IDAT8Փ?hSQH@&%]BAE0T(BJv(bi5t ,:b v(Ғj|7yj))ӞBFsɝ}K'h"r33F/xz"2S/q՝\Dr:|=zA~͗$ $_.r͐ȧ ?9T%uY gcNҒMM C ' ̇I?ȿ?%tEXtdate:create2016-11-05T07:33:01-07:00" h %tEXtdate:modify2016-11-05T07:33:01-07:00SVбtEXtlabelWh IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/87_4.png000066400000000000000000000017351506673203700226730ustar00rootroot00000000000000PNG  IHDR45+OgAMA a cHRMz&u0`:pQ<bKGD̿tIME ! 
M`IDATHYHTQLS* cՃEFE+$AH>I")8-DKQ F PMjh DVFQFIER1V44_9sgýo1hR-a`VbCk)zIYX [YzԂ>z?gIT1vĂQ0LS;Ej?"͡#!&c=V3rzrD N:!ao9a_&(IS!Sk4DV{UD:"M b?dLw[Ge'6 Q+ZxvT}8\-"'L"V/x- yS׌^Bqʪ)f#MQB1[A"%TNg-R4 >#yJvi9݋)qY*Xl6kUIJt-)m&w*(kF1Id/({>lYvN򧟶,)CꠑeZi?J{ LJ\k@t5I9`o tzrm=c $+gY,"Yh\F)XNEJh097nkW rqBCK0ЦPLR{BZqT6kQL4\12rYp4c ט[ÝPn0DzN7= =XݘpÈ'n6mA}*]2۠ڏ⌱N2ug4{xH`U֤?4!Tem$Aw&\s5{Sg/0,/bTnn\oZK7c wBq[zC8~S3xzV]b#Rqv%de>gzTRMW 7lALwyoniam R֒Je7DfDnZl#C_Vpa}VCiJl XEd7Yvɮb:DV`/k%ǰ#UDDv2Y0;?x0aZ8\M&ib쑬LhQukN1 |ݍ:A/'/=ڴJ %Bl_RePƏgCOʵM ipnvpx)rH 'sc<զ<*Ff%i1oOmlZt61bg+WZ#;;@="gMiq,[U>B'lV1`_vmž'lV8C0%tEXtdate:create2016-11-05T07:33:21-07:00`.op%tEXtdate:modify2016-11-05T07:33:21-07:00stEXtlabelWh IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/87_6.png000066400000000000000000000024111506673203700226650ustar00rootroot00000000000000PNG  IHDRGJEgAMA a cHRMz&u0`:pQ<bKGD̿tIME !EIDATXyMQϼ1co0%B,Hvce$[h5d!{2(0W3{{wc~}ν 8/^8Ui~Gyw}<枊\O]̗u6cjZUsIky2c=\81u~ [kFbtYقҼG|LJ:烸[*sMIP( 0o+0\:NbZGpPӼKC16Uo W|%k"fܾXui' s%;yۥPvmyK]Ip[G͜k`$q^šzjirllFC,?qZR.﭅o"eNO 'dB P(rh9gxUq:go"iKٳgJS,qB͠l3b^@ްshny/Rh!-98W<2hpKGיϝ"`ny+g7ĆU<_6h z*k@*Х:HUuђ YY:j4:1fXz3+D> >cFo20ݞhN0ewoϛԥ60ͨG<.u6U:2fjz=gg#I<Ƒ{ix<\~Czk6c%:Q[+QWntuF$B$:h,R u]Ȏx$z8*xJ[Z "z2%`[B|Zm;,Z ng(,b2*A;ĕ1H ZJߤHTUOŔrLXĽW13p4MNIGy,nzy{'1m?~5RҍKC(Jthy,2⿜(g Y,5@Doo#nZZ5ibo̗(bî PlCe|c:BK:-|q0,>O` +p Ľ(}` X Ui7u:acKS$6ԏ;"` |?ؠ/-2 σ-F`';-C+%tEXtdate:create2016-11-05T07:33:41-07:00Af%tEXtdate:modify2016-11-05T07:33:41-07:00KtEXtlabelWh IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/88_0.png000066400000000000000000000005221506673203700226610ustar00rootroot00000000000000PNG  IHDR 2gAMA a cHRMz&u0`:pQ<bKGD̿tIME  !IiGIDATc eWߠY^a)! {!obz IqHR\%tEXtdate:create2016-11-05T07:32:33-07:00%tEXtdate:modify2016-11-05T07:32:33-07:00EtEXtlabelXIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/88_1.png000066400000000000000000000006731506673203700226710ustar00rootroot00000000000000PNG  IHDR:dgAMA a cHRMz&u0`:pQ<bKGD̿tIME  *?IDATc ebX bd`>Og<7B 3H~Og|pa(C71F&xH.H< "@k1,8  o 0B ̿0$ ķg0 `(ُ+^%tEXtdate:create2016-11-05T07:32:42-07:00xkT%tEXtdate:modify2016-11-05T07:32:42-07:00 6tEXtlabelXIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/88_2.png000066400000000000000000000010341506673203700226620ustar00rootroot00000000000000PNG  IHDR lTgAMA a cHRMz&u0`:pQ<bKGD̿tIME  4$0IDAT(c 0 dO1```b1Fh%%@U } naY10(|Ea6В`r+Y00(a`Eqyf5gc)h~(4lpxA;hPq9Lt3  /:```>;m)Ht;,~@^b`;(Cc H%tEXtdate:create2016-11-05T07:32:52-07:00%tEXtdate:modify2016-11-05T07:32:52-07:00ŜvtEXtlabelXIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/88_3.png000066400000000000000000000011761506673203700226720ustar00rootroot00000000000000PNG  IHDR+Ŭ$gAMA a cHRMz&u0`:pQ<bKGD̿tIME !kx~sIDAT8c0 f ~$D'-&K {5 &MC czeH;Ddze [Q6F|C)?(\b es/`kGJw ]5Q\e2% {)H-a]'I<K! Tm 1 ^fASG)(SxP_0?۠& BX>a`¯^eE(TPbyPc9RFbǥ0 "o6#CX?) w@læ"qZ05y7X1L4B ShFc؃p-Zr(kFUZ@>),:1":͠).8 h$!bڿ"j!iP("D$5]S@SK "!pT @P.iT *UHB%5-%tEXtdate:create2016-11-05T07:33:21-07:00`.op%tEXtdate:modify2016-11-05T07:33:21-07:00stEXtlabelXIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/88_6.png000066400000000000000000000016451506673203700226760ustar00rootroot00000000000000PNG  IHDR/JuRgAMA a cHRMz&u0`:pQ<bKGD̿tIME !EIDATHOHQYm[5<%$(DA-xc"ȃaDA)R(:%B`%=x% 'ԶP*o潙7:tw~̼f4eeZ^#s 嘂rr~^Yx#B!g,2+&vC?AxuRzķ.pQwy`#߃L0&𩕈WlR߂X/^ !u+:$7dI"n"pyi" .kVSeU}0{Y"íg;w?gwО_ͷ_ЮUHצ97TVrf44fÊlsuH}5Rsgn}yiCA.&}/ʼn+dB9n@i! 
?8O cRmTaoǻJ_؆[&e9ix-_,l8Zl%tEXtdate:create2016-11-05T07:33:31-07:00o%tEXtdate:modify2016-11-05T07:33:31-07:00RtEXtlabelXIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/88_7.png000066400000000000000000000017651506673203700227020ustar00rootroot00000000000000PNG  IHDR5T% SgAMA a cHRMz&u0`:pQ<bKGD̿tIME !)^-ЄIDATXKHQ׮)$z+)D&"2 hZ$Y-"Yj#-"Ea-au13 -Er_9sqO"j߹3sf a|z yAPMa[nC^NtuYQpg9XUU!y|yxNe;j/Eᢧ_`W.#d49VEgYGlp}"!ydv|WC%c_G2NNXCrTC QcsQjF]<: e,MQ>OT=.jԟXj8++p¨v]P8E :z+ڤkQft3V=58OVeNBu H=Pj(smu8Õ!]U$M kc0##╆G+P,OQVR ~6a^lhžwms(qSL^i;D. 1ߓ>~c$찣ڊ5&# ʥAmFlD6mwI2ꭏ3I`'*j\G<͢n)V!A ^ʌۊډ>RG1ri&ÉdksF=t I56K/R!L[pP85SGNk:1Ԕo1hJ%tEXtdate:create2016-11-05T07:33:41-07:00Af%tEXtdate:modify2016-11-05T07:33:41-07:00KtEXtlabelXIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/89_0.png000066400000000000000000000005011506673203700226570ustar00rootroot00000000000000PNG  IHDR ggAMA a cHRMz&u0`:pQ<bKGD̿tIME  !Ii6IDATc /DR{UՁ@3`8s0Kv( %tEXtdate:create2016-11-05T07:32:33-07:00%tEXtdate:modify2016-11-05T07:32:33-07:00EtEXtlabelY"IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/89_1.png000066400000000000000000000006031506673203700226630ustar00rootroot00000000000000PNG  IHDR:dgAMA a cHRMz&u0`:pQ<bKGD̿tIME  *?xIDATc 1^(04l͟@+ ܓL@/c}.C7g*N }srX1"ĮB#H`;ZR$w ( ڍ.U w0cSH B ϭ QA2I ry:8a? )Xv6H{M/r %tEXtdate:create2016-11-05T07:33:01-07:00" h %tEXtdate:modify2016-11-05T07:33:01-07:00SVбtEXtlabelY"IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/89_4.png000066400000000000000000000010711506673203700226660ustar00rootroot00000000000000PNG  IHDR"5f=gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! M`.IDAT8cO0dg=,Y TrNW d0X-4~T%Px"x yQXM("_ v T, 8#Lwb >-$%,jj`Jfн c V5 N`W%0Z}Yb)Ѹ,.`KaƴXM/Œ |k5X 3y`Ǫ~CX cq)y N8 V!`%G*`TWظwF%tEXtdate:create2016-11-05T07:33:11-07:00h%tEXtdate:modify2016-11-05T07:33:11-07:00/tEXtlabelY"IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/89_5.png000066400000000000000000000011541506673203700226710ustar00rootroot00000000000000PNG  IHDR(@M2jgAMA a cHRMz&u0`:pQ<bKGD̿tIME !qBaIDATHcO$`B'00M&XP 6 Xupxb* V] P,t ;Bij@U",)/DE)jpb0 " !H2I&pu?!"=hQpAS^ " _b)D, UrqHq!OX4 7?! ?Vߕ ҩB<+%nF @8BNO9* ǣ\g| `)PuA (| UxPG*UHB`TB:()^4Ӯ%tEXtdate:create2016-11-05T07:33:21-07:00`.op%tEXtdate:modify2016-11-05T07:33:21-07:00stEXtlabelY"IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/89_6.png000066400000000000000000000012471506673203700226750ustar00rootroot00000000000000PNG  IHDR.J9gAMA a cHRMz&u0`:pQ<bKGD̿tIME !EIDATHM(afڔh%."q#˅V.vR+\N.K9P 5Al~f>\54J34 p(5P;x{9~bE՛HKnTSv ԠӑQFnk!l]hVS0֘>)vgb xx7}5y8gVcNn~[M&jCdiIR{sRs:tw"'oF_PVV$eô3 jʉ! 
.xl.]ﵒ6O%tEXtdate:create2016-11-05T07:33:31-07:00o%tEXtdate:modify2016-11-05T07:33:31-07:00RtEXtlabelY"IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/89_7.png000066400000000000000000000013471506673203700226770ustar00rootroot00000000000000PNG  IHDR5T% SgAMA a cHRMz&u0`:pQ<bKGD̿tIME !)^-ЄIDATXM(a)Ju(9:8MR$'r(Q(RPJ$Ȳ?>Όi˼y{wwe ޯVzimh9΃׊y@WkuBPթ2aUE6cWAXT3QQ7fYmP`\}Ma5J5r1[SP]4?H8)_Џ=(ta}rdϥQM^it&Sü&d <+P4B Y*v28Qx((=f9ߕ>7krpKWkBg~OIȶ*k"nv~=*,au垉Nh(T0[ H|T!A-0%dc5ZNCaZzKDt@7نN>jnezEB14P2Zy\2sn6٣%tEXtdate:create2016-11-05T07:33:41-07:00Af%tEXtdate:modify2016-11-05T07:33:41-07:00KtEXtlabelY"IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/90_0.png000066400000000000000000000005161506673203700226550ustar00rootroot00000000000000PNG  IHDR ggAMA a cHRMz&u0`:pQ<bKGD̿tIME  !IiCIDATc o??vn\V8Xl}"?*PC3X1tBG1D%tEXtdate:create2016-11-05T07:32:33-07:00%tEXtdate:modify2016-11-05T07:32:33-07:00EtEXtlabelZsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/90_1.png000066400000000000000000000006031506673203700226530ustar00rootroot00000000000000PNG  IHDRզgAMA a cHRMz&u0`:pQ<bKGD̿tIME  *?xIDATcg<{s80 "sk2DGp /Z#I J?ӌ ς X!$΃%n &+ ݿTf%tEXtdate:create2016-11-05T07:32:42-07:00xkT%tEXtdate:modify2016-11-05T07:32:42-07:00 6tEXtlabelZsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/90_2.png000066400000000000000000000006641506673203700226630ustar00rootroot00000000000000PNG  IHDR lTgAMA a cHRMz&u0`:pQ<bKGD̿tIME  4$0IDAT(c 0g@o¦Tm(Ha``:n] 700E4u6{)20C[$znЪwVAo EVE@21׃AZUsCUJҦpp,z %.n XD0`%-= (fhi%tEXtdate:create2016-11-05T07:32:52-07:00%tEXtdate:modify2016-11-05T07:32:52-07:00ŜvtEXtlabelZsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/90_3.png000066400000000000000000000007641506673203700226650ustar00rootroot00000000000000PNG  IHDR+Ŭ$gAMA a cHRMz&u0`:pQ<bKGD̿tIME !)IDAT8c0;̀ pi- pG@){XVR2_ʅ- );G(tT V(%\&Hp(H~$5n8bikMPJ;V5I9&[(r\H[lrR"%M4Pwld"K-d)I\nvma%tEXtdate:create2016-11-05T07:33:02-07:00r%tEXtdate:modify2016-11-05T07:33:02-07:00b,tEXtlabelZsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/90_4.png000066400000000000000000000010451506673203700226570ustar00rootroot00000000000000PNG  IHDR!5>gAMA a cHRMz&u0`:pQ<bKGD̿tIME ! )IDAT8-hQdd8 `d03" f,.hAL2L _{=%=^%DE2j}Rb}(E6_{1 \QQ*.?PQH ~ =.Q3PQͩ0 f9;E;~R˃yo3MJل-{B%tEXtdate:create2016-11-05T07:33:12-07:00Ir%tEXtdate:modify2016-11-05T07:33:12-07:00ʲtEXtlabelZsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/90_5.png000066400000000000000000000011231506673203700226550ustar00rootroot00000000000000PNG  IHDR'@91 gAMA a cHRMz&u0`:pQ<bKGD̿tIME !qBHIDATH/HQ9a04j\ jAM!*hX+b"hR42); 억޽`k;q*67cube;ߗ %|ݿW}ѥ#M-jWҥ7E6Xծe YhwH]nڭ}hw;vdZ%hCs[ iw,{}%YIR[!W e$ːn閡&YwI>#. 
Ղ+6vr]wN]y&;;v&9r- &C+%tEXtdate:create2016-11-05T07:33:21-07:00`.op%tEXtdate:modify2016-11-05T07:33:21-07:00stEXtlabelZsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/90_6.png000066400000000000000000000012041506673203700226560ustar00rootroot00000000000000PNG  IHDR-J gAMA a cHRMz&u0`:pQ<bKGD̿tIME !EyIDATH+Ho~X ,Xղ6X,jM|EhZ,N1XbS0aCd |;݇EqqwY>ղK-aћd=&%n|s$ze)ʸi)}s$rU)uJk/θ +Rztue=Eemqt]le] >z8 cqt]jg+zAJ$@1>B#]{#^uA~ A`'@'ޑWIY)%]?"@0&]c%@/JCKH:g}=}0h ^GBEOBkdIUWuUK~Z %tEXtdate:create2016-11-05T07:33:31-07:00o%tEXtdate:modify2016-11-05T07:33:31-07:00RtEXtlabelZsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/90_7.png000066400000000000000000000012711506673203700226630ustar00rootroot00000000000000PNG  IHDR5T% SgAMA a cHRMz&u0`:pQ<bKGD̿tIME !)^-ЄIDATXO(qL4RV 2%qp99bJ$vpA;(R9JȟZYq<.&_~}iHe?.\)j^MEm!{fHAU RPqm29_Ԯw^ϨWIZ1#h`13*Ւ VwE̟`u$h`madjMRS0&%C:2>\>;*XմK# ᪛΋+g#XKIpee=!Xm}p-\Ij`70jUVVFF7ZB:P>p5,HԡUQ5V%5 XʨW2udu*43'X;RE*hTQ6%)NEYpPo*?onuǒ%tEXtdate:create2016-11-05T07:33:41-07:00Af%tEXtdate:modify2016-11-05T07:33:41-07:00KtEXtlabelZsIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/91_0.png000066400000000000000000000004441506673203700226560ustar00rootroot00000000000000PNG  IHDR IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  #IDATc`w?I[ biB+}%tEXtdate:create2016-11-05T07:32:35-07:00 %tEXtdate:modify2016-11-05T07:32:35-07:00TtEXtlabel[aC7IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/91_1.png000066400000000000000000000004551506673203700226610ustar00rootroot00000000000000PNG  IHDR)1gAMA a cHRMz&u0`:pQ<bKGD̿tIME  -@[%"IDATc :B?0tZ{`^nke%tEXtdate:create2016-11-05T07:32:44-07:00"n%tEXtdate:modify2016-11-05T07:32:44-07:00jtEXtlabel[aC7IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/91_2.png000066400000000000000000000004611506673203700226570ustar00rootroot00000000000000PNG  IHDR n1gAMA a cHRMz&u0`:pQ<bKGD̿tIME  6>0&IDATc @t7BfΝO!f%tEXtdate:create2016-11-05T07:32:54-07:00"%tEXtdate:modify2016-11-05T07:32:54-07:00LLtEXtlabel[aC7IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/91_3.png000066400000000000000000000004701506673203700226600ustar00rootroot00000000000000PNG  IHDR +u]gAMA a cHRMz&u0`:pQ<bKGD̿tIME !-IDAT(c @8ޝ"xQL ~ޙo2}5;%tEXtdate:create2016-11-05T07:33:04-07:00p3G%tEXtdate:modify2016-11-05T07:33:04-07:00ntEXtlabel[aC7IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/91_4.png000066400000000000000000000004741506673203700226650ustar00rootroot00000000000000PNG  IHDR5\]gAMA a cHRMz&u0`:pQ<bKGD̿tIME !'e1IDAT(c  P`M_g{$> ?* ! )lz%tEXtdate:create2016-11-05T07:33:14-07:00G4%tEXtdate:modify2016-11-05T07:33:14-07:00tEXtlabel[aC7IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/91_5.png000066400000000000000000000005011506673203700226550ustar00rootroot00000000000000PNG  IHDR@2xwgAMA a cHRMz&u0`:pQ<bKGD̿tIME !о6IDAT8c3;u|E;BBB$df(B)c !O%tEXtdate:create2016-11-05T07:33:24-07:002@%tEXtdate:modify2016-11-05T07:33:24-07:00CKktEXtlabel[aC7IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/91_6.png000066400000000000000000000005071506673203700226640ustar00rootroot00000000000000PNG  IHDRJf+VgAMA a cHRMz&u0`:pQ<bKGD̿tIME !" 0IDAT(ϥ! 
AEკ QM($I$$I2A2I$f4I( l6ۛ=~/]wT0UyL`QWOOŪ$*|Tl {g TN>*V!骬`PTT*"|W9PJ?* X)*TBoO%tEXtdate:create2016-11-05T07:32:54-07:00"%tEXtdate:modify2016-11-05T07:32:54-07:00LLtEXtlabel\֔IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/92_3.png000066400000000000000000000007751506673203700226710ustar00rootroot00000000000000PNG  IHDR+LgAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDAT8пGpw]rK9NSD4PS4ihHKCD%ѐ"!R[$jI:=@CS/cJgxgc9eMxLq<Úxl`S%|)h~}_PU(ߧxi܉aE ᾰ,=%7 ńia;Otж߀4hs1[ Gp&Uao /IX둮ڮ X K{gmT8Ox* ~/Iy a/"%tEXtdate:create2016-11-05T07:33:14-07:00G4%tEXtdate:modify2016-11-05T07:33:14-07:00tEXtlabel\֔IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/92_5.png000066400000000000000000000011441506673203700226620ustar00rootroot00000000000000PNG  IHDR!@'ALgAMA a cHRMz&u0`:pQ<bKGD̿tIME !оYIDATH;(aRA drɂ26#6$†b#2. SwQ'<=}{lZo#:eD]+R-Z2JP" DDdcQ{9iD+WV/2CbGa $EAX3V)DbUD$[ KjL{aZ2F1 "^,$IE.@XD7K0%-Qka$o@X ӄ e+$>O@ؐD;[G֤%$% 7AؼD !AX-"Dlzʝ_U*F&%tEXtdate:create2016-11-05T07:33:24-07:002@%tEXtdate:modify2016-11-05T07:33:24-07:00CKktEXtlabel\֔IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/92_6.png000066400000000000000000000012151506673203700226620ustar00rootroot00000000000000PNG  IHDR&J`ySgAMA a cHRMz&u0`:pQ<bKGD̿tIME !" IDATH?,ZƆԨUCR1JVm(ZIQ4FiLRth':c0|/O.py~z;%+6Gewl+mnsY_tZ}bbĜ! bĞ\̖"v)6frGĦ5宀٦2uyv8lL؞,^U։U5vE+9r%`[ {Y\X rybbb 0K%ռ.rbbAef}|)FFcf3eƉ9As`{i5`P.I"6fCby0f@ Rbqbc?dA4ذBrb9(_ck':\?%tEXtdate:create2016-11-05T07:33:34-07:00@I%tEXtdate:modify2016-11-05T07:33:34-07:00tEXtlabel\֔IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/92_7.png000066400000000000000000000013451506673203700226670ustar00rootroot00000000000000PNG  IHDR,TܞqgAMA a cHRMz&u0`:pQ<bKGD̿tIME !,.G$ IDATHODq_ZfM6"4fE44Eԡ53bbu(ku(F2QCRYU~~vzvTs~=}DOD`wg718F vkn~Ǒ[ҵO:N"r@[6$Qp)‹'Z"JnDxD8a%݃\Ml/~Q1kCIC]X3[ s|a`S=½™[0M/G9>@)݀X vG.? 
g#}q8VtsOlJ C vS7887Vt ¿8>`W-}[ ^v|l`+Wq=K[ҝp1e=8j`+R )[t/ތӷ`{t^'+ݎ4(eµ%tEXtdate:create2016-11-05T07:33:43-07:001w%tEXtdate:modify2016-11-05T07:33:43-07:00@btEXtlabel\֔IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/93_0.png000066400000000000000000000004441506673203700226600ustar00rootroot00000000000000PNG  IHDR IgAMA a cHRMz&u0`:pQ<bKGD̿tIME  #IDATc8?lyM+v%tEXtdate:create2016-11-05T07:32:35-07:00 %tEXtdate:modify2016-11-05T07:32:35-07:00TtEXtlabel]IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/93_1.png000066400000000000000000000004641506673203700226630ustar00rootroot00000000000000PNG  IHDRظgAMA a cHRMz&u0`:pQ<bKGD̿tIME  -@[%)IDATc A΃ ]€f̈́K"%tEXtdate:create2016-11-05T07:32:45-07:00)%tEXtdate:modify2016-11-05T07:32:45-07:00̑ftEXtlabel]IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/93_2.png000066400000000000000000000004641506673203700226640ustar00rootroot00000000000000PNG  IHDR gAMA a cHRMz&u0`:pQ<bKGD̿tIME  6>0)IDATc @`^|*Q6}X@cn%tEXtdate:create2016-11-05T07:32:54-07:00"%tEXtdate:modify2016-11-05T07:32:54-07:00LLtEXtlabel]IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/93_3.png000066400000000000000000000004731506673203700226650ustar00rootroot00000000000000PNG  IHDR +u]gAMA a cHRMz&u0`:pQ<bKGD̿tIME !0IDAT(c v3@|/@D(o>?>F塦<<\Kwab%tEXtdate:create2016-11-05T07:33:04-07:00p3G%tEXtdate:modify2016-11-05T07:33:04-07:00ntEXtlabel]IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/93_4.png000066400000000000000000000004751506673203700226700ustar00rootroot00000000000000PNG  IHDR5;_gAMA a cHRMz&u0`:pQ<bKGD̿tIME !'e2IDAT(c0Era `F""C$H"1"hv(o%tEXtdate:create2016-11-05T07:33:14-07:00G4%tEXtdate:modify2016-11-05T07:33:14-07:00tEXtlabel]IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/93_5.png000066400000000000000000000005011506673203700226570ustar00rootroot00000000000000PNG  IHDR@!IgAMA a cHRMz&u0`:pQ<bKGD̿tIME !о6IDAT8c I3A15ĒU Lb@C2L@9_Z%tEXtdate:create2016-11-05T07:33:24-07:002@%tEXtdate:modify2016-11-05T07:33:24-07:00CKktEXtlabel]IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/93_6.png000066400000000000000000000005041506673203700226630ustar00rootroot00000000000000PNG  IHDRJkgAMA a cHRMz&u0`:pQ<bKGD̿tIME !" 
9IDAT8c0&|ܦf00G٣£££#UA>STRE?V@0Eo%tEXtdate:create2016-11-05T07:33:34-07:00@I%tEXtdate:modify2016-11-05T07:33:34-07:00tEXtlabel]IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/93_7.png000066400000000000000000000005121506673203700226630ustar00rootroot00000000000000PNG  IHDRTsxgAMA a cHRMz&u0`:pQ<bKGD̿tIME !,.G$ ?IDATHc0'u{7 8$U΀?*5*5*5*5*5*5/.LARQb]P%tEXtdate:create2016-11-05T07:33:44-07:00yIP%tEXtdate:modify2016-11-05T07:33:44-07:00$tEXtlabel]IENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/94_0.png000066400000000000000000000004631506673203700226620ustar00rootroot00000000000000PNG  IHDR ggAMA a cHRMz&u0`:pQ<bKGD̿tIME  #(IDATcKS<d<ӻP\Q"%tEXtdate:create2016-11-05T07:32:35-07:00 %tEXtdate:modify2016-11-05T07:32:35-07:00TtEXtlabel^շIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/94_1.png000066400000000000000000000005311506673203700226570ustar00rootroot00000000000000PNG  IHDRCgAMA a cHRMz&u0`:pQ<bKGD̿tIME  -@[%NIDATc@u $1de(c0~ ;9@7 cmF/`a`X;bFm%tEXtdate:create2016-11-05T07:32:45-07:00)%tEXtdate:modify2016-11-05T07:32:45-07:00̑ftEXtlabel^շIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/94_2.png000066400000000000000000000006061506673203700226630ustar00rootroot00000000000000PNG  IHDR 3WgAMA a cHRMz&u0`:pQ<bKGD̿tIME  79ܦ{IDAT(c0@1 ]  /cd`h```݅,8%~nAL4C?DP\DUU 6A?lg \n΅o?%tEXtdate:create2016-11-05T07:32:55-07:00qf)D%tEXtdate:modify2016-11-05T07:32:55-07:00;tEXtlabel^շIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/94_3.png000066400000000000000000000006551506673203700226700ustar00rootroot00000000000000PNG  IHDR+cgAMA a cHRMz&u0`:pQ<bKGD̿tIME !IDAT8c0+;>Uv-XQ 3Hz-aeW-貯AsgPeC$,G K__lHy?=ȒaQ$d @#ȑIQQQY2e@vb3$%tEXtdate:create2016-11-05T07:33:04-07:00p3G%tEXtdate:modify2016-11-05T07:33:04-07:00ntEXtlabel^շIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/94_4.png000066400000000000000000000007551506673203700226720ustar00rootroot00000000000000PNG  IHDR#5= gAMA a cHRMz&u0`:pQ<bKGD̿tIME !'eIDAT8cO0fe=qJwBt@h[煨V%v$!@B 6o5 !"7Rh3NCԦ`(Q9 !QQۈ4,4iPCT.D 2(vdѳ=zjy"=2H_V+~,c`o!J!AϺ?VP Vk - JG*U:tTR,e%tEXtdate:create2016-11-05T07:33:24-07:002@%tEXtdate:modify2016-11-05T07:33:24-07:00CKktEXtlabel^շIENDB`ggml-org-ggml-7ec8045/examples/yolo/data/labels/94_6.png000066400000000000000000000010761506673203700226710ustar00rootroot00000000000000PNG  IHDR1J7gAMA a cHRMz&u0`:pQ<bKGD̿tIME !" 
[binary PNG data omitted: ggml-org-ggml-7ec8045/examples/yolo/data/labels/94_7.png through 99_1.png — small pre-rendered glyph bitmaps used to draw detection labels on the output image]
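[naming note: each glyph file is data/labels/<ascii>_<size>.png for ASCII codes 32-126 at sizes 0-7; load_alphabet() in yolov3-tiny.cpp reads them into a single vector indexed as alphabet[size*128 + ascii]]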
[binary PNG data omitted: ggml-org-ggml-7ec8045/examples/yolo/data/labels/99_2.png and the remaining label glyphs; the beginning of the following source file was garbled in the archive and is reconstructed from its declarations in yolo-image.h]
ggml-org-ggml-7ec8045/examples/yolo/yolo-image.cpp
#include "yolo-image.h"

#include <algorithm>
#include <cstdio>
#include <cstdlib>

#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

static void draw_box(yolo_image & a, int x1, int y1, int x2, int y2, float r, float g, float b)
{
    // clamp the box corners to the image bounds
    if (x1 < 0) x1 = 0;
    if (x1 >= a.w) x1 = a.w-1;
    if (x2 < 0) x2 = 0;
    if (x2 >= a.w) x2 = a.w-1;
    if (y1 < 0) y1 = 0;
    if (y1 >= a.h) y1 = a.h-1;
    if (y2 < 0) y2 = 0;
    if (y2 >= a.h) y2 = a.h-1;

    for (int i = x1; i <= x2; ++i){
        a.data[i + y1*a.w + 0*a.w*a.h] = r;
        a.data[i + y2*a.w + 0*a.w*a.h] = r;
        a.data[i + y1*a.w + 1*a.w*a.h] = g;
        a.data[i + y2*a.w + 1*a.w*a.h] = g;
        a.data[i + y1*a.w + 2*a.w*a.h] = b;
        a.data[i + y2*a.w + 2*a.w*a.h] = b;
    }
    for (int i = y1; i <= y2; ++i){
        a.data[x1 + i*a.w + 0*a.w*a.h] = r;
        a.data[x2 + i*a.w + 0*a.w*a.h] = r;
        a.data[x1 + i*a.w + 1*a.w*a.h] = g;
        a.data[x2 + i*a.w + 1*a.w*a.h] = g;
        a.data[x1 + i*a.w + 2*a.w*a.h] = b;
        a.data[x2 + i*a.w + 2*a.w*a.h] = b;
    }
}

void draw_box_width(yolo_image & a, int x1, int y1, int x2, int y2, int w, float r, float g, float b)
{
    for (int i = 0; i < w; ++i) {
        draw_box(a, x1+i, y1+i, x2-i, y2-i, r, g, b);
    }
}

bool save_image(const yolo_image & im, const char *name, int quality)
{
    uint8_t *data = (uint8_t*)calloc(im.w*im.h*im.c, sizeof(uint8_t));
    for (int k = 0; k < im.c; ++k) {
        for (int i = 0; i < im.w*im.h; ++i) {
            data[i*im.c+k] = (uint8_t) (255*im.data[i + k*im.w*im.h]);
        }
    }
    int success = stbi_write_jpg(name, im.w, im.h, im.c, data, quality);
    free(data);
    if (!success) {
        fprintf(stderr, "Failed to write image %s\n", name);
        return false;
    }
    return true;
}

bool load_image(const char *fname, yolo_image & img)
{
    int w, h, c;
    uint8_t * data = stbi_load(fname, &w, &h, &c, 3);
    if (!data) {
        return false;
    }
    c = 3;
    img.w = w;
    img.h = h;
    img.c = c;
    img.data.resize(w*h*c);
    // convert interleaved HWC bytes to planar CHW floats in [0, 1]
    for (int k = 0; k < c; ++k){
        for (int j = 0; j < h; ++j){
            for (int i = 0; i < w; ++i){
                int dst_index = i + w*j + w*h*k;
                int src_index = k + c*i + c*w*j;
                img.data[dst_index] = (float)data[src_index]/255.;
            }
        }
    }
    stbi_image_free(data);
    return true;
}
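// Layout note: yolo_image stores planar CHW floats in [0, 1], so channel k of
// pixel (x, y) lives at data[k*w*h + y*w + x]. Worked example (sketch):
//
//     yolo_image im(4, 3, 3);        // 4x3 RGB image, one 12-value plane per channel
//     im.set_pixel(2, 1, 1, 0.5f);   // green of pixel (2, 1) -> im.data[1*12 + 1*4 + 2] = im.data[18]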
static yolo_image resize_image(const yolo_image & im, int w, int h)
{
    yolo_image resized(w, h, im.c);
    yolo_image part(w, im.h, im.c);

    float w_scale = (float)(im.w - 1) / (w - 1);
    float h_scale = (float)(im.h - 1) / (h - 1);

    // first pass: bilinear interpolation along x into the intermediate image
    for (int k = 0; k < im.c; ++k){
        for (int r = 0; r < im.h; ++r) {
            for (int c = 0; c < w; ++c) {
                float val = 0;
                if (c == w-1 || im.w == 1){
                    val = im.get_pixel(im.w-1, r, k);
                } else {
                    float sx = c*w_scale;
                    int ix = (int) sx;
                    float dx = sx - ix;
                    val = (1 - dx) * im.get_pixel(ix, r, k) + dx * im.get_pixel(ix+1, r, k);
                }
                part.set_pixel(c, r, k, val);
            }
        }
    }
    // second pass: interpolate along y
    for (int k = 0; k < im.c; ++k){
        for (int r = 0; r < h; ++r){
            float sy = r*h_scale;
            int iy = (int) sy;
            float dy = sy - iy;
            for (int c = 0; c < w; ++c){
                float val = (1-dy) * part.get_pixel(c, iy, k);
                resized.set_pixel(c, r, k, val);
            }
            if (r == h-1 || im.h == 1) continue;
            for (int c = 0; c < w; ++c){
                float val = dy * part.get_pixel(c, iy+1, k);
                resized.add_pixel(c, r, k, val);
            }
        }
    }
    return resized;
}

static void embed_image(const yolo_image & source, yolo_image & dest, int dx, int dy)
{
    for (int k = 0; k < source.c; ++k) {
        for (int y = 0; y < source.h; ++y) {
            for (int x = 0; x < source.w; ++x) {
                float val = source.get_pixel(x, y, k);
                dest.set_pixel(dx+x, dy+y, k, val);
            }
        }
    }
}

yolo_image letterbox_image(const yolo_image & im, int w, int h)
{
    // scale to fit while keeping the aspect ratio, then center on a gray canvas
    int new_w = im.w;
    int new_h = im.h;
    if (((float)w/im.w) < ((float)h/im.h)) {
        new_w = w;
        new_h = (im.h * w)/im.w;
    } else {
        new_h = h;
        new_w = (im.w * h)/im.h;
    }
    yolo_image resized = resize_image(im, new_w, new_h);
    yolo_image boxed(w, h, im.c);
    boxed.fill(0.5);
    embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2);
    return boxed;
}

static yolo_image tile_images(const yolo_image & a, const yolo_image & b, int dx)
{
    if (a.w == 0) {
        return b;
    }
    yolo_image c(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, a.c);
    c.fill(1.0f);
    embed_image(a, c, 0, 0);
    embed_image(b, c, a.w + dx, 0);
    return c;
}

static yolo_image border_image(const yolo_image & a, int border)
{
    yolo_image b(a.w + 2*border, a.h + 2*border, a.c);
    b.fill(1.0f);
    embed_image(a, b, border, border);
    return b;
}

yolo_image get_label(const std::vector<yolo_image> & alphabet, const std::string & label, int size)
{
    size = size/10;
    size = std::min(size, 7);
    yolo_image result(0,0,0);
    for (int i = 0; i < (int)label.size(); ++i) {
        int ch = label[i];
        yolo_image img = alphabet[size*128 + ch];
        result = tile_images(result, img, -size - 1 + (size+1)/2);
    }
    return border_image(result, (int)(result.h*.25));
}

void draw_label(yolo_image & im, int row, int col, const yolo_image & label, const float * rgb)
{
    int w = label.w;
    int h = label.h;
    if (row - h >= 0) {
        row = row - h;
    }
    for (int j = 0; j < h && j + row < im.h; j++) {
        for (int i = 0; i < w && i + col < im.w; i++) {
            for (int k = 0; k < label.c; k++) {
                float val = label.get_pixel(i, j, k);
                im.set_pixel(i + col, j + row, k, rgb[k] * val);
            }
        }
    }
}
ggml-org-ggml-7ec8045/examples/yolo/yolo-image.h
#pragma once

#include <string>
#include <vector>
#include <cassert>

struct yolo_image {
    int w, h, c;
    std::vector<float> data;

    yolo_image() : w(0), h(0), c(0) {}
    yolo_image(int w, int h, int c) : w(w), h(h), c(c), data(w*h*c) {}

    float get_pixel(int x, int y, int c) const {
        assert(x >= 0 && x < w && y >= 0 && y < h && c >= 0 && c < this->c);
        return data[c*w*h + y*w + x];
    }

    void set_pixel(int x, int y, int c, float val) {
        assert(x >= 0 && x < w && y >= 0 && y < h && c >= 0 && c < this->c);
        data[c*w*h + y*w + x] = val;
    }

    void add_pixel(int x, int y, int c, float val) {
        assert(x >= 0 && x < w && y >= 0 && y < h && c >= 0 && c < this->c);
        data[c*w*h + y*w + x] += val;
    }

    void fill(float val) {
        std::fill(data.begin(), data.end(), val);
    }
};

bool load_image(const char *fname, yolo_image & img);
void draw_box_width(yolo_image & a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
yolo_image letterbox_image(const yolo_image & im, int w, int h);
bool save_image(const yolo_image & im, const char *name, int quality);
yolo_image get_label(const std::vector<yolo_image> & alphabet, const std::string & label, int size);
void draw_label(yolo_image & im, int row, int col, const yolo_image & label, const float * rgb);
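/* Example usage (sketch) - load an image, letterbox it to the 416x416 network
   input and save the result; "input.jpg" is an illustrative path:

       yolo_image img(0,0,0);
       if (load_image("input.jpg", img)) {
           yolo_image sized = letterbox_image(img, 416, 416); // pads with 0.5 gray
           save_image(sized, "letterboxed.jpg", 80);          // JPEG quality 80
       }
*/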
ggml-org-ggml-7ec8045/examples/yolo/yolov3-tiny.cpp
#include "ggml.h"
#include "gguf.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

#include "yolo-image.h"

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <string>
#include <vector>
#include <algorithm>
#include <fstream>
#include <thread>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

struct conv2d_layer {
    struct ggml_tensor * weights;
    struct ggml_tensor * biases;
    struct ggml_tensor * scales;
    struct ggml_tensor * rolling_mean;
    struct ggml_tensor * rolling_variance;
    int padding = 1;
    bool batch_normalize = true;
    bool activate = true; // true for leaky relu, false for linear
};

struct yolo_model {
    int width  = 416;
    int height = 416;
    std::vector<conv2d_layer> conv2d_layers;
    ggml_backend_t backend;
    ggml_backend_buffer_t buffer;
    struct ggml_context * ctx;
};

struct yolo_layer {
    int classes = 80;
    std::vector<int> mask;
    std::vector<float> anchors;
    std::vector<float> predictions;
    int w;
    int h;

    yolo_layer(int classes, const std::vector<int> & mask, const std::vector<float> & anchors, struct ggml_tensor * prev_layer)
        : classes(classes), mask(mask), anchors(anchors)
    {
        w = prev_layer->ne[0];
        h = prev_layer->ne[1];
        predictions.resize(ggml_nbytes(prev_layer)/sizeof(float));
        ggml_backend_tensor_get(prev_layer, predictions.data(), 0, ggml_nbytes(prev_layer));
    }

    int entry_index(int location, int entry) const {
        int n   = location / (w*h);
        int loc = location % (w*h);
        return n*w*h*(4+classes+1) + entry*w*h + loc;
    }
};

struct box {
    float x, y, w, h;
};

struct detection {
    box bbox;
    std::vector<float> prob;
    float objectness;
};

static bool load_model(const std::string & fname, yolo_model & model) {
    struct ggml_context * tmp_ctx = nullptr;
    struct gguf_init_params gguf_params = {
        /*.no_alloc =*/ false,
        /*.ctx      =*/ &tmp_ctx,
    };
    gguf_context * gguf_ctx = gguf_init_from_file(fname.c_str(), gguf_params);
    if (!gguf_ctx) {
        fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
        return false;
    }
    int num_tensors = gguf_get_n_tensors(gguf_ctx);
    struct ggml_init_params params {
        /*.mem_size   =*/ ggml_tensor_overhead() * num_tensors,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    model.ctx = ggml_init(params);
    for (int i = 0; i < num_tensors; i++) {
        const char * name = gguf_get_tensor_name(gguf_ctx, i);
        struct ggml_tensor * src = ggml_get_tensor(tmp_ctx, name);
        struct ggml_tensor * dst = ggml_dup_tensor(model.ctx, src);
        ggml_set_name(dst, name);
    }
    model.buffer = ggml_backend_alloc_ctx_tensors(model.ctx, model.backend);

    // copy tensors from main memory to backend
    for (struct ggml_tensor * cur = ggml_get_first_tensor(model.ctx); cur != NULL; cur = ggml_get_next_tensor(model.ctx, cur)) {
        struct ggml_tensor * src = ggml_get_tensor(tmp_ctx, ggml_get_name(cur));
        size_t n_size = ggml_nbytes(src);
        ggml_backend_tensor_set(cur, ggml_get_data(src), 0, n_size);
    }
    gguf_free(gguf_ctx);

    model.width  = 416;
    model.height = 416;
    model.conv2d_layers.resize(13);
    model.conv2d_layers[7].padding = 0;
    model.conv2d_layers[9].padding = 0;
    model.conv2d_layers[9].batch_normalize = false;
    model.conv2d_layers[9].activate = false;
    model.conv2d_layers[10].padding = 0;
    model.conv2d_layers[12].padding = 0;
    model.conv2d_layers[12].batch_normalize = false;
    model.conv2d_layers[12].activate = false;
    for (int i = 0; i < (int)model.conv2d_layers.size(); i++) {
        char name[256];
        snprintf(name, sizeof(name), "l%d_weights", i);
        model.conv2d_layers[i].weights = ggml_get_tensor(model.ctx, name);
        snprintf(name, sizeof(name), "l%d_biases", i);
        model.conv2d_layers[i].biases = ggml_get_tensor(model.ctx, name);
        if (model.conv2d_layers[i].batch_normalize) {
            snprintf(name, sizeof(name), "l%d_scales", i);
            model.conv2d_layers[i].scales = ggml_get_tensor(model.ctx, name);
            snprintf(name, sizeof(name), "l%d_rolling_mean", i);
            model.conv2d_layers[i].rolling_mean = ggml_get_tensor(model.ctx, name);
            snprintf(name, sizeof(name), "l%d_rolling_variance", i);
            model.conv2d_layers[i].rolling_variance = ggml_get_tensor(model.ctx, name);
        }
    }
    return true;
}
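// Example (sketch): listing the tensors stored in a yolov3-tiny GGUF file using
// only the gguf metadata; the names printed should follow the "l<i>_*" scheme
// consumed by the loop above.
//
//     gguf_context * ctx = gguf_init_from_file("yolov3-tiny.gguf", {/*no_alloc=*/true, /*ctx=*/nullptr});
//     for (int64_t i = 0; i < gguf_get_n_tensors(ctx); i++) {
//         printf("%s\n", gguf_get_tensor_name(ctx, i));
//     }
//     gguf_free(ctx);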
static bool load_labels(const char * filename, std::vector<std::string> & labels) {
    std::ifstream file_in(filename);
    if (!file_in) {
        return false;
    }
    std::string line;
    while (std::getline(file_in, line)) {
        labels.push_back(line);
    }
    GGML_ASSERT(labels.size() == 80);
    return true;
}

static bool load_alphabet(std::vector<yolo_image> & alphabet) {
    alphabet.resize(8 * 128);
    for (int j = 0; j < 8; j++) {
        for (int i = 32; i < 127; i++) {
            char fname[256];
            snprintf(fname, sizeof(fname), "data/labels/%d_%d.png", i, j);
            if (!load_image(fname, alphabet[j*128 + i])) {
                fprintf(stderr, "Cannot load '%s'\n", fname);
                return false;
            }
        }
    }
    return true;
}

static ggml_tensor * apply_conv2d(ggml_context * ctx, ggml_tensor * input, const conv2d_layer & layer) {
    struct ggml_tensor * result = ggml_conv_2d(ctx, layer.weights, input, 1, 1, layer.padding, layer.padding, 1, 1);
    if (layer.batch_normalize) {
        result = ggml_sub(ctx, result, ggml_repeat(ctx, layer.rolling_mean, result));
        result = ggml_div(ctx, result, ggml_sqrt(ctx, ggml_repeat(ctx, layer.rolling_variance, result)));
        result = ggml_mul(ctx, result, ggml_repeat(ctx, layer.scales, result));
    }
    result = ggml_add(ctx, result, ggml_repeat(ctx, layer.biases, result));
    if (layer.activate) {
        result = ggml_leaky_relu(ctx, result, 0.1f, true);
    }
    return result;
}

static void activate_array(float * x, const int n) {
    // logistic activation
    for (int i = 0; i < n; i++) {
        x[i] = 1./(1. + exp(-x[i]));
    }
}
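// Note: the batch-norm in apply_conv2d above computes, per output channel,
//     y = ((x - mean) / sqrt(var)) * scale + bias
// An equivalent load-time folding (sketch, not done here) would precompute
//     k = scale / sqrt(var);   b = bias - k * mean;
// and evaluate y = k * conv(x) + b, removing the sub/div/mul nodes from the graph.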
static void apply_yolo(yolo_layer & layer) {
    int w = layer.w;
    int h = layer.h;
    int N = layer.mask.size();
    float * data = layer.predictions.data();
    for (int n = 0; n < N; n++) {
        // squash the x/y offsets and the objectness + class scores
        int index = layer.entry_index(n*w*h, 0);
        activate_array(data + index, 2*w*h);
        index = layer.entry_index(n*w*h, 4);
        activate_array(data + index, (1+layer.classes)*w*h);
    }
}

static box get_yolo_box(const yolo_layer & layer, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) {
    const float * predictions = layer.predictions.data();
    box b;
    b.x = (i + predictions[index + 0*stride]) / lw;
    b.y = (j + predictions[index + 1*stride]) / lh;
    b.w = exp(predictions[index + 2*stride]) * layer.anchors[2*n]   / w;
    b.h = exp(predictions[index + 3*stride]) * layer.anchors[2*n+1] / h;
    return b;
}

static void correct_yolo_box(box & b, int im_w, int im_h, int net_w, int net_h) {
    // undo the letterboxing: map network coordinates back to image coordinates
    int new_w = 0;
    int new_h = 0;
    if (((float)net_w/im_w) < ((float)net_h/im_h)) {
        new_w = net_w;
        new_h = (im_h * net_w)/im_w;
    } else {
        new_h = net_h;
        new_w = (im_w * net_h)/im_h;
    }
    b.x = (b.x - (net_w - new_w)/2./net_w) / ((float)new_w/net_w);
    b.y = (b.y - (net_h - new_h)/2./net_h) / ((float)new_h/net_h);
    b.w *= (float)net_w/new_w;
    b.h *= (float)net_h/new_h;
}

static void get_yolo_detections(const yolo_layer & layer, std::vector<detection> & detections, int im_w, int im_h, int netw, int neth, float thresh) {
    int w = layer.w;
    int h = layer.h;
    int N = layer.mask.size();
    const float * predictions = layer.predictions.data();
    for (int i = 0; i < w*h; i++) {
        for (int n = 0; n < N; n++) {
            int obj_index = layer.entry_index(n*w*h + i, 4);
            float objectness = predictions[obj_index];
            if (objectness <= thresh) {
                continue;
            }
            detection det;
            int box_index = layer.entry_index(n*w*h + i, 0);
            int row = i / w;
            int col = i % w;
            det.bbox = get_yolo_box(layer, layer.mask[n], box_index, col, row, w, h, netw, neth, w*h);
            correct_yolo_box(det.bbox, im_w, im_h, netw, neth);
            det.objectness = objectness;
            det.prob.resize(layer.classes);
            for (int j = 0; j < layer.classes; j++) {
                int class_index = layer.entry_index(n*w*h + i, 4 + 1 + j);
                float prob = objectness*predictions[class_index];
                det.prob[j] = (prob > thresh) ? prob : 0;
            }
            detections.push_back(det);
        }
    }
}
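// Worked example: for the 13x13 output grid with 80 classes, each of the 3
// anchors owns 4+1+80 = 85 planes of 13*13 = 169 values, so the objectness of
// anchor n at cell i sits at predictions[n*169*85 + 4*169 + i] - exactly what
// entry_index(n*w*h + i, 4) computes above.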
static float overlap(float x1, float w1, float x2, float w2) {
    float l1 = x1 - w1/2;
    float l2 = x2 - w2/2;
    float left = l1 > l2 ? l1 : l2;
    float r1 = x1 + w1/2;
    float r2 = x2 + w2/2;
    float right = r1 < r2 ? r1 : r2;
    return right - left;
}

static float box_intersection(const box & a, const box & b) {
    float w = overlap(a.x, a.w, b.x, b.w);
    float h = overlap(a.y, a.h, b.y, b.h);
    if (w < 0 || h < 0) return 0;
    float area = w*h;
    return area;
}

static float box_union(const box & a, const box & b) {
    float i = box_intersection(a, b);
    float u = a.w*a.h + b.w*b.h - i;
    return u;
}

static float box_iou(const box & a, const box & b) {
    return box_intersection(a, b)/box_union(a, b);
}

static void do_nms_sort(std::vector<detection> & dets, int classes, float thresh) {
    // move zero-objectness detections to the back
    int k = (int)dets.size()-1;
    for (int i = 0; i <= k; ++i) {
        if (dets[i].objectness == 0) {
            std::swap(dets[i], dets[k]);
            --k;
            --i;
        }
    }
    int total = k+1;
    for (int k = 0; k < classes; ++k) {
        std::sort(dets.begin(), dets.begin()+total, [=](const detection & a, const detection & b) {
            return a.prob[k] > b.prob[k];
        });
        for (int i = 0; i < total; ++i) {
            if (dets[i].prob[k] == 0) {
                continue;
            }
            box a = dets[i].bbox;
            for (int j = i+1; j < total; ++j) {
                box b = dets[j].bbox;
                if (box_iou(a, b) > thresh) {
                    dets[j].prob[k] = 0;
                }
            }
        }
    }
}
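// Worked example: boxes a = {0.5, 0.5, 1, 1} and b = {1.0, 0.5, 1, 1} overlap in
// a 0.5 x 1 strip, so box_intersection = 0.5, box_union = 1 + 1 - 0.5 = 1.5 and
// box_iou = 1/3. With the 0.45 threshold used in detect() below, both boxes
// survive NMS; above 0.45 the lower-scoring one is suppressed for that class.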
static float get_color(int c, int x, int max) {
    float colors[6][3] = { {1,0,1}, {0,0,1}, {0,1,1}, {0,1,0}, {1,1,0}, {1,0,0} };
    float ratio = ((float)x/max)*5;
    int i = floor(ratio);
    int j = ceil(ratio);
    ratio -= i;
    float r = (1-ratio) * colors[i][c] + ratio*colors[j][c];
    return r;
}

static void draw_detections(yolo_image & im, const std::vector<detection> & dets, float thresh, const std::vector<std::string> & labels, const std::vector<yolo_image> & alphabet) {
    int classes = (int)labels.size();
    for (int i = 0; i < (int)dets.size(); i++) {
        std::string labelstr;
        int cl = -1;
        for (int j = 0; j < (int)dets[i].prob.size(); j++) {
            if (dets[i].prob[j] > thresh) {
                if (cl < 0) {
                    labelstr = labels[j];
                    cl = j;
                } else {
                    labelstr += ", ";
                    labelstr += labels[j];
                }
                printf("%s: %.0f%%\n", labels[j].c_str(), dets[i].prob[j]*100);
            }
        }
        if (cl >= 0) {
            int width = im.h * .006;
            int offset = cl*123457 % classes;
            float red   = get_color(2,offset,classes);
            float green = get_color(1,offset,classes);
            float blue  = get_color(0,offset,classes);
            float rgb[3];
            rgb[0] = red;
            rgb[1] = green;
            rgb[2] = blue;
            box b = dets[i].bbox;
            int left  = (b.x-b.w/2.)*im.w;
            int right = (b.x+b.w/2.)*im.w;
            int top   = (b.y-b.h/2.)*im.h;
            int bot   = (b.y+b.h/2.)*im.h;
            if (left < 0) left = 0;
            if (right > im.w-1) right = im.w-1;
            if (top < 0) top = 0;
            if (bot > im.h-1) bot = im.h-1;
            draw_box_width(im, left, top, right, bot, width, red, green, blue);
            yolo_image label = get_label(alphabet, labelstr, (im.h*.03));
            draw_label(im, top + width, left, label, rgb);
        }
    }
}

static void print_shape(int layer, const ggml_tensor * t) {
    printf("Layer %2d output shape: %3d x %3d x %4d x %3d\n", layer, (int)t->ne[0], (int)t->ne[1], (int)t->ne[2], (int)t->ne[3]);
}

static struct ggml_cgraph * build_graph(struct ggml_context * ctx_cgraph, const yolo_model & model) {
    struct ggml_cgraph * gf = ggml_new_graph(ctx_cgraph);

    struct ggml_tensor * input = ggml_new_tensor_4d(ctx_cgraph, GGML_TYPE_F32, model.width, model.height, 3, 1);
    ggml_set_name(input, "input");

    struct ggml_tensor * result = apply_conv2d(ctx_cgraph, input, model.conv2d_layers[0]);
    print_shape(0, result);
    result = ggml_pool_2d(ctx_cgraph, result, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0);
    print_shape(1, result);
    result = apply_conv2d(ctx_cgraph, result, model.conv2d_layers[1]);
    print_shape(2, result);
    result = ggml_pool_2d(ctx_cgraph, result, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0);
    print_shape(3, result);
    result = apply_conv2d(ctx_cgraph, result, model.conv2d_layers[2]);
    print_shape(4, result);
    result = ggml_pool_2d(ctx_cgraph, result, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0);
    print_shape(5, result);
    result = apply_conv2d(ctx_cgraph, result, model.conv2d_layers[3]);
    print_shape(6, result);
    result = ggml_pool_2d(ctx_cgraph, result, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0);
    print_shape(7, result);
    result = apply_conv2d(ctx_cgraph, result, model.conv2d_layers[4]);
    struct ggml_tensor * layer_8 = result;
    print_shape(8, result);
    result = ggml_pool_2d(ctx_cgraph, result, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0);
    print_shape(9, result);
    result = apply_conv2d(ctx_cgraph, result, model.conv2d_layers[5]);
    print_shape(10, result);
    result = ggml_pool_2d(ctx_cgraph, result, GGML_OP_POOL_MAX, 2, 2, 1, 1, 0.5, 0.5);
    print_shape(11, result);
    result = apply_conv2d(ctx_cgraph, result, model.conv2d_layers[6]);
    print_shape(12, result);
    result = apply_conv2d(ctx_cgraph, result, model.conv2d_layers[7]);
    struct ggml_tensor * layer_13 = result;
    print_shape(13, result);
    result = apply_conv2d(ctx_cgraph, result, model.conv2d_layers[8]);
    print_shape(14, result);
    result = apply_conv2d(ctx_cgraph, result, model.conv2d_layers[9]);
    struct ggml_tensor * layer_15 = result;
    ggml_set_output(layer_15);
    ggml_set_name(layer_15, "layer_15");
    print_shape(15, result);
    result = apply_conv2d(ctx_cgraph, layer_13, model.conv2d_layers[10]);
    print_shape(18, result);
    result = ggml_upscale(ctx_cgraph, result, 2, GGML_SCALE_MODE_NEAREST);
    print_shape(19, result);
    result = ggml_concat(ctx_cgraph, result, layer_8, 2);
    print_shape(20, result);
    result = apply_conv2d(ctx_cgraph, result, model.conv2d_layers[11]);
    print_shape(21, result);
    result = apply_conv2d(ctx_cgraph, result, model.conv2d_layers[12]);
    struct ggml_tensor * layer_22 = result;
    ggml_set_output(layer_22);
    ggml_set_name(layer_22, "layer_22");
    print_shape(22, result);

    ggml_build_forward_expand(gf, layer_15);
    ggml_build_forward_expand(gf, layer_22);
    return gf;
}

void detect(yolo_image & img, struct ggml_cgraph * gf, const yolo_model & model, float thresh, const std::vector<std::string> & labels, const std::vector<yolo_image> & alphabet) {
    std::vector<detection> detections;
    yolo_image sized = letterbox_image(img, model.width, model.height);
    struct ggml_tensor * input = ggml_graph_get_tensor(gf, "input");
    ggml_backend_tensor_set(input, sized.data.data(), 0, ggml_nbytes(input));
    if (ggml_backend_graph_compute(model.backend, gf) != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "%s: ggml_backend_graph_compute() failed\n", __func__);
        return;
    }
    struct ggml_tensor * layer_15 = ggml_graph_get_tensor(gf, "layer_15");
    yolo_layer yolo16{ 80, {3, 4, 5}, {10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319}, layer_15};
    apply_yolo(yolo16);
    get_yolo_detections(yolo16, detections, img.w, img.h, model.width, model.height, thresh);
    struct ggml_tensor * layer_22 = ggml_graph_get_tensor(gf, "layer_22");
    yolo_layer yolo23{ 80, {0, 1, 2}, {10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319}, layer_22};
    apply_yolo(yolo23);
    get_yolo_detections(yolo23, detections, img.w, img.h, model.width, model.height, thresh);
    do_nms_sort(detections, yolo23.classes, .45);
    draw_detections(img, detections, thresh, labels, alphabet);
}
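// Shape note (sketch): with a 416x416 input, layer_15 is the coarse 13x13 head
// (stride 32, from the five stride-2 max-pools) and layer_22 the finer 26x26
// head (stride 16, after the 2x upscale); both carry 3*(4+1+80) = 255 channels,
// one (tx, ty, tw, th, obj, 80 class scores) tuple per anchor per cell.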
struct yolo_params {
    float thresh          = 0.5;
    std::string model     = "yolov3-tiny.gguf";
    std::string fname_inp = "input.jpg";
    std::string fname_out = "predictions.jpg";
    int n_threads         = std::max(1U, std::thread::hardware_concurrency()/2);
    std::string device;
};

void yolo_print_usage(int argc, char ** argv, const yolo_params & params) {
    fprintf(stderr, "usage: %s [options]\n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, "options:\n");
    fprintf(stderr, "  -h, --help           show this help message and exit\n");
    fprintf(stderr, "  -d, --device DEV     device to use\n");
    fprintf(stderr, "  -t, --threads N      number of threads for the CPU backend (default: %d)\n", params.n_threads);
    fprintf(stderr, "  -th, --thresh T      detection threshold (default: %.2f)\n", params.thresh);
    fprintf(stderr, "  -m, --model FNAME    model path (default: %s)\n", params.model.c_str());
    fprintf(stderr, "  -i, --inp FNAME      input file (default: %s)\n", params.fname_inp.c_str());
    fprintf(stderr, "  -o, --out FNAME      output file (default: %s)\n", params.fname_out.c_str());
    fprintf(stderr, "\n");
}

bool yolo_params_parse(int argc, char ** argv, yolo_params & params) {
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
        if (arg == "-th" || arg == "--thresh") {
            params.thresh = std::stof(argv[++i]);
            if (params.thresh < 0 || params.thresh > 1) {
                fprintf(stderr, "error: invalid threshold: %.2f\n", params.thresh);
                return false;
            }
        } else if (arg == "-m" || arg == "--model") {
            params.model = argv[++i];
        } else if (arg == "-i" || arg == "--inp") {
            params.fname_inp = argv[++i];
        } else if (arg == "-o" || arg == "--out") {
            params.fname_out = argv[++i];
        } else if (arg == "-t" || arg == "--threads") {
            if (++i >= argc) {
                return false;
            }
            params.n_threads = std::stoi(argv[i]);
            if (params.n_threads <= 0) {
                fprintf(stderr, "error: invalid number of threads: %d\n", params.n_threads);
                return false;
            }
        } else if (arg == "-d" || arg == "--device") {
            if (++i >= argc) {
                return false;
            }
            params.device = argv[i];
            if (ggml_backend_dev_by_name(params.device.c_str()) == nullptr) {
                fprintf(stderr, "error: unknown device: %s\n", params.device.c_str());
                fprintf(stderr, "available devices:\n");
                for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
                    auto * dev = ggml_backend_dev_get(i);
                    size_t free, total;
                    ggml_backend_dev_memory(dev, &free, &total);
                    printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
                }
                return false;
            }
        } else if (arg == "-h" || arg == "--help") {
            yolo_print_usage(argc, argv, params);
            exit(0);
        } else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            yolo_print_usage(argc, argv, params);
            exit(0);
        }
    }
    return true;
}

static ggml_backend_t create_backend(const yolo_params & params) {
    ggml_backend_t backend = nullptr;

    if (!params.device.empty()) {
        ggml_backend_dev_t dev = ggml_backend_dev_by_name(params.device.c_str());
        if (dev) {
            backend = ggml_backend_dev_init(dev, nullptr);
            if (!backend) {
                fprintf(stderr, "Failed to create backend for device %s\n", params.device.c_str());
                return nullptr;
            }
        }
    }

    // try to initialize a GPU backend first
    if (!backend) {
        backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr);
    }

    // if there aren't GPU backends fallback to CPU backend
    if (!backend) {
        backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
    }

    if (backend) {
        fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend));

        // set the number of threads
        ggml_backend_dev_t dev = ggml_backend_get_device(backend);
        ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
        if (reg) {
            auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
            if (ggml_backend_set_n_threads_fn) {
                ggml_backend_set_n_threads_fn(backend, params.n_threads);
            }
        }
    }
    return backend;
}
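// Example invocation (sketch; the binary name depends on the build target):
//
//     ./yolov3-tiny -m yolov3-tiny.gguf -i input.jpg -o predictions.jpg -th 0.5
//
// Run it from a directory containing data/coco.names and data/labels/, since
// main() below loads both from relative paths.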
int main(int argc, char *argv[]) {
    ggml_backend_load_all();
    ggml_time_init();
    yolo_model model;

    yolo_params params;
    if (!yolo_params_parse(argc, argv, params)) {
        return 1;
    }
    model.backend = create_backend(params);
    if (!model.backend) {
        fprintf(stderr, "Failed to create backend\n");
        return 1;
    }
    if (!load_model(params.model, model)) {
        fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
        return 1;
    }
    yolo_image img(0,0,0);
    if (!load_image(params.fname_inp.c_str(), img)) {
        fprintf(stderr, "%s: failed to load image from '%s'\n", __func__, params.fname_inp.c_str());
        return 1;
    }
    std::vector<std::string> labels;
    if (!load_labels("data/coco.names", labels)) {
        fprintf(stderr, "%s: failed to load labels from 'data/coco.names'\n", __func__);
        return 1;
    }
    std::vector<yolo_image> alphabet;
    if (!load_alphabet(alphabet)) {
        fprintf(stderr, "%s: failed to load alphabet\n", __func__);
        return 1;
    }

    struct ggml_init_params params0 = {
        /*.mem_size   =*/ ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true, // the tensors will be allocated later by ggml_gallocr_alloc_graph()
    };
    struct ggml_context * ctx_cgraph = ggml_init(params0);
    struct ggml_cgraph * gf = build_graph(ctx_cgraph, model);

    ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));
    ggml_gallocr_alloc_graph(allocr, gf);

    const int64_t t_start_ms = ggml_time_ms();
    detect(img, gf, model, params.thresh, labels, alphabet);
    const int64_t t_detect_ms = ggml_time_ms() - t_start_ms;

    if (!save_image(img, params.fname_out.c_str(), 80)) {
        fprintf(stderr, "%s: failed to save image to '%s'\n", __func__, params.fname_out.c_str());
        return 1;
    }
    printf("Detected objects saved in '%s' (time: %f sec.)\n", params.fname_out.c_str(), t_detect_ms / 1000.0f);

    ggml_free(ctx_cgraph);
    ggml_gallocr_free(allocr);
    ggml_free(model.ctx);
    ggml_backend_buffer_free(model.buffer);
    ggml_backend_free(model.backend);
    return 0;
}
ggml-org-ggml-7ec8045/ggml.pc.in
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@

Name: ggml
Description: The GGML Tensor Library for Machine Learning
Version: @GGML_VERSION@
Cflags: -I${includedir}
Libs: -L${libdir} -lggml
ggml-org-ggml-7ec8045/include/
ggml-org-ggml-7ec8045/include/ggml-alloc.h
#pragma once

#include "ggml.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct ggml_backend * ggml_backend_t;

// Tensor allocator
struct ggml_tallocr {
    ggml_backend_buffer_t buffer;
    void * base;
    size_t alignment;
    size_t offset;
};

GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
GGML_API enum ggml_status    ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);

// Graph allocator
/* Example usage:
    ggml_gallocr_t galloc =
ggml_gallocr_new(ggml_backend_cpu_buffer_type()); // optional: create a worst-case graph and reserve the buffers to avoid reallocations ggml_gallocr_reserve(galloc, build_graph(max_batch)); // allocate the graph struct ggml_cgraph * graph = build_graph(batch); ggml_gallocr_alloc_graph(galloc, graph); printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0)); // evaluate the graph ggml_backend_graph_compute(backend, graph); */ // special tensor flags for use with the graph allocator: // ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses // ggml_set_output(): output tensors are never freed and never overwritten typedef struct ggml_gallocr * ggml_gallocr_t; GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft); GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs); GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc); // pre-allocate buffers from a measure graph - does not allocate or modify the graph // call with a worst-case graph to avoid buffer reallocations // not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed // returns false if the buffer allocation failed GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph); GGML_API bool ggml_gallocr_reserve_n( ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids); // automatic reallocation if the topology changes when using a single buffer // returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers) GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph); GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id); // Utils // Create a buffer and allocate all the tensors in a ggml_context GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft); GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-backend.h000066400000000000000000000505021506673203700206340ustar00rootroot00000000000000#pragma once #include "ggml.h" #include "ggml-alloc.h" #ifdef GGML_BACKEND_SHARED # if defined(_WIN32) && !defined(__MINGW32__) # ifdef GGML_BACKEND_BUILD # define GGML_BACKEND_API __declspec(dllexport) extern # else # define GGML_BACKEND_API __declspec(dllimport) extern # endif # else # define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern # endif #else # define GGML_BACKEND_API extern #endif #ifdef __cplusplus extern "C" { #endif typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; typedef struct ggml_backend_buffer * ggml_backend_buffer_t; typedef struct ggml_backend_event * ggml_backend_event_t; typedef struct ggml_backend * ggml_backend_t; typedef void * ggml_backend_graph_plan_t; typedef struct ggml_backend_reg * ggml_backend_reg_t; typedef struct ggml_backend_device * ggml_backend_dev_t; // // Backend buffer type // GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft); GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size); GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft); GGML_API 
size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft); GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft); GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft); // // Backend buffer // enum ggml_backend_buffer_usage { GGML_BACKEND_BUFFER_USAGE_ANY = 0, GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1, GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2, }; GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer); GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer); GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer); GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer); GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer); GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer); GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor); GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value); GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer); GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage); GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer); GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer); GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer); // tensor copy between different backends GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst); // // Backend (stream) // GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend); GGML_API const char * ggml_backend_name(ggml_backend_t backend); GGML_API void ggml_backend_free(ggml_backend_t backend); GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend); GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size); GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend); GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend); GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); // "offset" refers to the offset in tensor->data for setting/getting data GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size); GGML_API void ggml_backend_synchronize(ggml_backend_t backend); GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph); GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan); GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, 
ggml_backend_graph_plan_t plan); GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph); GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph); // NOTE: will be removed, use device version instead GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op); GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft); GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op); // asynchronous copy // the copy is performed after all the currently queued operations in backend_src // backend_dst will wait for the copy to complete before performing other operations // automatic fallback to sync copy if async is not supported GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst); GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend); // // Events // GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device); GGML_API void ggml_backend_event_free(ggml_backend_event_t event); GGML_API void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend); GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event); GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event); // // Backend device // enum ggml_backend_dev_type { // CPU device using system memory GGML_BACKEND_DEVICE_TYPE_CPU, // GPU device using dedicated memory GGML_BACKEND_DEVICE_TYPE_GPU, // integrated GPU device using host memory GGML_BACKEND_DEVICE_TYPE_IGPU, // accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX) GGML_BACKEND_DEVICE_TYPE_ACCEL }; // functionality supported by the device struct ggml_backend_dev_caps { // asynchronous operations bool async; // pinned host buffer bool host_buffer; // creating buffers from host ptr bool buffer_from_host_ptr; // event synchronization bool events; }; // all the device properties struct ggml_backend_dev_props { // device name const char * name; // device description const char * description; // device free memory in bytes size_t memory_free; // device total memory in bytes size_t memory_total; // device type enum ggml_backend_dev_type type; // device id // for PCI devices, this should be the PCI bus id formatted as "domain:bus:device.function" (e.g. 
"0000:01:00.0") // if the id is unknown, this should be NULL const char * device_id; // device capabilities struct ggml_backend_dev_caps caps; }; GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device); GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device); GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total); GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device); GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props); GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device); GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params); GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device); GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device); GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size); GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op); GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft); GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op); // // Backend (reg) // GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg); GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg); GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index); GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name); // Common functions that may be obtained using ggml_backend_reg_get_proc_address // Split buffer type for tensor parallelism typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split); // Set the number of threads for the backend typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads); // Get additional buffer types provided by the device (returns a NULL-terminated array) typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device); // Set the abort callback for the backend typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data); // Get a list of feature flags supported by the backend (returns a NULL-terminated array) struct ggml_backend_feature { const char * name; const char * value; }; typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg); // // Backend registry // GGML_API void ggml_backend_device_register(ggml_backend_dev_t device); // Backend (reg) enumeration GGML_API size_t ggml_backend_reg_count(void); GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index); GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name); // Device enumeration GGML_API size_t ggml_backend_dev_count(void); GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index); GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name); GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type); // Direct backend (stream) initialization // = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params) GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * 
params); // = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params) GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params); // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL) GGML_API ggml_backend_t ggml_backend_init_best(void); // Load a backend from a dynamic library and register it GGML_API ggml_backend_reg_t ggml_backend_load(const char * path); // Unload a backend if loaded dynamically and unregister it GGML_API void ggml_backend_unload(ggml_backend_reg_t reg); // Load all known backends from dynamic libraries GGML_API void ggml_backend_load_all(void); GGML_API void ggml_backend_load_all_from_path(const char * dir_path); // // Backend scheduler // // The backend scheduler allows for multiple backend devices to be used together // Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends // The backends are selected based on: // - the backend that supports the operation // - the location of the pre-allocated tensors (e.g. the weights) /* Example usage: // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned // preferrably to run on the same backend as the buffer ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS); sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false, true); // initialize buffers from a max size graph (optional) reserve_graph = build_graph(sched, max_batch_size); // manually assign nodes to a backend (optional, should not be needed in most cases) struct ggml_tensor * node = ggml_mul_mat(ctx, ...); ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu); ggml_backend_sched_reserve(sched, reserve_graph); // compute graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation for (int i = 0; i < 10; ++i) { ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically } // if there are graph inputs: graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called) ggml_backend_sched_reset(sched); // clear the allocation of the previous graph ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors ggml_backend_sched_graph_compute(sched, graph); // execute the graph // as an alternative to the above it is also possible to assign the inputs to a dedicated context and // allocate them statically via ggml_backend_alloc_ctx_tensors } */ typedef struct ggml_backend_sched * ggml_backend_sched_t; // Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback) // when ask == true, the scheduler wants to know if the user wants to observe this node // this allows the scheduler to batch nodes together in order to evaluate them in a single call // // when ask == false, the scheduler is passing the node tensor to the user for observation // if the user returns false, the scheduler will cancel the graph compute // typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data); // Initialize a backend scheduler, backends with low index are given priority over backends with high index GGML_API 
ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel, bool op_offload); GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched); // Initialize backend buffers from a measure graph GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched); GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i); // Get the number of splits of the last graph GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched); GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched); GGML_API ggml_backend_buffer_type_t ggml_backend_sched_get_buffer_type(ggml_backend_sched_t sched, ggml_backend_t backend); GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend); GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend); GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node); // Split graph without allocating it GGML_API void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // Allocate and compute graph on the backend scheduler GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph); GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph); GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched); // Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph. // This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers. // The correct way to use this API is to discard the deallocated tensors and create new ones. 
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched); // Set a callback to be called for each resulting node during graph compute GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data); // // Utils // struct ggml_backend_graph_copy { ggml_backend_buffer_t buffer; struct ggml_context * ctx_allocated; struct ggml_context * ctx_unallocated; struct ggml_cgraph * graph; }; // Copy a graph to a different backend GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph); GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy); typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data); // Compare the output of two backends GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node); // Tensor initialization GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr); GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor); // CPU buffer types are always available GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-blas.h000066400000000000000000000011121506673203700201570ustar00rootroot00000000000000#pragma once #include "ggml.h" #include "ggml-backend.h" #ifdef __cplusplus extern "C" { #endif // backend API GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void); GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend); // number of threads used for conversion to float // for openblas and blis, this will also set the number of threads used for blas operations GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-cann.h000066400000000000000000000107111506673203700201620ustar00rootroot00000000000000/* * Copyright (c) 2023-2024 The ggml authors * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. 
*/ #pragma once #include "ggml-backend.h" #include "ggml.h" #ifdef __cplusplus extern "C" { #endif /** * @brief Maximum number of CANN devices supported. */ #define GGML_CANN_MAX_DEVICES 16 GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void); /** * @brief Initializes the CANN backend for a specified device. * * This function initializes the CANN backend for the given device. * It verifies the device index, allocates a context, and creates a backend * instance. * * @param device The index of the device to initialize. * @return A pointer to the initialized backend instance, or nullptr on failure. */ GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device); /** * @brief Checks if a given backend is a CANN backend. * * This function verifies if the provided backend is a CANN backend by comparing * its GUID with the CANN backend's GUID. * * @param backend The backend instance to check. * @return True if the backend is a CANN backend, false otherwise. */ GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend); /** * @brief Retrieves the CANN buffer type for a specified device. * * This function initializes and returns the buffer type interface associated * with the given device. It ensures thread-safe access using a mutex. * * @param device The device index for which to retrieve the buffer type. * @return A pointer to the buffer type interface for the specified device, or * nullptr if the device index is out of range. */ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_buffer_type(int32_t device); /** * @brief Retrieves the number of CANN devices available. * * This function returns the number of CANN devices available based on * information obtained from `ggml_cann_info()`. * * @return The number of CANN devices available. */ GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void); /** * @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU. * * @return A pointer to the host buffer type interface. */ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void); /** * @brief Retrieves the description of a specific CANN device. * * This function sets the specified device, retrieves the SoC name, * and writes it into the provided description buffer. * * @param device The device index to retrieve the description for. * @param description Pointer to a buffer where the description will be written. * @param description_size Size of the description buffer. */ GGML_BACKEND_API void ggml_backend_cann_get_device_description( int32_t device, char* description, size_t description_size); /** * @brief Retrieves the memory information of a specific CANN device. * * This function sets the specified device, retrieves the free and total * memory information of the specified type (ACL_HBM_MEM), and stores them * in the provided pointers. * * @param device The device index to retrieve memory information for. * @param free Pointer to a variable where the free memory size will be stored. * @param total Pointer to a variable where the total memory size will be * stored. 
*/ GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device, size_t* free, size_t* total); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-cpp.h000066400000000000000000000031641506673203700200310ustar00rootroot00000000000000#pragma once #ifndef __cplusplus #error "This header is for C++ only" #endif #include "ggml.h" #include "ggml-alloc.h" #include "ggml-backend.h" #include "gguf.h" #include <memory> // Smart pointers for ggml types // ggml struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } }; struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } }; typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr; typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr; // ggml-alloc struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } }; typedef std::unique_ptr<ggml_gallocr, ggml_gallocr_deleter> ggml_gallocr_ptr; // ggml-backend struct ggml_backend_deleter { void operator()(ggml_backend_t backend) { ggml_backend_free(backend); } }; struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } }; struct ggml_backend_event_deleter { void operator()(ggml_backend_event_t event) { ggml_backend_event_free(event); } }; struct ggml_backend_sched_deleter { void operator()(ggml_backend_sched_t sched) { ggml_backend_sched_free(sched); } }; typedef std::unique_ptr<ggml_backend, ggml_backend_deleter> ggml_backend_ptr; typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr; typedef std::unique_ptr<ggml_backend_event, ggml_backend_event_deleter> ggml_backend_event_ptr; typedef std::unique_ptr<ggml_backend_sched, ggml_backend_sched_deleter> ggml_backend_sched_ptr; ggml-org-ggml-7ec8045/include/ggml-cpu.h000066400000000000000000000164151506673203700200410ustar00rootroot00000000000000#pragma once #include "ggml.h" #include "ggml-backend.h" #ifdef __cplusplus extern "C" { #endif // the compute plan that needs to be prepared for ggml_graph_compute() // since https://github.com/ggml-org/ggml/issues/287 struct ggml_cplan { size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()` uint8_t * work_data; // work buffer, to be allocated by caller before calling `ggml_graph_compute()` int n_threads; struct ggml_threadpool * threadpool; // abort ggml_graph_compute when true ggml_abort_callback abort_callback; void * abort_callback_data; }; // numa strategies enum ggml_numa_strategy { GGML_NUMA_STRATEGY_DISABLED = 0, GGML_NUMA_STRATEGY_DISTRIBUTE = 1, GGML_NUMA_STRATEGY_ISOLATE = 2, GGML_NUMA_STRATEGY_NUMACTL = 3, GGML_NUMA_STRATEGY_MIRROR = 4, GGML_NUMA_STRATEGY_COUNT }; GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value); GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value); GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value); GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value); GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i); GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value); GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3); GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value); GGML_BACKEND_API float ggml_get_f32_1d(const struct
ggml_tensor * tensor, int i); GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value); GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3); GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value); GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params); GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool); GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool); GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool); GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool); // ggml_graph_plan() has to be called before ggml_graph_compute() // when plan.work_size > 0, caller must allocate memory for plan.work_data GGML_BACKEND_API struct ggml_cplan ggml_graph_plan( const struct ggml_cgraph * cgraph, int n_threads, /* = GGML_DEFAULT_N_THREADS */ struct ggml_threadpool * threadpool /* = NULL */ ); GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); // same as ggml_graph_compute() but the work data is allocated as a part of the context // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads); // // system info // // x86 GGML_BACKEND_API int ggml_cpu_has_sse3 (void); GGML_BACKEND_API int ggml_cpu_has_ssse3 (void); GGML_BACKEND_API int ggml_cpu_has_avx (void); GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void); GGML_BACKEND_API int ggml_cpu_has_avx2 (void); GGML_BACKEND_API int ggml_cpu_has_bmi2 (void); GGML_BACKEND_API int ggml_cpu_has_f16c (void); GGML_BACKEND_API int ggml_cpu_has_fma (void); GGML_BACKEND_API int ggml_cpu_has_avx512 (void); GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void); GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void); GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void); GGML_BACKEND_API int ggml_cpu_has_amx_int8 (void); // ARM GGML_BACKEND_API int ggml_cpu_has_neon (void); GGML_BACKEND_API int ggml_cpu_has_arm_fma (void); GGML_BACKEND_API int ggml_cpu_has_fp16_va (void); GGML_BACKEND_API int ggml_cpu_has_dotprod (void); GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void); GGML_BACKEND_API int ggml_cpu_has_sve (void); GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes GGML_BACKEND_API int ggml_cpu_has_sme (void); // other GGML_BACKEND_API int ggml_cpu_has_riscv_v (void); GGML_BACKEND_API int ggml_cpu_has_vsx (void); GGML_BACKEND_API int ggml_cpu_has_vxe (void); GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void); GGML_BACKEND_API int ggml_cpu_has_llamafile (void); // Internal types and functions exposed for tests and benchmarks typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx, const void * GGML_RESTRICT y, size_t by, int nrc); struct ggml_type_traits_cpu { ggml_from_float_t from_float; ggml_vec_dot_t vec_dot; enum ggml_type vec_dot_type; int64_t nrows; // number of rows to process simultaneously }; GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type); GGML_BACKEND_API void ggml_cpu_init(void); // // CPU backend // GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void); 
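// example usage (a minimal sketch, not part of this header: it assumes a graph `gf` whose tensors were built and allocated elsewhere; ggml_backend_graph_compute() and ggml_backend_free() are declared in ggml-backend.h): // // { // ggml_backend_t backend = ggml_backend_cpu_init(); // // ggml_backend_cpu_set_n_threads(backend, 4); // // enum ggml_status status = ggml_backend_graph_compute(backend, gf); // // ggml_backend_free(backend); // }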
GGML_BACKEND_API bool ggml_backend_is_cpu (ggml_backend_t backend); GGML_BACKEND_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads); GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool); GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void); GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t); GGML_BACKEND_API void ggml_cpu_fp32_to_i32 (const float *, int32_t *, int64_t); GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t); GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t); GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t); GGML_BACKEND_API void ggml_cpu_bf16_to_fp32(const ggml_bf16_t *, float *, int64_t); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-cuda.h000066400000000000000000000030611506673203700201570ustar00rootroot00000000000000#pragma once #include "ggml.h" #include "ggml-backend.h" #ifdef __cplusplus extern "C" { #endif #ifdef GGML_USE_HIP #define GGML_CUDA_NAME "ROCm" #define GGML_CUBLAS_NAME "hipBLAS" #elif defined(GGML_USE_MUSA) #define GGML_CUDA_NAME "MUSA" #define GGML_CUBLAS_NAME "muBLAS" #else #define GGML_CUDA_NAME "CUDA" #define GGML_CUBLAS_NAME "cuBLAS" #endif #define GGML_CUDA_MAX_DEVICES 16 // backend API GGML_BACKEND_API ggml_backend_t ggml_backend_cuda_init(int device); GGML_BACKEND_API bool ggml_backend_is_cuda(ggml_backend_t backend); // device buffer GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device); // split tensor buffer that splits matrices by rows across multiple devices GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split); // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void); GGML_BACKEND_API int ggml_backend_cuda_get_device_count(void); GGML_BACKEND_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size); GGML_BACKEND_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total); GGML_BACKEND_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size); GGML_BACKEND_API void ggml_backend_cuda_unregister_host_buffer(void * buffer); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cuda_reg(void); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-metal.h000066400000000000000000000041351506673203700203500ustar00rootroot00000000000000// Note: this description is outdated // // An interface that allows computing a ggml_cgraph with Metal // // This is a fully functional interface that extends ggml with GPU support for Apple devices. // A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.) // // How does it work? // // As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this // interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you // use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
// // You only need to make sure that all memory buffers that you used during the graph creation // are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is // used during the graph evaluation to determine the arguments of the compute kernels. // // Synchronization between device and host memory (for example for input and output tensors) // is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions. // #pragma once #include "ggml.h" #include "ggml-backend.h" #include <stddef.h> #include <stdbool.h> struct ggml_tensor; struct ggml_cgraph; #ifdef __cplusplus extern "C" { #endif // // backend API // user-code should use only these functions // // TODO: remove in the future GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void); GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend); GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data); // helper to check if the device supports a specific family // ideally, the user code should be doing these checks // ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family); // capture all command buffers committed the next time `ggml_backend_graph_compute` is called GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-opencl.h000066400000000000000000000011051506673203700205200ustar00rootroot00000000000000#ifndef GGML_OPENCL_H #define GGML_OPENCL_H #include "ggml.h" #include "ggml-backend.h" #ifdef __cplusplus extern "C" { #endif // // backend API // GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void); GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend); GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void); GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void); #ifdef __cplusplus } #endif #endif // GGML_OPENCL_H ggml-org-ggml-7ec8045/include/ggml-opt.h000066400000000000000000000333271506673203700200550ustar00rootroot00000000000000// This file contains functionality for training models using GGML. // It is not strictly needed compared to just vanilla GGML, but it provides a higher-level interface for common needs such as datasets. // At the bottom of this file especially there are relatively high-level functions that are suitable for use or adaptation in user code. // // Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de) #pragma once #include "ggml.h" #include "ggml-backend.h" #include <stdint.h> #ifdef __cplusplus extern "C" { #endif struct ggml_opt_dataset; struct ggml_opt_context; struct ggml_opt_result; typedef struct ggml_opt_dataset * ggml_opt_dataset_t; typedef struct ggml_opt_context * ggml_opt_context_t; typedef struct ggml_opt_result * ggml_opt_result_t; // ====== Loss ====== // built-in loss types, i.e.
the built-in quantities minimized by the optimizer // custom loss types can be defined via mean or sum, which simply reduce the outputs for all datapoints to a single value enum ggml_opt_loss_type { GGML_OPT_LOSS_TYPE_MEAN, GGML_OPT_LOSS_TYPE_SUM, GGML_OPT_LOSS_TYPE_CROSS_ENTROPY, GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR, }; // ====== Dataset ====== GGML_API ggml_opt_dataset_t ggml_opt_dataset_init( enum ggml_type type_data, // the type for the internal data tensor enum ggml_type type_label, // the type for the internal labels tensor int64_t ne_datapoint, // number of elements per datapoint int64_t ne_label, // number of elements per label int64_t ndata, // total number of datapoints/labels int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied) GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset); // get underlying tensors that store the data GGML_API int64_t ggml_opt_dataset_ndata (ggml_opt_dataset_t dataset); GGML_API struct ggml_tensor * ggml_opt_dataset_data (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata] GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label, ndata] // shuffle idata first datapoints from dataset with RNG from opt_ctx, shuffle all datapoints if idata is negative GGML_API void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata); // get batch at position ibatch from dataset and copy the data to data_batch and labels_batch GGML_API void ggml_opt_dataset_get_batch( ggml_opt_dataset_t dataset, struct ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch] struct ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch] int64_t ibatch); GGML_API void ggml_opt_dataset_get_batch_host( ggml_opt_dataset_t dataset, void * data_batch, size_t nb_data_batch, void * labels_batch, int64_t ibatch); // ====== Model / Context ====== enum ggml_opt_build_type { GGML_OPT_BUILD_TYPE_FORWARD = 10, GGML_OPT_BUILD_TYPE_GRAD = 20, GGML_OPT_BUILD_TYPE_OPT = 30, }; enum ggml_opt_optimizer_type { GGML_OPT_OPTIMIZER_TYPE_ADAMW, GGML_OPT_OPTIMIZER_TYPE_SGD, GGML_OPT_OPTIMIZER_TYPE_COUNT }; // parameters that control which optimizer is used and how said optimizer tries to find the minimal loss struct ggml_opt_optimizer_params { struct { float alpha; // learning rate float beta1; // first AdamW momentum float beta2; // second AdamW momentum float eps; // epsilon for numerical stability float wd; // weight decay - 0.0f to disable } adamw; struct { float alpha; // learning rate float wd; // weight decay } sgd; }; // callback to calculate optimizer parameters prior to a backward pass // userdata can be used to pass arbitrary data typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata); // returns the default optimizer params (constant, hard-coded values) // userdata is not used GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata); // casts userdata to ggml_opt_optimizer_params and returns it GGML_API struct ggml_opt_optimizer_params ggml_opt_get_constant_optimizer_params(void * userdata);
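// example of a custom parameter callback (a sketch; the 1/t learning rate decay is an illustrative choice, not part of the API; ggml_opt_fit() below documents that its userdata is a pointer to the current epoch as int64_t): // // static struct ggml_opt_optimizer_params my_opt_pars(void * userdata) { // const int64_t epoch = *(const int64_t *) userdata; // // struct ggml_opt_optimizer_params result = ggml_opt_get_default_optimizer_params(NULL); // result.adamw.alpha /= (float) (1 + epoch); // decay the learning rate as training progresses // return result; // }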
// parameters for initializing a new optimization context struct ggml_opt_params { ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs // by default the forward graph needs to be reconstructed for each eval // if ctx_compute, inputs, and outputs are set the graphs are instead allocated statically struct ggml_context * ctx_compute; struct ggml_tensor * inputs; struct ggml_tensor * outputs; enum ggml_opt_loss_type loss_type; enum ggml_opt_build_type build_type; int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters void * get_opt_pars_ud; // userdata for calculating optimizer parameters // only GGML_OPT_OPTIMIZER_TYPE_ADAMW needs m, v momenta per parameter tensor enum ggml_opt_optimizer_type optimizer; }; // get parameters for an optimization context with defaults set where possible // parameters for which no sensible defaults exist are supplied as arguments to this function GGML_API struct ggml_opt_params ggml_opt_default_params( ggml_backend_sched_t backend_sched, enum ggml_opt_loss_type loss_type); GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params); GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx); // set gradients to zero, initialize loss, and optionally reset the optimizer GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer); GGML_API bool ggml_opt_static_graphs(ggml_opt_context_t opt_ctx); // whether the graphs are allocated statically // get underlying tensors that store data // if not using static graphs these pointers become invalid with the next call to ggml_opt_alloc GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor GGML_API struct ggml_tensor * ggml_opt_labels( ggml_opt_context_t opt_ctx); // labels to compare outputs against GGML_API struct ggml_tensor * ggml_opt_loss( ggml_opt_context_t opt_ctx); // scalar tensor that contains the loss GGML_API struct ggml_tensor * ggml_opt_pred( ggml_opt_context_t opt_ctx); // predictions made by outputs GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels // get the gradient accumulator for a node from the forward graph GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node); GGML_API enum ggml_opt_optimizer_type ggml_opt_context_optimizer_type(ggml_opt_context_t); //TODO consistent naming scheme GGML_API const char * ggml_opt_optimizer_name(enum ggml_opt_optimizer_type); // ====== Optimization Result ====== GGML_API ggml_opt_result_t ggml_opt_result_init(void); GGML_API void ggml_opt_result_free(ggml_opt_result_t result); GGML_API void ggml_opt_result_reset(ggml_opt_result_t result); // get data from result, uncertainties are optional and can be ignored by passing NULL GGML_API void ggml_opt_result_ndata( ggml_opt_result_t result, int64_t * ndata); // writes 1 value, number of datapoints GGML_API void ggml_opt_result_loss( ggml_opt_result_t result, double * loss, double * unc); // writes 1 value GGML_API void ggml_opt_result_pred( ggml_opt_result_t result, int32_t * pred); // writes ndata values GGML_API void ggml_opt_result_accuracy(ggml_opt_result_t result, double * accuracy, double * unc); // writes 1 value // ====== Computation ====== // if not using static graphs, this function must be called prior to ggml_opt_alloc GGML_API void ggml_opt_prepare_alloc( ggml_opt_context_t opt_ctx, struct ggml_context * ctx_compute, struct ggml_cgraph * gf, struct ggml_tensor * inputs, struct ggml_tensor * outputs); // allocate the next graph for evaluation, either forward or
forward + backward // must be called exactly once prior to calling ggml_opt_eval GGML_API void ggml_opt_alloc(ggml_opt_context_t opt_ctx, bool backward); // do forward pass, increment result if not NULL, do backward pass if allocated GGML_API void ggml_opt_eval(ggml_opt_context_t opt_ctx, ggml_opt_result_t result); // ############################################################################ // ## The high-level functions start here. They do not depend on any private ## // ## functions or structs and can be copied to and adapted for user code. ## // ############################################################################ // ====== Intended Usage ====== // // 1. Select the appropriate loss for your problem. // 2. Create a dataset and set the data for the "data" tensor. Also set the "labels" tensor if your loss needs them. // Setting the shard size to 1 is fine; it's the granularity with which data is shuffled/loaded (bigger values are faster). // 3. Create a GGML graph for your model with no_alloc == true. Use two separate contexts for the tensors. // The first context should contain the model parameters and inputs and be allocated statically in user code. // The second context should contain all other tensors and will be (re)allocated automatically. // Due to this automated allocation the data of the second context is not defined when accessed in user code. // Note that the second dimension of the inputs/outputs is interpreted as the number of datapoints in those tensors. // 4. Call ggml_opt_fit. If you need more control you can use ggml_opt_epoch instead. // signature for a callback while evaluating opt_ctx on dataset, called after an evaluation typedef void (*ggml_opt_epoch_callback)( bool train, // true after training evaluation, false after validation evaluation ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, ggml_opt_result_t result, // result associated with the dataset subsection int64_t ibatch, // number of batches that have been evaluated so far int64_t ibatch_max, // total number of batches in this dataset subsection int64_t t_start_us); // time at which the evaluation on the dataset subsection was started // do training on front of dataset, do evaluation only on back of dataset GGML_API void ggml_opt_epoch( ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, ggml_opt_result_t result_train, // result to increment during training, ignored if NULL ggml_opt_result_t result_eval, // result to increment during evaluation, ignored if NULL int64_t idata_split, // data index at which to split training and evaluation ggml_opt_epoch_callback callback_train, ggml_opt_epoch_callback callback_eval); // callback that prints a progress bar on stderr GGML_API void ggml_opt_epoch_callback_progress_bar( bool train, ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, ggml_opt_result_t result, int64_t ibatch, int64_t ibatch_max, int64_t t_start_us);
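// example of a manual training loop, the lower-level alternative to ggml_opt_fit() (a sketch; opt_ctx, dataset, and result are assumed to have been set up as described above, and `nbatches` is a placeholder for the number of batches): // // { // for (int64_t ibatch = 0; ibatch < nbatches; ++ibatch) { // ggml_opt_dataset_get_batch(dataset, ggml_opt_inputs(opt_ctx), ggml_opt_labels(opt_ctx), ibatch); // // ggml_opt_alloc(opt_ctx, /*backward =*/ true); // ggml_opt_eval(opt_ctx, result); // forward + backward pass; an optimizer step is done every opt_period batches // } // }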
// fit model defined by inputs and outputs to dataset GGML_API void ggml_opt_fit( ggml_backend_sched_t backend_sched, // backend scheduler for constructing the compute graphs struct ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs struct ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch] struct ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used ggml_opt_dataset_t dataset, // dataset with data and optionally also labels enum ggml_opt_loss_type loss_type, // loss to minimize enum ggml_opt_optimizer_type optimizer, // sgd or adamw ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t) int64_t nepoch, // how many times the dataset should be iterated over int64_t nbatch_logical, // datapoints per optimizer step, must be a multiple of ndata_batch in inputs/outputs float val_split, // fraction of the dataset to use for validation, must be in [0.0f, 1.0f) bool silent); // whether or not info prints to stderr should be suppressed #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-rpc.h000066400000000000000000000020551506673203700200310ustar00rootroot00000000000000#pragma once #include "ggml.h" #include "ggml-backend.h" #ifdef __cplusplus extern "C" { #endif #define RPC_PROTO_MAJOR_VERSION 2 #define RPC_PROTO_MINOR_VERSION 0 #define RPC_PROTO_PATCH_VERSION 0 #define GGML_RPC_MAX_SERVERS 16 // backend API GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint); GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend); GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint); GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total); GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, const char * cache_dir, size_t free_mem, size_t total_mem); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void); GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-sycl.h000066400000000000000000000033431506673203700202200ustar00rootroot00000000000000// // MIT license // Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: MIT // #pragma once #include "ggml.h" #include "ggml-backend.h" #define GGML_SYCL_NAME "SYCL" #define GGML_SYCL_MAX_DEVICES 48 #ifdef __cplusplus extern "C" { #endif // backend API GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device); GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend); // device buffer GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device); // split tensor buffer that splits matrices by rows across multiple devices GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split); // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void); GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void); GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len); GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device, char *description, size_t description_size); GGML_BACKEND_API int ggml_backend_sycl_get_device_count(); GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total); // SYCL doesn't support registering host memory, keep here for reference // GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size); // GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-vulkan.h000066400000000000000000000016701506673203700205470ustar00rootroot00000000000000#pragma once #include "ggml.h" #include "ggml-backend.h" #ifdef __cplusplus extern "C" {
#endif #define GGML_VK_NAME "Vulkan" #define GGML_VK_MAX_DEVICES 16 // backend API GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num); GGML_BACKEND_API bool ggml_backend_is_vk(ggml_backend_t backend); GGML_BACKEND_API int ggml_backend_vk_get_device_count(void); GGML_BACKEND_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size); GGML_BACKEND_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total); GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num); // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_vk_reg(void); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-webgpu.h000066400000000000000000000005101506673203700205300ustar00rootroot00000000000000#pragma once #include "ggml.h" #include "ggml-backend.h" #ifdef __cplusplus extern "C" { #endif #define GGML_WEBGPU_NAME "WebGPU" // Needed for examples in ggml GGML_BACKEND_API ggml_backend_t ggml_backend_webgpu_init(void); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_webgpu_reg(void); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml-zdnn.h000066400000000000000000000004441506673203700202160ustar00rootroot00000000000000#pragma once #include "ggml.h" #include "ggml-backend.h" #ifdef __cplusplus extern "C" { #endif // device buffer GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_zdnn_buffer_type(void); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_zdnn_reg(void); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/ggml.h000066400000000000000000002723701506673203700172600ustar00rootroot00000000000000#pragma once // // GGML Tensor Library // // This documentation is still a work in progress. // If you wish some specific topics to be covered, feel free to drop a comment: // // https://github.com/ggerganov/whisper.cpp/issues/40 // // ## Overview // // This library implements: // // - a set of tensor operations // - automatic differentiation // - basic optimization algorithms // // The aim of this library is to provide a minimalistic approach for various machine learning tasks. This includes, // but is not limited to, the following: // // - linear regression // - support vector machines // - neural networks // // The library allows the user to define a certain function using the available tensor operations. This function // definition is represented internally via a computation graph. Each tensor operation in the function definition // corresponds to a node in the graph. Having the computation graph defined, the user can choose to compute the // function's value and/or its gradient with respect to the input variables. Optionally, the function can be optimized // using one of the available optimization algorithms. 
// // For example, here we define the function: f(x) = a*x^2 + b // // { // struct ggml_init_params params = { // .mem_size = 16*1024*1024, // .mem_buffer = NULL, // }; // // // memory allocation happens here // struct ggml_context * ctx = ggml_init(params); // // struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1); // // ggml_set_param(x); // x is an input variable // // struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1); // struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1); // struct ggml_tensor * x2 = ggml_mul(ctx, x, x); // struct ggml_tensor * f = ggml_add(ctx, ggml_mul(ctx, a, x2), b); // // ... // } // // Notice that the function definition above does not involve any actual computation. The computation is performed only // when the user explicitly requests it. For example, to compute the function's value at x = 2.0: // // { // ... // // struct ggml_cgraph * gf = ggml_new_graph(ctx); // ggml_build_forward_expand(gf, f); // // // set the input variable and parameter values // ggml_set_f32(x, 2.0f); // ggml_set_f32(a, 3.0f); // ggml_set_f32(b, 4.0f); // // ggml_graph_compute_with_ctx(ctx, gf, n_threads); // // printf("f = %f\n", ggml_get_f32_1d(f, 0)); // // ... // } // // The actual computation is performed in the ggml_graph_compute() function. // // The ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the // ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know // in advance how much memory you need for your computation. Alternatively, you can allocate a large enough memory // buffer and, after defining the computation graph, call the ggml_used_mem() function to find out how much memory was // actually needed. // // The ggml_set_param() function marks a tensor as an input variable. This is used by the automatic // differentiation and optimization algorithms. // // The described approach allows the user to define the function graph once and then compute its forward or backward graphs // multiple times. All computations will use the same memory buffer allocated in the ggml_init() function. This way // the user can avoid the memory allocation overhead at runtime. // // The library supports multi-dimensional tensors - up to 4 dimensions. The FP16 and FP32 data types are first class // citizens, but in theory the library can be extended to support FP8 and integer data types. // // Each tensor operation produces a new tensor. Initially the library was envisioned to support only the use of unary // and binary operations. Most of the available operations fall into one of these two categories. With time, it became // clear that the library needs to support more complex operations. The way to support these operations is not clear // yet, but a few examples are demonstrated in the following operations: // // - ggml_permute() // - ggml_conv_1d_1s() // - ggml_conv_1d_2s() // // For each tensor operator, the library implements a forward and backward computation function. The forward function // computes the output tensor value given the input tensor values. The backward function computes the adjoint of the // input tensors given the adjoint of the output tensor. For a detailed explanation of what this means, take a // calculus class, or watch the following video: // // What is Automatic Differentiation?
// https://www.youtube.com/watch?v=wG_nF1awSSY // // // ## Tensor data (struct ggml_tensor) // // The tensors are stored in memory via the ggml_tensor struct. The structure provides information about the size of // the tensor, the data type, and the memory buffer where the tensor data is stored. Additionally, it contains // pointers to the "source" tensors - i.e. the tensors that were used to compute the current tensor. For example: // // { // struct ggml_tensor * c = ggml_add(ctx, a, b); // // assert(c->src[0] == a); // assert(c->src[1] == b); // } // // The multi-dimensional tensors are stored in row-major order. The ggml_tensor struct contains fields for the // number of elements in each dimension ("ne") as well as the number of bytes ("nb", a.k.a. stride). This allows // storing tensors that are not contiguous in memory, which is useful for operations such as transposition and // permutation. All tensor operations have to take the stride into account and not assume that the tensor is // contiguous in memory. // // The data of the tensor is accessed via the "data" pointer. For example: // // { // const int nx = 2; // const int ny = 3; // // struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny); // // for (int y = 0; y < ny; y++) { // for (int x = 0; x < nx; x++) { // *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y; // } // } // // ... // } // // Alternatively, there are helper functions, such as ggml_get_f32_1d() and ggml_set_f32_1d() that can be used. // // ## The matrix multiplication operator (ggml_mul_mat) // // TODO // // // ## Multi-threading // // TODO // // // ## Overview of ggml.c // // TODO // // // ## SIMD optimizations // // TODO // // // ## Debugging ggml // // TODO // // #ifdef GGML_SHARED # if defined(_WIN32) && !defined(__MINGW32__) # ifdef GGML_BUILD # define GGML_API __declspec(dllexport) extern # else # define GGML_API __declspec(dllimport) extern # endif # else # define GGML_API __attribute__ ((visibility ("default"))) extern # endif #else # define GGML_API extern #endif // TODO: support for clang #ifdef __GNUC__ # define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint))) #elif defined(_MSC_VER) # define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func #else # define GGML_DEPRECATED(func, hint) func #endif #ifndef __GNUC__ # define GGML_ATTRIBUTE_FORMAT(...) #elif defined(__MINGW32__) && !defined(__clang__) # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__))) #else # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__))) #endif #include <stdbool.h> #include <stddef.h> #include <stdint.h> #include <stdio.h> #define GGML_FILE_MAGIC 0x67676d6c // "ggml" #define GGML_FILE_VERSION 2 #define GGML_QNT_VERSION 2 // bump this on quantization format changes #define GGML_QNT_VERSION_FACTOR 1000 // do not change this #define GGML_MAX_DIMS 4 #define GGML_MAX_PARAMS 2048 #define GGML_MAX_SRC 10 #define GGML_MAX_N_THREADS 512 #define GGML_MAX_OP_PARAMS 64 #ifndef GGML_MAX_NAME # define GGML_MAX_NAME 64 #endif #define GGML_DEFAULT_N_THREADS 4 #define GGML_DEFAULT_GRAPH_SIZE 2048 #if UINTPTR_MAX == 0xFFFFFFFF #define GGML_MEM_ALIGN 4 #else #define GGML_MEM_ALIGN 16 #endif #define GGML_EXIT_SUCCESS 0 #define GGML_EXIT_ABORTED 1 #define GGML_ROPE_TYPE_NEOX 2 #define GGML_ROPE_TYPE_MROPE 8 #define GGML_ROPE_TYPE_VISION 24 #define GGML_MROPE_SECTIONS 4 #define GGML_UNUSED(x) (void)(x) #ifdef __CUDACC__ template <typename... Args> __host__ __device__ constexpr inline void ggml_unused_vars_impl(Args&&...)
noexcept {} #define GGML_UNUSED_VARS(...) ggml_unused_vars_impl(__VA_ARGS__) #else #define GGML_UNUSED_VARS(...) do { (void)sizeof((__VA_ARGS__, 0)); } while(0) #endif // __CUDACC__ #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1)) #ifndef NDEBUG # define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0) #elif defined(__GNUC__) # define GGML_UNREACHABLE() __builtin_unreachable() #elif defined(_MSC_VER) # define GGML_UNREACHABLE() __assume(0) #else # define GGML_UNREACHABLE() ((void) 0) #endif #ifdef __cplusplus # define GGML_NORETURN [[noreturn]] #elif defined(_MSC_VER) # define GGML_NORETURN __declspec(noreturn) #else # define GGML_NORETURN _Noreturn #endif #define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__) #define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) // used to copy the number of elements and stride in bytes of tensors into local variables. // main purpose is to reduce code duplication and improve readability. // // example: // // GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); // GGML_TENSOR_LOCALS(size_t, nb1, src1, nb); // #define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \ const type prefix##0 = (pointer) ? (pointer)->array[0] : 0; \ GGML_UNUSED(prefix##0); #define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \ GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \ const type prefix##1 = (pointer) ? (pointer)->array[1] : 0; \ GGML_UNUSED(prefix##1); #define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \ GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \ const type prefix##2 = (pointer) ? (pointer)->array[2] : 0; \ GGML_UNUSED(prefix##2); #define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \ GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \ const type prefix##3 = (pointer) ? 
(pointer)->array[3] : 0; \ GGML_UNUSED(prefix##3); #define GGML_TENSOR_UNARY_OP_LOCALS \ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \ GGML_TENSOR_LOCALS(size_t, nb, dst, nb) #define GGML_TENSOR_BINARY_OP_LOCALS \ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \ GGML_TENSOR_LOCALS(size_t, nb, dst, nb) #define GGML_TENSOR_TERNARY_OP_LOCALS \ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \ GGML_TENSOR_LOCALS(int64_t, ne2, src2, ne) \ GGML_TENSOR_LOCALS(size_t, nb2, src2, nb) \ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \ GGML_TENSOR_LOCALS(size_t, nb, dst, nb) #define GGML_TENSOR_BINARY_OP_LOCALS01 \ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) #ifdef __cplusplus extern "C" { #endif // Function type used in fatal error callbacks typedef void (*ggml_abort_callback_t)(const char * error_message); // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout) // Returns the old callback for chaining GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback); GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4) GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...); enum ggml_status { GGML_STATUS_ALLOC_FAILED = -2, GGML_STATUS_FAILED = -1, GGML_STATUS_SUCCESS = 0, GGML_STATUS_ABORTED = 1, }; // get ggml_status name string GGML_API const char * ggml_status_to_string(enum ggml_status status); // ieee 754-2008 half-precision float16 // todo: make this not an integral type typedef uint16_t ggml_fp16_t; GGML_API float ggml_fp16_to_fp32(ggml_fp16_t); GGML_API ggml_fp16_t ggml_fp32_to_fp16(float); GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t *, float *, int64_t); GGML_API void ggml_fp32_to_fp16_row(const float *, ggml_fp16_t *, int64_t); // google brain half-precision bfloat16 typedef struct { uint16_t bits; } ggml_bf16_t; GGML_API ggml_bf16_t ggml_fp32_to_bf16(float); GGML_API float ggml_bf16_to_fp32(ggml_bf16_t); // consider just doing << 16 GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t); GGML_API void ggml_fp32_to_bf16_row_ref(const float *, ggml_bf16_t *, int64_t); GGML_API void ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t); struct ggml_object; struct ggml_context; struct ggml_cgraph; // NOTE: always add types at the end of the enum to keep backward compatibility enum ggml_type { GGML_TYPE_F32 = 0, GGML_TYPE_F16 = 1, GGML_TYPE_Q4_0 = 2, GGML_TYPE_Q4_1 = 3, // GGML_TYPE_Q4_2 = 4, support has been removed // GGML_TYPE_Q4_3 = 5, support has been removed GGML_TYPE_Q5_0 = 6, GGML_TYPE_Q5_1 = 7, GGML_TYPE_Q8_0 = 8, GGML_TYPE_Q8_1 = 9, GGML_TYPE_Q2_K = 10, GGML_TYPE_Q3_K = 11, GGML_TYPE_Q4_K = 12, GGML_TYPE_Q5_K = 13, GGML_TYPE_Q6_K = 14, GGML_TYPE_Q8_K = 15, GGML_TYPE_IQ2_XXS = 16, GGML_TYPE_IQ2_XS = 17, GGML_TYPE_IQ3_XXS = 18, GGML_TYPE_IQ1_S = 19, GGML_TYPE_IQ4_NL = 20, GGML_TYPE_IQ3_S = 21, GGML_TYPE_IQ2_S = 22, GGML_TYPE_IQ4_XS = 23, GGML_TYPE_I8 = 24, GGML_TYPE_I16 = 25, GGML_TYPE_I32 = 26, GGML_TYPE_I64 = 27, GGML_TYPE_F64 = 28, GGML_TYPE_IQ1_M = 29, 
GGML_TYPE_BF16 = 30, // GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files // GGML_TYPE_Q4_0_4_8 = 32, // GGML_TYPE_Q4_0_8_8 = 33, GGML_TYPE_TQ1_0 = 34, GGML_TYPE_TQ2_0 = 35, // GGML_TYPE_IQ4_NL_4_4 = 36, // GGML_TYPE_IQ4_NL_4_8 = 37, // GGML_TYPE_IQ4_NL_8_8 = 38, GGML_TYPE_MXFP4 = 39, // MXFP4 (1 block) GGML_TYPE_COUNT = 40, }; // precision enum ggml_prec { GGML_PREC_DEFAULT = 0, // stored as ggml_tensor.op_params, 0 by default GGML_PREC_F32 = 10, }; // model file types enum ggml_ftype { GGML_FTYPE_UNKNOWN = -1, GGML_FTYPE_ALL_F32 = 0, GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16 GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors GGML_FTYPE_MOSTLY_IQ2_XXS = 15, // except 1d tensors GGML_FTYPE_MOSTLY_IQ2_XS = 16, // except 1d tensors GGML_FTYPE_MOSTLY_IQ3_XXS = 17, // except 1d tensors GGML_FTYPE_MOSTLY_IQ1_S = 18, // except 1d tensors GGML_FTYPE_MOSTLY_IQ4_NL = 19, // except 1d tensors GGML_FTYPE_MOSTLY_IQ3_S = 20, // except 1d tensors GGML_FTYPE_MOSTLY_IQ2_S = 21, // except 1d tensors GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors GGML_FTYPE_MOSTLY_MXFP4 = 25, // except 1d tensors }; // available tensor operations: enum ggml_op { GGML_OP_NONE = 0, GGML_OP_DUP, GGML_OP_ADD, GGML_OP_ADD_ID, GGML_OP_ADD1, GGML_OP_ACC, GGML_OP_SUB, GGML_OP_MUL, GGML_OP_DIV, GGML_OP_SQR, GGML_OP_SQRT, GGML_OP_LOG, GGML_OP_SIN, GGML_OP_COS, GGML_OP_SUM, GGML_OP_SUM_ROWS, GGML_OP_MEAN, GGML_OP_ARGMAX, GGML_OP_COUNT_EQUAL, GGML_OP_REPEAT, GGML_OP_REPEAT_BACK, GGML_OP_CONCAT, GGML_OP_SILU_BACK, GGML_OP_NORM, // normalize GGML_OP_RMS_NORM, GGML_OP_RMS_NORM_BACK, GGML_OP_GROUP_NORM, GGML_OP_L2_NORM, GGML_OP_MUL_MAT, GGML_OP_MUL_MAT_ID, GGML_OP_OUT_PROD, GGML_OP_SCALE, GGML_OP_SET, GGML_OP_CPY, GGML_OP_CONT, GGML_OP_RESHAPE, GGML_OP_VIEW, GGML_OP_PERMUTE, GGML_OP_TRANSPOSE, GGML_OP_GET_ROWS, GGML_OP_GET_ROWS_BACK, GGML_OP_SET_ROWS, GGML_OP_DIAG, GGML_OP_DIAG_MASK_INF, GGML_OP_DIAG_MASK_ZERO, GGML_OP_SOFT_MAX, GGML_OP_SOFT_MAX_BACK, GGML_OP_ROPE, GGML_OP_ROPE_BACK, GGML_OP_CLAMP, GGML_OP_CONV_TRANSPOSE_1D, GGML_OP_IM2COL, GGML_OP_IM2COL_BACK, GGML_OP_IM2COL_3D, GGML_OP_CONV_2D, GGML_OP_CONV_3D, GGML_OP_CONV_2D_DW, GGML_OP_CONV_TRANSPOSE_2D, GGML_OP_POOL_1D, GGML_OP_POOL_2D, GGML_OP_POOL_2D_BACK, GGML_OP_UPSCALE, GGML_OP_PAD, GGML_OP_PAD_REFLECT_1D, GGML_OP_ROLL, GGML_OP_ARANGE, GGML_OP_TIMESTEP_EMBEDDING, GGML_OP_ARGSORT, GGML_OP_LEAKY_RELU, GGML_OP_FLASH_ATTN_EXT, GGML_OP_FLASH_ATTN_BACK, GGML_OP_SSM_CONV, GGML_OP_SSM_SCAN, GGML_OP_WIN_PART, GGML_OP_WIN_UNPART, GGML_OP_GET_REL_POS, GGML_OP_ADD_REL_POS, GGML_OP_RWKV_WKV6, GGML_OP_GATED_LINEAR_ATTN, GGML_OP_RWKV_WKV7, GGML_OP_UNARY, GGML_OP_MAP_CUSTOM1, GGML_OP_MAP_CUSTOM2, GGML_OP_MAP_CUSTOM3, GGML_OP_CUSTOM, GGML_OP_CROSS_ENTROPY_LOSS, GGML_OP_CROSS_ENTROPY_LOSS_BACK, GGML_OP_OPT_STEP_ADAMW, GGML_OP_OPT_STEP_SGD, GGML_OP_GLU, GGML_OP_COUNT, }; enum ggml_unary_op { GGML_UNARY_OP_ABS, GGML_UNARY_OP_SGN, GGML_UNARY_OP_NEG, 
GGML_UNARY_OP_STEP, GGML_UNARY_OP_TANH, GGML_UNARY_OP_ELU, GGML_UNARY_OP_RELU, GGML_UNARY_OP_SIGMOID, GGML_UNARY_OP_GELU, GGML_UNARY_OP_GELU_QUICK, GGML_UNARY_OP_SILU, GGML_UNARY_OP_HARDSWISH, GGML_UNARY_OP_HARDSIGMOID, GGML_UNARY_OP_EXP, GGML_UNARY_OP_GELU_ERF, GGML_UNARY_OP_COUNT, }; enum ggml_glu_op { GGML_GLU_OP_REGLU, GGML_GLU_OP_GEGLU, GGML_GLU_OP_SWIGLU, GGML_GLU_OP_SWIGLU_OAI, GGML_GLU_OP_GEGLU_ERF, GGML_GLU_OP_GEGLU_QUICK, GGML_GLU_OP_COUNT, }; enum ggml_object_type { GGML_OBJECT_TYPE_TENSOR, GGML_OBJECT_TYPE_GRAPH, GGML_OBJECT_TYPE_WORK_BUFFER }; enum ggml_log_level { GGML_LOG_LEVEL_NONE = 0, GGML_LOG_LEVEL_DEBUG = 1, GGML_LOG_LEVEL_INFO = 2, GGML_LOG_LEVEL_WARN = 3, GGML_LOG_LEVEL_ERROR = 4, GGML_LOG_LEVEL_CONT = 5, // continue previous log }; // this tensor... enum ggml_tensor_flag { GGML_TENSOR_FLAG_INPUT = 1, // ...is an input for the GGML compute graph GGML_TENSOR_FLAG_OUTPUT = 2, // ...is an output for the GGML compute graph GGML_TENSOR_FLAG_PARAM = 4, // ...contains trainable parameters GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up) }; struct ggml_init_params { // memory pool size_t mem_size; // bytes void * mem_buffer; // if NULL, memory will be allocated internally bool no_alloc; // don't allocate memory for the tensor data }; // n-dimensional tensor struct ggml_tensor { enum ggml_type type; struct ggml_backend_buffer * buffer; int64_t ne[GGML_MAX_DIMS]; // number of elements size_t nb[GGML_MAX_DIMS]; // stride in bytes: // nb[0] = ggml_type_size(type) // nb[1] = nb[0] * (ne[0] / ggml_blck_size(type)) + padding // nb[i] = nb[i-1] * ne[i-1] // compute data enum ggml_op op; // op params - allocated as int32_t for alignment int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)]; int32_t flags; struct ggml_tensor * src[GGML_MAX_SRC]; // source tensor and offset for views struct ggml_tensor * view_src; size_t view_offs; void * data; char name[GGML_MAX_NAME]; void * extra; // extra things e.g. 
for ggml-cuda.cu char padding[8]; }; static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor); // Abort callback // If not NULL, called before ggml computation // If it returns true, the computation is aborted typedef bool (*ggml_abort_callback)(void * data); // // GUID // // GUID types typedef uint8_t ggml_guid[16]; typedef ggml_guid * ggml_guid_t; GGML_API bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b); // misc GGML_API const char * ggml_version(void); GGML_API const char * ggml_commit(void); GGML_API void ggml_time_init(void); // call this once at the beginning of the program GGML_API int64_t ggml_time_ms(void); GGML_API int64_t ggml_time_us(void); GGML_API int64_t ggml_cycles(void); GGML_API int64_t ggml_cycles_per_ms(void); // accepts a UTF-8 path, even on Windows GGML_API FILE * ggml_fopen(const char * fname, const char * mode); GGML_API void ggml_print_object (const struct ggml_object * obj); GGML_API void ggml_print_objects(const struct ggml_context * ctx); GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor); GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor); GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor); GGML_API size_t ggml_nbytes_pad(const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN GGML_API int64_t ggml_blck_size(enum ggml_type type); GGML_API size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block GGML_API size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row GGML_DEPRECATED( GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float "use ggml_row_size() instead"); GGML_API const char * ggml_type_name(enum ggml_type type); GGML_API const char * ggml_op_name (enum ggml_op op); GGML_API const char * ggml_op_symbol(enum ggml_op op); GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op); GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op); GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor); GGML_API bool ggml_is_quantized(enum ggml_type type); // TODO: temporary until model loading of ggml examples is refactored GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype); GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor); GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor); GGML_API bool ggml_is_empty (const struct ggml_tensor * tensor); GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor); GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor); GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor); GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor); GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars // returns whether the tensor elements can be iterated over with a flattened index (no gaps, no permutation) GGML_API bool ggml_is_contiguous (const struct ggml_tensor * tensor); GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous() GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1 GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2 // returns whether the tensor elements are allocated as one contiguous block of memory (no gaps, but permutation ok) GGML_API bool 
ggml_is_contiguously_allocated(const struct ggml_tensor * tensor); // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor); // true if the elements in dimension 0 are contiguous, or there is just 1 block of elements GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor); GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1); GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1); GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1); // use this to compute the memory overhead of a tensor GGML_API size_t ggml_tensor_overhead(void); GGML_API bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes); // main GGML_API struct ggml_context * ggml_init (struct ggml_init_params params); GGML_API void ggml_reset(struct ggml_context * ctx); GGML_API void ggml_free (struct ggml_context * ctx); GGML_API size_t ggml_used_mem(const struct ggml_context * ctx); GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx); GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc); GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx); GGML_API size_t ggml_get_mem_size (const struct ggml_context * ctx); GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx); GGML_API struct ggml_tensor * ggml_new_tensor( struct ggml_context * ctx, enum ggml_type type, int n_dims, const int64_t *ne); GGML_API struct ggml_tensor * ggml_new_tensor_1d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0); GGML_API struct ggml_tensor * ggml_new_tensor_2d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1); GGML_API struct ggml_tensor * ggml_new_tensor_3d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2); GGML_API struct ggml_tensor * ggml_new_tensor_4d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3); GGML_API void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes); GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src); GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src); // Context tensor enumeration and lookup GGML_API struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx); GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor); GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name); // Converts a flat index into coordinates GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3); GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor); GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor); GGML_API void * ggml_get_data (const struct ggml_tensor * tensor); GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor); GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor); GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name); GGML_ATTRIBUTE_FORMAT(2, 3) GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...); // Tensor flags GGML_API 
void ggml_set_input(struct ggml_tensor * tensor); GGML_API void ggml_set_output(struct ggml_tensor * tensor); GGML_API void ggml_set_param(struct ggml_tensor * tensor); GGML_API void ggml_set_loss(struct ggml_tensor * tensor); // // operations on tensors with backpropagation // GGML_API struct ggml_tensor * ggml_dup( struct ggml_context * ctx, struct ggml_tensor * a); // in-place, returns view(a) GGML_API struct ggml_tensor * ggml_dup_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_add( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_add_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_add_cast( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, enum ggml_type type); // dst[i0, i1, i2] = a[i0, i1, i2] + b[i0, ids[i1, i2]] GGML_API struct ggml_tensor * ggml_add_id( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * ids); GGML_API struct ggml_tensor * ggml_add1( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_add1_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); // dst = a // view(dst, nb1, nb2, nb3, offset) += b // return dst GGML_API struct ggml_tensor * ggml_acc( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset); GGML_API struct ggml_tensor * ggml_acc_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset); GGML_API struct ggml_tensor * ggml_sub( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_sub_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_mul( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_mul_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_div( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_div_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_sqr( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_sqr_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_sqrt( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_sqrt_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_log( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_log_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_sin( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_sin_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_cos( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_cos_inplace( struct ggml_context * ctx, struct ggml_tensor * a); // return scalar GGML_API struct ggml_tensor * ggml_sum( struct ggml_context * ctx, struct ggml_tensor * a); // sums along rows, with input 
shape [a,b,c,d] return shape [1,b,c,d] GGML_API struct ggml_tensor * ggml_sum_rows( struct ggml_context * ctx, struct ggml_tensor * a); // mean along rows GGML_API struct ggml_tensor * ggml_mean( struct ggml_context * ctx, struct ggml_tensor * a); // argmax along rows GGML_API struct ggml_tensor * ggml_argmax( struct ggml_context * ctx, struct ggml_tensor * a); // count number of equal elements in a and b GGML_API struct ggml_tensor * ggml_count_equal( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); // if a is the same shape as b, and a is not a parameter, return a // otherwise, return a new tensor: repeat(a) to fit in b GGML_API struct ggml_tensor * ggml_repeat( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); // repeat a to the specified shape GGML_API struct ggml_tensor * ggml_repeat_4d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3); // sums repetitions in a into shape of b GGML_API struct ggml_tensor * ggml_repeat_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); // sum up values that are adjacent in dims > 0 instead of repeated with the same stride // concat a and b along dim // used in stable-diffusion GGML_API struct ggml_tensor * ggml_concat( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int dim); GGML_API struct ggml_tensor * ggml_abs( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_abs_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_sgn( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_sgn_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_neg( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_neg_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_step( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_step_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_tanh( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_tanh_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_elu( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_elu_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_relu( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_leaky_relu( struct ggml_context * ctx, struct ggml_tensor * a, float negative_slope, bool inplace); GGML_API struct ggml_tensor * ggml_relu_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_sigmoid( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_sigmoid_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_gelu( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_gelu_inplace( struct ggml_context * ctx, struct ggml_tensor * a); // GELU using erf (error function) when possible // some backends may fall back to an approximation based on the Abramowitz and Stegun formula GGML_API struct ggml_tensor * ggml_gelu_erf( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor *
ggml_gelu_erf_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_gelu_quick( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_gelu_quick_inplace( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_silu( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_silu_inplace( struct ggml_context * ctx, struct ggml_tensor * a); // a - x // b - dy GGML_API struct ggml_tensor * ggml_silu_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); // hardswish(x) = x * relu6(x + 3) / 6 GGML_API struct ggml_tensor * ggml_hardswish( struct ggml_context * ctx, struct ggml_tensor * a); // hardsigmoid(x) = relu6(x + 3) / 6 GGML_API struct ggml_tensor * ggml_hardsigmoid( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_exp( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_exp_inplace( struct ggml_context * ctx, struct ggml_tensor * a); // gated linear unit ops // A: n columns, r rows, // result is n / 2 columns, r rows, // expects gate in second half of row, unless swapped is true GGML_API struct ggml_tensor * ggml_glu( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_glu_op op, bool swapped); GGML_API struct ggml_tensor * ggml_reglu( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_reglu_swapped( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_geglu( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_geglu_swapped( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_swiglu( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_swiglu_swapped( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_geglu_erf( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_geglu_erf_swapped( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_geglu_quick( struct ggml_context * ctx, struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_geglu_quick_swapped( struct ggml_context * ctx, struct ggml_tensor * a); // A: n columns, r rows, // B: n columns, r rows, GGML_API struct ggml_tensor * ggml_glu_split( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, enum ggml_glu_op op); GGML_API struct ggml_tensor * ggml_reglu_split( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_geglu_split( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_swiglu_split( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_geglu_erf_split( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_geglu_quick_split( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); GGML_API struct ggml_tensor * ggml_swiglu_oai( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, float alpha, float limit); // normalize along rows GGML_API struct ggml_tensor * ggml_norm( struct ggml_context * ctx, struct ggml_tensor * a, float eps); GGML_API struct ggml_tensor * ggml_norm_inplace( struct ggml_context * ctx, struct ggml_tensor 
* a, float eps); GGML_API struct ggml_tensor * ggml_rms_norm( struct ggml_context * ctx, struct ggml_tensor * a, float eps); GGML_API struct ggml_tensor * ggml_rms_norm_inplace( struct ggml_context * ctx, struct ggml_tensor * a, float eps); // group normalize along ne0*ne1*n_groups // used in stable-diffusion GGML_API struct ggml_tensor * ggml_group_norm( struct ggml_context * ctx, struct ggml_tensor * a, int n_groups, float eps); GGML_API struct ggml_tensor * ggml_group_norm_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_groups, float eps); // l2 normalize along rows // used in rwkv v7 GGML_API struct ggml_tensor * ggml_l2_norm( struct ggml_context * ctx, struct ggml_tensor * a, float eps); GGML_API struct ggml_tensor * ggml_l2_norm_inplace( struct ggml_context * ctx, struct ggml_tensor * a, float eps); // a - x // b - dy GGML_API struct ggml_tensor * ggml_rms_norm_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, float eps); // A: k columns, n rows => [ne03, ne02, n, k] // B: k columns, m rows (i.e. we transpose it internally) => [ne03 * x, ne02 * y, m, k] // result is n columns, m rows => [ne03 * x, ne02 * y, m, n] GGML_API struct ggml_tensor * ggml_mul_mat( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); // change the precision of a matrix multiplication // set to GGML_PREC_F32 for higher precision (useful for phi-2) GGML_API void ggml_mul_mat_set_prec( struct ggml_tensor * a, enum ggml_prec prec); // indirect matrix multiplication GGML_API struct ggml_tensor * ggml_mul_mat_id( struct ggml_context * ctx, struct ggml_tensor * as, struct ggml_tensor * b, struct ggml_tensor * ids); // A: m columns, n rows, // B: p columns, n rows, // result is m columns, p rows GGML_API struct ggml_tensor * ggml_out_prod( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); // // operations on tensors without backpropagation // GGML_API struct ggml_tensor * ggml_scale( struct ggml_context * ctx, struct ggml_tensor * a, float s); // in-place, returns view(a) GGML_API struct ggml_tensor * ggml_scale_inplace( struct ggml_context * ctx, struct ggml_tensor * a, float s); // x = s * a + b GGML_API struct ggml_tensor * ggml_scale_bias( struct ggml_context * ctx, struct ggml_tensor * a, float s, float b); GGML_API struct ggml_tensor * ggml_scale_bias_inplace( struct ggml_context * ctx, struct ggml_tensor * a, float s, float b); // b -> view(a,offset,nb1,nb2,nb3), return modified a GGML_API struct ggml_tensor * ggml_set( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset); // in bytes // b -> view(a,offset,nb1,nb2,nb3), return view(a) GGML_API struct ggml_tensor * ggml_set_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset); // in bytes GGML_API struct ggml_tensor * ggml_set_1d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t offset); // in bytes GGML_API struct ggml_tensor * ggml_set_1d_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t offset); // in bytes // b -> view(a,offset,nb1,nb2,nb3), return modified a GGML_API struct ggml_tensor * ggml_set_2d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t offset); // in bytes // b -> view(a,offset,nb1,nb2,nb3), return view(a) GGML_API struct ggml_tensor * ggml_set_2d_inplace( struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t offset); // in bytes // a -> b, return view(b) GGML_API struct ggml_tensor * ggml_cpy( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); // note: casting from f32 to i32 will discard the fractional part GGML_API struct ggml_tensor * ggml_cast( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_type type); // make contiguous GGML_API struct ggml_tensor * ggml_cont( struct ggml_context * ctx, struct ggml_tensor * a); // make contiguous, with new shape GGML_API struct ggml_tensor * ggml_cont_1d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0); GGML_API struct ggml_tensor * ggml_cont_2d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1); GGML_API struct ggml_tensor * ggml_cont_3d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2); GGML_API struct ggml_tensor * ggml_cont_4d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3); // return view(a), b specifies the new shape // TODO: when we start computing gradient, make a copy instead of view GGML_API struct ggml_tensor * ggml_reshape( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); // return view(a) // TODO: when we start computing gradient, make a copy instead of view GGML_API struct ggml_tensor * ggml_reshape_1d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0); GGML_API struct ggml_tensor * ggml_reshape_2d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1); // return view(a) // TODO: when we start computing gradient, make a copy instead of view GGML_API struct ggml_tensor * ggml_reshape_3d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2); GGML_API struct ggml_tensor * ggml_reshape_4d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3); // offset in bytes GGML_API struct ggml_tensor * ggml_view_1d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, size_t offset); GGML_API struct ggml_tensor * ggml_view_2d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, size_t nb1, // row stride in bytes size_t offset); GGML_API struct ggml_tensor * ggml_view_3d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, size_t nb1, // row stride in bytes size_t nb2, // slice stride in bytes size_t offset); GGML_API struct ggml_tensor * ggml_view_4d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3, size_t nb1, // row stride in bytes size_t nb2, // slice stride in bytes size_t nb3, size_t offset); GGML_API struct ggml_tensor * ggml_permute( struct ggml_context * ctx, struct ggml_tensor * a, int axis0, int axis1, int axis2, int axis3); // alias for ggml_permute(ctx, a, 1, 0, 2, 3) GGML_API struct ggml_tensor * ggml_transpose( struct ggml_context * ctx, struct ggml_tensor * a); // supports 4D a: // a [n_embd, ne1, ne2, ne3] // b I32 [n_rows, ne2, ne3, 1] // // return [n_embd, n_rows, ne2, ne3] GGML_API struct ggml_tensor * ggml_get_rows( struct ggml_context * ctx, struct ggml_tensor * a, // data struct ggml_tensor * b); // row indices GGML_API struct ggml_tensor * ggml_get_rows_back( struct ggml_context * ctx, struct ggml_tensor * a, // gradients of ggml_get_rows result struct ggml_tensor * b, // row indices struct ggml_tensor * c); // data for 
ggml_get_rows, only used for its shape // a TD [n_embd, ne1, ne2, ne3] // b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3 // c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1) // // undefined behavior if destination rows overlap // // broadcast: // ne2 % ne11 == 0 // ne3 % ne12 == 0 // // return view(a) GGML_API struct ggml_tensor * ggml_set_rows( struct ggml_context * ctx, struct ggml_tensor * a, // destination struct ggml_tensor * b, // source struct ggml_tensor * c); // row indices GGML_API struct ggml_tensor * ggml_diag( struct ggml_context * ctx, struct ggml_tensor * a); // set elements above the diagonal to -INF GGML_API struct ggml_tensor * ggml_diag_mask_inf( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); // in-place, returns view(a) GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); // set elements above the diagonal to 0 GGML_API struct ggml_tensor * ggml_diag_mask_zero( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); // in-place, returns view(a) GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); GGML_API struct ggml_tensor * ggml_soft_max( struct ggml_context * ctx, struct ggml_tensor * a); // in-place, returns view(a) GGML_API struct ggml_tensor * ggml_soft_max_inplace( struct ggml_context * ctx, struct ggml_tensor * a); // a [ne0, ne01, ne02, ne03] // mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional // // broadcast: // ne02 % ne12 == 0 // ne03 % ne13 == 0 // // fused soft_max(a*scale + mask*(ALiBi slope)) // max_bias = 0.0f for no ALiBi GGML_API struct ggml_tensor * ggml_soft_max_ext( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * mask, float scale, float max_bias); GGML_API void ggml_soft_max_add_sinks( struct ggml_tensor * a, struct ggml_tensor * sinks); GGML_API struct ggml_tensor * ggml_soft_max_ext_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, float scale, float max_bias); // in-place, returns view(a) GGML_API struct ggml_tensor * ggml_soft_max_ext_back_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, float scale, float max_bias); // rotary position embedding // if (mode & 1) - skip n_past elements (NOT SUPPORTED) // if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style // // b is an int32 vector with size a->ne[2], it contains the positions GGML_API struct ggml_tensor * ggml_rope( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int n_dims, int mode); // in-place, returns view(a) GGML_API struct ggml_tensor * ggml_rope_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int n_dims, int mode); // custom RoPE // c is freq factors (e.g. 
phi3-128k), (optional) GGML_API struct ggml_tensor * ggml_rope_ext( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, int n_dims, int mode, int n_ctx_orig, float freq_base, float freq_scale, float ext_factor, float attn_factor, float beta_fast, float beta_slow); GGML_API struct ggml_tensor * ggml_rope_multi( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, int n_dims, int sections[GGML_MROPE_SECTIONS], int mode, int n_ctx_orig, float freq_base, float freq_scale, float ext_factor, float attn_factor, float beta_fast, float beta_slow); // in-place, returns view(a) GGML_API struct ggml_tensor * ggml_rope_ext_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, int n_dims, int mode, int n_ctx_orig, float freq_base, float freq_scale, float ext_factor, float attn_factor, float beta_fast, float beta_slow); GGML_API struct ggml_tensor * ggml_rope_multi_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, int n_dims, int sections[GGML_MROPE_SECTIONS], int mode, int n_ctx_orig, float freq_base, float freq_scale, float ext_factor, float attn_factor, float beta_fast, float beta_slow); GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int n_dims, int mode, int n_ctx_orig, float freq_base, float freq_scale, float ext_factor, float attn_factor, float beta_fast, float beta_slow), "use ggml_rope_ext instead"); GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int n_dims, int mode, int n_ctx_orig, float freq_base, float freq_scale, float ext_factor, float attn_factor, float beta_fast, float beta_slow), "use ggml_rope_ext_inplace instead"); // compute correction dims for YaRN RoPE scaling GGML_API void ggml_rope_yarn_corr_dims( int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]); // rotary position embedding backward, i.e compute dx from dy // a - dy GGML_API struct ggml_tensor * ggml_rope_ext_back( struct ggml_context * ctx, struct ggml_tensor * a, // gradients of ggml_rope result struct ggml_tensor * b, // positions struct ggml_tensor * c, // freq factors int n_dims, int mode, int n_ctx_orig, float freq_base, float freq_scale, float ext_factor, float attn_factor, float beta_fast, float beta_slow); GGML_API struct ggml_tensor * ggml_rope_multi_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, int n_dims, int sections[4], int mode, int n_ctx_orig, float freq_base, float freq_scale, float ext_factor, float attn_factor, float beta_fast, float beta_slow); // clamp // in-place, returns view(a) GGML_API struct ggml_tensor * ggml_clamp( struct ggml_context * ctx, struct ggml_tensor * a, float min, float max); // im2col // converts data into a format that effectively results in a convolution when combined with matrix multiplication GGML_API struct ggml_tensor * ggml_im2col( struct ggml_context * ctx, struct ggml_tensor * a, // convolution kernel struct ggml_tensor * b, // data int s0, // stride dimension 0 int s1, // stride dimension 1 int p0, // padding dimension 0 int p1, // padding dimension 1 int d0, // dilation dimension 0 int d1, // dilation dimension 1 bool is_2D, enum ggml_type dst_type); GGML_API struct ggml_tensor * 
ggml_im2col_back( struct ggml_context * ctx, struct ggml_tensor * a, // convolution kernel struct ggml_tensor * b, // gradient of im2col output int64_t * ne, // shape of im2col input int s0, // stride dimension 0 int s1, // stride dimension 1 int p0, // padding dimension 0 int p1, // padding dimension 1 int d0, // dilation dimension 0 int d1, // dilation dimension 1 bool is_2D); GGML_API struct ggml_tensor * ggml_conv_1d( struct ggml_context * ctx, struct ggml_tensor * a, // convolution kernel struct ggml_tensor * b, // data int s0, // stride int p0, // padding int d0); // dilation // conv_1d with padding = half // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d) GGML_API struct ggml_tensor* ggml_conv_1d_ph( struct ggml_context * ctx, struct ggml_tensor * a, // convolution kernel struct ggml_tensor * b, // data int s, // stride int d); // dilation // depthwise // TODO: this is very likely wrong for some cases! - needs more testing GGML_API struct ggml_tensor * ggml_conv_1d_dw( struct ggml_context * ctx, struct ggml_tensor * a, // convolution kernel struct ggml_tensor * b, // data int s0, // stride int p0, // padding int d0); // dilation GGML_API struct ggml_tensor * ggml_conv_1d_dw_ph( struct ggml_context * ctx, struct ggml_tensor * a, // convolution kernel struct ggml_tensor * b, // data int s0, // stride int d0); // dilation GGML_API struct ggml_tensor * ggml_conv_transpose_1d( struct ggml_context * ctx, struct ggml_tensor * a, // convolution kernel struct ggml_tensor * b, // data int s0, // stride int p0, // padding int d0); // dilation GGML_API struct ggml_tensor * ggml_conv_2d( struct ggml_context * ctx, struct ggml_tensor * a, // convolution kernel struct ggml_tensor * b, // data int s0, // stride dimension 0 int s1, // stride dimension 1 int p0, // padding dimension 0 int p1, // padding dimension 1 int d0, // dilation dimension 0 int d1); // dilation dimension 1 GGML_API struct ggml_tensor * ggml_im2col_3d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int64_t IC, int s0, // stride width int s1, // stride height int s2, // stride depth int p0, // padding width int p1, // padding height int p2, // padding depth int d0, // dilation width int d1, // dilation height int d2, // dilation depth enum ggml_type dst_type); // a: [OC*IC, KD, KH, KW] // b: [N*IC, ID, IH, IW] // result: [N*OC, OD, OH, OW] GGML_API struct ggml_tensor * ggml_conv_3d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int64_t IC, int s0, // stride width int s1, // stride height int s2, // stride depth int p0, // padding width int p1, // padding height int p2, // padding depth int d0, // dilation width int d1, // dilation height int d2 // dilation depth ); // kernel size is a->ne[0] x a->ne[1] // stride is equal to kernel size // padding is zero // example: // a: 16 16 3 768 // b: 1024 1024 3 1 // res: 64 64 768 1 // used in sam GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); // kernel size is a->ne[0] x a->ne[1] // stride is 1 // padding is half // example: // a: 3 3 256 256 // b: 64 64 256 1 // res: 64 64 256 1 // used in sam GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); // depthwise (via im2col and mul_mat) GGML_API struct ggml_tensor * ggml_conv_2d_dw( struct ggml_context * ctx, struct ggml_tensor * a, // convolution kernel struct ggml_tensor * b, // data int s0, // stride dimension 0 int s1, // stride 
dimension 1 int p0, // padding dimension 0 int p1, // padding dimension 1 int d0, // dilation dimension 0 int d1); // dilation dimension 1 // Depthwise 2D convolution // may be faster than ggml_conv_2d_dw, but not available in all backends // a: KW KH 1 C convolution kernel // b: W H C N input data // res: W_out H_out C N GGML_API struct ggml_tensor * ggml_conv_2d_dw_direct( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int stride0, int stride1, int pad0, int pad1, int dilation0, int dilation1); GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int stride); GGML_API struct ggml_tensor * ggml_conv_2d_direct( struct ggml_context * ctx, struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC] struct ggml_tensor * b, // input data [W, H, C, N] int s0, // stride dimension 0 int s1, // stride dimension 1 int p0, // padding dimension 0 int p1, // padding dimension 1 int d0, // dilation dimension 0 int d1); // dilation dimension 1 GGML_API struct ggml_tensor * ggml_conv_3d_direct( struct ggml_context * ctx, struct ggml_tensor * a, // kernel [KW, KH, KD, IC * OC] struct ggml_tensor * b, // input [W, H, D, C * N] int s0, // stride int s1, int s2, int p0, // padding int p1, int p2, int d0, // dilation int d1, int d2, int n_channels, int n_batch, int n_channels_out); enum ggml_op_pool { GGML_OP_POOL_MAX, GGML_OP_POOL_AVG, GGML_OP_POOL_COUNT, }; GGML_API struct ggml_tensor * ggml_pool_1d( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_op_pool op, int k0, // kernel size int s0, // stride int p0); // padding // the result will have 2*p0 padding for the first dimension // and 2*p1 padding for the second dimension GGML_API struct ggml_tensor * ggml_pool_2d( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_op_pool op, int k0, int k1, int s0, int s1, float p0, float p1); GGML_API struct ggml_tensor * ggml_pool_2d_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * af, // "a"/input used in forward pass enum ggml_op_pool op, int k0, int k1, int s0, int s1, float p0, float p1); enum ggml_scale_mode { GGML_SCALE_MODE_NEAREST = 0, GGML_SCALE_MODE_BILINEAR = 1, GGML_SCALE_MODE_COUNT }; enum ggml_scale_flag { GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8) }; // interpolate // multiplies ne0 and ne1 by scale factor GGML_API struct ggml_tensor * ggml_upscale( struct ggml_context * ctx, struct ggml_tensor * a, int scale_factor, enum ggml_scale_mode mode); // interpolate // interpolate scale to specified dimensions GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext( struct ggml_context * ctx, struct ggml_tensor * a, int ne0, int ne1, int ne2, int ne3, enum ggml_scale_mode mode), "use ggml_interpolate instead"); // Up- or downsamples the input to the specified size. // 2D scale modes (eg. bilinear) are applied to the first two dimensions. GGML_API struct ggml_tensor * ggml_interpolate( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3, uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...] 
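// illustrative usage (not part of the header): the mode argument packs a
// ggml_scale_mode value, optionally OR-ed with ggml_scale_flag bits; e.g. a
// 2x bilinear upscale with corner alignment might look like:
//
//   struct ggml_tensor * res = ggml_interpolate(ctx, a,
//       a->ne[0]*2, a->ne[1]*2, a->ne[2], a->ne[3],
//       GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS);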
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0] GGML_API struct ggml_tensor * ggml_pad( struct ggml_context * ctx, struct ggml_tensor * a, int p0, int p1, int p2, int p3); GGML_API struct ggml_tensor * ggml_pad_ext( struct ggml_context * ctx, struct ggml_tensor * a, int lp0, int rp0, int lp1, int rp1, int lp2, int rp2, int lp3, int rp3 ); // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c] GGML_API struct ggml_tensor * ggml_pad_reflect_1d( struct ggml_context * ctx, struct ggml_tensor * a, int p0, int p1); // Move tensor elements by an offset given for each dimension. Elements that // are shifted beyond the last position are wrapped around to the beginning. GGML_API struct ggml_tensor * ggml_roll( struct ggml_context * ctx, struct ggml_tensor * a, int shift0, int shift1, int shift2, int shift3); // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151 // timesteps: [N,] // return: [N, dim] GGML_API struct ggml_tensor * ggml_timestep_embedding( struct ggml_context * ctx, struct ggml_tensor * timesteps, int dim, int max_period); // sort rows enum ggml_sort_order { GGML_SORT_ORDER_ASC, GGML_SORT_ORDER_DESC, }; GGML_API struct ggml_tensor * ggml_argsort( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_sort_order order); GGML_API struct ggml_tensor * ggml_arange( struct ggml_context * ctx, float start, float stop, float step); // top k elements per row GGML_API struct ggml_tensor * ggml_top_k( struct ggml_context * ctx, struct ggml_tensor * a, int k); #define GGML_KQ_MASK_PAD 64 // q: [n_embd_k, n_batch, n_head, ne3 ] // k: [n_embd_k, n_kv, n_head_kv, ne3 ] // v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !! // mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !! // res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !! 
// // broadcast: // n_head % n_head_kv == 0 // n_head % ne32 == 0 // ne3 % ne33 == 0 // GGML_API struct ggml_tensor * ggml_flash_attn_ext( struct ggml_context * ctx, struct ggml_tensor * q, struct ggml_tensor * k, struct ggml_tensor * v, struct ggml_tensor * mask, float scale, float max_bias, float logit_softcap); GGML_API void ggml_flash_attn_ext_set_prec( struct ggml_tensor * a, enum ggml_prec prec); GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec( const struct ggml_tensor * a); GGML_API void ggml_flash_attn_ext_add_sinks( struct ggml_tensor * a, struct ggml_tensor * sinks); // TODO: needs to be adapted to ggml_flash_attn_ext GGML_API struct ggml_tensor * ggml_flash_attn_back( struct ggml_context * ctx, struct ggml_tensor * q, struct ggml_tensor * k, struct ggml_tensor * v, struct ggml_tensor * d, bool masked); GGML_API struct ggml_tensor * ggml_ssm_conv( struct ggml_context * ctx, struct ggml_tensor * sx, struct ggml_tensor * c); GGML_API struct ggml_tensor * ggml_ssm_scan( struct ggml_context * ctx, struct ggml_tensor * s, struct ggml_tensor * x, struct ggml_tensor * dt, struct ggml_tensor * A, struct ggml_tensor * B, struct ggml_tensor * C, struct ggml_tensor * ids); // partition into non-overlapping windows with padding if needed // example: // a: 768 64 64 1 // w: 14 // res: 768 14 14 25 // used in sam GGML_API struct ggml_tensor * ggml_win_part( struct ggml_context * ctx, struct ggml_tensor * a, int w); // reverse of ggml_win_part // used in sam GGML_API struct ggml_tensor * ggml_win_unpart( struct ggml_context * ctx, struct ggml_tensor * a, int w0, int h0, int w); GGML_API struct ggml_tensor * ggml_unary( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_unary_op op); GGML_API struct ggml_tensor * ggml_unary_inplace( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_unary_op op); // used in sam GGML_API struct ggml_tensor * ggml_get_rel_pos( struct ggml_context * ctx, struct ggml_tensor * a, int qh, int kh); // used in sam GGML_API struct ggml_tensor * ggml_add_rel_pos( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * pw, struct ggml_tensor * ph); GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * pw, struct ggml_tensor * ph); GGML_API struct ggml_tensor * ggml_rwkv_wkv6( struct ggml_context * ctx, struct ggml_tensor * k, struct ggml_tensor * v, struct ggml_tensor * r, struct ggml_tensor * tf, struct ggml_tensor * td, struct ggml_tensor * state); GGML_API struct ggml_tensor * ggml_gated_linear_attn( struct ggml_context * ctx, struct ggml_tensor * k, struct ggml_tensor * v, struct ggml_tensor * q, struct ggml_tensor * g, struct ggml_tensor * state, float scale); GGML_API struct ggml_tensor * ggml_rwkv_wkv7( struct ggml_context * ctx, struct ggml_tensor * r, struct ggml_tensor * w, struct ggml_tensor * k, struct ggml_tensor * v, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * state); // custom operators typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata); typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata); typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata); #define GGML_N_TASKS_MAX (-1) // n_tasks == GGML_N_TASKS_MAX means to use 
max number of tasks GGML_API struct ggml_tensor * ggml_map_custom1( struct ggml_context * ctx, struct ggml_tensor * a, ggml_custom1_op_t fun, int n_tasks, void * userdata); GGML_API struct ggml_tensor * ggml_map_custom1_inplace( struct ggml_context * ctx, struct ggml_tensor * a, ggml_custom1_op_t fun, int n_tasks, void * userdata); GGML_API struct ggml_tensor * ggml_map_custom2( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_custom2_op_t fun, int n_tasks, void * userdata); GGML_API struct ggml_tensor * ggml_map_custom2_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_custom2_op_t fun, int n_tasks, void * userdata); GGML_API struct ggml_tensor * ggml_map_custom3( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, ggml_custom3_op_t fun, int n_tasks, void * userdata); GGML_API struct ggml_tensor * ggml_map_custom3_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, ggml_custom3_op_t fun, int n_tasks, void * userdata); typedef void (*ggml_custom_op_t)(struct ggml_tensor * dst , int ith, int nth, void * userdata); GGML_API struct ggml_tensor * ggml_custom_4d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3, struct ggml_tensor ** args, int n_args, ggml_custom_op_t fun, int n_tasks, void * userdata); GGML_API struct ggml_tensor * ggml_custom_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor ** args, int n_args, ggml_custom_op_t fun, int n_tasks, void * userdata); // loss function GGML_API struct ggml_tensor * ggml_cross_entropy_loss( struct ggml_context * ctx, struct ggml_tensor * a, // logits struct ggml_tensor * b); // labels GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back( struct ggml_context * ctx, struct ggml_tensor * a, // logits struct ggml_tensor * b, // labels struct ggml_tensor * c); // gradients of cross_entropy_loss result // AdamW optimizer step // Paper: https://arxiv.org/pdf/1711.05101v3.pdf // PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html GGML_API struct ggml_tensor * ggml_opt_step_adamw( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * grad, struct ggml_tensor * m, struct ggml_tensor * v, struct ggml_tensor * adamw_params); // parameters such as the learning rate // stochastic gradient descent step (with weight decay) GGML_API struct ggml_tensor * ggml_opt_step_sgd( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * grad, struct ggml_tensor * sgd_params); // alpha, weight decay // // automatic differentiation // GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor); GGML_API void ggml_build_backward_expand( struct ggml_context * ctx, // context for gradient computation struct ggml_cgraph * cgraph, struct ggml_tensor ** grad_accs); // graph allocation in a context GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads); GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads); GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst); GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss 
grad to 1 GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph); GGML_API int ggml_graph_size (struct ggml_cgraph * cgraph); GGML_API struct ggml_tensor * ggml_graph_node (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i] GGML_API struct ggml_tensor ** ggml_graph_nodes (struct ggml_cgraph * cgraph); GGML_API int ggml_graph_n_nodes(struct ggml_cgraph * cgraph); GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor); GGML_API size_t ggml_graph_overhead(void); GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads); GGML_API struct ggml_tensor * ggml_graph_get_tensor (const struct ggml_cgraph * cgraph, const char * name); GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node); GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node); // print info and performance information for the graph GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph); // dump the graph into a file using the dot format GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename); // TODO these functions were sandwiched in the old optimization interface; is there a better place for them? typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data); // Set callback for all future logging events. // If this is not called, or NULL is supplied, everything is output on stderr. GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data); GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor); // // quantization // // - ggml_quantize_init can be called multiple times with the same type // it will only initialize the quantization tables for the first call or after ggml_quantize_free // automatically called by ggml_quantize_chunk for convenience // // - ggml_quantize_free will free any memory allocated by ggml_quantize_init // call this at the end of the program to avoid memory leaks // // note: these are thread-safe // GGML_API void ggml_quantize_init(enum ggml_type type); GGML_API void ggml_quantize_free(void); // some quantization types cannot be used without an importance matrix GGML_API bool ggml_quantize_requires_imatrix(enum ggml_type type); // calls ggml_quantize_init internally (i.e.
can allocate memory) GGML_API size_t ggml_quantize_chunk( enum ggml_type type, const float * src, void * dst, int64_t start, int64_t nrows, int64_t n_per_row, const float * imatrix); #ifdef __cplusplus // restrict not standard in C++ # if defined(__GNUC__) # define GGML_RESTRICT __restrict__ # elif defined(__clang__) # define GGML_RESTRICT __restrict # elif defined(_MSC_VER) # define GGML_RESTRICT __restrict # else # define GGML_RESTRICT # endif #else # if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L) # define GGML_RESTRICT __restrict # else # define GGML_RESTRICT restrict # endif #endif typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k); typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k); struct ggml_type_traits { const char * type_name; int64_t blck_size; int64_t blck_size_interleave; // interleave elements in blocks size_t type_size; bool is_quantized; ggml_to_float_t to_float; ggml_from_float_t from_float_ref; }; GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type); // ggml threadpool // TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend // the goal should be to create an API that other backends can use and move everything to the ggml base // scheduling priorities enum ggml_sched_priority { GGML_SCHED_PRIO_LOW = -1, GGML_SCHED_PRIO_NORMAL, GGML_SCHED_PRIO_MEDIUM, GGML_SCHED_PRIO_HIGH, GGML_SCHED_PRIO_REALTIME }; // threadpool params // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults struct ggml_threadpool_params { bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings) int n_threads; // number of threads enum ggml_sched_priority prio; // thread priority uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling) bool strict_cpu; // strict cpu placement bool paused; // start in paused state }; struct ggml_threadpool; // forward declaration, see ggml.c typedef struct ggml_threadpool * ggml_threadpool_t; GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads); GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads); GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/include/gguf.h000066400000000000000000000237601506673203700172570ustar00rootroot00000000000000// This file contains functionality related to "GGUF" files, the binary file format used by ggml. // GGUF files have the following structure: // // 1. File magic "GGUF" (4 bytes). // 2. File version (uint32_t). // 3. Number of ggml tensors in file (int64_t). // 4. Number of key-value-pairs in file (int64_t). // 5. For each KV pair: // 1. The key (string). // 2. The value type (gguf_type). // 3a. If the value type is GGUF_TYPE_ARRAY: // 1. The type of the array (gguf_type). // 2. The number of elements in the array (uint64_t). // 3. The binary representation of each element in the array. // 3b. Otherwise: // 1. The binary representation of the value. // 6. For each ggml tensor: // 1. The tensor name (string). // 2. The number of dimensions of the tensor (uint32_t). // 3. For each dimension: // 1. The size of the tensor in the dimension (int64_t). // 4. The tensor data type (ggml_type). // 5. The tensor data offset in the tensor data binary blob (uint64_t). // 7.
The tensor data binary blob (optional, aligned). // // Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator. // All enums are stored as int32_t. // All bool values are stored as int8_t. // If the special key "general.alignment" (uint32_t) is defined it is used for alignment, // otherwise GGUF_DEFAULT_ALIGNMENT is used. // // Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de) #pragma once #include "ggml.h" #include <stdbool.h> #include <stdint.h> #define GGUF_MAGIC "GGUF" #define GGUF_VERSION 3 #define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment" #define GGUF_DEFAULT_ALIGNMENT 32 #ifdef __cplusplus extern "C" { #endif // types that can be stored as GGUF KV data enum gguf_type { GGUF_TYPE_UINT8 = 0, GGUF_TYPE_INT8 = 1, GGUF_TYPE_UINT16 = 2, GGUF_TYPE_INT16 = 3, GGUF_TYPE_UINT32 = 4, GGUF_TYPE_INT32 = 5, GGUF_TYPE_FLOAT32 = 6, GGUF_TYPE_BOOL = 7, GGUF_TYPE_STRING = 8, GGUF_TYPE_ARRAY = 9, GGUF_TYPE_UINT64 = 10, GGUF_TYPE_INT64 = 11, GGUF_TYPE_FLOAT64 = 12, GGUF_TYPE_COUNT, // marks the end of the enum }; struct gguf_context; struct gguf_init_params { bool no_alloc; // if not NULL, create a ggml_context and allocate the tensor data in it struct ggml_context ** ctx; }; GGML_API struct gguf_context * gguf_init_empty(void); GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); //GGML_API struct gguf_context * gguf_init_from_buffer(..); GGML_API void gguf_free(struct gguf_context * ctx); GGML_API const char * gguf_type_name(enum gguf_type type); GGML_API uint32_t gguf_get_version (const struct gguf_context * ctx); GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx); GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx); GGML_API int64_t gguf_get_n_kv(const struct gguf_context * ctx); GGML_API int64_t gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int64_t key_id); GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int64_t key_id); GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id); // will abort if the wrong type is used for the key GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int64_t key_id); GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int64_t key_id); GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int64_t key_id); GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int64_t key_id); GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int64_t key_id); GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int64_t key_id); GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int64_t key_id); GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int64_t key_id); GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int64_t key_id); GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int64_t key_id); GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id); GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int64_t key_id); GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id); GGML_API size_t gguf_get_arr_n (const struct gguf_context * ctx, int64_t key_id); // get raw pointer to the first element of the array with the given key_id //
for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference) GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id); // get ith C string from array with given key_id GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i); GGML_API int64_t gguf_get_n_tensors (const struct gguf_context * ctx); GGML_API int64_t gguf_find_tensor (const struct gguf_context * ctx, const char * name); // returns -1 if the tensor is not found GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id); GGML_API const char * gguf_get_tensor_name (const struct gguf_context * ctx, int64_t tensor_id); GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int64_t tensor_id); GGML_API size_t gguf_get_tensor_size (const struct gguf_context * ctx, int64_t tensor_id); // removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist) GGML_API int64_t gguf_remove_key(struct gguf_context * ctx, const char * key); // overrides an existing KV pair or adds a new one, the new KV pair is always at the back GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val); GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val); GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val); GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val); GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val); GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val); GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val); GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val); GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val); GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val); GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val); GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val); // creates a new array with n elements of the given type and copies the corresponding number of bytes from data GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n); // creates a new array with n strings and copies the corresponding strings from data GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, size_t n); // set or add KV pairs from another context GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src); // add tensor to GGUF context, tensor name must be unique GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor); // after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated // in such a way that the tensor data remains as one contiguous block (except for padding) GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type); // assumes that at least gguf_get_tensor_size bytes can be read from data GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data); // writing gguf files can be done in 3 ways: // // - write the entire 
gguf_context to a binary file in a single pass: // // gguf_write_to_file(ctx, fname, /*only_meta =*/ false); // // - write only the meta data to a file, then re-open the file and append the tensor data: // // gguf_write_to_file(ctx, fname, /*only_meta =*/ true); // FILE * f = fopen(fname, "ab"); // fwrite(..., f); // write tensor data // fclose(f); // // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data: // // FILE * f = fopen(fname, "wb"); // const size_t size_meta = gguf_get_meta_size(ctx); // fseek(f, size_meta, SEEK_SET); // fwrite(..., f); // write tensor data // void * data = malloc(size_meta); // gguf_get_meta_data(ctx, data); // rewind(f); // fwrite(data, 1, size_meta, f); // free(data); // fclose(f); // // write the entire context to a binary file GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta); // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx); // writes the meta data to pointer "data" GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data); #ifdef __cplusplus } #endif ggml-org-ggml-7ec8045/requirements.txt000066400000000000000000000003271506673203700200110ustar00rootroot00000000000000accelerate==0.19.0 numpy>=2.0.2 sentencepiece~=0.1.98 torchvision>=0.15.2 transformers>=4.35.2,<5.0.0 gguf>=0.1.0 keras==3.5.0 tensorflow==2.18.0 --extra-index-url https://download.pytorch.org/whl/cpu torch~=2.5.1 ggml-org-ggml-7ec8045/scripts/000077500000000000000000000000001506673203700162125ustar00rootroot00000000000000ggml-org-ggml-7ec8045/scripts/gen-authors.sh000077500000000000000000000005311506673203700210040ustar00rootroot00000000000000#!/usr/bin/env bash printf "# date: $(date)\n" > AUTHORS printf "# this file is auto-generated by scripts/gen-authors.sh\n\n" >> AUTHORS git log --format='%an <%ae>' --reverse --date=short master | awk '!seen[$0]++' | sort >> AUTHORS # if necessary, update your name here. For example: jdoe -> John Doe sed -i '' 's/^jdoe/John Doe/g' AUTHORS ggml-org-ggml-7ec8045/scripts/release.sh000077500000000000000000000223161506673203700201750ustar00rootroot00000000000000#!/bin/bash # # Automated release script for ggml. # # Note: Sync from llama.cpp should be done separately via the PR process # prior to running this script. # # Usage: # ./scripts/release.sh prepare [major|minor|patch] [--dry-run] # ./scripts/release.sh finalize [--dry-run] # # Two-stage release process: # # Stage 1 - Prepare: # $ ./scripts/release.sh prepare minor # This creates a release candidate branch with version bump and removes -dev suffix. # The branch should then be manually pushed and a PR created, reviewed, and merged. # # Stage 2 - Finalize: # $ ./scripts/release.sh finalize # After the RC PR is merged, this reads the current version from CMakeLists.txt, # creates the release tag, and prepares the next development cycle. # # Prepare stage: # 1. Creates release candidate branch # 2. Updates version and removes -dev suffix # 3. Commits the version bump # # Finalize stage: # 1. Reads current release version from CMakeLists.txt # 2. Creates signed git tag on master # 3. Adds -dev suffix back for next development cycle # 4. Creates branch and commit for development version # set -e if [ ! -f "CMakeLists.txt" ] || [ !
-d "scripts" ]; then echo "Error: Must be run from ggml root directory" exit 1 fi # Parse command line arguments COMMAND="" VERSION_TYPE="" DRY_RUN=false # First argument should be the command if [ $# -eq 0 ]; then echo "Error: Missing command" echo "Usage: $0 prepare [major|minor|patch] [--dry-run]" echo " $0 finalize [--dry-run]" exit 1 fi COMMAND="$1" shift # Parse remaining arguments for arg in "$@"; do case $arg in --dry-run) DRY_RUN=true ;; major|minor|patch) if [ "$COMMAND" = "prepare" ]; then VERSION_TYPE="$arg" else echo "Error: Version type only valid for 'prepare' command" exit 1 fi ;; *) echo "Error: Unknown argument '$arg'" echo "Usage: $0 prepare [major|minor|patch] [--dry-run]" echo " $0 finalize [--dry-run]" exit 1 ;; esac done # Validate command if [[ ! "$COMMAND" =~ ^(prepare|finalize)$ ]]; then echo "Error: Command must be 'prepare' or 'finalize'" echo "Usage: $0 prepare [major|minor|patch] [--dry-run]" echo " $0 finalize [--dry-run]" exit 1 fi # For prepare command, default to patch if no version type specified if [ "$COMMAND" = "prepare" ]; then VERSION_TYPE="${VERSION_TYPE:-patch}" if [[ ! "$VERSION_TYPE" =~ ^(major|minor|patch)$ ]]; then echo "Error: Version type must be 'major', 'minor', or 'patch'" echo "Usage: $0 prepare [major|minor|patch] [--dry-run]" exit 1 fi fi # Common validation functions check_git_status() { # Check for uncommitted changes (skip in dry-run) if [ "$DRY_RUN" = false ] && ! git diff-index --quiet HEAD --; then echo "Error: You have uncommitted changes. Please commit or stash them first." exit 1 fi } check_master_branch() { # Ensure we're on master branch CURRENT_BRANCH=$(git branch --show-current) if [ "$CURRENT_BRANCH" != "master" ]; then if [ "$DRY_RUN" = true ]; then echo "[dry run] Warning: Not on master branch (currently on: $CURRENT_BRANCH). Continuing with dry-run..." echo "" else echo "Error: Must be on master branch. Currently on: $CURRENT_BRANCH" exit 1 fi fi } check_master_up_to_date() { # Check if we have the latest from master (skip in dry-run) if [ "$DRY_RUN" = false ]; then echo "Checking if local master is up-to-date with remote..." git fetch origin master LOCAL=$(git rev-parse HEAD) REMOTE=$(git rev-parse origin/master) if [ "$LOCAL" != "$REMOTE" ]; then echo "Error: Your local master branch is not up-to-date with origin/master." echo "Please run 'git pull origin master' first." exit 1 fi echo "✓ Local master is up-to-date with remote" echo "" elif [ "$(git branch --show-current)" = "master" ]; then echo "[dry run] Warning: Dry-run mode - not checking if master is up-to-date with remote" echo "" fi } prepare_release() { if [ "$DRY_RUN" = true ]; then echo "[dry-run] Preparing release (no changes will be made)" else echo "Starting release preparation..." fi echo "" check_git_status check_master_branch check_master_up_to_date # Extract current version from CMakeLists.txt echo "Step 1: Reading current version..." 
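    # The version components are parsed from lines of the form
    # "set(GGML_VERSION_MAJOR 0)" in CMakeLists.txt; the sed expressions below
    # keep only the numeric field. With illustrative values (not the real
    # ones), given:
    #   set(GGML_VERSION_MAJOR 0)
    #   set(GGML_VERSION_MINOR 9)
    #   set(GGML_VERSION_PATCH 3)
    # this yields MAJOR=0, MINOR=9 and PATCH=3.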
MAJOR=$(grep "set(GGML_VERSION_MAJOR" CMakeLists.txt | sed 's/.*MAJOR \([0-9]*\).*/\1/') MINOR=$(grep "set(GGML_VERSION_MINOR" CMakeLists.txt | sed 's/.*MINOR \([0-9]*\).*/\1/') PATCH=$(grep "set(GGML_VERSION_PATCH" CMakeLists.txt | sed 's/.*PATCH \([0-9]*\).*/\1/') echo "Current version: $MAJOR.$MINOR.$PATCH" # Calculate new version case $VERSION_TYPE in major) NEW_MAJOR=$((MAJOR + 1)) NEW_MINOR=0 NEW_PATCH=0 ;; minor) NEW_MAJOR=$MAJOR NEW_MINOR=$((MINOR + 1)) NEW_PATCH=0 ;; patch) NEW_MAJOR=$MAJOR NEW_MINOR=$MINOR NEW_PATCH=$((PATCH + 1)) ;; esac NEW_VERSION="$NEW_MAJOR.$NEW_MINOR.$NEW_PATCH" RC_BRANCH="ggml-rc-v$NEW_VERSION" echo "New release version: $NEW_VERSION" echo "Release candidate branch: $RC_BRANCH" echo "" # Create release candidate branch echo "Step 2: Creating release candidate branch..." if [ "$DRY_RUN" = true ]; then echo " [dry-run] Would create branch: $RC_BRANCH" else git checkout -b "$RC_BRANCH" echo "✓ Created and switched to branch: $RC_BRANCH" fi echo "" # Update CMakeLists.txt for release echo "Step 3: Updating version in CMakeLists.txt..." if [ "$DRY_RUN" = true ]; then echo " [dry-run] Would update GGML_VERSION_MAJOR to $NEW_MAJOR" echo " [dry-run] Would update GGML_VERSION_MINOR to $NEW_MINOR" echo " [dry-run] Would update GGML_VERSION_PATCH to $NEW_PATCH" else sed -i'' -e "s/set(GGML_VERSION_MAJOR [0-9]*)/set(GGML_VERSION_MAJOR $NEW_MAJOR)/" CMakeLists.txt sed -i'' -e "s/set(GGML_VERSION_MINOR [0-9]*)/set(GGML_VERSION_MINOR $NEW_MINOR)/" CMakeLists.txt sed -i'' -e "s/set(GGML_VERSION_PATCH [0-9]*)/set(GGML_VERSION_PATCH $NEW_PATCH)/" CMakeLists.txt fi echo "" # Commit version bump echo "Step 4: Committing version bump..." if [ "$DRY_RUN" = true ]; then echo " [dry-run] Would commit: 'ggml : bump version to $NEW_VERSION'" else git add CMakeLists.txt git commit -m "ggml : bump version to $NEW_VERSION" fi echo "" echo "" if [ "$DRY_RUN" = true ]; then echo "[dry-run] Summary (no changes were made):" echo " • Would have created branch: $RC_BRANCH" echo " • Would have updated version to: $NEW_VERSION" else echo "Release preparation completed!" echo "Summary:" echo " • Created branch: $RC_BRANCH" echo " • Updated version to: $NEW_VERSION" echo "" echo "Next steps:" echo " • Push branch to remote: git push origin $RC_BRANCH" echo " • Create a Pull Request from $RC_BRANCH to master" echo " • After PR is merged, run: ./scripts/release.sh finalize" fi } finalize_release() { if [ "$DRY_RUN" = true ]; then echo "[dry-run] Finalizing release (no changes will be made)" else echo "Starting release finalization..." fi echo "" check_git_status check_master_branch check_master_up_to_date # Read current version from CMakeLists.txt echo "Step 1: Reading current release version..." MAJOR=$(grep "set(GGML_VERSION_MAJOR" CMakeLists.txt | sed 's/.*MAJOR \([0-9]*\).*/\1/') MINOR=$(grep "set(GGML_VERSION_MINOR" CMakeLists.txt | sed 's/.*MINOR \([0-9]*\).*/\1/') PATCH=$(grep "set(GGML_VERSION_PATCH" CMakeLists.txt | sed 's/.*PATCH \([0-9]*\).*/\1/') RELEASE_VERSION="$MAJOR.$MINOR.$PATCH" echo "Release version: $RELEASE_VERSION" echo "" # Create git tag echo "Step 2: Creating signed git tag..." 
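    # "git tag -s" creates a GPG-signed annotated tag, so a configured signing
    # key (git config user.signingkey) is assumed here. Illustrative result for
    # RELEASE_VERSION=0.9.3:
    #   git tag -s v0.9.3 -m "Release version 0.9.3"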
if [ "$DRY_RUN" = true ]; then echo " [dry-run] Would create signed tag: v$RELEASE_VERSION with message 'Release version $RELEASE_VERSION'" else git tag -s "v$RELEASE_VERSION" -m "Release version $RELEASE_VERSION" echo "✓ Created signed tag: v$RELEASE_VERSION" fi echo "" echo "" if [ "$DRY_RUN" = true ]; then echo "[dry-run] Summary (no changes were made):" echo " • Would have created tag: v$RELEASE_VERSION" else echo "Release finalization completed!" echo "Summary:" echo " • Created signed tag: v$RELEASE_VERSION" echo "" echo "Next steps:" echo " • Push tag to remote: git push origin v$RELEASE_VERSION" echo " • The release is now complete!" fi } # Execute the appropriate command case $COMMAND in prepare) prepare_release ;; finalize) finalize_release ;; esac ggml-org-ggml-7ec8045/scripts/sync-llama-am.sh000077500000000000000000000122501506673203700212040ustar00rootroot00000000000000#!/bin/bash # # Synchronize llama.cpp changes to ggml # # Usage: # # $ cd /path/to/ggml # $ ./scripts/sync-llama-am.sh -skip hash0,hash1,hash2... -C 3 # set -e sd=$(dirname $0) cd $sd/../ SRC_GGML=$(pwd) SRC_LLAMA=$(cd ../llama.cpp; pwd) if [ ! -d $SRC_LLAMA ]; then echo "llama.cpp not found at $SRC_LLAMA" exit 1 fi lc=$(cat $SRC_GGML/scripts/sync-llama.last) echo "Syncing llama.cpp changes since commit $lc" to_skip="" # context for git patches in number of lines ctx="8" while [ "$1" != "" ]; do case $1 in -skip ) shift to_skip=$1 ;; -C ) shift ctx=$1 ;; esac shift done cd $SRC_LLAMA git log --oneline $lc..HEAD git log --oneline $lc..HEAD --reverse | grep -v "(ggml/[0-9]*)" | grep -v "(whisper/[0-9]*)" | cut -d' ' -f1 > $SRC_GGML/llama-commits if [ ! -s $SRC_GGML/llama-commits ]; then rm -v $SRC_GGML/llama-commits echo "No new commits" exit 0 fi if [ -f $SRC_GGML/llama-src.patch ]; then rm -v $SRC_GGML/llama-src.patch fi while read c; do if [ -n "$to_skip" ]; then if [[ $to_skip == *"$c"* ]]; then echo "Skipping $c" continue fi fi git format-patch -U${ctx} -k $c~1..$c --stdout -- \ ggml/CMakeLists.txt \ ggml/src/CMakeLists.txt \ ggml/cmake/BuildTypes.cmake \ ggml/cmake/GitVars.cmake \ ggml/cmake/common.cmake \ ggml/cmake/ggml-config.cmake.in \ ggml/src/ggml-cpu/cmake/FindSIMD.cmake \ ggml/src/ggml* \ ggml/include/ggml*.h \ ggml/include/gguf*.h \ tests/test-opt.cpp \ tests/test-quantize-fns.cpp \ tests/test-quantize-perf.cpp \ tests/test-backend-ops.cpp \ LICENSE \ scripts/gen-authors.sh \ >> $SRC_GGML/llama-src.patch done < $SRC_GGML/llama-commits rm -v $SRC_GGML/llama-commits # delete files if empty if [ ! 
-s $SRC_GGML/llama-src.patch ]; then rm -v $SRC_GGML/llama-src.patch fi cd $SRC_GGML if [ -f $SRC_GGML/llama-src.patch ]; then # replace PR numbers # # Subject: some text (#1234) # Subject: some text (llama/1234) cat llama-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (llama\/\2)/' > llama-src.patch.tmp mv llama-src.patch.tmp llama-src.patch cat llama-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (llama\/\2)/' > llama-src.patch.tmp mv llama-src.patch.tmp llama-src.patch # replace filenames: # # ggml/CMakeLists.txt -> CMakeLists.txt # ggml/src/CMakeLists.txt -> src/CMakeLists.txt # # ggml/cmake/BuildTypes.cmake -> cmake/BuildTypes.cmake # ggml/cmake/GitVars.cmake -> cmake/GitVars.cmake # ggml/cmake/common.cmake -> cmake/common.cmake # ggml/cmake/ggml-config.cmake.in -> cmake/ggml-config.cmake.in # ggml/src/ggml-cpu/cmake/FindSIMD.cmake -> src/ggml-cpu/cmake/FindSIMD.cmake # # ggml/src/ggml* -> src/ggml* # # ggml/include/ggml*.h -> include/ggml*.h # ggml/include/gguf*.h -> include/gguf*.h # # tests/test-opt.cpp -> tests/test-opt.cpp # tests/test-quantize-fns.cpp -> tests/test-quantize-fns.cpp # tests/test-quantize-perf.cpp -> tests/test-quantize-perf.cpp # tests/test-backend-ops.cpp -> tests/test-backend-ops.cpp # # LICENSE -> LICENSE # scripts/gen-authors.sh -> scripts/gen-authors.sh cat llama-src.patch | sed -E \ -e 's/(^[[:space:]]| [ab]\/)ggml\/CMakeLists\.txt/\1CMakeLists.txt/g' \ -e 's/(^[[:space:]]| [ab]\/)ggml\/src\/CMakeLists\.txt/\1src\/CMakeLists.txt/g' \ -e 's/(^[[:space:]]| [ab]\/)ggml\/cmake\/BuildTypes\.cmake/\1cmake\/BuildTypes\.cmake/g' \ -e 's/(^[[:space:]]| [ab]\/)ggml\/cmake\/GitVars\.cmake/\1cmake\/GitVars\.cmake/g' \ -e 's/(^[[:space:]]| [ab]\/)ggml\/cmake\/common\.cmake/\1cmake\/common\.cmake/g' \ -e 's/(^[[:space:]]| [ab]\/)ggml\/cmake\/ggml-config\.cmake\.in/\1cmake\/ggml-config\.cmake\.in/g' \ -e 's/(^[[:space:]]| [ab]\/)ggml\/src\/ggml-cpu\/cmake\/FindSIMD\.cmake/\1src\/ggml-cpu\/cmake\/FindSIMD\.cmake/g' \ -e 's/(^[[:space:]]| [ab]\/)ggml\/src\/ggml(.*)/\1src\/ggml\2/g' \ -e 's/(^[[:space:]]| [ab]\/)ggml\/include\/ggml(.*)\.h/\1include\/ggml\2.h/g' \ -e 's/(^[[:space:]]| [ab]\/)ggml\/include\/gguf(.*)\.h/\1include\/gguf\2.h/g' \ -e 's/(^[[:space:]]| [ab]\/)tests\/test-opt\.cpp/\1tests\/test-opt.cpp/g' \ -e 's/(^[[:space:]]| [ab]\/)tests\/test-quantize-fns\.cpp/\1tests\/test-quantize-fns.cpp/g' \ -e 's/(^[[:space:]]| [ab]\/)tests\/test-quantize-perf\.cpp/\1tests\/test-quantize-perf.cpp/g' \ -e 's/(^[[:space:]]| [ab]\/)tests\/test-backend-ops\.cpp/\1tests\/test-backend-ops.cpp/g' \ -e 's/(^[[:space:]]| [ab]\/)LICENSE/\1LICENSE/g' \ -e 's/(^[[:space:]]| [ab]\/)scripts\/gen-authors\.sh/\1scripts\/gen-authors.sh/g' \ > llama-src.patch.tmp mv llama-src.patch.tmp llama-src.patch git am -C${ctx} llama-src.patch rm -v $SRC_GGML/llama-src.patch fi # update last commit cd $SRC_LLAMA git log -1 --format=%H > $SRC_GGML/scripts/sync-llama.last echo "Done" exit 0 ggml-org-ggml-7ec8045/scripts/sync-llama.last000066400000000000000000000000511506673203700211330ustar00rootroot00000000000000a014310374a16f9204f2bcc1b458fc1eda67e469 ggml-org-ggml-7ec8045/scripts/sync-llama.sh000077500000000000000000000015041506673203700206110ustar00rootroot00000000000000#!/bin/bash cp -rpv ../llama.cpp/ggml/CMakeLists.txt CMakeLists.txt cp -rpv ../llama.cpp/ggml/src/CMakeLists.txt src/CMakeLists.txt cp -rpv ../llama.cpp/ggml/cmake/* cmake/ cp -rpv ../llama.cpp/ggml/src/ggml-cpu/cmake/* src/ggml-cpu/cmake/ cp -rpv ../llama.cpp/ggml/src/ggml* src/ cp -rpv
../llama.cpp/ggml/include/ggml*.h include/ cp -rpv ../llama.cpp/ggml/include/gguf*.h include/ cp -rpv ../llama.cpp/tests/test-opt.cpp tests/test-opt.cpp cp -rpv ../llama.cpp/tests/test-quantize-fns.cpp tests/test-quantize-fns.cpp cp -rpv ../llama.cpp/tests/test-quantize-perf.cpp tests/test-quantize-perf.cpp cp -rpv ../llama.cpp/tests/test-backend-ops.cpp tests/test-backend-ops.cpp cp -rpv ../llama.cpp/LICENSE ./LICENSE cp -rpv ../llama.cpp/scripts/gen-authors.sh ./scripts/gen-authors.sh ggml-org-ggml-7ec8045/scripts/sync-whisper-am.sh000077500000000000000000000074161506673203700216070ustar00rootroot00000000000000#!/bin/bash # # Synchronize whisper.cpp changes to ggml # # Usage: # # $ cd /path/to/ggml # $ ./scripts/sync-whisper-am.sh -skip hash0,hash1,hash2... # set -e sd=$(dirname $0) cd $sd/../ SRC_GGML=$(pwd) SRC_WHISPER=$(cd ../whisper.cpp; pwd) if [ ! -d $SRC_WHISPER ]; then echo "whisper.cpp not found at $SRC_WHISPER" exit 1 fi lc=$(cat $SRC_GGML/scripts/sync-whisper.last) echo "Syncing whisper.cpp changes since commit $lc" to_skip="" if [ "$1" == "-skip" ]; then to_skip=$2 fi cd $SRC_WHISPER git log --oneline $lc..HEAD git log --oneline $lc..HEAD --reverse | grep -v "(ggml/[0-9]*)" | grep -v "(llama/[0-9]*)" | cut -d' ' -f1 > $SRC_GGML/whisper-commits if [ ! -s $SRC_GGML/whisper-commits ]; then rm -v $SRC_GGML/whisper-commits echo "No new commits" exit 0 fi if [ -f $SRC_GGML/whisper-src.patch ]; then rm -v $SRC_GGML/whisper-src.patch fi while read c; do if [ -n "$to_skip" ]; then if [[ $to_skip == *"$c"* ]]; then echo "Skipping $c" continue fi fi git format-patch -k $c~1..$c --stdout -- \ ggml/CMakeLists.txt \ ggml/src/CMakeLists.txt \ ggml/cmake/FindSIMD.cmake \ ggml/src/ggml* \ ggml/include/ggml*.h \ ggml/include/gguf*.h \ examples/common-ggml.h \ examples/common-ggml.cpp \ LICENSE \ scripts/gen-authors.sh \ >> $SRC_GGML/whisper-src.patch done < $SRC_GGML/whisper-commits rm -v $SRC_GGML/whisper-commits # delete files if empty if [ ! 
-s $SRC_GGML/whisper-src.patch ]; then rm -v $SRC_GGML/whisper-src.patch fi cd $SRC_GGML if [ -f $SRC_GGML/whisper-src.patch ]; then # replace PR numbers # # Subject: some text (#1234) # Subject: some text (whisper/1234) cat whisper-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (whisper\/\2)/' > whisper-src.patch.tmp mv whisper-src.patch.tmp whisper-src.patch cat whisper-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (whisper\/\2)/' > whisper-src.patch.tmp mv whisper-src.patch.tmp whisper-src.patch # replace filenames: # # ggml/CMakeLists.txt -> CMakeLists.txt # ggml/src/CMakeLists.txt -> src/CMakeLists.txt # ggml/cmake/FindSIMD.cmake -> cmake/FindSIMD.cmake # # ggml/src/ggml* -> src/ggml* # # ggml/include/ggml*.h -> include/ggml*.h # ggml/include/gguf*.h -> include/gguf*.h # # examples/common.h -> examples/common.h # examples/common.cpp -> examples/common.cpp # examples/common-ggml.h -> examples/common-ggml.h # examples/common-ggml.cpp -> examples/common-ggml.cpp # # LICENSE -> LICENSE # scripts/gen-authors.sh -> scripts/gen-authors.sh cat whisper-src.patch | sed -E \ -e 's/\/ggml\/CMakeLists\.txt/\/CMakeLists.txt/g' \ -e 's/\/ggml\/src\/CMakeLists\.txt/\/src\/CMakeLists.txt/g' \ -e 's/\/ggml\/cmake\/FindSIMD\.cmake/\/cmake\/FindSIMD.cmake/g' \ -e 's/\/ggml\/src\/ggml(.*)/\/src\/ggml\1/g' \ -e 's/\/ggml\/include\/ggml(.*)\.h/\/include\/ggml\1.h/g' \ -e 's/\/ggml\/include\/gguf(.*)\.h/\/include\/gguf\1.h/g' \ -e 's/\/examples\/common\.h/\/examples\/common.h/g' \ -e 's/\/examples\/common\.cpp/\/examples\/common.cpp/g' \ -e 's/\/examples\/common-ggml\.h/\/examples\/common-ggml.h/g' \ -e 's/\/examples\/common-ggml\.cpp/\/examples\/common-ggml.cpp/g' \ -e 's/\/LICENSE/\/LICENSE/g' \ -e 's/\/scripts\/gen-authors\.sh/\/scripts\/gen-authors.sh/g' \ > whisper-src.patch.tmp mv whisper-src.patch.tmp whisper-src.patch git am whisper-src.patch rm -v $SRC_GGML/whisper-src.patch fi # update last commit cd $SRC_WHISPER git log -1 --format=%H > $SRC_GGML/scripts/sync-whisper.last echo "Done" exit 0 ggml-org-ggml-7ec8045/scripts/sync-whisper.last000066400000000000000000000000511506673203700215260ustar00rootroot00000000000000e4bf87b0e9c394bfaaabd64ae57b1e72e7c3490c ggml-org-ggml-7ec8045/scripts/sync-whisper.sh000077500000000000000000000011771506673203700212100ustar00rootroot00000000000000#!/bin/bash cp -rpv ../whisper.cpp/ggml/CMakeLists.txt CMakeLists.txt cp -rpv ../whisper.cpp/ggml/src/CMakeLists.txt src/CMakeLists.txt cp -rpv ../whisper.cpp/ggml/cmake/FindSIMD.cmake cmake/FindSIMD.cmake cp -rpv ../whisper.cpp/ggml/src/ggml* src/ cp -rpv ../whisper.cpp/ggml/include/ggml*.h include/ cp -rpv ../whisper.cpp/ggml/include/gguf*.h include/ cp -rpv ../whisper.cpp/examples/common-ggml.h examples/common-ggml.h cp -rpv ../whisper.cpp/examples/common-ggml.cpp examples/common-ggml.cpp cp -rpv ../whisper.cpp/LICENSE ./LICENSE cp -rpv ../whisper.cpp/scripts/gen-authors.sh ./scripts/gen-authors.sh ggml-org-ggml-7ec8045/src/000077500000000000000000000000001506673203700153125ustar00rootroot00000000000000ggml-org-ggml-7ec8045/src/CMakeLists.txt000066400000000000000000000362711506673203700200630ustar00rootroot00000000000000include(CheckCXXCompilerFlag) include("../cmake/common.cmake") add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES}) # enable libstdc++ assertions for debug builds if (CMAKE_SYSTEM_NAME MATCHES "Linux") add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>) endif() if (NOT MSVC) if (GGML_SANITIZE_THREAD) add_compile_options(-fsanitize=thread) link_libraries
(-fsanitize=thread) endif() if (GGML_SANITIZE_ADDRESS) add_compile_options(-fsanitize=address -fno-omit-frame-pointer) link_libraries (-fsanitize=address) endif() if (GGML_SANITIZE_UNDEFINED) add_compile_options(-fsanitize=undefined) link_libraries (-fsanitize=undefined) endif() endif() if (GGML_FATAL_WARNINGS) if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") list(APPEND C_FLAGS -Werror) list(APPEND CXX_FLAGS -Werror) elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") add_compile_options(/WX) endif() endif() if (GGML_ALL_WARNINGS) if (NOT MSVC) list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration) list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn) list(APPEND C_FLAGS ${WARNING_FLAGS}) list(APPEND CXX_FLAGS ${WARNING_FLAGS}) ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>" "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>") else() # todo : msvc set(C_FLAGS "") set(CXX_FLAGS "") endif() endif() if (GGML_LTO) include(CheckIPOSupported) check_ipo_supported(RESULT result OUTPUT output) if (result) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) else() message(WARNING "IPO is not supported: ${output}") endif() endif() if (GGML_CCACHE AND NOT CMAKE_C_COMPILER_LAUNCHER AND NOT CMAKE_CXX_COMPILER_LAUNCHER) find_program(GGML_CCACHE_FOUND ccache) find_program(GGML_SCCACHE_FOUND sccache) if (GGML_CCACHE_FOUND OR GGML_SCCACHE_FOUND) if(GGML_CCACHE_FOUND) set(GGML_CCACHE_VARIANT ccache) else() set(GGML_CCACHE_VARIANT sccache) endif() # TODO: should not be set globally if (GGML_SYCL AND GGML_CCACHE_FOUND AND WIN32) set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "ccache compiler_type=icl") else () set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}") endif () set(ENV{CCACHE_SLOPPINESS} time_macros) message(STATUS "${GGML_CCACHE_VARIANT} found, compilation results will be cached.
Disable with GGML_CCACHE=OFF.") else() message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with GGML_CCACHE=OFF") endif () endif() # this version of Apple ld64 is buggy execute_process( COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v ERROR_VARIABLE output OUTPUT_QUIET ) if (output MATCHES "dyld-1015\.7") add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) endif() # architecture specific # TODO: probably these flags need to be tweaked on some architectures # feel free to update the Makefile for your architecture and send a pull request or issue message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") if (MSVC) string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR) message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}") else () set(CMAKE_GENERATOR_PLATFORM_LWR "") endif () ggml_get_system_arch() message(STATUS "GGML_SYSTEM_ARCH: ${GGML_SYSTEM_ARCH}") if (NOT MSVC) if (GGML_STATIC) if (UNIX AND NOT APPLE) set(CMAKE_FIND_LIBRARY_SUFFIXES ".a;.so") endif() add_link_options(-static) if (MINGW) add_link_options(-static-libgcc -static-libstdc++) endif() endif() if (GGML_GPROF) add_compile_options(-pg) endif() endif() if (MINGW) add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER}) endif() # # POSIX conformance # # clock_gettime came in POSIX.1b (1993) # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional # posix_memalign came in POSIX.1-2001 / SUSv3 # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) # Somehow in OpenBSD whenever POSIX conformance is specified # some string functions rely on locale_t availability, # which was introduced in POSIX.1-2008, forcing us to go higher if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") add_compile_definitions(_XOPEN_SOURCE=700) else() add_compile_definitions(_XOPEN_SOURCE=600) endif() # Data types, macros and functions related to controlling CPU affinity and # some memory allocation are available on Linux through GNU extensions in libc if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android") add_compile_definitions(_GNU_SOURCE) endif() # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, # and on macOS its availability depends on enabling Darwin extensions # similarly on DragonFly, enabling BSD extensions is necessary if ( CMAKE_SYSTEM_NAME MATCHES "Darwin" OR CMAKE_SYSTEM_NAME MATCHES "iOS" OR CMAKE_SYSTEM_NAME MATCHES "tvOS" OR CMAKE_SYSTEM_NAME MATCHES "DragonFly" ) add_compile_definitions(_DARWIN_C_SOURCE) endif() # alloca is a non-standard interface that is not visible on BSDs when # POSIX conformance is specified, but not all of them provide a clean way # to enable it in such cases if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD") add_compile_definitions(__BSD_VISIBLE) endif() if (CMAKE_SYSTEM_NAME MATCHES "NetBSD") add_compile_definitions(_NETBSD_SOURCE) endif() if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") add_compile_definitions(_BSD_SOURCE) endif() if (WIN32) add_compile_definitions(_CRT_SECURE_NO_WARNINGS) endif() # ggml if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS) message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS") endif() add_library(ggml-base ../include/ggml.h ../include/ggml-alloc.h ../include/ggml-backend.h ../include/ggml-cpp.h ../include/ggml-opt.h ../include/gguf.h ggml.c ggml.cpp ggml-alloc.c ggml-backend.cpp ggml-opt.cpp ggml-threading.cpp ggml-threading.h ggml-quants.c ggml-quants.h gguf.cpp) target_include_directories(ggml-base PRIVATE .) 
if (GGML_BACKEND_DL) target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL) endif() add_library(ggml ggml-backend-reg.cpp) add_library(ggml::ggml ALIAS ggml) if (GGML_BACKEND_DIR) if (NOT GGML_BACKEND_DL) message(FATAL_ERROR "GGML_BACKEND_DIR requires GGML_BACKEND_DL") endif() target_compile_definitions(ggml PUBLIC GGML_BACKEND_DIR="${GGML_BACKEND_DIR}") endif() target_link_libraries(ggml PUBLIC ggml-base) if (CMAKE_SYSTEM_NAME MATCHES "Linux") target_link_libraries(ggml PRIVATE dl) endif() function(ggml_add_backend_library backend) if (GGML_BACKEND_DL) add_library(${backend} MODULE ${ARGN}) # write the shared library to the output directory set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL) add_dependencies(ggml ${backend}) if (GGML_BACKEND_DIR) install(TARGETS ${backend} LIBRARY DESTINATION ${GGML_BACKEND_DIR}) else() install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() else() add_library(${backend} ${ARGN}) target_link_libraries(ggml PUBLIC ${backend}) install(TARGETS ${backend} LIBRARY) endif() target_link_libraries(${backend} PRIVATE ggml-base) target_include_directories(${backend} PRIVATE ..) if (${BUILD_SHARED_LIBS}) target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD) target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED) endif() if(NOT GGML_AVAILABLE_BACKENDS) set(GGML_AVAILABLE_BACKENDS "${backend}" CACHE INTERNAL "List of backends for cmake package") else() list(FIND GGML_AVAILABLE_BACKENDS "${backend}" has_backend) if(has_backend EQUAL -1) set(GGML_AVAILABLE_BACKENDS "${GGML_AVAILABLE_BACKENDS};${backend}" CACHE INTERNAL "List of backends for cmake package") endif() endif() endfunction() function(ggml_add_backend backend) string(TOUPPER "GGML_${backend}" backend_id) if (${backend_id}) string(TOLOWER "ggml-${backend}" backend_target) add_subdirectory(${backend_target}) message(STATUS "Including ${backend} backend") if (NOT GGML_BACKEND_DL) string(TOUPPER "GGML_USE_${backend}" backend_use) target_compile_definitions(ggml PUBLIC ${backend_use}) endif() endif() endfunction() function(ggml_add_cpu_backend_variant tag_name) set(GGML_CPU_TAG_NAME ${tag_name}) # other: OPENMP LLAMAFILE CPU_HBM if (GGML_SYSTEM_ARCH STREQUAL "x86") foreach (feat NATIVE SSE42 AVX AVX2 BMI2 AVX_VNNI FMA F16C AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8 AMX_BF16) set(GGML_${feat} OFF) endforeach() foreach (feat ${ARGN}) set(GGML_${feat} ON) endforeach() elseif (GGML_SYSTEM_ARCH STREQUAL "ARM") foreach (feat ${ARGN}) set(GGML_INTERNAL_${feat} ON) endforeach() elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC") foreach (feat ${ARGN}) set(GGML_INTERNAL_${feat} ON) endforeach() endif() ggml_add_cpu_backend_variant_impl(${tag_name}) endfunction() ggml_add_backend(CPU) if (GGML_CPU_ALL_VARIANTS) if (NOT GGML_BACKEND_DL) message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL") elseif (GGML_CPU_ARM_ARCH) message(FATAL_ERROR "Cannot use both GGML_CPU_ARM_ARCH and GGML_CPU_ALL_VARIANTS") endif() if (GGML_SYSTEM_ARCH STREQUAL "x86") ggml_add_cpu_backend_variant(x64) ggml_add_cpu_backend_variant(sse42 SSE42) ggml_add_cpu_backend_variant(sandybridge SSE42 AVX) ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA) ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512) ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI) 
ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI) if (NOT MSVC) # MSVC doesn't support AMX ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) endif() elseif(GGML_SYSTEM_ARCH STREQUAL "ARM") if (CMAKE_SYSTEM_NAME MATCHES "Linux") # Many of these features are optional so we build versions with popular # combinations and name the backends based on the version they were # first released with ggml_add_cpu_backend_variant(armv8.0_1) ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD) ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC) ggml_add_cpu_backend_variant(armv8.2_3 DOTPROD FP16_VECTOR_ARITHMETIC SVE) ggml_add_cpu_backend_variant(armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8) ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2) ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME) ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME) elseif (CMAKE_SYSTEM_NAME MATCHES "Android") # Android-specific backends with SoC-compatible feature sets ggml_add_cpu_backend_variant(android_armv8.0_1) ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD) ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC) ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8) elseif (APPLE) ggml_add_cpu_backend_variant(apple_m1 DOTPROD) ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8) ggml_add_cpu_backend_variant(apple_m4 DOTPROD MATMUL_INT8 NOSVE SME) else() message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}") endif() elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC") if (CMAKE_SYSTEM_NAME MATCHES "Linux") ggml_add_cpu_backend_variant(power0) ggml_add_cpu_backend_variant(power7_1 POWER7) ggml_add_cpu_backend_variant(power7_2 POWER7 VSX) ggml_add_cpu_backend_variant(power8_1 POWER8) ggml_add_cpu_backend_variant(power8_2 POWER8 VSX) ggml_add_cpu_backend_variant(power9 POWER9 VSX) ggml_add_cpu_backend_variant(power10 POWER10 VSX) ggml_add_cpu_backend_variant(power11 POWER11 VSX) else() message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}") endif() else() message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}") endif() elseif (GGML_CPU) ggml_add_cpu_backend_variant_impl("") endif() ggml_add_backend(BLAS) ggml_add_backend(CANN) ggml_add_backend(CUDA) ggml_add_backend(HIP) ggml_add_backend(METAL) ggml_add_backend(MUSA) ggml_add_backend(RPC) ggml_add_backend(SYCL) ggml_add_backend(Vulkan) ggml_add_backend(WebGPU) ggml_add_backend(zDNN) ggml_add_backend(OpenCL) foreach (target ggml-base ggml) target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>) target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump endforeach() target_link_libraries(ggml-base PRIVATE Threads::Threads) find_library(MATH_LIBRARY m) if (MATH_LIBRARY) if (NOT WIN32 OR NOT DEFINED ENV{ONEAPI_ROOT}) target_link_libraries(ggml-base PRIVATE m) endif() endif() if (CMAKE_SYSTEM_NAME MATCHES "Android") target_link_libraries(ggml-base PRIVATE dl) endif() if(CMAKE_SYSTEM_NAME MATCHES "visionOS") target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE) endif() if (BUILD_SHARED_LIBS) foreach (target ggml-base ggml) set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON) target_compile_definitions(${target} PRIVATE
GGML_BUILD) target_compile_definitions(${target} PUBLIC GGML_SHARED) endforeach() endif() ggml-org-ggml-7ec8045/src/ggml-alloc.c000066400000000000000000001246101506673203700175000ustar00rootroot00000000000000#include "ggml-alloc.h" #include "ggml-backend-impl.h" #include "ggml.h" #include "ggml-impl.h" #include <assert.h> #include <limits.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MAX_FREE_BLOCKS 256 //#define GGML_ALLOCATOR_DEBUG //#define AT_PRINTF(...) GGML_LOG_DEBUG(__VA_ARGS__) #define AT_PRINTF(...) static bool ggml_is_view(const struct ggml_tensor * t) { return t->view_src != NULL; } // ops that return true for this function must not use restrict pointers for their backend implementations bool ggml_op_can_inplace(enum ggml_op op) { switch (op) { case GGML_OP_SCALE: case GGML_OP_DIAG_MASK_ZERO: case GGML_OP_DIAG_MASK_INF: case GGML_OP_ADD: case GGML_OP_ADD_ID: case GGML_OP_ADD1: case GGML_OP_SUB: case GGML_OP_MUL: case GGML_OP_DIV: case GGML_OP_SQR: case GGML_OP_SQRT: case GGML_OP_LOG: case GGML_OP_UNARY: case GGML_OP_ROPE: case GGML_OP_ROPE_BACK: case GGML_OP_SILU_BACK: case GGML_OP_RMS_NORM: case GGML_OP_RMS_NORM_BACK: case GGML_OP_SOFT_MAX: case GGML_OP_SOFT_MAX_BACK: return true; default: return false; } } static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) { assert(alignment && !(alignment & (alignment - 1))); // power of 2 size_t align = (alignment - (((uintptr_t)buffer + offset) % alignment)) % alignment; return offset + align; } // tallocr struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) { void * base = ggml_backend_buffer_get_base(buffer); size_t align = ggml_backend_buffer_get_alignment(buffer); assert(align && !(align & (align - 1))); // power of 2 struct ggml_tallocr talloc = (struct ggml_tallocr) { /*.buffer = */ buffer, /*.base = */ base, /*.alignment = */ align, /*.offset = */ aligned_offset(base, 0, align), }; return talloc; } enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) { size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor); size = GGML_PAD(size, talloc->alignment); if (talloc->offset + size > ggml_backend_buffer_get_size(talloc->buffer)) { GGML_LOG_ERROR("%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n", __func__, tensor->name, size, ggml_backend_buffer_get_size(talloc->buffer) - talloc->offset); GGML_ABORT("not enough space in the buffer"); } void * addr = (char *)ggml_backend_buffer_get_base(talloc->buffer) + talloc->offset; talloc->offset += size; assert(((uintptr_t)addr % talloc->alignment) == 0); return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr); } // dynamic tensor allocator #define GGML_VBUFFER_MAX_CHUNKS 16 // relative memory address within an allocation that can be split into multiple buffers (chunks) struct buffer_address { int chunk; // index of a backend buffer size_t offset; // local memory offset within the buffer }; static const struct buffer_address GGML_BUFFER_ADDRESS_INVALID = { -1, SIZE_MAX }; static bool ggml_buffer_address_less(struct buffer_address a, struct buffer_address b) { return a.chunk != b.chunk ?
a.chunk < b.chunk : a.offset < b.offset; } struct free_block { size_t offset; size_t size; }; struct tallocr_chunk { struct free_block free_blocks[MAX_FREE_BLOCKS]; int n_free_blocks; size_t max_size; }; struct ggml_dyn_tallocr { size_t alignment; size_t max_chunk_size; struct tallocr_chunk * chunks[GGML_VBUFFER_MAX_CHUNKS]; int n_chunks; #ifdef GGML_ALLOCATOR_DEBUG struct { const struct ggml_tensor * tensor; struct buffer_address addr; } allocated_tensors[1024]; #endif }; static void ggml_dyn_tallocr_insert_block(struct tallocr_chunk * chunk, size_t offset, size_t size) { GGML_ASSERT(chunk->n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks"); // insert the new block in the correct position to keep the array sorted by address (to make merging blocks faster) int insert_pos = 0; while (insert_pos < chunk->n_free_blocks && chunk->free_blocks[insert_pos].offset < offset) { insert_pos++; } // shift all blocks from insert_pos onward to make room for the new block for (int i = chunk->n_free_blocks; i > insert_pos; i--) { chunk->free_blocks[i] = chunk->free_blocks[i-1]; } // insert the new block chunk->free_blocks[insert_pos].offset = offset; chunk->free_blocks[insert_pos].size = size; chunk->n_free_blocks++; } static void ggml_dyn_tallocr_remove_block(struct tallocr_chunk * chunk, int idx) { // shift all elements after idx by 1 to the left, overwriting the element at idx for (int i = idx; i < chunk->n_free_blocks; i++) { chunk->free_blocks[i] = chunk->free_blocks[i+1]; } chunk->n_free_blocks--; } static int ggml_dyn_tallocr_new_chunk(struct ggml_dyn_tallocr * alloc, size_t min_size) { if (alloc->n_chunks >= GGML_VBUFFER_MAX_CHUNKS) { return -1; } struct tallocr_chunk * chunk = calloc(1, sizeof(struct tallocr_chunk)); chunk->n_free_blocks = 1; chunk->free_blocks[0].offset = 0; // available space in a chunk is limited to max_chunk_size, but can be higher if: // 1. a single tensor exceeds the maximum, and cannot fit any other way // 2. we are running out of chunks // backends will either manage to allocate the larger size, or report an error. 
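// illustrative example (numbers are assumptions, not taken from the code):
// with max_chunk_size = 256 MiB, a request of min_size = 1 GiB still yields a
// single 1 GiB free block below, since MAX() keeps the larger of the two values.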
chunk->free_blocks[0].size = MAX(min_size, alloc->max_chunk_size); if (alloc->n_chunks == GGML_VBUFFER_MAX_CHUNKS - 1) { chunk->free_blocks[0].size = SIZE_MAX/2; } alloc->chunks[alloc->n_chunks] = chunk; alloc->n_chunks++; return alloc->n_chunks - 1; } #ifdef GGML_ALLOCATOR_DEBUG static void add_allocated_tensor(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, const struct ggml_tensor * tensor) { for (int i = 0; i < 1024; i++) { if (alloc->allocated_tensors[i].tensor == NULL) { alloc->allocated_tensors[i].tensor = tensor; alloc->allocated_tensors[i].addr = addr; return; } } GGML_ABORT("out of allocated_tensors"); } static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, const struct ggml_tensor * tensor) { for (int i = 0; i < 1024; i++) { if (alloc->allocated_tensors[i].addr.chunk == addr.chunk && alloc->allocated_tensors[i].addr.offset == addr.offset) { alloc->allocated_tensors[i].tensor = NULL; return; } } GGML_ABORT("tried to free tensor %s not found\n", tensor->name); } #endif static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t size, const struct ggml_tensor * tensor) { size = aligned_offset(NULL, size, alloc->alignment); AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size); int best_fit_chunk = -1; int best_fit_block = -1; size_t max_avail = 0; // find the best fitting free block besides the last block, within any chunk for (int c = 0; c < alloc->n_chunks; ++c) { struct tallocr_chunk * chunk = alloc->chunks[c]; size_t best_fit_size = SIZE_MAX; for (int i = 0; i < chunk->n_free_blocks - 1; i++) { struct free_block * block = &chunk->free_blocks[i]; max_avail = MAX(max_avail, block->size); if (block->size >= size && block->size <= best_fit_size) { best_fit_chunk = c; best_fit_block = i; best_fit_size = block->size; } } } if (best_fit_block == -1) { // no suitable block found, try the last block (this will grow a chunk's size) for (int c = 0; c < alloc->n_chunks; ++c) { struct tallocr_chunk * chunk = alloc->chunks[c]; if (chunk->n_free_blocks > 0) { struct free_block * block = &chunk->free_blocks[chunk->n_free_blocks - 1]; max_avail = MAX(max_avail, block->size); if (block->size >= size) { best_fit_chunk = c; best_fit_block = chunk->n_free_blocks - 1; break; } } } } if (best_fit_block == -1) { // none of the existing chunks have enough space left best_fit_chunk = ggml_dyn_tallocr_new_chunk(alloc, size); best_fit_block = 0; } if (best_fit_chunk == -1) { // since the last chunk always has virtually endless memory, this should never happen GGML_LOG_ERROR("%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n", __func__, size, max_avail); GGML_ABORT("graph allocation: failed to reserve memory"); } struct tallocr_chunk * chunk = alloc->chunks[best_fit_chunk]; struct free_block * block = &chunk->free_blocks[best_fit_block]; struct buffer_address addr = {.chunk = best_fit_chunk, .offset = block->offset }; block->offset += size; block->size -= size; if (block->size == 0) { // remove block if empty ggml_dyn_tallocr_remove_block(chunk, best_fit_block); } AT_PRINTF("block %d, offset %zu, chunk %d\n", best_fit_block, addr.offset, addr.chunk); #ifdef GGML_ALLOCATOR_DEBUG add_allocated_tensor(alloc, addr, tensor); size_t cur_max = addr.offset + size; if (cur_max > alloc->chunks[addr.chunk]->max_size) { // sort allocated_tensors by chunk/offset for (int i = 0; i < 1024; i++) { for (int j = i + 1; j < 1024; j++) { if
(ggml_buffer_address_less(alloc->allocated_tensors[j].addr, alloc->allocated_tensors[i].addr)) { const struct ggml_tensor * tmp_tensor = alloc->allocated_tensors[i].tensor; struct buffer_address tmp_addr = alloc->allocated_tensors[i].addr; alloc->allocated_tensors[i].tensor = alloc->allocated_tensors[j].tensor; alloc->allocated_tensors[i].addr = alloc->allocated_tensors[j].addr; alloc->allocated_tensors[j].tensor = tmp_tensor; alloc->allocated_tensors[j].addr = tmp_addr; } } } GGML_LOG_DEBUG("max_size[%d] = %.2f MB: tensors: ", addr.chunk, cur_max / 1024.0 / 1024.0); for (int i = 0; i < 1024; i++) { if (alloc->allocated_tensors[i].tensor) { GGML_LOG_DEBUG("%s [%d: %zx-%zx] (%.2f MB) ", alloc->allocated_tensors[i].tensor->name, alloc->allocated_tensors[i].addr.chunk, alloc->allocated_tensors[i].addr.offset, alloc->allocated_tensors[i].addr.offset + ggml_nbytes(alloc->allocated_tensors[i].tensor), ggml_nbytes(alloc->allocated_tensors[i].tensor) / 1024.0 / 1024.0); } } GGML_LOG_DEBUG("\n"); } #endif chunk->max_size = MAX(chunk->max_size, addr.offset + size); return addr; GGML_UNUSED(tensor); } // this is a very naive implementation, but for our case the number of free blocks should be very small static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size, const struct ggml_tensor * tensor) { size = aligned_offset(NULL, size, alloc->alignment); AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, addr.chunk, addr.offset, size, alloc->chunks[addr.chunk]->n_free_blocks); #ifdef GGML_ALLOCATOR_DEBUG remove_allocated_tensor(alloc, addr, tensor); #endif struct tallocr_chunk * chunk = alloc->chunks[addr.chunk]; // see if we can merge with an existing block for (int i = 0; i < chunk->n_free_blocks; i++) { struct free_block * block = &chunk->free_blocks[i]; // check if ptr is at the end of the block if (block->offset + block->size == addr.offset) { block->size += size; // check if we can merge with the next block if (i < chunk->n_free_blocks - 1) { struct free_block * next = &chunk->free_blocks[i+1]; if (block->offset + block->size == next->offset) { block->size += next->size; ggml_dyn_tallocr_remove_block(chunk, i+1); } } return; } // check if ptr is at the beginning of the block if (addr.offset + size == block->offset) { block->offset = addr.offset; block->size += size; // check if we can merge with the previous block if (i > 0) { struct free_block * prev = &chunk->free_blocks[i-1]; if (prev->offset + prev->size == block->offset) { prev->size += block->size; ggml_dyn_tallocr_remove_block(chunk, i); } } return; } } // otherwise, add a new block ggml_dyn_tallocr_insert_block(chunk, addr.offset, size); GGML_UNUSED(tensor); } static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) { for (int i = 0; i < GGML_VBUFFER_MAX_CHUNKS; i++) { free(alloc->chunks[i]); alloc->chunks[i] = NULL; } alloc->n_chunks = 0; #ifdef GGML_ALLOCATOR_DEBUG for (int i = 0; i < 1024; i++) { alloc->allocated_tensors[i].tensor = NULL; } #endif } static struct ggml_dyn_tallocr * ggml_dyn_tallocr_new(size_t alignment, size_t max_buffer_size) { struct ggml_dyn_tallocr * alloc = (struct ggml_dyn_tallocr *)malloc(sizeof(struct ggml_dyn_tallocr)); *alloc = (struct ggml_dyn_tallocr) { /*.alignment = */ alignment, /*.max_chunk_size = */ MIN(max_buffer_size, SIZE_MAX/2), // clamp to avoid overflows /*.chunks = */ {NULL}, /*.n_chunks = */ 0, #ifdef GGML_ALLOCATOR_DEBUG /*.allocated_tensors = */ {{0}}, #endif }; 
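// note: chunks are created lazily by ggml_dyn_tallocr_new_chunk() on the
// first allocation; the reset below just puts the fresh allocator into a
// known-empty state (NULL chunk pointers and, in debug builds, a cleared
// allocated_tensors table)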
ggml_dyn_tallocr_reset(alloc); return alloc; } static void ggml_dyn_tallocr_free(struct ggml_dyn_tallocr * alloc) { for (int i = 0; i < alloc->n_chunks; ++i) { free(alloc->chunks[i]); } free(alloc); } static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc) { size_t max_size = 0; for (int i = 0; i < alloc->n_chunks; i++) { max_size += alloc->chunks[i]->max_size; } return max_size; } // virtual buffer with contiguous memory range, split into multiple backend buffers (chunks) struct vbuffer { ggml_backend_buffer_t chunks[GGML_VBUFFER_MAX_CHUNKS]; }; static void ggml_vbuffer_free(struct vbuffer * buf) { if (buf == NULL) { return; } for (int i = 0; i < GGML_VBUFFER_MAX_CHUNKS; ++i) { ggml_backend_buffer_free(buf->chunks[i]); } free(buf); } static int ggml_vbuffer_n_chunks(struct vbuffer * buf) { int n = 0; while (n < GGML_VBUFFER_MAX_CHUNKS && buf->chunks[n]) n++; return n; } static size_t ggml_vbuffer_size(struct vbuffer * buf) { size_t size = 0; for (int i = 0; i < GGML_VBUFFER_MAX_CHUNKS && buf->chunks[i]; ++i) { size += ggml_backend_buffer_get_size(buf->chunks[i]); } return size; } static struct vbuffer * ggml_vbuffer_alloc(ggml_backend_buffer_type_t buft, const struct ggml_dyn_tallocr * talloc, enum ggml_backend_buffer_usage usage) { struct vbuffer * buf = (struct vbuffer *)calloc(1, sizeof(struct vbuffer)); if (buf == NULL) { return NULL; } for (int n = 0; n < talloc->n_chunks; n++) { size_t chunk_size = talloc->chunks[n]->max_size; buf->chunks[n] = ggml_backend_buft_alloc_buffer(buft, chunk_size); if (buf->chunks[n] == NULL) { ggml_vbuffer_free(buf); return NULL; } ggml_backend_buffer_set_usage(buf->chunks[n], usage); } return buf; } static void ggml_vbuffer_tensor_alloc(struct vbuffer * buf, struct ggml_tensor * tensor, struct buffer_address buf_addr) { void * base = ggml_backend_buffer_get_base(buf->chunks[buf_addr.chunk]); void * addr = (char *)base + buf_addr.offset; ggml_backend_tensor_alloc(buf->chunks[buf_addr.chunk], tensor, addr); } static void ggml_vbuffer_reset(struct vbuffer * buf) { for (int i = 0; i < GGML_VBUFFER_MAX_CHUNKS && buf->chunks[i]; ++i) { ggml_backend_buffer_reset(buf->chunks[i]); } } ///////////////////////////////////// // graph allocator struct hash_node { int n_children; int n_views; int buffer_id; struct buffer_address addr; bool allocated; }; struct tensor_alloc { int buffer_id; struct buffer_address addr; size_t size_max; // 0 = pre-allocated, unused, or view }; struct leaf_alloc { struct tensor_alloc leaf; }; struct node_alloc { struct tensor_alloc dst; struct tensor_alloc src[GGML_MAX_SRC]; }; struct ggml_gallocr { ggml_backend_buffer_type_t * bufts; // [n_buffers] struct vbuffer ** buffers; // [n_buffers] struct ggml_dyn_tallocr ** buf_tallocs; // [n_buffers] int n_buffers; struct ggml_hash_set hash_set; struct hash_node * hash_values; // [hash_set.size] struct node_alloc * node_allocs; // [n_nodes] int n_nodes; struct leaf_alloc * leaf_allocs; // [n_leafs] int n_leafs; }; ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs) { ggml_gallocr_t galloc = (ggml_gallocr_t)calloc(1, sizeof(struct ggml_gallocr)); GGML_ASSERT(galloc != NULL); galloc->bufts = calloc(n_bufs, sizeof(ggml_backend_buffer_type_t)); GGML_ASSERT(galloc->bufts != NULL); galloc->buffers = calloc(n_bufs, sizeof(struct vbuffer *)); GGML_ASSERT(galloc->buffers != NULL); galloc->buf_tallocs = calloc(n_bufs, sizeof(struct ggml_dyn_tallocr *)); GGML_ASSERT(galloc->buf_tallocs != NULL); for (int i = 0; i < n_bufs; i++) { galloc->bufts[i] = bufts[i]; 
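// the actual backend buffers are not allocated here; they are created
// lazily in ggml_gallocr_reserve_n() once the required sizes are known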
galloc->buffers[i] = NULL; // check if the same buffer type is used multiple times and reuse the same allocator for (int j = 0; j < i; j++) { if (bufts[i] == bufts[j]) { galloc->buf_tallocs[i] = galloc->buf_tallocs[j]; break; } } if (galloc->buf_tallocs[i] == NULL) { size_t alignment = ggml_backend_buft_get_alignment(bufts[i]); size_t max_size = ggml_backend_buft_get_max_size(bufts[i]); galloc->buf_tallocs[i] = ggml_dyn_tallocr_new(alignment, max_size); } } galloc->n_buffers = n_bufs; return galloc; } ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft) { return ggml_gallocr_new_n(&buft, 1); } void ggml_gallocr_free(ggml_gallocr_t galloc) { if (galloc == NULL) { return; } for (int i = 0; i < galloc->n_buffers; i++) { if (galloc->buffers != NULL) { // skip if already freed bool freed = false; for (int j = 0; j < i; j++) { if (galloc->buffers[j] == galloc->buffers[i]) { freed = true; break; } } if (!freed) { ggml_vbuffer_free(galloc->buffers[i]); } } if (galloc->buf_tallocs != NULL) { // skip if already freed bool freed = false; for (int j = 0; j < i; j++) { if (galloc->buf_tallocs[j] == galloc->buf_tallocs[i]) { freed = true; break; } } if (!freed) { ggml_dyn_tallocr_free(galloc->buf_tallocs[i]); } } } ggml_hash_set_free(&galloc->hash_set); free(galloc->hash_values); free(galloc->bufts); free(galloc->buffers); free(galloc->buf_tallocs); free(galloc->node_allocs); free(galloc->leaf_allocs); free(galloc); } typedef struct ggml_gallocr * ggml_gallocr_t; static struct hash_node * ggml_gallocr_hash_get(ggml_gallocr_t galloc, struct ggml_tensor * t) { size_t i = ggml_hash_find_or_insert(&galloc->hash_set, t); return &galloc->hash_values[i]; } static bool ggml_gallocr_is_own(ggml_gallocr_t galloc, struct ggml_tensor * t) { return ggml_gallocr_hash_get(galloc, t)->allocated; } static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor * t) { return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated; } static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) { GGML_ASSERT(buffer_id >= 0); struct hash_node * hn = ggml_gallocr_hash_get(galloc, node); if (!ggml_gallocr_is_allocated(galloc, node) && !ggml_is_view(node)) { hn->allocated = true; assert(hn->addr.offset == 0); // try to reuse a parent's buffer (inplace) if (ggml_op_can_inplace(node->op)) { for (int i = 0; i < GGML_MAX_SRC; i++) { struct ggml_tensor * parent = node->src[i]; if (parent == NULL) { continue; } // if the node's data is external, then we cannot re-use it if (!ggml_gallocr_is_own(galloc, parent)) { AT_PRINTF("not reusing parent %s for %s as %p is external\n", parent->name, node->name, parent->data); continue; } // outputs cannot be reused if (parent->flags & GGML_TENSOR_FLAG_OUTPUT || (parent->view_src != NULL && parent->view_src->flags & GGML_TENSOR_FLAG_OUTPUT)) { AT_PRINTF("not reusing parent %s for %s as it is an output\n", parent->name, node->name); continue; } if (!ggml_are_same_layout(node, parent)) { AT_PRINTF("not reusing parent %s for %s as layouts are different\n", parent->name, node->name); continue; } struct hash_node * p_hn = ggml_gallocr_hash_get(galloc, parent); if (p_hn->n_children == 1 && p_hn->n_views == 0) { if (ggml_is_view(parent)) { struct ggml_tensor * view_src = parent->view_src; struct hash_node * view_src_hn = ggml_gallocr_hash_get(galloc, view_src); if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && view_src->data == parent->data) { AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, 
view_src->name, node->name); assert(view_src_hn->addr.chunk == p_hn->addr.chunk && view_src_hn->addr.offset == p_hn->addr.offset); hn->buffer_id = p_hn->buffer_id; hn->addr = p_hn->addr; p_hn->allocated = false; // avoid freeing the parent view_src_hn->allocated = false; return; } } else { AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name); hn->buffer_id = p_hn->buffer_id; hn->addr = p_hn->addr; p_hn->allocated = false; // avoid freeing the parent return; } } } } // allocate tensor from the buffer struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id]; ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id]; size_t size = ggml_backend_buft_get_alloc_size(buft, node); hn->buffer_id = buffer_id; hn->addr = ggml_dyn_tallocr_alloc(alloc, size, node); } } static void ggml_gallocr_free_node(ggml_gallocr_t galloc, struct ggml_tensor * node) { // graph outputs are never freed if (node->flags & GGML_TENSOR_FLAG_OUTPUT) { AT_PRINTF("not freeing output %s\n", node->name); return; } struct hash_node * hn = ggml_gallocr_hash_get(galloc, node); int buffer_id = hn->buffer_id; struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id]; ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id]; size_t size = ggml_backend_buft_get_alloc_size(buft, node); ggml_dyn_tallocr_free_tensor(alloc, hn->addr, size, node); hn->allocated = false; } static int get_node_buffer_id(const int * node_buffer_ids, int i) { return node_buffer_ids ? node_buffer_ids[i] : 0; } static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) { // clear hash tables ggml_hash_set_reset(&galloc->hash_set); memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size); // allocate leafs // these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes for (int i = 0; i < graph->n_leafs; i++) { struct ggml_tensor * leaf = graph->leafs[i]; ggml_gallocr_allocate_node(galloc, leaf, get_node_buffer_id(leaf_buffer_ids, i)); } // count number of children and views // allocate other graph inputs and leafs first to avoid overwriting them for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; // TODO: better way to add external dependencies // GGML_OP_NONE does not appear normally in the graph nodes, but is used by ggml-backend to add dependencies to // control when some tensors are allocated and freed. 
in this case, the dependencies are in `src`, but the node // itself is never used and should not be considered a dependency if (ggml_is_view(node) && node->op != GGML_OP_NONE) { struct ggml_tensor * view_src = node->view_src; ggml_gallocr_hash_get(galloc, view_src)->n_views += 1; } if (node->flags & GGML_TENSOR_FLAG_INPUT) { ggml_gallocr_allocate_node(galloc, graph->nodes[i], get_node_buffer_id(node_buffer_ids, i)); } for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { continue; } ggml_gallocr_hash_get(galloc, src)->n_children += 1; // allocate explicit inputs if (src->flags & GGML_TENSOR_FLAG_INPUT) { ggml_gallocr_allocate_node(galloc, src, get_node_buffer_id(node_buffer_ids, i)); } } } // allocate tensors for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; int buffer_id = get_node_buffer_id(node_buffer_ids, i); // allocate parents (only leafs need to be allocated at this point) for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * parent = node->src[j]; if (parent == NULL) { continue; } ggml_gallocr_allocate_node(galloc, parent, buffer_id); } // allocate node ggml_gallocr_allocate_node(galloc, node, buffer_id); AT_PRINTF("exec: %s (%s) <= ", ggml_op_desc(node), node->name); for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * parent = node->src[j]; if (parent == NULL) { continue; } AT_PRINTF("%s", parent->name); if (j < GGML_MAX_SRC - 1 && node->src[j + 1] != NULL) { AT_PRINTF(", "); } } AT_PRINTF("\n"); // update parents for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * parent = node->src[j]; if (parent == NULL) { continue; } struct hash_node * p_hn = ggml_gallocr_hash_get(galloc, parent); p_hn->n_children -= 1; AT_PRINTF("parent %s: %d children, %d views, allocated: %d\n", parent->name, p_hn->n_children, p_hn->n_views, p_hn->allocated); if (p_hn->n_children == 0 && p_hn->n_views == 0) { if (ggml_is_view(parent)) { struct ggml_tensor * view_src = parent->view_src; struct hash_node * view_src_hn = ggml_gallocr_hash_get(galloc, view_src); view_src_hn->n_views -= 1; AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src_hn->n_children, view_src_hn->n_views); if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0 && view_src_hn->allocated) { ggml_gallocr_free_node(galloc, view_src); } } else if (p_hn->allocated) { ggml_gallocr_free_node(galloc, parent); } } AT_PRINTF("\n"); } } } bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) { size_t min_hash_size = graph->n_nodes + graph->n_leafs; // add 25% margin to avoid hash collisions min_hash_size += min_hash_size / 4; // initialize hash table if (galloc->hash_set.size < min_hash_size) { ggml_hash_set_free(&galloc->hash_set); galloc->hash_set = ggml_hash_set_new(min_hash_size); GGML_ASSERT(galloc->hash_set.keys != NULL); free(galloc->hash_values); galloc->hash_values = malloc(sizeof(struct hash_node) * galloc->hash_set.size); GGML_ASSERT(galloc->hash_values != NULL); } // reset allocators for (int i = 0; i < galloc->n_buffers; i++) { ggml_dyn_tallocr_reset(galloc->buf_tallocs[i]); } // allocate in hash table ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids); // set the node_allocs from the hash table if (galloc->n_nodes < graph->n_nodes) { free(galloc->node_allocs); galloc->node_allocs = calloc(graph->n_nodes, sizeof(struct node_alloc)); GGML_ASSERT(galloc->node_allocs != NULL); } 
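// snapshot the per-node assignments computed in the hash table so that later
// calls to ggml_gallocr_alloc_graph() can re-apply them without re-planning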
galloc->n_nodes = graph->n_nodes; for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; struct node_alloc * node_alloc = &galloc->node_allocs[i]; if (node->view_src || node->data) { node_alloc->dst.buffer_id = -1; node_alloc->dst.addr = GGML_BUFFER_ADDRESS_INVALID; node_alloc->dst.size_max = 0; } else { struct hash_node * hn = ggml_gallocr_hash_get(galloc, node); node_alloc->dst.buffer_id = hn->buffer_id; node_alloc->dst.addr = hn->addr; node_alloc->dst.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], node); } for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (!src || src->view_src || src->data) { node_alloc->src[j].buffer_id = -1; node_alloc->src[j].addr = GGML_BUFFER_ADDRESS_INVALID; node_alloc->src[j].size_max = 0; } else { struct hash_node * hn = ggml_gallocr_hash_get(galloc, src); node_alloc->src[j].buffer_id = hn->buffer_id; node_alloc->src[j].addr = hn->addr; node_alloc->src[j].size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], src); } } } if (galloc->n_leafs < graph->n_leafs) { free(galloc->leaf_allocs); galloc->leaf_allocs = calloc(graph->n_leafs, sizeof(galloc->leaf_allocs[0])); GGML_ASSERT(galloc->leaf_allocs != NULL); } galloc->n_leafs = graph->n_leafs; for (int i = 0; i < graph->n_leafs; i++) { struct ggml_tensor * leaf = graph->leafs[i]; struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf); if (leaf->view_src || leaf->data) { galloc->leaf_allocs[i].leaf.buffer_id = -1; galloc->leaf_allocs[i].leaf.addr = GGML_BUFFER_ADDRESS_INVALID; galloc->leaf_allocs[i].leaf.size_max = 0; } else { galloc->leaf_allocs[i].leaf.buffer_id = hn->buffer_id; galloc->leaf_allocs[i].leaf.addr = hn->addr; galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf); } } // reallocate buffers if needed for (int i = 0; i < galloc->n_buffers; i++) { // if the buffer type is used multiple times, we reuse the same buffer for (int j = 0; j < i; j++) { if (galloc->buf_tallocs[j] == galloc->buf_tallocs[i]) { galloc->buffers[i] = galloc->buffers[j]; break; } } size_t cur_size = galloc->buffers[i] ? 
ggml_vbuffer_size(galloc->buffers[i]) : 0; size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]); // even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views if (new_size > cur_size || galloc->buffers[i] == NULL) { #ifndef NDEBUG GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0); #endif ggml_vbuffer_free(galloc->buffers[i]); galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE); if (galloc->buffers[i] == NULL) { GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size); return false; } } } return true; } bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) { return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL); } static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, struct tensor_alloc * tensor_alloc) { int buffer_id = tensor_alloc->buffer_id; assert(tensor->data || tensor->view_src || ggml_backend_buft_get_alloc_size(galloc->bufts[buffer_id], tensor) <= tensor_alloc->size_max); if (tensor->view_src != NULL) { if (tensor->buffer == NULL) { assert(tensor_alloc->addr.offset == SIZE_MAX); if (tensor->view_src->buffer == NULL) { // this tensor was allocated without ggml-backend return; } ggml_backend_view_init(tensor); } } else { if (tensor->data == NULL) { assert(tensor_alloc->addr.offset != SIZE_MAX); assert(ggml_backend_buft_get_alloc_size(galloc->bufts[buffer_id], tensor) <= tensor_alloc->size_max); ggml_vbuffer_tensor_alloc(galloc->buffers[buffer_id], tensor, tensor_alloc->addr); } else { if (tensor->buffer == NULL) { // this tensor was allocated without ggml-backend return; } } } } static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) { size_t node_size = 0; if (!node->data && !node->view_src) { // If we previously had data but don't now then reallocate if (talloc->buffer_id < 0) { return false; } node_size = ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node); } return talloc->size_max >= node_size; } static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph * graph) { if (galloc->n_nodes != graph->n_nodes) { #ifndef NDEBUG GGML_LOG_DEBUG("%s: graph has different number of nodes\n", __func__); #endif return true; } if (galloc->n_leafs != graph->n_leafs) { #ifndef NDEBUG GGML_LOG_DEBUG("%s: graph has different number of leafs\n", __func__); #endif return true; } for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; struct node_alloc * node_alloc = &galloc->node_allocs[i]; if (!ggml_gallocr_node_needs_realloc(galloc, node, &node_alloc->dst)) { #ifndef NDEBUG GGML_LOG_DEBUG("%s: node %s is not valid\n", __func__, node->name); #endif return true; } for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { continue; } if (!ggml_gallocr_node_needs_realloc(galloc, src, &node_alloc->src[j])) { #ifndef NDEBUG GGML_LOG_DEBUG("%s: src %d (%s) of node %s is not valid\n", __func__, j, src->name, node->name); #endif return true; } } } return false; } bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph) { if (ggml_gallocr_needs_realloc(galloc, graph)) { if (galloc->n_buffers == 1) { #ifndef NDEBUG 
GGML_LOG_DEBUG("%s: reallocating buffers automatically\n", __func__); #endif if (!ggml_gallocr_reserve(galloc, graph)) { return false; } } else { #ifndef NDEBUG GGML_LOG_DEBUG("%s: cannot reallocate multi buffer graph automatically, call reserve\n", __func__); #endif return false; } } // reset buffers for (int i = 0; i < galloc->n_buffers; i++) { if (galloc->buffers[i] != NULL) { ggml_vbuffer_reset(galloc->buffers[i]); } } // allocate the graph tensors from the previous assignments // leafs for (int i = 0; i < graph->n_leafs; i++) { struct ggml_tensor * leaf = graph->leafs[i]; struct leaf_alloc * leaf_alloc = &galloc->leaf_allocs[i]; ggml_gallocr_init_tensor(galloc, leaf, &leaf_alloc->leaf); } // nodes for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; struct node_alloc * node_alloc = &galloc->node_allocs[i]; for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { continue; } ggml_gallocr_init_tensor(galloc, src, &node_alloc->src[j]); } ggml_gallocr_init_tensor(galloc, node, &node_alloc->dst); } return true; } size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) { GGML_ASSERT(buffer_id >= 0 && buffer_id < galloc->n_buffers); if (galloc->buffers[buffer_id] == NULL) { return 0; } for (int i = 0; i < buffer_id; i++) { if (galloc->buffers[i] == galloc->buffers[buffer_id]) { // this buffer is the same as a previous one due to the same buffer type being used multiple times // only return the buffer size the first time it appears to avoid double counting return 0; } } return ggml_vbuffer_size(galloc->buffers[buffer_id]); } // utils static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) { for (size_t i = 0; i < *n_buffers; i++) { ggml_backend_buffer_free((*buffers)[i]); } free(*buffers); } static bool alloc_tensor_range(struct ggml_context * ctx, struct ggml_tensor * first, struct ggml_tensor * last, ggml_backend_buffer_type_t buft, size_t size, ggml_backend_buffer_t ** buffers, size_t * n_buffers) { ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size); if (buffer == NULL) { GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size); free_buffers(buffers, n_buffers); return false; } *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1)); (*buffers)[(*n_buffers)++] = buffer; struct ggml_tallocr tallocr = ggml_tallocr_new(buffer); for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) { enum ggml_status status = GGML_STATUS_SUCCESS; if (t->data == NULL) { if (t->view_src == NULL) { status = ggml_tallocr_alloc(&tallocr, t); } else if (t->buffer == NULL) { status = ggml_backend_view_init(t); } } else { if (t->view_src != NULL && t->buffer == NULL) { // view of a pre-allocated tensor status = ggml_backend_view_init(t); } } if (status != GGML_STATUS_SUCCESS) { GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name); free_buffers(buffers, n_buffers); return false; } } return true; } ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) { GGML_ASSERT(ggml_get_no_alloc(ctx) == true); size_t alignment = ggml_backend_buft_get_alignment(buft); size_t max_size = ggml_backend_buft_get_max_size(buft); ggml_backend_buffer_t * buffers = NULL; size_t n_buffers = 0; size_t cur_buf_size = 0; struct ggml_tensor * first = ggml_get_first_tensor(ctx); for (struct ggml_tensor * t = 
first; t != NULL; t = ggml_get_next_tensor(ctx, t)) { size_t this_size = 0; if (t->data == NULL && t->view_src == NULL) { this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment); } if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) { // allocate tensors in the current buffer if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) { return NULL; } first = t; cur_buf_size = this_size; } else { cur_buf_size += this_size; } } // allocate remaining tensors if (cur_buf_size > 0) { if (!alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) { return NULL; } } if (n_buffers == 0) { #ifndef NDEBUG GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__); #endif return NULL; } ggml_backend_buffer_t buffer; if (n_buffers == 1) { buffer = buffers[0]; } else { buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers); } free(buffers); return buffer; } ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) { return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend)); } ggml-org-ggml-7ec8045/src/ggml-backend-impl.h000066400000000000000000000275421506673203700207470ustar00rootroot00000000000000#pragma once // ggml-backend internal header #include "ggml-backend.h" #ifdef __cplusplus extern "C" { #endif #define GGML_BACKEND_API_VERSION 2 // // Backend buffer type // struct ggml_backend_buffer_type_i { const char * (*get_name) (ggml_backend_buffer_type_t buft); // allocate a buffer of this type ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size); // tensor alignment size_t (*get_alignment) (ggml_backend_buffer_type_t buft); // (optional) max buffer size that can be allocated (defaults to SIZE_MAX) size_t (*get_max_size) (ggml_backend_buffer_type_t buft); // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes) size_t (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false) bool (*is_host) (ggml_backend_buffer_type_t buft); }; struct ggml_backend_buffer_type { struct ggml_backend_buffer_type_i iface; ggml_backend_dev_t device; void * context; }; // // Backend buffer // struct ggml_backend_buffer_i { // (optional) free the buffer void (*free_buffer) (ggml_backend_buffer_t buffer); // base address of the buffer void * (*get_base) (ggml_backend_buffer_t buffer); // (optional) initialize a tensor in the buffer (eg. 
add tensor extras) enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // tensor data access void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size); void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); void (*get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported) bool (*cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); // clear the entire buffer void (*clear) (ggml_backend_buffer_t buffer, uint8_t value); // (optional) reset any internal state due to tensor initialization, such as tensor extras void (*reset) (ggml_backend_buffer_t buffer); }; struct ggml_backend_buffer { struct ggml_backend_buffer_i iface; ggml_backend_buffer_type_t buft; void * context; size_t size; enum ggml_backend_buffer_usage usage; }; GGML_API ggml_backend_buffer_t ggml_backend_buffer_init( ggml_backend_buffer_type_t buft, struct ggml_backend_buffer_i iface, void * context, size_t size); // do not use directly, use ggml_backend_tensor_copy instead GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst); // multi-buffer // buffer that contains a collection of buffers GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers); GGML_API bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer); GGML_API void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage); // // Backend (stream) // struct ggml_backend_i { const char * (*get_name)(ggml_backend_t backend); void (*free)(ggml_backend_t backend); // (optional) asynchronous tensor data access void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst); // (optional) complete all pending operations (required if the backend supports async operations) void (*synchronize)(ggml_backend_t backend); // (optional) graph plans (not used currently) // compute graph with a plan ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph); void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan); // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology void (*graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph); // compute the graph with the plan enum ggml_status (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan); // compute graph (always async if supported by the backend) enum ggml_status (*graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph); // (optional) event synchronization // record an event on this stream void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event); // wait for an event on a
different stream void (*event_wait) (ggml_backend_t backend, ggml_backend_event_t event); // (optional) sort/optimize the nodes in the graph void (*graph_optimize) (ggml_backend_t backend, struct ggml_cgraph * cgraph); }; struct ggml_backend { ggml_guid_t guid; struct ggml_backend_i iface; ggml_backend_dev_t device; void * context; }; struct ggml_backend_event { struct ggml_backend_device * device; void * context; }; // // Backend device // // Note: if additional properties are needed, we should add a struct with all of them // the current functions to obtain the properties can remain, since they are more convenient for often used properties struct ggml_backend_device_i { // device name: short identifier for this device, such as "CPU" or "CUDA0" const char * (*get_name)(ggml_backend_dev_t dev); // device description: short informative description of the device, could be the model name const char * (*get_description)(ggml_backend_dev_t dev); // device memory in bytes void (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total); // device type enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev); // device properties void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props); // backend (stream) initialization ggml_backend_t (*init_backend)(ggml_backend_dev_t dev, const char * params); // preferred buffer type ggml_backend_buffer_type_t (*get_buffer_type)(ggml_backend_dev_t dev); // (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device) ggml_backend_buffer_type_t (*get_host_buffer_type)(ggml_backend_dev_t dev); // (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries) ggml_backend_buffer_t (*buffer_from_host_ptr)(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size); // check if the backend can compute an operation bool (*supports_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op); // check if the backend can use tensors allocated in a buffer type bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft); // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer // these should be expensive operations that may benefit from running on this backend instead of the CPU backend bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op); // (optional) event synchronization ggml_backend_event_t (*event_new) (ggml_backend_dev_t dev); void (*event_free) (ggml_backend_dev_t dev, ggml_backend_event_t event); void (*event_synchronize) (ggml_backend_dev_t dev, ggml_backend_event_t event); }; struct ggml_backend_device { struct ggml_backend_device_i iface; ggml_backend_reg_t reg; void * context; }; // // Backend (reg) // struct ggml_backend_reg_i { const char * (*get_name)(ggml_backend_reg_t reg); // enumerate available devices size_t (*get_device_count)(ggml_backend_reg_t reg); ggml_backend_dev_t (*get_device)(ggml_backend_reg_t reg, size_t index); // (optional) get a pointer to a function in the backend // backends can add custom functions that are not part of the standard ggml-backend interface void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name); }; struct ggml_backend_reg { int api_version; // initialize to GGML_BACKEND_API_VERSION struct ggml_backend_reg_i iface; void * context; }; // Internal backend registry API GGML_API void 
ggml_backend_register(ggml_backend_reg_t reg); // Add backend dynamic loading support to the backend // Initialize the backend typedef ggml_backend_reg_t (*ggml_backend_init_t)(void); // Optional: obtain a score for the backend based on the system configuration // Higher scores are preferred, 0 means the backend is not supported in the current system typedef int (*ggml_backend_score_t)(void); #ifdef GGML_BACKEND_DL # ifdef __cplusplus # define GGML_BACKEND_DL_IMPL(reg_fn) \ extern "C" { \ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \ } \ ggml_backend_reg_t ggml_backend_init(void) { \ return reg_fn(); \ } # define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \ extern "C" { \ GGML_BACKEND_API int ggml_backend_score(void); \ } \ int ggml_backend_score(void) { \ return score_fn(); \ } # else # define GGML_BACKEND_DL_IMPL(reg_fn) \ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \ ggml_backend_reg_t ggml_backend_init(void) { \ return reg_fn(); \ } # define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \ GGML_BACKEND_API int ggml_backend_score(void); \ int ggml_backend_score(void) { \ return score_fn(); \ } # endif #else # define GGML_BACKEND_DL_IMPL(reg_fn) # define GGML_BACKEND_DL_SCORE_IMPL(score_fn) #endif #ifdef __cplusplus } #endif
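// Example (illustrative sketch, not part of the original header): a dynamically
// loadable backend would typically end its registration source file with these
// macros. ggml_backend_cuda_reg is the real CUDA registration entry point; the
// score function name below is hypothetical.
//
//   GGML_BACKEND_DL_IMPL(ggml_backend_cuda_reg)
//   GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cuda_score)   // hypothetical score fn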
ggml-org-ggml-7ec8045/src/ggml-backend-reg.cpp000066400000000000000000000430361506673203700211120ustar00rootroot00000000000000#include "ggml-backend-impl.h" #include "ggml-backend.h" #include "ggml-impl.h" #include <algorithm> #include <cctype> #include <cstdlib> #include <filesystem> #include <memory> #include <string> #include <type_traits> #include <vector> #ifdef _WIN32 # define WIN32_LEAN_AND_MEAN # ifndef NOMINMAX # define NOMINMAX # endif # include <windows.h> #elif defined(__APPLE__) # include <mach-o/dyld.h> # include <dlfcn.h> #else # include <dlfcn.h> # include <unistd.h> #endif // Backend registry #ifdef GGML_USE_CPU #include "ggml-cpu.h" #endif #ifdef GGML_USE_CUDA #include "ggml-cuda.h" #endif #ifdef GGML_USE_METAL #include "ggml-metal.h" #endif #ifdef GGML_USE_SYCL #include "ggml-sycl.h" #endif #ifdef GGML_USE_VULKAN #include "ggml-vulkan.h" #endif #ifdef GGML_USE_WEBGPU #include "ggml-webgpu.h" #endif #ifdef GGML_USE_ZDNN #include "ggml-zdnn.h" #endif #ifdef GGML_USE_OPENCL #include "ggml-opencl.h" #endif #ifdef GGML_USE_BLAS #include "ggml-blas.h" #endif #ifdef GGML_USE_RPC #include "ggml-rpc.h" #endif #ifdef GGML_USE_CANN #include "ggml-cann.h" #endif // disable C++17 deprecation warning for std::codecvt_utf8 #if defined(__clang__) # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wdeprecated-declarations" #elif defined(__GNUC__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wdeprecated-declarations" #endif namespace fs = std::filesystem; static std::string path_str(const fs::path & path) { std::string u8path; try { #if defined(__cpp_lib_char8_t) // C++20 and later: u8string() returns std::u8string std::u8string u8str = path.u8string(); u8path = std::string(reinterpret_cast<const char *>(u8str.c_str())); #else // C++17: u8string() returns std::string u8path = path.u8string(); #endif } catch (...) { } return u8path; } #if defined(__clang__) # pragma clang diagnostic pop #elif defined(__GNUC__) # pragma GCC diagnostic pop #endif #ifdef _WIN32 using dl_handle = std::remove_pointer_t<HMODULE>; struct dl_handle_deleter { void operator()(HMODULE handle) { FreeLibrary(handle); } }; static dl_handle * dl_load_library(const fs::path & path) { // suppress error dialogs for missing DLLs DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); HMODULE handle = LoadLibraryW(path.wstring().c_str()); SetErrorMode(old_mode); return handle; } static void * dl_get_sym(dl_handle * handle, const char * name) { DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); void * p = (void *) GetProcAddress(handle, name); SetErrorMode(old_mode); return p; } static const char * dl_error() { return ""; } #else using dl_handle = void; struct dl_handle_deleter { void operator()(void * handle) { dlclose(handle); } }; static void * dl_load_library(const fs::path & path) { dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL); return handle; } static void * dl_get_sym(dl_handle * handle, const char * name) { return dlsym(handle, name); } static const char * dl_error() { const char *rslt = dlerror(); return rslt != nullptr ? rslt : ""; } #endif using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>; struct ggml_backend_reg_entry { ggml_backend_reg_t reg; dl_handle_ptr handle; }; struct ggml_backend_registry { std::vector<ggml_backend_reg_entry> backends; std::vector<ggml_backend_dev_t> devices; ggml_backend_registry() { #ifdef GGML_USE_CUDA register_backend(ggml_backend_cuda_reg()); #endif #ifdef GGML_USE_METAL register_backend(ggml_backend_metal_reg()); #endif #ifdef GGML_USE_SYCL register_backend(ggml_backend_sycl_reg()); #endif #ifdef GGML_USE_VULKAN register_backend(ggml_backend_vk_reg()); #endif #ifdef GGML_USE_WEBGPU register_backend(ggml_backend_webgpu_reg()); #endif #ifdef GGML_USE_ZDNN register_backend(ggml_backend_zdnn_reg()); #endif #ifdef GGML_USE_OPENCL register_backend(ggml_backend_opencl_reg()); #endif #ifdef GGML_USE_CANN register_backend(ggml_backend_cann_reg()); #endif #ifdef GGML_USE_BLAS register_backend(ggml_backend_blas_reg()); #endif #ifdef GGML_USE_RPC register_backend(ggml_backend_rpc_reg()); #endif #ifdef GGML_USE_CPU register_backend(ggml_backend_cpu_reg()); #endif } ~ggml_backend_registry() { // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources, // since backend threads may still be running and accessing resources from the dynamic library for (auto & entry : backends) { if (entry.handle) { entry.handle.release(); // NOLINT } } } void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) { if (!reg) { return; } #ifndef NDEBUG GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n", __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg)); #endif backends.push_back({ reg, std::move(handle) }); for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) { register_device(ggml_backend_reg_dev_get(reg, i)); } } void register_device(ggml_backend_dev_t device) { #ifndef NDEBUG GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device)); #endif devices.push_back(device); } ggml_backend_reg_t load_backend(const fs::path & path, bool silent) { dl_handle_ptr handle { dl_load_library(path) }; if (!handle) { if (!silent) { GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(path).c_str(),
dl_error()); } return nullptr; } auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score"); if (score_fn && score_fn() == 0) { if (!silent) { GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path_str(path).c_str()); } return nullptr; } auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init"); if (!backend_init_fn) { if (!silent) { GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path_str(path).c_str()); } return nullptr; } ggml_backend_reg_t reg = backend_init_fn(); if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) { if (!silent) { if (!reg) { GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path_str(path).c_str()); } else { GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n", __func__, path_str(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION); } } return nullptr; } GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path_str(path).c_str()); register_backend(reg, std::move(handle)); return reg; } void unload_backend(ggml_backend_reg_t reg, bool silent) { auto it = std::find_if(backends.begin(), backends.end(), [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; }); if (it == backends.end()) { if (!silent) { GGML_LOG_ERROR("%s: backend not found\n", __func__); } return; } if (!silent) { GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg)); } // remove devices devices.erase( std::remove_if(devices.begin(), devices.end(), [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }), devices.end()); // remove backend backends.erase(it); } }; static ggml_backend_registry & get_reg() { static ggml_backend_registry reg; return reg; } // Internal API void ggml_backend_register(ggml_backend_reg_t reg) { get_reg().register_backend(reg); } void ggml_backend_device_register(ggml_backend_dev_t device) { get_reg().register_device(device); } // Backend (reg) enumeration static bool striequals(const char * a, const char * b) { for (; *a && *b; a++, b++) { if (std::tolower(*a) != std::tolower(*b)) { return false; } } return *a == *b; } size_t ggml_backend_reg_count() { return get_reg().backends.size(); } ggml_backend_reg_t ggml_backend_reg_get(size_t index) { GGML_ASSERT(index < ggml_backend_reg_count()); return get_reg().backends[index].reg; } ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) { for (size_t i = 0; i < ggml_backend_reg_count(); i++) { ggml_backend_reg_t reg = ggml_backend_reg_get(i); if (striequals(ggml_backend_reg_name(reg), name)) { return reg; } } return nullptr; } // Device enumeration size_t ggml_backend_dev_count() { return get_reg().devices.size(); } ggml_backend_dev_t ggml_backend_dev_get(size_t index) { GGML_ASSERT(index < ggml_backend_dev_count()); return get_reg().devices[index]; } ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) { for (size_t i = 0; i < ggml_backend_dev_count(); i++) { ggml_backend_dev_t dev = ggml_backend_dev_get(i); if (striequals(ggml_backend_dev_name(dev), name)) { return dev; } } return nullptr; } ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) { for (size_t i = 0; i < ggml_backend_dev_count(); i++) { ggml_backend_dev_t dev = ggml_backend_dev_get(i); if (ggml_backend_dev_type(dev) == type) { return dev; } } return nullptr; } // Convenience functions 
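// Example (illustrative sketch, not part of the original source): selecting a
// backend by device name with a fallback to the best available device. The
// name "CUDA0" is only an example; actual device names depend on the build and
// the system.
//
//   ggml_backend_t backend = ggml_backend_init_by_name("CUDA0", /* params = */ NULL);
//   if (backend == NULL) {
//       backend = ggml_backend_init_best(); // GPU if present, else iGPU, else CPU
//   }
//   // ... use the backend ...
//   ggml_backend_free(backend);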
ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) { ggml_backend_dev_t dev = ggml_backend_dev_by_name(name); if (!dev) { return nullptr; } return ggml_backend_dev_init(dev, params); } ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) { ggml_backend_dev_t dev = ggml_backend_dev_by_type(type); if (!dev) { return nullptr; } return ggml_backend_dev_init(dev, params); } ggml_backend_t ggml_backend_init_best(void) { ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU); dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU); dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); if (!dev) { return nullptr; } return ggml_backend_dev_init(dev, nullptr); } // Dynamic loading ggml_backend_reg_t ggml_backend_load(const char * path) { return get_reg().load_backend(path, false); } void ggml_backend_unload(ggml_backend_reg_t reg) { get_reg().unload_backend(reg, true); } static fs::path get_executable_path() { #if defined(__APPLE__) // get executable path std::vector<char> path; uint32_t size; while (true) { size = path.size(); if (_NSGetExecutablePath(path.data(), &size) == 0) { break; } path.resize(size); } std::string base_path(path.data(), size); // remove executable name auto last_slash = base_path.find_last_of('/'); if (last_slash != std::string::npos) { base_path = base_path.substr(0, last_slash); } return base_path + "/"; #elif defined(__linux__) || defined(__FreeBSD__) std::string base_path = "."; std::vector<char> path(1024); while (true) { // get executable path # if defined(__linux__) ssize_t len = readlink("/proc/self/exe", path.data(), path.size()); # elif defined(__FreeBSD__) ssize_t len = readlink("/proc/curproc/file", path.data(), path.size()); # endif if (len == -1) { break; } if (len < (ssize_t) path.size()) { base_path = std::string(path.data(), len); // remove executable name auto last_slash = base_path.find_last_of('/'); if (last_slash != std::string::npos) { base_path = base_path.substr(0, last_slash); } break; } path.resize(path.size() * 2); } return base_path + "/"; #elif defined(_WIN32) std::vector<wchar_t> path(MAX_PATH); DWORD len = GetModuleFileNameW(NULL, path.data(), path.size()); if (len == 0) { return {}; } std::wstring base_path(path.data(), len); // remove executable name auto last_slash = base_path.find_last_of('\\'); if (last_slash != std::string::npos) { base_path = base_path.substr(0, last_slash); } return base_path + L"\\"; #else return {}; #endif } static fs::path backend_filename_prefix() { #ifdef _WIN32 return fs::u8path("ggml-"); #else return fs::u8path("libggml-"); #endif } static fs::path backend_filename_extension() { #ifdef _WIN32 return fs::u8path(".dll"); #else return fs::u8path(".so"); #endif } static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) { // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths const fs::path name_path = fs::u8path(name); const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native(); const fs::path file_extension = backend_filename_extension(); std::vector<fs::path> search_paths; if (user_search_path == nullptr) { #ifdef GGML_BACKEND_DIR search_paths.push_back(fs::u8path(GGML_BACKEND_DIR)); #endif // default search paths: executable directory, current directory search_paths.push_back(get_executable_path()); search_paths.push_back(fs::current_path()); } else { search_paths.push_back(fs::u8path(user_search_path)); } int best_score = 0; fs::path best_path; for (const auto & search_path : search_paths) { if (!fs::exists(search_path)) { GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str()); continue; } fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied); for (const auto & entry : dir_it) { if (entry.is_regular_file()) { auto filename = entry.path().filename(); auto ext = entry.path().extension(); if (filename.native().find(file_prefix) == 0 && ext == file_extension) { dl_handle_ptr handle { dl_load_library(entry) }; if (!handle && !silent) { GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(entry.path()).c_str(), dl_error()); } if (handle) { auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score"); if (score_fn) { int s = score_fn(); #ifndef NDEBUG GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, path_str(entry.path()).c_str(), s); #endif if (s > best_score) { best_score = s; best_path = entry.path(); } } else { if (!silent) { GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, path_str(entry.path()).c_str()); } } } } } } } if (best_score == 0) { // try to load the base backend for (const auto & search_path : search_paths) { fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native(); fs::path path = search_path / filename; if (fs::exists(path)) { return get_reg().load_backend(path, silent); } } return nullptr; } return get_reg().load_backend(best_path, silent); } void ggml_backend_load_all() { ggml_backend_load_all_from_path(nullptr); } void ggml_backend_load_all_from_path(const char * dir_path) { #ifdef NDEBUG bool silent = true; #else bool silent = false; #endif ggml_backend_load_best("blas", silent, dir_path); ggml_backend_load_best("cann", silent, dir_path); ggml_backend_load_best("cuda", silent, dir_path); ggml_backend_load_best("hip", silent, dir_path); ggml_backend_load_best("metal", silent, dir_path); ggml_backend_load_best("rpc", silent, dir_path); ggml_backend_load_best("sycl", silent, dir_path); ggml_backend_load_best("vulkan", silent, dir_path); ggml_backend_load_best("opencl", silent, dir_path); ggml_backend_load_best("musa", silent, dir_path); ggml_backend_load_best("cpu", silent, dir_path); // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend const char * backend_path = std::getenv("GGML_BACKEND_PATH"); if (backend_path) { ggml_backend_load(backend_path); } }
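// Example (illustrative sketch, not part of the original source): typical
// application start-up with dynamically loaded backends. The directory path
// below is hypothetical.
//
//   ggml_backend_load_all();                       // probe the default search paths
//   // or: ggml_backend_load_all_from_path("/opt/ggml/backends");
//   // out-of-tree backends can also be loaded via the GGML_BACKEND_PATH environment variable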
ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) { GGML_ASSERT(buft); return buft->iface.get_alignment(buft); } size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) { GGML_ASSERT(buft); // get_max_size is optional, defaults to SIZE_MAX if (buft->iface.get_max_size) { return buft->iface.get_max_size(buft); } return SIZE_MAX; } size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor) { GGML_ASSERT(buft); // get_alloc_size is optional, defaults to ggml_nbytes if (buft->iface.get_alloc_size) { size_t size = buft->iface.get_alloc_size(buft, tensor); assert(size >= ggml_nbytes(tensor)); return size; } return ggml_nbytes(tensor); } bool ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) { GGML_ASSERT(buft); if (buft->iface.is_host) { return buft->iface.is_host(buft); } return false; } ggml_backend_dev_t ggml_backend_buft_get_device(ggml_backend_buffer_type_t buft) { GGML_ASSERT(buft); return buft->device; } // backend buffer ggml_backend_buffer_t ggml_backend_buffer_init( ggml_backend_buffer_type_t buft, struct ggml_backend_buffer_i iface, void * context, size_t size) { ggml_backend_buffer_t buffer = new ggml_backend_buffer { /* .interface = */ iface, /* .buft = */ buft, /* .context = */ context, /* .size = */ size, /* .usage = */ GGML_BACKEND_BUFFER_USAGE_ANY }; return buffer; } const char * ggml_backend_buffer_name(ggml_backend_buffer_t buffer) { return ggml_backend_buft_name(ggml_backend_buffer_get_type(buffer)); } void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) { if (buffer == NULL) { return; } if (buffer->iface.free_buffer != NULL) { buffer->iface.free_buffer(buffer); } delete buffer; } size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) { GGML_ASSERT(buffer); return buffer->size; } void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) { GGML_ASSERT(buffer); // get_base is optional if the buffer is zero-sized if (buffer->size == 0) { return NULL; } void * base = buffer->iface.get_base(buffer); GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL"); return base; } enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { GGML_ASSERT(buffer); // init_tensor is optional if (buffer->iface.init_tensor) { return buffer->iface.init_tensor(buffer, tensor); } return GGML_STATUS_SUCCESS; } void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { GGML_ASSERT(buffer); // clear is optional if the buffer is zero-sized if (buffer->size == 0) { return; } buffer->iface.clear(buffer, value); } size_t ggml_backend_buffer_get_alignment(ggml_backend_buffer_t buffer) { return ggml_backend_buft_get_alignment(ggml_backend_buffer_get_type(buffer)); } size_t ggml_backend_buffer_get_max_size(ggml_backend_buffer_t buffer) { return ggml_backend_buft_get_max_size(ggml_backend_buffer_get_type(buffer)); } size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor) { return ggml_backend_buft_get_alloc_size(ggml_backend_buffer_get_type(buffer), tensor); } bool ggml_backend_buffer_is_host(ggml_backend_buffer_t buffer) { return ggml_backend_buft_is_host(ggml_backend_buffer_get_type(buffer)); } void ggml_backend_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) { GGML_ASSERT(buffer); buffer->usage = usage; // FIXME: add a generic callback to the buffer interface if (ggml_backend_buffer_is_multi_buffer(buffer)) { 
ggml_backend_multi_buffer_set_usage(buffer, usage); } } enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage(ggml_backend_buffer_t buffer) { GGML_ASSERT(buffer); return buffer->usage; } ggml_backend_buffer_type_t ggml_backend_buffer_get_type(ggml_backend_buffer_t buffer) { GGML_ASSERT(buffer); return buffer->buft; } void ggml_backend_buffer_reset(ggml_backend_buffer_t buffer) { GGML_ASSERT(buffer); if (buffer->iface.reset) { buffer->iface.reset(buffer); } } bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst) { ggml_backend_buffer_t dst_buf = dst->view_src ? dst->view_src->buffer : dst->buffer; if (dst_buf->iface.cpy_tensor) { return dst_buf->iface.cpy_tensor(dst_buf, src, dst); } return false; } // backend ggml_guid_t ggml_backend_guid(ggml_backend_t backend) { if (backend == NULL) { return NULL; } return backend->guid; } const char * ggml_backend_name(ggml_backend_t backend) { if (backend == NULL) { return "NULL"; } return backend->iface.get_name(backend); } void ggml_backend_free(ggml_backend_t backend) { if (backend == NULL) { return; } backend->iface.free(backend); } ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend) { GGML_ASSERT(backend); return ggml_backend_dev_buffer_type(backend->device); } ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size) { return ggml_backend_buft_alloc_buffer(ggml_backend_get_default_buffer_type(backend), size); } size_t ggml_backend_get_alignment(ggml_backend_t backend) { return ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend)); } size_t ggml_backend_get_max_size(ggml_backend_t backend) { return ggml_backend_buft_get_max_size(ggml_backend_get_default_buffer_type(backend)); } void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { GGML_ASSERT(backend); GGML_ASSERT(tensor); GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds"); if (backend->iface.set_tensor_async == NULL) { ggml_backend_tensor_set(tensor, data, offset, size); } else { backend->iface.set_tensor_async(backend, tensor, data, offset, size); } } void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { GGML_ASSERT(backend); GGML_ASSERT(tensor); GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds"); if (backend->iface.get_tensor_async == NULL) { ggml_backend_tensor_get(tensor, data, offset, size); } else { backend->iface.get_tensor_async(backend, tensor, data, offset, size); } } void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { GGML_ASSERT(tensor); ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; if (size == 0) { return; } GGML_ASSERT(buf != NULL && "tensor buffer not set"); GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds"); buf->iface.set_tensor(buf, tensor, data, offset, size); } void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { GGML_ASSERT(tensor); ggml_backend_buffer_t buf = tensor->view_src ? 
tensor->view_src->buffer : tensor->buffer; if (size == 0) { return; } GGML_ASSERT(buf != NULL && "tensor buffer not set"); GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds"); buf->iface.get_tensor(buf, tensor, data, offset, size); } void ggml_backend_tensor_memset(struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { GGML_ASSERT(tensor); ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; if (size == 0) { return; } GGML_ASSERT(buf != NULL && "tensor buffer not set"); GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds"); GGML_ASSERT(buf->iface.memset_tensor != NULL && "memset not implemented by backend buffer"); buf->iface.memset_tensor(buf, tensor, value, offset, size); } void ggml_backend_synchronize(ggml_backend_t backend) { GGML_ASSERT(backend); if (backend->iface.synchronize == NULL) { return; } backend->iface.synchronize(backend); } ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) { GGML_ASSERT(backend); GGML_ASSERT(backend->iface.graph_plan_create != NULL); return backend->iface.graph_plan_create(backend, cgraph); } void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) { GGML_ASSERT(backend); GGML_ASSERT(backend->iface.graph_plan_free != NULL); backend->iface.graph_plan_free(backend, plan); } enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) { GGML_ASSERT(backend); GGML_ASSERT(backend->iface.graph_plan_compute != NULL); return backend->iface.graph_plan_compute(backend, plan); } enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { enum ggml_status err = ggml_backend_graph_compute_async(backend, cgraph); ggml_backend_synchronize(backend); return err; } enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph) { GGML_ASSERT(backend); return backend->iface.graph_compute(backend, cgraph); } bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) { GGML_ASSERT(backend); return ggml_backend_dev_supports_op(backend->device, op); } bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) { GGML_ASSERT(backend); return ggml_backend_dev_supports_buft(backend->device, buft); } bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op) { GGML_ASSERT(backend); return ggml_backend_dev_offload_op(backend->device, op); } ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) { GGML_ASSERT(backend); return backend->device; } // backend copy void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts"); if (src == dst) { return; } if (ggml_backend_buffer_is_host(src->buffer)) { ggml_backend_tensor_set(dst, src->data, 0, ggml_nbytes(src)); } else if (ggml_backend_buffer_is_host(dst->buffer)) { ggml_backend_tensor_get(src, dst->data, 0, ggml_nbytes(src)); } else if (!ggml_backend_buffer_copy_tensor(src, dst)) { #ifndef NDEBUG GGML_LOG_DEBUG("%s: warning: slow copy from %s to %s\n", __func__, ggml_backend_buffer_name(src->buffer), ggml_backend_buffer_name(dst->buffer)); #endif size_t nbytes = ggml_nbytes(src); void * data = 
malloc(nbytes); ggml_backend_tensor_get(src, data, 0, nbytes); ggml_backend_tensor_set(dst, data, 0, nbytes); free(data); } } void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts"); if (src == dst) { return; } GGML_ASSERT(backend_dst); if (backend_dst->iface.cpy_tensor_async != NULL) { if (backend_dst->iface.cpy_tensor_async(backend_src, backend_dst, src, dst)) { return; } } // an async copy would normally happen after all the queued operations on both backends are completed // to simulate the same behavior, we need to synchronize both backends first, and do a blocking copy ggml_backend_synchronize(backend_src); ggml_backend_synchronize(backend_dst); ggml_backend_tensor_copy(src, dst); } // events ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device) { // null device is allowed for the transition period to the device interface if (device == NULL || device->iface.event_new == NULL) { return NULL; } return device->iface.event_new(device); } void ggml_backend_event_free(ggml_backend_event_t event) { if (event == NULL) { return; } event->device->iface.event_free(event->device, event); } void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend) { GGML_ASSERT(backend); GGML_ASSERT(backend->iface.event_record != NULL); backend->iface.event_record(backend, event); } void ggml_backend_event_synchronize(ggml_backend_event_t event) { GGML_ASSERT(event); GGML_ASSERT(event->device->iface.event_synchronize); event->device->iface.event_synchronize(event->device, event); } void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event) { GGML_ASSERT(backend); GGML_ASSERT(backend->iface.event_wait != NULL); backend->iface.event_wait(backend, event); } static void ggml_backend_graph_optimize(ggml_backend_t backend, struct ggml_cgraph * cgraph) { GGML_ASSERT(backend); if (backend->iface.graph_optimize != NULL) { backend->iface.graph_optimize(backend, cgraph); } } // Backend device const char * ggml_backend_dev_name(ggml_backend_dev_t device) { GGML_ASSERT(device); return device->iface.get_name(device); } const char * ggml_backend_dev_description(ggml_backend_dev_t device) { GGML_ASSERT(device); return device->iface.get_description(device); } void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total) { GGML_ASSERT(device); device->iface.get_memory(device, free, total); } enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device) { GGML_ASSERT(device); return device->iface.get_type(device); } void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props) { memset(props, 0, sizeof(*props)); device->iface.get_props(device, props); } ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) { GGML_ASSERT(device); return device->reg; } ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params) { GGML_ASSERT(device); return device->iface.init_backend(device, params); } ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device) { GGML_ASSERT(device); return device->iface.get_buffer_type(device); } ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device) { GGML_ASSERT(device); if (device->iface.get_host_buffer_type == NULL) { return NULL; } return device->iface.get_host_buffer_type(device); } 
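// Example (illustrative sketch, not part of the original source): enumerating
// the registered devices with the accessors above; the output format is arbitrary.
//
//   for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
//       ggml_backend_dev_t dev = ggml_backend_dev_get(i);
//       size_t free, total;
//       ggml_backend_dev_memory(dev, &free, &total);
//       printf("%s - %s (%zu/%zu MiB free)\n",
//              ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
//              free / 1024 / 1024, total / 1024 / 1024);
//   }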
ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size) { GGML_ASSERT(device); return device->iface.buffer_from_host_ptr(device, ptr, size, max_tensor_size); } bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op) { GGML_ASSERT(device); return device->iface.supports_op(device, op); } bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft) { GGML_ASSERT(device); return device->iface.supports_buft(device, buft); } bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op) { GGML_ASSERT(device); if (device->iface.offload_op != NULL) { return device->iface.offload_op(device, op); } return false; } // Backend (reg) const char * ggml_backend_reg_name(ggml_backend_reg_t reg) { GGML_ASSERT(reg); return reg->iface.get_name(reg); } size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg) { GGML_ASSERT(reg); return reg->iface.get_device_count(reg); } ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index) { GGML_ASSERT(reg); return reg->iface.get_device(reg, index); } void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) { GGML_ASSERT(reg); if (!reg->iface.get_proc_address) { return NULL; } return reg->iface.get_proc_address(reg, name); } // multi-buffer buffer struct ggml_backend_multi_buffer_context { ggml_backend_buffer_t * buffers; size_t n_buffers; }; static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer) { GGML_ASSERT(buffer); ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context; for (size_t i = 0; i < ctx->n_buffers; i++) { ggml_backend_buffer_free(ctx->buffers[i]); } free(ctx->buffers); free(ctx); } static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { GGML_ASSERT(buffer); ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context; for (size_t i = 0; i < ctx->n_buffers; i++) { ggml_backend_buffer_clear(ctx->buffers[i], value); } } static const struct ggml_backend_buffer_i ggml_backend_multi_buffer_i = { /* .free_buffer = */ ggml_backend_multi_buffer_free_buffer, /* .get_base = */ NULL, /* .init_tensor = */ NULL, /* .memset_tensor = */ NULL, /* .set_tensor = */ NULL, /* .get_tensor = */ NULL, /* .cpy_tensor = */ NULL, /* .clear = */ ggml_backend_multi_buffer_clear, /* .reset = */ NULL, }; ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers) { ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) malloc(sizeof(struct ggml_backend_multi_buffer_context)); ctx->n_buffers = n_buffers; ctx->buffers = (ggml_backend_buffer_t *) malloc(n_buffers * sizeof(ggml_backend_buffer_t)); GGML_ASSERT(ctx->buffers != NULL); size_t total_size = 0; for (size_t i = 0; i < n_buffers; i++) { ctx->buffers[i] = buffers[i]; total_size += ggml_backend_buffer_get_size(buffers[i]); } return ggml_backend_buffer_init(buffers[0]->buft, ggml_backend_multi_buffer_i, ctx, total_size); } bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer) { GGML_ASSERT(buffer); return buffer->iface.free_buffer == ggml_backend_multi_buffer_free_buffer; } void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) { GGML_ASSERT(buffer); GGML_ASSERT(ggml_backend_buffer_is_multi_buffer(buffer)); 
ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context; for (size_t i = 0; i < ctx->n_buffers; i++) { ggml_backend_buffer_set_usage(ctx->buffers[i], usage); } } // creates a copy of the tensor with the same memory layout static struct ggml_tensor * ggml_dup_tensor_layout(struct ggml_context * ctx, const struct ggml_tensor * tensor) { struct ggml_tensor * dup = ggml_dup_tensor(ctx, tensor); for (int i = 0; i < GGML_MAX_DIMS; i++) { dup->nb[i] = tensor->nb[i]; } return dup; } static bool ggml_is_view_op(enum ggml_op op) { return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE; } // scheduler #ifndef GGML_SCHED_MAX_BACKENDS #define GGML_SCHED_MAX_BACKENDS 16 #endif #ifndef GGML_SCHED_MAX_SPLIT_INPUTS #define GGML_SCHED_MAX_SPLIT_INPUTS 30 #endif #ifndef GGML_SCHED_MAX_COPIES #define GGML_SCHED_MAX_COPIES 4 #endif struct ggml_backend_sched_split { int backend_id; int i_start; int i_end; struct ggml_tensor * inputs[GGML_SCHED_MAX_SPLIT_INPUTS]; int n_inputs; // graph view of this split struct ggml_cgraph graph; }; struct ggml_backend_sched { bool is_reset; // true if the scheduler has been reset since the last graph split bool is_alloc; int n_backends; ggml_backend_t backends[GGML_SCHED_MAX_BACKENDS]; ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS]; ggml_gallocr_t galloc; // hash map of the nodes in the graph struct ggml_hash_set hash_set; int * hv_tensor_backend_ids; // [hash_set.size] struct ggml_tensor ** hv_tensor_copies; // [hash_set.size][n_backends][n_copies] int * node_backend_ids; // [graph_size] int * leaf_backend_ids; // [graph_size] int * prev_node_backend_ids; // [graph_size] int * prev_leaf_backend_ids; // [graph_size] // copy of the graph with modified inputs struct ggml_cgraph graph; // graph splits struct ggml_backend_sched_split * splits; int n_splits; int splits_capacity; // pipeline parallelism support int n_copies; int cur_copy; int next_copy; ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES]; struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS]; int n_graph_inputs; struct ggml_context * ctx; ggml_backend_sched_eval_callback callback_eval; void * callback_eval_user_data; char * context_buffer; size_t context_buffer_size; bool op_offload; int debug; }; #define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor) #define tensor_backend_id(tensor) sched->hv_tensor_backend_ids[hash_id(tensor)] #define tensor_id_copy(id, backend_id, copy_id) sched->hv_tensor_copies[(id) * sched->n_backends * sched->n_copies + (backend_id) * sched->n_copies + (copy_id)] #define tensor_copy(tensor, backend_id, copy_id) tensor_id_copy(hash_id(tensor), backend_id, copy_id) // returns the priority of the backend, lower id is higher priority static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) { for (int i = 0; i < sched->n_backends; i++) { if (sched->backends[i] == backend) { return i; } } return -1; } static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, const struct ggml_tensor * tensor, const struct ggml_tensor * op) { ggml_backend_buffer_t buffer = tensor->view_src ? 
tensor->view_src->buffer : tensor->buffer; if (buffer == NULL) { return -1; } // find highest prio backend that supports the buffer type and the op for (int i = 0; i < sched->n_backends; i++) { if (ggml_backend_supports_buft(sched->backends[i], buffer->buft) && ggml_backend_supports_op(sched->backends[i], op)) { return i; } } #ifndef NDEBUG GGML_LOG_DEBUG("%s: warning: no backend supports op %s with a weight with buffer type %s used in tensor %s, the weight will need to be copied\n", __func__, ggml_op_desc(tensor), ggml_backend_buffer_name(buffer), tensor->name); #endif return -1; } #if 0 #define GGML_SCHED_MAX_SPLITS_DEBUG 4096 static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only #define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__) #define GET_CAUSE(node) causes[hash_id(node)] #else #define SET_CAUSE(node, ...) #define GET_CAUSE(node) "" #endif // returns the backend that should be used for the node based on the current locations static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * tensor) { // assign pre-allocated nodes to their backend int cur_backend_id = ggml_backend_sched_backend_from_buffer(sched, tensor, tensor); if (cur_backend_id != -1) { SET_CAUSE(tensor, "1.dst"); return cur_backend_id; } // view_src if (tensor->view_src != NULL) { cur_backend_id = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src, tensor); if (cur_backend_id != -1) { SET_CAUSE(tensor, "1.vsrc"); return cur_backend_id; } } if (tensor->buffer || (tensor->view_src && tensor->view_src->buffer)) { // since the tensor is pre-allocated, it cannot be moved to another backend ggml_backend_buffer_t buffer = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; GGML_ABORT("pre-allocated tensor (%s) in a buffer (%s) that cannot run the operation (%s)", tensor->name, ggml_backend_buffer_name(buffer), ggml_op_name(tensor->op)); } // graph input if (tensor->flags & GGML_TENSOR_FLAG_INPUT) { cur_backend_id = sched->n_backends - 1; // last backend (assumed CPU) SET_CAUSE(tensor, "1.inp"); return cur_backend_id; } // operations with weights are preferably run on the same backend as the weights for (int i = 0; i < GGML_MAX_SRC; i++) { const struct ggml_tensor * src = tensor->src[i]; if (src == NULL) { continue; } // skip ROPE since the rope freqs tensor is too small to choose a backend based on it // not an ideal solution if (tensor->op != GGML_OP_ROPE && src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) { int src_backend_id = ggml_backend_sched_backend_from_buffer(sched, src, tensor); // check if a backend with higher prio wants to offload the op if (sched->op_offload && src_backend_id == sched->n_backends - 1 && ggml_backend_buffer_is_host(src->buffer)) { for (int b = 0; b < src_backend_id; b++) { if (ggml_backend_supports_op(sched->backends[b], tensor) && ggml_backend_offload_op(sched->backends[b], tensor)) { SET_CAUSE(tensor, "1.off"); return b; } } } SET_CAUSE(tensor, "1.wgt%d", i); return src_backend_id; } } return -1; } static char * fmt_size(size_t size) { static char buffer[128]; if (size >= 1024*1024) { snprintf(buffer, sizeof(buffer), "%zuM", size/1024/1024); } else { snprintf(buffer, sizeof(buffer), "%zuK", size/1024); } return buffer; } static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { int cur_split = 0; for (int i = 0; i < graph->n_nodes; i++) { if (cur_split < 
sched->n_splits && i == sched->splits[cur_split].i_start) { ggml_backend_t split_backend = sched->backends[sched->splits[cur_split].backend_id]; GGML_LOG_DEBUG("\n## SPLIT #%d: %s # %d inputs", cur_split, ggml_backend_name(split_backend), sched->splits[cur_split].n_inputs); for (int j = 0; j < sched->splits[cur_split].n_inputs; j++) { if (j == 0) { GGML_LOG_DEBUG(": "); } GGML_LOG_DEBUG("[%s (%5.5s)] ", sched->splits[cur_split].inputs[j]->name, fmt_size(ggml_nbytes(sched->splits[cur_split].inputs[j]))); } GGML_LOG_DEBUG("\n"); cur_split++; } struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { continue; } if (sched->debug > 1) { ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node); GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d:", i, ggml_op_name(node->op), node->name, fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node), graph->use_counts[ggml_hash_find(&graph->visited_hash_set, node)]); for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { continue; } ggml_backend_t src_backend = ggml_backend_sched_get_tensor_backend(sched, src); GGML_LOG_DEBUG(" %20.20s (%5.5s) [%5.5s %8.8s]", src->name, fmt_size(ggml_nbytes(src)), src_backend ? ggml_backend_name(src_backend) : "NULL", GET_CAUSE(src)); } GGML_LOG_DEBUG("\n"); } } } static bool ggml_backend_sched_buffer_supported(ggml_backend_sched_t sched, struct ggml_tensor * t, int backend_id) { ggml_backend_buffer_t buf = t->view_src ? t->view_src->buffer : t->buffer; ggml_backend_buffer_type_t buft = NULL; if (buf) { // the tensor is already allocated buft = buf->buft; } else { // see if the tensor already has a backend assigned, and use the buffer type of that backend int tensor_backend_id = tensor_backend_id(t); if (tensor_backend_id == -1 && t->view_src) { tensor_backend_id = tensor_backend_id(t->view_src); } if (tensor_backend_id != -1) { buft = sched->bufts[tensor_backend_id]; } } return buft != NULL && ggml_backend_supports_buft(sched->backends[backend_id], buft); } static void ggml_backend_sched_set_if_supported(ggml_backend_sched_t sched, struct ggml_tensor * node, int cur_backend_id, int * node_backend_id) { if (ggml_backend_supports_op(sched->backends[cur_backend_id], node)) { *node_backend_id = cur_backend_id; SET_CAUSE(node, "2.sup"); } } // assigns backends to ops and splits the graph into subgraphs that can be computed on the same backend void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { // reset splits sched->n_splits = 0; sched->n_graph_inputs = 0; sched->is_reset = false; struct ggml_init_params params = { /* .mem_size = */ sched->context_buffer_size, /* .mem_buffer = */ sched->context_buffer, /* .no_alloc = */ true }; ggml_free(sched->ctx); sched->ctx = ggml_init(params); if (sched->ctx == NULL) { GGML_ABORT("%s: failed to initialize context\n", __func__); } // pass 1: assign backends to ops with pre-allocated inputs for (int i = 0; i < graph->n_leafs; i++) { struct ggml_tensor * leaf = graph->leafs[i]; int * leaf_backend_id = &tensor_backend_id(leaf); // do not overwrite user assignments if (*leaf_backend_id == -1) { *leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf); } } for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; int * node_backend_id = &tensor_backend_id(node); // do not overwrite user assignments if (*node_backend_id == -1) { *node_backend_id 
= ggml_backend_sched_backend_id_from_cur(sched, node); #if 0 // src if (node->op == GGML_OP_NONE) { continue; } for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { continue; } int * src_backend_id = &tensor_backend_id(src); if (*src_backend_id == -1) { *src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src); } } #endif } } // pass 2: expand current backend assignments // assign the same backend to adjacent nodes // expand gpu backends (i.e. non last prio) up and down, ignoring cpu (the lowest priority backend) // thus, cpu will never be used unless weights are on cpu, or there are no gpu ops between cpu ops // ops unsupported by the backend being expanded will be left unassigned so that they can be assigned later when the locations of their inputs are known // expand gpu down { int cur_backend_id = -1; for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { continue; } int * node_backend_id = &tensor_backend_id(node); if (*node_backend_id != -1) { if (*node_backend_id == sched->n_backends - 1) { // skip cpu (lowest prio backend) cur_backend_id = -1; } else { cur_backend_id = *node_backend_id; } } else if (cur_backend_id != -1) { ggml_backend_sched_set_if_supported(sched, node, cur_backend_id, node_backend_id); } } } // expand gpu up { int cur_backend_id = -1; for (int i = graph->n_nodes - 1; i >= 0; i--) { struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { continue; } int * node_backend_id = &tensor_backend_id(node); if (*node_backend_id != -1) { if (*node_backend_id == sched->n_backends - 1) { // skip cpu (lowest prio backend) cur_backend_id = -1; } else { cur_backend_id = *node_backend_id; } } else if (cur_backend_id != -1) { ggml_backend_sched_set_if_supported(sched, node, cur_backend_id, node_backend_id); } } } // expand rest down { int cur_backend_id = -1; for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { continue; } int * node_backend_id = &tensor_backend_id(node); if (*node_backend_id != -1) { cur_backend_id = *node_backend_id; } else if (cur_backend_id != -1) { ggml_backend_sched_set_if_supported(sched, node, cur_backend_id, node_backend_id); } } } // expand rest up { int cur_backend_id = -1; for (int i = graph->n_nodes - 1; i >= 0; i--) { struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { continue; } int * node_backend_id = &tensor_backend_id(node); if (*node_backend_id != -1) { cur_backend_id = *node_backend_id; } else if (cur_backend_id != -1) { ggml_backend_sched_set_if_supported(sched, node, cur_backend_id, node_backend_id); } } } // pass 3: upgrade nodes to higher prio backends with compatible buffer types // if the tensor is already in the same buffer type (*) as another higher priority backend, we should move it there // however, we also need to verify that the sources are in compatible buffer types // (*) the actual requirement is more relaxed, the buffer type of the backend should be supported by all the users of this tensor further down the graph // however, this is slow to verify, so we have a more strict requirement that the buffer type is the same // this is not uncommon since multiple backends can use host memory, with the same buffer type (eg.
BLAS and CPU) // additionally, set remaining unassigned nodes to the backend with the most supported inputs // only nodes that could not be assigned during expansion due to the backend not supporting the op should be unassigned at this point for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { continue; } int * node_backend_id = &tensor_backend_id(node); if (*node_backend_id == -1) { // unassigned node: find the backend with the most supported inputs int n_supported_best = -1; for (int b = 0; b < sched->n_backends; b++) { if (ggml_backend_supports_op(sched->backends[b], node)) { int n_supported = 0; for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { continue; } if ((tensor_backend_id(src) != -1 || tensor_backend_id(src->view_src) != -1) && ggml_backend_sched_buffer_supported(sched, src, b)) { n_supported++; } } if (n_supported > n_supported_best) { n_supported_best = n_supported; *node_backend_id = b; SET_CAUSE(node, "3.best"); } } } } else { // assigned node: upgrade to higher prio backend if possible for (int b = 0; b < *node_backend_id; b++) { if (sched->bufts[b] == sched->bufts[*node_backend_id] && ggml_backend_supports_op(sched->backends[b], node)) { bool supported = true; for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { continue; } if (!ggml_backend_sched_buffer_supported(sched, src, b)) { supported = false; break; } } if (supported) { *node_backend_id = b; SET_CAUSE(node, "3.upg"); break; } } } } } // pass 4: assign backends to remaining src from dst and view_src for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; int * cur_backend_id = &tensor_backend_id(node); if (node->view_src != NULL && *cur_backend_id == -1) { *cur_backend_id = tensor_backend_id(node->view_src); SET_CAUSE(node, "4.vsrc"); } for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { continue; } int * src_backend_id = &tensor_backend_id(src); if (*src_backend_id == -1) { if (src->view_src != NULL) { // views are always on the same backend as the source *src_backend_id = tensor_backend_id(src->view_src); SET_CAUSE(src, "4.vsrc"); } else { *src_backend_id = *cur_backend_id; SET_CAUSE(src, "4.cur"); } } } // if the node is still unassigned, assign it to the first backend that supports it for (int b = 0; b < sched->n_backends && *cur_backend_id == -1; b++) { ggml_backend_sched_set_if_supported(sched, node, b, cur_backend_id); } GGML_ASSERT(*cur_backend_id != -1); } // pass 5: split graph, find tensors that need to be copied { int i_split = 0; struct ggml_backend_sched_split * split = &sched->splits[0]; // find the backend of the first split, skipping view ops int i = 0; for (; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; if (!ggml_is_view_op(node->op)) { split->backend_id = tensor_backend_id(node); break; } } split->i_start = 0; split->n_inputs = 0; int cur_backend_id = split->backend_id; for (; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { continue; } const int node_backend_id = tensor_backend_id(node); GGML_ASSERT(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback // check if we should start a new split based on the sources of the current node bool need_new_split = false; if (node_backend_id == cur_backend_id && split->n_inputs > 
0) { for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { continue; } // check if a weight is on a different and incompatible backend // by starting a new split, the memory of the previously offloaded weights can be reused if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) { int src_backend_id = tensor_backend_id(src); if (src_backend_id != cur_backend_id && !ggml_backend_sched_buffer_supported(sched, src, cur_backend_id)) { need_new_split = true; break; } } // check if the split has too many inputs // FIXME: count the number of inputs instead of only checking when full if (split->n_inputs == GGML_SCHED_MAX_SPLIT_INPUTS) { const size_t id = hash_id(src); int src_backend_id = sched->hv_tensor_backend_ids[id]; bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id); if (src_backend_id != cur_backend_id && tensor_id_copy(id, cur_backend_id, 0) == NULL && !supported) { need_new_split = true; break; } } } } if (node_backend_id != cur_backend_id || need_new_split) { split->i_end = i; i_split++; if (i_split >= sched->splits_capacity) { sched->splits_capacity *= 2; sched->splits = (ggml_backend_sched_split *) realloc(sched->splits, sched->splits_capacity * sizeof(struct ggml_backend_sched_split)); GGML_ASSERT(sched->splits != NULL); } split = &sched->splits[i_split]; split->backend_id = node_backend_id; split->i_start = i; split->n_inputs = 0; cur_backend_id = node_backend_id; } // find inputs that are not on the same backend for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { continue; } size_t src_id = hash_id(src); const int src_backend_id = sched->hv_tensor_backend_ids[src_id]; GGML_ASSERT(src_backend_id != -1); // all inputs should be assigned by now if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) { if (tensor_id_copy(src_id, src_backend_id, 0) == NULL) { ggml_backend_t backend = sched->backends[src_backend_id]; for (int c = 0; c < sched->n_copies; c++) { struct ggml_tensor * tensor_copy; if (c == sched->cur_copy) { tensor_copy = src; // use the original tensor as the current copy } else { tensor_copy = ggml_dup_tensor_layout(sched->ctx, src); ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c); } if (sched->n_copies > 1) { ggml_set_input(tensor_copy); ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor } tensor_id_copy(src_id, src_backend_id, c) = tensor_copy; SET_CAUSE(tensor_copy, "4.cpy"); } int n_graph_inputs = sched->n_graph_inputs++; GGML_ASSERT(n_graph_inputs < GGML_SCHED_MAX_SPLIT_INPUTS); sched->graph_inputs[n_graph_inputs] = src; } } if (src_backend_id != cur_backend_id && !ggml_backend_sched_buffer_supported(sched, src, cur_backend_id)) { // create a copy of the input in the split's backend if (tensor_id_copy(src_id, cur_backend_id, 0) == NULL) { ggml_backend_t backend = sched->backends[cur_backend_id]; for (int c = 0; c < sched->n_copies; c++) { struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src); ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c); if (sched->n_copies > 1) { ggml_set_input(tensor_copy); ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor } tensor_id_copy(src_id, cur_backend_id, c) = tensor_copy; SET_CAUSE(tensor_copy, "4.cpy"); } int n_inputs = split->n_inputs++; GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS); split->inputs[n_inputs] = src; } 
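// below, the node is rewired to read this input through the copy that lives on the split's backend, using the currently active pipeline copy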
node->src[j] = tensor_id_copy(src_id, cur_backend_id, sched->cur_copy); } } } split->i_end = graph->n_nodes; sched->n_splits = i_split + 1; } if (sched->debug) { ggml_backend_sched_print_assignments(sched, graph); } // swap node_backend_ids and leaf_backend_ids with prevs { int * tmp = sched->node_backend_ids; sched->node_backend_ids = sched->prev_node_backend_ids; sched->prev_node_backend_ids = tmp; tmp = sched->leaf_backend_ids; sched->leaf_backend_ids = sched->prev_leaf_backend_ids; sched->prev_leaf_backend_ids = tmp; } int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies; if (sched->graph.size < graph_size) { sched->graph.size = graph_size; sched->graph.nodes = (ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *)); sched->graph.leafs = (ggml_tensor **) realloc(sched->graph.leafs, graph_size * sizeof(struct ggml_tensor *)); GGML_ASSERT(sched->graph.nodes != NULL); GGML_ASSERT(sched->graph.leafs != NULL); } sched->graph.n_nodes = 0; sched->graph.n_leafs = 0; struct ggml_cgraph * graph_copy = &sched->graph; for (int i = 0; i < sched->n_splits; i++) { struct ggml_backend_sched_split * split = &sched->splits[i]; split->graph = ggml_graph_view(graph, split->i_start, split->i_end); // Optimize this split of the graph. This needs to happen before we make graph_copy, // so they are in sync. ggml_backend_graph_optimize(sched->backends[split->backend_id], &split->graph); // add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split for (int j = 0; j < split->n_inputs; j++) { assert(graph_copy->size > (graph_copy->n_nodes + 1)); struct ggml_tensor * input = split->inputs[j]; const size_t input_id = hash_id(input); struct ggml_tensor * input_cpy = tensor_id_copy(input_id, split->backend_id, sched->cur_copy); // add a dependency to the input source so that it is not freed before the copy is done struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input); input_dep->src[0] = input; sched->node_backend_ids[graph_copy->n_nodes] = sched->hv_tensor_backend_ids[input_id]; graph_copy->nodes[graph_copy->n_nodes++] = input_dep; // add a dependency to the input copy so that it is allocated at the start of the split sched->node_backend_ids[graph_copy->n_nodes] = split->backend_id; graph_copy->nodes[graph_copy->n_nodes++] = input_cpy; } for (int j = split->i_start; j < split->i_end; j++) { assert(graph_copy->size > graph_copy->n_nodes); sched->node_backend_ids[graph_copy->n_nodes] = tensor_backend_id(graph->nodes[j]); graph_copy->nodes[graph_copy->n_nodes++] = graph->nodes[j]; } } if (sched->n_copies > 1) { // add input copies as leafs so that they are allocated first for (int i = 0; i < sched->n_graph_inputs; i++) { struct ggml_tensor * input = sched->graph_inputs[i]; size_t id = hash_id(input); int backend_id = tensor_backend_id(input); for (int c = 0; c < sched->n_copies; c++) { struct ggml_tensor * input_cpy = tensor_id_copy(id, backend_id, c); sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id; assert(graph_copy->size > graph_copy->n_leafs); graph_copy->leafs[graph_copy->n_leafs++] = input_cpy; } } for (int i = 0; i < sched->n_splits; i++) { struct ggml_backend_sched_split * split = &sched->splits[i]; int backend_id = split->backend_id; for (int j = 0; j < split->n_inputs; j++) { struct ggml_tensor * input = split->inputs[j]; size_t id = hash_id(input); for (int c = 0; c < sched->n_copies; c++) { struct ggml_tensor * input_cpy = tensor_id_copy(id,
backend_id, c); sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id; assert(graph_copy->size > graph_copy->n_leafs); graph_copy->leafs[graph_copy->n_leafs++] = input_cpy; } } } } // add leafs from the original graph for (int i = 0; i < graph->n_leafs; i++) { struct ggml_tensor * leaf = graph->leafs[i]; sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf); assert(graph_copy->size > graph_copy->n_leafs); graph_copy->leafs[graph_copy->n_leafs++] = leaf; } } static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) { bool backend_ids_changed = false; for (int i = 0; i < sched->graph.n_nodes; i++) { if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i] && sched->bufts[sched->node_backend_ids[i]] != sched->bufts[sched->prev_node_backend_ids[i]]) { backend_ids_changed = true; break; } } if (!backend_ids_changed) { for (int i = 0; i < sched->graph.n_leafs; i++) { if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i] && sched->bufts[sched->leaf_backend_ids[i]] != sched->bufts[sched->prev_leaf_backend_ids[i]]) { backend_ids_changed = true; break; } } } // allocate graph if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) { // the re-allocation may cause the split inputs to be moved to a different address // synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy for (int i = 0; i < sched->n_backends; i++) { ggml_backend_synchronize(sched->backends[i]); } #ifndef NDEBUG GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed); #endif ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids); if (!ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) { GGML_LOG_ERROR("%s: failed to allocate graph\n", __func__); return false; } } return true; } static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) { GGML_ASSERT(sched); struct ggml_backend_sched_split * splits = sched->splits; ggml_tensor * prev_ids_tensor = nullptr; std::vector<int32_t> ids; std::vector<ggml_bitset_t> used_ids; for (int split_id = 0; split_id < sched->n_splits; split_id++) { struct ggml_backend_sched_split * split = &splits[split_id]; int split_backend_id = split->backend_id; ggml_backend_t split_backend = sched->backends[split_backend_id]; // copy the input tensors to the split backend for (int input_id = 0; input_id < split->n_inputs; input_id++) { ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[input_id]); struct ggml_tensor * input = split->inputs[input_id]; struct ggml_tensor * input_cpy = tensor_copy(input, split_backend_id, sched->cur_copy); if (input->flags & GGML_TENSOR_FLAG_INPUT) { // inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done if (sched->events[split_backend_id][sched->cur_copy] != NULL) { ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]); } else { ggml_backend_synchronize(split_backend); } ggml_backend_tensor_copy(input, input_cpy); } else { // wait for the split backend to finish using the input before overwriting it if (sched->events[split_backend_id][sched->cur_copy] != NULL) { ggml_backend_event_wait(split_backend, sched->events[split_backend_id][sched->cur_copy]); } else { ggml_backend_synchronize(split_backend); } // when offloading MoE weights, we can reduce the amount of data copied by copying only the experts that are used
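// note: the expert ids are taken from src[2] of the GGML_OP_MUL_MAT_ID node below; they are read back to the host,
// collected into a bitset of used experts, and runs of consecutive used experts are then uploaded as grouped ranges
// instead of copying the entire weight tensor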
ggml_tensor * node = split->graph.nodes[0]; if (split->graph.n_nodes > 0 && ggml_backend_buffer_get_usage(input->buffer) == GGML_BACKEND_BUFFER_USAGE_WEIGHTS && ggml_backend_buffer_is_host(input->buffer) && ( (node->src[0] == input_cpy && node->op == GGML_OP_MUL_MAT_ID) //|| (node->src[1] == input_cpy && node->op == GGML_OP_ADD_ID) /* GGML_OP_ADD_ID weights are small and not worth splitting */ )) { const int64_t n_expert = node->op == GGML_OP_MUL_MAT_ID ? input->ne[2] : input->ne[1]; const size_t expert_size = node->op == GGML_OP_MUL_MAT_ID ? input->nb[2] : input->nb[1]; ggml_backend_synchronize(input_backend); // get the ids ggml_tensor * ids_tensor = node->src[2]; ggml_backend_t ids_backend = split_backend; // if the ids tensor is also an input of the split, it may not have been copied yet to the split backend // in that case, we use the original ids tensor for (int i = input_id + 1; i < split->n_inputs; i++) { if (ids_tensor == tensor_copy(split->inputs[i], split_backend_id, sched->cur_copy)) { ids_tensor = split->inputs[i]; ids_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[i]); break; } } if (ids_tensor != prev_ids_tensor) { ids.resize(ggml_nbytes(ids_tensor) / sizeof(int32_t)); ggml_backend_tensor_get_async(ids_backend, ids_tensor, ids.data(), 0, ggml_nbytes(ids_tensor)); ggml_backend_synchronize(ids_backend); // find the used experts used_ids.clear(); used_ids.resize(ggml_bitset_size(n_expert)); for (int64_t i1 = 0; i1 < ids_tensor->ne[1]; i1++) { for (int64_t i0 = 0; i0 < ids_tensor->ne[0]; i0++) { int32_t id = ids[i1 * ids_tensor->nb[1]/sizeof(int32_t) + i0 * ids_tensor->nb[0]/sizeof(int32_t)]; GGML_ASSERT(id >= 0 && id < n_expert); ggml_bitset_set(used_ids.data(), id); } } prev_ids_tensor = ids_tensor; } // group consecutive experts and copy them together auto copy_experts = [&](int32_t first_id, int32_t last_id) { const size_t expert_offset = first_id * expert_size; const size_t expert_size_copy = (last_id - first_id + 1) * expert_size; const size_t padding = std::min<size_t>(expert_size, 512);
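// pad the copy only while more experts follow in the source tensor, so the read below never runs past the end of input->data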
const size_t padding_end = last_id < n_expert - 1 ? padding : 0; ggml_backend_tensor_set_async(split_backend, input_cpy, (const uint8_t *)input->data + expert_offset, expert_offset, // copy a bit extra at the end to ensure there are no NaNs in the padding of the last expert // this is necessary for MMQ in the CUDA backend expert_size_copy + padding_end); }; int id = 0; while (!ggml_bitset_get(used_ids.data(), id)) { id++; } int32_t first_id = id; int32_t last_id = first_id; for (++id; id < n_expert; ++id) { if (!ggml_bitset_get(used_ids.data(), id)) { continue; } if (id == last_id + 1) { last_id = id; continue; } copy_experts(first_id, last_id); first_id = id; last_id = id; } copy_experts(first_id, last_id); } else { // try async copy, but if not possible, we can still use a sync copy without synchronizing the dst backend, since we handle the synchronization here with multiple copies and events // TODO: add public function to facilitate this, since applications do not have direct access to the backend interface if (!split_backend->iface.cpy_tensor_async || !split_backend->iface.cpy_tensor_async(input_backend, split_backend, input, input_cpy)) { ggml_backend_synchronize(input_backend); if (sched->events[split_backend_id][sched->cur_copy] != NULL) { ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]); } else { ggml_backend_synchronize(split_backend); } ggml_backend_tensor_copy(input, input_cpy); } } } } if (!sched->callback_eval) { enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &split->graph); if (ec != GGML_STATUS_SUCCESS) { return ec; } } else { // similar to ggml_backend_compare_graph_backend for (int j0 = 0; j0 < split->graph.n_nodes; j0++) { struct ggml_tensor * t = split->graph.nodes[j0]; // check if the user needs data from this node bool need = sched->callback_eval(t, true, sched->callback_eval_user_data); int j1 = j0; // determine the range [j0, j1] of nodes that can be computed together while (!need && j1 < split->graph.n_nodes - 1) { t = split->graph.nodes[++j1]; need = sched->callback_eval(t, true, sched->callback_eval_user_data); } struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1); enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &gv); if (ec != GGML_STATUS_SUCCESS) { return ec; } // TODO: pass backend to the callback, then the user can decide if they want to synchronize ggml_backend_synchronize(split_backend); if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) { break; } j0 = j1; } } // record the event of this copy if (split->n_inputs > 0) { if (sched->events[split_backend_id][sched->cur_copy] != NULL) { ggml_backend_event_record(sched->events[split_backend_id][sched->cur_copy], split_backend); } } } return GGML_STATUS_SUCCESS; } ggml_backend_sched_t ggml_backend_sched_new( ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel, bool op_offload) { GGML_ASSERT(n_backends > 0); GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS); GGML_ASSERT(ggml_backend_dev_type(ggml_backend_get_device(backends[n_backends - 1])) == GGML_BACKEND_DEVICE_TYPE_CPU); struct ggml_backend_sched * sched = (ggml_backend_sched *) calloc(1, sizeof(struct ggml_backend_sched)); const char * GGML_SCHED_DEBUG = getenv("GGML_SCHED_DEBUG"); sched->debug = GGML_SCHED_DEBUG ? atoi(GGML_SCHED_DEBUG) : 0; sched->n_backends = n_backends; sched->n_copies = parallel ?
GGML_SCHED_MAX_COPIES : 1; // initialize hash table // FIXME: needs to be size*2 to account for leafs (do it in graph_split instead) sched->hash_set = ggml_hash_set_new(graph_size); sched->hv_tensor_backend_ids = (int *) malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0])); sched->hv_tensor_copies = (ggml_tensor **) malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *)); const size_t ggml_sched_max_splits = graph_size; // at most there is one split for each node in the graph const size_t nodes_size = graph_size + ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2; sched->node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->node_backend_ids[0])); sched->leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->leaf_backend_ids[0])); sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0])); sched->prev_leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0])); sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false); sched->context_buffer = (char *) malloc(sched->context_buffer_size); const int initial_splits_capacity = 16; sched->splits = (ggml_backend_sched_split *) calloc(initial_splits_capacity, sizeof(sched->splits[0])); sched->splits_capacity = initial_splits_capacity; for (int b = 0; b < n_backends; b++) { sched->backends[b] = backends[b]; sched->bufts[b] = bufts ? bufts[b] : ggml_backend_get_default_buffer_type(backends[b]); GGML_ASSERT(ggml_backend_supports_buft(backends[b], sched->bufts[b])); if (sched->n_copies > 1) { for (int c = 0; c < sched->n_copies; c++) { sched->events[b][c] = ggml_backend_event_new(backends[b]->device); } } } sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends); sched->op_offload = op_offload; ggml_backend_sched_reset(sched); return sched; } void ggml_backend_sched_free(ggml_backend_sched_t sched) { if (sched == NULL) { return; } for (int b = 0; b < sched->n_backends; b++) { for (int c = 0; c < sched->n_copies; c++) { ggml_backend_event_free(sched->events[b][c]); } } ggml_gallocr_free(sched->galloc); ggml_free(sched->ctx); ggml_hash_set_free(&sched->hash_set); free(sched->splits); free(sched->hv_tensor_backend_ids); free(sched->hv_tensor_copies); free(sched->node_backend_ids); free(sched->leaf_backend_ids); free(sched->prev_node_backend_ids); free(sched->prev_leaf_backend_ids); free(sched->context_buffer); free(sched->graph.nodes); free(sched->graph.leafs); free(sched); } void ggml_backend_sched_reset(ggml_backend_sched_t sched) { GGML_ASSERT(sched); // reset state for the next run if (!sched->is_reset) { ggml_hash_set_reset(&sched->hash_set); memset(sched->hv_tensor_backend_ids, -1, sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0])); memset(sched->hv_tensor_copies, 0, sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *)); sched->is_reset = true; } sched->is_alloc = false; } bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) { GGML_ASSERT(sched); GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs); ggml_backend_sched_reset(sched); ggml_backend_sched_synchronize(sched); ggml_backend_sched_split_graph(sched, measure_graph); if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) { return false; } ggml_backend_sched_reset(sched); 
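// note: reserving only sizes the backend buffers for the worst case; the graph must still be allocated
// (directly via ggml_backend_sched_alloc_graph or implicitly during graph compute) before it can be evaluated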
return true; } bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { GGML_ASSERT(sched); GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs); GGML_ASSERT(!sched->is_alloc); sched->cur_copy = sched->next_copy; sched->next_copy = (sched->next_copy + 1) % sched->n_copies; ggml_backend_sched_split_graph(sched, graph); if (!ggml_backend_sched_alloc_splits(sched)) { return false; } sched->is_alloc = true; return true; } enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { enum ggml_status err = ggml_backend_sched_graph_compute_async(sched, graph); ggml_backend_sched_synchronize(sched); return err; } enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { GGML_ASSERT(sched); if (!sched->is_reset && !sched->is_alloc) { ggml_backend_sched_reset(sched); } if (!sched->is_alloc) { if (!ggml_backend_sched_alloc_graph(sched, graph)) { return GGML_STATUS_ALLOC_FAILED; } } return ggml_backend_sched_compute_splits(sched); } void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) { GGML_ASSERT(sched); for (int i = 0; i < sched->n_backends; i++) { ggml_backend_synchronize(sched->backends[i]); } if (!sched->is_alloc) { // if the graph is not already allocated, always use copy 0 after a synchronization // this ensures that during generation the same copy is used every time, // which avoids changes in the graph that could cause CUDA or other graphs to be disabled sched->next_copy = 0; } } void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) { GGML_ASSERT(sched); sched->callback_eval = callback; sched->callback_eval_user_data = user_data; } int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) { GGML_ASSERT(sched); return sched->n_splits; } int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) { GGML_ASSERT(sched); return sched->n_copies; } int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched) { GGML_ASSERT(sched); return sched->n_backends; } ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i) { GGML_ASSERT(sched); GGML_ASSERT(i >= 0 && i < sched->n_backends); return sched->backends[i]; } ggml_backend_buffer_type_t ggml_backend_sched_get_buffer_type(ggml_backend_sched_t sched, ggml_backend_t backend) { GGML_ASSERT(sched); int backend_index = ggml_backend_sched_backend_id(sched, backend); GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); return sched->bufts[backend_index]; } size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) { GGML_ASSERT(sched); int backend_index = ggml_backend_sched_backend_id(sched, backend); GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); return ggml_gallocr_get_buffer_size(sched->galloc, backend_index); } void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) { GGML_ASSERT(sched); int backend_index = ggml_backend_sched_backend_id(sched, backend); GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); tensor_backend_id(node) = backend_index; SET_CAUSE(node, "usr"); sched->is_reset = false; } ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) { GGML_ASSERT(sched); int backend_index = tensor_backend_id(node); if (backend_index == -1) { return NULL; } return 
sched->backends[backend_index]; } // utils enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) { GGML_ASSERT(tensor); GGML_ASSERT(tensor->buffer == NULL); GGML_ASSERT(tensor->view_src != NULL); GGML_ASSERT(tensor->view_src->buffer != NULL); GGML_ASSERT(tensor->view_src->data != NULL); tensor->buffer = tensor->view_src->buffer; tensor->data = (char *)tensor->view_src->data + tensor->view_offs; return ggml_backend_buffer_init_tensor(tensor->buffer, tensor); } enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) { GGML_ASSERT(tensor); GGML_ASSERT(tensor->buffer == NULL); GGML_ASSERT(tensor->data == NULL); GGML_ASSERT(tensor->view_src == NULL); GGML_ASSERT(addr >= ggml_backend_buffer_get_base(buffer)); GGML_ASSERT((char *)addr + ggml_backend_buffer_get_alloc_size(buffer, tensor) <= (char *)ggml_backend_buffer_get_base(buffer) + ggml_backend_buffer_get_size(buffer)); tensor->buffer = buffer; tensor->data = addr; return ggml_backend_buffer_init_tensor(buffer, tensor); } static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies, struct ggml_context * ctx_allocated, struct ggml_context * ctx_unallocated, struct ggml_tensor * src) { GGML_ASSERT(src != NULL); GGML_ASSERT(src->data && "graph must be allocated"); size_t id = ggml_hash_insert(&hash_set, src); if (id == GGML_HASHSET_ALREADY_EXISTS) { return node_copies[ggml_hash_find(&hash_set, src)]; } struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src); if (src->view_src != NULL) { dst->view_src = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, src->view_src); dst->view_offs = src->view_offs; } dst->op = src->op; memcpy(dst->op_params, src->op_params, sizeof(dst->op_params)); ggml_set_name(dst, src->name); // copy src for (int i = 0; i < GGML_MAX_SRC; i++) { struct ggml_tensor * s = src->src[i]; if (s == NULL) { continue; } dst->src[i] = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, s); } node_copies[id] = dst; return dst; } static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) { size_t id = ggml_hash_find(hash_set, src); if (node_init[id]) { return; } node_init[id] = true; struct ggml_tensor * dst = node_copies[id]; if (dst->view_src != NULL) { graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src); enum ggml_status status = ggml_backend_view_init(dst); GGML_ASSERT(status == GGML_STATUS_SUCCESS); } else { ggml_backend_tensor_copy(src, dst); } // init src for (int i = 0; i < GGML_MAX_SRC; i++) { struct ggml_tensor * s = src->src[i]; if (s == NULL) { continue; } graph_copy_init_tensor(hash_set, node_copies, node_init, s); } } struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) { GGML_ASSERT(graph); struct ggml_hash_set hash_set = ggml_hash_set_new(graph->visited_hash_set.size); struct ggml_tensor ** node_copies = (ggml_tensor **) calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT bool * node_init = (bool *) calloc(hash_set.size, sizeof(node_init[0])); struct ggml_init_params params = { /* .mem_size = */ ggml_tensor_overhead()*hash_set.size + ggml_graph_overhead_custom(graph->size, false), /* .mem_buffer = */ NULL, /* .no_alloc = */ true }; struct ggml_context * ctx_allocated = ggml_init(params); struct ggml_context * 
ctx_unallocated = ggml_init(params); if (ctx_allocated == NULL || ctx_unallocated == NULL) { GGML_LOG_ERROR("%s: failed to allocate context for graph copy\n", __func__); ggml_hash_set_free(&hash_set); free(node_copies); free(node_init); ggml_free(ctx_allocated); ggml_free(ctx_unallocated); return { /* .buffer = */ NULL, /* .ctx_allocated = */ NULL, /* .ctx_unallocated = */ NULL, /* .graph = */ NULL, }; } // dup nodes for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, node); } // allocate nodes ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend); if (buffer == NULL) { GGML_LOG_ERROR("%s: failed to allocate buffer for graph copy\n", __func__); ggml_hash_set_free(&hash_set); free(node_copies); free(node_init); ggml_free(ctx_allocated); ggml_free(ctx_unallocated); return { /* .buffer = */ NULL, /* .ctx_allocated = */ NULL, /* .ctx_unallocated = */ NULL, /* .graph = */ NULL, }; } //printf("copy buffer size: %zu MB\n", ggml_backend_buffer_get_size(buffer) / 1024 / 1024); // copy data and init views for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; graph_copy_init_tensor(&hash_set, node_copies, node_init, node); } // build graph copy struct ggml_cgraph * graph_copy = ggml_new_graph_custom(ctx_allocated, graph->size, false); for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; struct ggml_tensor * node_copy = node_copies[ggml_hash_find(&hash_set, node)]; graph_copy->nodes[i] = node_copy; } graph_copy->n_nodes = graph->n_nodes; ggml_hash_set_free(&hash_set); free(node_copies); free(node_init); return { /* .buffer = */ buffer, /* .ctx_allocated = */ ctx_allocated, /* .ctx_unallocated = */ ctx_unallocated, /* .graph = */ graph_copy, }; } void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy) { ggml_backend_buffer_free(copy.buffer); ggml_free(copy.ctx_allocated); ggml_free(copy.ctx_unallocated); } bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node) { struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph); if (copy.buffer == NULL) { return false; } struct ggml_cgraph * g1 = graph; struct ggml_cgraph * g2 = copy.graph; assert(g1->n_nodes == g2->n_nodes); if (test_node != nullptr) { // Compute the whole graph and only test the output for a specific tensor ggml_backend_graph_compute(backend1, g1); ggml_backend_graph_compute(backend2, g2); int test_node_idx = -1; for (int i = 0; i < g1->n_nodes; i++) { struct ggml_tensor * t1 = g1->nodes[i]; if (t1 == test_node) { test_node_idx = i; break; } } GGML_ASSERT(test_node_idx != -1); callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data); } else { for (int i = 0; i < g1->n_nodes; i++) { struct ggml_tensor * t1 = g1->nodes[i]; struct ggml_tensor * t2 = g2->nodes[i]; assert(t1->op == t2->op && ggml_are_same_layout(t1, t2)); struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1); struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1); ggml_backend_graph_compute(backend1, &g1v); ggml_backend_graph_compute(backend2, &g2v); if (ggml_is_view_op(t1->op)) { continue; } // compare results, calculate rms etc if (!callback(i, t1, t2, user_data)) { break; } } } ggml_backend_graph_copy_free(copy); return true; } // CPU backend - buffer 
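// usage sketch for the graph copy/compare utilities above (illustrative only and kept disabled;
// `backend1`, `backend2` and `graph` are placeholders for application state):
#if 0
static bool example_compare_cb(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data) {
    // t1 is the result computed by backend1, t2 the result computed by backend2;
    // return false to stop the comparison early
    (void) node_index; (void) t1; (void) t2; (void) user_data;
    return true;
}
// compare all nodes of the two backends:
// ggml_backend_compare_graph_backend(backend1, backend2, graph, example_compare_cb, NULL, /* test_node = */ NULL);
#endif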
static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) { GGML_ASSERT(buffer); uintptr_t data = (uintptr_t)buffer->context; // align the buffer if (data % TENSOR_ALIGNMENT != 0) { data = GGML_PAD(data, TENSOR_ALIGNMENT); } return (void *)data; } static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) { GGML_ASSERT(buffer); ggml_aligned_free(buffer->context, buffer->size); } static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { GGML_ASSERT(tensor); memset((char *)tensor->data + offset, value, size); GGML_UNUSED(buffer); } static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { GGML_ASSERT(tensor); memcpy((char *)tensor->data + offset, data, size); GGML_UNUSED(buffer); } static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { GGML_ASSERT(tensor); memcpy(data, (const char *)tensor->data + offset, size); GGML_UNUSED(buffer); } static bool ggml_backend_cpu_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) { GGML_ASSERT(src); if (ggml_backend_buffer_is_host(src->buffer)) { memcpy(dst->data, src->data, ggml_nbytes(src)); return true; } return false; GGML_UNUSED(buffer); } static void ggml_backend_cpu_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { GGML_ASSERT(buffer); memset(buffer->context, value, buffer->size); } static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = { /* .free_buffer = */ ggml_backend_cpu_buffer_free_buffer, /* .get_base = */ ggml_backend_cpu_buffer_get_base, /* .init_tensor = */ NULL, // no initialization required /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor, /* .set_tensor = */ ggml_backend_cpu_buffer_set_tensor, /* .get_tensor = */ ggml_backend_cpu_buffer_get_tensor, /* .cpy_tensor = */ ggml_backend_cpu_buffer_cpy_tensor, /* .clear = */ ggml_backend_cpu_buffer_clear, /* .reset = */ NULL, }; static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = { /* .free_buffer = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed /* .get_base = */ ggml_backend_cpu_buffer_get_base, /* .init_tensor = */ NULL, // no initialization required /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor, /* .set_tensor = */ ggml_backend_cpu_buffer_set_tensor, /* .get_tensor = */ ggml_backend_cpu_buffer_get_tensor, /* .cpy_tensor = */ ggml_backend_cpu_buffer_cpy_tensor, /* .clear = */ ggml_backend_cpu_buffer_clear, /* .reset = */ NULL, }; // CPU backend buffer type // this buffer type is defined here to make it available to all backends static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_type_t buft) { return "CPU"; GGML_UNUSED(buft); } static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { void * data = ggml_aligned_malloc(size); if (data == NULL) { GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size); return NULL; } return ggml_backend_buffer_init(buft, ggml_backend_cpu_buffer_i, data, size); } static size_t ggml_backend_cpu_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { return TENSOR_ALIGNMENT; GGML_UNUSED(buft); } static bool 
ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft) { return true; GGML_UNUSED(buft); } ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) { static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = { /* .iface = */ { /* .get_name = */ ggml_backend_cpu_buffer_type_get_name, /* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer, /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment, /* .get_max_size = */ NULL, // defaults to SIZE_MAX /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes /* .is_host = */ ggml_backend_cpu_buffer_type_is_host, }, /* .device = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0), /* .context = */ NULL, }; return &ggml_backend_cpu_buffer_type; } static const char * ggml_backend_cpu_buffer_from_ptr_type_get_name(ggml_backend_buffer_type_t buft) { return "CPU_Mapped"; GGML_UNUSED(buft); } static ggml_backend_buffer_type_t ggml_backend_cpu_buffer_from_ptr_type(void) { static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = { /* .iface = */ { /* .get_name = */ ggml_backend_cpu_buffer_from_ptr_type_get_name, /* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer, /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment, /* .get_max_size = */ NULL, // defaults to SIZE_MAX /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes /* .is_host = */ ggml_backend_cpu_buffer_type_is_host, }, /* .device = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0), /* .context = */ NULL, }; return &ggml_backend_cpu_buffer_type; } ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size) { GGML_ASSERT((uintptr_t)ptr % TENSOR_ALIGNMENT == 0 && "buffer pointer must be aligned"); return ggml_backend_buffer_init(ggml_backend_cpu_buffer_from_ptr_type(), ggml_backend_cpu_buffer_from_ptr_i, ptr, size); } ggml-org-ggml-7ec8045/src/ggml-blas/000077500000000000000000000000001506673203700171575ustar00rootroot00000000000000ggml-org-ggml-7ec8045/src/ggml-blas/CMakeLists.txt000066400000000000000000000071571506673203700217310ustar00rootroot00000000000000if (GGML_STATIC) set(BLA_STATIC ON) endif() #if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22) # set(BLA_SIZEOF_INTEGER 8) #endif() set(BLA_VENDOR ${GGML_BLAS_VENDOR}) find_package(BLAS) if (BLAS_FOUND) message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") ggml_add_backend_library(ggml-blas ggml-blas.cpp ) if (${GGML_BLAS_VENDOR} MATCHES "Apple") add_compile_definitions(ACCELERATE_NEW_LAPACK) add_compile_definitions(ACCELERATE_LAPACK_ILP64) add_compile_definitions(GGML_BLAS_USE_ACCELERATE) elseif ("${BLAS_INCLUDE_DIRS}" STREQUAL "") # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake. 
# see https://gitlab.kitware.com/cmake/cmake/-/issues/20268 find_package(PkgConfig REQUIRED) if (${GGML_BLAS_VENDOR} MATCHES "Generic") pkg_check_modules(DepBLAS blas) elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS") # As of openblas v0.3.22, the 64-bit is named openblas64.pc pkg_check_modules(DepBLAS openblas64) if (NOT DepBLAS_FOUND) pkg_check_modules(DepBLAS openblas) endif() elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME") add_compile_definitions(GGML_BLAS_USE_BLIS) pkg_check_modules(DepBLAS blis) elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS") pkg_check_modules(DepBLAS blas-atlas) elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS") pkg_check_modules(DepBLAS flexiblas_api) elseif (${GGML_BLAS_VENDOR} MATCHES "Intel") add_compile_definitions(GGML_BLAS_USE_MKL) # all Intel* libraries share the same include path pkg_check_modules(DepBLAS mkl-sdl) elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC") # this doesn't provide pkg-config # you may need to assign BLAS_INCLUDE_DIRS yourself if ("${NVHPC_VERSION}" STREQUAL "") message(WARNING "Consider setting NVHPC_VERSION") else() set(DepBLAS_FOUND ON) set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include") endif() endif() if (DepBLAS_FOUND) set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS}) else() message(WARNING "BLAS_INCLUDE_DIRS was neither provided nor automatically" " detected by pkgconfig, trying to find cblas.h from possible paths...") find_path(BLAS_INCLUDE_DIRS NAMES cblas.h HINTS /usr/include /usr/local/include /usr/include/openblas /opt/homebrew/opt/openblas/include /usr/local/opt/openblas/include /usr/include/x86_64-linux-gnu/openblas/include ) endif() endif() message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}") target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS}) if ("${BLAS_INCLUDE_DIRS}" MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel")) add_compile_definitions(GGML_BLAS_USE_MKL) endif() target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES}) target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS}) else() message(FATAL_ERROR "BLAS not found, please refer to " "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" " to set correct GGML_BLAS_VENDOR") endif() ggml-org-ggml-7ec8045/src/ggml-blas/ggml-blas.cpp000066400000000000000000000411341506673203700215330ustar00rootroot00000000000000#include "ggml-impl.h" #include "ggml-blas.h" #include "ggml-backend-impl.h" #include <future> #include <vector> #include <cstring> #if defined(GGML_BLAS_USE_ACCELERATE) # include <Accelerate/Accelerate.h> #elif defined(GGML_BLAS_USE_MKL) # include <mkl.h> #elif defined(GGML_BLAS_USE_BLIS) # include <blis.h> #elif defined(GGML_BLAS_USE_NVPL) # include <nvpl_blas.h> #else # include <cblas.h> #endif struct ggml_backend_blas_context { int n_threads = GGML_DEFAULT_N_THREADS; std::unique_ptr<char[]> work_data; size_t work_size = 0; #ifndef GGML_USE_OPENMP std::vector<std::future<void>> tasks; #endif }; static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) { const struct ggml_tensor * src0 = dst->src[0]; const struct ggml_tensor * src1 = dst->src[1]; GGML_TENSOR_BINARY_OP_LOCALS const enum ggml_type type = src0->type; GGML_ASSERT(ne0 == ne01); GGML_ASSERT(ne1 == ne11); GGML_ASSERT(ne2 == ne12); GGML_ASSERT(ne3 == ne13); // we don't support permuted src0 or src1 GGML_ASSERT(nb00 == ggml_type_size(type)); GGML_ASSERT(nb10 == ggml_type_size(src1->type)); // dst cannot be transposed or permuted GGML_ASSERT(nb0 == sizeof(float)); GGML_ASSERT(nb0 <= nb1); GGML_ASSERT(nb1 <= nb2);
GGML_ASSERT(nb2 <= nb3); // broadcast factors const int64_t r2 = ne12/ne02; const int64_t r3 = ne13/ne03; const int64_t ne_plane = ne01*ne00; const size_t desired_wsize = type == GGML_TYPE_F32 ? 0 : ne03*ne02*ne_plane*sizeof(float); if (ctx->work_size < desired_wsize) { ctx->work_data.reset(new char[desired_wsize]); ctx->work_size = desired_wsize; } void * wdata = ctx->work_data.get(); // convert src0 to float if (type != GGML_TYPE_F32) { const auto * type_traits = ggml_get_type_traits(type); ggml_to_float_t const to_float = type_traits->to_float; for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { const void * x = (char *) src0->data + i02*nb02 + i03*nb03; float * const wplane = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane; const int min_cols_per_thread = 4096; const int min_rows_per_thread = std::max((int)(min_cols_per_thread/ne00), 1); const int n_threads = std::max(std::min(ctx->n_threads, (int)(ne01/min_rows_per_thread)), 1); #ifdef GGML_USE_OPENMP #pragma omp parallel for num_threads(n_threads) for (int64_t i01 = 0; i01 < ne01; i01++) { to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00); } #else for (int i = 1; i < n_threads; i++) { const int64_t start = i*ne01/n_threads; const int64_t end = (i + 1)*ne01/n_threads; if (start < end) { ctx->tasks.push_back(std::async(std::launch::async, [=]() { for (int64_t i01 = start; i01 < end; i01++) { to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00); } })); } } { // reuse the current thread for the first task const int64_t start = 0; const int64_t end = ne01/n_threads; for (int64_t i01 = start; i01 < end; i01++) { to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00); } } #endif } } #ifndef GGML_USE_OPENMP // wait for all tasks to finish for (auto & task : ctx->tasks) { task.get(); } ctx->tasks.clear(); #endif } #if defined(OPENBLAS_VERSION) openblas_set_num_threads(ctx->n_threads); #endif #if defined(GGML_BLAS_USE_BLIS) bli_thread_set_num_threads(ctx->n_threads); #endif #if defined(GGML_BLAS_USE_NVPL) nvpl_blas_set_num_threads(ctx->n_threads); #endif for (int64_t i13 = 0; i13 < ne13; i13++) { for (int64_t i12 = 0; i12 < ne12; i12++) { const int64_t i03 = i13/r3; const int64_t i02 = i12/r2; const float * x = (float *) ((char *) src0->data + i02*nb02 + i03*nb03); const float * y = (float *) ((char *) src1->data + i12*nb12 + i13*nb13); float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3); if (type != GGML_TYPE_F32) { x = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane; } cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, ne1, ne01, ne10, 1.0f, y, ne10, x, ne00, 0.0f, d, ne01); } } } static void ggml_backend_blas_out_prod(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) { const struct ggml_tensor * src0 = dst->src[0]; const struct ggml_tensor * src1 = dst->src[1]; GGML_TENSOR_BINARY_OP_LOCALS GGML_ASSERT(ne0 == ne00); GGML_ASSERT(ne1 == ne10); GGML_ASSERT(ne2 == ne02); GGML_ASSERT(ne02 == ne12); GGML_ASSERT(ne3 == ne13); GGML_ASSERT(ne03 == ne13); // we don't support permuted src0 or src1 GGML_ASSERT(nb00 == sizeof(float)); // dst cannot be transposed or permuted GGML_ASSERT(nb0 == sizeof(float)); // GGML_ASSERT(nb0 <= nb1); // GGML_ASSERT(nb1 <= nb2); // GGML_ASSERT(nb2 <= nb3); // Arguments to ggml_compute_forward_out_prod (expressed as major,minor) // src0: (k,n) // src1: (k,m) // dst: (m,n) // // Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f) // Also expressed as (major,minor) // a: (m,k): so src1 
transposed // b: (k,n): so src0 // c: (m,n) // // However, if ggml_is_transposed(src1) is true, then // src1->data already contains a transposed version, so sgemm mustn't // transpose it further. int n = src0->ne[0]; int k = src0->ne[1]; int m = src1->ne[0]; CBLAS_TRANSPOSE transposeA; int lda; if (!ggml_is_transposed(src1)) { transposeA = CblasTrans; lda = m; } else { transposeA = CblasNoTrans; lda = k; } float * a = (float *) ((char *) src1->data); float * b = (float *) ((char *) src0->data); float * c = (float *) ((char *) dst->data); cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n); GGML_UNUSED(ctx); } // backend interface static const char * ggml_backend_blas_get_name(ggml_backend_t backend) { return "BLAS"; GGML_UNUSED(backend); } static void ggml_backend_blas_free(ggml_backend_t backend) { ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context; delete ctx; delete backend; } static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context; for (int i = 0; i < cgraph->n_nodes; i++) { struct ggml_tensor * node = cgraph->nodes[i]; switch (node->op) { case GGML_OP_MUL_MAT: ggml_backend_blas_mul_mat(ctx, node); break; case GGML_OP_OUT_PROD: ggml_backend_blas_out_prod(ctx, node); break; case GGML_OP_NONE: case GGML_OP_RESHAPE: case GGML_OP_VIEW: case GGML_OP_PERMUTE: case GGML_OP_TRANSPOSE: break; default: GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node)); } } return GGML_STATUS_SUCCESS; GGML_UNUSED(backend); } static struct ggml_backend_i blas_backend_i = { /* .get_name = */ ggml_backend_blas_get_name, /* .free = */ ggml_backend_blas_free, /* .set_tensor_async = */ NULL, /* .get_tensor_async = */ NULL, /* .cpy_tensor_async = */ NULL, /* .synchronize = */ NULL, /* .graph_plan_create = */ NULL, /* .graph_plan_free = */ NULL, /* .graph_plan_update = */ NULL, /* .graph_plan_compute = */ NULL, /* .graph_compute = */ ggml_backend_blas_graph_compute, /* .event_record = */ NULL, /* .event_wait = */ NULL, /* .graph_optimize = */ NULL, }; static ggml_guid_t ggml_backend_blas_guid(void) { static ggml_guid guid = { 0x12, 0xa8, 0xae, 0xf4, 0xc0, 0x1e, 0x61, 0x97, 0x8f, 0xeb, 0x33, 0x04, 0xa1, 0x33, 0x51, 0x2d }; return &guid; } ggml_backend_t ggml_backend_blas_init(void) { ggml_backend_blas_context * ctx = new ggml_backend_blas_context; ggml_backend_t backend = new ggml_backend { /* .guid = */ ggml_backend_blas_guid(), /* .iface = */ blas_backend_i, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_blas_reg(), 0), /* .context = */ ctx, }; #if defined(OPENBLAS_VERSION) && defined(GGML_USE_OPENMP) if (openblas_get_parallel() != OPENBLAS_OPENMP) { GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but OpenBLAS was compiled without OpenMP support\n", __func__); } #endif #if defined(BLIS_ENABLE_CBLAS) && defined(GGML_USE_OPENMP) && !defined(BLIS_ENABLE_OPENMP) GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but BLIS was compiled without OpenMP support\n", __func__); #endif return backend; } bool ggml_backend_is_blas(ggml_backend_t backend) { return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_blas_guid()); } void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads) { GGML_ASSERT(ggml_backend_is_blas(backend_blas)); ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend_blas->context; ctx->n_threads = n_threads; } // device interface 
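// usage sketch for the public BLAS backend API above (illustrative only and kept disabled;
// the thread count and the graph `gf` are placeholders):
#if 0
ggml_backend_t backend = ggml_backend_blas_init();
if (backend != NULL) {
    GGML_ASSERT(ggml_backend_is_blas(backend));
    ggml_backend_blas_set_n_threads(backend, 8);
    // ... build a graph gf and run it with ggml_backend_graph_compute(backend, gf) ...
    ggml_backend_free(backend);
}
#endif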
static const char * ggml_backend_blas_device_get_name(ggml_backend_dev_t dev) { return "BLAS"; GGML_UNUSED(dev); } static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t dev) { #if defined(GGML_BLAS_USE_ACCELERATE) return "Accelerate"; #elif defined(GGML_BLAS_USE_MKL) return "MKL"; #elif defined(GGML_BLAS_USE_BLIS) return "BLIS"; #elif defined(GGML_BLAS_USE_NVPL) return "NVPL"; #elif defined(OPENBLAS_VERSION) return "OpenBLAS"; #else return "BLAS"; #endif GGML_UNUSED(dev); } static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { // TODO *free = 0; *total = 0; GGML_UNUSED(dev); } static enum ggml_backend_dev_type ggml_backend_blas_device_get_type(ggml_backend_dev_t dev) { return GGML_BACKEND_DEVICE_TYPE_ACCEL; GGML_UNUSED(dev); } static void ggml_backend_blas_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { props->name = ggml_backend_blas_device_get_name(dev); props->description = ggml_backend_blas_device_get_description(dev); props->type = ggml_backend_blas_device_get_type(dev); ggml_backend_blas_device_get_memory(dev, &props->memory_free, &props->memory_total); props->caps = { /* .async = */ false, /* .host_buffer = */ false, /* .buffer_from_host_ptr = */ true, /* .events = */ false, }; } static ggml_backend_t ggml_backend_blas_device_init_backend(ggml_backend_dev_t dev, const char * params) { return ggml_backend_blas_init(); GGML_UNUSED(dev); GGML_UNUSED(params); } static ggml_backend_buffer_type_t ggml_backend_blas_device_get_buffer_type(ggml_backend_dev_t dev) { return ggml_backend_cpu_buffer_type(); GGML_UNUSED(dev); } static ggml_backend_buffer_t ggml_backend_blas_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) { return ggml_backend_cpu_buffer_from_ptr(ptr, size); GGML_UNUSED(dev); GGML_UNUSED(max_tensor_size); } static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) { const struct ggml_tensor * src0 = op->src[0]; const struct ggml_tensor * src1 = op->src[1]; switch (op->op) { case GGML_OP_NONE: case GGML_OP_RESHAPE: case GGML_OP_VIEW: case GGML_OP_PERMUTE: case GGML_OP_TRANSPOSE: return true; case GGML_OP_MUL_MAT: { // BLAS usually is only faster for large matrices const struct ggml_tensor * src0 = op->src[0]; const struct ggml_tensor * src1 = op->src[1]; const int64_t ne10 = src1->ne[0]; const int64_t ne0 = op->ne[0]; const int64_t ne1 = op->ne[1]; // TODO: find the optimal value const int64_t min_batch = 32; return ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && src1->type == GGML_TYPE_F32 && (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) && (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); } case GGML_OP_OUT_PROD: return op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_F32 && ggml_is_matrix(src0) && ggml_is_matrix(src1) && ggml_is_contiguous(src0) && (ggml_is_contiguous(src1) || ggml_is_transposed(src1)) && (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); default: return false; } GGML_UNUSED(dev); } static bool ggml_backend_blas_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { return ggml_backend_buft_is_host(buft); GGML_UNUSED(dev); } static const struct ggml_backend_device_i ggml_backend_blas_device_i = { /* .get_name = */ ggml_backend_blas_device_get_name, /* .get_description = */ ggml_backend_blas_device_get_description, /* 
.get_memory = */ ggml_backend_blas_device_get_memory, /* .get_type = */ ggml_backend_blas_device_get_type, /* .get_props = */ ggml_backend_blas_device_get_props, /* .init_backend = */ ggml_backend_blas_device_init_backend, /* .get_buffer_type = */ ggml_backend_blas_device_get_buffer_type, /* .get_host_buffer_type = */ NULL, /* .buffer_from_host_ptr = */ ggml_backend_blas_device_buffer_from_host_ptr, /* .supports_op = */ ggml_backend_blas_device_supports_op, /* .supports_buft = */ ggml_backend_blas_device_supports_buft, /* .offload_op = */ NULL, /* .event_new = */ NULL, /* .event_free = */ NULL, /* .event_synchronize = */ NULL, }; // backend reg interface static const char * ggml_backend_blas_reg_get_name(ggml_backend_reg_t reg) { return "BLAS"; GGML_UNUSED(reg); } static size_t ggml_backend_blas_reg_get_device_count(ggml_backend_reg_t reg) { return 1; GGML_UNUSED(reg); } static ggml_backend_dev_t ggml_backend_blas_reg_get_device(ggml_backend_reg_t reg, size_t index) { GGML_ASSERT(index == 0); static ggml_backend_device ggml_backend_blas_device = { /* .iface = */ ggml_backend_blas_device_i, /* .reg = */ reg, /* .context = */ nullptr, }; return &ggml_backend_blas_device; GGML_UNUSED(reg); GGML_UNUSED(index); } static void * ggml_backend_blas_get_proc_address(ggml_backend_reg_t reg, const char * name) { if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) { return (void *)ggml_backend_blas_set_n_threads; } return NULL; GGML_UNUSED(reg); GGML_UNUSED(name); } static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = { /* .get_name = */ ggml_backend_blas_reg_get_name, /* .get_device_count = */ ggml_backend_blas_reg_get_device_count, /* .get_device = */ ggml_backend_blas_reg_get_device, /* .get_proc_address = */ ggml_backend_blas_get_proc_address, }; ggml_backend_reg_t ggml_backend_blas_reg(void) { static struct ggml_backend_reg ggml_backend_blas_reg = { /* .api_version = */ GGML_BACKEND_API_VERSION, /* .iface = */ ggml_backend_blas_reg_i, /* .context = */ NULL, }; return &ggml_backend_blas_reg; } GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg) ggml-org-ggml-7ec8045/src/ggml-cann/000077500000000000000000000000001506673203700191555ustar00rootroot00000000000000ggml-org-ggml-7ec8045/src/ggml-cann/CMakeLists.txt000077500000000000000000000066241506673203700217260ustar00rootroot00000000000000if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME}) set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME}) message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}") endif() # Auto-detect the SoC type and SoC version; if detection fails, the build is aborted set(SOC_VERSION "") function(detect_ascend_soc_type SOC_VERSION) execute_process( COMMAND bash -c "npu-smi info|awk -F' ' 'NF > 0 && NR==7 {print $3}'" OUTPUT_VARIABLE npu_info RESULT_VARIABLE npu_result OUTPUT_STRIP_TRAILING_WHITESPACE ) if("${npu_info}" STREQUAL "" OR ${npu_result}) message(FATAL_ERROR "Auto-detection of the Ascend SoC type failed, please specify it manually or check that the Ascend device is working normally.") endif() set(${SOC_VERSION} "Ascend${npu_info}" PARENT_SCOPE) endfunction() if(NOT SOC_TYPE) detect_ascend_soc_type(SOC_VERSION) set(SOC_TYPE "${SOC_VERSION}") message(STATUS "CANN: SOC_VERSION auto-detected is: ${SOC_VERSION}") endif() string(TOLOWER ${SOC_TYPE} SOC_VERSION) # SOC_VERSION needs to be lowercase # Construct the SoC-specific compile option ASCEND_<Soc_Major_SN>, such as ASCEND_910B or ASCEND_310P.
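# Worked example (illustrative values): SOC_VERSION "ascend910b1" matches "910b" below,
# giving SOC_TYPE_COMPILE_OPTION "ASCEND_910b", which is then upper-cased to "ASCEND_910B".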
string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
string(TOUPPER ${SOC_TYPE_COMPILE_OPTION} SOC_TYPE_COMPILE_OPTION)
message(STATUS "CANN: SOC_VERSION = ${SOC_VERSION}")

option(USE_ACL_GRAPH "Enable CANN graph execution (ACL graph mode)" OFF)

# Note: SOC_TYPE_MAJOR_SN is derived from the lowercased SOC_VERSION, so it is compared in lowercase here.
if(USE_ACL_GRAPH AND (SOC_TYPE_MAJOR_SN STREQUAL "310p" OR SOC_TYPE_COMPILE_OPTION STREQUAL "ASCEND_310P"))
    message(FATAL_ERROR
        "CANN Graph (ACL graph mode) is not supported on 310P devices. "
        "Please build with -DUSE_ACL_GRAPH=OFF or use a supported SOC.")
endif()

if (CANN_INSTALL_DIR)
    # Only Linux is supported.
    if (NOT UNIX)
        message(FATAL_ERROR "CANN: the CANN toolkit supports Unix, but not ${CMAKE_SYSTEM_NAME}")
    endif()

    # Supported platforms: x86-64, arm64
    if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
    elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
    else()
        message(FATAL_ERROR "CANN: the CANN toolkit supports x86-64 and arm64, but not ${CMAKE_SYSTEM_PROCESSOR}")
    endif()

    # Set headers and libs
    set(CANN_INCLUDE_DIRS
        ${CANN_INSTALL_DIR}/include
        ${CANN_INSTALL_DIR}/include/aclnn
        ${CANN_INSTALL_DIR}/acllib/include
    )

    list(APPEND CANN_LIBRARIES
        ascendcl
        nnopbase
        opapi
        acl_op_compiler
    )

    file(GLOB GGML_SOURCES_CANN "*.cpp")

    ggml_add_backend_library(ggml-cann ${GGML_SOURCES_CANN})
    target_link_libraries(ggml-cann PRIVATE ${CANN_LIBRARIES})
    target_include_directories(ggml-cann PRIVATE ${CANN_INCLUDE_DIRS})
    target_link_directories(ggml-cann PRIVATE ${CANN_INSTALL_DIR}/lib64)

    target_compile_definitions(ggml-cann PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")

    if (USE_ACL_GRAPH)
        target_compile_definitions(ggml-cann PRIVATE USE_ACL_GRAPH)
        message(STATUS "CANN: USE_ACL_GRAPH is enabled.")
    else()
        message(STATUS "CANN: USE_ACL_GRAPH is disabled.")
    endif()

    message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
    message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
else()
    message(FATAL_ERROR "CANN: Can't find CANN_INSTALL_DIR, did you forget to source set_var.sh?")
endif()
ggml-org-ggml-7ec8045/src/ggml-cann/Doxyfile000077500000000000000000003337501506673203700206760ustar00rootroot00000000000000# Doxyfile 1.8.17 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the configuration # file that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated.
This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = "ggml" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = "Tensor library for machine learning" # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = docs # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all generated output in the proper direction. # Possible values are: None, LTR, RTL and Context. # The default value is: None. OUTPUT_TEXT_DIRECTION = None # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. 
BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. 
JAVADOC_AUTOBRIEF = NO # If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line # such as # /*************** # as being the beginning of a Javadoc-style comment "banner". If set to NO, the # Javadoc-style will behave just like regular comments and it will not be # interpreted by doxygen. # The default value is: NO. JAVADOC_BANNER = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines (in the resulting output). You can put ^^ in the value part of an # alias to insert a newline as if a physical newline was in the original file. # When you need a literal { or } or , in the value part of an alias you have to # escape them by means of a backslash (\), this can lead to conflicts with the # commands \{ and \} for these it is advised to use the version @{ and @} or use # a double escape (\\{ and \\}) ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. 
For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice # sources only. Doxygen will then generate output that is more tailored for that # language. For instance, namespaces will be presented as modules, types will be # separated into more groups, etc. # The default value is: NO. OPTIMIZE_OUTPUT_SLICE = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, # Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, # Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files), VHDL, tcl. For instance to make doxygen treat # .inc files as Fortran files (default is PHP), and .f files as C (default is # Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See https://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. # Minimum value: 0, maximum value: 99, default value: 5. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES. TOC_INCLUDE_HEADINGS = 5 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). 
This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. 
This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = YES # If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual # methods of a class will be included in the documentation. # The default value is: NO. EXTRACT_PRIV_VIRTUAL = YES # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = YES # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = YES # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. 
If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # declarations. If set to NO, these declarations will be included in the # documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # (including Cygwin) ands Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. 
If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. 
MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. 
WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. If # EXTRACT_ALL is set to YES then this flag will automatically be disabled. # The default value is: NO. WARN_NO_PARAMDOC = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. # The default value is: NO. WARN_AS_ERROR = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: https://www.gnu.org/software/libiconv/) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), # *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen # C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f, *.for, *.tcl, *.vhd, # *.vhdl, *.ucf, *.qsf and *.ice. 
FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.idl \ *.ddl \ *.odl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.cs \ *.d \ *.php \ *.php4 \ *.php5 \ *.phtml \ *.inc \ *.m \ *.markdown \ *.md \ *.mm \ *.dox \ *.doc \ *.txt \ *.py \ *.pyw \ *.f90 \ *.f95 \ *.f03 \ *.f08 \ *.f \ *.for \ *.tcl \ *.vhd \ *.vhdl \ *.ucf \ *.qsf \ *.ice # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly.
# # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # entity all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. 
REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the # clang parser (see: http://clang.llvm.org/) for more accurate parsing at the # cost of reduced performance. This can be particularly helpful with template # rich C++ code for which doxygen's built-in parser lacks the necessary type # information. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. # The default value is: NO. CLANG_ASSISTED_PARSING = NO # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that # the include paths will already be set by doxygen for the files and directories # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_OPTIONS = # If clang assisted parsing is enabled you can provide the clang parser with the # path to the compilation database (see: # http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files # were built. This is equivalent to specifying the "-p" option to a clang tool, # such as clang-check. These options will then be passed to the parser. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. CLANG_DATABASE_PATH = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. 
ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file that includes any scripts and style sheets # that doxygen needs, which is dependent on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. 
# This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via JavaScript. If disabled, the navigation index will # consists of multiple levels of tabs that are statically embedded in every HTML # page. 
Disable this option to support browsers that do not have JavaScript, # like the Qt help browser. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_MENUS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries 1 will produce a full collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a full expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: https://developer.apple.com/xcode/), introduced with OSX # 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy # genXcode/_index.html for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. 
# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
# index.hhp is a project file that can be read by Microsoft's HTML Help
# Workshop (see:
# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.
#
# The HTML Help Workshop contains a compiler that can convert all HTML output
# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
# files were introduced as the Windows 98 help format and have replaced the
# older Windows help format (.hlp) on Windows platforms. Compressed HTML files
# also contain an index and a table of contents, and allow searching for words
# in the documentation. The HTML workshop also contains a viewer for
# compressed HTML files.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_HTMLHELP      = NO

# The CHM_FILE tag can be used to specify the file name of the resulting .chm
# file. You can add a path in front of the file if the result should not be
# written to the html output directory.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

CHM_FILE               =

# The HHC_LOCATION tag can be used to specify the location (absolute path
# including file name) of the HTML help compiler (hhc.exe). If non-empty,
# doxygen will try to run the HTML help compiler on the generated index.hhp.
# The file has to be specified with full path.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

HHC_LOCATION           =

# The GENERATE_CHI flag controls whether a separate .chi index file is
# generated (YES) or included in the master .chm file (NO).
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

GENERATE_CHI           = NO

# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
# and project file content.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

CHM_INDEX_ENCODING     =

# The BINARY_TOC flag controls whether a binary table of contents is generated
# (YES) or a normal table of contents (NO) in the .chm file. Furthermore, it
# enables the Previous and Next buttons.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

BINARY_TOC             = NO

# The TOC_EXPAND flag can be set to YES to add extra items for group members
# to the table of contents of the HTML help documentation and to the tree
# view.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

TOC_EXPAND             = NO
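# For example (illustrative only: the file name and compiler path below are
# hypothetical and HTML Help generation is disabled in this configuration),
# producing a .chm file on Windows would look like:
#
# GENERATE_HTMLHELP = YES
# CHM_FILE          = example.chm
# HHC_LOCATION      = "C:/Program Files (x86)/HTML Help Workshop/hhc.exe"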
# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed
# Help (.qch) of the generated HTML documentation.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_QHP           = NO

# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to
# specify the file name of the resulting .qch file. The path specified is
# relative to the HTML output folder.
# This tag requires that the tag GENERATE_QHP is set to YES.

QCH_FILE               =

# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
# Project output. For more information please see Qt Help Project / Namespace
# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_NAMESPACE          = org.doxygen.Project

# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
# Help Project output. For more information please see Qt Help Project /
# Virtual Folders (see:
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
# The default value is: doc.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_VIRTUAL_FOLDER     = doc

# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
# filter to add. For more information please see Qt Help Project / Custom
# Filters (see:
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_CUST_FILTER_NAME   =

# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
# custom filter to add. For more information please see Qt Help Project /
# Custom Filters (see:
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_CUST_FILTER_ATTRS  =

# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
# project's filter section matches; see Qt Help Project / Filter Attributes
# (https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_SECT_FILTER_ATTRS  =

# The QHG_LOCATION tag can be used to specify the location of Qt's
# qhelpgenerator. If non-empty, doxygen will try to run qhelpgenerator on the
# generated .qhp file.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHG_LOCATION           =
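# For example (illustrative only: the names and generator path below are
# hypothetical and Qt help generation is disabled in this configuration), a
# minimal Qt Compressed Help setup would look like:
#
# GENERATE_QHP       = YES
# QCH_FILE           = example.qch
# QHP_NAMESPACE      = com.example.Project
# QHP_VIRTUAL_FOLDER = doc
# QHG_LOCATION       = /usr/bin/qhelpgenerator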
# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will
# be generated that, together with the HTML files, form an Eclipse help
# plugin. To install this plugin and make it available under the help contents
# menu in Eclipse, the contents of the directory containing the HTML and XML
# files need to be copied into the plugins directory of eclipse. The name of
# the directory within the plugins directory should be the same as the
# ECLIPSE_DOC_ID value. After copying, Eclipse needs to be restarted before
# the help appears.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_ECLIPSEHELP   = NO

# A unique identifier for the Eclipse help plugin. When installing the plugin
# the directory name containing the HTML and XML files should also have this
# name. Each documentation set should have its own identifier.
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.

ECLIPSE_DOC_ID         = org.doxygen.Project

# If you want full control over the layout of the generated HTML pages it
# might be necessary to disable the index and replace it with your own. The
# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at
# the top of each HTML page. A value of NO enables the index and the value YES
# disables it. Since the tabs in the index contain the same information as the
# navigation tree, you can set this option to YES if you also set
# GENERATE_TREEVIEW to YES.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

DISABLE_INDEX          = NO

# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
# structure should be generated to display hierarchical information. If the
# tag value is set to YES, a side panel will be generated containing a
# tree-like index structure (just like the one that is generated for HTML
# Help). For this to work a browser that supports JavaScript, DHTML, CSS and
# frames is required (i.e. any modern browser). Windows users are probably
# better off using the HTML help feature. Via custom style sheets (see
# HTML_EXTRA_STYLESHEET) one can further fine-tune the look of the index. As
# an example, the default style sheet generated by doxygen has an example that
# shows how to put an image at the root of the tree instead of the
# PROJECT_NAME. Since the tree basically has the same information as the tab
# index, you could consider setting DISABLE_INDEX to YES when enabling this
# option.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_TREEVIEW      = NO

# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
# that doxygen will group on one line in the generated HTML documentation.
#
# Note that a value of 0 will completely suppress the enum values from
# appearing in the overview section.
# Minimum value: 0, maximum value: 20, default value: 4.
# This tag requires that the tag GENERATE_HTML is set to YES.

ENUM_VALUES_PER_LINE   = 4

# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
# to set the initial width (in pixels) of the frame in which the tree is
# shown.
# Minimum value: 0, maximum value: 1500, default value: 250.
# This tag requires that the tag GENERATE_HTML is set to YES.

TREEVIEW_WIDTH         = 250

# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
# external symbols imported via tag files in a separate window.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

EXT_LINKS_IN_WINDOW    = NO

# Use this tag to change the font size of LaTeX formulas included as images in
# the HTML documentation. When you change the font size after a successful
# doxygen run you need to manually remove any form_*.png images from the HTML
# output directory to force them to be regenerated.
# Minimum value: 8, maximum value: 50, default value: 10.
# This tag requires that the tag GENERATE_HTML is set to YES.

FORMULA_FONTSIZE       = 10

# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
# generated for formulas are transparent PNGs. Transparent PNGs are not
# supported properly for IE 6.0, but are supported on all modern browsers.
#
# Note that when changing this option you need to delete any form_*.png files
# in the HTML output directory before the changes take effect.
# The default value is: YES.
# This tag requires that the tag GENERATE_HTML is set to YES.

FORMULA_TRANSPARENT    = YES

# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand
# commands to create new LaTeX commands to be used in formulas as building
# blocks. See the section "Including formulas" for details.

FORMULA_MACROFILE      =
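# For example (illustrative only: the file name below is hypothetical and no
# macro file is used in this configuration), a macro file referenced as
#
# FORMULA_MACROFILE = doc/formulas.tex
#
# could contain definitions such as \newcommand{\R}{\mathbb{R}} that can then
# be used as building blocks inside formulas in the documentation.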
# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
# https://www.mathjax.org) which uses client side JavaScript for the rendering
# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
# installed or if you want formulas to look prettier in the HTML output. When
# enabled you may also need to install MathJax separately and configure the
# path to it using the MATHJAX_RELPATH option.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

USE_MATHJAX            = YES

# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. See the MathJax site (see:
# http://docs.mathjax.org/en/latest/output.html) for more details.
# Possible values are: HTML-CSS (which is slower, but has the best
# compatibility), NativeMML (i.e. MathML) and SVG.
# The default value is: HTML-CSS.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_FORMAT         = HTML-CSS

# When MathJax is enabled you need to specify the location relative to the
# HTML output directory using the MATHJAX_RELPATH option. The destination
# directory should contain the MathJax.js script. For instance, if the mathjax
# directory is located at the same level as the HTML output directory, then
# MATHJAX_RELPATH should be ../mathjax. The default value points to the
# MathJax Content Delivery Network so you can quickly see the result without
# installing MathJax. However, it is strongly recommended to install a local
# copy of MathJax from https://www.mathjax.org before deployment.
# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_RELPATH        = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/

# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
# extension names that should be enabled during MathJax rendering. For example
# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_EXTENSIONS     =

# The MATHJAX_CODEFILE tag can be used to specify a file with JavaScript
# pieces of code that will be used on startup of the MathJax code. See the
# MathJax site (see: http://docs.mathjax.org/en/latest/output.html) for more
# details. For an example see the documentation.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_CODEFILE       =

# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
# the HTML output. The underlying search engine uses JavaScript and DHTML and
# should work on any modern browser. Note that when using HTML help
# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
# there is already a search function so this one should typically be disabled.
# For large projects the JavaScript based search engine can be slow; in that
# case enabling SERVER_BASED_SEARCH may provide a better solution. It is
# possible to search using the keyboard; to jump to the search box use
# <access key> + S (what the <access key> is depends on the OS and browser,
# but it is typically <CTRL>, <ALT>/