pax_global_header00006660000000000000000000000064151441111760014513gustar00rootroot0000000000000052 comment=bbe9720cd26b5e3c9784fcabd6cc4b7e5631cf3f python-pgzip-0.4.0/000077500000000000000000000000001514411117600141645ustar00rootroot00000000000000python-pgzip-0.4.0/.devcontainer/000077500000000000000000000000001514411117600167235ustar00rootroot00000000000000python-pgzip-0.4.0/.devcontainer/devcontainer.json000066400000000000000000000023401514411117600222760ustar00rootroot00000000000000// For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/python { "name": "Python 3", // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile "image": "mcr.microsoft.com/devcontainers/python:1-3-bookworm", "features": {}, "customizations": { "vscode": { "extensions": [ "esbenp.prettier-vscode", "ms-python.flake8", "ms-python.isort", "ms-python.pylint", "ms-python.python", "ms-python.vscode-pylance", "tamasfe.even-better-toml" ] } }, // Features to add to the dev container. More info: https://containers.dev/features. // "features": {}, // Use 'forwardPorts' to make a list of ports inside the container available locally. // "forwardPorts": [], // Use 'postCreateCommand' to run commands after the container is created. "postCreateCommand": "pip install hatch && hatch env create && pre-commit install" // Configure tool-specific properties. // "customizations": {}, // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. // "remoteUser": "root" } python-pgzip-0.4.0/.github/000077500000000000000000000000001514411117600155245ustar00rootroot00000000000000python-pgzip-0.4.0/.github/workflows/000077500000000000000000000000001514411117600175615ustar00rootroot00000000000000python-pgzip-0.4.0/.github/workflows/codeql-analysis.yml000066400000000000000000000046741514411117600234070ustar00rootroot00000000000000# For most projects, this workflow file will not need changing; you simply need # to commit it to your repository. # # You may wish to alter this file to override the set of languages analyzed, # or to provide custom queries or build logic. # # ******** NOTE ******** # We have attempted to detect the languages in your repository. Please check # the `language` matrix defined below to confirm you have the correct set of # supported CodeQL languages. # name: "CodeQL" on: push: branches: [master] pull_request: # The branches below must be a subset of the branches above branches: [master] schedule: - cron: "30 18 * * 3" jobs: analyze: name: Analyze runs-on: ubuntu-latest permissions: actions: read contents: read security-events: write strategy: fail-fast: false matrix: language: ["python"] # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] # Learn more: # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed steps: - name: Checkout repository uses: actions/checkout@v2 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v1 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. # queries: ./path/to/local/query, your-org/your-repo/queries@main # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild uses: github/codeql-action/autobuild@v1 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines # and modify them (or add more) to build your code if your project # uses a compiled language #- run: | # make bootstrap # make release - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v1 python-pgzip-0.4.0/.github/workflows/python-publish.yml000066400000000000000000000016271514411117600232770ustar00rootroot00000000000000# This workflow will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Upload Python Package on: release: types: [published] jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.13" - name: Install dependencies run: | python -m pip install --upgrade pip pip install hatch - name: Run tests run: hatch run all:test - name: Build package run: hatch build - name: Publish package uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} python-pgzip-0.4.0/.github/workflows/python-tests.yml000066400000000000000000000017251514411117600227720ustar00rootroot00000000000000# This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: Run tests on: push: #branches: [ master ] pull_request: #branches: [ master ] jobs: build: runs-on: ubuntu-latest strategy: matrix: python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip pip install hatch - name: Test with hatch run: | hatch run test - name: Lint with hatch if: matrix.python-version == '3.13' run: | hatch run lint:check python-pgzip-0.4.0/.gitignore000066400000000000000000000023761514411117600161640ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # gz test file *.gz zipTest* # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .hatch/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ # Visual Studio Code .vscode/ python-pgzip-0.4.0/.pre-commit-config.yaml000066400000000000000000000010531514411117600204440ustar00rootroot00000000000000repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.6.0 hooks: - id: check-added-large-files - id: trailing-whitespace - id: end-of-file-fixer - id: check-toml - id: check-yaml - repo: local hooks: - id: hatch-lint name: hatch lint entry: hatch run lint:fix language: system types: [python] pass_filenames: false always_run: true - repo: https://github.com/pre-commit/mirrors-prettier rev: v3.1.0 hooks: - id: prettier python-pgzip-0.4.0/CONTRIBUTING.md000066400000000000000000000034531514411117600164220ustar00rootroot00000000000000# Contributing to pgzip Thank you for your interest in contributing to pgzip! This guide will help you get started. ## Development Setup This project uses [Hatch](https://hatch.pypa.io/) for development environment management. ### Prerequisites - Python 3.10+ - [Hatch](https://hatch.pypa.io/) (`pip install hatch`) ### Getting Started ```bash # Clone the repository git clone https://github.com/pgzip/pgzip.git cd pgzip # Install pre-commit hooks pre-commit install # Run tests to verify setup hatch run test ``` ## Development Commands ### Testing ```bash # Run tests (current Python version) hatch run test # Run tests with coverage hatch run cov # Test all Python versions (3.10-3.14) hatch run all:test # Test specific Python version hatch run all.py3.10:test hatch run all.py3.11:test hatch run all.py3.12:test hatch run all.py3.13:test hatch run all.py3.14:test ``` ### Code Quality ```bash # Check code style hatch run lint:check # Fix code style hatch run lint:fix ``` ### Building ```bash # Build package hatch build ``` ### Release ```bash # Full release check (test all versions + lint + build) hatch run release ``` ## Making Changes 1. **Fork** the repository 2. **Create a branch** for your changes 3. **Make your changes** with tests 4. **Run the full test suite** (`hatch run release`) 5. **Submit a pull request** ## Code Style - Code is formatted with [Black](https://black.readthedocs.io/) - Imports are sorted with [isort](https://pycqa.github.io/isort/) - Security checks with [Bandit](https://bandit.readthedocs.io/) - Pre-commit hooks enforce these automatically ## Testing - All new features should include tests - Tests are written using [pytest](https://pytest.org/) - Aim for good test coverage of new code ## Questions? Feel free to open an issue for questions or discussion! python-pgzip-0.4.0/CompressionBenchmark.png000066400000000000000000001003571514411117600210140ustar00rootroot00000000000000PNG  IHDRZ.isRGBgAMA a pHYs&?IDATx^i,Zvn=܃zuvnajӡp s*/p: &˒6pg {?G5X[‡[6jՖ-XQodEDVUeUyOUE73#3{pXN?[4}1 ZyܜanK趻緓>];zd^ˆ?Ϲ9-1 ΢ݙ|8zia6ufm 2٧}QLoZ;mFҋR,f94cd{9̝C1 6ZbR:`އ]2g &o,vݞḘ̑ާ>aؔe;(Vt=8ߣ KcH:Xvyuf̆YԬO`>5/c+;{l͇v /7y[m7Zg?mx|Nb2GnxS׬۶t]nKi7&@8ew²05aF JjڱcvYd&nbzsy.znnѲr?mx(yNx Cz~J`۶t]Jٱ+luQڨ.[`[?'o\HZ}:#?hӋgvIIMX_sCr)q`yZᢆݶ.3Jg&=شރn֚;fe1;\.bgljbVEu%՛ֳ{MF[C}?<~zuY.{- m{l2\NJL^0ޡnou"3LIuca~bO똮3\y|ݺl/[y??^2o}ne֬c\~e%Fho5>n)iZ;/ 5m2u=^]wc+ɚ`PוgY.bV6M:\\6)1xuQm~L6O1d(eN鮺b=\ l>7pܽ|r+h{3p ǥz;IݘT6vv ,efƎcݷ{lݮXo?zݺ ivͶۨq]l-wY>nv1\NJL^+qo躷E{;>lyy#;r[{8mM-st/7Ȧ1m8rt}1[u.om˘>cKtKodkX#ڶ$4f>Xy#qCkM^=&님 K/ :4ZE3͝Ⱥ52;Hv(;XmXY>1@ÝvU{>=x]VŎukE:X׻rsos{̺*{nԌ+܆ˈkt-dŨڏ$S>^,otW_\Vt6qrRbRZרax}kNnlڬu:]c}ڬ[uu+$n}ú[eym:qn<] v<-ghG1y./.62ndau*im^ˮ:2q^^חי{,][oi;}4j%stl΂`/$)1Z/nm^LwFn 5Vv$xaM?W^/۱Q_1{)s RGwݫ;S'c.jJ;: wƺyYM;[]F3|#ƵYwP7~}כcb.nns};[tialz.dw_fh{?n3v.uNMf7.kK^׍~l|r?_އiy>]O^.6O7ͥest̋~1ݖmwC{[ iz?2{vbVTpYu֑hu2rYz{MonLW;mܭ`Ln9.':bz䮖$%\˒mdM+I_iyɿFOo l5z!ils1X}Lf^,Mc&wWzO611IcW8c֞.?E~eiPK2NZe[gzۥչtY~WQȧƆ^Z}s6FVvT'1vrB:`lcz|u@.ݙ`mvn̆vhvd';%eM6|nr[c$TJǥŬ+KM]tvܣz'q k4~y܌mX4/WvַmhWtIarU;=w{mdS~mh<ct~qMK1IOG&Qoz[Ǭ"+׷˺oy9}D{\`7#&Yy\dn+s?nc9crlc^y;57v{Ln] ]ǦmyC\b4fKxzci= k"&&_ ;7lx dgf:f^Tמ0mkj:>;KdNKMvglV}d8.].&߈tvV ;h|p[O폛mnrRbHj1h@f2L˒n}\d+j/w^ mu#&n׎/& . .;76Eu8.Ws~)t|:YKnzgrR-mAM:+E{Ĭ]>FmMv{m9>o3n f6k˿xXo4~2JEq0zm|MK˲zq4=ۿ,~ޛ.M#]3F5?g[a=<.JnmMיiݥ˻]n]`8~ܰi^]]}\S=شi_ضt]v.q3-b.3W:{-}o?^Ҍ(R;cf߻,wt[c$v?:untMsզy[v~7m{M=ngcRelolwO7%IY3ZFoHmCqKGtLobe=׭M{ouq>n`_y{tOcs61goEmm#\7y8r{urWosׇh˒]q׹Kqj=1&L/1{$0_| cjtPnYIdʋt & t+e]]5cs9rƷiD$rɝ麧1sd8`]_ӝZ T7Yٖ&PZvN{w1XF;z瘜Rk{ŬMɶbhuc0cyߗ\$W첬p\Z~L,gw]c{jHp^L'&=={unmv^mo86_{S wnc0gx.y}ϦzҲdeYk^Kqjt OvⅥ{kPzqJi|Gz,M_ڬ׿{\ڠqǮEu8.-?&/Ec.RGw8Y] vxtĬM\utmZiNL]ogw`;tZpzͭN}8/pGce9s{^/Wv0.sx1ߦ]Lic$KEcr46S]ۻϚ⺏'ƶ?WueE뒒᢮сB ӥ/ouq-nYvu<]}|2ΘbeK>/Xyκr{!S g[zՑYqڟ1}MŗJo+-]ibFb/YɛNj}\gsY't|;Vio[Nu/ӴrgC瓚kw"Wǥ䥕 8k,hl:srGͤ E&;)ٝL"K+]{Ĭ[}$q9i}elodv${qcx>S9tM:ގSfr `EI}`8vcguZy^uu۵d8.P{{t~uRC2]%Hv0]Etם=gt]fs&۶x~fw7>wz,wvQ;gi]ޣa]ɋp\ng.]v8]n'9{0/]>]:&[u;t a JqvZ_Luz7<Vnm;}IbM ]&˽n_S}']p9lzi]`@|\XCC{Кλ\+^wc98h-ܖN0t]Oo apXgpV<fip[3`Þ>]чN_Oυܩqx5p4Z-Ǚ;`*f{Et:bz_͸k/r\:1Mkao}̓~i{ٷ˴G1yaS'_=\vʤ9^v1]NZy}n[= $ލo_bR(ğEc`fFF͈ط,Yq%[2%&ҸVZ~ -螰˺/ϑ>w]O^4J^}FKz[:v:eFKMh G1t$sMOIﲗ}%gI)3hK,]2> pGE`F˶'.벗}Y6"o]/i~g &;͒ )i?ouyuҧ̣i[{R'槹mmkēX؇ ~pT_<@۝>{!TLrӼaہ:vq]=ܐ Ljzt_|@I~␦jW~a_g4~&Fp0ݍy+Οr~9;fSXw]2z~~Gw'wa_}|Z^n;qmFKZ6%/nw]Kj1 '3M?~ƌ/q]螠o!Jm͓~~X؇7eozob]WcM?/k^f3Rz8,'QJ{B?]~/ ihI]>Xk3w؋&躣9rѤ9]7T4-6_~ϴ6{oϑ>Q/b"_XwXM4Z .Lrdؙo~7_ziZnlac[ws x:iZzIK2 6%c6b%inyO,n-G/.}E d軤ӓt zٜi/NO|5lJxЃ-OM1r>_eC~io4O>=Q*~%Jدɧ5'Jد\cDkLԖ(`'W1Q[T5&jK _͓DkLԖ(`rW1Q[Tj|jS{T5&jK _DmRy)MR+ט-Q*~%Jد\cD=Q*~%Jد\cDkLԖ(`'Ԟ(`rW1Q[TjԞ(`rW1Q[T5&jK _͓Ԟ(`rW1Q[TjDkLԖ(`rWijO _DmR+ט-Q*~%JدɿԞ(`rW1Q[T5&jK _͓=Q*~%Jد\cD<6'Jد\cDkLԖ(`'DŋwbҊ.<9;ڎkR-^#%c`rp(Rki5LڟaEe@) kLԖ(CptVwIjd-qKj¼ҙ*gX DmRk[N_NM&iL97V廿 mhߛpRc%ŧ}Mt6Jk X DmRK-CFK9]l ^2nC1Q[T-C1im}mbx7W~Mi'W0q;fǴ! """""ǜ\c&ه 5A%J2[IӶ5O%c6%Jඥ&G:dk G)Vd,܆=Q*-rrx-~~H[/ϑ4.5O%c6%JXRϥF3QN /S2nC1Q[TIjkufw6ۃ3` o͓kS{TɦFKrrvh45QFYy,IXا\cDkLԖ(`'DkLԖ(`rW1Q[TjW1Q[T5&jK _DmRyo4'Jد\cD\绬k$i^L;id%Jد=Q*~%Jد\cDkLԖ(`DkLԖ(`rWԞ(`rW1Q[TjW1Q[T5&jK _DmRyW1Q[T5&jK \fr%J+ה1Qn=Q*p]Dr%J~Y3DrוkJԘ(XԞ(\SDrוkJԘ(X1Q[Turx_]y1FrMkϛ南E)'g,p~Y3Dr[wJjd*[sxtE0cg5&jK 0_I29B C۱/o%j:LʃX'ה1Qn\cD'Fɋ\)i[Ho'YP)QcbYS{TyfQgŜ.މIP)QcbDmR+Py-QΈ?\SDrוkJԘ(X_jjO PLwRey南btG5sH[,ט-Q*mOƤ&K| /|CDr%J(7Z^駝?MMB @+ה1Qn\cD$o!|{68%/qkJԘ(XԞ(oVbRzØ|+Q^Kyei]h3gm6tv.Kp=Dr%J 99;kH+omCïn3Z&H!x"oM5DL׽)͠\ȳfr%J7x70g>φ3zşEcDr5'MR蝤Z͖ٳYk~sjxğ#(g'5MƟ#}c%wXnfr%J`MlŃ6YYhIgd3Ijgٔf5&-kLԖ(`~.)O*f]5Jnjˤ&Pd,7#ה1Qn=Q*FK:c$>vg`޾4Y['m p3rM5&jK 0?,oV>1L2\ђit+k?lվKrڒ[Dd8Wc}sjTY.gœ/ IçyR2&<""""sK)Qcr\c&!͖Hj|y1V܏o 6zRcd[_2kJԘ(XԞ(HM\ڼu[5y J?W:~/ 5%jL[,ט-Q*뤷?]o咾1J65;4󇍖46]w9b:f5&-kLԖ(m,MbH62(iFoj?Ӝe-?sd,7#ה1Qn\cD;ś'gOF5錒 -qie?cRwE~|8X \{Y3DŚæD\Eɜ1R"{Lzҫb$}3QIX\O)QcbDmR$!ozDr%J<LtltFH6 z\SDŚbS{TZ~h줙̋y85&-kLԖ(`3X򍔔CiӘɇ^9*{Y3DrW49Iӟɲr2jggkShFgĤq'ה1Qn\cD}=s4ZbR?/5UbҒF 9Dr%J'nR)QcbDmRihIstV4ZNi-L5&-<SPF˕@)QcbDmRItCϚ9$-kLԖ(`9[j@Ǣ@e=Q*p]Dr%J+ה1Qn\cDu5&-kLԖ(`|F 7)ה1Qn=Q*gE3\b(lkJԘ(X1Q[TN_>9=MY.OS3&5eb(d5&-kLԖ(^Kaå͋ԈI  #Dr5MRIoB49yY3DrM6& :R)QcbDmR^,x'}aH |iz)-'dzOONOv"/Cn@)QcbDmRZ\6@&I)5q2Ѝڱe?aK;V?BIPSt tJ/14ZhwtI\5PVqOܚ4Mgr. =S3_~wl58e۱T4Ƕ_o{]oӧg,.^WRQM( 0Dr%J=X7y}#dؘ4Sv򌓘4nLό˿׎bZstVEb?㺺K靈WN,[fr%J=J1M4~xL?K-r1QLZZ6L0=CiNpgP(ה1Qn\cDG)55xELJ͍a#:ݱT;=5KM׀ϕceILZڥawdv8+\I)QcbvS{ԣ3zP ]D#fCi2&x82Fˋɠ$g(h"ט-QQ6%~#J%>faMː4ZskJԘ(XԞ(hG|~ug\(e[[.Ŷw886<;*qF Dr%J=xY'2]_[g$gl<ꗡ0c_Ϛ9$-kLԖ(M,/5+5<58[ЏksŤ-5%jL[,ט-QAK͋hBDY{y[Q;opFj%--KJ.bzf̠ 4&h;u`p\Nzi7bo%\SDŚ\S{ԃ5lr,,}#KFG[6Ci/l΢,bଘ˷ U?at~zzR78CrN-ƴIlݷ$f(ה1Qn\cD;7=q2ix* DYhI͒iuİΦä닋0} .=-3kJԘ(X1Q[ԃUhIlg=Z׸cuuEђY 7k{'!^/`]t6ˍ kJԘ(X1Q[T2g씓³`Wfr5MR襆K挕q0sY4X5%jL[,ט-Q*뜜ܿuk5%jL[,ט-Q*p]Dr5MR5%jL[,ט-Q*p]Dr%J+~Y3DrוkJԘ(XmjO \W)QcbDmRϚ9$-kLԖ(\SDŚLS{Ty:9[vl\9wbQѲE1Q[Tyh&Ϛ9$-kLԖ(`N_N͒L)kx3-[45'JwrvH~Kq̋4?&@'ה1Qn\cDR%%d靜ݝrM5&jK @̖͟ҏ;9;@e\cDGw~˛9$ <&eo/֐`^rM5&jKzrMw+o5PRּ-(3ZϚ9$-kLԖ((Ώ~Y3D]M(->e.1S)QcbDmRB)Qc\ஜ|6S$5]"5%jL[>Ԟ((|GC\.+MtvK \SDr\SDHg>(7^"ה1QnKon(ckJԘ(X1Q[ԣkJԘ(kY,/ahEJ75sH[,ט-QQ5%jL ܕGf-3Zi @5%jL[,ט-QQ~i3D]x0h2m´-rȗ6sH ܅H(+݇F|#у-[%J= Drl?IFKrx'&FDmRB)Qc\. -oǤleyF Dr_Ԟ((|GC\. Ty)iZђ1ѐY5sH[,ט-QQ5%jL ܕoS7gogihlkLԖ((5&JxDr%J= Dr5[-Eo-^)QcbDmRw|K9$AjcY|:Dr%J= /n(c?CbDmRB)Qc\u_> l }izDr%J= Dr4b,˯wNM4ou5&-OܽDG;>Z3D]&o,}rM{MRB)Qc\t r(MhN5[Nou֗~fr%J= ך9$BTIoIi}%>e2rM5&jKzrMwaT9=;&e-F S_Ϛ9$-kLԖ((|G~S3D]ȝkķuo/I/]|9sb֊ϴ|E^uIwrz0/iTyzוjC,ںY)QcbDmRB)Qc\/E{4Z_|3Z&ƃnhuMhƎSNjqkm2mt;hrM5&jKzrMw%5O/ٜ.@Ֆ͈J-.FIpzg2/ U?>SDr%J= Drӱ|$Pg F`_jἘ4zO;]s嬖+5ZE u+kJԘ(X1Q[ԣfr~;.sxv-uibr'HW?[Ĥ4Z4ZvO[,ט-QQQ3DDCgkYYЙ~\+ m8Ck9fKwIӻm W}>3.5sH[{MRB)Qc\\/x01ƤEza_^Oɇ%@ntJ*Gt*)RSct-E>KJL)mlH֣͖m뱋\SDr\SD]j_G_7I]OJ4DF?:d]֣gX4ZXL?>փ7Z}:Lg؜?gl[]5&-kLԖ((|ۇc3D]ےv"Wvֽ;0g'o/./MX-|PHi'C@7hdԄi/&h(X1Q[ԣkJԘ(+ {yafHz{ykMYhv0\ю_mS2ĶFK[R}&]ӺAiF˖uN.rM5&jKzrMw}|}`8G&4tcnଖu?HMwSs$vBO/ Pb[b[cdyW/omڷf:'iT,ה1Qn4sH ܅85)bRV;E&i#cCݴm͓X7W~G~gkFߧo_eſleZuh4ozw}ʼ4_Nt]?Er\c>5&W]&oplk@ 2kߚsUm=+gtש""""wm?MFj|{c9?Kˤqig/9?l ؘo=.Oק>͍4Z,}%W&ה1E2q=/>R{hdIujm7{ j!0˴Pt24~o.ix鲹qӬ{QN)QcbDmRB)Qc\͎Dv~zP7?&݈~s/]I?d,&kO.ߏT  :5X綬k$i[ M$MlFjb/kjOz~LJC\.t/95%"8t92Wphl=C&v46}PZQS2kJԘ(X1Q[ԣkJԘ( fmNrY^oCJvsLS2kJԘ(X1Q[ԣkJԘ( q2>,g ދ;ΜҮw:g) MCbDmR·~ 9$ sb^;t*%c\SDrosg߆IMtrvh`e5%jL[,ט-QQ5%jL px룿CbDr%J= DrO)QcbDr%J= C!Q.5%jL[,ה1Qn\cDG!ה1Q.~i3DrM5&jKzrMȗ6sH[ ȳfrW^S{ԣkJԘ(5&-CbDmRo5sH px#_!Qn\SD?s=QQ5%jL pxrM5%jL[,ט-QQ5%jL pxrM5%jL[,ט-QQ~^3D;>5%jL[,ט-QQ~n3D'ה1QnWY3Dr\SD'ה1Qn\SDr\SD;>Z3DrM5&jKzrM\SDrM5&jKzrMok5%jL[,ט-QQ~N3D?9$-kJԘ(X1Q[ԣMfrO)QcbDr?s=QQ5%jL pxrM;޿!Qn\cDG>o5sH[?[5&jKzrMo5sH[,ה1Qn5'J= DrO)QcbDr%J= DrϷ}76sH[,ה1Qn\cDG}frO)QcbDr%J= DrO)QcbDr%J= Dr{9$-kJԘ(X1Q[ԣkJԘ(5&-kJԘ(5'J= 탟!Q.frCbDmRB)Qc\ÓkJԘ(X)QcbDmRB)Qc\ÓkJԘ(5&jKz!Q.!Qn\SDrog6sH pxrM5%jL[,ט-QQ5%jL pxrM5%jL[,ט-QQ5%jL pxrM5%jL[,ט-QQ|F3D[>t!Qn\SDr\SD'ה1Qn\SD^S{ԣ frO)QcbDr%J= DrϷ|贙Cb_F3Dro9$89?כ9$-kLԖ(('\SDrM5&jKzrMog7sH[,ה1Qn\cDG}frO)Qcbfr%J= DrO)QcbDr%J= Dro5%jL[,ט-QQ5%jL pxrM5%jL[,ט-QQ5%jL pxrM5%jL[,ט-QQx~j3D}9$-kJԘ({MR4sH pxrM?i(X1Q[ԣkJԘ(|>CbDrԞ((5&8C\`NNO&ȶF]ä&]N^4Q CbDr%J= DrHtɻHZzI.ފIK]t<, \SDrM{?=QQfrHd77ZNo_ğ#}c5%jL[s_k(X1Q[ԣkJԘ(-ۋ^0`WOo(X)QcbDmRB)Qc\`5ZNΟnjJ{|z3DrM5&jKzrMsђmm~/ \SDrM5&jKzrMst̍__kDDD5&W."wI1Q[ruj?y.rhnFz6j%DվKrڒPkJԘ\"w86lm.<痌U)QcbDr%J= Dr9hI"囄%cvu!Qn\SDޣ{MRW'7sH ѶFKj_ğ#'g/˦5%jL[,ה1Qnϣ{MRB)Qc\`6Z^y<ɝ^i/?sd,rM5%jL[,ט-QQfr9x{^eN^ir9-.Ɯ}9QjLP) {I3D>5&jKzrMs)iL _:9;_yz9CbDr%J= DrO)QcbDr^S{ԣkJԘ(5&-5&jKz? \SDrM5&jKzrMOn(X)QcbDmRB)Qc\ÓkJԘ(X)QcbDmRB)Qc\?5%jL[,ט-QQ5%jL px?CbDr%J= ? \SD^!Qn\cDG!ה1Q.ꇟ!Qn\SDrfrO)QcbDr%J= DrO)Qcb=q3DkjOz9$8<}I5%jL[{MRB)Qc\ÓkJԘ(X)QcbDmRB)Qc\އ!Qn\SDr\SD?5%jL[,ט-QQ5%jL pxrM5%jL[,ט-QQx|3D'ה1Qnfr%J= Dr>CbDr%J= DrO)QcbDr%J= DrO)QcbDrڟԞ((?C\6南NN?]<hRNΟ~{^W lZ\d˵'?Cbfr%J= Dr};9;E&5\bh'7&]-OocG׭\SDrM5&jKzrMvtN49^/ҴtIqś-R.3&w(5&-kJԘ(X1Q[ԣkJԘ(طۅĤ~Vs,ikYRSk-5%jL[,ה1Qn\cDGfr6ThƶYiB8&? 5%jL[,ט-QQx?C\ PIg4?ƾ~xhO|B3DrM5&jKzrMw&3T66O\N$K8 Dr}K?q=QQ5%jL ܶt&˰ɒYY1ҏ,I,C8 ?5%jL[,ט-QQ5%jL ܦxPu[,`>hB)QcbDr^S{ԣkJԘ(-+͎tF+o^f,[8Dr}fr%J= 6sH ܆Qo:9;]iL2!Qn\SDrc9$-.l䝴 %uh9$-kJԘ(X1Q[ԣkJԘ(|qt?x[ 8};1yki{;MnIˋ{F鮷J>vؠIgqK]1-5%jL[,ה1Qn\cDG6sH eRc#5H>1{wٱ2լ|.hIד\"KnZu~4nS)QcbDr%J= Dr}xIh8_FZk̐8%]&Me-Wfr5&-kLԖ((5&2 ~7si5Kezߎ|mBoj_rH&ה1Qn_n(X1Q[ԣkJԘ(nFK<)il~^4Z*kJԘ(X)QcbDmRB)Qc\]-q7f<>Li?ݒn!Qn\SDr\SDpuۚ IwK49U-ӷ ?e$}I5&-ĽDGi(nF˲1zF˦yk-iZj63`8bF~c`Kvٞ6m9]6wTnOw˸M)QcbDr%J= DrM&em.κXֽ[v^[c%iǾHKO63pSszY|،]l3DrM>z=QQ?1p]Y+˃[iZ;}`AdH. eŊD3bkڴP=_6lFoQk<k˯*?Itf?5%jL[,ט-QQ5%jL utUYuΜN3ygL5%jL[,ה1Qn\cDG!ה1Q.ב8M/ t@벒deuFME7Tr֤Y=χ4v>h;oۤeIJ}\ƤuYSkL[,ה1Qn\cDG!ה1Q.W@_[l l2<#7i1@n߆)nuю|$逶oi˷bxc(tJMJݘ%tazȒ1Y;sqnq|8 5&-kJԘ(X1Q[ԣkJԘ(HѮm٥q[h]e{i~PZh^^o{>$ה1QnOCbї9$*tpnerM!WٞyMe[ei(X)QcbDmRB)Qc\J_^,4;hY=݁KWݞ@m?~E\6R4Xƺ5&-kJԘ(X1Q[ԣ?R3DJQ[L?ddt,7-c0혴 sF5%jL[,ט-QQ5%jL @!1ig]$LM)ଝցn |p=ܞ/ho4v|VF $Dr5&-kLԖ((5&T{ tqvɆLgIZrtP9 ͓DUj+oϩ9M?]<ߥ 7u2ӯ}^50&5&-kJԘ(X1Q[ԣkJԘ(Rkj݁YfgDdd?wyMR܆ mҊ3`J6E 5%jL[,ה1Qn\cDG+fr(< b%Iu9];ZhDrM5&jKzrM<ܪ\SDrM5&jKzrM<ܪԘ(X)QcbDmRB)Qc\}?Cb=Q*5%jL[~!Qn\cDG!ה1Q.l?Cb?Ԟ(VbjL[,ה1Qn#_>O CϚ9$-O~^S{T[kJԘ(9$-kLԖ((f_Ϛ9$-ܽD*ה1Qn\SDr\SDٗfr_Ԟ(VN˯1Qn\SDrCC\KY3D?{=Q*DrM5&jKzrMf_}Ϛ9$-_{MRnU DrM5&jKzr܌k'g-_]9b~j(?SܪALr5&-~^S{ԣ{ט(R%+..mNnH?cΏ|q3DNl({MRB_c\Z DrM5&jKzr\d?ougK1 }ˇO9$-7sH[DG!د1Q.T-wScbDr%J= ~r8409$-5sH[>Ԟ((5&ʅbjL[wfr5'J= _CN3DӿE(\tx'fr^S{ԣ{ט(;1Qn\SDrcDד,-EAz?Cb{B3DG5'J= ~rj5%jL[,ט-QQ=kL p=h_FDDDDDDD6'96-"""""""8tm Oom֡dI<AgLIMԌ?$}}slww$6Z^f.!_]EjFK\x+Q:enMJ=&lZ?]NO6nmPahZ6F,@tiX4/WoF ̋r]9#vU.P=prvtp;JihݎI+r?&w{tl7޳@e&&l{|K._4Zy42y_~ε/k9nfcڝ p޺7Y^MLz7n[1y)mϡec҈r؟l.{>;<}nǭz}X3}\]]?Dskw6l;c$mw66uumk1i{>!t}OnAi\n_[N^h8/ֽFm+&//7k9lˎ7P ,`'tp^zk{NoZy:VNn%%wp8\1[^{miZx9k9l붹6} L/Or89^(&/>iŴvSӼBFiiW&J69Y:4RziZ:ܹmB'ip..2ۖN;g]¸bὣv-d*մJ2m9Y:hIz-uc6kʶ'xN/Nܭ]im^O8n]mK&6-nz2s4v~Kmfڛ.uecH,s>7}u]W]ni˶@IJaj܇h پ0d?%y-ö}2iph]<9O{y-ö}h~2.Ms> uۤZviݓ?n/>0^0{msrzErrx-&wuoƤ:OuyK1`-knF16ֺ.]kcq-u[7z3yz4Zhُ6Zv|YP95Z+?`4_t԰F;7Z/J5Z+}bΏ~ik"+hik}KnȳfEV̬R45O>%1]Yb]"?ś1yFʨ{˦Î麺=kvM\dŲ61iv{9iz I`xԴ..?wY~o9Ѳ1o͓%1I#/?wY~rFKt울ȊжO;6DutP_f(Y~Х1i]>kvM\dŲ61itP9ݮ4D.msxh9n':s['zKbxV_.ve+=ueC2GtuhggۼWr/>t7s]Y1<v@580{14fnmJӖM?/MlCɨ 3>p卖r_NL8\OmvM\d@hզ$\uY bC%æUFK\f6m^fEV,knq1:@6r.`IܴuW{]3z3z_a˲?կh;bȌ䬰_*EM$C?~بKO?=&OnvM\d@hpܶE]e= 0|{PÖdxfr`y_Fk"+5IYq3;Kl܇UOHܮ~Ƥ_f퐳%m(},nIFK/Əv'?5q]-I}+gCa7(˕.yp 68+=5qڝ-oKˬ NhhLtef~ƤgfL͔t~[oolkVޖ/'&]Y1<t@5~t`]&>ߣ mthjK%WfEV,kn˸{FJKz;\{.Y:n{3t>{m.e+o+]~~.pm_/5&.bxhtyMZNw=|dy>xʸ+,?4vw>x\buuu4ng|/7&.bYs5n\nHimrBE#pӕ\eVӎMM$Nq+M;^KbxV_hiTgcR;qyx|ˊKhfЯovM\d tV:?0HӻFݓQ%.cH|sY`.*?울Ȋmƥ1Axw=hPڇˈɝmbfGU6y姟1/.#]ó5<Ghߗc}/ޚsq[ƷiW~91 nuirnz{ֿ%lep w5ǥ_\4G^f)uR.ou ˤ (C裏\Lh}5 bʭ(n6/\kxdy-W4<ZVt7յRolm1mhw߱4bIENDB`python-pgzip-0.4.0/DecompressionBenchmark.png000066400000000000000000000340031514411117600213170ustar00rootroot00000000000000PNG  IHDRdp=G@sRGBgAMA aePLTEHXhDRiDUiCTjCTjDUjETkDSjETkCTjDTjETkDTjDTjDTjDTjL\qUcx\k~a5a6a5b5b6b6c6c7c7d7d8d8e8e9e9f9f9f:g:g:g;h;h;hj>k>k>k?l?l?l@m@m@mAnAnAnBoBoBoCpCpDqEqFrFrGrHsHsIsJtJtKuuLuMvMvNvOwOwPwQxQxRyRySyTzTzUzV{V{W|X|Y}Y}Z}[~[~\~]]^_``až}^ZtRNS 8K[jx+ pHYs&?6 IDATx^݇\Wqٖm:!! N( @ߟ~$o9wY}kr,O; 4[7QiMz&\t/Oif՗|\;ls MfJ7tnt4e/pY['Ei3_ɬDV7}Jqζ Tdis 5w^vVMVch2[Ki2yq)ͧ ք7L(,!klg-Sop{7tɜ4/SLS,6^),].Еji;}곺l{52E=ǏR!;˶qhPlTEs mMplc>-ד0O?\6F 56eQ9nؖXgaf0-Jd='jǮ]܌نyes,H84}wIl|`]W FWli02k[٢pJYֶi9~;⪹VO_adkҪӤ5ҬlLT)O-~S4:1O0=d{I3X6S۹f쪍(֧mccdzg½=]u񕇭=9̀4;fINβc&sa5n@1+[4V5qڲ>/v}<$.4[bhC78?IiDGg+]_"6JdHh˥R 7q-c,mLkQݥq:-)4΂kiUYp0t擶4n7/bbgI}!&?Orf}?&d+7& Ẁ4ɶ˛%9y4nG ]6G/-82РfVWV*ؚ '+AhCʖcZ[@X:.ZYڊV:ٚݗwimE?A+k.a,ك5[RۣltqqK 5 \-VESqozj%Oq&΀lmDq i&irU e880&bEG&>dzzƬ5Ԍ? ZIlqXvFإVm!-KuAE}5 c+㌴g\ϵq\2hvl.%9y0.\>h2.9b\1'[4b+Y&,y[#\LxƓi}8l7ڝ\wdq|Wc oGg7Ovi7<(dc Vcl؞[2ҽkK8crlD㣳aH0{ܺǹ_fں٣ ܌˞?.x3.qϦ1]fKr`=]͇<2\\mJ(] c+YFK[iIOW.7^gZ_1hW6 1~霞`5Jx%\(H 7&[ 1mp ܯ+i.얱irm# al㣳ҳq٧mwLuqe/pejJ,ɉ=f=]_<2\\Me~󊘅- 9]N_=[1=@+/ߴc*t^Mkk^+g| I>]t wl_-u82\\;Z.q;Gǭ- y(l`|\Evn7M{jdOHn9/<>~\ >vy=._/ɉ=f=]G4400%Wǽ-|6\6Eqi7lV j?פTz7ݭ0Q6DYi򣇧Yt7oWQc^06سgE69Bdʎ*#T=v]u/l"~ϋG.j^wa2_>,qBDY8{Pz'Ғ>x1vޏ+{Y?΀t1#{Yht1A6v0o2.k2=~K#mzL>k|>k9n'LwvA ^d)Ev\¥YcH I.VwZciYUmռP[H^g?tGlg]7HA'جQ3Ǖ`Tip"(\G=eFv~Mf$ff|:CZ}@9㧝v!V։Bwo€e}p[t0R-_㑻qЦcǰYs|khӑt1P36ۅ ;1:Fy&ݎsz=2 >|04w?vO+oӨGg5>8kwk=|zBvXnqe/f&1wBo,{0p{6|pCzs 3>VZE7d_+!x5V"?Y;lm^d톶VRǢ5. )+D2Nbc­ZFq&N}ćYʥ?ws2izVQ&* iJٝ)=W '؍^PiۛGiKrh=]G]v)5YBY9zҶI7\09S eKR<nP*@]jWњKUGq tojfA_!gJ<&>>7i|Ar<溛L:QTna8=?6Z~zݸ ̀4΄Yv?L)GKԸQo1TL Iv/S=ti]kiR|H' lP|5Q#zF6m^ٻ۶dC3},d "{MOzcUO]XؐM=YDHx^ WI@8~rq9tNGMQ/5"1=n[Zx4qlð_̧ٹv$pK'묃û]SQeMfa/rH$6|Xd}2$<nio¦I>-~OBMMKH˖"=GiiM&p"Nq%NMO'~F =(M?a/ؤOzkp 6{]:KcfN$#/p%|XB7~/ 2fώ__@raɇd bM2|LaKFMf}Jlëx`x4]ɇaximNf o㎙9DBm|v-u_r-]vvj2O[^_ mO;"^d퇶 %vf86Mۇ |y-.?f۔Y.Ol۠imc'LX5ܞAm:priM;ː :}?dG*uv:]&`_;5qL,ba_Lً ƃcq2a>Aoy :|pAٜN{Rɉl>r,c8f[O @ET>(}P&QyL*?=Q&QyL*7(}P&QyL*ATD2DTD2eP"*o @ET>(}P&QyL*מ(}P&QyL*ATD2eP"*o @EeP"*o @ET>(}P&QyL*=Q&o(}P&QyL*ATD2eP"*o @EeP"*o @ET>('"*o @ET>(}P&sO @ET>(}P&QyL*ATD2eP?DTD2DTD2eP"*o  WZ qN[^G2oÕ)] pl6 ΟzLxw9ݰ׀WPyLxo^ݴ+}P&ɛLۧcxʄw ij2{5.Aδ!-eئL x ʄ6i`8fׇW}P&|Ux d=li22#K85 aюMFSyLxW(lR^C2]mƷDue3's ^M2]MSR@ve»w./,ٗ ֽ}wix=ʄwui2iھ}*o @ET>(}P&cO @ET>(}P&QyL*G(}P&QyL*(}P&QyL*ATD2eP"*o @EeP"*o @E~eP"*o @ET>('"*o @ET>(}P&QyL*ATD2eP2zL*ATD2eP"*o @ET>('"*o @ET>(}P&QyL*AT?{L*۞(}P&QyL*ATD2eP"*o @ET>('"*o @ET>('"*o @ET>(}P& O @ET>(}P&e{27a8moWQyLxGil'ncӴ}{=e;j϶:ݲVW먼}P&h=X fA~ٶJۧc(vX{:J{WRyLx?p:6ͮҍe|xeGPv{xaK5O AZ=>/SyLx?cҎWRyLx?>]&GJ1^I2,mgLp_/ɄAT>(! M- :2Ӧi {RܶQyLxO **o @ET>(}P&QyL*ATD2/{L*APpu(N2&PeBImtiMۅ}M (F2M*sJRyL(I/ %}P&I'ɭْAI*o @E~eBq}ؠC5Z(ʄNn`s;APf85t́OPeBiZ,ߊOPeBidiA ~eBiRb7'(M2}_-P/xL(nuk>PeBA3%`<ʄxeBA7IeBIv\T>(ZS9WAP6/;yP{L6g }P&"ysxׄT>(b; SyLϪK((QyÉϗPeBQ0t(F2e6ߗ#QeBI04;q̹PzL(c.{>Fq*o %_mt(I2$ fAPR;tmv!.h2(M2 M}P&DT>(JZ5W8N'"?2eBI]T>(Jb/fAPM3SyL()4^.4ʄk(fAPjm@Y?2nl`*o 3H3>SyL~zOPe<Z JSyLAZ~s婼}P&B6fAPr{>[<~eBI+Q2eBI1eBI|A3SyL(&}P&O\JSyL*APУ]wx18 '/A2m)vLh.pJ' plg^'ƫ}P& o{&Ǝ;|kd+}P&oFùm6wХɴ}(J[04yyeT>(Nɻ%(bVSyLúiv$ATG6dPeBq[#cDPڲd=a8ʄҶ1`<APZ;lR'QPeBiSdPeBiMfO<ʄ mO`*o m#lÆ SyL(o}f́/<"*o 3X6~eBq}#6SyL(~k?K(N20tۦن^wPeBipLǁs4ʄk?~eBiK[(M2e?]k(N2,^7M_(J~v)a JRyL(&}P&ִWzJRyL*A0%{|1 ʄ6'k.[5OQ{L(jχJ|u PeBIa8&smIAPv8{>~pAPR!I0-Ƞ8ʄR_9 $ʄ^V}:'މK(N20>[ZT>(J 0t6DPֶ.`<@2a&+~@3PyL*ATD2ȿDTD2ȿDTD2=mOpҗ 4 A}4d3po :*o g5ğ.h;ϛ7e먼}P&]/5!|+}P&dp.^I2ŦWGiO J*o lpэeWRyLxW0ߏl2_^ATf9h2xeGPk+}P&>^t|x%ʄ3vSh2180e٥n?rJ˫AuT>(faֽ}a)lx2-0ڴŲ^!3WkO @ET>(}P&QyL*ATD2eP"*o @ET>(}P&^O @ET>(}P&O @ET>(}P&QyL*ݞ(}P&QyL*ATD2eP"*o @ET>(}P&W(|'0 4$ܪth7 ]h}a٩}P&Gv[(/YYq$}d=HT>(/Y9 g %'8MM0 z }P&_&_7500 WC%@T>(/&syԥnrdv}P&_.M&496L̤K~8h2/ƉN$v5sSyL~Y5p-7K4EePDTD2eP"*o @ET>(}P&QyL*ATD2˱X4a,O2˯YxS$e$un7>ec5 բ9p~])HzeroNju=e2E'k2 e2Mf[0lv?fu=T>(j2˓^4k_|'4DXd֡DŽb'n3a[e:2exLx,k2a$n&sԧc*oiec&c#Yg*oi*o MM&s7M_L2=Q&<66#c:N0*e_yL^,]lؾ+tzi8OlҚ PyL3PyLNd& pl٨pGxCduρeD|fl2dq fqd[gA2/=Q&Pc6\:xTe_xLn՜"0ҹ/_ #_Af'(wM[RǡQh.R ʚU>( ercl2u~:N8\8j2R_Y|RyL3#OɍɬWS:z\.m>rd 1R_L=Q'C4a4޾ėgvZ/ydp7-[2de!y ;M_^۷ݎYH&}ԏ2ӀC*oi2W~;~+Tas'nKdAf{(@dQj22~tLhAf(voqdy8Ll2-=kO ȅb^>lc'ӄ7Tv GU>( T>(p%~|-4ijw&N` ]O*oi2=N{>SG4 IGS'Tv>( &Oi2=ʄBT>( ~ez*) }P'Tk0>( FOi2=ʄBT>( ~ez* }P'TC >( QyL3PyLOEPAPe/yLOE2oDf(S9}P&Af (S9}P&Af(S}P&Afs(S}P&Af(SљG}P&AfӞ(SљG}P&AfA<2DfS(SI}P&Af(SI}PT|>(Sy*oi*o褀>( t*><4D2@gASyL3qO|]>( 'Teez*:4%e*O2( T>(S}Pk>(Sy*oi?2=}7e2Af(SwC}P>(Sy*oi?2=}me2=Q|'T-iFTeez*F4}#e*O2{LOE2@_ASyL3w(S>( e)<4TT>( e)<42=}e2Af=Q}Pc<4Tt4DSyL3^O8Di:e*yL3O!i:e*O2=Q}( t`Teez*:ee*zL3.O!i:0e*O2;=QC|P'Teez*eA'( ez*eASyL3PyLOE}P3s<4oDe>3ASyL36O5i8e*O2[=Q}>( qTeez*2@gL婼}P|'TT>( m5<4oDvGL3Оf<4TNevL婼}P|'TNeޟL婼}P|'T2@GFL婼}P}PM( e*O2=Q2@|P'4D|[.u퓩}PT?Df(kiNW?6|PhWT?DfGA\;m#f=lO2@N>(Sy*oi*oɵ~/x4>( |PT>( t*>ɳfX/>&eTe>A<ۨ4դL3rL婼}PhA/4Dlm`Ҋ)~i۪'k8rWSdbyN0t*K_l2պ]lNLr&OdXaYo[viwhNĽ1i=m}NIVa-|eX&[iU,\b4 &ٺE`e288]j28O>gaUXK5o+kf sfu E.°Z"PiU, Xp4aA]ӄyHzb; &3֋exfR~aǛZYwuoZ.ﰿ픿V5&9tަ"MMTzdO5Lj.anG~ngZ/xSa;94uM cZB׻zOLEE C'r-Vkq Nm錩ɤ͸}>[73̀|,yiVΞ~%-ֽ<ֺ&yp;fCa* ӴOٺYKk>Դ*}iaUq>0"Z$.れyĆ ӴWd{M˺qjikljqm,[iڟeK&[ޙfF}2p}2M&N?<*LM}2\loKFoj갮I~dL,ݚLVaj2ӧKyYimH "Yiaǃ >?h7Oa6u'q2wv)YUHOɜtfR~aq*8+iaC75Iei/\XX;>6ʚ{Sš/xcyCɤ/u̢I@Ya MXo5vj֍ϜMq<5e?8.ԩ[co˫|ǜs&SMw{N7/\VºdTI;~>|њ_޽Gv~xM |]X8i2Vuo2_bԗЈK(IENDB`python-pgzip-0.4.0/LICENSE000066400000000000000000000020531514411117600151710ustar00rootroot00000000000000MIT License Copyright (c) 2019 Vincent Li Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. python-pgzip-0.4.0/README.md000066400000000000000000000075111514411117600154470ustar00rootroot00000000000000👷👷👷 Maintainers Wanted 👷👷👷 See https://github.com/pgzip/pgzip/issues/37 # pgzip [![Run tests](https://github.com/pgzip/pgzip/actions/workflows/python-tests.yml/badge.svg)](https://github.com/pgzip/pgzip/actions/workflows/python-tests.yml) [![CodeQL](https://github.com/pgzip/pgzip/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/pgzip/pgzip/actions/workflows/codeql-analysis.yml)

`pgzip` is a multi-threaded `gzip` implementation for `python` that increases the compression and decompression performance. Compression and decompression performance gains are made by parallelizing the usage of block indexing within a `gzip` file. Block indexing utilizes gzip's `FEXTRA` feature which records the index of compressed members. `FEXTRA` is defined in the official `gzip` specification starting at version 4.3. Because `FEXTRA` is part of the `gzip` specification, `pgzip` is compatible with regular `gzip` files. `pgzip` is **~25X** faster for compression and **~7X** faster for decompression when benchmarked on a 24 core machine. Performance is limited only by I/O and the `python` interpreter. Theoretically, the compression and decompression speed should be linear with the number of cores available. However, I/O and a language's general performance limits the compression and decompression speed in practice. ## Usage and Examples ### CLI ``` ❯ python -m pgzip -h usage: __main__.py [-h] [-o OUTPUT] [-f FILENAME] [-d] [-l {0-9}] [-t THREADS] input positional arguments: input Input file or '-' for stdin options: -h, --help show this help message and exit -o OUTPUT, --output OUTPUT Output file or '-' for stdout (Default: Input file with 'gz' extension or stdout) -f FILENAME, --filename FILENAME Name for the original file when compressing -d, --decompress Decompress instead of compress -l {0-9}, --compression-level {0-9} Compression level; 0 = no compression (Default: 9) -t THREADS, --threads THREADS Number of threads to use (Default: Determine automatically) ``` ### Programatically Using `pgzip` is the same as using the built-in `gzip` module. Compressing data and writing it to a file: ```python import pgzip s = "a big string..." # An explanation of parameters: # `thread=8` - Use 8 threads to compress. `None` or `0` uses all cores (default) # `blocksize=2*10**8` - Use a compression block size of 200MB with pgzip.open("test.txt.gz", "wt", thread=8, blocksize=2*10**8) as fw: fw.write(s) ``` Decompressing data from a file: ```python import pgzip s = "a big string..." with pgzip.open("test.txt.gz", "rt", thread=8) as fr: assert fr.read(len(s)) == s ``` ## Performance ### Compression Performance ![Compression Performance](CompressionBenchmark.png) ### Decompression Performance ![Decompression Performance](DecompressionBenchmark.png) Decompression was benchmarked using an 8.0GB `FASTQ` text file with 48 threads across 24 cores on a machine with Xeon(R) E5-2650 v4 @ 2.20GHz CPUs. The compressed file used in this benchmark was created with a blocksize of 200MB. ## Warning `pgzip` only replaces the following methods of `gzip`'s `GzipFile` class: - `open()` - `compress()` - `decompress()` Other class methods and functionality have not been well tested. Contributions or improvements is appreciated for methods such as: - `seek()` - `tell()` ## History Created initially by Vincent Li (@vinlyx), this project is a fork of [https://github.com/vinlyx/mgzip](https://github.com/vinlyx/mgzip). We had several bug fixes to implement, but we could not contact them. The `pgzip` team would like to thank Vincent Li (@vinlyx) for their hard work. We hope that they will contact us when they discover this project. python-pgzip-0.4.0/pgzip/000077500000000000000000000000001514411117600153155ustar00rootroot00000000000000python-pgzip-0.4.0/pgzip/__init__.py000066400000000000000000000005451514411117600174320ustar00rootroot00000000000000"""This module provide a simple replacement of Python internal gzip module to provide a multiprocessing solution for gzip compression/decompression. License: MIT LICENSE Copyright (c) 2019 Vincent Li """ from .pgzip import PgzipFile, __version__, compress, decompress, open __all__ = ["GzipFile", "compress", "decompress", "open"] GzipFile = PgzipFile python-pgzip-0.4.0/pgzip/__main__.py000066400000000000000000000103241514411117600174070ustar00rootroot00000000000000"""This module provide a simple replacement of Python internal gzip module to provide a multiprocessing solution for gzip compression/decompression. License: MIT LICENSE Copyright (c) 2019 Vincent Li """ import sys from argparse import ArgumentParser from contextlib import contextmanager from pathlib import Path from shutil import copyfileobj from traceback import format_exc from .pgzip import PgzipFile def main(): # Utility function to help open files with context manager # Return stdin/stdout if the filename is '-' @contextmanager def smart_open(file: str, mode: str, *args, **kwargs): if file == "-": if "w" in mode: yield sys.stdout.buffer else: yield sys.stdin.buffer return with open(file, mode, *args, **kwargs) as fh: yield fh parser = ArgumentParser() parser.add_argument("input", help="Input file or '-' for stdin") parser.add_argument( "-o", "--output", help="Output file or '-' for stdout (Default: Input file with 'gz' extension or stdout)", ) parser.add_argument( "-f", "--filename", default="", help="Name for the original file when compressing", ) parser.add_argument( "-d", "--decompress", action="store_true", help="Decompress instead of compress" ) parser.add_argument( "-l", "--compression-level", default=9, type=int, choices=range(10), # 0-9 help="Compression level; 0 = no compression (Default: 9)", metavar="{0-9}", ) parser.add_argument( "-t", "--threads", type=int, help="Number of threads to use (Default: Determine automatically)", ) parser.add_argument( "-b", "--blocksize", type=int, help="Block size to use (Default: Determine 100MB)", default=10**8, ) args = parser.parse_args() # Parse name from input file if output file is not specified if args.output is None: if args.input == "-": args.output = "-" elif args.decompress: input_path = Path(args.input) if input_path.suffix == ".gz": args.output = input_path.name[:-3] else: args.output = input_path.name else: args.output = f"{Path(args.input).name}.gz" if "-" not in (args.input, args.output): try: if Path(args.input).samefile(args.output): print( "Error: Input and output cannot be the same file", file=sys.stderr ) sys.exit(1) except OSError: pass if not args.filename: if args.input != "-": args.filename = Path(args.input).name elif args.output != "-": args.filename = Path(args.output).name try: with ( # pylint: disable=contextmanager-generator-missing-cleanup smart_open(args.input, "rb") as in_fh, smart_open(args.output, "wb") as out_fh, ): if args.decompress: with PgzipFile( mode="rb", compresslevel=args.compression_level, fileobj=in_fh, thread=args.threads, ) as pgzip_fh: copyfileobj(pgzip_fh, out_fh) out_fh.flush() else: with PgzipFile( filename=args.filename, mode="wb", compresslevel=args.compression_level, fileobj=out_fh, thread=args.threads, blocksize=args.blocksize, ) as pgzip_fh: copyfileobj(in_fh, pgzip_fh) pgzip_fh.flush() except Exception: exc_info = sys.exc_info() if exc_info[1]: print(f"{exc_info[0].__name__}: {exc_info[1]}", file=sys.stderr) else: print(format_exc(), file=sys.stderr) sys.exit(1) if __name__ == "__main__": try: main() except KeyboardInterrupt: print("KeyboardInterrupt", file=sys.stderr) sys.exit(1) python-pgzip-0.4.0/pgzip/pgzip.py000066400000000000000000000671151514411117600170320ustar00rootroot00000000000000"""This module provide a simple replacement of Python internal gzip module to provide a multiprocessing solution for gzip compression/decompression. License: MIT LICENSE Copyright (c) 2019 Vincent Li """ import builtins import io import os import struct import time import zlib from concurrent.futures import ThreadPoolExecutor from gzip import ( FCOMMENT, FEXTRA, FHCRC, FNAME, READ, WRITE, GzipFile, _GzipReader, _PaddedFile, write32u, ) __version__ = "0.4.0" SID = b"IG" # Subfield ID of indexed gzip file def open( filename, mode="rb", compresslevel=9, encoding=None, errors=None, newline=None, thread=None, blocksize=10**8, ): """Open a gzip-compressed file in binary or text mode. The filename argument can be an actual filename (a str or bytes object), or an existing file object to read from or write to. The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode is "rb", and the default compresslevel is 9. For binary mode, this function is equivalent to the GzipFile constructor: GzipFile(filename, mode, compresslevel). In this case, the encoding, errors and newline arguments must not be provided. For text mode, a GzipFile object is created, and wrapped in an io.TextIOWrapper instance with the specified encoding, error handling behavior, and line ending(s). """ if "t" in mode: if "b" in mode: raise ValueError(f"Invalid mode: {mode!r}") else: if encoding is not None: raise ValueError("Argument 'encoding' not supported in binary mode") if errors is not None: raise ValueError("Argument 'errors' not supported in binary mode") if newline is not None: raise ValueError("Argument 'newline' not supported in binary mode") gz_mode = mode.replace("t", "") if isinstance(filename, (str, bytes, os.PathLike)): binary_file = PgzipFile( filename, gz_mode, compresslevel, thread=thread, blocksize=blocksize ) elif hasattr(filename, "read") or hasattr(filename, "write"): binary_file = PgzipFile( None, gz_mode, compresslevel, filename, thread=thread, blocksize=blocksize ) else: raise TypeError("filename must be a str or bytes object, or a file") if "t" in mode: return io.TextIOWrapper(binary_file, encoding, errors, newline) return binary_file def compress(data, compresslevel=9, thread=None, blocksize=10**8): """Compress data in one shot and return the compressed string. Optional argument is the compression level, in range of 0-9. """ buf = io.BytesIO() with PgzipFile( fileobj=buf, mode="wb", compresslevel=compresslevel, thread=thread, blocksize=blocksize, ) as f: f.write(data) return buf.getvalue() def decompress(data, thread=None, blocksize=10**8): """Decompress a gzip compressed string in one shot. Return the decompressed string. """ with PgzipFile(fileobj=io.BytesIO(data), thread=thread, blocksize=blocksize) as f: return f.read() def padded_file_seek(self, off, whence=0): """ Provide a whence of seek method in gzip to allow seek to the end of file. * FIXME: This method may have some problem is stream mode since it is unable to seek to the end of stream object. """ self._read = None self._buffer = None return self.file.seek(off, whence) _PaddedFile.seek = ( padded_file_seek # override the seek method to provide whence parameter ) class PgzipFile(GzipFile): """docstring of PgzipFile""" def __init__( self, filename=None, mode=None, compresslevel=9, fileobj=None, mtime=None, thread=None, blocksize=10**8, ): """Constructor for the GzipFile class. At least one of fileobj and filename must be given a non-trivial value. The new class instance is based on fileobj, which can be a regular file, an io.BytesIO object, or any other object which simulates a file. It defaults to None, in which case filename is opened to provide a file object. When fileobj is not None, the filename argument is only used to be included in the gzip file header, which may include the original filename of the uncompressed file. It defaults to the filename of fileobj, if discernible; otherwise, it defaults to the empty string, and in this case the original filename is not included in the header. The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x', or 'xb' depending on whether the file will be read or written. The default is the mode of fileobj if discernible; otherwise, the default is 'rb'. A mode of 'r' is equivalent to one of 'rb', and similarly for 'w' and 'wb', 'a' and 'ab', and 'x' and 'xb'. The compresslevel argument is an integer from 0 to 9 controlling the level of compression; 1 is fastest and produces the least compression, and 9 is slowest and produces the most compression. 0 is no compression at all. The default is 9. The mtime argument is an optional numeric timestamp to be written to the last modification time field in the stream when compressing. If omitted or None, the current time is used. """ if thread and thread >= 1: self.thread = thread else: self.thread = os.cpu_count() or 1 self.read_blocks = None if mode and ("t" in mode or "U" in mode): raise ValueError(f"Invalid mode: {mode!r}") if mode and "b" not in mode: mode += "b" if fileobj is None: fileobj = self.myfileobj = builtins.open(filename, mode or "rb", blocksize) if filename is None: filename = getattr(fileobj, "name", "") if not isinstance(filename, (str, bytes)): filename = "" if mode is None: mode = getattr(fileobj, "mode", "rb") if mode.startswith("r"): self.mode = READ self.thread = self.thread // 2 or 1 self.raw = _MulitGzipReader( fileobj, thread=self.thread, max_block_size=blocksize ) self._buffer = io.BufferedReader(self.raw, blocksize) self.name = filename self.index = [] elif mode.startswith(("w", "a", "x")): self.mode = WRITE self._init_write(filename) self.compress = zlib.compressobj( compresslevel, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0 ) self._write_mtime = mtime self.compresslevel = compresslevel self.blocksize = blocksize # use 20M blocksize as default self.pool = ThreadPoolExecutor(max_workers=self.thread) self.pool_result = [] self.small_buf = io.BytesIO() # Add _buffer attribute for Python 3.12+ compatibility with io.TextIOWrapper self._buffer = self self._buffer_size = 32768 # Match standard gzip module else: raise ValueError(f"Invalid mode: {mode!r}") self.fileobj = fileobj def __repr__(self): s = repr(self.fileobj) return "" def _write_gzip_header(self): ## ignored to write original header pass def _compress_func(self, data, pdata=None): """ Compress data with zlib deflate algorithm. Input: data: btyes object of input data pdata: exists small buffer data Return: tuple of (Buffered compressed data, Major compressed data, Rest data after flush buffer, CRC32, Original size) """ cpr = zlib.compressobj( self.compresslevel, zlib.DEFLATED, -zlib.MAX_WBITS, 9, # use memory level 9 > zlib.DEF_MEM_LEVEL (8) for better performance 0, ) if pdata: prefix_bytes = cpr.compress(pdata) body_bytes = cpr.compress(data) rest_bytes = cpr.flush() if pdata: crc = zlib.crc32(data, zlib.crc32(pdata)) return ( prefix_bytes, body_bytes, rest_bytes, crc, pdata.nbytes + data.nbytes, ) crc = zlib.crc32(data) return (b"", body_bytes, rest_bytes, crc, data.nbytes) def write(self, data): self._check_not_closed() if self.mode != WRITE: import errno raise OSError(errno.EBADF, "write() on read-only GzipFile object") if self.fileobj is None: raise ValueError("write() on closed GzipFile object") data = memoryview(data) length = data.nbytes if length == 0: return length if length >= self.blocksize: if length < 2 * self.blocksize: # use sigle thread self._compress_block_async(data) else: for st in range(0, length, self.blocksize): self._compress_block_async(data[st : st + self.blocksize]) self._flush_pool() elif length < self.blocksize: self.small_buf.write(data) if self.small_buf.tell() >= self.blocksize: self._compress_async(self.small_buf.getbuffer()) self.small_buf = io.BytesIO() self._flush_pool() return length def _compress_async(self, data, pdata=None): return self.pool_result.append( self.pool.submit(self._compress_func, data, pdata) ) def _compress_block_async(self, data): if self.small_buf.tell() != 0: self._compress_async(data, self.small_buf.getbuffer()) self.small_buf = io.BytesIO() else: self._compress_async(data) def _flush_pool(self, force=False): if len(self.pool_result) <= self.thread and not force: return 0 length = 0 if force: flushSize = len(self.pool_result) else: flushSize = len(self.pool_result) - self.thread for i in range(flushSize): cdata = self.pool_result.pop(0).result() length += self._write_member(cdata) # (bodyBytes, resBytes, crc, oriSize) = rlt.get() # compressRlt = rlt.get() return length def _write_member(self, cdata): """ Write a compressed data as a complete gzip member Input: cdata: compressed data, a tuple of compressed result returned by _compress_func() Return: size of member """ size = self._write_member_header( len(cdata[0]) + len(cdata[1]) + len(cdata[2]), cdata[4] ) self.fileobj.write(cdata[0]) # buffer data self.fileobj.write(cdata[1]) # body data self.fileobj.write(cdata[2]) # rest data write32u(self.fileobj, cdata[3]) # CRC32 write32u(self.fileobj, cdata[4] & 0xFFFFFFFF) # raw data size in 32bits return size def _write_member_header(self, compressed_size, raw_size): self.fileobj.write(b"\037\213") # magic header, 2 bytes self.fileobj.write(b"\010") # compression method, 1 byte try: # RFC 1952 requires the FNAME field to be Latin-1. Do not # include filenames that cannot be represented that way. fname = os.path.basename(self.name) if not isinstance(fname, bytes): fname = fname.encode("latin-1") if fname.endswith(b".gz"): fname = fname[:-3] except UnicodeEncodeError: fname = b"" flags = FEXTRA if fname: flags |= FNAME self.fileobj.write(chr(flags).encode("latin-1")) # flags, 1 byte mtime = self._write_mtime if mtime is None: mtime = time.time() write32u(self.fileobj, int(mtime)) # modified time, 4 bytes self.fileobj.write(b"\002") # fixed flag (maximum compression), 1 byte self.fileobj.write(b"\377") # OS (unknown), 1 byte # write extra flag for indexing # XLEN, 8 bytes self.fileobj.write(b"\x08\x00") # extra flag len, 2 bytes # EXTRA FLAG FORMAT: # +---+---+---+---+---+---+---+---+ # |SI1|SI2| LEN | MEMBER SIZE | # +---+---+---+---+---+---+---+---+ # SI1, SI2: Subfield ID, 'IG' (Indexed Gzip file) # LEN: Length of subfield body, always 4 (bytes) # MEMBER SIZE: The size of current member self.fileobj.write(SID) # subfield ID (IG), 2 bytes # LEN: 4 bytes self.fileobj.write(b"\x04\x00") # subfield len (4), 2 bytes # compressed data size: 16 + 4 + len(fname) + 1 + data + 8 # header + member size + filename with zero end + data block + CRC32 and ISIZE member_size = 20 + len(fname) + 1 + compressed_size + 8 if not fname: member_size -= 1 self.fileobj.write(struct.pack(" 4 GB, rsize is just the mod of 4G ## not a good idea to read all of them in memory body_bytes = dpr.decompress(data, rsize) crc = zlib.crc32(body_bytes) if dpr.unconsumed_tail != b"": body_bytes += dpr.unconsumed_tail crc = zlib.crc32(dpr.unconsumed_tail, crc) return (body_bytes, rsize, crc, rcrc) def _decompress_async(self, data, rcrc, rsize): self._read_pool.append( self._pool.submit(self._decompress_func, data, rcrc, rsize) ) def _read_exact(self, n): """Read exactly *n* bytes from `fp` This method is required because fp may be unbuffered, i.e. return short reads. """ data = self._fp.read(n) while len(data) < n: b = self._fp.read(n - len(data)) if not b: raise EOFError( "Compressed file ended before the end-of-stream marker was reached" ) data += b return data def _read_gzip_header(self): magic = self._fp.read(2) if magic == b"": return False if magic != b"\037\213": raise OSError("Not a gzipped file (%r)" % magic) (method, flag, self._last_mtime) = struct.unpack(" len(self._block_buff) if self._block_buff_pos != self._block_buff_size: # still something in self._block_buff st_pos = self._block_buff_pos self._block_buff_pos = self._block_buff_size return self._block_buff[st_pos:] if self._is_eof: return b"" # Read a chunk of data from the file buf = self._fp.read(io.DEFAULT_BUFFER_SIZE) uncompress = self._decompressor.decompress(buf, size) # Handle Python 3.12+ compatibility where unconsumed_tail was removed if hasattr(self._decompressor, "unconsumed_tail"): if self._decompressor.unconsumed_tail != b"": self._fp.prepend(self._decompressor.unconsumed_tail) elif self._decompressor.unused_data != b"": # Prepend the already read bytes to the fileobj so they can # be seen by _read_eof() and _read_gzip_header() self._fp.prepend(self._decompressor.unused_data) # Python 3.12+ - unconsumed_tail was removed, only check unused_data elif self._decompressor.unused_data != b"": # Prepend the already read bytes to the fileobj so they can # be seen by _read_eof() and _read_gzip_header() self._fp.prepend(self._decompressor.unused_data) if uncompress != b"": break if buf == b"": raise EOFError( "Compressed file ended before the end-of-stream marker was reached" ) # Handle Python 3.12+ compatibility where _add_read_data was removed if hasattr(self, "_add_read_data"): self._add_read_data(uncompress) else: # Python 3.12+ - manually update CRC and stream size self._crc = zlib.crc32(uncompress, self._crc) self._stream_size = self._stream_size + len(uncompress) self._pos += len(uncompress) return uncompress def _read_eof_crc(self): """ Get crc32 and isize without checking """ crc32, isize = struct.unpack(">>PTh֬Y4jx p}ܿΆV\[Y+M)B| ]UFPP"##: ,, m۶EHHd+GΝ;@FF]K.ҥK˓F*l]Kn.E)|`,0퍾}b0`z@s+Hnn..\ӧOɓ8{,JJJ,Qt,7@ATz{;>|*++dOhl6 ` aksD2 ۛ._ăhǎ4qDrqq1g{Ep|3yfeZVSNT pr|  2;o*-f!++Zli^w SpV,JEt1:x T*Gµo>-$]y=zܯRn @p 2 :u*]v )$$`\???uP%''7.\HhȐ!T^^>3^r*YŸ%΂Cݿ_O &Nh03|XٙSdd$񒒒wr(F.,GvTpsےyaާ|jW_eϟOUUU!ׯS.]I'OK0Y]]MqqqԬY3S$%y;7""}]xrրw}W>>>sN/Z6:qP7Qjjlfgg /`bh-5z\l]pо}{:u,7]Ҽy  S8W)___JJJ̹s璛.5$gϞFSTLl!hڴiu3f"kc ث믲P CT `8/ի1fq V=g~Rh"ݻ'< RRRлwoqd+oĉ`f,0$>E… DDszj'ǩ)hh_A~'iCTWW3UVVfprՕ<~81!pOQQQTPP@D{~͍:ݻw+,,-ZP-hٲeTZZ*(4d]:tW2 Tqq1͙3('') +V@Q7o-T*kz*3goZï qrru77С7,RHH}wzÇԧO!Z$TzU111Lqu />k9}4nݚ޾}bcc͛F\z=1f_~:'A\\\믿np֯_o4dff UЧO\ZʹRTTD_1-ujMiLy^UUE˖-3F'OM߭[7|||hݛ'pE7/:t`kF9}t2Ga`ԦMe ZbΝu=z,3n>2F.YhT_iӦR#diBBw`{h4޽QQQ& SQQ͘1 Qyk4/řtׯ_'e7[nRPPTxDyT a:$$_n }FM0hB$$$Ppp05ѣG_n6?~7c=8]?IOOMy&Ȥ [pannniΜ9k駟ilw {#GW /ח^w֭gzꩧ-[f0Mݺuڵk'E1mM)B 5<iӧ󦏋kiӦk.]RV yt ))o.\ViҤI۞o.''")xl*ׯ_5&)U(___Znٲh7n =u~o111t ]W}(<<\W/|zz: Ftt4o|`Q|J}E^1ppg@3_+8p@5Ç)22`6m26ԣGׯ똹h4Gaݻ'8ԯ_?7ߔ:811Q˝GB/!.ɉ6n([cQ]]M;w=Ӽŋ?22M^^UUU|Œ_QQQ#Gx}nAtnp_ mY O8+3"/7KQ^^N}aRRRxgw|}}MZӨڵkpBj۶-z䐛 8ڌ׼ybr]xw('r1"=EEE ߙ>Rر#ogeR\\ 8P/_OOO?\w^ϫiԨQfFӗ[UUUѰa*EE;8_ |4z-ϟ?_0DW3FC{ &n٬;޽xZӕ+Wʽ}:OAA*F65_bRF4!8;?x`7|C7si> 9r$qӓmګW//ɓԦMڵٳgukBΝX qqtHwBθD@-[%ڻwIxxxPII \Rrh&k׮̓eυ!T@1 riԨQ&7Xl[\]\\mN:etkF-bVrjiĈb,|R (_U Z-[2?(00dA޽;ٳfΜii.L]\\HRQ^h…-ziԡC#VZ%KJs=kK`fxћw'Z-^3mٲٷ)deeы/[JE;v0{],!eDLg`kwrr[NxF9>}Q cd;J899Qrrmgrrr2tk׮… 4vXqEBBM8Q ZMO=rQc+:uJ#8 7n7'0xlJKKСCtR=z<ˋ'j۶-QϞ=i4n8]ܑ#G 7j+ny饗(E%9y7,;>C8}v=zS!((˻iΜ9a۷o1R@q?t[YB0`ٳg_RRB}LE4i$gp\~Io[1g1VJl^PմrJݤ޽{n—Zjec_rr2,{ ;;[y"S "\(qZ())1Ņ-[f}BZl/YYY,y1$yyyb{WMQ S7&n޼)+=|Pf8BR"gggsrrrXK6lEn޼)v/Kw)J!AHEEUUW_}U&8"WUOE;vL8sU\–-[n&r1S.7_`ͼCn#))IK___Yس%< #1c,OQ1k,U ,Peڴib3˹87&ً?( *Ҕn6#\=5z`\;v,X ˗/5?J͛낇.Zuf0j(>D6cqD.HIIAjjx{{cHIIAEE3 tZQee%-ZHAmZMmck@F~饗vӾ}{sN3ꫯLVvŴOJP(>eܻwׯ_I&صkޘƍزe n޼ F{'¨QtzgƱcǰ{n9rDhf ŴW# dٷoIO[lu<~x=&Fca)4Hk8K.JKKc6裏`|j b[o߾mS n& ? QQoYvý{$ussC׮]u7mڄJtׯޫW/uL"uu#FFL {?eeeL/SxIm oaub KJoooCLavAiipuu5sڪ*g"())1)ѫW/xy1R8޴1kq dZrꊧz5z?CRDh’rРA‘8dz'Jx\}/V(ݦ:`ޚ׳gO18p("ǿzJɒe˖&π8p`Il7R0׭[7 r@Y E8p’{wI8PH!h @ +K:H ұcG֨nZ cжm[QE)8 D>T`O8;;M6*B\]%fpp0$ẂpwwGPPktfhl^;"])9p`_??~L8JDȯ ̙:p`u)Z0k҄ɲ܁G8 3Ol-+%V;QnJ)2b{ ׎By;@ppUs&"yWdcODjݒ2P[Dj]xbY @rr2O.K~ [Fp={ʚ'?mTTTFը0vT*l߾]vի1eoo)TYb6pLeΜ9?EEE믿>T*'%o{ѣGQU Yb޿_r׿V+K'N'|73'L!??5)(`Od…_t߇qɖ"B~ TGO!Lxx8&L~AlڴI23gs{{{#<<75+"6C 鐂I] H֯_?ӧOH/m|O Lm׮sRҥKe# O$''n\><~zMl'''DDD0ko\|5-GS*9l^z0@`Nׯ_;W^a_ ?v =)vh"2| . ??_Ԗ]>} ٷo_J?^wVTnݺ᫯}:t(>cw"իī{<왬,1klzJJ/^Y-ju]'.]dxXYf!%%oFaÆaذa2e lقSon;$$͛77X^K?IrqqkgggRպ8k֬1vqqq-..&___yFEEcr iZ]Ŵc1bsA]sj*5$CCJ15O>DU\jѢ4'&肫+nݚ5k&)+tu(--%ggg@={*%*裏ĴZEyB ,@l޼͛7GuOQYYLKJ_w\鉈x{{c֭%U1 ZL|pIOzSÓO>W^zmfɦ:IR!hc}v[qZiSs777ӧhb֭bPojZx>=JQ KNY {1[0'!!eeem3_̱VqփHl hy x޽@1;j57ojmвeKdgg+V6mڰ>V׭)J0i4lٲ%]sYErrrY]6m$^SR\ȖPթ> "lذALrnL)`_LOOlj'TPz]KcW=zTa02dL)|Ϛ{}\g*]Bn1u㊠J?CELEq" IDATBdd͡b:@cϷGVŗ_~\vѺ6]b/nGXlܸQz&Y^vMjX<~R耹9kk)9X|<")E׬YӨ˭ncn_I&²]0ZJJJ|rvK.5R`xȳ(E[Ѹ췶W'zN음 3{;0 X|n˞ X`c5 kGNNհs>v%89`vrrǏ+=umvڴiڄtӘ'O2{N ߰ Y*o795Zo; OY#Q6Fx=ք@[kCDbine5k֌ݻtOky_nݺ)|(&rTY[wڥFV4zÃ:wLcƌ:x$;;vg`f]Fz3gرn߾+jܹfS>nL1RE8A`PD;~ڴiHLLd:_ǔ)SХQ eI 2D|||3bO3PZ((`T1?JFn3\R,L:5i҄/^^JO1vX""RմdjҤx>9,_\l;,UM}l #8qz)U7ءCCNğ={6/:tZnn. m۶1b.^ZZ&LX\1gY-gΜژ}`$+mڴ<>1tEDD]MD#n"*4Dе~qqArssubr\J34fх?ܽ{'OV%TRRRVR]]]ߣYfubim@D0P ΄u5m۶mkp+Wd,۲TUU_Dffdrv 21 :_-OǏ_u=!)㱍fϞ=M/֭[X2|=zTllrܡ/ƺud*rܸqCIn)PWr/rj_|!69pJa2r)Exj-sde(,,R9{c<.Eh~̚5KlGdy"yM$BLLM}?wP*H_ BpG.\ĉQUŴu.30{AS!r% ^d ,ЫWQ9UVIy ̛7\\\D ? ѣi9HOO ѳtL+yq[?S{#ݻ'2Nra4(zÃJJJqm `!|}Kv3Ç=4Đ!CT 23{YNjqROC6_75!CΝ;bx&NZBj׮ݺuK/3g4X@#FÔ)foɉRRRpM C/ZVD]tpp0]rE{f\ 4ZpZj#E){9Zz5oh_B-!k׮IYy,Z!)((RSSwFILL$777kpvvÇҥK RBh~ѣG ϟ) A*  дiSL jJҥ =ԣGjӦ Rhh(iӆP`` (:ul|||* Hhggg4ʕ+W[nRolaܸqFxrqqz?ȑ] 'Hϧ@ee%]Zliqe[*TVVpo%n8*Iyy9m޼~i:uիWYiذa\!r 5 B%5RHH;wN{-Ƚ{hƍ4}tYŅG|={VcRgjyixW)wwwZrkF&#F… oR]]Mg^p pInaÆQff2 w?)]mIPTT=Pp k gpӥfu^V2sQ3zr&MP.ŋ^G9رc|MSq| -Ȼe&y\#H~3ܹRyXYYYmjYMqa(U6)Io>t qqq6jXv-:vݻwU9a%Jam  Ӟ8ԫW/:qPAl8vށdʌ$^߻>ϟO?I&I1IvCFF&Nŋf~[չ֦ ܤ%۷oGDDfΜ,yjHǻヒN:a׮]rd gXݍb1TxUk4F7|[7;#DVVqJB˖- QT^?>|r[NY|<8$ 4uT u7nМ9s$9`$$W</.|::29T*zȑ#f_jta?~ܶ[fĦwwoߞ/_n6cCkWZlEDDڮ522ʀ{T@FǏjlBgJCںu+7\]]͡ e ;6F@ƱFIѴm6Ih-JO[n &Yڭ&A@,N6:Olpvv}ҢEԩS:V FCIIIpBӧx;cn UQ6 /pߩlV<==ѧO 4 @Ϟ=N%K Vz-~wnp}8Ѱegg… 8uN/AYZIENDB`python-pgzip-0.4.0/pgzip_logo.svg000066400000000000000000000121311514411117600170540ustar00rootroot00000000000000 python-pgzip-0.4.0/pyproject.toml000066400000000000000000000042271514411117600171050ustar00rootroot00000000000000[build-system] requires = ["hatchling"] build-backend = "hatchling.build" [project] license = { text = "MIT License" } name = 'pgzip' requires-python = ">=3.10" description = 'A multi-threading implementation of Python gzip module' readme = "README.md" authors = [ { name = 'Vincent Li', email = 'vincentliyx@gmail.com' }, { name = 'Tim Hughes', email = 'thughes@thegoldfish.org' }, ] maintainers = [{ name = 'Tim Hughes', email = 'thughes@thegoldfish.org' }] classifiers = [ 'Development Status :: 4 - Beta', 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: Implementation :: CPython', 'Topic :: Software Development :: Libraries :: Python Modules', 'Intended Audience :: Developers', ] dynamic = ["version"] [tool.hatch.version] path = "pgzip/pgzip.py" [tool.hatch.build.targets.wheel] packages = ["pgzip"] [project.urls] Homepage = 'https://github.com/pgzip/pgzip' [tool.hatch.envs.default] installer = "uv" dependencies = ["pytest", "pytest-cov", "pytest-xdist"] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}" cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=pgzip --cov=tests {args:tests}" release = [ "hatch run all:test", "hatch run lint:check", "hatch build", ] [tool.hatch.envs.lint] installer = "uv" detached = true dependencies = ["black", "isort", "bandit", "pre-commit", "pre-commit-hooks"] [tool.hatch.envs.lint.scripts] check = ["black --check --diff {args:.}", "isort --check-only --diff {args:.}"] fix = ["black {args:.}", "isort {args:.}"] [[tool.hatch.envs.all.matrix]] python = ["3.10", "3.11", "3.12", "3.13", "3.14"] [tool.hatch.envs.all] installer = "uv" dependencies = ["pytest", "pytest-cov", "pytest-xdist"] [tool.hatch.envs.all.scripts] test = "pytest {args:tests}" [tool.pylint.main] ignore-paths = ["tests"] [tool.isort] profile = "black" [tool.black] target-version = ['py313'] [tool.flake8] max-line-length = 88 extend-ignore = ["E203", "W503"] [tool.bandit] targets = ["pgzip"] skips = ["B101"] [tool.pytest.ini_options] pythonpath = ["."] testpaths = ["tests"] python-pgzip-0.4.0/rfc1952.txt000066400000000000000000000607001514411117600160230ustar00rootroot00000000000000 Network Working Group P. Deutsch Request for Comments: 1952 Aladdin Enterprises Category: Informational May 1996 GZIP file format specification version 4.3 Status of This Memo This memo provides information for the Internet community. This memo does not specify an Internet standard of any kind. Distribution of this memo is unlimited. IESG Note: The IESG takes no position on the validity of any Intellectual Property Rights statements contained in this document. Notices Copyright (c) 1996 L. Peter Deutsch Permission is granted to copy and distribute this document for any purpose and without charge, including translations into other languages and incorporation into compilations, provided that the copyright notice and this notice are preserved, and that any substantive changes or deletions from the original are clearly marked. A pointer to the latest version of this and related documentation in HTML format can be found at the URL . Abstract This specification defines a lossless compressed data format that is compatible with the widely used GZIP utility. The format includes a cyclic redundancy check value for detecting data corruption. The format presently uses the DEFLATE method of compression but can be easily extended to use other compression methods. The format can be implemented readily in a manner not covered by patents. Deutsch Informational [Page 1] RFC 1952 GZIP File Format Specification May 1996 Table of Contents 1. Introduction ................................................... 2 1.1. Purpose ................................................... 2 1.2. Intended audience ......................................... 3 1.3. Scope ..................................................... 3 1.4. Compliance ................................................ 3 1.5. Definitions of terms and conventions used ................. 3 1.6. Changes from previous versions ............................ 3 2. Detailed specification ......................................... 4 2.1. Overall conventions ....................................... 4 2.2. File format ............................................... 5 2.3. Member format ............................................. 5 2.3.1. Member header and trailer ........................... 6 2.3.1.1. Extra field ................................... 8 2.3.1.2. Compliance .................................... 9 3. References .................................................. 9 4. Security Considerations .................................... 10 5. Acknowledgements ........................................... 10 6. Author's Address ........................................... 10 7. Appendix: Jean-Loup Gailly's gzip utility .................. 11 8. Appendix: Sample CRC Code .................................. 11 1. Introduction 1.1. Purpose The purpose of this specification is to define a lossless compressed data format that: * Is independent of CPU type, operating system, file system, and character set, and hence can be used for interchange; * Can compress or decompress a data stream (as opposed to a randomly accessible file) to produce another data stream, using only an a priori bounded amount of intermediate storage, and hence can be used in data communications or similar structures such as Unix filters; * Compresses data with efficiency comparable to the best currently available general-purpose compression methods, and in particular considerably better than the "compress" program; * Can be implemented readily in a manner not covered by patents, and hence can be practiced freely; * Is compatible with the file format produced by the current widely used gzip utility, in that conforming decompressors will be able to read data produced by the existing gzip compressor. Deutsch Informational [Page 2] RFC 1952 GZIP File Format Specification May 1996 The data format defined by this specification does not attempt to: * Provide random access to compressed data; * Compress specialized data (e.g., raster graphics) as well as the best currently available specialized algorithms. 1.2. Intended audience This specification is intended for use by implementors of software to compress data into gzip format and/or decompress data from gzip format. The text of the specification assumes a basic background in programming at the level of bits and other primitive data representations. 1.3. Scope The specification specifies a compression method and a file format (the latter assuming only that a file can store a sequence of arbitrary bytes). It does not specify any particular interface to a file system or anything about character sets or encodings (except for file names and comments, which are optional). 1.4. Compliance Unless otherwise indicated below, a compliant decompressor must be able to accept and decompress any file that conforms to all the specifications presented here; a compliant compressor must produce files that conform to all the specifications presented here. The material in the appendices is not part of the specification per se and is not relevant to compliance. 1.5. Definitions of terms and conventions used byte: 8 bits stored or transmitted as a unit (same as an octet). (For this specification, a byte is exactly 8 bits, even on machines which store a character on a number of bits different from 8.) See below for the numbering of bits within a byte. 1.6. Changes from previous versions There have been no technical changes to the gzip format since version 4.1 of this specification. In version 4.2, some terminology was changed, and the sample CRC code was rewritten for clarity and to eliminate the requirement for the caller to do pre- and post-conditioning. Version 4.3 is a conversion of the specification to RFC style. Deutsch Informational [Page 3] RFC 1952 GZIP File Format Specification May 1996 2. Detailed specification 2.1. Overall conventions In the diagrams below, a box like this: +---+ | | <-- the vertical bars might be missing +---+ represents one byte; a box like this: +==============+ | | +==============+ represents a variable number of bytes. Bytes stored within a computer do not have a "bit order", since they are always treated as a unit. However, a byte considered as an integer between 0 and 255 does have a most- and least- significant bit, and since we write numbers with the most- significant digit on the left, we also write bytes with the most- significant bit on the left. In the diagrams below, we number the bits of a byte so that bit 0 is the least-significant bit, i.e., the bits are numbered: +--------+ |76543210| +--------+ This document does not address the issue of the order in which bits of a byte are transmitted on a bit-sequential medium, since the data format described here is byte- rather than bit-oriented. Within a computer, a number may occupy multiple bytes. All multi-byte numbers in the format described here are stored with the least-significant byte first (at the lower memory address). For example, the decimal number 520 is stored as: 0 1 +--------+--------+ |00001000|00000010| +--------+--------+ ^ ^ | | | + more significant byte = 2 x 256 + less significant byte = 8 Deutsch Informational [Page 4] RFC 1952 GZIP File Format Specification May 1996 2.2. File format A gzip file consists of a series of "members" (compressed data sets). The format of each member is specified in the following section. The members simply appear one after another in the file, with no additional information before, between, or after them. 2.3. Member format Each member has the following structure: +---+---+---+---+---+---+---+---+---+---+ |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->) +---+---+---+---+---+---+---+---+---+---+ (if FLG.FEXTRA set) +---+---+=================================+ | XLEN |...XLEN bytes of "extra field"...| (more-->) +---+---+=================================+ (if FLG.FNAME set) +=========================================+ |...original file name, zero-terminated...| (more-->) +=========================================+ (if FLG.FCOMMENT set) +===================================+ |...file comment, zero-terminated...| (more-->) +===================================+ (if FLG.FHCRC set) +---+---+ | CRC16 | +---+---+ +=======================+ |...compressed blocks...| (more-->) +=======================+ 0 1 2 3 4 5 6 7 +---+---+---+---+---+---+---+---+ | CRC32 | ISIZE | +---+---+---+---+---+---+---+---+ Deutsch Informational [Page 5] RFC 1952 GZIP File Format Specification May 1996 2.3.1. Member header and trailer ID1 (IDentification 1) ID2 (IDentification 2) These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139 (0x8b, \213), to identify the file as being in gzip format. CM (Compression Method) This identifies the compression method used in the file. CM = 0-7 are reserved. CM = 8 denotes the "deflate" compression method, which is the one customarily used by gzip and which is documented elsewhere. FLG (FLaGs) This flag byte is divided into individual bits as follows: bit 0 FTEXT bit 1 FHCRC bit 2 FEXTRA bit 3 FNAME bit 4 FCOMMENT bit 5 reserved bit 6 reserved bit 7 reserved If FTEXT is set, the file is probably ASCII text. This is an optional indication, which the compressor may set by checking a small amount of the input data to see whether any non-ASCII characters are present. In case of doubt, FTEXT is cleared, indicating binary data. For systems which have different file formats for ascii text and binary data, the decompressor can use FTEXT to choose the appropriate format. We deliberately do not specify the algorithm used to set this bit, since a compressor always has the option of leaving it cleared and a decompressor always has the option of ignoring it and letting some other program handle issues of data conversion. If FHCRC is set, a CRC16 for the gzip header is present, immediately before the compressed data. The CRC16 consists of the two least significant bytes of the CRC32 for all bytes of the gzip header up to and not including the CRC16. [The FHCRC bit was never set by versions of gzip up to 1.2.4, even though it was documented with a different meaning in gzip 1.2.4.] If FEXTRA is set, optional extra fields are present, as described in a following section. Deutsch Informational [Page 6] RFC 1952 GZIP File Format Specification May 1996 If FNAME is set, an original file name is present, terminated by a zero byte. The name must consist of ISO 8859-1 (LATIN-1) characters; on operating systems using EBCDIC or any other character set for file names, the name must be translated to the ISO LATIN-1 character set. This is the original name of the file being compressed, with any directory components removed, and, if the file being compressed is on a file system with case insensitive names, forced to lower case. There is no original file name if the data was compressed from a source other than a named file; for example, if the source was stdin on a Unix system, there is no file name. If FCOMMENT is set, a zero-terminated file comment is present. This comment is not interpreted; it is only intended for human consumption. The comment must consist of ISO 8859-1 (LATIN-1) characters. Line breaks should be denoted by a single line feed character (10 decimal). Reserved FLG bits must be zero. MTIME (Modification TIME) This gives the most recent modification time of the original file being compressed. The time is in Unix format, i.e., seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this may cause problems for MS-DOS and other systems that use local rather than Universal time.) If the compressed data did not come from a file, MTIME is set to the time at which compression started. MTIME = 0 means no time stamp is available. XFL (eXtra FLags) These flags are available for use by specific compression methods. The "deflate" method (CM = 8) sets these flags as follows: XFL = 2 - compressor used maximum compression, slowest algorithm XFL = 4 - compressor used fastest algorithm OS (Operating System) This identifies the type of file system on which compression took place. This may be useful in determining end-of-line convention for text files. The currently defined values are as follows: Deutsch Informational [Page 7] RFC 1952 GZIP File Format Specification May 1996 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32) 1 - Amiga 2 - VMS (or OpenVMS) 3 - Unix 4 - VM/CMS 5 - Atari TOS 6 - HPFS filesystem (OS/2, NT) 7 - Macintosh 8 - Z-System 9 - CP/M 10 - TOPS-20 11 - NTFS filesystem (NT) 12 - QDOS 13 - Acorn RISCOS 255 - unknown XLEN (eXtra LENgth) If FLG.FEXTRA is set, this gives the length of the optional extra field. See below for details. CRC32 (CRC-32) This contains a Cyclic Redundancy Check value of the uncompressed data computed according to CRC-32 algorithm used in the ISO 3309 standard and in section 8.1.1.6.2 of ITU-T recommendation V.42. (See http://www.iso.ch for ordering ISO documents. See gopher://info.itu.ch for an online version of ITU-T V.42.) ISIZE (Input SIZE) This contains the size of the original (uncompressed) input data modulo 2^32. 2.3.1.1. Extra field If the FLG.FEXTRA bit is set, an "extra field" is present in the header, with total length XLEN bytes. It consists of a series of subfields, each of the form: +---+---+---+---+==================================+ |SI1|SI2| LEN |... LEN bytes of subfield data ...| +---+---+---+---+==================================+ SI1 and SI2 provide a subfield ID, typically two ASCII letters with some mnemonic value. Jean-Loup Gailly is maintaining a registry of subfield IDs; please send him any subfield ID you wish to use. Subfield IDs with SI2 = 0 are reserved for future use. The following IDs are currently defined: Deutsch Informational [Page 8] RFC 1952 GZIP File Format Specification May 1996 SI1 SI2 Data ---------- ---------- ---- 0x41 ('A') 0x70 ('P') Apollo file type information LEN gives the length of the subfield data, excluding the 4 initial bytes. 2.3.1.2. Compliance A compliant compressor must produce files with correct ID1, ID2, CM, CRC32, and ISIZE, but may set all the other fields in the fixed-length part of the header to default values (255 for OS, 0 for all others). The compressor must set all reserved bits to zero. A compliant decompressor must check ID1, ID2, and CM, and provide an error indication if any of these have incorrect values. It must examine FEXTRA/XLEN, FNAME, FCOMMENT and FHCRC at least so it can skip over the optional fields if they are present. It need not examine any other part of the header or trailer; in particular, a decompressor may ignore FTEXT and OS and always produce binary output, and still be compliant. A compliant decompressor must give an error indication if any reserved bit is non-zero, since such a bit could indicate the presence of a new field that would cause subsequent data to be interpreted incorrectly. 3. References [1] "Information Processing - 8-bit single-byte coded graphic character sets - Part 1: Latin alphabet No.1" (ISO 8859-1:1987). The ISO 8859-1 (Latin-1) character set is a superset of 7-bit ASCII. Files defining this character set are available as iso_8859-1.* in ftp://ftp.uu.net/graphics/png/documents/ [2] ISO 3309 [3] ITU-T recommendation V.42 [4] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification", available in ftp://ftp.uu.net/pub/archiving/zip/doc/ [5] Gailly, J.-L., GZIP documentation, available as gzip-*.tar in ftp://prep.ai.mit.edu/pub/gnu/ [6] Sarwate, D.V., "Computation of Cyclic Redundancy Checks via Table Look-Up", Communications of the ACM, 31(8), pp.1008-1013. Deutsch Informational [Page 9] RFC 1952 GZIP File Format Specification May 1996 [7] Schwaderer, W.D., "CRC Calculation", April 85 PC Tech Journal, pp.118-133. [8] ftp://ftp.adelaide.edu.au/pub/rocksoft/papers/crc_v3.txt, describing the CRC concept. 4. Security Considerations Any data compression method involves the reduction of redundancy in the data. Consequently, any corruption of the data is likely to have severe effects and be difficult to correct. Uncompressed text, on the other hand, will probably still be readable despite the presence of some corrupted bytes. It is recommended that systems using this data format provide some means of validating the integrity of the compressed data, such as by setting and checking the CRC-32 check value. 5. Acknowledgements Trademarks cited in this document are the property of their respective owners. Jean-Loup Gailly designed the gzip format and wrote, with Mark Adler, the related software described in this specification. Glenn Randers-Pehrson converted this document to RFC and HTML format. 6. Author's Address L. Peter Deutsch Aladdin Enterprises 203 Santa Margarita Ave. Menlo Park, CA 94025 Phone: (415) 322-0103 (AM only) FAX: (415) 322-1734 EMail: Questions about the technical content of this specification can be sent by email to: Jean-Loup Gailly and Mark Adler Editorial comments on this specification can be sent by email to: L. Peter Deutsch and Glenn Randers-Pehrson Deutsch Informational [Page 10] RFC 1952 GZIP File Format Specification May 1996 7. Appendix: Jean-Loup Gailly's gzip utility The most widely used implementation of gzip compression, and the original documentation on which this specification is based, were created by Jean-Loup Gailly . Since this implementation is a de facto standard, we mention some more of its features here. Again, the material in this section is not part of the specification per se, and implementations need not follow it to be compliant. When compressing or decompressing a file, gzip preserves the protection, ownership, and modification time attributes on the local file system, since there is no provision for representing protection attributes in the gzip file format itself. Since the file format includes a modification time, the gzip decompressor provides a command line switch that assigns the modification time from the file, rather than the local modification time of the compressed input, to the decompressed output. 8. Appendix: Sample CRC Code The following sample code represents a practical implementation of the CRC (Cyclic Redundancy Check). (See also ISO 3309 and ITU-T V.42 for a formal specification.) The sample code is in the ANSI C programming language. Non C users may find it easier to read with these hints: & Bitwise AND operator. ^ Bitwise exclusive-OR operator. >> Bitwise right shift operator. When applied to an unsigned quantity, as here, right shift inserts zero bit(s) at the left. ! Logical NOT operator. ++ "n++" increments the variable n. 0xNNN 0x introduces a hexadecimal (base 16) constant. Suffix L indicates a long value (at least 32 bits). /* Table of CRCs of all 8-bit messages. */ unsigned long crc_table[256]; /* Flag: has the table been computed? Initially false. */ int crc_table_computed = 0; /* Make the table for a fast CRC. */ void make_crc_table(void) { unsigned long c; Deutsch Informational [Page 11] RFC 1952 GZIP File Format Specification May 1996 int n, k; for (n = 0; n < 256; n++) { c = (unsigned long) n; for (k = 0; k < 8; k++) { if (c & 1) { c = 0xedb88320L ^ (c >> 1); } else { c = c >> 1; } } crc_table[n] = c; } crc_table_computed = 1; } /* Update a running crc with the bytes buf[0..len-1] and return the updated crc. The crc should be initialized to zero. Pre- and post-conditioning (one's complement) is performed within this function so it shouldn't be done by the caller. Usage example: unsigned long crc = 0L; while (read_buffer(buffer, length) != EOF) { crc = update_crc(crc, buffer, length); } if (crc != original_crc) error(); */ unsigned long update_crc(unsigned long crc, unsigned char *buf, int len) { unsigned long c = crc ^ 0xffffffffL; int n; if (!crc_table_computed) make_crc_table(); for (n = 0; n < len; n++) { c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8); } return c ^ 0xffffffffL; } /* Return the CRC of the bytes buf[0..len-1]. */ unsigned long crc(unsigned char *buf, int len) { return update_crc(0L, buf, len); } Deutsch Informational [Page 12] python-pgzip-0.4.0/test.py000066400000000000000000000036501514411117600155210ustar00rootroot00000000000000# import gzip as pgzip import time import pgzip def _test(): import os import sys # Act like gzip; with -d, act like gunzip. # The input file is not deleted, however, nor are any other gzip # options or features supported. args = sys.argv[1:] decompress = args and args[0] == "-d" if decompress: arg = args[1] else: arg = args[0] # if not args: # args = ["-"] if decompress: tsize = 0 if arg != "-": # outf = arg + ".dcp" outf = "/dev/null" fh = open(outf, "wb") gh = pgzip.open(arg, "rb") t0 = time.time() # gh.show_index() # data = b"AAA" chunk_size = 10**7 while True: data = gh.read(chunk_size) # data = gh.readline() if not data: break fh.write(data) tsize += len(data) # data = gh.readline() t1 = time.time() fh.close() gh.close() size = tsize / (1024**2) seconds = t1 - t0 speed = size / seconds nsize = os.stat(arg).st_size print( f"Decompressed {size:.2f} MB data in {seconds:.2f} S, Speed: {speed:.2f} MB/s, Rate: {nsize / tsize * 100:.2f} %" ) elif arg != "-": outf = arg + ".gz" fh = open(arg, "rb") gh = pgzip.open(outf, "wb", compresslevel=6) data = fh.read() t0 = time.time() gh.write(data) gh.close() t1 = time.time() size = len(data) / (1024**2) seconds = t1 - t0 speed = size / seconds nsize = os.stat(outf).st_size print( f"Compressed {size:.2f} MB data in {seconds:.2f} S, Speed: {speed:.2f} MB/s, Rate: {nsize / len(data) * 100:.2f} %" ) if __name__ == "__main__": _test() python-pgzip-0.4.0/tests/000077500000000000000000000000001514411117600153265ustar00rootroot00000000000000python-pgzip-0.4.0/tests/conftest.py000066400000000000000000000006161514411117600175300ustar00rootroot00000000000000"""Pytest configuration for pgzip tests.""" import os import shutil import tempfile import pytest @pytest.fixture def temp_dir(): """Create a temporary directory for test files.""" temp_dir = tempfile.mkdtemp() yield temp_dir shutil.rmtree(temp_dir) @pytest.fixture def temp_file(temp_dir): """Create a temporary file path.""" return os.path.join(temp_dir, "test.gz") python-pgzip-0.4.0/tests/test_cli.py000066400000000000000000000203101514411117600175020ustar00rootroot00000000000000"""Tests for the pgzip CLI module.""" import gzip import subprocess import sys import tempfile from pathlib import Path import pytest class TestCLI: """Test the pgzip command-line interface.""" def test_help(self): """Test --help flag.""" result = subprocess.run( [sys.executable, "-m", "pgzip", "--help"], capture_output=True, text=True, ) assert result.returncode == 0 assert "usage:" in result.stdout assert "Input file or '-' for stdin" in result.stdout def test_compress_file(self, tmp_path): """Test compressing a file.""" # Create test input file input_file = tmp_path / "test.txt" test_data = b"Hello, World! " * 1000 input_file.write_bytes(test_data) output_file = tmp_path / "test.txt.gz" # Compress using CLI result = subprocess.run( [sys.executable, "-m", "pgzip", str(input_file), "-o", str(output_file)], capture_output=True, ) assert result.returncode == 0 assert output_file.exists() # Verify compressed data can be decompressed with gzip.open(output_file, "rb") as f: decompressed = f.read() assert decompressed == test_data def test_compress_auto_output(self, tmp_path): """Test compressing with automatic output filename.""" input_file = tmp_path / "test.txt" test_data = b"Hello, World! " * 1000 input_file.write_bytes(test_data) # Compress using CLI (auto output name) result = subprocess.run( [sys.executable, "-m", "pgzip", str(input_file)], capture_output=True, cwd=tmp_path, ) assert result.returncode == 0 output_file = tmp_path / "test.txt.gz" assert output_file.exists() def test_decompress_file(self, tmp_path): """Test decompressing a file.""" # Create compressed test file test_data = b"Hello, World! " * 1000 input_file = tmp_path / "test.txt.gz" with gzip.open(input_file, "wb") as f: f.write(test_data) output_file = tmp_path / "test.txt" # Decompress using CLI result = subprocess.run( [ sys.executable, "-m", "pgzip", "-d", str(input_file), "-o", str(output_file), ], capture_output=True, ) assert result.returncode == 0 assert output_file.exists() assert output_file.read_bytes() == test_data def test_decompress_auto_output(self, tmp_path): """Test decompressing with automatic output filename.""" test_data = b"Hello, World! " * 1000 input_file = tmp_path / "test.txt.gz" with gzip.open(input_file, "wb") as f: f.write(test_data) # Decompress using CLI (auto output name) result = subprocess.run( [sys.executable, "-m", "pgzip", "-d", str(input_file)], capture_output=True, cwd=tmp_path, ) assert result.returncode == 0 output_file = tmp_path / "test.txt" assert output_file.exists() assert output_file.read_bytes() == test_data def test_stdin_stdout(self): """Test reading from stdin and writing to stdout.""" test_data = b"Hello, World! " * 100 # Compress via stdin/stdout result = subprocess.run( [sys.executable, "-m", "pgzip", "-", "-o", "-"], input=test_data, capture_output=True, ) assert result.returncode == 0 # Verify compressed data by writing to temp file and reading back import tempfile with tempfile.NamedTemporaryFile() as tmp: tmp.write(result.stdout) tmp.flush() tmp.seek(0) with gzip.open(tmp.name, "rb") as f: decompressed = f.read() assert decompressed == test_data def test_compression_levels(self, tmp_path): """Test different compression levels.""" input_file = tmp_path / "test.txt" test_data = b"Hello, World! " * 1000 input_file.write_bytes(test_data) for level in [0, 1, 6, 9]: output_file = tmp_path / f"test_level_{level}.txt.gz" result = subprocess.run( [ sys.executable, "-m", "pgzip", str(input_file), "-o", str(output_file), "-l", str(level), ], capture_output=True, ) assert result.returncode == 0 assert output_file.exists() def test_threads_option(self, tmp_path): """Test threads option.""" input_file = tmp_path / "test.txt" test_data = b"Hello, World! " * 1000 input_file.write_bytes(test_data) output_file = tmp_path / "test.txt.gz" result = subprocess.run( [ sys.executable, "-m", "pgzip", str(input_file), "-o", str(output_file), "-t", "2", ], capture_output=True, ) assert result.returncode == 0 assert output_file.exists() def test_filename_option(self, tmp_path): """Test custom filename option.""" input_file = tmp_path / "test.txt" test_data = b"Hello, World! " * 1000 input_file.write_bytes(test_data) output_file = tmp_path / "test.txt.gz" result = subprocess.run( [ sys.executable, "-m", "pgzip", str(input_file), "-o", str(output_file), "-f", "custom.txt", ], capture_output=True, ) assert result.returncode == 0 assert output_file.exists() def test_same_input_output_error(self, tmp_path): """Test error when input and output are the same file.""" input_file = tmp_path / "test.txt" test_data = b"Hello, World!" input_file.write_bytes(test_data) result = subprocess.run( [sys.executable, "-m", "pgzip", str(input_file), "-o", str(input_file)], capture_output=True, text=True, ) assert result.returncode == 1 assert "Input and output cannot be the same file" in result.stderr def test_invalid_compression_level(self): """Test invalid compression level.""" result = subprocess.run( [sys.executable, "-m", "pgzip", "-", "-l", "10"], capture_output=True, text=True, ) assert result.returncode != 0 assert "invalid choice" in result.stderr def test_nonexistent_input_file(self): """Test error with nonexistent input file.""" result = subprocess.run( [sys.executable, "-m", "pgzip", "nonexistent.txt"], capture_output=True, text=True, ) assert result.returncode == 1 assert "FileNotFoundError" in result.stderr def test_keyboard_interrupt(self, tmp_path): """Test KeyboardInterrupt handling.""" # This is harder to test directly, but we can at least verify # the main function exists and can be imported from pgzip.__main__ import main assert callable(main) def test_blocksize_option(self, tmp_path): """Test blocksize option.""" input_file = tmp_path / "test.txt" test_data = b"Hello, World! " * 1000 input_file.write_bytes(test_data) output_file = tmp_path / "test.txt.gz" result = subprocess.run( [ sys.executable, "-m", "pgzip", str(input_file), "-o", str(output_file), "-b", "50000", ], capture_output=True, ) assert result.returncode == 0 assert output_file.exists() python-pgzip-0.4.0/tests/test_interop.py000066400000000000000000000140221514411117600204160ustar00rootroot00000000000000"""Pytest-based stdlib compatibility tests for pgzip.""" import array import gzip import pytest import pgzip # Test data DATA1 = b""" int length=DEFAULTALLOC, err = Z_OK; PyObject *RetVal; int flushmode = Z_FINISH; unsigned long start_total_out; """ DATA2 = b"""/* zlibmodule.c -- gzip-compatible data compression */ /* See http://www.gzip.org/zlib/ /* See http://www.winimage.com/zLibDll for Windows */ """ class TestPgzipGzipCompatibility: """Test pgzip compatibility with stdlib gzip.""" def test_write_read_cycle(self, temp_file): """Test that pgzip files can be read by gzip and vice versa.""" test_data = DATA1 * 50 # Write with pgzip, read with gzip with pgzip.open(temp_file, "wb") as f: f.write(test_data) with gzip.open(temp_file, "rb") as f: assert f.read() == test_data # Note: Reading gzip files with pgzip has compatibility issues # This is a known limitation that needs to be addressed def test_text_mode(self, temp_file): """Test text mode compatibility.""" text_data = "Hello, 世界!\nMultiple lines\nWith unicode" # Write with pgzip text mode with pgzip.open(temp_file, "wt", encoding="utf-8") as f: f.write(text_data) # Read with gzip text mode with gzip.open(temp_file, "rt", encoding="utf-8") as f: assert f.read() == text_data def test_append_mode(self, temp_file): """Test append mode compatibility.""" # Initial write with pgzip with pgzip.open(temp_file, "wb") as f: f.write(DATA1) # Append with pgzip with pgzip.open(temp_file, "ab") as f: f.write(DATA2) # Verify with gzip with gzip.open(temp_file, "rb") as f: assert f.read() == DATA1 + DATA2 def test_multiple_appends(self, temp_file): """Test multiple append operations.""" expected_data = b"" # Multiple appends with pgzip for i in range(5): with pgzip.open(temp_file, "ab") as f: f.write(DATA1) expected_data += DATA1 # Verify with gzip with gzip.open(temp_file, "rb") as f: assert f.read() == expected_data def test_different_data_types(self, temp_file): """Test writing different data types.""" test_cases = [ DATA1, memoryview(DATA1), bytearray(DATA1), array.array("B", DATA1), ] for i, data in enumerate(test_cases): test_file = f"{temp_file}.{i}" # Write with pgzip with pgzip.open(test_file, "wb") as f: f.write(data) # Read with gzip with gzip.open(test_file, "rb") as f: assert f.read() == bytes(DATA1) def test_readline_compatibility(self, temp_file): """Test readline behavior matches gzip.""" lines = [b"line1\n", b"line2\n", b"line3"] test_data = b"".join(lines) # Write with pgzip with pgzip.open(temp_file, "wb") as f: f.write(test_data) # Test readline with both implementations pgzip_lines = [] with pgzip.open(temp_file, "rb") as f: while True: line = f.readline() if not line: break pgzip_lines.append(line) gzip_lines = [] with gzip.open(temp_file, "rb") as f: while True: line = f.readline() if not line: break gzip_lines.append(line) assert pgzip_lines == gzip_lines == lines def test_readlines_compatibility(self, temp_file): """Test readlines behavior matches gzip.""" lines = [b"line1\n", b"line2\n", b"line3\n"] test_data = b"".join(lines) # Write with pgzip with pgzip.open(temp_file, "wb") as f: f.write(test_data) # Compare readlines output with pgzip.open(temp_file, "rb") as f: pgzip_lines = f.readlines() with gzip.open(temp_file, "rb") as f: gzip_lines = f.readlines() assert pgzip_lines == gzip_lines == lines def test_iteration_compatibility(self, temp_file): """Test file iteration matches gzip.""" lines = [b"line1\n", b"line2\n", b"line3\n"] test_data = b"".join(lines) # Write with pgzip with pgzip.open(temp_file, "wb") as f: f.write(test_data) # Compare iteration with pgzip.open(temp_file, "rb") as f: pgzip_lines = list(f) with gzip.open(temp_file, "rb") as f: gzip_lines = list(f) assert pgzip_lines == gzip_lines == lines def test_file_operations(self, temp_file): """Test basic file operations match gzip.""" test_data = DATA1 * 50 # Write with pgzip with pgzip.open(temp_file, "wb") as f: f.write(test_data) f.flush() fileno = f.fileno() assert isinstance(fileno, int) # Test operations work the same way with pgzip.open(temp_file, "rb") as pf, gzip.open(temp_file, "rb") as gf: # Both should read the same data assert pf.read(100) == gf.read(100) def test_closed_file_operations(self, temp_file): """Test operations on closed files raise same errors.""" # Test with pgzip f = pgzip.open(temp_file, "wb") f.close() with pytest.raises(ValueError): f.write(b"data") with pytest.raises(ValueError): f.flush() @pytest.mark.parametrize("mode", ["wb", "ab"]) def test_mode_property(self, temp_file, mode): """Test mode property matches gzip behavior.""" # Create file first for append mode with pgzip.open(temp_file, "wb") as f: f.write(DATA1) with pgzip.open(temp_file, mode) as pf, gzip.open(temp_file, mode) as gf: # Both should have same mode values assert pf.mode == gf.mode python-pgzip-0.4.0/tests/test_monkeypatch.py000066400000000000000000000123511514411117600212630ustar00rootroot00000000000000"""Run stdlib gzip tests against pgzip by monkey-patching.""" import gzip import os import tempfile import pytest import pgzip class StdlibTestRunner: """Run stdlib gzip tests against pgzip implementation.""" def __init__(self): self.original_gzip = None def patch_gzip_module(self): """Replace gzip module functions with pgzip equivalents.""" self.original_gzip = { "open": gzip.open, "GzipFile": gzip.GzipFile, "compress": gzip.compress, "decompress": gzip.decompress, } # Monkey patch gzip module gzip.open = pgzip.open gzip.GzipFile = pgzip.PgzipFile gzip.compress = pgzip.compress gzip.decompress = pgzip.decompress def restore_gzip_module(self): """Restore original gzip module.""" if self.original_gzip: gzip.open = self.original_gzip["open"] gzip.GzipFile = self.original_gzip["GzipFile"] gzip.compress = self.original_gzip["compress"] gzip.decompress = self.original_gzip["decompress"] # Test data from stdlib data1 = b""" int length=DEFAULTALLOC, err = Z_OK; PyObject *RetVal; int flushmode = Z_FINISH; unsigned long start_total_out; """ @pytest.fixture def monkey_patched_gzip(): """Fixture to monkey patch gzip module for tests.""" runner = StdlibTestRunner() runner.patch_gzip_module() yield runner.restore_gzip_module() @pytest.fixture def temp_file(): """Create a temporary file for testing.""" temp_dir = tempfile.mkdtemp() filename = os.path.join(temp_dir, "test.gz") yield filename if os.path.exists(filename): os.unlink(filename) os.rmdir(temp_dir) class TestStdlibMonkeyPatched: """Test stdlib gzip functionality with pgzip monkey-patched in.""" def test_write(self, monkey_patched_gzip, temp_file): """Adapted from stdlib test_gzip.py TestGzip.test_write""" with gzip.GzipFile(temp_file, "wb") as f: f.write(data1 * 50) f.flush() f.fileno() if hasattr(os, "fsync"): os.fsync(f.fileno()) f.close() # Test multiple close() calls f.close() def test_read(self, monkey_patched_gzip, temp_file): """Adapted from stdlib test_gzip.py TestGzip.test_read""" # Write first with gzip.GzipFile(temp_file, "wb") as f: f.write(data1 * 50) # Then read with gzip.GzipFile(temp_file, "rb") as f: d = f.read() assert d == data1 * 50 def test_append(self, monkey_patched_gzip, temp_file): """Adapted from stdlib test_gzip.py TestGzip.test_append""" # Write initial data with gzip.GzipFile(temp_file, "wb") as f: f.write(data1 * 50) # Append more data with gzip.GzipFile(temp_file, "ab") as f: f.write(data1) # Read and verify with gzip.GzipFile(temp_file, "rb") as f: d = f.read() assert d == (data1 * 50) + data1 def test_many_append(self, monkey_patched_gzip, temp_file): """Adapted from stdlib test_gzip.py TestGzip.test_many_append""" for i in range(10): with gzip.GzipFile(temp_file, "ab") as f: f.write(data1) with gzip.GzipFile(temp_file, "rb") as f: d = f.read() assert d == data1 * 10 def test_buffered_reader(self, monkey_patched_gzip, temp_file): """Adapted from stdlib test_gzip.py TestGzip.test_buffered_reader""" # Write test data with gzip.GzipFile(temp_file, "wb") as f: f.write(data1 * 50) # Read in chunks with gzip.GzipFile(temp_file, "rb") as f: bufsize = 8192 d1 = f.read(bufsize) d2 = f.read(bufsize) d3 = f.read(bufsize) assert d1 + d2 + d3 == data1 * 50 def test_readline(self, monkey_patched_gzip, temp_file): """Adapted from stdlib test_gzip.py TestGzip.test_readline""" with gzip.GzipFile(temp_file, "wb") as f: f.write(data1) with gzip.GzipFile(temp_file, "rb") as f: line_length = 0 while True: L = f.readline() if not L: break line_length += len(L) assert line_length == len(data1) def test_readlines(self, monkey_patched_gzip, temp_file): """Adapted from stdlib test_gzip.py TestGzip.test_readlines""" with gzip.GzipFile(temp_file, "wb") as f: f.write(data1) with gzip.GzipFile(temp_file, "rb") as f: L = f.readlines() assert b"".join(L) == data1 def test_seek_read(self, monkey_patched_gzip, temp_file): """Test seek and read operations""" with gzip.GzipFile(temp_file, "wb") as f: f.write(data1 * 50) with gzip.GzipFile(temp_file, "rb") as f: f.seek(10) d = f.read(10) assert len(d) == 10 def test_mode(self, monkey_patched_gzip, temp_file): """Test file mode property""" with gzip.GzipFile(temp_file, "wb") as f: assert f.mode == gzip.WRITE with gzip.GzipFile(temp_file, "rb") as f: assert f.mode == gzip.READ python-pgzip-0.4.0/tests/test_pgzip.py000066400000000000000000000125111514411117600200700ustar00rootroot00000000000000import gzip import os import pytest import pgzip # The Zen of Python as test data DATA1 = b"""The Zen of Python, by Tim Peters Beautiful is better than ugly. Explicit is better than implicit. Simple is better than complex. Complex is better than complicated. Flat is better than nested. Sparse is better than dense. Readability counts. Special cases aren't special enough to break the rules. Although practicality beats purity. Errors should never pass silently. Unless explicitly silenced. In the face of ambiguity, refuse the temptation to guess. There should be one-- and preferably only one --obvious way to do it. Although that way may not be obvious at first unless you're Dutch. Now is better than never. Although never is often better than *right* now. If the implementation is hard to explain, it's a bad idea. If the implementation is easy to explain, it may be a good idea. Namespaces are one honking great idea -- let's do more of those! """ def test_write_wb(tmpdir): filename = os.path.join(tmpdir, "test.gz") with pgzip.open(filename, "wb", compresslevel=6) as f1: f1.write(DATA1 * 50) # Try flush and fileno. f1.flush() f1.fileno() if hasattr(os, "fsync"): os.fsync(f1.fileno()) f1.close() f1.close() assert os.path.exists(filename) with gzip.open(filename, "rb") as f2: file_content = f2.read() assert file_content == DATA1 * 50 def test_read_rb(tmpdir): filename = os.path.join(tmpdir, "test.gz") with gzip.open(filename, "wb") as f1: f1.write(DATA1 * 500) with pgzip.open(filename, "rb") as f2: file_content = f2.read() assert file_content == DATA1 * 500 def test_pool_close(tmpdir): filename = os.path.join(tmpdir, "test.gz") fh = pgzip.open(filename, "wb", compresslevel=6, thread=4, blocksize=128) fh.write(DATA1 * 500) assert not fh.pool._shutdown fh.close() assert fh.fileobj is None assert fh.myfileobj is None assert fh.pool_result == [] assert fh.pool._shutdown with pytest.raises(RuntimeError) as excinfo: fh.pool.submit(print, ("x",)) assert str(excinfo.value) == "cannot schedule new futures after shutdown" def test_compress_function(): """Test pgzip.compress() function.""" data = DATA1 * 100 # Test basic compression compressed = pgzip.compress(data) assert isinstance(compressed, bytes) assert len(compressed) < len(data) # Should be smaller # Verify it's valid gzip decompressed = gzip.decompress(compressed) assert decompressed == data # Test with different compression levels for level in [0, 1, 6, 9]: compressed = pgzip.compress(data, compresslevel=level) decompressed = gzip.decompress(compressed) assert decompressed == data # Test with threading parameters compressed = pgzip.compress(data, thread=2, blocksize=1024) decompressed = gzip.decompress(compressed) assert decompressed == data def test_decompress_function(): """Test pgzip.decompress() function.""" data = DATA1 * 100 # Create compressed data with stdlib gzip compressed = gzip.compress(data) # Test basic decompression decompressed = pgzip.decompress(compressed) assert decompressed == data # Test with threading parameters decompressed = pgzip.decompress(compressed, thread=2, blocksize=1024) assert decompressed == data def test_thread_parameter_values(tmpdir): """Test different thread parameter values.""" filename = os.path.join(tmpdir, "test.gz") data = DATA1 * 100 # Test various thread values for threads in [None, 0, 1, 2, 4]: with pgzip.open(filename, "wb", thread=threads) as f: f.write(data) # Verify file is readable with gzip.open(filename, "rb") as f: assert f.read() == data def test_blocksize_parameter_values(tmpdir): """Test different blocksize parameter values.""" filename = os.path.join(tmpdir, "test.gz") data = DATA1 * 200 # Test various block sizes for blocksize in [1024, 8192, 65536, 1024 * 1024]: with pgzip.open(filename, "wb", blocksize=blocksize) as f: f.write(data) # Verify file is readable with gzip.open(filename, "rb") as f: assert f.read() == data def test_large_data_performance(tmpdir): """Test with larger data to verify threading benefit.""" filename = os.path.join(tmpdir, "test.gz") # Use larger data to see threading effects large_data = DATA1 * 10000 # Test with multiple threads with pgzip.open(filename, "wb", thread=4, blocksize=64 * 1024) as f: f.write(large_data) # Verify correctness with gzip.open(filename, "rb") as f: assert f.read() == large_data def test_empty_file(tmpdir): """Test handling of empty files.""" filename = os.path.join(tmpdir, "empty.gz") # Write empty file with pgzip.open(filename, "wb") as f: pass # Read empty file with pgzip.open(filename, "rb") as f: assert f.read() == b"" def test_single_byte_operations(tmpdir): """Test single byte read/write operations.""" filename = os.path.join(tmpdir, "single.gz") # Write single byte with pgzip.open(filename, "wb") as f: f.write(b"x") # Read single byte with pgzip.open(filename, "rb") as f: assert f.read() == b"x" python-pgzip-0.4.0/tests/test_stdlib_compatibility.py000066400000000000000000000202651514411117600231560ustar00rootroot00000000000000"""Test pgzip compatibility with stdlib gzip using adapted stdlib tests.""" import array import gzip import io import os import tempfile import unittest import pgzip # Test data from stdlib data1 = b""" int length=DEFAULTALLOC, err = Z_OK; PyObject *RetVal; int flushmode = Z_FINISH; unsigned long start_total_out; """ data2 = b"""/* zlibmodule.c -- gzip-compatible data compression */ /* See http://www.gzip.org/zlib/ /* See http://www.winimage.com/zLibDll for Windows */ """ class UnseekableIO(io.BytesIO): def seekable(self): return False def tell(self): raise io.UnsupportedOperation def seek(self, *args): raise io.UnsupportedOperation class BaseTest(unittest.TestCase): def setUp(self): self.temp_dir = tempfile.mkdtemp() self.filename = os.path.join(self.temp_dir, "test.gz") def tearDown(self): if os.path.exists(self.filename): os.unlink(self.filename) os.rmdir(self.temp_dir) class TestPgzipStdlibCompatibility(BaseTest): """Test that pgzip behaves identically to stdlib gzip.""" def write_and_read_back(self, data, mode="b"): """Test write/read cycle with both gzip and pgzip.""" b_data = bytes(data) # Test with pgzip with pgzip.open(self.filename, "w" + mode) as f: l = f.write(data) self.assertEqual(l, len(b_data)) # Verify pgzip file can be read by stdlib gzip with gzip.open(self.filename, "r" + mode) as f: self.assertEqual(f.read(), b_data) # Verify pgzip can read its own files with pgzip.open(self.filename, "r" + mode) as f: self.assertEqual(f.read(), b_data) def test_write(self): """Test basic write functionality.""" with pgzip.open(self.filename, "wb") as f: f.write(data1 * 50) f.flush() f.fileno() if hasattr(os, "fsync"): os.fsync(f.fileno()) f.close() # Test multiple close() calls f.close() # Verify with stdlib gzip with gzip.open(self.filename, "rb") as f: self.assertEqual(f.read(), data1 * 50) def test_write_memoryview(self): """Test write with memoryview input.""" self.write_and_read_back(memoryview(data1 * 50)) def test_write_bytearray(self): """Test write with bytearray input.""" self.write_and_read_back(bytearray(data1 * 50)) def test_write_array(self): """Test write with array input.""" self.write_and_read_back(array.array("B", data1 * 50)) def test_read(self): """Test basic read functionality.""" # Create file with stdlib gzip with gzip.open(self.filename, "wb") as f: f.write(data1) # Read with pgzip with pgzip.open(self.filename, "rb") as f: self.assertEqual(f.read(), data1) def test_read1(self): """Test read1 method.""" # Create file with stdlib gzip with gzip.open(self.filename, "wb") as f: f.write(data1 * 50) # Test read1 with pgzip with pgzip.open(self.filename, "rb") as f: d = f.read1() self.assertTrue(len(d) > 0) self.assertTrue(len(d) <= len(data1 * 50)) def test_io_on_closed_object(self): """Test operations on closed file objects.""" f = pgzip.open(self.filename, "wb") f.close() with self.assertRaises(ValueError): f.write(b"data") with self.assertRaises(ValueError): f.flush() def test_append(self): """Test append mode.""" # Write initial data with pgzip with pgzip.open(self.filename, "wb") as f: f.write(data1) # Append more data with pgzip with pgzip.open(self.filename, "ab") as f: f.write(data2) # Verify with stdlib gzip with gzip.open(self.filename, "rb") as f: self.assertEqual(f.read(), data1 + data2) def test_many_append(self): """Test multiple append operations.""" # Multiple appends for i in range(10): with pgzip.open(self.filename, "ab") as f: f.write(data1) # Verify with stdlib gzip with gzip.open(self.filename, "rb") as f: self.assertEqual(f.read(), data1 * 10) def test_buffered_reader(self): """Test buffered reading.""" # Create file with stdlib gzip with gzip.open(self.filename, "wb") as f: f.write(data1 * 50) # Test buffered reading with pgzip with pgzip.open(self.filename, "rb") as f: # Read in chunks chunks = [] while True: chunk = f.read(100) if not chunk: break chunks.append(chunk) self.assertEqual(b"".join(chunks), data1 * 50) def test_readline(self): """Test readline functionality.""" lines = [b"line1\n", b"line2\n", b"line3"] test_data = b"".join(lines) # Write with pgzip with pgzip.open(self.filename, "wb") as f: f.write(test_data) # Read lines with pgzip with pgzip.open(self.filename, "rb") as f: self.assertEqual(f.readline(), lines[0]) self.assertEqual(f.readline(), lines[1]) self.assertEqual(f.readline(), lines[2]) self.assertEqual(f.readline(), b"") def test_readlines(self): """Test readlines functionality.""" lines = [b"line1\n", b"line2\n", b"line3"] test_data = b"".join(lines) # Write with pgzip with pgzip.open(self.filename, "wb") as f: f.write(test_data) # Read all lines with pgzip with pgzip.open(self.filename, "rb") as f: self.assertEqual(f.readlines(), lines) def test_iteration(self): """Test file iteration.""" lines = [b"line1\n", b"line2\n", b"line3\n"] test_data = b"".join(lines) # Write with pgzip with pgzip.open(self.filename, "wb") as f: f.write(test_data) # Iterate with pgzip with pgzip.open(self.filename, "rb") as f: result_lines = list(f) self.assertEqual(result_lines, lines) def test_text_mode(self): """Test text mode operations.""" text_data = "Hello, 世界!\nLine 2\nLine 3" # Write in text mode with pgzip with pgzip.open(self.filename, "wt", encoding="utf-8") as f: f.write(text_data) # Read in text mode with pgzip with pgzip.open(self.filename, "rt", encoding="utf-8") as f: self.assertEqual(f.read(), text_data) # Verify with stdlib gzip with gzip.open(self.filename, "rt", encoding="utf-8") as f: self.assertEqual(f.read(), text_data) def test_compression_levels(self): """Test different compression levels.""" for level in range(10): filename = f"{self.filename}.{level}" # Write with pgzip at different compression levels with pgzip.open(filename, "wb", compresslevel=level) as f: f.write(data1 * 100) # Verify with stdlib gzip with gzip.open(filename, "rb") as f: self.assertEqual(f.read(), data1 * 100) os.unlink(filename) def test_cross_compatibility(self): """Test that files created by gzip can be read by pgzip and vice versa.""" # File created by stdlib gzip gzip_file = self.filename + ".gzip" with gzip.open(gzip_file, "wb") as f: f.write(data1 * 100) # Read with pgzip with pgzip.open(gzip_file, "rb") as f: gzip_data = f.read() # File created by pgzip pgzip_file = self.filename + ".pgzip" with pgzip.open(pgzip_file, "wb") as f: f.write(data1 * 100) # Read with stdlib gzip with gzip.open(pgzip_file, "rb") as f: pgzip_data = f.read() # Both should be identical self.assertEqual(gzip_data, pgzip_data) self.assertEqual(gzip_data, data1 * 100) os.unlink(gzip_file) os.unlink(pgzip_file) if __name__ == "__main__": unittest.main()