pax_global_header00006660000000000000000000000064151274477340014530gustar00rootroot0000000000000052 comment=78d453f7151eac3e5028d5a556857ee6d6464861 nijel-utidylib-dbcc161/000077500000000000000000000000001512744773400151415ustar00rootroot00000000000000nijel-utidylib-dbcc161/.github/000077500000000000000000000000001512744773400165015ustar00rootroot00000000000000nijel-utidylib-dbcc161/.github/FUNDING.yml000066400000000000000000000000561512744773400203170ustar00rootroot00000000000000github: [nijel] liberapay: nijel polar: nijel nijel-utidylib-dbcc161/.github/dependabot.yml000066400000000000000000000005451512744773400213350ustar00rootroot00000000000000# This file is generated in https://github.com/WeblateOrg/meta/ version: 2 updates: - package-ecosystem: github-actions directory: / schedule: interval: daily labels: - dependencies - automerge - package-ecosystem: pip directory: / schedule: interval: daily labels: - dependencies - automerge nijel-utidylib-dbcc161/.github/release.yml000066400000000000000000000007321512744773400206460ustar00rootroot00000000000000# Copyright © Michal Čihař # # SPDX-License-Identifier: CC0-1.0 # This file is maintained in https://github.com/WeblateOrg/meta/ changelog: categories: - title: Changes and improvements labels: - '*' exclude: labels: - dependencies authors: - weblate - title: Dependency updates labels: - dependencies - title: Translation updates authors: - weblate nijel-utidylib-dbcc161/.github/renovate.json000066400000000000000000000001771512744773400212240ustar00rootroot00000000000000{ "$schema": "https://docs.renovatebot.com/renovate-schema.json", "extends": [ "github>WeblateOrg/meta:renovate" ] } nijel-utidylib-dbcc161/.github/workflows/000077500000000000000000000000001512744773400205365ustar00rootroot00000000000000nijel-utidylib-dbcc161/.github/workflows/pre-commit.yml000066400000000000000000000036131512744773400233400ustar00rootroot00000000000000name: Pre-commit check permissions: contents: read on: push: branches-ignore: - 
renovate/** - weblate pull_request: jobs: pre-commit: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false - name: Get cache tag id: get-date run: | echo "cache_tag=$(/bin/date --utc '+%Y%m')" >> "$GITHUB_OUTPUT" echo "previous_cache_tag=$(/bin/date --date='1 month ago' --utc '+%Y%m')" >> "$GITHUB_OUTPUT" shell: bash - uses: actions/cache@9255dc7a253b0ccc959486e2bca901246202afeb # v5.0.1 id: pre-commit-cache with: path: ~/.cache/pre-commit key: ${{ runner.os }}-pre-commit-${{ steps.get-date.outputs.cache_tag }}-${{ hashFiles('.pre-commit-config.yaml') }} restore-keys: | ${{ runner.os }}-pre-commit-${{ steps.get-date.outputs.cache_tag }} ${{ runner.os }}-pre-commit-${{ steps.get-date.outputs.previous_cache_tag }} ${{ runner.os }}-pre-commit- - name: Setup Python uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: '3.14' - uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 with: version: 0.9.22 enable-cache: false - name: pre-commit run: | uvx pre-commit run --all --show-diff-on-failure uvx pre-commit gc env: RUFF_OUTPUT_FORMAT: github REUSE_OUTPUT_FORMAT: github GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: diff run: git diff if: always() - uses: pre-commit-ci/lite-action@5d6cc0eb514c891a40562a58a8e71576c5c7fb43 # v1.1.0 if: always() with: msg: 'chore(pre-commit): apply code formatting' nijel-utidylib-dbcc161/.github/workflows/setup.yml000066400000000000000000000044401512744773400224230ustar00rootroot00000000000000name: Distribution on: [push, pull_request] jobs: setup: name: Build packages runs-on: ubuntu-24.04 steps: - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false - name: Setup Python uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: '3.14' - uses: 
astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 with: enable-cache: true cache-dependency-glob: '' - name: Install dependencies run: uv pip install --system -e .[dev] - name: build sdist run: uv build - name: twine run: uvx twine check dist/* - name: pydistcheck run: uvx pydistcheck --inspect dist/* - name: pyroma run: uvx pyroma dist/*.tar.gz - name: check-wheel-contents run: uvx check-wheel-contents dist/*.whl - name: check-manifest run: uvx check-manifest -v - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: path: dist/* name: dist release_pypi: if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') runs-on: ubuntu-24.04 name: Publish release to PyPI permissions: # this permission is mandatory for trusted publishing id-token: write needs: - setup steps: - uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: name: dist path: dist - name: Publish package run: uv publish --trusted-publishing always release_github: runs-on: ubuntu-24.04 name: Create release on GitHub permissions: contents: write needs: - setup if: startsWith(github.ref, 'refs/tags/') steps: - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: name: dist path: dist - uses: ncipollo/release-action@b7eabc95ff50cbeeedec83973935c8f306dfcd0b # v1.20.0 with: generateReleaseNotes: true artifacts: dist/* permissions: contents: read nijel-utidylib-dbcc161/.github/workflows/test.yml000066400000000000000000000040111512744773400222340ustar00rootroot00000000000000name: Test on: [push, pull_request] permissions: contents: read jobs: test: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: - ubuntu-latest - macos-latest - windows-latest python-version: - '3.10' - '3.11' - '3.12' - '3.13' - '3.14' name: ${{ matrix.os }}, Python ${{ matrix.python-version }} steps: - uses: 
actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 id: setup_python with: python-version: ${{ matrix.python-version }} - name: Install tidy if: matrix.os == 'ubuntu-latest' run: sudo apt-get install -y libtidy5deb1 - uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 with: save-cache: ${{ github.ref == 'refs/heads/main' }} cache-suffix: ${{ steps.setup_python.outputs.python-version }} version: 0.9.22 - name: Install tidy if: matrix.os == 'macos-latest' run: brew install tidy-html5 - name: Install tidy if: matrix.os == 'windows-latest' run: choco install html-tidy -y - name: Install pip dependencies run: uv sync --all-extras - name: Check used library run: uv run python -c 'import tidy.lib; print(tidy.lib._tidy.lib._name)' - name: Check used library version run: uv run python -c 'import tidy.lib; print(tidy.lib.getTidyVersion())' - name: Test run: uv run pytest --cov=tidy tidy - name: Coverage run: uv run coverage xml - uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2 with: token: ${{secrets.CODECOV_TOKEN}} flags: unittests name: Python ${{ matrix.python-version }}, ${{ matrix.os }}, tidy ${{ matrix.tidy-version }} nijel-utidylib-dbcc161/.gitignore000066400000000000000000000001531512744773400171300ustar00rootroot00000000000000*.swp *.pyc /uv.lock /build/ /dist/ /.venv* /uTidylib.egg-info/ .coverage /.cache/ /.pytest_cache/ /.idea/ nijel-utidylib-dbcc161/.pre-commit-config.yaml000066400000000000000000000022661512744773400214300ustar00rootroot00000000000000# See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - id: check-toml - id: check-merge-conflict - id: debug-statements - 
id: mixed-line-ending args: [--fix=lf] - repo: https://github.com/adrienverge/yamllint rev: v1.37.1 hooks: - id: yamllint - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.14.10 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - id: ruff-format - repo: https://github.com/asottile/blacken-docs rev: 1.20.0 hooks: - id: blacken-docs - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks rev: v2.15.0 hooks: - id: pretty-format-yaml args: [--autofix, --indent, '2', --offset, '2'] - repo: https://github.com/pappasam/toml-sort rev: v0.24.3 hooks: - id: toml-sort-fix - repo: meta hooks: - id: check-hooks-apply - id: check-useless-excludes exclude: ^docs/make.bat$ nijel-utidylib-dbcc161/.readthedocs.yml000066400000000000000000000010501512744773400202230ustar00rootroot00000000000000# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py # Optionally build your docs in additional formats such as PDF and ePub formats: all build: os: ubuntu-24.04 tools: python: '3.14' apt_packages: - libtidy5deb1 # Optionally set the version of Python and requirements required to build your docs python: install: - requirements: docs/requirements.txt nijel-utidylib-dbcc161/.yamllint.yml000066400000000000000000000001401512744773400175660ustar00rootroot00000000000000extends: default rules: line-length: max: 500 level: error document-start: disable nijel-utidylib-dbcc161/CHANGES.rst000066400000000000000000000021221512744773400167400ustar00rootroot00000000000000Changes ======= 1.0.0 ----- * Modernized packaging. * Updated supported Python versions. * Homebrew compatibility. 0.10 ---- * Dropped support for Python 3.7. * Added support for Python 3.12. * Added type hints. * Improved documentation. * Always call CleanAndRepair after parsing. * Fixed handling char_encoding argument.
0.9 --- * Dropped support for Python 3.6. * Added support for Python 3.10 and 3.11. * Compatibility with html-tidy 5.8.0. * Added support for specifying library full path using TIDY_LIBRARY_FULL_PATH. * Added getTidyVersion to get libtidy version. 0.8 --- * Code cleanups. * Fixed typo in 0.7 release notes. 0.7 --- * Dropped support for Python 2. 0.6 --- * First official release on PyPI. 0.5 --- * Fixed compatibility with Debian patched libtidy5deb1. 0.4 --- * Compatibility with html-tidy 5.6.0. * Added support for Python 3. 0.3 --- * Initial release under new maintainer. * Incorporated Debian patches. * Various compatibility fixes (e.g. with 64-bit machines). * Various code cleanups. * New test suite. * New documentation. * Support for new HTML 5 tidy library. nijel-utidylib-dbcc161/LICENSE000066400000000000000000000022061512744773400161460ustar00rootroot00000000000000The MIT License Copyright (c) 2003 Cory Dodt Copyright (c) 2014-2018 Michal Čihař Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
nijel-utidylib-dbcc161/MANIFEST.in000066400000000000000000000003261512744773400167000ustar00rootroot00000000000000include LICENSE include README.* include CHANGES.* include MANIFEST.in include docs/conf.py include docs/Makefile include docs/make.bat include pytest.ini include docs/requirements.txt recursive-include docs *.rst nijel-utidylib-dbcc161/README.rst000066400000000000000000000034611512744773400166340ustar00rootroot00000000000000uTidylib ======== .. image:: https://github.com/nijel/utidylib/actions/workflows/test.yml/badge.svg :target: https://github.com/nijel/utidylib/actions/workflows/test.yml :alt: Build Status .. image:: https://codecov.io/gh/nijel/utidylib/branch/master/graph/badge.svg :target: https://codecov.io/gh/nijel/utidylib :alt: Coverage Status .. image:: https://readthedocs.org/projects/utidylib/badge/?version=latest :target: http://utidylib.readthedocs.org/en/latest/ :alt: Documentation .. image:: https://img.shields.io/pypi/v/uTidylib :target: https://pypi.org/project/uTidylib/ :alt: PyPI - Version This is uTidylib, the Python wrapper for the HTML cleaning library named TidyLib. It supports both original Tidy and new HTML5 enabled Tidy . The package is available on PyPI . Once installed, there are two ways to get help. The simplest is: .. code-block:: sh $ python >>> import tidy >>> help(tidy) . . . Then, of course, there's the API documentation, which is available at . 10 Second Tutorial ------------------ .. code-block:: pycon >>> import tidy >>> print( ... tidy.parseString( ... "Hello Tidy!", ... output_xhtml=1, ... add_xml_decl=1, ... indent=1, ... tidy_mark=0, ... doctype="transitional", ... ) ... ) Hello Tidy! Good luck! 
nijel-utidylib-dbcc161/docs/000077500000000000000000000000001512744773400160715ustar00rootroot00000000000000nijel-utidylib-dbcc161/docs/.gitignore000066400000000000000000000000071512744773400200560ustar00rootroot00000000000000_build nijel-utidylib-dbcc161/docs/Makefile000066400000000000000000000151621512744773400175360ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " xml to make Docutils-native XML files" @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." 
json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/uTidylib.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/uTidylib.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/uTidylib" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/uTidylib" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." 
man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." nijel-utidylib-dbcc161/docs/conf.py000066400000000000000000000057651512744773400174050ustar00rootroot00000000000000# # uTidylib documentation build configuration file, created by # sphinx-quickstart on Wed Aug 13 11:25:46 2014. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. 
# # All configuration values have a default; values that are commented out # serve to show the default. import os import sys # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("..")) # -- General configuration ------------------------------------------------ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "sphinx.ext.autodoc", ] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix of source filenames. source_suffix = ".rst" # The master toctree document. master_doc = "index" # General information about the project. project = "uTidylib" copyright = "uTidylib contributors" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = "1.0" # The full version, including alpha/beta/rc tags. release = version # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ["_build"] # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = "furo" # Output file base name for HTML help builder. htmlhelp_basename = "uTidylibdoc" # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). 
latex_documents = [ ( "index", "uTidylib.tex", "uTidylib Documentation", "uTidylib contributors", "manual", ), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ("index", "utidylib", "uTidylib Documentation", ["uTidylib contributors"], 1), ] # -- Options for Epub output ---------------------------------------------- # Bibliographic Dublin Core info. epub_title = "uTidylib" epub_author = "uTidylib contributors" epub_publisher = "uTidylib contributors" epub_copyright = copyright # A list of files that should not be packed into the epub file. epub_exclude_files = ["search.html"] nijel-utidylib-dbcc161/docs/index.rst000066400000000000000000000031671512744773400177410ustar00rootroot00000000000000Welcome to uTidylib's documentation! ==================================== .. automodule:: tidy .. autofunction:: parse .. autofunction:: parseString .. autoclass:: Document :members: .. autoclass:: ReportItem :members: .. autoexception:: TidyLibError .. autoexception:: InvalidOptionError .. autoexception:: OptionArgError Installing ========== To use uTidylib, you need to have the HTML tidy library installed. Check for instructions how to obtain it. Once you have installed the library, install uTidylib: .. code-block:: sh pip install uTidylib Contributing ============ You are welcome to contribute on GitHub, we use it for source code management, issue tracking and patches submission, see . Running testsuite ================= The testsuite can be executed using pytest: .. code-block:: sh pytest tidy Building documentation ====================== To build the doc, just run: .. code-block:: sh make -C docs html This requires that you have Sphinx installed. The API documentation will be built in the :file:`docs/_build/html/` directory. License ======= .. include:: ../LICENSE ..
include:: ../CHANGES.rst History ======= This is a fork of the original uTidylib, made with the permission of the original author. Originally it incorporated patches from Debian and other distributions, now it also brings compatibility with recent html-tidy versions and works with Python 3. The original source code is still available at https://github.com/xdissent/utidylib/. Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` nijel-utidylib-dbcc161/docs/make.bat000066400000000000000000000150611512744773400175010ustar00rootroot00000000000000@ECHO OFF REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=_build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . set I18NSPHINXOPTS=%SPHINXOPTS% . if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. pickle to make pickle files echo. json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. text to make text files echo. man to make manual pages echo. texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. xml to make Docutils-native XML files echo. pseudoxml to make pseudoxml-XML files for display purposes echo. linkcheck to check all external links for integrity echo.
doctest to run all doctests embedded in the documentation if enabled goto end ) if "%1" == "clean" ( for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i del /q /s %BUILDDIR%\* goto end ) %SPHINXBUILD% 2> nul if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) if "%1" == "singlehtml" ( %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the pickle files. goto end ) if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the JSON files. goto end ) if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in %BUILDDIR%/htmlhelp. goto end ) if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp if errorlevel 1 exit /b 1 echo. 
echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: echo.^> qcollectiongenerator %BUILDDIR%\qthelp\uTidylib.qhcp echo.To view the help file: echo.^> assistant -collectionFile %BUILDDIR%\qthelp\uTidylib.ghc goto end ) if "%1" == "devhelp" ( %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp if errorlevel 1 exit /b 1 echo. echo.Build finished. goto end ) if "%1" == "epub" ( %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub if errorlevel 1 exit /b 1 echo. echo.Build finished. The epub file is in %BUILDDIR%/epub. goto end ) if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdf" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf cd %BUILDDIR%/.. echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdfja" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf-ja cd %BUILDDIR%/.. echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "text" ( %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text if errorlevel 1 exit /b 1 echo. echo.Build finished. The text files are in %BUILDDIR%/text. goto end ) if "%1" == "man" ( %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man if errorlevel 1 exit /b 1 echo. echo.Build finished. The manual pages are in %BUILDDIR%/man. goto end ) if "%1" == "texinfo" ( %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo if errorlevel 1 exit /b 1 echo. echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. goto end ) if "%1" == "gettext" ( %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale if errorlevel 1 exit /b 1 echo. echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 
goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) if "%1" == "xml" ( %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml if errorlevel 1 exit /b 1 echo. echo.Build finished. The XML files are in %BUILDDIR%/xml. goto end ) if "%1" == "pseudoxml" ( %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml if errorlevel 1 exit /b 1 echo. echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 
goto end ) :end nijel-utidylib-dbcc161/docs/requirements.txt000066400000000000000000000000371512744773400213550ustar00rootroot00000000000000furo==2025.12.19 Sphinx==9.1.0 nijel-utidylib-dbcc161/pyproject.toml000066400000000000000000000053561512744773400200660ustar00rootroot00000000000000[build-system] build-backend = "setuptools.build_meta" requires = [ "setuptools>=78.0.2" ] [dependency-groups] dev = [ "pytest-cov==7.0.0", "pytest-github-actions-annotate-failures==0.3.0", "pytest-profiling==1.8.1", "pytest-xdist==3.8.0", "pytest==9.0.2" ] [project] authors = [ {email = "michal@cihar.com", name = "Michal Čihař"} ] classifiers = [ "Development Status :: 5 - Production/Stable", "Environment :: Web Environment", "Intended Audience :: Developers", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Programming Language :: Python", "Topic :: Internet" ] description = "Wrapper for HTML Tidy" dynamic = [ "version" ] license = "MIT" license-files = ["LICENSE"] name = "uTidylib" requires-python = ">=3.10" [project.readme] content-type = "text/x-rst" file = "README.rst" [project.urls] Documentation = "https://utidylib.readthedocs.io/" Download = "https://github.com/nijel/utidylib" Funding = "https://liberapay.com/nijel" Homepage = "https://github.com/nijel/utidylib" "Issue Tracker" = "https://github.com/nijel/utidylib/issues" "Source Code" = "https://github.com/nijel/utidylib" [tool.check-manifest] ignore = [ "*.yaml", "*.yml" ] [tool.isort] force_grid_wrap = 0 include_trailing_comma = true known_first_party = [ "tidy" ] line_length = 88 multi_line_output = 3 use_parentheses = true [tool.pycodestyle] exclude = ".git,.venv*,build" max-line-length = "88" select = "E,W1,W2,W3,W504,W505,W6" [tool.ruff.lint] extend-safe-fixes = [ "ANN", 
"D", "FLY", "RUF005", "SIM", "TCH", "UP" ] ignore = [ "COM812", # CONFIG: incompatible with formatter "D100", "D101", "D102", "D103", "D105", "D107", "D203", # CONFIG: incompatible with D211 "D212", # CONFIG: incompatible with D213 "E501", # CONFIG: formatter "EM101", "ISC001", # CONFIG: incompatible with formatter "PT", # CONFIG: Not using pytest "PTH100", # TODO: Use pathlib "PTH118", # TODO: Use pathlib "PTH120", # TODO: Use pathlib "S101", # TODO: Use of `assert` detected "TRY003" ] select = ["ALL"] [tool.ruff.lint.per-file-ignores] "docs/conf.py" = ["A001", "INP001"] "tidy/lib.py" = ["N802", "N816"] [tool.setuptools] include-package-data = true packages = [ "tidy", "tidy.test_data" ] [tool.setuptools.dynamic.version] attr = "tidy.__version__" [tool.setuptools.package-data] tidy = [ "test_data/*.html" ] [tool.tomlsort] ignore_case = true sort_inline_arrays = true sort_inline_tables = true sort_table_keys = true spaces_before_inline_comment = 2 nijel-utidylib-dbcc161/pytest.ini000066400000000000000000000000451512744773400171710ustar00rootroot00000000000000[pytest] addopts = --doctest-modules nijel-utidylib-dbcc161/tidy/000077500000000000000000000000001512744773400161125ustar00rootroot00000000000000nijel-utidylib-dbcc161/tidy/__init__.py000066400000000000000000000033001512744773400202170ustar00rootroot00000000000000""" The Tidy wrapper. I am the main interface to TidyLib. This package supports processing HTML with Tidy, with all the options that the tidy command line supports. For more information on the tidy options, see the reference. These options can be given as keyword arguments to parse and parseString, by changing dashes (-) to underscores(_). For example: >>> import tidy >>> from __future__ import print_function >>> print(tidy.parseString( ... 'Hello Tidy!', ... output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0, ... doctype='transitional' ... )) # doctest: +NORMALIZE_WHITESPACE Hello Tidy! 
For options like newline and output_encoding, which must be set to one of a fixed number of choices, you can provide either the numeric or string version of the choice; so both tidy.parseString('foo', newline=2) and tidy.parseString('foo', newline='CR') do the same thing. There are no plans to support other features of TidyLib, such as document-tree traversal, since Python has several quality DOM implementations. (The author uses Twisted's implementation, twisted.web.microdom). """ from tidy.error import InvalidOptionError, OptionArgError, TidyLibError from tidy.lib import Document, ReportItem, parse, parseString __all__ = [ "Document", "InvalidOptionError", "OptionArgError", "ReportItem", "TidyLibError", "error", "lib", "parse", "parseString", ] __version__ = "1.0.0" nijel-utidylib-dbcc161/tidy/error.py000066400000000000000000000007121512744773400176150ustar00rootroot00000000000000"""Exceptions for uTidylib.""" from __future__ import annotations __all__ = ("InvalidOptionError", "OptionArgError", "TidyLibError") class TidyLibError(Exception): """Generic Tidy exception.""" class InvalidOptionError(TidyLibError): """Exception for invalid option.""" def __str__(self) -> str: return f"{self.args[0]} was not a valid Tidy option." 
class OptionArgError(TidyLibError): """Exception for invalid parameter.""" nijel-utidylib-dbcc161/tidy/lib.py000066400000000000000000000270001512744773400172310ustar00rootroot00000000000000from __future__ import annotations import ctypes import io import os import os.path import weakref from abc import ABC, abstractmethod from collections.abc import Callable, Mapping from errno import ENOMEM from typing import ( TYPE_CHECKING, Any, BinaryIO, ClassVar, TypeVar, ) from tidy.error import InvalidOptionError, OptionArgError if TYPE_CHECKING: OPTION_TYPE = str | int | bool | None OPTION_DICT_TYPE = dict[str, OPTION_TYPE] LIBNAMES = ( # MacOS Homebrew (ARM) - try first for CI compatibility "/opt/homebrew/lib/libtidy.dylib", "/opt/homebrew/lib/libtidy.5.dylib", "/opt/homebrew/lib/libtidy.58.dylib", # MacOS Homebrew (Intel) "/usr/local/lib/libtidy.dylib", "/usr/local/lib/libtidy.5.dylib", "/usr/local/lib/libtidy.58.dylib", # Windows - Chocolatey installation paths "C:/ProgramData/chocolatey/lib/html-tidy/tools/tidy-5.9.14-win64/bin/tidy.dll", "C:/ProgramData/chocolatey/lib/html-tidy/tools/bin/tidy.dll", "C:/Program Files/tidy-html5/bin/tidy.dll", "C:/tools/tidy-html5/bin/tidy.dll", # Linux "libtidy.so", # MacOS (generic, after Homebrew paths) "libtidy.dylib", # Windows (generic, after specific paths) "tidy.dll", "tidy", # Cygwin "cygtidy-0-99-0", # Linux, full soname "libtidy-0.99.so.0", # Linux, full soname "libtidy-0.99.so.0.0.0", # HTML tidy "libtidy.so.5", # Linux, HTML tidy v5.8 "libtidy.so.58", # Debian changed soname "libtidy.so.5deb1", # Windows? "libtidy.dll", "libtidy", # Windows? "tidylib.dll", "tidylib", ) class Loader: """ ctypes.CDLL wrapper. I am a trivial wrapper that eliminates the need for tidy.tidyFoo, so you can just access tidy.Foo. """ def __init__(self, libnames: tuple[str, ...] | None = None) -> None: self.lib: ctypes.CDLL self.libnames: tuple[str, ...] 
= libnames or LIBNAMES # Add package directory to search path os.environ["PATH"] = "".join( (os.path.dirname(__file__), os.pathsep, os.environ["PATH"]), ) # Add full path to a library lib_path = os.environ.get("TIDY_LIBRARY_FULL_PATH") if lib_path: self.libnames = (lib_path, *self.libnames) # Try loading library for libname in self.libnames: try: self.lib = ctypes.CDLL(libname) break except OSError: continue else: # Fail in case we could not load it raise OSError("Couldn't find libtidy, please make sure it is installed.") # Adjust some types self.Create.restype = ctypes.POINTER(ctypes.c_void_p) self.LibraryVersion.restype = ctypes.c_char_p def __getattr__(self, name: str) -> Any: # noqa: ANN401 return getattr(self.lib, f"tidy{name}") _tidy = Loader() _putByteFunction = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_char) # define a callback to pass to Tidylib @_putByteFunction def putByte(handle: int, char: int) -> int: """Lookup sink by handle and call its putByte method.""" sinkfactory[handle].putByte(char) return 0 class _OutputSink(ctypes.Structure): _fields_ = (("sinkData", ctypes.c_int), ("putByte", _putByteFunction)) class _Sink: def __init__(self, handle: int) -> None: self._data = io.BytesIO() self.struct = _OutputSink() self.struct.putByte = putByte self.handle = handle def putByte(self, byte: bytes) -> None: self._data.write(byte) def getvalue(self) -> bytes: return self._data.getvalue() class ReportItem: """Error report item as returned by tidy.""" severities: ClassVar[dict[str, str]] = { "W": "Warning", "E": "Error", "C": "Config", "D": "Document", } def __init__(self, err: str) -> None: self.err: str = err #: Whole error message as returned by tidy self.full_severity: str #: Full severity string self.severity: str #: D, W, E or C indicating severity self.message: str #: Error message itself self.line: int | None #: Line where error was fired (can be None) self.col: int | None #: Column where error was fired (can be None) # Parses: # line column 
- (Error|Warning): # It might be also useful to gnu-emacs reporting mode if err.startswith("line"): tokens = err.split(" ", 6) self.full_severity = tokens[5] self.severity = tokens[5][0] # W, E or C self.line = int(tokens[1]) self.col = int(tokens[3]) self.message = tokens[6] else: tokens = err.split(" ", 1) self.full_severity = tokens[0] self.severity = tokens[0][0] self.message = tokens[1] self.line = None self.col = None def get_severity(self) -> str: try: return self.severities[self.severity] except KeyError: return self.full_severity.strip().rstrip(":") def __str__(self) -> str: if self.line: return f"line {self.line} col {self.col} - {self.get_severity()}: {self.message}" return f"{self.get_severity()}: {self.message}" def __repr__(self) -> str: return "{}('{}')".format(self.__class__.__name__, str(self).replace("'", "\\'")) K = TypeVar("K") V = TypeVar("V") class FactoryDict(ABC, dict, Mapping[K, V]): """ Custom dict wrapper. I am a dict with a create method and no __setitem__. This allows me to control my own keys. 
""" @abstractmethod def create(self) -> V: """Generate a new item.""" raise NotImplementedError def _setitem(self, name: K, value: V) -> None: dict.__setitem__(self, name, value) def __setitem__(self, _: K, __: V) -> None: raise TypeError("Use create() to get a new object") class SinkFactory(FactoryDict[int, _Sink]): """Mapping for lookup of sinks by handle.""" def __init__(self) -> None: super().__init__() self.lastsink: int = 0 def create(self) -> _Sink: sink = _Sink(self.lastsink) sink.struct.sinkData = self.lastsink FactoryDict._setitem(self, self.lastsink, sink) # noqa: SLF001 self.lastsink = self.lastsink + 1 return sink sinkfactory = SinkFactory() class Document: """Document object as returned by :func:`parseString` or :func:`parse`.""" def __init__(self, options: OPTION_DICT_TYPE) -> None: self.cdoc = _tidy.Create() self.options = options self.errsink = sinkfactory.create() _tidy.SetErrorSink(self.cdoc, ctypes.byref(self.errsink.struct)) self._set_options() def _set_options(self) -> None: for key, value in self.options.items(): # this will flush out most argument type errors... if value is None: value = "" # noqa: PLW2901 if isinstance(value, bool): value = int(value) # noqa: PLW2901 _tidy.OptParseValue( self.cdoc, key.replace("_", "-").encode("utf-8"), str(value).encode("utf-8"), ) if self.errors: for error_prefix, error_exception in ERROR_MAP.items(): if self.errors[-1].message.startswith(error_prefix): raise error_exception(self.errors[-1].message) def __del__(self) -> None: del sinkfactory[self.errsink.handle] def write(self, stream: BinaryIO) -> None: """ :param stream: Writable file like object. Writes document to the stream. 
""" stream.write(self.getvalue()) def get_errors(self) -> list[ReportItem]: """Return list of errors as a list of :class:`ReportItem`.""" ret = [] for line in self.errsink.getvalue().decode("utf-8").splitlines(): line = line.strip() # noqa: PLW2901 if line: ret.append(ReportItem(line)) return ret @property def errors(self) -> list[ReportItem]: return self.get_errors() def getvalue(self) -> bytes: """Raw string as returned by tidy.""" stlen = ctypes.c_int(8192) string_buffer = ctypes.create_string_buffer(stlen.value) result = _tidy.SaveString(self.cdoc, string_buffer, ctypes.byref(stlen)) if result == -ENOMEM: # buffer too small string_buffer = ctypes.create_string_buffer(stlen.value) _tidy.SaveString(self.cdoc, string_buffer, ctypes.byref(stlen)) return string_buffer.value def gettext(self) -> str: """Unicode text for output returned by tidy.""" output_encoding = self.options["output_encoding"] assert isinstance(output_encoding, str) return self.getvalue().decode(output_encoding) def __str__(self) -> str: return self.gettext() ERROR_MAP = { "missing or malformed argument for option: ": OptionArgError, "unknown option: ": InvalidOptionError, } class DocumentFactory(FactoryDict[weakref.ReferenceType, Document]): @staticmethod def load( doc: Document, arg: bytes, loader: Callable[[Document, bytes], int], ) -> None: status = loader(doc.cdoc, arg) if status >= 0: _tidy.CleanAndRepair(doc.cdoc) def loadFile(self, doc: Document, filename: str) -> None: self.load(doc, filename.encode("utf-8"), _tidy.ParseFile) def loadString(self, doc: Document, text: bytes) -> None: self.load(doc, text, _tidy.ParseString) def create(self, **kwargs: OPTION_TYPE) -> Document: enc = kwargs.get("char_encoding", "utf8") if "output_encoding" not in kwargs: kwargs["output_encoding"] = enc if "input_encoding" not in kwargs: kwargs["input_encoding"] = enc doc = Document(kwargs) ref = weakref.ref(doc, self.releaseDoc) FactoryDict._setitem(self, ref, doc.cdoc) # noqa: SLF001 return doc def 
parse(self, filename: str, **kwargs: OPTION_TYPE) -> Document: """ Open and process filename as an HTML file. Returning a processed document object. :param kwargs: named options to pass to TidyLib for processing the input file. :param filename: the name of a file to process :return: a :class:`Document` object """ doc = self.create(**kwargs) self.loadFile(doc, filename) return doc def parseString(self, text: bytes | str, **kwargs: OPTION_TYPE) -> Document: """ Use text as an HTML file. Returning a processed document object. :param kwargs: named options to pass to TidyLib for processing the input file. :param text: the string to parse :return: a :class:`Document` object """ doc = self.create(**kwargs) if isinstance(text, str): input_encoding = doc.options["input_encoding"] assert isinstance(input_encoding, str) text = text.encode(input_encoding) self.loadString(doc, text) return doc def releaseDoc(self, ref: weakref.ReferenceType) -> None: _tidy.Release(self[ref]) docfactory = DocumentFactory() parse = docfactory.parse parseString = docfactory.parseString def getTidyVersion() -> str: version = _tidy.lib.tidyLibraryVersion() assert isinstance(version, bytes) return version.decode() nijel-utidylib-dbcc161/tidy/py.typed000066400000000000000000000000001512744773400175770ustar00rootroot00000000000000nijel-utidylib-dbcc161/tidy/test_data/000077500000000000000000000000001512744773400200625ustar00rootroot00000000000000nijel-utidylib-dbcc161/tidy/test_data/test.html000066400000000000000000000001521512744773400217250ustar00rootroot00000000000000

woot


é nijel-utidylib-dbcc161/tidy/test_tidy.py000066400000000000000000000136221512744773400205000ustar00rootroot00000000000000from __future__ import annotations import io import os import pathlib import unittest import tidy import tidy.lib DATA_STORAGE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_data") class TidyTestCase(unittest.TestCase): input1 = "" input2 = "\n" + "

asdkfjhasldkfjhsldjas\n" * 100 test_file = os.path.join(DATA_STORAGE, "test.html") def default_docs(self) -> tuple[tidy.Document, tidy.Document, tidy.Document]: doc1 = tidy.parseString(self.input1) doc2 = tidy.parseString(self.input2) doc3 = tidy.parse(self.test_file, char_encoding="ascii") return (doc1, doc2, doc3) def test_bad_options(self) -> None: badopts = [{"foo": 1}] for opts in badopts: with self.assertRaisesRegex( tidy.InvalidOptionError, "not a valid Tidy option", ): tidy.parseString(self.input2, **opts) def test_bad_option_values(self) -> None: badopts: list[tidy.lib.OPTION_DICT_TYPE] = [ {"indent": "---"}, {"indent_spaces": None}, ] for opts in badopts: with self.assertRaisesRegex( tidy.OptionArgError, "missing or malformed argument", ): tidy.parseString(self.input2, **opts) def test_encodings(self) -> None: text = ( pathlib.Path(self.test_file) .read_bytes() .decode("utf8") .encode("ascii", "xmlcharrefreplace") ) doc1u = tidy.parseString(text, input_encoding="ascii", output_encoding="latin1") self.assertTrue(doc1u.getvalue().find(b"\xe9") >= 0) doc2u = tidy.parseString(text, input_encoding="ascii", output_encoding="utf8") self.assertTrue(doc2u.getvalue().find(b"\xc3\xa9") >= 0) def test_error_lines(self) -> None: for doc in self.default_docs(): self.assertEqual(doc.errors[0].line, 1) def test_nonexisting(self) -> None: os.environ.pop("IGNORE_MISSING_TIDY", None) doc = tidy.parse(os.path.join(DATA_STORAGE, "missing.html")) self.assertEqual(str(doc).strip(), "") self.assertIn("missing.html", doc.errors[0].message) if doc.errors[0].severity == "E": self.assertEqual(doc.errors[0].severity, "E") self.assertTrue(str(doc.errors[0]).startswith("Error")) else: # Tidy 5.5.19 and newer self.assertEqual(doc.errors[0].severity, "D") self.assertTrue(str(doc.errors[0]).startswith("Document")) def test_options(self) -> None: doc1 = tidy.parseString( self.input1, add_xml_decl=1, show_errors=1, newline="CR", output_xhtml=True, ) self.assertIn("CDATA", str(doc1)) doc2 = 
tidy.parseString( "", add_xml_decl=1, show_errors=1, newline="CR", output_xhtml=True, ) self.assertTrue(str(doc2).startswith(" None: doc1, doc2, doc3 = self.default_docs() self.assertIn("", str(doc1)) self.assertIn("", str(doc2)) self.assertIn("", doc3.gettext()) def test_big(self) -> None: text = "x" * 16384 doc = tidy.parseString(f"{text}") self.assertIn(text, str(doc)) def test_unicode(self) -> None: doc = tidy.parseString("zkouška") self.assertIn("zkouška", doc.gettext()) def test_write(self) -> None: doc = tidy.parseString(self.input1) handle = io.BytesIO() doc.write(handle) self.assertEqual(doc.getvalue(), handle.getvalue()) def test_errors(self) -> None: doc = tidy.parseString(self.input1) for error in doc.errors: self.assertTrue(str(error).startswith("line")) self.assertTrue(repr(error).startswith("ReportItem")) def test_report_item(self) -> None: item = tidy.ReportItem("Invalid: error") self.assertEqual(item.get_severity(), "Invalid") def test_missing_load(self) -> None: with self.assertRaises(OSError): tidy.lib.Loader(libnames=("not-existing-library",)) def test_lib_from_environ(self) -> None: os.environ["TIDY_LIBRARY_FULL_PATH"] = "/foo/bar/tidy" loader = tidy.lib.Loader() expected_libnames = ( "/foo/bar/tidy", "/opt/homebrew/lib/libtidy.dylib", "/opt/homebrew/lib/libtidy.5.dylib", "/opt/homebrew/lib/libtidy.58.dylib", "/usr/local/lib/libtidy.dylib", "/usr/local/lib/libtidy.5.dylib", "/usr/local/lib/libtidy.58.dylib", "C:/ProgramData/chocolatey/lib/html-tidy/tools/tidy-5.9.14-win64/bin/tidy.dll", "C:/ProgramData/chocolatey/lib/html-tidy/tools/bin/tidy.dll", "C:/Program Files/tidy-html5/bin/tidy.dll", "C:/tools/tidy-html5/bin/tidy.dll", "libtidy.so", "libtidy.dylib", "tidy.dll", "tidy", "cygtidy-0-99-0", "libtidy-0.99.so.0", "libtidy-0.99.so.0.0.0", "libtidy.so.5", "libtidy.so.58", "libtidy.so.5deb1", "libtidy.dll", "libtidy", "tidylib.dll", "tidylib", ) self.assertEqual(loader.libnames, expected_libnames) def test_lib_version(self) -> None: 
self.assertEqual(len(tidy.lib.getTidyVersion().split(".")), 3) if __name__ == "__main__": unittest.main()