pax_global_header00006660000000000000000000000064147535707350014532gustar00rootroot0000000000000052 comment=50213b1db08dbd2ad90ba36d5f7549507390e33a url-matcher-0.6.0/000077500000000000000000000000001475357073500137605ustar00rootroot00000000000000url-matcher-0.6.0/.codecov.yml000066400000000000000000000001201475357073500161740ustar00rootroot00000000000000comment: layout: "header, diff, tree" coverage: status: project: false url-matcher-0.6.0/.git-blame-ignore-revs000066400000000000000000000002131475357073500200540ustar00rootroot00000000000000# Contains commits to be ignored due to linting # https://github.com/zytedata/url-matcher/pull/5 47197448e5ac7e4d9dc9f764231a6915e7fb2295 url-matcher-0.6.0/.github/000077500000000000000000000000001475357073500153205ustar00rootroot00000000000000url-matcher-0.6.0/.github/workflows/000077500000000000000000000000001475357073500173555ustar00rootroot00000000000000url-matcher-0.6.0/.github/workflows/publish.yml000066400000000000000000000015741475357073500215550ustar00rootroot00000000000000# This workflows will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: publish on: push: tags: - "[0-9]+.[0-9]+.[0-9]+" jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.13' - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel twine - name: Build and publish if: startsWith(github.ref, 'refs/tags') env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} run: | python setup.py sdist bdist_wheel twine upload dist/* url-matcher-0.6.0/.github/workflows/test.yml000066400000000000000000000033431475357073500210620ustar00rootroot00000000000000# This workflow will install Python dependencies, run tests and lint with a 
variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: tox on: push: branches: [ main ] pull_request: branches: [ main ] jobs: test: runs-on: ubuntu-latest strategy: fail-fast: false matrix: include: - python-version: '3.9' toxenv: min - python-version: '3.9' - python-version: '3.10' - python-version: '3.11' - python-version: '3.12' - python-version: '3.13' steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install tox - name: tox run: | tox -e ${{ matrix.toxenv || 'py' }} - name: coverage uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} check: runs-on: ubuntu-latest strategy: fail-fast: false matrix: python-version: ['3.12'] # Keep in sync with .readthedocs.yml tox-job: ["mypy", "docs", "pre-commit", "twinecheck"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install tox - name: tox run: | tox -e ${{ matrix.tox-job }} url-matcher-0.6.0/.gitignore000066400000000000000000000003471475357073500157540ustar00rootroot00000000000000# Python *.pyc *.pyo /build/ /dist/ *.egg-info # Mac OS *.DS_Store # IDE /.idea/ .mypy_cache/ .cache/ .tox/ .pytest_cache/ .coverage /coverage.xml htmlcov/ .ipynb_checkpoints docs/_build/ docs/_autosummary/ __pycache__/ _build/ url-matcher-0.6.0/.pre-commit-config.yaml000066400000000000000000000002161475357073500202400ustar00rootroot00000000000000repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.9.6 hooks: - id: ruff args: [ --fix ] - id: ruff-format 
url-matcher-0.6.0/.readthedocs.yml000066400000000000000000000003621475357073500170470ustar00rootroot00000000000000version: 2 formats: all sphinx: configuration: docs/conf.py build: os: ubuntu-22.04 tools: python: "3.12" # Keep in sync with .github/workflows/tests.yml python: install: - requirements: docs/requirements.txt - path: . url-matcher-0.6.0/CHANGELOG.rst000066400000000000000000000025071475357073500160050ustar00rootroot00000000000000========= Changelog ========= 0.6.0 (2025-02-14) ------------------ * Dropped Python 3.8 support. * Added Python 3.13 support. * Improved type hints and added ``py.typed``. * CI improvements. 0.5.0 (2024-04-15) ------------------ * Added the ``include_universal`` argument to :meth:`.URLMatcher.match` and :meth:`.URLMatcher.match_all`. It can be set to ``False`` to skip universal matchers. * Added the :meth:`.URLMatcher.match_universal` method that returns only identifiers of universal matchers. * Added ``.readthedocs.yml``. 0.4.0 (2024-04-03) ------------------ * Added official support for Python 3.12. * Added the :meth:`.URLMatcher.match_all` method that returns all matching identifiers. * Adding a :class:`~.Patterns` instance with several patterns for the same domain to a :class:`~.URLMatcher` no longer creates multiple identical :class:`~.matcher.PatternsMatcher` instances. * CI improvements. 0.3.0 (2023-09-21) ------------------ * Drop Python 3.7 support, make Python 3.11 support official. * Support tldextract >= 3.6, make the requirement of tldextract >= 1.2 explicit. 0.2.0 (2022-02-01) ------------------ * Update :class:`~.Patterns` to be **frozen** so instances can easily be deduped based on its hash uniqueness. * Remove Python 3.6 support 0.1.0 (2021-11-19) ------------------ * Initial release url-matcher-0.6.0/CONTRIBUTING.rst000066400000000000000000000015311475357073500164210ustar00rootroot00000000000000============ Contributing ============ ``url-matcher`` is an open-source project. Your contribution is very welcome! 
Issue Tracker ============= If you have a bug report, a new feature proposal or simply would like to make a question, please check our issue tracker on Github: https://github.com/zytedata/url-matcher/issues Source code =========== Our source code is hosted on Github: https://github.com/zytedata/url-matcher Before opening a pull request, it might be worth checking current and previous issues. Some code changes might also require some discussion before being accepted so it might be worth opening a new issue before implementing huge or breaking changes. Testing ======= We use tox_ to run tests with different Python versions:: tox The command above also runs type checks; we use mypy. .. _tox: https://tox.readthedocs.io url-matcher-0.6.0/LICENSE000066400000000000000000000027451475357073500147750ustar00rootroot00000000000000Copyright (c) Zyte Group Ltd All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of Zyte nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. url-matcher-0.6.0/MAINTAINING.rst000066400000000000000000000023751475357073500162570ustar00rootroot00000000000000Maintaining =========== How to release a new version ---------------------------- Make sure to install bump2version_, a maintained fork of bumpversion_: .. code-block:: shell pip install --upgrade bump2version Then follow this checklist: * Update changelog in a separate commit * Execute the ``bumpversion`` command * Push latest changes including tags The changelog is kept under the ``CHANGES.rst`` file. It should be updated in a separate commit to master. After changelog is merged to master, you can check which changes are needed to update the version executing: .. code-block:: shell bumpversion minor --dry-run --verbose When you're ready, you can remove the flags and execute: .. code-block:: shell bumpversion minor In this example, we're bumping our minor version, but you may use any of the following: * patch (for bug fixes) * minor (new features that keep compatibility) * major (introduces breaking changes) Finally, you can push the changes to the remote repository. Make sure to include git tags. .. code-block:: shell git push origin `git describe --tags` The PyPI release will be handled by Github actions. .. _bump2version: https://github.com/c4urself/bump2version .. 
_bumpversion: https://github.com/peritus/bumpversion url-matcher-0.6.0/README.rst000066400000000000000000000056561475357073500154630ustar00rootroot00000000000000=========== url-matcher =========== .. image:: https://img.shields.io/pypi/v/url-matcher.svg :target: https://pypi.python.org/pypi/url-matcher :alt: PyPI Version .. image:: https://img.shields.io/pypi/pyversions/url-matcher.svg :target: https://pypi.python.org/pypi/url-matcher :alt: Supported Python Versions .. image:: https://github.com/zytedata/url-matcher/workflows/tox/badge.svg :target: https://github.com/zytedata/url-matcher/actions :alt: Build Status .. image:: https://codecov.io/github/zytedata/url-matcher/coverage.svg?branch=main :target: https://codecov.io/gh/zytedata/url-matcher :alt: Coverage report URL matching library that relates URLs with resources. Rules are defined using simple pattern definitions. It is simpler and faster than using regular expressions if the rules involves many domains. To illustrate it with an example, imagine that you have several proxy servers and you want to route requests to the right one. You could define the following rules: * ``site1.com`` →︎ ``us_proxy`` * ``site2.com/uk`` →︎ ``uk_proxy`` * ``site2.com/ie`` →︎ ``ie_proxy`` All URLs from ``site1.com`` should use the US proxy. The situation for ``site2.com`` URLs are different: if the path starts with ``/uk``, then use the UK proxy, otherwise use the IE proxy. This library allows to create a matcher that can be used to match URLs with the right proxy using these rules. Have a look to https://github.com/zytedata/url-matcher/blob/main/url_matcher/example.py for an example of usage. 
The following files are useful to understand the pattern, the set of patterns and how they behave: * https://github.com/zytedata/url-matcher/blob/main/tests/fixtures/single_patterns.json * https://github.com/zytedata/url-matcher/blob/main/tests/fixtures/patterns.json The full documentation can be found at https://url-matcher.readthedocs.io/ License is BSD 3-clause. * Documentation: https://url-matcher.readthedocs.io/ * Source code: https://github.com/zytedata/url-matcher * Issue tracker: https://github.com/zytedata/url-matcher/issues Developing ********** Setup your local Python environment via: 1. ``pip install -r requirements-dev.txt`` 2. ``pre-commit install`` Now everytime you perform a ``git commit``, these tools will run against the staged files: * ``black`` * ``isort`` * ``flake8`` * ``mypy`` You can also directly invoke ``pre-commit run --all-files`` to run them without performing a commit. Using sphinx-autobuild ~~~~~~~~~~~~~~~~~~~~~~ When working on documentation, it is convenient to use sphinx-autobuild. First, run ``pip install -r docs/requirements.txt sphinx-autobuild``. Then run :: sphinx-autobuild docs docs/_build/html and then open http://127.0.0.1:8000/ in a browser, to see the current version of docs. A process would be running in a background, watching for docs changes; when docs are changed, a build is started, and the web page is refreshed automatically when the build is finished. url-matcher-0.6.0/docs/000077500000000000000000000000001475357073500147105ustar00rootroot00000000000000url-matcher-0.6.0/docs/Makefile000066400000000000000000000011041475357073500163440ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". 
help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)url-matcher-0.6.0/docs/_templates/000077500000000000000000000000001475357073500170455ustar00rootroot00000000000000url-matcher-0.6.0/docs/_templates/custom-class-template.rst000066400000000000000000000013071475357073500240260ustar00rootroot00000000000000.. Template based in the original one, with some changes proposed on https://stackoverflow.com/a/62613202/3887420 {{ fullname | escape | underline}} .. currentmodule:: {{ module }} .. autoclass:: {{ objname }} :members: :show-inheritance: {% block methods %} .. automethod:: __init__ {% if methods %} .. rubric:: {{ _('Methods') }} .. autosummary:: {% for item in methods %} ~{{ name }}.{{ item }} {%- endfor %} {% endif %} {% endblock %} {% block attributes %} {% if attributes %} .. rubric:: {{ _('Attributes') }} .. autosummary:: {% for item in attributes %} ~{{ name }}.{{ item }} {%- endfor %} {% endif %} {% endblock %} url-matcher-0.6.0/docs/_templates/custom-module-template.rst000066400000000000000000000024761475357073500242160ustar00rootroot00000000000000.. Template based in the original one, with some changes proposed on https://stackoverflow.com/a/62613202/3887420 {{ fullname | escape | underline}} .. automodule:: {{ fullname }} {% block attributes %} {% if attributes %} .. rubric:: {{ _('Module Attributes') }} .. autosummary:: :toctree: {% for item in attributes %} {{ item }} {%- endfor %} {% endif %} {% endblock %} {% block functions %} {% if functions %} .. rubric:: {{ _('Functions') }} .. autosummary:: :toctree: {% for item in functions %} {{ item }} {%- endfor %} {% endif %} {% endblock %} {% block classes %} {% if classes %} .. rubric:: {{ _('Classes') }} .. 
autosummary:: :toctree: :template: custom-class-template.rst {% for item in classes %} {{ item }} {%- endfor %} {% endif %} {% endblock %} {% block exceptions %} {% if exceptions %} .. rubric:: {{ _('Exceptions') }} .. autosummary:: :toctree: {% for item in exceptions %} {{ item }} {%- endfor %} {% endif %} {% endblock %} {% block modules %} {% if modules %} .. rubric:: Modules .. autosummary:: :toctree: :template: custom-module-template.rst :recursive: {% for item in modules %} {{ item }} {%- endfor %} {% endif %} {% endblock %} url-matcher-0.6.0/docs/api_reference.rst000066400000000000000000000003211475357073500202250ustar00rootroot00000000000000============= API Reference ============= .. Based on ideas found on https://stackoverflow.com/a/62613202/3887420 Module ``url_matcher`` ====================== .. automodule:: url_matcher :members: url-matcher-0.6.0/docs/changelog.rst000066400000000000000000000000701475357073500173660ustar00rootroot00000000000000.. include:: ../CHANGELOG.rst .. toctree:: :hidden: url-matcher-0.6.0/docs/conf.py000066400000000000000000000134451475357073500162160ustar00rootroot00000000000000# Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent)) from url_matcher import __version__ # -- Project information ----------------------------------------------------- project = "url-matcher" copyright = "2021, Zyte" author = "Zyte" # The short X.Y version version = "" # The full version, including alpha/beta/rc tags release = __version__.__version__ # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.ifconfig", "sphinx.ext.viewcode", "sphinx.ext.githubpages", "sphinx.ext.autosummary", ] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = {".rst": "restructuredtext"} # The master toctree document. master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. pygments_style = None # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
# html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # # html_sidebars = {} # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = "url-matcher-doc" # -- Options for LaTeX output ------------------------------------------------ latex_elements: dict = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ ( master_doc, "url-matcher.tex", "url-matcher Documentation", "Zyte", "manual", ), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [(master_doc, "url-matcher", "url-matcher Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( master_doc, "url-matcher", "url-matcher Documentation", author, "url-matcher", "URL matching rules library to connect URLs with resources", "Miscellaneous", ), ] # -- Options for Epub output ------------------------------------------------- # Bibliographic Dublin Core info. epub_title = project # The unique identifier of the text. This can be a ISBN number # or the project homepage. # # epub_identifier = '' # A unique identification for the text. # # epub_uid = '' # A list of files that should not be packed into the epub file. epub_exclude_files = ["search.html"] # -- Extension configuration ------------------------------------------------- # -- Options for intersphinx extension --------------------------------------- intersphinx_mapping = { "python": ( "https://docs.python.org/3", None, ) } autodoc_default_options = { "special-members": "__init__,__call__", "undoc-members": True, } add_module_names = False autosummary_generate = True url-matcher-0.6.0/docs/contributing.rst000066400000000000000000000000721475357073500201500ustar00rootroot00000000000000.. include:: ../CONTRIBUTING.rst .. toctree:: :hidden:url-matcher-0.6.0/docs/index.rst000066400000000000000000000007751475357073500165620ustar00rootroot00000000000000============================== url-matcher documentation ============================== URL matching library that relates URLs with resources. Rules are defined using simple pattern definitions. It is simpler and faster than using regular expressions if the rules involves many domains. :ref:`license` is BSD 3-clause. .. toctree:: :caption: Getting started :maxdepth: 1 intro.rst .. 
toctree:: :caption: Documentation :maxdepth: 1 api_reference contributing changelog license url-matcher-0.6.0/docs/intro.rst000066400000000000000000000134251475357073500166020ustar00rootroot00000000000000.. _`intro`: ============ Introduction ============ Let's start with an example. Imagine that you have several proxy servers and you want to route requests to the right one. You could define the following rules: * ``site1.com`` →︎ ``us_proxy`` * ``site2.com/uk`` →︎ ``uk_proxy`` * ``site2.com/ie`` →︎ ``ie_proxy`` All URLs from ``site1.com`` should use the US proxy. The situation for ``site2.com`` URLs are different: if the path starts with ``/uk``, then the UK proxy should be used whereas if the path starts with ``/ie`` then the IE proxy should be used instead. This library allows to create a matcher that can be used to match URLs with the right proxy using these rules. Let see how the library can handle this situation: .. code-block:: python from url_matcher import URLMatcher, Patterns matcher = URLMatcher() matcher.add_or_update("us_proxy", Patterns(["site1.com"])) matcher.add_or_update("uk_proxy", Patterns(["site2.com/uk"])) matcher.add_or_update("ie_proxy", Patterns(["site2.com/ie"])) proxy = matcher.match("http://site1.com/articles/article1") # proxy is "us_proxy" here proxy = matcher.match("http://site2.com/uk/a_page") # proxy is "uk_proxy" here proxy = matcher.match("https://www.site2.com/ie/a_page") # proxy is "ie_proxy" here proxy = matcher.match("http://example.com/a_differnt_page") # proxy is None here As can be seen the the class :class:`url_matcher.URLMatcher` is handy to handle this use case. .. note:: Relative URLs are not supported in the ``match`` method. Patterns, include and exclude ============================= A pattern is a URL that describes a set of URLs. For example, the pattern ``example.com`` describes any URL whose domain is ``example.com`` or any of its subdomains. A single pattern is sometimes not enough to describe which URLs to match. 
This is why we can define instead a set of patterns that are matched against. There is then a list of positive patterns (``include``) and a list of negative ones (``exclude``). **A URL is a match** if it matches **at least one** of the patterns in ``include`` and **none** of the patterns in ``exclude``. This is an example of a rule using such a set of patterns: .. code-block:: python patterns = Patterns(include=["example.com", "example.org"], exclude=["*.jpg|", "*.jpeg|"]) matcher.add_or_update("proxy_1", patterns)) Patterns ======== A pattern is a URL that describes a set of URLs. It itself is just a URL. The following diagram summarizes its different parts and what do they mean. .. image:: patterns/patterns-cheatsheet.png :alt: Patterns Cheatsheet .. note:: Matching is always **case-insensitive**. The best way to understand how the patterns work is to look at some examples: Basic patterns -------------- .. csv-table:: :file: patterns/basic_patterns.csv :widths: 30, 70 :header-rows: 1 Domain patterns --------------- .. csv-table:: :file: patterns/domain_patterns.csv :widths: 30, 70 :header-rows: 1 .. note:: Rules above only differ by the ``/`` character and this is enough to change the matching behaviour. The general rule is that the pattern matches the domain or any of the subdomains only if the pattern does not contain a path, a query or a fragment. Otherwise, only URLs with the exact same domain after removing ``www.`` will match the pattern. Path patterns ------------- A URL matches if the pattern path is a prefix of it. Besides, the following modifier characters can be used: * The ``*`` character matches any number of characters. * Use the ``|`` character at the end of the pattern path if a exact path matching is required. .. csv-table:: :file: patterns/path_patterns.csv :widths: 30, 70 :header-rows: 1 Query patterns -------------- It serves to match URLs that have some specific parameters in the URL. The order of parameters in the query string is irrelevant. 
The wildcard char ``*`` can be used for values. If a parameter is repeated in the pattern it will match if any of the values provided is matched .. csv-table:: :file: patterns/query_patterns.csv :widths: 30, 70 :header-rows: 1 Fragment patterns ----------------- It works exactly like the path. Rules conflict resolution ========================= Sometimes several rules can match the same URL. We have then a conflict. By default the library will prioritize the most specific rule. For example, if a URL is matching both a rule with a pattern ``example.com`` and another with the pattern ``example.com/articles`` then the later one will be final match because it is more specific. Alternatively, it is possible to control manually the order of rules by using the ``priority`` parameter of the :class:`url_matcher.Patterns`. In case of conflict, the rule with the highest priority will be chosen. The full criteria applied to resolve a conflict between rules are: 1. universality (rules with non universal include patterns are prioritized over rules with universal ones) 2. priority (the highest wins) 3. specificity (the most specific include patterns for the concerning domain wins) 4. the rule id (the rule with the highest id wins) Efficiency ========== Internally, the library clusters the rules by the top level domain of their include patterns. This is done to speed up the matching because it reduces the space of possible rules that can match a URL. The drawback is that the rules with ``include`` patterns that do not belong to any top level domain are not supported. In fact, an error is raised. An exception were done for the universal matching pattern. It is the only cross-top-level-domain ``include`` pattern that is allowed. The rationale is that is can be convenient to define defaults (e.g. to define the default proxy to use if no other rule matches). url-matcher-0.6.0/docs/license.rst000066400000000000000000000001011475357073500170540ustar00rootroot00000000000000.. 
_`license`: ======= License ======= .. include:: ../LICENSE url-matcher-0.6.0/docs/patterns/000077500000000000000000000000001475357073500165505ustar00rootroot00000000000000url-matcher-0.6.0/docs/patterns/basic_patterns.csv000066400000000000000000000012401475357073500222630ustar00rootroot00000000000000Pattern, Behaviour The empty string, Universal pattern. Match any URL ``example.com``, " Match any URL whose domain is ``example.com`` or any of its subdomains. | **Match:** * ``http://example.com/anything?id=24`` * ``https://www.example.com/page#with_fragment`` **Don't match:** * ``http://myexample.com`` " ``example.com/articles/``, " Match any URL whose domain is ``example.com`` or ``www.example.com`` and path starts by ``/articles/``. | **Match:** * ``http://www.example.com/articles/article1`` * ``https://example.com/articles/another_article?id=23`` **Don't match:** * ``http://example.com/articles`` * ``http://shop.example.com/articles/article1`` "url-matcher-0.6.0/docs/patterns/domain_patterns.csv000066400000000000000000000011241475357073500224520ustar00rootroot00000000000000Pattern, Behaviour ``shop.example.com``, " Match any URL whose domain is ``shop.example.com`` or any of its subdomains. | **Match:** * ``https://shop.example.com/foo?id=34#fragment`` * ``http://uk.shop.example.com/foo?id=34`` **Don't match:** * ``http://myshop.example.com`` " ``shop.example.com/``, " Match any URL whose domain is ``shop.example.com`` or ``www.shop.example.com``. | **Match:** * ``https://shop.example.com/foo?id=34#fragment`` * ``http://www.shop.example.com/foo?id=34`` **Don't match:** * ``http://myshop.example.com`` * ``http://uk.shop.example.com/foo?id=34`` "url-matcher-0.6.0/docs/patterns/path_patterns.csv000066400000000000000000000016221475357073500221420ustar00rootroot00000000000000Pattern, Behaviour ``/articles/``, " Match any URL whose path starts by ``/articles/``. 
| **Match:** * ``http://example.com/articles/an_article?id=23#main`` * ``https://foo.com/articles/`` **Don't match:** * ``https://foo.com/articles`` " ``example.com/index.html|``, " Match any URL whose domain is ``example.com`` or ``www.example.com`` and path is exactly ``/index.html`` | **Match:** * ``http://example.com/index.html?id=24`` * ``https://www.example.com/index.html#main`` **Don't match:** * ``http://shop.example.com/index.html`` * ``http://shop.example.com/index.html_2`` " ``/images/*.jpg|``, " Match any URL whose path starts by ``/images/`` and whose path ends by ``.jpg`` | **Match:** * ``http://example.com/images/foo.jpg`` * ``https://example.org/images/other/subpath/FOO.JPG?id=23`` **Don't match:** * ``http://example.com/images/foo.jpeg`` * ``http://example.com/images/foo.jpg_2`` "url-matcher-0.6.0/docs/patterns/patterns-cheatsheet.png000066400000000000000000002016061475357073500232360ustar00rootroot00000000000000PNG  IHDR,8WsRGB _tEXtmxfile%3Cmxfile%20host%3D%22app.diagrams.net%22%20modified%3D%222021-11-22T11%3A09%3A14.653Z%22%20agent%3D%225.0%20(Macintosh%3B%20Intel%20Mac%20OS%20X%2010_15_7)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F96.0.4664.45%20Safari%2F537.36%22%20etag%3D%22G5crxP-wRdDt0zO1ULS2%22%20version%3D%2215.8.3%22%20type%3D%22google%22%3E%3Cdiagram%20id%3D%2221cbwoHLaFD1zpfFUvyJ%22%20name%3D%22Page-1%22%3E7Vxtc5s4EP41nnuZCYN498e8NO3Nzc30JtPe3UcFZFtXQFTItXO%2F%2FiSQACGc4LQ4iW1%2FSGAFC%2BzzrLRaVszc62z7nsJi9QdJUDpz7GQ7c29mjuMBwP8KwUMtcEBYC5YUJ7XIbgV3%2BD9UC4GSrnGCSimrRYyQlOFCF8Ykz1HMNBmklGz0wxYkTTRBAZfIENzFMDWlf%2BGErWpp5Nut%2FAPCy5W6MrBlSwbVwVJQrmBCNh2R%2B27mXlNCWL2Vba9RKmyn2%2BV2R2tzYxTlbMwJoVvED5urT5%2FKKPj3N%2FLh%2Fe%2F3ny%2Bklm8wXcsHljfLHpQF%2BH0XYjNe0%2FThisL4C%2BLXu9qsMEN3BYxF24Zjz2UrlqV8D%2FBNStZ5gpJmj0GGSc53L%2BY2FyxIzq5JSmh1DTeoflxeMkq%2BoKEWeaOIMrTdaQHQ2JXzEZEMMfrAD1EnBKEV%2BvVZko4gtBvRpsVXYbbqQOtFljwQSk4tmyu0Zucb0vJ7oOAYKKwY4wa9FGqdW7SFWZEiKyYZ3ysEsZxbRvgfikqypgICLqYw4yfXShx3QeEyE4bp48ntx3SsdKPnJEcCIZymPRFM
8VJAGHO1iMuvBBqY%2B8qlbMhwkojLDJKjZYQigPR2AdlOQvwI2BWaCvPANgAPfN9E3LUnwts9Ja9zAtvqOZ0zt8b5HHDBRBB4pwRBMworAIA7zvyBN5H5%2FVMyvx%2F4lh32XCCwvLE%2BEFrAmQiH4JRwCCKOg6fj4PGQwBuFw9yz%2FKkCgNCA4S5eoQy%2B%2FsEbgIkHb8%2BM2eZzc%2FweAGyy4Tsy0LohGcT5GS3HM8b6lwZrboD1sQqgTx0qdw5eF1Bqxt9B6s81kopPGirf4%2BNWMG9%2Frww4M4lw%2B2Ymn1NjFwRmzPHSgLkjsj5LbrFi5%2FPLbBu8V4fb%2B9rFc%2FRxgk8SbdttfyAwbAT8ASOFtjWfB%2B0vmork5qA%2Fc4JUkLksYK4ZL%2Fi6Fkm9q7im1CVvpMv7n4EtFMwcfgN2b%2FsXsSP4ZgsqXixghtOH%2BkyuDmZF1ei6nvAdlH5DgvlGi66krAguVABQbHtt9b2KxpzQDKZ680YaWLR7IqEpG1PEuO9d8AeOcb4cPF84%2BIX01cvqlAXTGzF3xFwqtzs3VjUyCvNywVUq5dz31QEbQhP92t3TE1wWKZRGw3mKO2cuUgJZV6NCiG8txf%2BCogUWujLI4pXQL7HlXKnhrY%2Fb1Z89MRn6of3NIy69O4p2Ne9qkvAd75oPJMCccCJvcsyx%2FuxNR%2BNNKE94G1lUxBWxth2vIK2tP%2BxVT0B%2FhvElYFSwiKcXGDYdVntg1ZssFgPnhtcdsGsNutZ9OXB2%2FzfEm9qU%2BX1ZPIb4cY2lMpD13F5AamYXm%2FT7YcbaEdF%2BBbCai6n33MILEkx5nF9nbkuyFvIuGAWimN%2BlmI%2Fd3MP4yz0nxMdWdlUQnLPqgfyrmX%2FDJVyHuNh187p%2Bn%2FmYmgNWfjAFxBJDMzoCA1MPfyrEgJmzOnePR9M9bnCaxJCKEOkcFr1B%2FL4nLPr1HBSdLmu0oAimKdmg5OhiIy0WcmzLeyoWcsMDxkLuiMqbqTOfINJfvKg0r5aLsQ5ZjqSqCzpWQVsYC69q82E%2FPJEvg7jXksZ%2FhC%2BPVJYN1DFFA8H9dKn8oUTaqQf3O0Brip9GRfeTldy4I0rPpu6D%2FFB%2Fh%2BhHpk1UN3WoGlizEzqHP0cT%2FnDNSEx1EoLK%2FCc5sohu5K3FP96o%2BEdL9ngHDHCcoQDn7EVH4kXn1MHbxu%2BcOjiz5jmssZtsATcBVd17eaSDZ7NScGByBVSBz2FG0xHLFE5udrUDxL1enbjeZPMIdwRmU0%2Bvhor%2BBkLDyLe4ddrfQKA4WeLAMZcclDBD9aVU3cZp5nz8x3spM10XChwPid1QLeKpd0s7UJPuZ1bcHjTn441I06E8uRQL1luvSWC5apZQDa6t2sfUTy2tQomxEv7J%2Fq5jz6HiXCWjKIUMf9PVDxlZXuGjIFnH5YLe4jmVrlIq6kXQ8qwWKlNRpK8E9nqBA4N0iZihp4K8eervYMGISowzC3axoF9oA%2Fr1F2NZ4PRKz%2BfOYVkwIv%2F7cixAW8z%2BFtosz4vk%2Fj98%2F8K2bNeVgputvF6189DZ6QwVN%2FYepOJPLz9ksHvUe0nyeaC3rM0HzyOf5%2BiznCg4LPlGRMevgXxB5HTId9rc89Wct%2BGe%2Fzzu%2BaGuKDpwxzdisflr4F7kuOeOr5nK%2Bo7GGcf2LTWC7cu%2FIHAtR3%2Fx2P0OwoFoCMxXJIlcvm3DauUCW6F%2Buc5ms7EEVut7dazeTvg5dINLZDD66CbNT60A73VWwBv6wAIIDlklAcyQqwc5ZmUXXrGDxfqVnDTJEJEN%2F1ovSJYnTfiJpTeNeRi9AshPO3X7BGCOPpUK%2FKFU%2B9Bq23CyvK1nJiSr4jPx8b%2FLk%2FcwtweYmUz2o6GaQRBMB9jQKoV%2BOJ
emuCh3mbODHiyL2jEWeCsM%2FMyAbhjjLgsm%2F%2BBJZHpSYOISTASKPybReHqgAFGJPBEw4jOMzUc86yC1%2FRKq%2B%2B5%2F%3C%2Fdiagram%3E%3C%2Fmxfile%3E+ IDATx^ Egvpa!(D9Co_/xخ$FAs/$ջ! _D叢' a#$@vw7V 3;٧>?oQ`ҢPJ@ (%PJ@ 4LA+PJ@ (%PJ@ (%HJ@ (%PJ@ (%@@v jJ@ (%PJ@ (%T`=PJ@ (%PJ T`7VPJ@ (%PJ@J@ (%PJ@ (%@@v jJ@ (%PJ@ (%T`=PJ@ (%PJ T`7VPJ@ (%PJ@J@ (%PJ@ (%@@v jJ@ (%PJ@ (%T`=PJ@ (%PJ T`7VPJ@ (%PJ@J@ (%PJ@ (%@@v jJ@ (%PJ@ (%T`=h޼y3gnee޼yDTZfMI)%PJ@ (%T`Op$SƘWrGW\Y]]]/# ¯) ÷1yaǗV;gٲe728~w=1J@ (%PJ@ L*'~!oҍdBD7Zk81!ZkgӾ"(IUYߌzcPJ@ (%:T`OX\tww绻a.qƘqAwwwZv>W^:%PJ@ (%Z  lc!3yD!WlGD?~>o%n}, 73Gh"*ѯDkٝ0 eˈEDQD sRi]?%ܹsn_"2O i"bxr{{*y`q7: ùA|}"/"+\{kqn$?8o%CrS>bŊcڈwqn-c@DDtDK,ٹX,bW if>Z承J@ (%PJ@ 4 fPl 0 e3!xJH;+kN [Dޟ$UQ]-"%.^xRt\OD]- ?6ODwc.c9D;00B} "2p󧷷?Awo? ֙8C0Yfm m"r;3^8y"Z쎱e!K[[۫چ5D[\[kOZk0 3/s+"0 kˎ?g̘ ~c[\?k-g+%PJ@ (%0T`h5MӵӦMuh{ 3/:;;_vQ*"M􈞞[PGE섟a=sw9;îD^kLijT*]]]i7q|END?n/iz$Y8k4Mן|볈<6ZG-|3(ۈZk0 fOљZ0BL~U`O=PJ@ (%P"Y$G_OSvElj{Dߝ4M_`'>!L?)JAf..]:T*DD^HD"r6 S9˖-qØ^k#u_;ks3?Zks}}}/'QEDZC`gyZkδ;"Z6>f8}}}hfkQQ03"]=S (%PJ@ ?"MVV N? ü<&gAcXD~$ɫk5Vo%I2B`Z} ̼ U~{>g|CkmO? 
g(w]1Dc6 JT`O=PJ@ (%P"Y$G_OSvE'ȅ"%fN*%\hVo΍4}qOOO4KD 4M4MNW<̗f̘AaMX<( ƒ lg^}l_!"( 8~[l=dK+]E#R`XB\ܵ 1[emg*%PJ@ (%0T`O'Zuy(zMeEQ%"18NGf>GDVf}1<|`(D.B3d4 f~3({>PJ@ (%h&ͤY0 D* ؕ-MK6^BDc[DNDHT>- czLDp*0>0&a6 | Qǿ.wq) k]wFej<(Lh?f,!JDt/rAD>*"WA<88x lN 58<ڄPJ@ (%/*ǁE l&#u.o4M1Df"p… ;<Ed/f@](q҈8/2@"t 73\(~ax!34o˖-ߏtbWYkڪ\ՙ6`\xkh}1~%ϻgc ^+6 lgfvi f!wŮ;F,rQvq %PJ@ (%Ɓ q|f4yhm$0EqqZ bpofiT歷znF͟?|>?w` Q׭[wۚ5kp1ϟb\0;::~.^x'|r;8{r͚5% яnvZPJ@ (%PC@8p7t9je">WM1d"SH\ Ddo*H㕀PJ@ (%@*1x*ͩ1&Ӟ}.L|j;WmW (%PJ@ (n_3V -\ǒ%Khf~K%";*[xдkJ@ (%PJ`T`ZOQdZPJ@ (%PJ`"꛷{@{PJ@ (%h 6\vZPJ@ (%PJ`P=ƃ!]TI5ƭkJ@ (%PJ@ (%0^T`1鮮#49 ݑcZPJ@ (%PJ`cֵz%PJ@ (%/*ǘ 1+%PJ@ (%Z 1(.Ǹ)^ (%PJ@ (%& M+%PJ@ (%C@3z%J@ (%PJ@ (%0T`O |mZ (%PJ@ (%*+QJ@ (%PJ@ ( ${ a]DDK1lJVJ@ (%PJ@ ( &{  |i/( æj%PJ@ (%`*p4ժPJ@ (%P-F@1f5Z6U+%PJ@ (%@ P=EŨ>㏎a3ZPJ@ (%PJ@ -0%PJ@ (%T`O1+PJ@ (%PJ@ ( APJ@ (%P ?zJ@ (%PDG95@V)%ƃ 1~Dt},Iƨ-Z| orq?&"@nץPJ1:h@njUhn%KlqF^jտ'+%Phu.k[zz.c1mRi9slNZ;캵>%0T`nl} .\miŊ1$;s!ffS@<('"o֖XcR"*3cH7/ Xk܍^P,o[zƑzmU(~hz#{< kywk-Lŵ@} 10MdE%0Y ;j~+&hgS#maYf>5} ~R}coml {;̼}&j'T3N`ϛ7}͚5Aص@vww绻#9GUJ&5+Yxb 6JD|_V8g޼y3gn5)ϛ7mܹV4m[Pٜ?:wV1j< k]vݜUZȜs'/]k6{DcV/w= n1= "Zwf^=\c B#h3TDd"ZADFf)j֬Yˉ"ND$ l" ~3,3mBD.[n B3ƒ>x3ѿEf^l}DŽa;4=;Z;zG]wjEѩ"YsE"z%ƘWrGW\ aKocw5[:"VLfM{6!.$D Dbxb("9f- .%;"|":ʛ9?Ɉ>;$I2Q_E1"fr?3#Y3BDXPxD!";"3BDXkREQn "zw]o%=&O{cu< "QG#/Zkloٲe37nx3 fMA](4ƀfD Z9.IcEQt|[DNc"cEd$I}Z;30@afFy@Ḋedׯf͚ଆ&1N"AGa":}Hd81ow/qqsg;b>7NtNOOZc >,omO'IrS?_2;%'I>T_B&[=&QacL=旹\~ec"3\h}p^`Ȣ$IVU;pS3i 4ل$2-"ÉuDt|{aF\ bq79q36 QqS||߫|~fKD  (Wk-l``:$esD۬n{{3>⹄0}3=bƵ}Ek`oV |<$8 w #Še/#׺L4 Ov|TT` 4F̼|r^"cZPz5>>"$I5`m XEVDBa4ymZ Dg}uן3+q|1;Xߴዙ7rW\a>d=^h^綾^V:QVIa=ǔ3`V2;NMQaɬM㡇M.My6^ 8>#p 3w-]bſ(:SD% '΃ !0|/q 31?찣cU5W"cgA?CcOaƌbSf 3xisġUBA\x===ps/X>mڴ/_dD9ށXk!{~zd6[irqU=ynK>y1<"ovXP u(AS+οJ7AV0ڗc`."IXD?򦮕}Θ~CD33v7)ٙ~7: ùA|&w3E4ME4a{b3 CaMD szMD4|>?ǝ)3‰;"ڇp;g'k^dboر{ }❬4̈epwѷ|k^`Aߥ4 b"$Iʈ:vQEa)'" 9U^]j3Sp+(OB`3s ?1f9If'}u c vJ ;2=9ŋ_\,dfu]]eòn}Z(c0ag^;^2&Q} .JbuOA,g}*P:i0^K`ax&#X㣇]Zk/_9=IcM~ilvmmmky ~VsljSJ`TJX/ޫT*a ի֯_?ǹ< &CApqTqu_Yp-Kc Ag[[CbvwE 
@DAc7f>+l֬Y _0U3_i ]GpMQDޝ\s6 j`#hrk ?q*LM6t 3.AXNDs^Ye<P_ުi*qey^"35;}UʫLe0-ḭ{V&gZk=s .6UӟV$|1]a##8:;G;H6ޗvaCa"Z(c0a@IS>B.#`ϳgLƋ_qggPG{V }a J.lE U;IxVFJ`TF2"@q;ֈP{PG`bܑ$ \e*uA`Š@JL[+~յ{/ᶒND?n/rʦi%I띇;]>.0vՏD!g<D'v.S𗄯y-ߌ'B&k0질Lk-C >1w|SM?J>E Xq W*J;>uó!4LEZ@ĴmFT91\AD=_'"˓$3^..ʑIXHBl߇{\SHx?w;|A [{{{jpVC2;*oɖT`DQt`bir ْr4[D0P⡆ax+^vE Dd3'qcϟ}{{;˓jyl5Zlٴ~D\q`__&7[k\2;BM Q*!#ob׵\OtH'k/0#e=83y_fqݝ{9DGIN IDAT(f&pL:~O]T\ʗ;].r]}v;i.b4»(|~[gbwn'Ӕgor &0%GlXђ%KE,}w_byk[[\rɺzvE]"2=KI? pvwȼk 1%bG!S9oi3#ͦ1Qa%3}{X6*[0S=ȍ1?FZ 6lU9c ;E䓦M-av]>̚5 %q|t`.'bE[:JQB_Óz14MjrVb -ЋSO`TZۥ 71/yB11V`g0aVCbwB`s0ǎ9̷Q \倎p|e2>ء'#T6Κf._1q.vq1coCAMy6}O6 Dʆ1vwla3̌x<0ZY` X)m %$0wTe~8O!i01N`÷^;}V0yusHmA\fHvGPxĉpX=K`;?hp. /Jve.ǬPΚoæk0`|n»F{My6ͮWi@;e ;l4o }vb QJ%M˱̛&й8`>?.w]0@_ ͠ORK 7'q.fbb/q|CWYk۠\|L|Ń33rDl`O׊ED=o86,>jpl2s͂m-ح6"h^Hv1j.Scװl Q(qUYpᶹ\y5tQVVEѾ.PI_XۧUzPh;*c"@@3tǎ@$I!y01tA"ODI;$I ik֟5c^{Lc!+19@$v#O؞۔gwѹ P(\zP$Ϧ̿Ip>i6l}͚5:yLTzb$Θ_sN˺`=TA?33f[|9EM+~Qn``N|23|Pjń;::y`=lqhP=inxS?[4)8%&@SQQıde]䡁=iMm)C0!M`?8l2(LXV<Qڿ K޵뮻~;O؉{!)s>epp1B]6lהN@H@?8*'%<2n#gLqޭj&{[GƘ].0q#\V7ѱkV-O;U].=wiэ7f]șg?eM 83ё+c\}]@)L6fjl EѻEb5}s CKhʳْWRJ`P=ijJuT?Sjb#ДwTF`~ >MLzk]{ݺuyVwAF|>?.;[o ~hjxY3|ɵ;{I>؈~X6D)fC=ГT`7p9OjrS:@NkU@SQYmE--Bdj˕]NHD[khj76'ДgS*%!zUu[O9UFZS LNMyGnN{K7<K{֔gP;@P! ZfZ_Jr*^|)t{{{3Z@ɇe걋<|43%MSDx;*[zlj甀hy*5>%0Rl@ n2RcGEdC$\TN?SiZ#7fA@ͩ1zJ nᙲzp%0);jR vr gs ^h5*[mD? HhejѾm.Mp===eLgs}8%094zRH|KҶsݝN`twwZ;8}~x޼yms-u,[lfTJGnذt?c3.͛#i6l\K.d] hvZ@+0BD}D6k*[yoJ`jP=5ǽկ&ƘDUf>('1D\kݗ!̌1o%눨Z/\p kooӊ+9eܝ$ɫGKDwBJ#z+̼^dKEG(/"M5kw$]U zY˭lgaZPP(i0[j;kDdgkM#䊆cfx?5Ƙя$kEMΈۂfGaax2v 1!" S#nRMAH,ѥ'T`7֡@3 n"(.˪Zj:X7o^5k`-tN`WF`9Xf [FsNJ b= lٲi˗/ﯧXu_wƘ!7Xkf fszaFsxKFszB 賹^LT`7q4v`6hѢA|;kY?O)|h9"[D񬉸1;t| Yw1!mj":O(s̉ xv'nܸJDh:Ykob<s3oc} ;yQ?9ShCDw鬉xWWNi^NDGd`Oq]z<<+EAݍţ "ڗ18o? 73Gh܃D+Z$pLq1|o\f=`؎ZxŒcq֞9pw ܂ ^U(~xJ\OX൅BYq2Ҩ^6`ys[8AD/$I:KYlX`L5 V(~Vy^ Ewa3c\ g8jL LPQqžQcqT (}6PJ` n;crjbU  1u~i x]hcgc OaƌbӪU #aq/?71ۉ)I| fC,c.' 
!^S`iwOOv^S9|ڴi-_kĈyI@ܒm};Y]8YYpDb7$ oȇ$ݬsHv03ߌ0 oefyM5ƜND8L9ϊF}6M׫Z Rcy3B&JfWij:&(:_D> VJffvZ3 A1^K/^bqOfyp7vn}]v(1ƠAP.`\__ˉ"r3w/@ NᘍF`GQ:U_gTPT9#kh.] Cxi__ӼK1˽dʕ:(0 W`"ɇ93=;"kI*^tRb *Dt!h> c 4;~Q>aŊԀ/k|̧am_>?B_a38I'u@8ס;D$I`TYlnI (QP=Jpzژhz?$IDQt5L+qc> l^jѮ1}^oJK쬨0l&1pIE0Ob5Dj1fF:94f{!c6DQ=,DڶrTwN5E "rYmT1@|>OXwYHW)M1yZgy\E'ȅ"?gdc;biz$ b3dɒDR`J{{{a*H+_˂ ֆ]d}G{^f\dy." @o`\v*P#qoj;j㕀0 ?opQ>z(%0T`OhtAKT63&KX4_'"˓$ArYpaG>G`Y__c̯urE"=lrC l B95Q(qvͺᘍV`kzUDB^6gɩuN5-*?&ӝajoX<Wb[1řޕFQu ̻E>8YVhe?Ui~w0wC |>쮴1f1>N^z)mVؕҊ63#wSUA|oaď%!n$ IA{ae30DhѢYO>俇9n un/Yd7r7V;o֬A Nmoh>֒6l(]~>V-3s Th>3*GC9~.- hhhѢr{ETڧ6(`21ci"߬{^xF`?'03ӹڳQ7[H~^DKh+^i"^S`#pF#] 9cA~Lw9[DΘ1>T9 L0UE>/qG9o"Z{Wbv)J; (6%{+m>ÙG!(˦(abwOz g1,n uFVAC﨑5ѣ 6%"/rLy{5ȗ\E}6m۱"?\0lTTHIŪ+R#Dax!3^*\ڬ7<9Δvs}b#:mH$Ig ٳau_pOuC7MǓ>88xի7f8d~:(Ic x_},`?." {h/k"tgZRC`u2f&_7o1O^X/ "rw$F_ďIKbh ?!fu#"@ݍ`pZ`jluT`Si^9שO5Sq U; 9T.b;ʗD7rJ+b ׇF^.xs˗ӏzh˩ZO=pwF>&W$I SM>Ƙ6f.BgoN;To9i nյ{T9wy\AX,400p7E T 7p}ݺu4 "5۪6[/ mmmbA73O l4ZusZf$]DdfG zJ fk$\儷Xk>ׁDÝ_Żۿ./p~Cz]>_jjLTt?$aW3${vmʳi6ZdqV8vbY;ӫ=.`ѝ|9&n/¢\缫Us əÊ )-aA94fFcEd$IqzX?Y!qLˋ.E%6" yϥфBJDc >%d< IDATp@ْ Vu1nZW ' '1rgwAߙ֍ "33"ť`%QnUb]0NYk/B5>xg`MnҖ1<޺V̼5f|a\u>?~CpWlZ)>*P|d"r 3/nd>4w ђ8\oRS| 2@v%FG Û梯RSSQ:Yė'b?j\jֳ-"Bʗ"6EW h``kp"k"ӻ|/} 28b2&@ SOk֬YſbӀQ|/:2MSXb}i $ӚW%cL9kDATrӝc͢"M9===ke,p]W23bz ^[(~\ߕ n׿ҕ"t3+!^U`5/' 2.pװ$NIt[W>WoX(gΈ"tD/aW!ލw99?B\^q988xe{{;0fٰp>o_d !N6W?ꈭHZ.N#,ٵT* 9U d57wG_gsu&6pcMD~̈3]D^$M|D"B$Iw\ &p>v2hRS| 2@v%FG-<JMMyGct N'{sVggyG"vSM (?+)6c>4Oo%A0|TznoonQzf6v#$/!"A4M$Ip`Fb7nrö_9JQ/2Yܮz"C / C S#KE"&2g܂c(z k]xV[0 Cf>l"{ 1óRzV> h* ϊ<3f n◻ѱ󧷷})\ǽE^S[!!^2{1^r%Q܋wYGʖYT`'mm^M@ۘDbkdžҔwNmOID-| Nڬ)mEl.^xVTzwc b lDpc 3ĉ;.`)0./*3 8۸qa|ݝ E(fnbYd:8a*Mmr\P$IyG̳85ٴQID`{ZǟsҥKgJ%( SDY/Iِ*ԗ>¸OF0T0 Wb JҞ|`ְJy3+"r*Zg}P6MӭZ뮻bGG4/vWׇ{nze4{g{Xkf1|V OxKzzlE\.\DTHۉ8>o&"_oȦ$mTw5*F#| DZڔP[r9qbKj;J'ۥx.c̖K+O'QwrkSM'y0uBvu^8883~C lA:t\tr4MJ5#̼ԛ;lW q Y-}Fl3ZO٠ K,ٹX,"5|g})9}܌&gr\U`wbW$k-7c Dzkm1&bEm|'~y343bڃ],mv+6nܸ!O}{!n; X,+.l^x x}RkoÎu=s32>X@`l2\.'-A|ipppK/Ѻ^4M:Hv@""qP@?-?} u@Pc@i(ė#$~MyJf.qTJӞ0 
sz]6|iwo&C RTryL0c QO5@P*v,'S rݱêhѢ~%(*fN86AZGۋ|&<;!2ߘ)#+ro1DtEVxcuɘ,S$b&f߈vankInz#zxD| 3A׺K95vv,]>;vl,8~5k`ajc]B m? tttׇ`n7gd0^iA*1Ea̖¯lVkT^X>^P-Hi($~M'yOBPm+ICDÝD? !Z4쟜0F;c;/J=hWDAϺ0)>*DF&L1ö_9ngD@ 9Gn"l/th &DV"BZ'DM6Zע(Z4/ fq""#eRi3_ bqUVheR;Rcxryw,zaG)."r)f>D}G4 3wս F5f^X* `ZiᘻT>iyߟSX"MW DweE[:sh'[k/b i(ub8{ӍNHc oOE-'"_#4y JCIFڎ3gV'wǪƘn;IN4M`g':Idďפzlw3r(w18`i}j&cL9.`紵\n¡\O۱C C2u|?BF9>蒵b\lYCE۱;( 08F~\lԁ |"z3Y3rX|ZǵnnCQ@+HI=kXiLJX,ކ7 ۚ^U@TZ|(]/oc^'mYGfq֨O':/Sq366lr9i1}_/_~*H[υ)+Yڕv_nm>]sH UQjڹ?BwevSZ:c d{33LmÝMUT* xwy!*V*+JO͘1C}ʜ#iShk35}2;|ooV .6ODm#}4݋s;hSOYkPVAȺ' 2A 캰5tP5/"dz4 58 Nv~U7h\t18 b+D$N˳ZlT%&M-=AaMheJUJH<#XiZ0{@=ֹ->Ccqu\ףJ_umcz58#x} 'EJ`(IF@ P݂]luAᵒ;WXO?68D9t>I?/tC}"BtPDA.4󇑌 qB^8φ1,ApxnߋH$ɧ br!ߙN߇{EȷE4fFVYYoREK|Gծg(n8fJDh-GF_gEG /A&aG~s0Zym{%IrpD?:;;@DH`%A4MJà RD>EMDx=3zu8.q? mA`c I| ˬ'vvv :&s```Oo5{mp=_DIrJSe:0qv>SFKW I2PS2yu࣋IK\֯_?)|J#"3U@ `ND/FO8z"~D1+8)i=z矷bŊnrs6r䯉9BpyD-3:s|L`31c,2xQhyghl8n9]`o[NIK0FHwQ!N^/"r"jiڂ 84MӣhuE?H"#>@Gٿ; ÷!/Dx詾[D~`>%qAU0f!Ě`(EIW܂ꂸG ,`f~7iz̉1ǂ _ L;nk J`rp߆;WVJ`*P=F5K%us f6 3yR%c;{~G0"h>B-q75DQçM]'as#^,3||OQ|zv9c |o; !yG YʁW|PK־ڭV7/}Ic9>rSrvJD[ܔ0K쯗k\ǰܜZ눢uH\JX`߻}}}؝;?+w_c _.>]8ƙ֖St ⥈EDYkO2u| `q _rfC lc za]67@jXf槝ެЇ|c Lw\~60Va=ܜ~v{_RM-\9]ymCoH!_뭵0T&⡵Ǎ_9=I =7ƤCׯ&RiͰF D*GNOT##R햦霞J֢hi*́]?@D%sɁ 'dwE7I)+c q|1~ʧ?Ϟ="rp2vE#^'"K>#o%3^`)"0?,AjqXAiIǦQ7'w! R|>ܡXkZpaG>p;؛ Of֬YWyv 6f&"(IU٦ł;صF`3p۬r9ANkwّDu[w(/MOSJ@ (%Ƒ ac&j?= |qvwEO5c";D>KDc; |ZJ<DTDFpr~^`GQpD61^gv?OD`"5vֶ{Tg]6._pToֶ%\^";M{܀bV^|fM076vpG5k*Al4>!_֨PJ@ 4 :Oչ`kS VD.Yf~ 'C{^]JD ?AocN6mڊȹ 1LBv} 4MmwV/(ߞ݄;, \ry+LJ'|O՛`D@`4LW&ATx(* "~x˰`;3]7O{{cٷlO_<_[^f>j#c̰ޛ ,qMY5Ƭ$SkQ a &MZP8k(\Sm 6?KDr ]vj`:e?23.36RKl^fڵ(3c&wi/4hg}}* jaL_EA@!-NHTz$H$i96t+oA䒙߁U6D"Wy̼QiMkpJ夔K_)"s\rb ׊ҏ"i6BB}/)J*Jjxxau_̰yC/&!*yZkHk!89#i? /Zxm5<*n5τzHqd]kliw}݇  HE6<J&Rk!X;ff8+v@#>cL>r.qP8quѧsc6y͚5e#`/{oѴQ2o\Due%  0=hNvpZ1鑑c q'Rl 4֦MsOg!va'/Z.H ֻm۶oYz5lv& Hooލv) ðֱǹ!"1µ1f pi5!Y24f^G|5xg2׮]auS>001#N2ӖQ?|AxΠݳ>{ ƘwA {Kiٲe;g2(̷iLK,- ;q6&Z©Q=QwA_ f(8N tBg.i$rxlyR 1ՒS3Na=! 
)}Yb۩fbA$ jy۰g'AP\i٣N P\a-~a $I?EAu{q&YBB:-י@篯%m);r40 lc6yPǿf5CUl>'@ۃ=jnC.xw1f~?ȘA`! 9RdV5q1Qٴ^J3̿}ƣ99 R"I-vV)Dz솹j׋`\4c 1dFlF5-{ԌFD:'tDEcS @$I x`0E;=x=m;h$ qxA IDAT` ßVZؾ߰aJA`# )@k U4iV^ DE@)1NYGM`CezÆ ۓ3y޹ƘE,͌:a4"iNx➤|R^LW'IA@!MNNy POf5@$ \Mx)nRD~"ڟ#DT)]fv1$|1 +J9Dt̗cPOE|pp0e˖hDD Ws֭[ou?l"ڕd/*>ED/fOc>HDG?w]Hqg/r~;3_e }KDE"ll\.i1t"zj}cGm[73G֬YsDi6"z׍Ƙ/Amyw1;f~% 왻HA@h'B@sɒ%iֺL%^3LuaKRܶ=Xb 5É67j RE>CW"Zj~^; D=":fGR? 3uHGDk%( fƼ=nZzX⩥R E3cDt.9%>Y06<;o5ۅ DN ](I鴫Vl^,2pA@hk&e' M ^zjW3}߿!"A`ODcWc~d iT* oP^`j쁁0G_̧HEqgV26 X<1rz0  妁0| އZ*1_(W;>uY;Ο?_x8Γ<.%pYq3JkyyƘOaPy1@Zv>ژDdPJ%HC!}SC@a--G-INA@ॸ-{_ Ds!T?}`?Ekxw1 Ƙ_B*s9Eį<HWiSJwZO`31VoD?Dă-[MDc^ 7hmI0'89t]k6@k/L51L(Jr 0dQ|g^*3DDPUCZD8/OB2J+1AWRS1ҋ*  !ڲG)` Z Zs,[k[> I6$I"Iklԗ8buCDt͓!###qgbJ D":Xkwu!5đnHKDZkǸz#}%p"^ޕ+W.,P?%VG$~kMoy7a=8+ L#B4b`c^5ɏd[W)+J;~4ۮ6cqi*ڜl`ƙLfN=/L1m٣"jv%ǐKZ #8b]ڋP`G `]NsY=dv_v-|D,xJ=2 Dc"yLn+ .;۪W%؞[q\rbxm~;o< * {zzV=]v^<}##_jIAnSc &ϩ"M>m9 h-_dkֵ4Tf)7y`dZv-8Q2dTW&@[H;l=__̯ t#L&"Wkh}~HQ)OD3}\9{3dd1uA@V\SXپ_ky_7Ƽ(vL{2!؈$&nbxa DCtS='mv4*Z'm'LA@v3USKk.bc`Fؘ-״5ʗ,6{mc^u45uۮH=@hJ)+ݐbcLDOd}׬Yx# uhu gΪEZ#===\`fzb6!:B6R<=[n"+1q,3="zoJ)Zxg}ؼy%p9Y<ewK;1א<l{$#wm*A@enb”R#|$:7Jwi2]WHb^8N9M jJ"000 }ZO.]I\ |a!ǝI˶"\/Di1CVZ;::pVCs ̼c {|PL[wZ̢~8ίh_`ѱCrܧmx腺*£C(9"79|>҄Ble2EJ)x6q{ Wbϩ`6](ٚd[ޣ뺇XכZ0?bðo޼y]z5b67L֯o8Ν;Ѱ`,C$-Z3Boc[hx%'#nzSM)܏jˡ,Y[6z`2nݺ7hq˸PŜx;z3JA@.D@$'V0,.!!KZ7$@/!?3mEUƘ/c?j~8 Fqkm@b<3gA08,dH^N'ݪ>rzg u(4"*Ǟ6ŋg,X nff\*̟}u,E_` B GD#00ƜWR *7c~oƘApMr̬#M{UomE N}ҽ˿s@h=6gt8 @#y$A4stf|l߾ 6Hj?-^]})ALWR}}}A=:KB?7X,nݺIgAf7I:`jywB|o; 夔r \[ ϶CCC< qZ{{{w&8* CxhDK|:'ص. 
Yo51 v=3_sx+`JN# K VJ!.4~5l*)'ceD֢X';r%W7H<1,.J Cj_N1GF(zHodәh P=g_c`ug?l@?n: j $ @=%xI4s5$+6)y׭ٜ8M:Ŋja<$cVGq)f{Իk-w"[nJyu]:f~N 7|]u R vPyL|>R 1 F(Zud vX6|=.dL,95m's}.yhHdA`N!`Xn1(S !B ՐB'x\]d(vJ Ud lFJ)=#i5?x7`Yk1"ذê+ ûcW" ޼yޱ?Wl޼x'qnǍ1fQ(I73F5|X,g ߃cUEŰmhTa BԻ1kib#FD QeFJ®܋S8=i|!*zm=Kaj=I%vzKҗ*l c3of1P!a.: Dv8(ZzBlND/Z_g56l~6?Ŧ99s] m|gTm?VXRtV|#"vuh\զ,&`0MD ðS:ބI@l_"zHlc=A`ÚLjLQ^xf0GyjNJQ0Xp]ԉd>qll(Bh q_i뫗o``غuM8RY)o9Yd]xlӲevd2 Ssw< 06"U,FCh"W)uPrdÛF[qVS˗?gG"4u,QP}Yg8 6lc\<Z>{Ri;$Ƙч5ވ`3l4hhmT%BvÊ,BI(.E# 1FCD@)5}WMV!A 7fJk6W/u 6B*j`/]I\꼗a#˴t%&}_>.ƥ+W.,_joQ2N1 F;:/*lFm#uBg{=g;/;UF%R h?#:$g $gqYڤY5E/_8/rƘyP+ˡfÛ6"ب%|1toh-R ǖOCG؈0eh\ׯ(XDj,^^*d2kv#/Q6_l` yQ  UDG T@L`МXVkMu/'pqkywMO#HRk \K/![%j4KD;D=1C}}}۲eKqJz 4Y@CuzFmG^v+Xh!6-KKA@C2fC}߇s'I@3L`L[0y&9pb{.n6i]߿8 Hlr1f% /ZW󼯃1D~Eމ266l6Y.֭[d1/pvm&"v`{iچtIdmfgZ`ObF\-j+$Z鎬-K@F! F׋԰IƉ4El`?-V%BQkԟRlgU߯oJ%5<ۓv7As ̼H98 IDATa?qAZlG ~udh\F" 6!L1E]_-m p38ILPk=8bs2kXj؅L& UvyGJ_~3a,^J7n,MuVXT*Vs1}jŹ#<888NCdPJ!ciSR  `uJ]EK{ 1㛈A!)'ucV^xcР8//瞐cVV6&lan 8+bա3w|jIWjٲe;;slOO_֬Ys%Kl KWKr)`5߹N#iOt[R @&3w:]!-B9ŦSioͮu&Icʖ$lwr46 ,%TJBD?'s@\"J,f'ᐺ;R"ZHՂ mF E lM!*0QJj!&:L`C%iF5:IVAX`CzN@S ҾqF8P%!m#qd淕vl@S">Z/^ @u!3?Np]Jf>Vk}@NFM잱LVJGDpbjDt7-Z_ xRW3??[)"~lf^a(AD[BWG<{1,`Ḋ8Ka0/D"z? V7"3φb5|X,g1 =c̙QDut}wvfs(l-WJVc^yG258LJ'h 0ZM)F;]*׌.8&gzּ3 CCC5gYy>f}>y-[֗fo$XQ;1߄'O1{|.͛e3aKDc9l\DDok.c{^5˗d0[A{PJ߈BBtHed `UwZ$IS3;6-{g2@m>c la.z@)"Qkg!י=?Y5\ Y eށ0C@Dۿ l& 쑑Z"Uj$c%}+.^8`DD?3gѡ XWjp] ̼ y]l\K֗U[BZ,p"dx?":>HDX~;ADiyca+NDĄED6= $cg?#"p]`K#k~^l _c~̯81A?J|Cl $7\/@ .  f #iV`:ODDT*0<I`8΋y&8rP? 3nhx mn0pnpHɦ|iRPJccc 6l?*S#1SdX |6ͤi!קC1t8QUR tO^;>I@,<&~VR/ҭkCc7 ]ջ8\JC$"8[^I[c0^N$؞-7 !  xxmBeŽO`۾9lRB^bϺu(fѷ_W8\)y"'9s.pyjl7WE.? 
ƍ|I vD$- ßg2#1-j8R5`N%k/nғE`ppg˖-ƘwA[~I @!E 0=gvnZ%RPA$xՆ+%k3~ :o0]/"_SQ]Zhdk zH{˶ӞiExr, g UzkZk+FۖClgؕOz{{u#~Ea7C^8~J)4*mx/~B)C诽GXZႡw(M+}n\&,2z `mcqhJ(dW+Z*`(!x0SDgҷJ5IS@M8Ι|+Sش4%uPJ?IZkD֐$TE jYA`=ePKC@`6߿8G0s8{ƀzP޸ Įָ=v}1Oc;5^';ǖJ|ֺ R{":`֭7EҕRAjvW`V*B 588B:.!O.{!&3+W.,J6}2L6yDŽaTfR,oʣ.a~2[0l$ey)3 @ 03PJ}#LD*B4IgH5V@@"`LA@j"О|74&A@!03h PZg%Kk  懭'e !uҘ~Y V鱒 0u(#[1okYZA@h*R~:--n``0 ņ iKR -"a044tKIqA@)D@)3Z%Sج4%:𹮻>66vƖ֙A`Zp]#"z_ iT!  " i|3 !3i6/ mEuݍ|V, @Fͩf[1K 0/'A@ygcZRňyXdn===';s[>gW'D\}A&lmS*A@1ک  eF-0J% @# ]?2@A`N# {-]&vߑFZ_9(  E@L7d@7# ڰSwc_b`7Hr#rOOz6n86=A@EL&6.`&'ax8_L͆vRo'H80cGA`n `C-/J unZF)@w Cf8gJf BkLUt03<=f͌NCG!'"b&A&JA"wիW?&P  Ŀ+x`'K'ˢA@}(|"Z}^z,t'bѝQa!S% 0e(GD 'MYҐ EuUl6h͚5 \@5`˺ͳ'}A@$eqhhHh:fjfvr!s2!3qVOi--ROA@A%\׽Z 3!3tb[  t>8趱xA@Be  L b1%0K# 0FiA@A@A@A@v̥DA@A@A`=Kӂ   ݃K   4" {A@A@A@!؋/Y`~l6;f͚v45?b1֭[j5|d+Z'zֺJ}R= :{ƍSֲdɒzzz@B?̴[歝s$uUG`ʕ cٱT*zQK Ygy5k<a=/ _D5LWZ˶mێpDUCCCiCh!JDu׌IVJ]W^X4yZ郔moD[åZ6_l.cagA0+㹺;-.7j.PL[e9GRGތ$6|1fo IJ<"*js [leޱǵ;W9>̋}f'qv!5${``0 _jϛ Xoq}|fI<;s㟵ָibz&Rf":~ɏ|fPJ]CD^BkÙ4{T' v%IgüurRwmڼ*1FA@o00ǿvj82Nb+SmUJr?WRvuoOkݗ BW+[]~ϳ|Hk*h~H膏l󣟙%LDJ[viNjOS>[Szk#`"5k;6KDk0 ۯz?}.EaƘ XP)s":%n"1]ADBN.uNJ)5o})jzʚ[)"nbO 3eQ@7|dmZ={1f#?}3qM-Ig˼ĵ}<381uDI)u6}1NQݎozoR7D CCC+@K8ߵٕnZ4= v !؝BSEá^SkcŊ lILă/WA7uNlٲ-rfwNCo{>d|=FO 5CCCo{ Swo}}^[160w_qޗLџf}/fi'cLU J;7^M}>>FGG?HBj "ԙ.>f?o+;sN>>!}{ Zk\,r:sq>'`<߶mNɐ<%/ل]82o۶ZajۙkmEƘ襬~TJ!v8G&#\,Mq8ESJV|`c^HDFWO7agRJpH-Z+W.,QxTk[4y!# T1*N*)Jh_,<00prƞ?^*<<XW\3!m=gJ 㞄REҏ<Vq^VKeּ߽R [$&"3/X's4SmoIrDC~.4e5o$eƘ[b ػq9-} B\mc^\2[17oIvB|HzU0 +h^Ҏp]4\̼OpKr[|IDx"3?';'v=\bo.Xhѫ6&J)8Cr2,=~B{ֹK^/R 23̩Dؾ}z^(R]vҠV7o + C|dq3RΜǭzGLs/='^We?u.h*զmܙ<\5 pa-})`O$ٍƘ/13nqS^ܴ!GVAJKRڶ^n/bZ\ IH$`j=BnDͫҳJ=:&JLlhicS ϕ5IHp(}?rFےMZ2<i~ɂ@3".%Hf%ٸ!7T im$7h67^Gb kJjQ!bz"|\nK/;MsWRk۳졑V@P'AB.c̻ EFj[⠆&'zߔR HC(ޣ{Ptg;>q)3;EqHF"ݎ]svۋ#HoZDOjsD1H^zc>? 
LvVrHP0E]֦Fð"aiT {m{__ΑY'殓]Q|*N v͉]'8V;zΣQI)}ߋUw .⬶frgfΧi;34ґ{sOHdsA\ l6;X,B.q5F7U ܂bu3dضmHV{3qJyƘW@/N1 $6nNU*u̳lٲl69^JMr*!IIד TeڞH+nAMJ 17ok31THlfT1f 4 CjYZaC0|33ǝ\vB9"8\hmwqW=66(ֈ`+a50 }.;9lfL]k]yk߮뮄lwsttj֡QQØ"2֖*.ZH-:TLa*Q!W@u7jVԳ =τE\̲x+}rj*=VqQjlaf8*J/1YK\cV< "JGx/ }G&zxw?=}? 2n(&-iƘf8cnυ}3Ƙ/.f٧T# Uk}߇ -oI5cxfP}b\FiIJA̍1orW%Kc X+dd&g`^ҮHY`߁c=S; >sѢE#i>[1Q.hc0'-*k,5):Z13"(D ۵uO6lkXSiDRåsIݎ=ͮvX|X!O e;.s $|ubx?MmRTyqf'\XhJ#Qt"aGCmP([f/o;p枕 {\"+ el6f_E󇆆%^6]hpd2L+iZq[~e&'U8I@L&ڵk#^nr`-HK, l`918 nVwLp5Ff~Ml![< ƘFϙG#+񀙿(pWAoll)I}$HɄG$R#N9s?Q`|U6^lkO r?|z! {M>>0۶mm2v.p4|GbT:|xx8T_xժU]jAݷoy{ /lŏ@5m9?iz9 Q;cl+vpb VbzڱUģxvRm^"l:bmD `S(JWy/ 2ָ$-V ,m* USw]M\$ FVve6=+C]R7Y*&EI,R=&)--اZ*I\l6([ 6^Z$>v%횋$eZk1!ϛ'/jhU .FfhGRQބΉ%UMʝIجg[Su]]DN3a.DvU.ZF#]v#eA1`bO !۸ z<;(122yc#qױL;"Ik]jy/i\6 t8ًҷp9n?O^7"ϊw_`ÁQTR]uNZ\r|:X3gUvǾqmFsYHcj)(JU$0gRԩ{?25.-̫"E< oqRjܥf֒zvI=lN#qsКZDbp#߲s`GSg##ͫľ}s,l)fwi(Wɛ[I;Pq.5:ڏHk[5ul®B⪵3~CI:hd<,{G>wpnEyeMZUiH!Vu1Q_EA$n+Y))n%cov#Ȇz$뭳8X+c*B὎8N*5.Yj'jEjUv=2[9(q :Tg=o4Nj;הn5pJGFFzkkbVp5ux__߮B%&%F$|qx|[yߔR0EeY._"Z", _`דW[% }}}&:#4ֿHۣ9'vŋ6n܈֢PXdN~kI1C0zp,vb:E;%['(ua9Pzy[%mَ9>"UM}x'ߑu=@"aY:.V!!ƥd\+T8K}z[תJC?qMMk!E,F@Y4<&>ǽX[+gLfQYJH $Qb9c^!u簘8ux:;6M\O|ƞa |1qQHF["~oQk i2W*uUc-T{Su3ڸa.5pKRp:Iq׫(%/jqw /UC)j{vl uxX\>&|=*Aky¨@5F{K5&|-]I\+yi4oId[vm"j[юslg)}#1Z!\׍Cͽ,U.+҄NZފhi/i\۳<;Qzs`Zc{ogoA1VLlzsJ 70{f$<#؝־& Cd~Es+,΍{)J{#$;Ѯ{9 ?*TN^O`ڷ-& \:E9 d= vG`O5/tF S |Fv["!]qY⅌{#]Mfj5⸎K(q} dUu9@`((QE#a뾞5nc4Ё}= . jh4;QCĠ3zͫof߻߹ifq3D?M??`^μA.Z`Ҋ`d=6 nH8:! ˂dZv1=Ԣ"gczpv\D|75_vjdlf(3ZlD"@[`,o^uiwJ ?Cv7֛c࿟7)s>Y_|cLSe vFݞo+|} ‡mNɝ쬿4Ll=c-ZmY4[9htv`wc/JgKv3D4%ح\ g& ״J֌`tmgg"6%œsT|Z{V:I囁,ݚGN3`PҡB Z.s2tJ۾"L~̵4Y?gΑk^'ӡ!EUh`5Hfidjc7}Sye(ɘv<6%+G7 (Rtvüri<W7Ԃu}H-p<˝}׿mlר 9kA4Kv`]Bs(a<7Vf\Oy G|i>Vdڊ`&-̬׎: ]o|qYk1;)Qkw%N:!8 ؾ7q\wig=q h5>?e1ZqכAF:"BuAXPdSbm@ZB\>f{moL0J\~(f4`cqr2+kZ6hp;gZrg& I6YA>cδ =#.sKMrO/3{e3LZ5wf5F*̏`F4%XGf.(Hfms̻k癬YZ ~ddidM-d9Qc"~*"?)2}`7Lb`AZ!9!ib#]*IW߾ kjj*j%V|Ns%50I9DM`g]h=? 
X`7˥]{5c.Wp_4s+]g"qUi63?/g;rk`rt?P(!"R^]966vM~7،ePvlbb A:@R>Mn9ko"> Y"WΛ`5A%}o'Yb?޹s.#2P;ֳ(d}ffflŒ]g 251P1JG4cN@$ݐIޏ/9R=[Rleq)ME.E`<*y@5±>- sfCoA t2~(@;ϔJ0\mi[ 0'; ω^b m"(}EytM>rD"j-xo◓zq+{cTӉTMrP(7I'%IRO,FBo `A<%q995j FY[݊`sZhwl:#v b7鎌 7L*Q({E-MڵkV(yL˫h1s.`/h-뮻+WS!dVA>~g+@8"zJ^L|T*};o+Ero@y\pz.vsvFψȅqω_=DT[2 % vBG{e֦yͷ h'CN7 vdnnւ9J%&PR"VOEϘ|QrJk1vӃĉA\Yz<ALF'cuUV`}E[,ܼy3L攬_OƔv 翊 $tϾLD(@N*@(lNqEᶮɘ"<ދEd}Z_LoھZ6,Z[ ]I[?4S倫p v rsoʆ))hY}S-ڵkU(-MKÞ?J!AG(va?0ekIdCa\D>潿X,b1@{9pm]9v[[ݡ72ȍI5 Ln95ksZ>q˖-Ok:g{(/ W "}Xnѓ\_uB52QTuBIWD̘kiK\@BM_iͧA0-t_~dkBѕSf~4W6z֍ 5֏& L~ 牷A4e;'7|"Exsfffb\t`7B(>6Wb˕k o!̌<ƻe&˅ޭyd-g|fZ^tnwY&s,\{nteKIF?FJ!.e&s2c 3}lpTaO\w WY{.t>/JD}mJe3疥yR4&C5KRFȦ݈`jzxmHgoJ3-Rv!Ic< RHQ? 2snμR4?Jr(̝y#4'Bә.s6Hd!-b|>"{ov &Zh0kx;u,MY[ɚDM;3vhf֨vjB^[][0|ʁϾs|3n%Nj[-l c/\z28./GTL8q{Y}an%ݒE#3A:%]]-i%[dM-re,r%@[>pjjvה~'.?VL= w8);c \9p۬ls\-~J<ݛ|u/  wSM ~{UDX+1S]pp{43f=Lۙc۬ ߝyjVeIo}z`v4~ (GJ&8Nbs3߿%"A'|צN= dL X,[;7o|7_%"5M+;fffoX!oΜ/4,g4hõb͛7* w;Zka̟;ܤaPlU?(L`$ q]|R-CH0m4N sƆamt(MEn5^qdT*HݐEſDŽ~-A<{jj*?]"vQ6c0 }&BɸJ fu"jf&j{o'34__kK;kk-{5,v:tu<^)Qo~܅6H"b~z&''O䋞npaj,]Z!5O)"o^jզ۷#UmIeǥiVus-,'_HD^~oyԷ7svضm.c o}7a")D(y5{m9ڍߕJ˘Ui] 8X4ٵX. *\D֓V-dMd*Z{,v䶦x$u&X=ANnͧa?<||d#IgLOO/?w=G;ҹ֧0@@_j;&۬[1P%۝oz`wcZu԰Z-"c{vC?r>P(+"q:qf9 =~LD𛆿O$qOy_hC糳#h;!"X_$Il^ifa&`ڋ6zqϔ0#:~37>7^8_:Syu FP81IApg$?M< $⿙IW;ܤ,>kÎ;^ |yqvcPJ;&P֨'vVaÆ'Im*"2󵣣76t|[fӉ"}ZAp]^Jllo?P(<1I'A0ApΝ;GvpWO'23\xmy-t-,~Ŵ8\"[FGG?13O jy}ƥ*"cfVVTV+{ժUw(_(E[H"+!fBp<< @GH1\DjgR@]};8DdGsEjC\e )f9;;{kn/u{1ynu;%˯'v#)G@/C"( 8Onk-|sKXJ^wEzPCCpj%},5M vjeDPLE`drWyA.(6}}e! qJX8=j*<0d[\p~6rQ+\OU:+%=n=VQӔ`XVX&~LT恀F6H ~CDqT}KX'e0 ky(zmȤ6Rҗqʲy8>_0v(^Ra[k7v2G`/_kˇobbf~kzzAG;*wx}X'Α OlNO_[nQj6ׂ\/mmSe|0BylKvSpuW*.5g]H_w c RX޵jk-RhQ>DBI:O4}!ڤD o. 
O Ky}}'Mwgk]S?ݶ(nxna]PB@H[0CF{YWl&^fR_R>==]e~(n=}5=<*W0B@P[;;(Rkc>:(a]:G,+`/+qic%},e4%}(4`ÍΑᖿ~8P=r^%ؽxPPc>gGQt߹0 EExs(>DCu> M"rnۖ{`C)tkpkf`E띘xvADHdrzz֣,&''M듓$9uzzk:G䴍@gcƘhO\MqjR*ng Q]3|HiA ׉Dq?w"gU}&󧦦~Be&''OIj"\.g>m`6mڴҺF`Æ cry"KCkkΑ~_?֭;*I7Z"v=|\@N"("0O"0Xdҝ0my]Rr֭Ң돩T*P= ї\rO#؎do6 AZ"Sr|x3Rtfӈ"Bг2(Y5k( 3D?1W$(i#AFEA0$I0ߟLD hjjpZzA###afۘ?GZ+@\sj: !#3{?SGq#0+Ѣ׸(_DyӂKAlG511qZP8[D&w33GKsrϷn obbBp(a+I?2^"s5%D"+~غʹOc<"…2|3f_f$I 0փ_q|B>i%>p.,H(U! š%IFD ewSBt^[o a&}k>;s/'Apb$'a"= D{|~?OfGѐqw}NkӋwFFFV# Ao3c^q`LLLO0y|Znaa曓$[ko\(|k׮=P(`=>g1ގx/SC>L[;hbb {:T2uK5 #=!"b=\+:i1f$IN`'ӂ 8) s&\ |\$l211qb%" h_P^f[VwivZ4 X M'hVz?s~cK BQ6DgjPb拢(F%c)yY '"X/" *gZE8#bf4r."8T`2?Z̷Z)Jg7Xϙz~r$c ˔tb.VmۆK-"&sFys5\FD/H0L("YDp D~sSb!G>{]T`of>TDpwvf"]EW3̢Gb nK@;2 stK'TKi_خvguVa}"|":.fkI\."s}|m"0NuuOƘ́/ 4p Es\@7ayVȧ+E =_ND/%cڷJ:w(lKLXM̅jwyZ] .&~Au(" ["cDQ":Z{"|^?( pޏ:7u,w֭[wnD "!5]MگRc!٩SSSsZ2E7GLV $I \vVe#=m#`5 Liun='^3vӃDlaJ{Tnݺj{@XvLNN>?IC6xX~J~J\=V?'#~ڦ"qছn*6*wZ۰"04Ka&O3V^Qw剀1D؜>f}I?nÆ cvEKME`133~8(@srʟ7'@{xv"{k"gg^:B O 7\>}$[y/=&qAED7 /䒙FK{xFfq"W-f[RMA755E 0Cҽ333Mh0K(bQqJߌ&c̵D14%:DڌA5k<6"rHYW6~=99yl$7ݩ7;\ED+kR^]"?ELLL<=1kDdNuckJ%C|hFݥ؝XgT*ED""\~WD>։? DS:rD"GQu\"0wy Jr@>S*dfZTvqlcWGQZ|2qg&ޚ;"`uY~"ZJ ZjssŊؿbӡؐu]mɎDQR ""u o"No /ܙK8ؘ$ ;W\s E! |Ratt̟Wu;KE1_\\T=@"KO1gBXaMDr!=PlXc  ###_ۼyDv`.23w=`{}>֎wa."_ƿQ:{ ΟfT(*qg('ȷ8~D>OD@D;$^g}BG͐|vrbKƘ\%Ӊt"˥W2/~gJ3:QKD!Se_1$bn -4ةWZnQjgEDp;A˨%InwщC'qID0KkCܒ\fc9; WIDO+MRZeO w93]y8Z?ș2CZo{8rƍ i99kekM2n5 oic0q @ s,Gsbc~255uB1)gNkg kȈ'8>+S0Ez 6 "CDKݨGw HO%v3'O)i`|gܸ9٨Ƙp#:edބ` J8P@xN3S\0kEٓ$IrMX(CZ¯Z>P(*`c-DYX4*־cRk?o\9032ɼ>"(F gJ134O{FJa}.pAc x|2Z v|k$*f~ޮ] SO(o%؋3`cUk":3kH۷oG}K[T#G )$0|3߳s[`>X,޷y_Tu>sO=v3ߝ^$ɷlr[T&3?U5ݛ8 `[Z R!f 8/va*,5+6%"ZS! 
#'I76;] /f#Fz~sJ eQ\~m+=::z]A%3՜#!3CC[k_7_Ǔ4־ݎ>D vfQ#mA.lD<8'De5>ZnuRtDĻW2W=kM 6X i0 EQ`S@.y_GnJ4CnC _Q h6 6IA,`$E@D>E?~ igfAtaf=>yRE\5 A້ .k-'/4 rnc@i|rժU߾};0*^6BL_3Z ,C옙9:=~|i GD/@-"/tC\@\x 9Bi{p,H׊>#"̌ԾT=wX1WEdsw804K3- ughS"fBZb 燂`CrnGbyEO5aDDBÁ(Ruن Ɯvz1yQ>666 Vv-V̙#wKׅv;G9rS˄1g(yxsܘn _֘s@7,Xcccb\&1HSi{P'#P*pOSO7(p92Lȹ!5לf h>ўY? pa h༅=T*DLtg<Q2Me\ŜMˍIrzzqN"HQZDexr{+Nmb0U.sEƑ;㬍pwm.袍#G:Nqĸ<WXqWDsHOUUe˖{0 %"xc z yX]v jÆaBIZsƍ;vIoh tj+V|/M/1w֯_WRr|ݶmېګ2`vvv/~}~`l߾IM~$R/A}_R쉵 p1|k/@V,ǦtI""v'IBpXZe?2Z333C,3?f׮]?ܶmHp^|#߷qm0 /"׋Ȧ8a]E#_uUe7vAX-( e֥+V`ox NkA1A1Ax+Vl/aS("(" qn߾SD$I֢t/KpQ2uk퓺=P)"("c̫s([0բtZ*ӹ Iq#!A@ Z("(0!b@aup'y\S,?ԼR"{)o+"("(" JFE@PE@PE@PD@ RVE@PE@PE@`(#"("("(KD_("("("00(QjGE@PE@PE@P%K~[PE@PE@PE``P=0Ԏ("("(",%J}"("("( {`DQE@PE@PE@XJ`/%mE@PE@PE@PA@ R;("("("(^Jۊ"("("(vDPE@PE@PE`)3&IENDB`url-matcher-0.6.0/docs/patterns/query_patterns.csv000066400000000000000000000017201475357073500223520ustar00rootroot00000000000000Pattern, Behaviour ``/product|?id=34``," Match any URL whose path is ``/product`` and contains the query parameter ``id`` with the value 34 | **Match:** * ``http://example.com/product?cat=shoes&id=34`` **Don't match:** * ``http://example.com/product?id=12`` * ``http://example.com/product/other?id=34`` " ``/product|?id=*``," Match any URL whose path is ``/product`` and contains the query parameter ``id`` with any value | **Match:** * ``http://example.com/product?cat=shoes&id=34`` * ``https://example.com/product?id=12&cat=clothes`` * ``https://example.com/product?id=`` **Don't match:** * ``http://example.com/product?cat=shoes`` * ``http://example.com/product?cat=shoes&ids=34`` " ``?cat=shoes&cat=pants``,"Match any URL containing the query parameters ``cat`` with the values ``shoes`` or ``pants`` | **Match:** * ``http://example.com/product?cat=shoes&id=34`` * ``http://example.org/p?cat=pants`` **Don't match:** * ``http://example.org/p?cat=pant`` 
"url-matcher-0.6.0/docs/requirements.txt000066400000000000000000000000461475357073500201740ustar00rootroot00000000000000Sphinx >= 3.1 sphinx-rtd-theme >= 0.4 url-matcher-0.6.0/pyproject.toml000066400000000000000000000054101475357073500166740ustar00rootroot00000000000000[tool.bumpversion] current_version = "0.6.0" commit = true tag = true tag_name = "{new_version}" [[tool.bumpversion.files]] filename = "setup.py" [[tool.bumpversion.files]] filename = "url_matcher/__version__.py" [[tool.mypy.overrides]] module = "tests.*" allow_untyped_defs = true check_untyped_defs = true [tool.ruff] line-length = 120 [tool.ruff.lint] extend-select = [ # flake8-bugbear "B", # flake8-comprehensions "C4", # pydocstyle "D", # flake8-future-annotations "FA", # flynt "FLY", # refurb "FURB", # isort "I", # flake8-implicit-str-concat "ISC", # flake8-logging "LOG", # Perflint "PERF", # pygrep-hooks "PGH", # flake8-pie "PIE", # pylint "PL", # flake8-pytest-style "PT", # flake8-use-pathlib "PTH", # flake8-pyi "PYI", # flake8-quotes "Q", # flake8-return "RET", # flake8-raise "RSE", # Ruff-specific rules "RUF", # flake8-bandit "S", # flake8-simplify "SIM", # flake8-slots "SLOT", # flake8-debugger "T10", # flake8-type-checking "TC", # pyupgrade "UP", # pycodestyle warnings "W", # flake8-2020 "YTT", ] ignore = [ # Missing docstring in public module "D100", # Missing docstring in public class "D101", # Missing docstring in public method "D102", # Missing docstring in public function "D103", # Missing docstring in public package "D104", # Missing docstring in magic method "D105", # Missing docstring in __init__ "D107", # One-line docstring should fit on one line with quotes "D200", # No blank lines allowed after function docstring "D202", # 1 blank line required between summary line and description "D205", # Multi-line docstring closing quotes should be on a separate line "D209", # First line should end with a period "D400", # First line should be in imperative mood; try rephrasing "D401", # First 
line should not be the function's "signature" "D402", # Too many return statements "PLR0911", # Too many branches "PLR0912", # Too many arguments in function definition "PLR0913", # Too many statements "PLR0915", # Magic value used in comparison "PLR2004", # String contains ambiguous {}. "RUF001", # Docstring contains ambiguous {}. "RUF002", # Comment contains ambiguous {}. "RUF003", # Mutable class attributes should be annotated with `typing.ClassVar` "RUF012", # Use of `assert` detected "S101", ] [tool.ruff.lint.per-file-ignores] "url_matcher/__init__.py" = ["F401", "F403"] "url_matcher/example.py" = ["PT", "S"] [tool.ruff.lint.pydocstyle] convention = "pep257" url-matcher-0.6.0/requirements-dev.txt000066400000000000000000000000221475357073500200120ustar00rootroot00000000000000pre-commit pytest url-matcher-0.6.0/setup.py000066400000000000000000000023271475357073500154760ustar00rootroot00000000000000from pathlib import Path from setuptools import find_packages, setup setup( name="url-matcher", version="0.6.0", description="URL matching rules library to connect URLs with resources", long_description=Path("README.rst").read_text(encoding="utf-8"), long_description_content_type="text/x-rst", author="Zyte Group Ltd", author_email="info@zyte.com", url="https://github.com/zytedata/url-matcher", packages=find_packages( exclude=[ "tests", ] ), package_data={ "url_matcher": ["py.typed"], }, include_package_data=True, python_requires=">=3.9", install_requires=[ "tldextract>=1.2", ], classifiers=[ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ], ) 
url-matcher-0.6.0/tests/000077500000000000000000000000001475357073500151225ustar00rootroot00000000000000url-matcher-0.6.0/tests/__init__.py000066400000000000000000000000001475357073500172210ustar00rootroot00000000000000url-matcher-0.6.0/tests/fixtures/000077500000000000000000000000001475357073500167735ustar00rootroot00000000000000url-matcher-0.6.0/tests/fixtures/patterns.json000066400000000000000000000036761475357073500215420ustar00rootroot00000000000000[ { "description": "Universal matcher by using an empty include pattern", "patterns": { "include": [], "exclude": ["/index.html|"] }, "match": [ "https://books.toscrape.com/catalogue/join_902/index.html", "https://books.toscrape.com/catalogue/arena_587/index.html" ], "no_match": [ "https://books.toscrape.com/index.html" ] }, { "description": "Match products in books.toscrape.com", "patterns": { "include": [ "books.toscrape.com/catalogue/" ], "exclude": [ "/catalogue/category/" ] }, "match": [ "https://books.toscrape.com/catalogue/join_902/index.html", "https://books.toscrape.com/catalogue/arena_587/index.html" ], "no_match": [ "https://books.toscrape.com/index.html", "https://books.toscrape.com/catalogue/category/books/mystery_3/index.html" ] }, { "description": "Match product lists in books.toscrape.com", "patterns": { "include": [ "books.toscrape.com/catalogue/category/", "books.toscrape.com/|", "books.toscrape.com/index.html|" ] }, "match": [ "https://books.toscrape.com/catalogue/category/books/mystery_3/index.html", "https://books.toscrape.com/index.html", "https://books.toscrape.com/" ], "no_match": [ "https://books.toscrape.com/catalogue/join_902/index.html" ] }, { "description": "All Goodreads individual photo pages but excluding those from users", "patterns": { "include": [ "goodreads.com/photo/?photo=*" ], "exclude": [ "/photo/user/" ] }, "match": [ "https://www.goodreads.com/photo/author/2900145.Tracy_Ames?page=1&photo=113390", "https://www.goodreads.com/photo/group/30626-goodreads-italia?photo=925633" ], 
"no_match": [ "https://www.goodreads.com/photo/user/44137919-mikee-readwithmikee?photo=1762284" ] } ]url-matcher-0.6.0/tests/fixtures/patterns_corner_cases.json000066400000000000000000000027401475357073500242570ustar00rootroot00000000000000[ { "description": "Universal matcher by using an empty include pattern", "patterns": { "include": [ "" ], "exclude": [] }, "match": [ "https://books.toscrape.com/catalogue/join_902/index.html", "http://whatever:443/some?more=things#everything", "https://example.com/index.html" ], "no_match": [ ] }, { "description": "Universal matcher by using an empty include pattern and some excludes", "patterns": { "include": [ "" ], "exclude": [ "/index.html|" ] }, "match": [ "https://books.toscrape.com/catalogue/join_902/index.html", "http://whatever:443/some?more=things#everything" ], "no_match": [ "https://example.com/index.html" ] }, { "description": "Universal matcher with everything excluded", "patterns": { "include": [ "" ], "exclude": [ "" ] }, "match": [ ], "no_match": [ "https://example.com/index.html", "https://books.toscrape.com/catalogue/join_902/index.html", "http://whatever:443/some?more=things#everything" ] }, { "description": "Using IPs", "patterns": { "include": [ "127.0.0.1:80/ipv4", "[::1]:80/ipv6" ] }, "match": [ "http://127.0.0.1/ipv4/something_else", "http://[::1]/ipv6/something_else" ], "no_match": [ "https://example.com/ipv4/something_else" ] } ]url-matcher-0.6.0/tests/fixtures/rules.json000066400000000000000000000061711475357073500210250ustar00rootroot00000000000000[ { "description": "Priority is respected", "rules": [ [ "1", { "include": ["example.com/path"], "exclude": [], "priority": 500 }], [ "2", { "include": ["example.com"], "exclude": [], "priority": 600 }], [ "2", { "include": ["example.com"], "exclude": [], "priority": 550 }], [ "3", { "include": ["example.com"], "exclude": [], "priority": 400 }] ], "cases": [ [ "http://example.com/path", "2" ] ] }, { "description": "Use the most specific includes to resolve 
conflicts", "rules": [ ["1", { "include": ["example.com", "blog.example.org/path?id=12#fragment"] }], ["2", { "include": ["blog.example.com", "blog.example.org/path?id=12"] }], ["3", { "include": ["blog.example.com/path", "blog.example.org/path"] }], ["4", { "include": ["blog.example.com/path?query=23", "blog.example.org"] }], ["5", { "include": ["blog.example.com/path?query=23#fragment", "example.org"] }] ], "cases": [ ["http://example.com", "1"], ["http://example.com/other", "1"], ["http://blog.example.com", "2"], ["http://blog.example.com/other", "2"], ["http://blog.example.com/path/without_query", "3"], ["http://blog.example.com/path/with_query?query=23", "4"], ["http://blog.example.com/path/with_query?query=23#fragment", "5"], ["http://blog.example.org/path/subpath?id=12#fragment_subsegment", "1"], ["http://blog.example.org/path?id=12", "2"], ["http://blog.example.org/path", "3"], ["http://blog.example.org", "4"], ["http://example.org", "5"], ["http://myexample.org", null] ] }, { "description": "Id is respected on conflicts", "rules": [ ["3", { "include": ["example.com"], "exclude": ["/something"] }], ["1", { "include": ["example.com"] }], ["2", { "include": ["example.com"] }] ], "cases": [ ["http://example.com", "3"], ["http://example.org", null] ] }, { "description": "Id is respected on conflicts", "rules": [ ["3", { "include": ["example.com"], "exclude": ["/something"] }], ["1", { "include": ["example.com"] }], ["2", { "include": ["example.com"] }] ], "cases": [ ["http://example.com", "3"], ["http://example.org", null] ] }, { "description": "Domain rules have always priority over universal ones", "rules": [ ["1", { "include": ["example.com"], "priority": 500 }], ["2", { "include": ["example.com"], "priority": 500 }], ["3", { "include": [], "priority": 600 }], ["4", { "include": [""], "priority": 600 }] ], "cases": [ ["http://example.com", "2"], ["http://other", "4"] ] } 
]url-matcher-0.6.0/tests/fixtures/single_patterns.json000066400000000000000000000140161475357073500230710ustar00rootroot00000000000000[ { "description": "Universal matching", "pattern": "", "match": [ "http://www.example.com/", "https://EXAMPLE.COM/product1", "http://products.example.com/path?id=23&param=#hash", "http://otherexample.com/" ], "no_match": [] }, { "description": "Strict domain", "pattern": "example.com/", "match": [ "http://www.example.com/", "https://EXAMPLE.COM/product1" ], "no_match": [ "http://products.example.com/", "http://otherexample.com/" ] }, { "description": "Domain and subdomains", "pattern": "example.com", "match": [ "http://www.example.com/", "http://product.example.COM/product1", "https://example.com/product2", "https://example.com/product?id=2344#main_section" ], "no_match": [ "http://otherexample.com/" ] }, { "description": "Strict domain + root path", "pattern": "example.com/|", "match": [ "http://www.example.com/", "https://example.com/", "https://example.com/?id=23" ], "no_match": [ "http://example.com/product", "http://otherexample.com/", "https://uk.example.com/", "http://shop/example.com/" ] }, { "description": "Strict domain + strict path", "pattern": "example.com/index.htm|", "match": [ "http://www.example.com/index.htm", "https://example.com/index.htm?color=blue" ], "no_match": [ "http://example.com/index.html", "http://shop.example.com/index.htm" ] }, { "description": "Strict schema", "pattern": "http://example.com", "match": [ "http://www.example.com/", "http://www.example.com/product1", "HTTP://example.com/product1", "http://example.com/product2", "http://shop.example.com/product2" ], "no_match": [ "https://www.example.com/" ] }, { "description": "Path subsection 1", "pattern": "/product/", "match": [ "http://www.example.com/product/pen", "http://other-example.com/PRODUCT/", "https://example.com/product/rubber?size=xl" ], "no_match": [ "https://www.example.com/product", "http://www.example.com/shop/product/pen" ] }, {
"description": "Path subsection 2", "pattern": "/product/*", "match": [ "http://www.example.com/product/pen", "http://other-example.com/PRODUCT/", "https://example.com/product/rubber?size=xl" ], "no_match": [ "https://www.example.com/product" ] }, { "description": "Path subsection 3", "pattern": "/product/*|", "match": [ "http://www.example.com/product/pen", "http://other-example.com/PRODUCT/", "https://example.com/product/rubber?size=xl" ], "no_match": [ "https://www.example.com/product" ] }, { "description": "match extension", "pattern": "/*.jpg|", "match": [ "http://www.example.com/my/image.jpg", "http://anything.com/my/image.JPG", "https://example.com/other-image.jpg#big" ], "no_match": [ "http://www.example.com/my/image.jpeg", "http://www.example.com/my/image/jpg_2" ] }, { "description": "Wildcards in the path", "pattern": "example.com/category/*/product/", "match": [ "http://example.com/category/bread/product/loaf", "http://www.example.com/category/bread/product/loaf", "https://example.com/category/bread/product/loaf?id=23#main" ], "no_match": [ "http://example.com/product/loaf", "http://shop.example.com/product/loaf", "http://fr.example.com/category/bread/product/loaf" ] }, { "description": "Must have query param", "pattern": "example.com/product?id=*", "match": [ "http://www.example.com/product?location=US&id=23", "http://www.example.com/PRODUCT?ID=23#main_section" ], "no_match": [ "http://uk.example.com/product?location=US&id=23", "http://www.example.com/product?location=US" ] }, { "description": "Wildcard in param value", "pattern": "example.com?product_id=*&category=shoes", "match": [ "http://www.example.com/product?category=shoes&location=US&product_id=23", "http://example.com/product?category=SHOES&product_id=23#main_section" ], "no_match": [ "http://www.example.com/product?product_id=23&category=clothes", "http://www.example.com/product?category=shoes", "http://shop.example.com/product?category=SHOES&product_id=23#main_section" ] }, { "description": 
"Multiple possible values for a param", "pattern": "example.com?param=1&param=2", "match": [ "http://www.example.com?param=1&otherparam=3", "http://www.EXAMPLE.com?PARAM=2" ], "no_match": [ "http://example.com?param=3" ] }, { "description": "Wildcard in fragment", "pattern": "example.com#product_section_*", "match": [ "http://www.example.com/product_1?location=US#product_section_main", "http://example.com#product_section_main" ], "no_match": [ "http://www.example.com/product_1?location=US#product_description", "http://www.example.com/product_1?location=US", "http://uk.example.com#product_section_main" ] }, { "description": "Mix path wildcard with param string", "pattern": "shop.example.com/*/product/?category=shoes", "match": [ "https://shop.example.com/US/product/nike_air?sid=423&category=shoes" ], "no_match": [ "https://example.com/US/product/nike_air?sid=423&category=shoes", "https://shop.example.com/US/product/loaf?sid=424&category=bread", "https://shop.example.com/US/product/nike_air?sid=423", "https://shop.example.com/product/nike_air?sid=423&category=shoes", "https://my.shop.example.com/US/product/nike_air?sid=423&category=shoes" ] }, { "description": "Empty schema, but double slash", "pattern": "//example.com/product", "match": [ "http://another.page//example.com/product" ], "no_match": [ "https://example.com/product" ] } ]url-matcher-0.6.0/tests/fixtures/single_patterns_corner_cases.json000066400000000000000000000130011475357073500256100ustar00rootroot00000000000000[ { "description": "Double slash at the start", "pattern": "//strange_path/subpath", "match": [ "http://example.com//strange_path/subpath/something_else" ], "no_match": [ "http://strange_path/subpath" ] }, { "description": "Wildcard not allowed in query params", "pattern": "example.com?id_gen_*&category=shoes", "match": [ "http://example.com/prod?id_gen_=&category=shoes" ], "no_match": [ "http://www.example.com/product?category=shoes&id_gen_men=23",
"http://www.example.com/product?product_id=23&category=clothes", "http://www.example.com/product?category=shoes", "http://shop.example.com/prod?id_gen_women=45&category=shoes" ] }, { "description": "Wildcard not allowed in query params (II)", "pattern": "example.com/path?*_id=34", "match": [ "https://example.com/path?_id=34" ], "no_match": [ "https://example.com/path?my_id=34" ] }, { "description": "Wildcard not allowed in query params (III)", "pattern": "example.com/path?*_id_*=*", "match": [ "https://example.com/path?_id_=34" ], "no_match": [ "https://example.com/path?_id_product=34", "https://example.com/path?my_id_product=34" ] }, { "description": "Single wildcard in query parameter", "pattern": "example.com/path?*", "match": [ "https://example.com/path", "https://example.com/path?id=34", "https://example.com/path?=34" ], "no_match": [ ] }, { "description": "Mix values and wildcards in query params", "pattern": "example.com?id=32&id=A*", "match": [ "http://www.example.com/product?otm=2&id=32", "http://example.com/prod?id=A45&category=shoes", "http://www.example.com/product?otm=2&id=32&id=a43", "http://www.example.com/product?&id=44&id=32" ], "no_match": [ "http://www.example.com/product?&id=44" ] }, { "description": "Empty query", "pattern": "example.com/path?", "match": [ "https://example.com/path", "https://example.com/path?id=34", "https://example.com/path?=34" ], "no_match": [ ] }, { "description": "Port provided", "pattern": "example.com:123", "match": [ "http://www.example.com:123/product?category=shoes&id_gen_men=23", "https://example.com:123/product?category=shoes&id_gen_men=23" ], "no_match": [ "http://www.example.com/product?category=shoes&id_gen_men=23" ] }, { "description": "Port 80 ignored", "pattern": "example.com:80", "match": [ "http://www.example.com:80/product?category=shoes&id_gen_men=23", "http://example.com/product?category=shoes&id_gen_men=23" ], "no_match": [ "https://www.example.com/product?category=shoes&id_gen_men=23", 
"https://www.example.com:80/product?category=shoes&id_gen_men=23", "http://www.example.com:123/product?category=shoes&id_gen_men=23" ] }, { "description": "Port 80 ignored (II)", "pattern": "http://example.com:80", "match": [ "http://www.example.com:80/product?category=shoes&id_gen_men=23", "http://example.com/product?category=shoes&id_gen_men=23" ], "no_match": [ "https://www.example.com/product?category=shoes&id_gen_men=23", "https://www.example.com:80/product?category=shoes&id_gen_men=23", "http://www.example.com:123/product?category=shoes&id_gen_men=23" ] }, { "description": "Port 443 ignored", "pattern": "example.com:443", "match": [ "https://example.com:443/product?category=shoes&id_gen_men=23", "https://example.com/product?category=shoes&id_gen_men=23" ], "no_match": [ "http://example.com/product?category=shoes&id_gen_men=23", "http://example.com:443/product?category=shoes&id_gen_men=23", "https://example.com:80/product?category=shoes&id_gen_men=23" ] }, { "description": "Port 443 ignored (II)", "pattern": "https://example.com:443", "match": [ "https://example.com:443/product?category=shoes&id_gen_men=23", "https://example.com/product?category=shoes&id_gen_men=23" ], "no_match": [ "http://example.com/product?category=shoes&id_gen_men=23", "http://example.com:443/product?category=shoes&id_gen_men=23", "https://example.com:80/product?category=shoes&id_gen_men=23" ] }, { "description": "Messing with http, https, 80 and 443", "pattern": "https://example.com:80", "match": [ "https://www.example.com:80/product?category=shoes&id_gen_men=23" ], "no_match": [ "http://example.com/product?category=shoes&id_gen_men=23", "https://www.example.com/product?category=shoes&id_gen_men=23", "http://www.example.com:80/product?category=shoes&id_gen_men=23" ] }, { "description": "Messing with http, https, 80 and 443 (II)", "pattern": "http://example.com:443", "match": [ "http://www.example.com:443/product?category=shoes&id_gen_men=23" ], "no_match": [ 
@pytest.mark.parametrize(
    ("rules", "cases"),
    [(row["rules"], row["cases"]) for row in RULES_FIXTURE],
    ids=[row["description"] for row in RULES_FIXTURE],
)
def test_matcher_rules(rules, cases):
    """Load every rule of a fixture row into one matcher and check which rule wins per URL.

    ``rules`` is a list of ``[identifier, patterns_kwargs]`` pairs and ``cases`` a list of
    ``[url, expected_identifier]`` pairs (``None`` meaning "no rule matches"), as laid out
    in ``fixtures/rules.json``.
    """
    matcher = URLMatcher()
    # Named ``identifier``/``expected`` instead of ``id`` so the builtin is not shadowed and
    # the rule id is not confused with the expected match result.
    for identifier, patterns in rules:
        matcher.add_or_update(identifier, Patterns(**patterns))
    for url, expected in cases:
        assert matcher.match(url) == expected
def test_match_all():
    """Check which identifiers ``match_all`` yields, and in which order, for overlapping rules."""
    matcher = URLMatcher()
    includes_by_id = {
        1: "example.com",
        2: "foo.example.com",
        3: "bar.example.com/products",
        4: "bar.example.com",
    }
    for identifier, include in includes_by_id.items():
        matcher.add_or_update(identifier, Patterns(include=[include]))

    expectations = [
        ("http://example.com", [1]),
        ("http://foo.example.com", [2, 1]),
        ("http://bar.example.com", [4, 1]),
        ("http://example.com/products", [1]),
        ("http://foo.example.com/products", [2, 1]),
        ("http://bar.example.com/products", [3, 4, 1]),
    ]
    for url, expected_ids in expectations:
        assert list(matcher.match_all(url)) == expected_ids
def test_match_universal():
    """Only the rules with no include or an empty include are reported as universal."""
    matcher = URLMatcher()
    rules = [
        (1, ["example.com"]),
        (2, []),
        (3, ["foo.example.com"]),
        (4, [""]),
    ]
    for identifier, include in rules:
        matcher.add_or_update(identifier, Patterns(include=include))

    universal_ids = list(matcher.match_universal())
    assert universal_ids == [4, 2]
def load_json_fixture(name: str) -> list[dict[str, Any]]:
    """Return the parsed content of ``fixtures/<name>.json`` located next to this module.

    :param name: fixture file name without the ``.json`` extension
    :return: the decoded JSON document (the fixtures are lists of objects)
    """
    fixture_path = Path(__file__).parent / "fixtures" / f"{name}.json"
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # locale encoding (the default used by ``read_text`` when none is given).
    return cast(list[dict[str, Any]], json.loads(fixture_path.read_text(encoding="utf-8")))
{envtmpdir}/html [testenv:pre-commit] deps = pre-commit commands = pre-commit run --all-files --show-diff-on-failure [testenv:twinecheck] basepython = python3 deps = twine==5.1.1 build==1.2.2 commands = python -m build --sdist twine check dist/* url-matcher-0.6.0/url_matcher/000077500000000000000000000000001475357073500162655ustar00rootroot00000000000000url-matcher-0.6.0/url_matcher/__init__.py000066400000000000000000000001201475357073500203670ustar00rootroot00000000000000__all__ = ["Patterns", "URLMatcher"] from .matcher import Patterns, URLMatcher url-matcher-0.6.0/url_matcher/__version__.py000066400000000000000000000000261475357073500211160ustar00rootroot00000000000000__version__ = "0.6.0" url-matcher-0.6.0/url_matcher/example.py000066400000000000000000000065721475357073500203040ustar00rootroot00000000000000""" Example of usage of the URLMatcher library """ from __future__ import annotations import dataclasses import random import time from collections import Counter from url_matcher import Patterns, URLMatcher from url_matcher.matcher import IncludePatternsWithoutDomainError matcher = URLMatcher() # Let's add a rule for books to scrape product patterns = Patterns(include=["books.toscrape.com/catalogue/"], exclude=["/catalogue/category/"]) matcher.add_or_update("books product", patterns) # Now a rule for product list in books to scrape patterns = Patterns( include=["books.toscrape.com/catalogue/category/", "books.toscrape.com/|", "books.toscrape.com/index.html|"] ) matcher.add_or_update("books productList", patterns) # Let's try it url = "https://books.toscrape.com/catalogue/soumission_998/index.html" assert matcher.match(url) == "books product" url = "https://books.toscrape.com/catalogue/category/books/fiction_10/index.html" assert matcher.match(url) == "books productList" url = "https://amazon.com" assert not matcher.match(url) # Adding a pattern without domain fails try: matcher.add_or_update("won't work", Patterns(["/path"])) raise AssertionError except 
def add_patterns(domain: int) -> None:
    """Register the "product" and "productList" rules for the synthetic domain ``domain``."""
    product_rule = Patterns(
        include=[f"{domain}/catalogue/?param=book"],
        exclude=["/catalogue/category/"],
    )
    matcher.add_or_update(f"{domain} product", product_rule)

    list_includes = [
        f"{domain}/catalogue/category/?param=book_list",
        f"{domain}/",
        f"{domain}/index.html",
    ]
    matcher.add_or_update(f"{domain} productList", Patterns(include=list_includes))
@dataclass(init=False, frozen=True)
class Patterns:
    """Immutable, hashable description of one rule: include/exclude patterns plus a priority.

    The constructor accepts lists but the attributes are stored as tuples, so instances
    compare and hash by value and cannot be modified after creation.
    """

    include: tuple[str, ...]
    exclude: tuple[str, ...]
    priority: int

    def __init__(self, include: list[str], exclude: list[str] | None = None, priority: int = 500):
        """
        :param include: patterns a URL must match (at least one of them)
        :param exclude: patterns a URL must not match; ``None`` means no excludes
        :param priority: rule priority, 500 by default
        """
        # The initialization is written by hand so that the API accepts lists while
        # tuples are used underneath, making the attributes truly immutable in
        # addition to the class being frozen=True.
        # Lists are less error-prone to type than tuples because the trailing `,`
        # is easily forgotten: ("element") is a plain string, not ("element",).
        # Attributes are assigned the same way Python's own `dataclasses` module
        # does it for frozen classes:
        # * https://github.com/python/cpython/blob/v3.10.2/Lib/dataclasses.py#L1117-L1120
        object.__setattr__(self, "include", self._as_tuple(include))
        object.__setattr__(self, "exclude", self._as_tuple(exclude or ()))
        object.__setattr__(self, "priority", priority)

    @staticmethod
    def _as_tuple(patterns: Iterable[str] | str) -> tuple[str, ...]:
        """Convert the given patterns to a tuple, treating a lone string as one pattern."""
        if isinstance(patterns, str):
            # tuple("example.com") would yield one "pattern" per character. A non-empty
            # string is taken as a single pattern; the empty string keeps meaning
            # "no patterns", exactly as before.
            return (patterns,) if patterns else ()
        return tuple(patterns)

    def get_domains(self) -> list[str]:
        """Return the distinct domains of the include patterns, in order of first appearance."""
        domains = (get_pattern_domain(pattern) for pattern in self.include)
        # dict.fromkeys removes duplicates while preserving the order
        return list(dict.fromkeys(domain for domain in domains if domain))

    def get_includes_without_domain(self) -> list[str]:
        """Return the include patterns for which no domain can be determined."""
        return [pattern for pattern in self.include if get_pattern_domain(pattern) is None]

    def all_includes_have_domain(self) -> bool:
        """Return true if all the include patterns have a domain"""
        return not self.get_includes_without_domain()

    def is_universal_pattern(self) -> bool:
        """Return true if there are no include patterns or they are all empty.
        A universal pattern matches any domain"""
        return not any(self.include)

    def get_includes_for(self, domain: str) -> list[str]:
        """Return the include patterns whose domain is exactly ``domain``."""
        return [pattern for pattern in self.include if get_pattern_domain(pattern) == domain]
def add_or_update(self, identifier: Any, patterns: Patterns) -> None:
    """Register ``patterns`` under ``identifier``, replacing the rule previously stored for it.

    The rule is indexed under each domain of its include patterns and, when it is a
    universal pattern, under the empty domain as well.

    :param identifier: value returned by ``match``/``match_all`` when the rule matches
    :param patterns: include/exclude patterns and priority of the rule
    :raises IncludePatternsWithoutDomainError: if the rule is not universal and some
        include pattern has no domain
    """
    if not patterns.all_includes_have_domain() and not patterns.is_universal_pattern():
        # Empty patterns are left out of the report: they are the allowed exception.
        wrong_patterns = [p for p in patterns.get_includes_without_domain() if p]
        raise IncludePatternsWithoutDomainError(
            "All include patterns must belong to a domain "
            f"but the patterns {wrong_patterns} don't. "
            "For example, the include pattern '/product/*' "
            "is invalid whereas the pattern 'example.com/product/*' isn't. "
            "The only exception is the empty pattern which matches everything "
            "and it is allowed. "
            f"identifier: {identifier}.",
            identifier=identifier,
            patterns=patterns,
            wrong_patterns=wrong_patterns,
        )
    # An update is a removal followed by a fresh insertion, so stale domain entries
    # of the previous patterns do not survive.
    if identifier in self.patterns:
        self.remove(identifier)
    self.patterns[identifier] = patterns
    matcher = PatternsMatcher(identifier, patterns)
    for domain in patterns.get_domains():
        self._add_matcher(domain, matcher)
    if patterns.is_universal_pattern():
        self._add_matcher("", matcher)
This is ensured by using including the identifier in the sorting criteria Sorting criteria: * Priority (descending) * Sorted list of includes for this domain (descending) * Rule identifier (descending) """ def sort_key(matcher: PatternsMatcher) -> tuple[int, list[str], Any]: sorted_includes = sorted(map(hierarchical_str, matcher.patterns.get_includes_for(domain))) return (matcher.patterns.priority, sorted_includes, matcher.identifier) self.matchers_by_domain[domain].sort(key=sort_key, reverse=True) self.matchers_universal.sort(key=sort_key, reverse=True) def _del_matcher(self, domain: str, identifier: Any) -> None: matchers = self.matchers_by_domain[domain] for idx in range(len(matchers)): if matchers[idx].identifier == identifier: del matchers[idx] break if not matchers: del self.matchers_by_domain[domain] for idx in range(len(self.matchers_universal)): if self.matchers_universal[idx].identifier == identifier: del self.matchers_universal[idx] break def _add_matcher(self, domain: str, matcher: PatternsMatcher) -> None: # FIXME: This can be made much more efficient if we insert the data directly in order instead of resorting. # The bisect module could be used for this purpose. # I'm leaving it for the future as insertion time is not critical. self.matchers_by_domain.setdefault(domain, []).append(matcher) if domain == "": self.matchers_universal.append(matcher) self._sort_domain(domain) url-matcher-0.6.0/url_matcher/patterns.py000066400000000000000000000223731475357073500205060ustar00rootroot00000000000000""" Utilities to parse patterns and match URLs using them. """ from __future__ import annotations import ipaddress import re import warnings from functools import lru_cache from re import Pattern from typing import NamedTuple from urllib.parse import parse_qs, urlparse from url_matcher.util import get_domain def get_pattern_domain(pattern: str) -> str | None: """ Returns the domain of the pattern if any. 
def pattern_to_url(pattern: str) -> str:
    """
    Rewrites the pattern into a form that urlparse splits the intended way.
    Required for urlparse to recognize the domain in patterns like example.com/path

    >>> pattern_to_url("example.com/")
    '//example.com/'
    >>> pattern_to_url("example.com")
    '//example.com'
    >>> pattern_to_url("https://example.com")
    'https://example.com'
    >>> pattern_to_url("MySchema4+.-://example.com")
    'MySchema4+.-://example.com'
    >>> pattern_to_url("//example.com")
    '////example.com'
    """
    # Scheme syntax as defined in https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
    if re.match(r"[a-zA-Z][a-zA-Z0-9.+-]*://", pattern):
        # An explicit scheme is present: urlparse already finds the domain.
        return pattern
    # Every other pattern gets "//" prepended:
    # * "example.com/path" becomes "//example.com/path", so that urlparse reads
    #   "example.com" as the netloc instead of as part of the path.
    # * a pattern that itself starts with "//" denotes a path. As
    #   urlparse("//example.com").netloc == "example.com", it would be taken for a
    #   domain; with two more slashes in front, urlparse leaves the netloc empty
    #   and parses "//example.com" into the path.
    return f"//{pattern}"
def hierarchical_str(pattern: str) -> str:
    """
    Rewrites the given pattern in a string that is useful to sort patterns
    from more general to more concrete. For example, the pattern "example.com"
    is more general than "blog.example.com" which is more general than
    "blog.example.com/post/1"

    The port is dropped, domain labels are reversed (IP addresses are kept as
    they are) and the path, query and fragment are appended verbatim.

    >>> hierarchical_str("http://blog.example.com/path?query=23#fragment")
    'com.example.blog/pathquery=23fragment'
    >>> hierarchical_str("http://blog.example.com:1234")
    'com.example.blog'
    >>> hierarchical_str("http://127.0.0.1:80/path")
    '127.0.0.1/path'
    >>> hierarchical_str("http://[::1]/path")
    '[::1]/path'
    """
    parsed = pattern_parse(pattern)
    netloc = parsed.netloc
    # A netloc ending in "]" is a bracketed IPv6 literal without a port: all its
    # colons belong to the address, so splitting on the last one would truncate
    # the host instead of removing a port.
    if ":" in netloc and not netloc.endswith("]"):
        netloc, _ = split_domain_port(netloc)
    # The brackets around an IPv6 literal are URL syntax, not part of the address
    host = netloc[1:-1] if netloc.startswith("[") and netloc.endswith("]") else netloc
    try:
        ipaddress.ip_address(host)
        is_ip = True
    except ValueError:
        is_ip = False
    if not is_ip:
        # Reversing the domain so that higher levels are before
        # e.g. blog.example.com -> com.example.blog
        netloc = ".".join(reversed(netloc.split(".")))
    return "".join((netloc, *parsed[2:]))
class PatternMatcher:
    """
    Matches URLs against a single pattern.

    The pattern is parsed once on construction and turned into compiled,
    case-insensitive regexes for the netloc, path, fragment and query
    parameters. Parts that are absent from the pattern keep their regex set
    to ``None`` and impose no restriction when matching.
    """

    def __init__(self, pattern: str):
        # Parsing and validation
        self.pattern = pattern
        self.parsed = pattern_parse(pattern)
        self.domain = get_pattern_domain(pattern)
        self.netloc_re: Pattern[str] | None = None
        self.path_re: Pattern[str] | None = None
        self.fragment_re: Pattern[str] | None = None
        self.query_re_dict: dict[str, Pattern[str]] | None = None
        self._build_regexes()

    def _build_regexes(self) -> None:
        """
        Builds the compiled regexes that can be used to match the pattern.

        Emits a ``SyntaxWarning`` when a query parameter *name* contains a
        wildcard: the wildcard is removed from the name, and the parameter is
        ignored altogether if nothing remains.
        """
        _, pnetloc, ppath, pquery, pfragment = self.parsed
        if pnetloc:
            netloc_re = re.escape(pnetloc)
            if not any((ppath, pquery, pfragment)):
                # Also match subdomains if there is no path, query or fragment in the pattern
                netloc_re = rf"(?:.*\.)?{netloc_re}"
            # The dot after "www" must be escaped: left bare it matches any
            # character, so e.g. "wwwXexample.com" would pass as "example.com".
            netloc_re = rf"^(?:www\.)?{netloc_re}$"
            self.netloc_re = re.compile(netloc_re, re.IGNORECASE)
        if ppath:
            self.path_re = self._path_or_fragment_re(ppath)
        if pfragment:
            self.fragment_re = self._path_or_fragment_re(pfragment)
        if pquery:
            pkvs = parse_qs(pquery, keep_blank_values=True)
            query_re_dict = {}
            for pparam, values in pkvs.items():
                pparam = pparam.lower()  # noqa: PLW2901
                if "*" in pparam:
                    warnings.warn(
                        f"Wildcard expansion is only allowed for the values in the query parameter. Pattern: '{self.pattern}'",
                        SyntaxWarning,
                        stacklevel=3,
                    )
                    pparam = pparam.replace("*", "")  # noqa: PLW2901
                    if not pparam:
                        continue
                # Several values for the same parameter are alternatives
                param_re = rf"^(?:{'|'.join([_wildcard_re_escape(value) for value in values])})$"
                query_re_dict[pparam] = re.compile(param_re, re.IGNORECASE)
            self.query_re_dict = query_re_dict or None

    def match(self, url: str) -> bool:
        """
        Return True if the url matches the pattern.

        The scheme is checked only when the pattern declares one. Every query
        parameter of the pattern must be present in the URL (names are compared
        case-insensitively) with at least one value accepted by its regex.
        """
        parsed = _urlparse(url)
        if self.parsed.scheme and parsed.scheme != self.parsed.scheme:
            return False
        if self.netloc_re and not self.netloc_re.match(parsed.netloc):
            return False
        if self.path_re and not self.path_re.match(parsed.path):
            return False
        if self.fragment_re and not self.fragment_re.match(parsed.fragment):
            return False
        if self.query_re_dict:
            # Merge the values of keys that differ only by case. Lower-casing the
            # keys in a dict comprehension would keep just the last of them and
            # make the result depend on the order of the parameters in the URL.
            kvs: dict[str, list[str]] = {}
            for name, values in parse_qs(parsed.query, keep_blank_values=True).items():
                kvs.setdefault(name.lower(), []).extend(values)
            # All params must be present in the URL
            for param, param_re in self.query_re_dict.items():
                if not any(param_re.match(value) for value in kvs.get(param, ())):
                    return False
        return True

    @staticmethod
    def _path_or_fragment_re(path_or_fragment: str) -> Pattern[str]:
        """Wildcard expansion + end of line character"""
        re_str = _wildcard_re_escape(path_or_fragment)
        if re_str.endswith(r"\|"):
            # case where the match must be exact: drop the escaped trailing "|"
            re_str = re_str[:-2]
        else:
            # otherwise the pattern acts as a prefix
            re_str += r".*"
        re_str = rf"^{re_str}$"
        return re.compile(re_str, re.IGNORECASE)