././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1636638789.9051707 soupsieve-2.3.1/0000755000175100001710000000000000000000000013077 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/.pyspelling.yml0000644000175100001710000000425500000000000016074 0ustar00runnerdockerspellchecker: aspell matrix: - name: mkdocs sources: - site/**/*.html hunspell: d: en_US aspell: lang: en dictionary: wordlists: - docs/src/dictionary/en-custom.txt output: build/dictionary/mkdocs.dic pipeline: - pyspelling.filters.html: comments: false attributes: - title - alt ignores: - 'code, pre, a.magiclink, span.keys' - '.MathJax_Preview, .md-nav__link, .md-footer-custom-text, .md-source__repository, .headerlink, .md-icon' - '.md-footer-social__link' - pyspelling.filters.url: - name: markdown sources: - README.md hunspell: d: en_US aspell: lang: en dictionary: wordlists: - docs/src/dictionary/en-custom.txt output: build/dictionary/mkdocs.dic pipeline: - pyspelling.filters.markdown: markdown_extensions: - pymdownx.superfences: - pymdownx.highlight: - pyspelling.filters.html: comments: false attributes: - title - alt ignores: - :is(code, pre) - pyspelling.filters.url: - name: python sources: - setup.py - '{soupsieve,tests,tools}/**/*.py' hunspell: d: en_US aspell: lang: en dictionary: wordlists: - docs/src/dictionary/en-custom.txt output: build/dictionary/python.dic pipeline: - pyspelling.filters.python: group_comments: true - pyspelling.flow_control.wildcard: allow: - py-comment - pyspelling.filters.context: context_visible_first: true delimiters: # Ignore lint (noqa) and coverage (pragma) as well as shebang (#!) 
- open: '^(?: *(?:noqa\b|pragma: no cover|type: .*?)|!)' close: '$' # Ignore Python encoding string -*- encoding stuff -*- - open: '^ *-\*-' close: '-\*-$' - pyspelling.filters.context: context_visible_first: true escapes: '\\[\\`]' delimiters: # Ignore multiline content between fences (fences can have 3 or more back ticks) # ``` # content # ``` - open: '(?s)^(?P *`{3,})$' close: '^(?P=open)$' # Ignore text between inline back ticks - open: '(?P`+)' close: '(?P=open)' - pyspelling.filters.url: ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/LICENSE.md0000644000175100001710000000211000000000000014475 0ustar00runnerdockerMIT License Copyright (c) 2018 - 2021 Isaac Muse Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/MANIFEST.in0000644000175100001710000000071200000000000014635 0ustar00runnerdockerrecursive-include soupsieve *.py py.typed recursive-include tests *.py recursive-include docs/src/markdown *.md *.png *.gif *.html recursive-include docs/src/dictionary *.txt recursive-include docs/theme *.js *.css *.html recursive-include requirements *.txt recursive-exclude site * include .pyspelling.yml include mkdocs.yml include setup.py include setup.cfg include tox.ini include LICENSE.md include README.md include MANIFEST.in include pyproject.toml ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1636638789.9051707 soupsieve-2.3.1/PKG-INFO0000644000175100001710000001257100000000000014202 0ustar00runnerdockerMetadata-Version: 2.1 Name: soupsieve Version: 2.3.1 Summary: A modern CSS selector implementation for Beautiful Soup. Home-page: https://github.com/facelessuser/soupsieve Author: Isaac Muse Author-email: Isaac.Muse@gmail.com License: MIT License Keywords: CSS HTML XML selector filter query soup Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Environment :: Console Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: MIT License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Typing :: Typed Requires-Python: >=3.6 Description-Content-Type: text/markdown License-File: LICENSE.md [![Donate via PayPal][donate-image]][donate-link] 
[![Discord][discord-image]][discord-link] [![Build][github-ci-image]][github-ci-link] [![Coverage Status][codecov-image]][codecov-link] [![PyPI Version][pypi-image]][pypi-link] [![PyPI - Python Version][python-image]][pypi-link] ![License][license-image-mit] # Soup Sieve ## Overview Soup Sieve is a CSS selector library designed to be used with [Beautiful Soup 4][bs4]. It aims to provide selecting, matching, and filtering using modern CSS selectors. Soup Sieve currently provides selectors from the CSS level 1 specifications up through the latest CSS level 4 drafts and beyond (though some are not yet implemented). Soup Sieve was written with the intent to replace Beautiful Soup's builtin select feature, and as of Beautiful Soup version 4.7.0, it now is :confetti_ball:. Soup Sieve can also be imported in order to use its API directly for more controlled, specialized parsing. Soup Sieve has implemented most of the CSS selectors up through the latest CSS draft specifications, though there are a number that don't make sense in a non-browser environment. Selectors that cannot provide meaningful functionality simply do not match anything. 
Some of the supported selectors are: - `.classes` - `#ids` - `[attributes=value]` - `parent child` - `parent > child` - `sibling ~ sibling` - `sibling + sibling` - `:not(element.class, element2.class)` - `:is(element.class, element2.class)` - `parent:has(> child)` - and [many more](https://facelessuser.github.io/soupsieve/selectors/) ## Installation You must have Beautiful Soup already installed: ``` pip install beautifulsoup4 ``` In most cases, assuming you've installed version 4.7.0, that should be all you need to do, but if you've installed via some alternative method, and Soup Sieve is not automatically installed for you, you can install it directly: ``` pip install soupsieve ``` If you want to manually install it from source, navigate to the root of the project and run ``` python setup.py build python setup.py install ``` ## Documentation Documentation is found here: https://facelessuser.github.io/soupsieve/. ## License MIT License Copyright (c) 2018 - 2021 Isaac Muse Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. [bs4]: https://beautiful-soup-4.readthedocs.io/en/latest/# [github-ci-image]: https://github.com/facelessuser/soupsieve/workflows/build/badge.svg?branch=master&event=push [github-ci-link]: https://github.com/facelessuser/soupsieve/actions?query=workflow%3Abuild+branch%3Amaster [discord-image]: https://img.shields.io/discord/678289859768745989?logo=discord&logoColor=aaaaaa&color=mediumpurple&labelColor=333333 [discord-link]:https://discord.gg/XBnPUZF [codecov-image]: https://img.shields.io/codecov/c/github/facelessuser/soupsieve/master.svg?logo=codecov&logoColor=aaaaaa&labelColor=333333 [codecov-link]: https://codecov.io/github/facelessuser/soupsieve [pypi-image]: https://img.shields.io/pypi/v/soupsieve.svg?logo=pypi&logoColor=aaaaaa&labelColor=333333 [pypi-link]: https://pypi.python.org/pypi/soupsieve [python-image]: https://img.shields.io/pypi/pyversions/soupsieve?logo=python&logoColor=aaaaaa&labelColor=333333 [license-image-mit]: https://img.shields.io/badge/license-MIT-blue.svg?labelColor=333333 [donate-image]: https://img.shields.io/badge/Donate-PayPal-3fabd1?logo=paypal [donate-link]: https://www.paypal.me/facelessuser ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/README.md0000644000175100001710000001045500000000000014363 0ustar00runnerdocker[![Donate via PayPal][donate-image]][donate-link] [![Discord][discord-image]][discord-link] [![Build][github-ci-image]][github-ci-link] [![Coverage Status][codecov-image]][codecov-link] [![PyPI Version][pypi-image]][pypi-link] [![PyPI - Python Version][python-image]][pypi-link] ![License][license-image-mit] # Soup Sieve ## Overview Soup Sieve is a CSS selector library designed to be used with [Beautiful 
Soup 4][bs4]. It aims to provide selecting, matching, and filtering using modern CSS selectors. Soup Sieve currently provides selectors from the CSS level 1 specifications up through the latest CSS level 4 drafts and beyond (though some are not yet implemented). Soup Sieve was written with the intent to replace Beautiful Soup's builtin select feature, and as of Beautiful Soup version 4.7.0, it now is :confetti_ball:. Soup Sieve can also be imported in order to use its API directly for more controlled, specialized parsing. Soup Sieve has implemented most of the CSS selectors up through the latest CSS draft specifications, though there are a number that don't make sense in a non-browser environment. Selectors that cannot provide meaningful functionality simply do not match anything. Some of the supported selectors are: - `.classes` - `#ids` - `[attributes=value]` - `parent child` - `parent > child` - `sibling ~ sibling` - `sibling + sibling` - `:not(element.class, element2.class)` - `:is(element.class, element2.class)` - `parent:has(> child)` - and [many more](https://facelessuser.github.io/soupsieve/selectors/) ## Installation You must have Beautiful Soup already installed: ``` pip install beautifulsoup4 ``` In most cases, assuming you've installed version 4.7.0, that should be all you need to do, but if you've installed via some alternative method, and Soup Sieve is not automatically installed for you, you can install it directly: ``` pip install soupsieve ``` If you want to manually install it from source, navigate to the root of the project and run ``` python setup.py build python setup.py install ``` ## Documentation Documentation is found here: https://facelessuser.github.io/soupsieve/. 
## License MIT License Copyright (c) 2018 - 2021 Isaac Muse Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
[bs4]: https://beautiful-soup-4.readthedocs.io/en/latest/# [github-ci-image]: https://github.com/facelessuser/soupsieve/workflows/build/badge.svg?branch=master&event=push [github-ci-link]: https://github.com/facelessuser/soupsieve/actions?query=workflow%3Abuild+branch%3Amaster [discord-image]: https://img.shields.io/discord/678289859768745989?logo=discord&logoColor=aaaaaa&color=mediumpurple&labelColor=333333 [discord-link]:https://discord.gg/XBnPUZF [codecov-image]: https://img.shields.io/codecov/c/github/facelessuser/soupsieve/master.svg?logo=codecov&logoColor=aaaaaa&labelColor=333333 [codecov-link]: https://codecov.io/github/facelessuser/soupsieve [pypi-image]: https://img.shields.io/pypi/v/soupsieve.svg?logo=pypi&logoColor=aaaaaa&labelColor=333333 [pypi-link]: https://pypi.python.org/pypi/soupsieve [python-image]: https://img.shields.io/pypi/pyversions/soupsieve?logo=python&logoColor=aaaaaa&labelColor=333333 [license-image-mit]: https://img.shields.io/badge/license-MIT-blue.svg?labelColor=333333 [donate-image]: https://img.shields.io/badge/Donate-PayPal-3fabd1?logo=paypal [donate-link]: https://www.paypal.me/facelessuser ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1636638789.869172 soupsieve-2.3.1/docs/0000755000175100001710000000000000000000000014027 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1636638789.8651724 soupsieve-2.3.1/docs/src/0000755000175100001710000000000000000000000014616 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1636638789.873172 soupsieve-2.3.1/docs/src/dictionary/0000755000175100001710000000000000000000000016763 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/dictionary/en-custom.txt0000644000175100001710000000121500000000000021435 0ustar00runnerdockerAPI Accessors Aspell BCP BeautifulSoup CDATA CSS CSS's 
Changelog Combinators DOM EOF EOL GitHub Gitter Hashable JQuery MERCHANTABILITY MkDocs NONINFRINGEMENT Precompile PyPI PySpelling SVG TODO Tox Twemoji URL's UTF Unescape Virtualenv WIP XHTML accessor amongst boolean builtin centric combinator combinators deprecations deregister dev directionality formatter hashable html iterable iterables linter lxml matchable matcher matchers multiline namespace namespaces newline parser's parsers pre prerelease prereleases pytest regex sublicense substring subtag subtags traceback tuple tuples un unmatchable unpickle unvisited whitespace wildcard wordlist ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1636638789.873172 soupsieve-2.3.1/docs/src/markdown/0000755000175100001710000000000000000000000016440 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1636638789.873172 soupsieve-2.3.1/docs/src/markdown/about/0000755000175100001710000000000000000000000017552 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/about/changelog.md0000644000175100001710000003410200000000000022023 0ustar00runnerdocker# Changelog ## 2.3.1 - **FIX**: Ensure attribute selectors match tags that have new lines characters in attributes. (#233) ## 2.3 - **NEW**: Officially support Python 3.10. - **NEW**: Add static typing. - **NEW**: `:has()`, `:is()`, and `:where()` now use use a forgiving selector list. While not as forgiving as CSS might be, it will forgive such things as empty sets and empty slots due to multiple consecutive commas, leading commas, or trailing commas. Essentially, these pseudo-classes will match all non-empty selectors and ignore empty ones. As the scraping environment is different than a browser environment, it was chosen not to aggressively forgive bad syntax and invalid features to ensure the user is alerted that their program may not perform as expected. 
- **NEW**: Add support to output a pretty print format of a compiled `SelectorList` for debug purposes. - **FIX**: Some small corner cases discovered with static typing. ## 2.2.1 - **FIX**: Fix an issue with namespaces when one of the keys is `self`. ## 2.2 - **NEW**: `:link` and `:any-link` no longer include `#!html <link>` due to a change in the level 4 selector specification. This actually yields more sane results. - **FIX**: BeautifulSoup, when using `find`, is quite forgiving of odd types that a user may place in an element's attribute value. Soup Sieve will also now be more forgiving and attempt to match these unexpected values in a sane manner by normalizing them before compare. (#212) ## 2.1 - **NEW**: Officially support Python 3.9. - **NEW**: Drop official support for Python 3.5. - **NEW**: In order to avoid conflicts with future CSS specification changes, non-standard pseudo classes will now start with the `:-soup-` prefix. As a consequence, `:contains()` will now be known as `:-soup-contains()`, though for a time the deprecated form of `:contains()` will still be allowed with a warning that users should migrate over to `:-soup-contains()`. - **NEW**: Added new non-standard pseudo class `:-soup-contains-own()` which operates similar to `:-soup-contains()` except that it only looks at text nodes directly associated with the currently scoped element and not its descendants. - **FIX**: Import `bs4` globally instead of in local functions as it appears there are no adverse effects due to circular imports as `bs4` does not immediately reference `soupsieve` functions and `soupsieve` does not immediately reference `bs4` functions. This should give a performance boost to functions that had previously included `bs4` locally. ## 2.0.1 - **FIX**: Remove unused code. ## 2.0 - **NEW**: `SelectorSyntaxError` is derived from `Exception` not `SyntaxError`. - **NEW**: Remove deprecated `comments` and `icomments` from the API. 
- **NEW**: Drop support for EOL Python versions (Python 2 and Python < 3.5). - **FIX**: Corner case with splitting namespace and tag name that have an escaped `|`. ## 1.9.6 !!! note "Last version for Python 2.7" - **FIX**: Prune dead code. - **FIX**: Corner case with splitting namespace and tag name that have an escaped `|`. ## 1.9.5 - **FIX**: `:placeholder-shown` should not match if the element has content that overrides the placeholder. ## 1.9.4 - **FIX**: `:checked` rule was too strict with `option` elements. The specification for `:checked` does not require an `option` element to be under a `select` element. - **FIX**: Fix level 4 `:lang()` wildcard match handling with singletons. Implicit wildcard matching should not match any singleton. Explicit wildcard matching (`*` in the language range: `*-US`) is allowed to match singletons. ## 1.9.3 - **FIX**: `[attr!=value]` pattern was mistakenly using `:not([attr|=value])` logic instead of `:not([attr=value])`. - **FIX**: Remove undocumented `_QUIRKS` mode flag. Beautiful Soup was meant to use it to help with transition to Soup Sieve, but never released with it. Help with transition at this point is no longer needed. ## 1.9.2 - **FIX**: Shortcut last descendant calculation if possible for performance. - **FIX**: Fix issue where `Doctype` strings can be mistaken for a normal text node in some cases. - **FIX**: A top level tag is not a `:root` tag if it has sibling text nodes or tag nodes. This is an issue that mostly manifests when using `html.parser` as the parser will allow multiple root nodes. ## 1.9.1 - **FIX**: `:root`, `:contains()`, `:default`, `:indeterminate`, `:lang()`, and `:dir()` will properly account for HTML `iframe` elements in their logic when selecting or matching an element. Their logic will be restricted to the document for which the element under consideration applies. 
- **FIX**: HTML pseudo-classes will check that all key elements checked are in the XHTML namespace (HTML parsers that do not provide namespaces will assume the XHTML namespace). - **FIX**: Ensure that all pseudo-class names are case insensitive and allow CSS escapes. ## 1.9 - **NEW**: Allow `:contains()` to accept a list of text to search for. (#115) - **NEW**: Add new `escape` function for escaping CSS identifiers. (#125) - **NEW**: Deprecate `comments` and `icomments` functions in the API to ensure Soup Sieve focuses only on CSS selectors. `comments` and `icomments` will most likely be removed in 2.0. (#130) - **NEW**: Add Python 3.8 support. (#133) - **FIX**: Don't install test files when installing the `soupsieve` package. (#111) - **FIX**: Improve efficiency of `:contains()` comparison. - **FIX**: Null characters should translate to the Unicode REPLACEMENT CHARACTER (`U+FFFD`) according to the specification. This applies to CSS escaped NULL characters as well. (#124) - **FIX**: Escaped EOF should translate to `U+FFFD` outside of CSS strings. In a string, they should just be ignored, but as there is no case where we could resolve such a string and still have a valid selector, string handling remains the same. (#128) ## 1.8 - **NEW**: Add custom selector support. (#92)(#108) - **FIX**: Small tweak to CSS identifier pattern to ensure it matches the CSS specification exactly. Specifically, you can't have an identifier of only `-`. (#107) - **FIX**: CSS string patterns should allow escaping newlines to span strings across multiple lines. (#107) - **FIX**: Newline regular expression for CSS newlines should treat `\r\n` as a single character, especially in cases such as string escapes: `\\\r\n`. (#107) - **FIX**: Allow `--` as a valid identifier or identifier start. (#107) - **FIX**: Bad CSS syntax now raises a `SelectorSyntaxError`, which is still currently derived from `SyntaxError`, but will most likely be derived from `Exception` in the future. 
## 1.7.3 - **FIX**: Fix regression with tag names in regards to case sensitivity, and ensure there are tests to prevent breakage in the future. - **FIX**: XHTML should always be case sensitive like XML. ## 1.7.2 - **FIX**: Fix HTML detection `type` selector. - **FIX**: Fixes for `:enabled` and `:disabled`. - **FIX**: Provide a way for Beautiful Soup to parse selectors in a quirks mode to mimic some of the quirks of the old select method prior to Soup Sieve, but with warnings. This is to help old scripts to not break during the transitional period with newest Beautiful Soup. In the future, these quirks will raise an exception as Soup Sieve requires selectors to follow the CSS specification. ## 1.7.1 - **FIX**: Fix issue with `:has()` selector where a leading combinator can only be provided in the first selector in a relative selector list. ## 1.7 - **NEW**: Add support for `:in-range` and `:out-of-range` selectors. (#60) - **NEW**: Add support for `:defined` selector. (#76) - **FIX**: Fix pickling issue when compiled selector contains a `NullSelector` object. (#70) - **FIX**: Better exception messages in the CSS selector parser and fix a position reporting issue that can occur in some exceptions. (#72, #73) - **FIX**: Don't compare prefixes when evaluating attribute namespaces, compare the actual namespace. (#75) - **FIX**: Split whitespace attribute lists by all whitespace characters, not just space. - **FIX**: `:nth-*` patterns were converting numbers to base 16 when they should have been converting to base 10. ## 1.6.2 - **FIX**: Fix pattern compile issues on Python < 2.7.4. - **FIX**: Don't use `\d` in Unicode `Re` patterns as they will contain characters outside the range of `[0-9]`. ## 1.6.1 - **FIX**: Fix warning about not importing `Mapping` from `collections.abc`. ## 1.6 - **NEW**: Add `closest` method to the API that matches closest ancestor. - **FIX**: Add missing `select_one` reference to module's `__all__`. 
## 1.5 - **NEW**: Add `select_one` method like Beautiful Soup has. - **NEW**: Add `:dir()` selector (HTML only). - **FIX**: Fix issues when handling HTML fragments (elements without a `BeautifulSoup` object as a parent). - **FIX**: Fix internal `nth` range check. ## 1.4.0 - **NEW**: Throw `NotImplementedError` for at-rules: `@page`, etc. - **NEW**: Match nothing for `:host`, `:host()`, and `:host-context()`. - **NEW**: Add support for `:read-write` and `:read-only`. - **NEW**: Selector patterns can be annotated with CSS comments. - **FIX**: `\r`, `\n`, and `\f` cannot be escaped with `\` in CSS. You must use Unicode escapes. ## 1.3.1 - **FIX**: Fix issue with undefined namespaces. ## 1.3 - **NEW**: Add support for `:scope`. - **NEW**: `:user-invalid`, `:playing`, `:paused`, and `:local-link` will not cause a failure, but all will match nothing as their use cases are not possible in an environment outside a web browser. - **FIX**: Fix `[attr~=value]` handling of whitespace. According to the spec, if the value contains whitespace, or is an empty string, it should not match anything. - **FIX**: Precompile internal patterns for pseudo-classes to prevent having to parse them again. ## 1.2.1 - **FIX**: More descriptive exceptions. Exceptions will also now mention position in the pattern that is problematic. - **FIX**: `filter` ignores `NavigableString` objects in normal iterables and `Tag` iterables. Basically, it filters all Beautiful Soup document parts regardless of iterable type whereas it used to only filter out a `NavigableString` in a `Tag` object. This is viewed as fixing an inconsistency. - **FIX**: `DEBUG` flag has been added to help with debugging CSS selector parsing. This is mainly for development. - **FIX**: If forced to search for language in `meta` tag, and no language is found, cache that there is no language in the `meta` tag to prevent searching again during the current select. 
- **FIX**: If a non `BeautifulSoup`/`Tag` object is given to the API to compare against, raise a `TypeError`. ## 1.2 - **NEW**: Add Python 2.7 support. - **NEW**: Remove old pre 1.0 deprecations. ## 1.1 - **NEW**: Adds support for `[attr!=value]` which is equivalent to `:not([attr=value])`. - **NEW**: Add support for `:active`, `:focus`, `:hover`, `:visited`, `:target`, `:focus-within`, `:focus-visible`, `:target-within`, `:current()`/`:current`, `:past`, and `:future`, but they will never match as these states don't exist in the Soup Sieve environment. - **NEW**: Add support for `:checked`, `:enabled`, `:disabled`, `:required`, `:optional`, `:default`, and `:placeholder-shown` which will only match in HTML documents as these concepts are not defined in XML. - **NEW**: Add support for `:link` and `:any-link`, both of which will target all `<a>`, `<area>`, and `<link>` elements with an `href` attribute as all links will be treated as unvisited in Soup Sieve. - **NEW**: Add support for `:lang()` (CSS4) which works in XML and HTML. - **NEW**: Users must install Beautiful Soup themselves. This requirement is removed in the hopes that Beautiful Soup may use this in the future. - **FIX**: Attributes in the form `prefix:attr` can be matched with the form `[prefix\:attr]` without specifying a namespaces if desired. - **FIX**: Fix exception when `[type]` is used (with no value). ## 1.0.2 - **FIX**: Use proper CSS identifier patterns for tag names, classes, ids, etc. Things like `#3` or `#-3` should not match and should require `#\33` or `#-\33`. - **FIX**: Do not raise `NotImplementedError` for supported pseudo classes/elements with bad syntax, instead raise `SyntaxError`. ## 1.0.1 - **FIX**: When giving a tag to `select`, it should only return the children of that tag, never the tag itself. - **FIX**: For informational purposes, raise a `NotImplementedError` when an unsupported pseudo class is used. ## 1.0 - **NEW**: Official 1.0.0 release. ## 1.0.0b2 - **NEW**: Drop document flags. 
Document type can be detected from the Beautiful Soup object directly. - **FIX**: CSS selectors should be evaluated with CSS whitespace rules. - **FIX**: Processing instructions, CDATA, and declarations should all be ignored in `:contains` and child considerations for `:empty`. - **FIX**: In Beautiful Soup, the document itself is the first tag. Do not match the "document" tag by returning false for any tag that doesn't have a parent. ## 1.0.0b1 - **NEW**: Add support for non-standard `:contains()` selector. - **FIX**: Compare pseudo class names case insensitively when matching unexpected cases. - **FIX**: Don't allow attribute case flags when no attribute value is defined. ## 0.6 - **NEW**: `mode` attribute is now called `flags` to allow for other options in the future. - **FIX**: More corner cases for `nth` selectors. ## 0.5.3 - **FIX**: Previously, all pseudo classes' selector lists were evaluated as one big group, but now each pseudo classes' selector lists are evaluated separately. - **FIX**: CSS selector tokens are not case sensitive. ## 0.5.2 - **FIX**: Add missing `s` flag to attribute selector for forced case sensitivity of attribute values. - **FIX**: Relax attribute pattern matching to allow non-essential whitespace. - **FIX**: Attribute selector flags themselves are not case sensitive. - **FIX**: `type` attribute in HTML is handled special. While all other attributes values are case sensitive, `type` in HTML is usually treated special and is insensitive. In XML, this is not the case. ## 0.5.1 - **FIX**: Fix namespace check for `:nth-of-type`. ## 0.5 - **NEW**: Deprecate `commentsiter` and `selectiter` in favor of `icomments` and `iselect`. Expect removal in version 1.0. ## 0.4 - **NEW**: Initial prerelease. 
././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/about/contributing.md0000644000175100001710000000504400000000000022606 0ustar00runnerdocker# Contributing & Support ## Become a Sponsor :octicons-heart-fill-16:{: .heart-throb} Open source projects take time and money. Help support the project by becoming a sponsor. You can add your support at any tier you feel comfortable with. No amount is too little. We also accept one time contributions via PayPal. [:octicons-mark-github-16: GitHub Sponsors](https://github.com/sponsors/facelessuser){: .md-button .md-button--primary } [:fontawesome-brands-paypal: PayPal](https://www.paypal.me/facelessuser){ .md-button} ## Bug Reports 1. Please **read the documentation** and **search the issue tracker** to try and find the answer to your question **before** posting an issue. 2. When creating an issue on the repository, please provide as much information as possible: - Version being used. - Operating system. - Version of Python. - Errors in console. - Detailed description of the problem. - Examples for reproducing the error. You can post pictures, but if specific text or code is required to reproduce the issue, please provide the text in a plain text format for easy copy/paste. The more info provided the greater the chance someone will take the time to answer, implement, or fix the issue. 3. Be prepared to answer questions and provide additional information if required. Issues in which the creator refuses to respond to follow up questions will be marked as stale and closed. ## Reviewing Code Take part in reviewing pull requests and/or reviewing direct commits. Make suggestions to improve the code and discuss solutions to overcome weakness in the algorithm. ## Answer Questions in Issues Take time and answer questions and offer suggestions to people who've created issues in the issue tracker. 
Often people will have questions that you might have an answer for. Or maybe you know how to help them accomplish a specific task they are asking about. Feel free to share your experience to help others. ## Pull Requests Pull requests are welcome, and a great way to help fix bugs and add new features. If you are interested in directly contributing to the code, please check out [Development](./development.md) for more information on the environment and processes. ## Documentation Improvements A ton of time has been spent not only creating and supporting this tool and related extensions, but also spent making this documentation. If you feel it is still lacking, show your appreciation for the tool by helping to improve the documentation. Check out [Development](./development.md) for more info on documentation. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/about/development.md0000644000175100001710000003656200000000000022432 0ustar00runnerdocker# Development ## Project Layout There are a number of files for build, test, and continuous integration in the root of the project, but in general, the project is broken up like so. ``` ├── docs │   └── src │      ├── dictionary │      └── markdown ├── soupsieve ├── requirements └── tests ``` Directory | Description --------------------- | ----------- `docs/src/dictionary` | Contains the spell check wordlist(s) for the project. `docs/src/markdown` | Contains the content for the documentation. `soupsieve` | Contains the source code for the project. `requirements` | Contains files with lists of dependencies that are required for the project, and required for continuous integration. `tests` | Contains unit test files. ## Coding Standards When writing code, the code should roughly conform to PEP8 and PEP257 suggestions. The project utilizes the Flake8 linter (with some additional plugins) to ensure code conforms (give or take some of the rules). 
When in doubt, follow the formatting hints of existing code when adding files or modifying existing files. Listed below are the modules used: - @gitlab:pycqa/flake8 - @gitlab:pycqa/flake8-docstrings - @gitlab:pycqa/pep8-naming - @ebeweber/flake8-mutable - @gforcada/flake8-builtins Usually this can be automated with Tox (assuming it is installed): `tox -e lint`. ## Building and Editing Documents Documents are in Markdown (with some additional syntax provided by extensions) and are converted to HTML via Python Markdown. If you would like to build and preview the documentation, you must have these packages installed: - @Python-Markdown/markdown: the Markdown parser. - @mkdocs/mkdocs: the document site generator. - @squidfunk/mkdocs-material: a material theme for MkDocs. - @facelessuser/pymdown-extensions: this Python Markdown extension bundle. In order to build and preview the documents, just run the command below from the root of the project and you should be able to view the documents at `localhost:8000` in your browser. After that, you should be able to update the documents and have your browser preview update live. ``` mkdocs serve ``` ## Spell Checking Documents Spell checking is performed via @facelessuser/pyspelling. During validation we build the docs and spell check various files in the project. [Aspell][aspell] must be installed and in the path. Currently this project uses one of the more recent versions of Aspell. It is not expected that everyone will install and run Aspell locally, but it will be run in CI tests for pull requests. In order to perform the spell check locally, it is expected you are set up to build the documents, and that you have Aspell installed in your system path (if needed you can use the `--binary` option to point to the location of your Aspell binary). It is also expected that you have the `en` dictionary installed as well. To initiate the spell check, run the following command from the root of the project. 
You will need to make sure the documents are built first: ``` mkdocs build --clean ``` And then run the spell checker. ``` pyspelling ``` It should print out the files with the misspelled words if any are found. If you find it prints words that are not misspelled, you can add them in `docs/src/dictionary/en-custom.txt`. ## Validation Tests In order to preserve good code health, a test suite has been put together with pytest (@pytest-dev/pytest). To run these tests, you can use the following command: ``` py.test ``` ### Running Validation With Tox Tox (@tox-dev/tox) is a great way to run the validation tests, spelling checks, and linting in virtual environments so as not to mess with your current working environment. Tox will use the specified Python version for the given environment and create a virtual environment and install all the needed requirements (minus Aspell). You could also set up your own virtual environments with the Virtualenv module without Tox, and manually do the same. First, you need to have Tox installed: ``` pip install tox ``` By running Tox, it will walk through all the environments and create them (assuming you have all the Python versions on your machine) and run the related tests. See `tox.ini` to learn more. ``` tox ``` If you don't have all the Python versions needed to test all the environments, those entries will fail. To run the tests for specific versions of Python, you specify the environment with `-e pyXY` where `X` is the major version and `Y` is the minor version. ``` tox -e py37 ``` To target linting: ``` tox -e lint ``` To select spell checking and document building: ``` tox -e documents ``` ## Code Coverage When running the validation tests through Tox, it is set up to track code coverage via the Coverage (@bitbucket:ned/coveragepy) module. Coverage is run on each `pyxx` environment. 
If you've made changes to the code, you can clear the old coverage data: ``` coverage erase ``` Then run each unit test environment to generate coverage data. All the data from each run is merged together. HTML is output for each file in `.tox/pyXX/tmp`. You can use these to see areas that are not covered/exercised yet with testing. You can check out `tox.ini` to see how this is accomplished. ## Code Documentation The Soup Sieve module is laid out in the following structure: ``` soupsieve ├── __init__.py ├── __meta__.py ├── css_match.py ├── css_parser.py ├── css_types.py └── util.py ``` File | Description --------------- | ----------- `__init__.py` | Contains the API for the user. `__meta__.py` | Contains package meta data like version. `css_match.py` | Contains the logic for matching tags with a CSS selector. `css_parser.py` | Contains the CSS selector parser. `css_types.py` | Contains the CSS types for the compiled CSS patterns. `util.py` | Contains miscellaneous helper functions, classes, and constants. ### Compiled CSS Selector Structure When a CSS selector string is given to Soup Sieve, it is run through the `CSSParser` class. `CSSParser` will return a `SelectorList` class. This class is sent to the `SoupSieve` class as a parameter along with things like `namespace` and `flags`. One of the most important things to understand when contributing is the structure of the `SelectorList` class. A `SelectorList` represents a list of compound selectors. So if you had the selector `#!css div > p`, you would get a `SelectorList` object containing one `Selector` object. If you had `#!css div, p`, you would get a `SelectorList` with two `Selector` objects as this is a selector list of two compound selectors. A compound selector gets parsed into pieces. Each part of a specific compound selector is usually assigned to an attribute in a single `Selector` object. 
The attributes of the `Selector` object may be as simple as a boolean or a string, but they can also be a tuple of more `SelectorList` objects. In the case of `#!css *:not(p, div)`, `#!css *` will be a `SelectorList` with one `Selector`. The `#!css :not(p, div)` selector list will be a tuple containing one `SelectorList` of two `Selectors` (one for `p` and one for `div`) under the `selectors` attribute of the `#!css *` `Selector`. In short, `Selectors` are always contained within a `SelectorList`, and a compound selector is a single `Selector` object that may chain other `SelectorList` objects depending on the complexity of the compound selector. If you provide a selector list, then you will get multiple `Selector` objects (one for each compound selector in the list) which in turn may chain other `Selector` objects. To view the selector list in a compiled object for debugging purposes, one can access it via `SoupSieve.selectors`, though it is recommended to pretty print them: ```pycon3 >>> import soupsieve as sv >>> sv.compile('this > that.class[name=value]').selectors.pretty() SelectorList( selectors=( Selector( tag=SelectorTag( name='that', prefix=None), ids=(), classes=( 'class', ), attributes=( SelectorAttribute( attribute='name', prefix='', pattern=re.compile( '^value$'), xml_type_pattern=None), ), nth=(), selectors=(), relation=SelectorList( selectors=( Selector( tag=SelectorTag( name='this', prefix=None), ids=(), classes=(), attributes=(), nth=(), selectors=(), relation=SelectorList( selectors=(), is_not=False, is_html=False), rel_type='>', contains=(), lang=(), flags=0), ), is_not=False, is_html=False), rel_type=None, contains=(), lang=(), flags=0), ), is_not=False, is_html=False) ``` ### `SelectorList` ```py3 class SelectorList: """Selector list.""" def __init__(self, selectors=tuple(), is_not=False): """Initialize.""" ``` Attribute | Description -------------- | ----------- `selectors` | A list of `Selector` objects. 
`is_not` | The selectors in the selector list are from a `:not()`. `is_html` | The selectors in the selector list are HTML specific. ### `Selector` ```py3 class Selector: """Selector.""" def __init__( self, tag, ids, classes, attributes, nth, selectors, relation, rel_type, contains, lang, flags ): """Initialize.""" ``` Flags | Description ------------------- | ----------- `SEL_EMPTY` | The current compound selector contained an `:empty` pseudo-class. `SEL_ROOT` | The current compound selector contains `:root`. `SEL_DEFAULT` | The compound selector has a `:default` pattern and requires additional logic to determine if it is the first `submit` button in a form. `SEL_INDETERMINATE` | The compound selector has a `:indeterminate` pattern and requires additional logic to ensure a `radio` element and all of the `radio` elements with the same `name` under a form are not set. Attribute | Description --------------- | ----------- `tag` | Contains a single [`SelectorTag`](#selectortag) object, or `None`. `ids` | Contains a tuple of ids to match. Usually if multiple conflicting ids are present, it simply won't match a tag, but it allows multiple to handle the syntax `tag#1#2` even if it is invalid. `classes` | Contains a tuple of class names to match. `attributes` | Contains a tuple of attributes. Each attribute is represented as a [`SelectorAttribute`](#selectorattribute). `nth` | Contains a tuple containing `nth` selectors, each selector being represented as a [`SelectorNth`](#selectornth). `nth` selectors contain things like `:first-child`, `:only-child`, `#!css :nth-child()`, `#!css :nth-of-type()`, etc. `selectors` | Contains a tuple of `SelectorList` objects for each pseudo-class selector part of the compound selector: `#!css :is()`, `#!css :not()`, `#!css :has()`, etc. `relation` | This will contain a `SelectorList` object with one `Selector` object, which could in turn chain an additional relation depending on the complexity of the compound selector. 
For instance, `div > p + a` would be a `Selector` for `a` that contains a `relation` for `p` (another `SelectorList` object) which also contains a relation of `div`. When matching, we would match that the tag is `a`, and then walk its relation chain verifying that they all match. In this case, the relation chain would be a direct, previous sibling of `p`, which has a direct parent of `div`. A `:has()` pseudo-class would walk this in the opposite order. `div:has(> p + a)` would verify `div`, and then check for a child of `p` with a sibling of `a`. `rel_type` | `rel_type` is attached to relational selectors. In the case of `#!css div > p + a`, the relational selectors of `div` and `p` would get a relational type of `>` and `+` respectively. `:has()` relational `rel_type` are preceded with `:` to signify a forward looking relation. `contains` | Contains a tuple of [`SelectorContains`](#selectorcontains) objects. Each object contains the list of text to match an element's content against. `lang` | Contains a tuple of [`SelectorLang`](#selectorlang) objects. `flags` | Selector flags that are used to signal that a specific type of selector is present. ### `SelectorNull` ```py3 class SelectorNull: """Null Selector.""" def __init__(self): """Initialize.""" ``` The null selector is like `Selector`, but it matches nothing. ### `SelectorTag` ```py3 class SelectorTag: """Selector tag.""" def __init__(self, name, prefix): """Initialize.""" ``` Attribute | Description ------------- | ----------- `name` | `name` contains the tag name to match. `prefix` | `prefix` contains the namespace prefix to match. `prefix` can also be `None`. ### `SelectorAttribute` ```py3 class SelectorAttribute: """Selector attribute rule.""" def __init__(self, attribute, prefix, pattern, xml_type_pattern): """Initialize.""" ``` Attribute | Description ------------------- | ----------- `attribute` | Contains the attribute name to match. `prefix` | Contains the attribute namespace prefix to match if any. 
`pattern` | Contains a `re` regular expression object that matches the desired attribute value. `xml_type_pattern` | As the default `type` pattern is case insensitive, when the attribute name is `type` and a case sensitivity has not been explicitly defined, a secondary case sensitive `type` pattern is compiled for use with XML documents when detected. ### `SelectorContains` ```py3 class SelectorContains: """Selector contains rule.""" def __init__(self, text): """Initialize.""" ``` Attribute | Description ------------------- | ----------- `text` | A tuple of acceptable text that an element should match. An element only needs to match at least one. ### `SelectorNth` ```py3 class SelectorNth: """Selector nth type.""" def __init__(self, a, n, b, of_type, last, selectors): """Initialize.""" ``` Attribute | Description ------------- | ----------- `a` | The `a` value in the formula `an+b` specifying an index. `n` | `True` if the provided formula has included a literal `n` which signifies the formula is not a static index. `b` | The `b` value in the formula `an+b`. `of_type` | `True` if the `nth` pseudo-class is an `*-of-type` variant. `last` | `True` if the `nth` pseudo-class is a `*last*` variant. `selectors` | A `SelectorList` object representing the `of S` portion of `:nth-child(an+b [of S]?)`. ### `SelectorLang` ```py3 class SelectorLang: """Selector language rules.""" def __init__(self, languages): """Initialize.""" ``` Attribute | Description ------------- | ----------- `languages` | A list of regular expression objects that match a language pattern. 
--8<-- "links.txt" ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/about/license.md0000644000175100001710000000212300000000000021514 0ustar00runnerdocker# License MIT License Copyright (c) 2018 - 2021 Isaac Muse Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/api.md0000644000175100001710000002624500000000000017544 0ustar00runnerdocker# API Soup Sieve implements most of the selectors from the stable specification and even many from the latest draft specification. Selectors can be used to detect and filter elements. To learn more about which specific selectors are implemented, see [CSS Selectors](./selectors/index.md). Soup Sieve will detect the document type being used from the Beautiful Soup object that is given to it, and depending on the document type, its behavior may be slightly different. 
When detecting XHTML, Soup Sieve simply looks to see if the root element of an XML document is under the XHTML namespace and does not currently look at the `doctype`. If in the future there is a need for stricter XHTML detection, this may change. - HTML document types (HTML, HTML5) will have their tag names and attribute names treated without case sensitivity, like most browsers do. - XML document types (including XHTML) will have their tag names and attribute names treated with case sensitivity. - HTML5, XHTML and XML documents will have namespaces evaluated per the document's support (provided via the parser). Some additional configuration is required when using namespaces, see [Namespaces](#namespaces) for more information. !!! tip "Getting Proper Namespaces" The `html5lib` parser provides proper namespaces for HTML5, but `lxml`'s HTML parser will not. If you need namespace support for HTML5, consider using `html5lib`. For XML, the `lxml-xml` parser (`xml` for short) will provide proper namespaces. It is generally suggested that `lxml-xml` is used to parse XHTML documents to take advantage of namespaces. - While attribute values are generally treated as case sensitive, HTML5 and HTML treat the `type` attribute special. The `type` attribute's value is always case insensitive. This is generally how most browsers treat `type`. If you need `type` to be sensitive, you can use the `s` flag: `#!css [type="submit" s]`. While Soup Sieve access is exposed through Beautiful Soup's API, Soup Sieve's API can always be imported and accessed directly for more controlled tag selection if needed. ## Flags ### `soupsieve.DEBUG` Print debug output when parsing a selector. 
```pycon3 >>> import soupsieve as sv >>> sv.compile('p:has(#id) > span.some-class:contains(text)', flags=sv.DEBUG) ## PARSING: 'p:has(#id) > span.some-class:contains(text)' TOKEN: 'tag' --> 'p' at position 0 TOKEN: 'pseudo_class' --> ':has(' at position 1 is_pseudo: True is_open: True is_relative: True TOKEN: 'id' --> '#id' at position 6 TOKEN: 'pseudo_close' --> ')' at position 9 TOKEN: 'combine' --> ' > ' at position 10 TOKEN: 'tag' --> 'span' at position 13 TOKEN: 'class' --> '.some-class' at position 17 TOKEN: 'pseudo_contains' --> ':contains(text)' at position 28 ## END PARSING SoupSieve(pattern='p:has(#id) > span.some-class:contains(text)', namespaces=None, custom=None, flags=1) ``` ## `soupsieve.select_one()` ```py3 def select_one(select, tag, namespaces=None, flags=0, **kwargs): """Select the specified tags.""" ``` `select_one` will return the first tag under the given tag that matches the given CSS selectors provided, or it will return `None` if a suitable tag was not found. `select_one` accepts a CSS selector string, a `Tag`/`BeautifulSoup` object, an optional [namespace](#namespaces) dictionary, and `flags`. ```pycon3 >>> import soupsieve as sv >>> sv.select_one('p:is(.a, .b, .c)', soup)

Cat

``` ## `soupsieve.select()` ```py3 def select(select, tag, namespaces=None, limit=0, flags=0, **kwargs): """Select the specified tags.""" ``` `select` will return all tags under the given tag that match the given CSS selectors provided. You can also limit the number of tags returned by providing a positive integer via the `limit` parameter (0 means to return all tags). `select` accepts a CSS selector string, a `Tag`/`BeautifulSoup` object, an optional [namespace](#namespaces) dictionary, a `limit`, and `flags`. ```pycon3 >>> import soupsieve as sv >>> sv.select('p:is(.a, .b, .c)', soup) [

Cat

,

Dog

,

Mouse

] ``` ## `soupsieve.iselect()` ```py3 def iselect(select, node, namespaces=None, limit=0, flags=0, **kwargs): """Select the specified tags.""" ``` `iselect` is exactly like `select` except that it returns a generator instead of a list. ## `soupsieve.closest()` ```py3 def closest(select, tag, namespaces=None, flags=0, **kwargs): """Match closest ancestor to the provided tag.""" ``` `closest` returns the tag closest to the given tag that matches the given selector. The element found must be a direct ancestor of the tag or the tag itself. `closest` accepts a CSS selector string, a `Tag`/`BeautifulSoup` object, an optional [namespace](#namespaces) dictionary, and `flags`. ## `soupsieve.match()` ```py3 def match(select, tag, namespaces=None, flags=0, **kwargs): """Match node.""" ``` The `match` function matches a given tag with a given CSS selector. `match` accepts a CSS selector string, a `Tag`/`BeautifulSoup` object, an optional [namespace](#namespaces) dictionary, and flags. ```pycon3 >>> nodes = sv.select('p:is(.a, .b, .c)', soup) >>> sv.match('p:not(.b)', nodes[0]) True >>> sv.match('p:not(.b)', nodes[1]) False ``` ## `soupsieve.filter()` ```py3 def filter(select, nodes, namespaces=None, flags=0, **kwargs): """Filter list of nodes.""" ``` `filter` takes an iterable containing HTML nodes and will filter them based on the provided CSS selector string. If given a `Tag`/`BeautifulSoup` object, it will iterate the direct children filtering them. `filter` accepts a CSS selector string, an iterable containing nodes, an optional [namespace](#namespaces) dictionary, and flags. ```pycon3 >>> sv.filter('p:not(.b)', soup.div) [

Cat

,

Mouse

] ``` ## `soupsieve.escape()` ```py3 def escape(ident): """Escape CSS identifier.""" ``` `escape` is used to escape CSS identifiers. It follows the [CSS specification][cssom] and escapes any character that would normally cause an identifier to be invalid. ```pycon3 >>> sv.escape(".foo#bar") '\\.foo\\#bar' >>> sv.escape("()[]{}") '\\(\\)\\[\\]\\{\\}' >>> sv.escape('--a') '--a' >>> sv.escape('0') '\\30 ' >>> sv.escape('\0') '�' ``` !!! new "New in 1.9.0" `escape` is a new API function added in 1.9.0. ## `soupsieve.compile()` ```py3 def compile(pattern, namespaces=None, flags=0, **kwargs): """Compile CSS pattern.""" ``` `compile` will pre-compile a CSS selector pattern returning a `SoupSieve` object. The `SoupSieve` object has the same selector functions available via the module without the need to specify the selector, namespaces, or flags. ```py3 class SoupSieve: """Match tags in Beautiful Soup with CSS selectors.""" def match(self, tag): """Match.""" def closest(self, tag): """Match closest ancestor.""" def filter(self, iterable): """Filter.""" def select_one(self, tag): """Select a single tag.""" def select(self, tag, limit=0): """Select the specified tags.""" def iselect(self, tag, limit=0): """Iterate the specified tags.""" ``` ## `soupsieve.purge()` Soup Sieve caches compiled patterns for performance. If for whatever reason, you need to purge the cache, simply call `purge`. ## Custom Selectors The custom selector feature is loosely inspired by the `css-extensions` [proposal][custom-extensions-1]. In its current form, Soup Sieve allows assigning a complex selector to a custom pseudo-class name. The pseudo-class name must start with `:--` to avoid conflicts with any future pseudo-classes. To create custom selectors, you simply need to pass a dictionary containing the custom pseudo-class names (keys) with the associated CSS selectors that the pseudo-classes are meant to represent (values). 
It is important to remember that pseudo-class names are not case sensitive, so even though a dictionary will allow you to specify multiple keys with the same name (as long as the character cases are different), Soup Sieve will not and will throw an exception if you attempt to do so. In the following example, we will define our own custom selector called `#!css :--header` that will be an alias for `#!css h1, h2, h3, h4, h5, h6`. ```py3 import soupsieve as sv import bs4 markup = """

Header 1

Header 2

child

Header 1,

Header 2

] ``` Custom selectors can also be dependent upon other custom selectors. You don't have to worry about the order in the dictionary as custom selectors will be compiled "just in time" when they are needed. Be careful though, if you create a circular dependency, you will get a `SelectorSyntaxError`. Assuming the same markup as in the first example, we will now create a custom selector that should find any element that has child elements, we will call the selector `:--parent`. Then we will create another selector called `:--parent-paragraph` that will use the `:--parent` selector to find `#!html

` elements that are also parents: ```py3 custom = { ":--parent": ":has(> *|*)", ":--parent-paragraph": "p:--parent" } print(sv.select(':--parent-paragraph', soup, custom=custom)) ``` The above code will yield the only paragraph that is a parent: ``` [

child

] ``` ## Namespaces Many of Soup Sieve's selector functions take an optional namespace dictionary. Namespaces, just like CSS, must be defined for Soup Sieve to evaluate `ns|tag` type selectors. This is analogous to CSS's namespace at-rule: ```css @namespace url("http://www.w3.org/1999/xhtml"); @namespace svg url("http://www.w3.org/2000/svg"); ``` A namespace dictionary should have keys (prefixes) and values (namespaces). An empty key string for a key would denote the default key. An empty value would essentially represent a null namespace. To represent the above CSS example for Soup Sieve, we would configure it like so: ```py3 namespace = { "": "http://www.w3.org/1999/xhtml", # Default namespace is for XHTML "svg": "http://www.w3.org/2000/svg", # The SVG namespace defined with prefix of "svg" } ``` Prefixes used in the namespace dictionary do not have to match the prefixes in the document. The provided prefix is never compared against the prefixes in the document, only the namespaces are compared. The prefixes in the document are only there for the parser to know which tags get which namespace. And the prefixes in the namespace dictionary are only defined in order to provide an alias for the namespaces when using the namespace selector syntax: `ns|name`. Tags do not necessarily have to have a prefix for Soup Sieve to recognize them either. For instance, in HTML5, SVG *should* automatically get the SVG namespace. Depending how namespaces were defined in the document, tags may inherit namespaces in some conditions. Namespace assignment is mainly handled by the parser and exposed through the Beautiful Soup API. Soup Sieve uses the Beautiful Soup API to then compare namespaces for supported documents. 
--8<-- refs.txt --8<-- ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/differences.md0000644000175100001710000001420500000000000021241 0ustar00runnerdocker# Beautiful Soup Differences Soup Sieve is the official CSS "select" implementation of Beautiful Soup 4.7.0+. While the inclusion of Soup Sieve fixes many issues and greatly expands CSS support in Beautiful Soup, it does introduce some differences which may surprise some who've become accustomed to the old "select" implementation. Beautiful Soup's old select method had numerous limitations and quirks that do not align with the actual CSS specifications. Most are insignificant, but there are a couple differences that people over the years had come to rely on. Soup Sieve, which aims to follow the CSS specification closely, does not support these differences. ## Attribute Values Beautiful Soup was very relaxed when it came to attribute values in selectors: `#!css [attribute=value]`. Beautiful Soup would allow almost anything for a valid unquoted value. Soup Sieve, on the other hand, follows the CSS specification and requires that a value be a valid identifier, or it must be quoted. If you get an error complaining about a malformed attribute, you may need to quote the value. For instance, if you previously used a selector like this: ```py3 soup.select('[attr={}]') ``` You would need to quote the value as `{}` is not a valid CSS identifier, so it must be quoted: ```py3 soup.select('[attr="{}"]') ``` You can also use the [escape](./api.md#soupsieveescape) function to escape dynamic content: ```py3 import soupsieve soup.select('[attr=%s]' % soupsieve.escape('{}')) ``` ## CSS Identifiers Since Soup Sieve follows the CSS specification, class names, id names, tag names, etc. must be valid identifiers. 
Since identifiers, according to the CSS specification, cannot *start* with a number, some users may find that their old class, id, or tag name selectors that started with numbers will not work. To specify such selectors, you'll have to use CSS escapes. So if you used to use: ```py3 soup.select('.2class') ``` You would need to update with: ```py3 soup.select(r'.\32 class') ``` Numbers in the middle or at the end of a class will work as they always did: ```py3 soup.select('.class2') ``` ## Relative Selectors Whether on purpose or by accident, Beautiful Soup used to allow relative selectors: ```py3 soup.select('> div') ``` The above is not a valid CSS selector according to the CSS specifications. Relative selector lists have only recently been added to the CSS specifications, and they are only allowed in a `#!css :has()` pseudo-class: ```css article:has(> div) ``` But, in the level 4 CSS specifications, the `:scope` pseudo-class has been added which allows for the same feel as using `#!css > div`. Since Soup Sieve supports the `:scope` pseudo-class, it can be used to produce the same behavior as the legacy select method. In CSS, the `:scope` pseudo-class represents the element that the CSS select operation is called on. In supported browsers, the following JavaScript example would treat `:scope` as the element that `el` references: ```js el.querySelectorAll(':scope > .class') ``` Just like in the JavaScript example above, Soup Sieve would also treat `:scope` as the element that `el` references: ```py3 el.select(':scope > .class') ``` In the case where the element is the document node, `:scope` would simply represent the root element of the document. 
So, if you used to have selectors such as: ```py3 soup.select('> div') ``` You can simply add `:scope`, and it should work the same: ```py3 soup.select(':scope > div') ``` While this will generally give you what is expected for the relative, descendant selectors, this will not work for sibling selectors, and the reasons why are covered in more detail in [Out of Scope Selectors](#out-of-scope-selectors). ## Out of Scope Selectors In a browser, when requesting a selector via `querySelectorAll`, the element that `querySelectorAll` is called on is the *scoped* element. So in the following example, `el` is the *scoped* element. ```js el.querySelectorAll('.class') ``` This same concept applies to Soup Sieve, where the element that `select` or `select_one` is called on is also the *scoped* element. So in the following example, `el` is also the *scoped* element: ```py3 el.select('.class') ``` In browsers, `querySelectorAll` and `querySelector` only return elements under the *scoped* element. They do not return the *scoped* element itself, its parents, or its siblings. Only when `querySelectorAll` or `querySelector` is called on the document node will it return the *scoped* selector, which would be the *root* element, as the query is being called on the document itself and not the *scoped* element. Soup Sieve aims to essentially mimic the browser functions such as `querySelector`, `querySelectorAll`, `matches`, etc. In Soup Sieve `select` and `select_one` are analogous to `querySelectorAll` and `querySelector` respectively. For this reason, Soup Sieve also only returns elements under the *scoped* element. The idea is to provide a familiar interface that behaves, as close as possible, to what people familiar with CSS selectors are used to. 
So while Soup Sieve will find elements relative to `:scope` with `>` or the descendant combinator (whitespace): ```py3 soup.select(':scope > div') ``` It will not find elements relative to `:scope` with `+` or `~` as siblings to the *scoped* element are not under the *scoped* element: ```py3 soup.select(':scope + div') ``` This is by design and is in alignment with the behavior exhibited in all web browsers. ## Selected Element Order Another quirk of Beautiful Soup's old implementation was that it returned the HTML nodes in the order of how the selectors were defined. For instance, Beautiful Soup, if given the pattern `#!css article, body` would first return `#!html <article>`
and then `#!html <body>`. Soup Sieve does not, and frankly cannot, honor Beautiful Soup's old ordering convention due to the way it is designed. Soup Sieve returns the nodes in the order they are defined in the document as that is how the elements are searched. This is much more efficient and provides better performance. So, given the earlier selector pattern of `article, body`, Soup Sieve would return the element `#!html <body>` and then `#!html <article>
` as that is how it is ordered in the HTML document. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/faq.md0000644000175100001710000000617400000000000017541 0ustar00runnerdocker# Frequent Asked Questions ## Why do selectors not work the same in Beautiful Soup 4.7+? Soup Sieve is the official CSS selector library in Beautiful Soup 4.7+, and with this change, Soup Sieve introduces a number of changes that break some of the expected behaviors that existed in versions prior to 4.7. In short, Soup Sieve follows the CSS specifications fairly close, and this broke a number of non-standard behaviors. These non-standard behaviors were not allowed according to the CSS specifications. Soup Sieve has no intentions of bringing back these behaviors. For more details on specific changes, and the reasoning why a specific change is considered a good change, or simply a feature that Soup Sieve cannot/will not support, see [Beautiful Soup Differences](./differences.md). ## How does `iframe` handling work? In web browsers, CSS selectors do not usually select content inside an `iframe` element if the selector is called on an element outside of the `iframe`. Each HTML document is usually encapsulated and CSS selector leakage across this `iframe` boundary is usually prevented. In it's current iteration, Soup Sieve is not aware of the origin of the documents in the `iframe`, and Soup Sieve will not prevent selectors from crossing these boundaries. Soup Sieve is not used to style documents, but to scrape documents. For this reason, it seems to be more helpful to allow selector combinators to cross these boundaries. Soup Sieve isn't entirely unaware of `iframe` elements though. In Soup Sieve 1.9.1, it was noticed that some pseudo-classes behaved in unexpected ways without awareness to `iframes`, this was fixed in 1.9.1. 
Pseudo-classes such as [`:default`](./selectors/pseudo-classes.md#:default), [`:indeterminate`](./selectors/pseudo-classes.md#:indeterminate), [`:dir()`](./selectors/pseudo-classes.md#:dir), [`:lang()`](./selectors/pseudo-classes.md#:lang), [`:root`](./selectors/pseudo-classes.md#:root), and [`:contains()`](./selectors/pseudo-classes.md#:contains) where given awareness of `iframes` to ensure they behaved properly and returned the expected elements. This doesn't mean that `select` won't return elements in `iframes`, but it won't allow something like `:default` to select a `button` in an `iframe` whose parent `form` is outside the `iframe`. Or better put, a default `button` will be evaluated in the context of the document it is in. With all of this said, if your selectors have issues with `iframes`, it is most likely because `iframes` are handled differently by different parsers. `html.parser` will usually parse `iframe` elements as it sees them. `lxml` parser will often remove `html` and `body` tags of an `iframe` HTML document. `lxml-xml` will simply ignore the content in a XHTML document. And `html5lib` will HTML escape the content of an `iframe` making traversal impossible. In short, Soup Sieve will return elements from all documents, even `iframes`. But certain pseudo-classes may take into consideration the context of the document they are in. But even with all of this, a parser's handling of `iframes` may make handling its content difficult if it doesn't parse it as HTML elements, or augments its structure. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/index.md0000644000175100001710000001242600000000000020076 0ustar00runnerdocker# Quick Start ## Overview Soup Sieve is a CSS selector library designed to be used with [Beautiful Soup 4][bs4]. It aims to provide selecting, matching, and filtering using modern CSS selectors. 
Soup Sieve currently provides selectors from the CSS level 1 specifications up through the latest CSS level 4 drafts and beyond (though some are not yet implemented). Soup Sieve was written with the intent to replace Beautiful Soup's builtin select feature, and as of Beautiful Soup version 4.7.0, it now is :confetti_ball:. Soup Sieve can also be imported in order to use its API directly for more controlled, specialized parsing. Soup Sieve has implemented most of the CSS selectors up through the latest CSS draft specifications, though there are a number that don't make sense in a non-browser environment. Selectors that cannot provide meaningful functionality simply do not match anything. Some of the supported selectors are: - `#!css .classes` - `#!css #ids` - `#!css [attributes=value]` - `#!css parent child` - `#!css parent > child` - `#!css sibling ~ sibling` - `#!css sibling + sibling` - `#!css :not(element.class, element2.class)` - `#!css :is(element.class, element2.class)` - `#!css parent:has(> child)` - and [many more](./selectors/index.md) ## Installation You must have Beautiful Soup already installed: ``` pip install beautifulsoup4 ``` In most cases, assuming you've installed version 4.7.0, that should be all you need to do, but if you've installed via some alternative method, and Soup Sieve is not automatically installed for you, you can install it directly: ``` pip install soupsieve ``` If you want to manually install it from source, navigate to the root of the project and run ``` python setup.py build python setup.py install ``` ## Usage To use Soup Sieve, you must create a `BeautifulSoup` object: ```pycon3 >>> import bs4 >>> text = """ ...
... ...

Cat

...

Dog

...

Mouse

...
... """ >>> soup = bs4.BeautifulSoup(text, 'html5lib') ``` For most people, using the Beautiful Soup 4.7.0+ API may be more than sufficient. Beautiful Soup offers two methods that employ Soup Sieve: `select` and `select_one`. Beautiful Soup's select API is identical to Soup Sieve's, except that you don't have to hand it the tag object, the calling object passes itself to Soup Sieve: ```pycon3 >>> soup = bs4.BeautifulSoup(text, 'html5lib') >>> soup.select_one('p:is(.a, .b, .c)')

Cat

``` ```pycon3 >>> soup = bs4.BeautifulSoup(text, 'html5lib') >>> soup.select('p:is(.a, .b, .c)') [

Cat

,

Dog

,

Mouse

] ``` You can also use the Soup Sieve API directly to get access to the full range of possibilities that Soup Sieve offers. You can select a single tag: ```pycon3 >>> import soupsieve as sv >>> sv.select_one('p:is(.a, .b, .c)', soup)

Cat

``` You can select all tags: ```pycon3 >>> import soupsieve as sv >>> sv.select('p:is(.a, .b, .c)', soup) [

Cat

,

Dog

,

Mouse

] ``` You can select the closest ancestor: ```pycon3 >>> import soupsieve as sv >>> el = sv.select_one('.c', soup) >>> sv.closest('div', el)

Cat

Dog

Mouse

``` You can filter a tag's Children (or an iterable of tags): ```pycon3 >>> sv.filter('p:not(.b)', soup.div) [

Cat

,

Mouse

] ``` You can match a single tag: ```pycon3 >>> els = sv.select('p:is(.a, .b, .c)', soup) >>> sv.match(els[0], 'p:not(.b)') True >>> sv.match(els[1], 'p:not(.b)') False ``` Or even just extract comments: ```pycon3 >>> sv.comments(soup) [' These are animals '] ``` Selectors do not have to be constrained to one line either. You can span selectors over multiple lines just like you would in a CSS file. ```pycon3 >>> selector = """ ... .a, ... .b, ... .c ... """ >>> sv.select(selector, soup) [

Cat

,

Dog

,

Mouse

] ``` You can even use comments to annotate a particularly complex selector. ```pycon3 >>> selector = """ ... /* This isn't complicated, but we're going to annotate it anyways. ... This is the a class */ ... .a, ... /* This is the b class */ ... .b, ... /* This is the c class */ ... .c ... """ >>> sv.select(selector, soup) [

Cat

,

Dog

,

Mouse

] ``` If you've ever used Python's `re` library for regular expressions, you may know that it is often useful to pre-compile a regular expression pattern, especially if you plan to use it more than once. The same is true for Soup Sieve's matchers, though it is not required. If you have a pattern that you want to use more than once, it may be wise to pre-compile it early on: ```pycon3 >>> selector = sv.compile('p:is(.a, .b, .c)') >>> selector.filter(soup.div) [

Cat

,

Dog

,

Mouse

] ``` A compiled object has all the same methods, though the parameters will be slightly different as they don't need things like the pattern or flags once compiled. See [API](./api.md) documentation for more info. Compiled patterns are cached, so if for any reason you need to clear the cache, simply issue the `purge` command. ```pycon3 >>> sv.purge() ``` --8<-- refs.txt --8<-- ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1636638789.8771718 soupsieve-2.3.1/docs/src/markdown/selectors/0000755000175100001710000000000000000000000020443 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/selectors/basic.md0000644000175100001710000004325500000000000022057 0ustar00runnerdocker# Basic Selectors Syntax and notation for basic selectors. ## Escapes Soup Sieve selectors support using CSS escapes. So if you need provide Unicode, or non-standard characters, you can use CSS style escapes. Escapes can be specified with a backslash followed by 1 - 6 hexadecimal digits: `#!css \20AC`, `#!css \0020AC`, etc. If you need to terminate an escape to avoid it accumulating unintended hexadecimal characters, you can use a space: `#!css \0020AC dont-escape-me`. You can also escape any non-hexadecimal character, and it will be treated as that character: `#!css \+` --> `+`. The one exception is that you cannot escape the form feed, newline, or carriage return. You can always use Soup Sieve's [escape command](../api.md#soupsieveescape) to escape identifiers as well. ## Type Selectors Type selectors match elements by node name. If a default namespace is defined in the [namespace dictionary](../api.md#namespaces), and no [namespace](#namespace-selectors) is explicitly defined, it will be assumed that the element must be in the default namespace. === "Syntax" ```css element ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...
Here is some text.
...
Here is some more text.
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('div')) [
Here is some text.
,
Here is some more text.
] ``` !!! tip "Additional Reading" https://developer.mozilla.org/en-US/docs/Web/CSS/Type_selectors ## Universal Selectors The Universal selector (`*`) matches elements of any type. === "Syntax" ```css * ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Here is some text.

...
Here is some more text.
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('*')) [
Here is some text.
Here is some more text.
, ,
Here is some text.
Here is some more text.
,
Here is some text.
,
Here is some more text.
] ``` !!! tip "Additional Reading" https://developer.mozilla.org/en-US/docs/Web/CSS/Universal_selectors ## ID Selectors The ID selector matches an element based on its `id` attribute. The ID must match exactly. === "Syntax" ```css #id ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...
Here is some text.
...
Here is some more text.
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('#some-id')) [
Here is some text.
] ``` !!! tip "Additional Reading" https://developer.mozilla.org/en-US/docs/Web/CSS/ID_selectors !!! note "XML Support" While the use of the `id` attribute (in the context of CSS) is a very HTML centric idea, it is supported for XML as well because Beautiful Soup supported it before Soup Sieve's existence. ## Class Selectors The class selector matches an element based on the values contained in the `class` attribute. The `class` attribute is treated as a whitespace separated list, where each item is a **class**. === "Syntax" ```css .class ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...
Here is some text.
...
Here is some more text.
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('.some-class')) [
Here is some text.
] ``` !!! tip "Additional Reading" https://developer.mozilla.org/en-US/docs/Web/CSS/Class_selectors !!! note "XML Support" While the use of the `class` attribute (in the context of CSS) is a very HTML centric idea, it is supported for XML as well because Beautiful Soup supported it before Soup Sieve's existence. ## Attribute Selectors The attribute selector matches an element based on its attributes. When specifying a value of an attribute, if it contains whitespace or special characters, you should quote them with either single or double quotes. !!! tip "Additional Reading" https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors `[attribute]` : Represents elements with an attribute named **attribute**. === "Syntax" ```css [attr] ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href]')) [Internal link, Example link, Insensitive internal link, Example org link] ``` `[attribute=value]` : Represents elements with an attribute named **attribute** that also has a value of **value**. === "Syntax" ```css [attr=value] [attr="value"] ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href="#internal"]')) [Internal link] ``` `[attribute~=value]` : Represents elements with an attribute named **attribute** whose value is a space separated list which contains **value**. === "Syntax" ```css [attr~=value] [attr~="value"] ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[class~=class2]')) [Internal link] ``` `[attribute|=value]` : Represents elements with an attribute named **attribute** whose value is a dash separated list that starts with **value**. === "Syntax" ```css [attr|=value] [attr|="value"] ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...
Some text
...
Some more text
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('div[lang|="en"]')) [
Some text
,
Some more text
] ``` `[attribute^=value]` : Represents elements with an attribute named **attribute** whose value starts with **value**. === "Syntax" ```css [attr^=value] [attr^="value"] ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href^=http]')) [Example link, Example org link] ``` `[attribute$=value]` : Represents elements with an attribute named **attribute** whose value ends with **value**. === "Syntax" ```css [attr$=value] [attr$="value"] ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href$=org]')) [Example org link] ``` `[attribute*=value]` : Represents elements with an attribute named **attribute** whose value containing the substring **value**. === "Syntax" ```css [attr*=value] [attr*="value"] ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href*="example"]')) [Example link, Example org link] ``` `[attribute!=value]`:material-star:{: title="Custom" data-md-color-primary="green" .icon} : Equivalent to `#!css :not([attribute=value])`. === "Syntax" ```css [attr!=value] [attr!="value"] ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('a[href!="#internal"]')) [Example link, Insensitive internal link, Example org link] ``` `[attribute operator value i]`:material-flask:{: title="Experimental" data-md-color-primary="purple" .icon} : Represents elements with an attribute named **attribute** and whose value, when the **operator** is applied, matches **value** *without* case sensitivity. In general, attribute comparison is insensitive in normal HTML, but not XML. `i` is most useful in XML documents. 
=== "Syntax" ```css [attr=value i] [attr="value" i] ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href="#INTERNAL" i]')) [Internal link] ``` `[attribute operator value s]` :material-flask:{: title="Experimental" data-md-color-primary="purple" .icon} : Represents elements with an attribute named **attribute** and whose value, when the **operator** is applied, matches **value** *with* case sensitivity. === "Syntax" ```css [attr=value s] [attr="value" s] ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href="#INTERNAL" s]')) [] >>> print(soup.select('[href="#internal" s]')) [Internal link] ``` ## Namespace Selectors Namespace selectors are used in conjunction with type and universal selectors as well as attribute names in attribute selectors. They are specified by declaring the namespace and the selector separated with `|`: `namespace|selector`. `namespace`, in this context, is the prefix defined via the [namespace dictionary](../api.md#namespaces). The prefix defined for the CSS selector does not need to match the prefix name in the document as it is the namespace associated with the prefix that is compared, not the prefix itself. The universal selector (`*`) can be used to represent any namespace just as it can with types. By default, type selectors without a namespace selector will match any element whose type matches, regardless of namespace. But if a CSS default namespace is declared (one with an empty key: `{"": "http://www.w3.org/1999/xhtml"}`), all type selectors will assume the default namespace unless an explicit namespace selector is specified. 
For example, if the default namespace was defined to be `http://www.w3.org/1999/xhtml`, the selector `a` would only match `a` tags that are within the `http://www.w3.org/1999/xhtml` namespace. The one exception is within pseudo classes (`:not()`, `:has()`, etc.) as namespaces are not considered within pseudo classes unless one is explicitly specified. If the namespace is omitted (`|element`), any element without a namespace will be matched. In HTML documents that support namespaces (XHTML and HTML5), HTML elements are counted as part of the `http://www.w3.org/1999/xhtml` namespace, but attributes usually do not have a namespace unless one is explicitly defined in the markup. Namespaces can be used with attribute selectors as well except that when `[|attribute]` is used, it is equivalent to `[attribute]`. === "Syntax" ```css ns|element ns|* *|* *|element |element [ns|attr] [*|attr] [|attr] ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

SVG Example

...

Soup Sieve Docs

... ... ... MDN Web Docs ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('svg|a', namespaces={'svg': 'http://www.w3.org/2000/svg'})) [MDN Web Docs] >>> print(soup.select('a', namespaces={'svg': 'http://www.w3.org/2000/svg'})) [Soup Sieve Docs, MDN Web Docs] >>> print(soup.select('a', namespaces={'': 'http://www.w3.org/1999/xhtml', 'svg': 'http://www.w3.org/2000/svg'})) [Soup Sieve Docs] >>> print(soup.select('[xlink|href]', namespaces={'xlink': 'http://www.w3.org/1999/xlink'})) [MDN Web Docs] >>> print(soup.select('[|href]', namespaces={'xlink': 'http://www.w3.org/1999/xlink'})) [Soup Sieve Docs] ``` --8<-- selector_styles.txt refs.txt --8<-- ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/selectors/combinators.md0000644000175100001710000000736400000000000023317 0ustar00runnerdocker# Combinators and Selector Lists CSS employs a number of tokens in order to represent lists or to provide relational context between two selectors. ## Selector Lists Selector lists use the comma (`,`) to join multiple selectors in a list. When presented with a selector list, any selector in the list that matches an element will return that element. === "Syntax" ```css element1, element2 ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Title

...

Paragraph

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('h1, p')) [

Title

,

Paragraph

] ``` ## Descendant Combinator Descendant combinators combine two selectors with whitespace ( ) in order to signify that the second element is matched if it has an ancestor that matches the first element. === "Syntax" ```css parent descendant ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Paragraph 1

...

Paragraph 2

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('body p')) [

Paragraph 1

,

Paragraph 2

] ``` !!! tip "Additional Reading" https://developer.mozilla.org/en-US/docs/Web/CSS/Descendant_combinator ## Child combinator Child combinators combine two selectors with `>` in order to signify that the second element is matched if it has a parent that matches the first element. === "Syntax" ```css parent > child ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Paragraph 1

...
  • Paragraph 2

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('div > p')) [

Paragraph 1

] ``` !!! tip "Additional Reading" https://developer.mozilla.org/en-US/docs/Web/CSS/Child_combinator ## General sibling combinator General sibling combinators combine two selectors with `~` in order to signify that the second element is matched if it has a sibling that precedes it that matches the first element. === "Syntax" ```css prevsibling ~ sibling ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Title

...

Paragraph 1

...

Paragraph 2

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('h1 ~ p')) [

Paragraph 1

,

Paragraph 2

] ``` !!! tip "Additional Reading" https://developer.mozilla.org/en-US/docs/Web/CSS/General_sibling_combinator ## Adjacent sibling combinator Adjacent sibling combinators combine two selectors with `+` in order to signify that the second element is matched if it has an adjacent sibling that precedes it that matches the first element. === "Syntax" ```css prevsibling + nextsibling ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Title

...

Paragraph 1

...

Paragraph 2

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('h1 ~ p')) [

Paragraph 1

] ``` !!! tip "Additional Reading" https://developer.mozilla.org/en-US/docs/Web/CSS/Adjacent_sibling_combinator --8<-- selector_styles.txt refs.txt --8<-- ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/selectors/index.md0000644000175100001710000001412500000000000022077 0ustar00runnerdocker# General Details ## Implementation Specifics The CSS selectors are based off of the CSS specification and includes not only stable selectors, but may also include selectors currently under development from the draft specifications. Primarily support has been added for selectors that were feasible to implement and most likely to get practical use. In addition to the selectors in the specification, Soup Sieve also supports a couple non-standard selectors. Soup Sieve aims to allow users to target XML/HTML elements with CSS selectors. It implements many pseudo classes, but it does not currently implement any pseudo elements and has no plans to do so. Soup Sieve also will not match anything for pseudo classes that are only relevant in a live, browser environment, but it will gracefully handle them if they've been implemented; such pseudo classes are non-applicable in the Beautiful Soup environment and are noted in [Non-Applicable Pseudo Classes](./unsupported.md#non-applicable-pseudo-classes). When speaking about namespaces, they only apply to XML, XHTML, or when dealing with recognized foreign tags in HTML5. Currently, Beautiful Soup's `html5lib` parser is the only parser that will return the appropriate namespaces for a HTML5 document. If you are using XHTML, you have to use the Beautiful Soup's `lxml-xml` parser (or `xml` for short) to get the appropriate namespaces in an XHTML document. In addition to using the correct parser, you must provide a dictionary of namespaces to Soup Sieve in order to use namespace selectors. See the documentation on [namespaces](../api.md#namespaces) to learn more. 
While an effort is made to mimic CSS selector behavior, there may be some differences or quirks. Please report issues if any are found. ## Selector Context Key
Symbol Name Description
:material-language-html5:{: data-md-color-primary="orange" .big-icon} HTML Some selectors are very specific to HTML and either have no meaningful representation in XML, or such functionality has not been implemented. Selectors that are HTML only will be noted with :material-language-html5:{: data-md-color-primary="orange"}, and will match nothing if used in XML.
:material-star:{: data-md-color-primary="green" .big-icon} Custom Soup Sieve has implemented a couple of non-standard selectors. These can contain useful selectors that were rejected from the official CSS specifications, selectors implemented by other systems such as jQuery, or even selectors specifically created for Soup Sieve. If a selector is considered non-standard, it will be marked with :material-star:{: title="Custom" data-md-color-primary="green"}.
:material-flask:{: title="Experimental" data-md-color-primary="purple" .big-icon} Experimental All selectors that are from the current working draft of CSS4 are considered experimental and are marked with :material-flask:{: title="Experimental" data-md-color-primary="purple"}. Additionally, if there are other immature selectors, they may be marked as experimental as well. Experimental may mean we are not entirely sure if our implementation is correct, that things may still be in flux as they are part of a working draft, or even both. If at any time a working draft drops a selector from the current draft, it will most likely also be removed here, most likely with a deprecation path, except where there may be a conflict that requires a less graceful transition. One exception is in the rare case that the selector is found to be far too useful despite being rejected. In these cases, we may adopt them as "custom" selectors.
!!! tip "Additional Reading" If usage of a selector is not clear in this documentation, you can find more information by reading these specification documents: [CSS Level 3 Specification](https://www.w3.org/TR/selectors-3/) : Contains the latest official document outlining official behaviors of CSS selectors. [CSS Level 4 Working Draft](https://www.w3.org/TR/selectors-4/) : Contains the latest published working draft of the CSS level 4 selectors which outlines the experimental new selectors and experimental behavioral changes. [HTML5](https://www.w3.org/TR/html50/) : The HTML 5.0 specification document. Defines the semantics regarding HTML. [HTML Living Standard](https://html.spec.whatwg.org/) : The HTML Living Standard document. Defines semantics regarding HTML. ## Selector Terminology Certain terminology is used throughout this document when describing selectors. In order to fully understand the syntax a selector may implement, it is important to understand a couple of key terms. ### Selector Selector is used to describe any selector whether it is a [simple](#simple-selector), [compound](#compound-selector), or [complex](#complex-selector) selector. ### Simple Selector A simple selector represents a single condition on an element. It can be a [type selector](#type-selectors), [universal selector](#universal-selectors), [ID selector](#id-selectors), [class selector](#class-selectors), [attribute selector](#attribute-selectors), or [pseudo class selector](#pseudo-classes). ### Compound Selector A [compound](#compound-selector) selector is a sequence of [simple](#simple-selector) selectors. They do not contain any [combinators](#combinators-and-selector-lists). If a universal or type selector is used, they must come first, and only one instance of either a universal or type selector can be used, both cannot be used at the same time. 
### Complex Selector A complex selector consists of multiple [simple](#simple-selector) or [compound](#compound-selector) selectors joined with [combinators](#combinators-and-selector-lists). ### Selector List A selector list is a list of selectors joined with a comma (`,`). A selector list is used to specify that a match is valid if any of the selectors in a list matches. --8<-- selector_styles.txt refs.txt --8<-- ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/docs/src/markdown/selectors/pseudo-classes.md0000644000175100001710000014453200000000000023730 0ustar00runnerdocker# Pseudo-Classes ## Overview These are pseudo classes that are either fully or partially supported. Partial support is usually due to limitations of not being in a live, browser environment. Pseudo classes that cannot be implemented are found under [Non-Applicable Pseudo Classes](./unsupported.md/#non-applicable-pseudo-classes). Any selectors that are not found here or under the non-applicable either are under consideration, have not yet been evaluated, or are too new and viewed as a risk to implement as they might not stick around. ## `:any-link`:material-language-html5:{: title="HTML" data-md-color-primary="orange" .icon}:material-flask:{: title="Experimental" data-md-color-primary="purple" .icon} {:#:any-link} Selects every `#!html `, or `#!html ` element that has an `href` attribute, independent of whether it has been visited. === "Syntax" ```css :any-link ``` === "Usage" ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

A link to click

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select(':any-link')) [click] ``` !!! tip "Additional Reading" https://developer.mozilla.org/en-US/docs/Web/CSS/:any-link !!! new "New in 2.2" The CSS specification recently updated to not include `#!html ` in the definition; therefore, Soup Sieve has removed it as well. ## `:checked`:material-language-html5:{: title="HTML" data-md-color-primary="orange" .icon} {:#:checked} Selects any `#!html `, `#!html `, or `#!html ] ``` !!! tip "Additional Reading" https://developer.mozilla.org/en-US/docs/Web/CSS/:checked ## `:default`:material-language-html5:{: title="HTML" data-md-color-primary="orange" .icon}:material-flask:{: title="Experimental" data-md-color-primary="purple" .icon} {:#:default} Selects any form element that is the default among a group of related elements, including: `#!html
""" self.assert_selector( markup, ":default", ['summer', 'd1', 'd3', 'hamster', 'enable'], flags=util.HTML ) def test_iframe(self): """Test with `iframe`.""" markup = """
""" self.assert_selector( markup, ":default", ['d1', 'd3', 'd4'], flags=util.PYHTML ) def test_nested_form(self): """ Test nested form. This is technically invalid use of forms, but browsers will generally evaluate first in the nested forms. """ markup = """
""" self.assert_selector( markup, ":default", ['d1'], flags=util.HTML ) def test_default_cached(self): """ Test that we use the cached "default". For the sake of coverage, we will do this impractical select to ensure we reuse the cached default. """ markup = """
""" self.assert_selector( markup, ":default:default", ['d1'], flags=util.HTML ) def test_nested_form_fail(self): """ Test that the search for elements will bail after the first nested form. You shouldn't nest forms, but if you do, when a parent form encounters a nested form, we will bail evaluation like browsers do. We should see button 1 getting found for nested form, but button 2 will not be found for parent form. """ markup = """
what
""" self.assert_selector( markup, ":default", [], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_defined.py0000644000175100001710000000501300000000000021501 0ustar00runnerdocker"""Test defined selectors.""" from .. import util class TestDefined(util.TestCase): """Test defined selectors.""" def test_defined_html(self): """Test defined HTML.""" markup = """
""" self.assert_selector( markup, 'body :defined', ['0', '2', '3'], flags=util.HTML ) @util.skip_no_lxml def test_defined_xhtml(self): """Test defined XHTML.""" markup = """
""" from lxml import etree self.assert_selector( markup, 'body :defined', # We should get 3, but for LXML versions less than 4.4.0 we don't for reasons stated above. ['0', '2'] if etree.LXML_VERSION < (4, 4, 0, 0) else ['0', '1', '2'], flags=util.XHTML ) def test_defined_xml(self): """Test defined HTML.""" markup = """
""" # Defined is a browser thing. # XML doesn't care about defined and this will match nothing in XML. self.assert_selector( markup, 'body :defined', [], flags=util.XML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_dir.py0000644000175100001710000001243300000000000020665 0ustar00runnerdocker# -*- coding: utf-8 -*- """Test direction selectors.""" from .. import util import soupsieve as sv class TestDir(util.TestCase): """Test direction selectors.""" MARKUP = """
test1
test2
עִבְרִית()
עִבְרִית
test3
""" def test_dir_rtl(self): """Test general direction right to left.""" self.assert_selector( self.MARKUP, "div:dir(rtl)", ["1", "4", "6"], flags=util.HTML ) def test_dir_ltr(self): """Test general direction left to right.""" self.assert_selector( self.MARKUP, "div:dir(ltr)", ["3"], flags=util.HTML ) def test_dir_conflict(self): """Test conflicting direction.""" self.assert_selector( self.MARKUP, "div:dir(ltr):dir(rtl)", [], flags=util.HTML ) def test_dir_xml(self): """Test direction with XML (not supported).""" self.assert_selector( self.MARKUP, "div:dir(ltr)", [], flags=util.XML ) def test_dir_bidi_detect(self): """Test bidirectional detection.""" self.assert_selector( self.MARKUP, "span:dir(rtl)", ['2', '5', '7'], flags=util.HTML ) self.assert_selector( self.MARKUP, "span:dir(ltr)", ['8'], flags=util.HTML ) def test_dir_on_input(self): """Test input direction rules.""" self.assert_selector( self.MARKUP, ":is(input, textarea):dir(ltr)", ['9', '10', '11', '12', '13'], flags=util.HTML5 ) def test_dir_on_root(self): """Test that the root is assumed left to right if not explicitly defined.""" self.assert_selector( self.MARKUP, "html:dir(ltr)", ['0'], flags=util.HTML ) def test_dir_auto_root(self): """Test that the root is assumed left to right if auto used.""" markup = """ """ self.assert_selector( markup, "html:dir(ltr)", ['0'], flags=util.HTML ) def test_dir_on_input_root(self): """Test input direction when input is the root.""" markup = """""" # Input is root for parser in util.available_parsers('html.parser', 'lxml', 'html5lib'): soup = self.soup(markup, parser) fragment = soup.input.extract() self.assertTrue(sv.match(":root:dir(ltr)", fragment, flags=sv.DEBUG)) def test_iframe(self): """Test direction in `iframe`.""" markup = """
""" self.assert_selector( markup, "div:dir(ltr)", ['1'], flags=util.PYHTML ) self.assert_selector( markup, "div:dir(rtl)", ['2'], flags=util.PYHTML ) def test_xml_in_html(self): """Test cases for when we have XML in HTML.""" markup = """
עִבְרִית other text
""" self.assert_selector( markup, "div:dir(ltr)", ['1'], flags=util.HTML5 ) self.assert_selector( markup, "div:dir(rtl)", [], flags=util.HTML5 ) self.assert_selector( markup, "math:dir(rtl)", [], flags=util.HTML5 ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_focus_visible.py0000644000175100001710000000124300000000000022740 0ustar00runnerdocker"""Test focus visible selectors.""" from .. import util class TestFocusVisible(util.TestCase): """Test focus visible selectors.""" MARKUP = """
""" def test_focus_visible(self): """Test focus visible.""" self.assert_selector( self.MARKUP, "form:focus-visible", [], flags=util.HTML ) def test_not_focus_visible(self): """Test inverse of focus visible.""" self.assert_selector( self.MARKUP, "form:not(:focus-visible)", ["form"], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_focus_within.py0000644000175100001710000000123200000000000022603 0ustar00runnerdocker"""Test focus within selectors.""" from .. import util class TestFocusWithin(util.TestCase): """Test focus within selectors.""" MARKUP = """
""" def test_focus_within(self): """Test focus within.""" self.assert_selector( self.MARKUP, "form:focus-within", [], flags=util.HTML ) def test_not_focus_within(self): """Test inverse of focus within.""" self.assert_selector( self.MARKUP, "form:not(:focus-within)", ["form"], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_future.py0000644000175100001710000000144300000000000021420 0ustar00runnerdocker"""Test future selectors.""" from .. import util class TestFuture(util.TestCase): """Test future selectors.""" MARKUP = """

Some text in a paragraph. Link Placeholder text.

""" def test_future(self): """Test future (should match nothing).""" self.assert_selector( self.MARKUP, "p:future", [], flags=util.HTML ) def test_not_future(self): """Test not future.""" self.assert_selector( self.MARKUP, "p:not(:future)", ["0"], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_has.py0000644000175100001710000001136200000000000020662 0ustar00runnerdocker"""Test has selectors.""" from .. import util from soupsieve import SelectorSyntaxError class TestHas(util.TestCase): """Test has selectors.""" MARKUP = """

""" MARKUP2 = """

""" def test_has_descendant(self): """Test has descendant.""" self.assert_selector( self.MARKUP, 'div:not(.aaaa):has(.kkkk > p.llll)', ['4', '5', '6'], flags=util.HTML ) def test_has_next_sibling(self): """Test has next sibling.""" self.assert_selector( self.MARKUP, 'p:has(+ .dddd:has(+ div .jjjj))', ['2'], flags=util.HTML ) def test_has_subsequent_sibling(self): """Test has subsequent sibling.""" self.assert_selector( self.MARKUP, 'p:has(~ .jjjj)', ['7', '8'], flags=util.HTML ) def test_has_child(self): """Test has2.""" self.assert_selector( self.MARKUP2, 'div:has(> .bbbb)', ['0'], flags=util.HTML ) def test_has_case(self): """Test has case insensitive.""" self.assert_selector( self.MARKUP, 'div:NOT(.aaaa):HAS(.kkkk > p.llll)', ['4', '5', '6'], flags=util.HTML ) def test_has_mixed(self): """Test has mixed.""" self.assert_selector( self.MARKUP2, 'div:has(> .bbbb, .ffff, .jjjj)', ['0', '4', '8'], flags=util.HTML ) self.assert_selector( self.MARKUP2, 'div:has(.ffff, > .bbbb, .jjjj)', ['0', '4', '8'], flags=util.HTML ) def test_has_nested_pseudo(self): """Test has with nested pseudo.""" self.assert_selector( self.MARKUP2, 'div:has(> :not(.bbbb, .ffff, .jjjj))', ['2', '6', '8'], flags=util.HTML ) self.assert_selector( self.MARKUP2, 'div:not(:has(> .bbbb, .ffff, .jjjj))', ['2', '6'], flags=util.HTML ) def test_has_empty(self): """Test has with empty slot due to multiple commas.""" self.assert_selector( self.MARKUP2, 'div:has()', [], flags=util.HTML ) def test_has_multi_commas(self): """Test has with empty slot due to multiple commas.""" self.assert_selector( self.MARKUP2, 'div:has(> .bbbb, .ffff, , .jjjj)', ['0', '4', '8'], flags=util.HTML ) def test_has_leading_commas(self): """Test has with empty slot due to leading commas.""" self.assert_selector( self.MARKUP2, 'div:has(, > .bbbb, .ffff, .jjjj)', ['0', '4', '8'], flags=util.HTML ) def test_has_trailing_commas(self): """Test has with empty slot due to trailing commas.""" self.assert_selector( self.MARKUP2, 'div:has(> 
.bbbb, .ffff, .jjjj, )', ['0', '4', '8'], flags=util.HTML ) def test_invalid_incomplete_has(self): """Test `:has()` fails with just a combinator.""" self.assert_raises(':has(>)', SelectorSyntaxError) def test_invalid_has_double_combinator(self): """Test `:has()` fails with consecutive combinators.""" self.assert_raises(':has(>> has a)', SelectorSyntaxError) self.assert_raises(':has(> has, >> a)', SelectorSyntaxError) self.assert_raises(':has(> has >> a)', SelectorSyntaxError) def test_invalid_has_trailing_combinator(self): """Test `:has()` fails with trailing combinator.""" self.assert_raises(':has(> has >)', SelectorSyntaxError) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_host.py0000644000175100001710000000111400000000000021056 0ustar00runnerdocker"""Test host selectors.""" from .. import util class TestHost(util.TestCase): """Test host selectors.""" MARKUP = """

header

some text

""" def test_host(self): """Test host (not supported).""" self.assert_selector( self.MARKUP, ":host", [], flags=util.HTML ) def test_host_func(self): """Test host function (not supported).""" self.assert_selector( self.MARKUP, ":host(h1)", [], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_host_context.py0000644000175100001710000000065200000000000022630 0ustar00runnerdocker"""Test host context selectors.""" from .. import util class TestHostContext(util.TestCase): """Test host context selectors.""" def test_host_context(self): """Test host context (not supported).""" markup = """

header

some text

""" self.assert_selector( markup, ":host-context(h1, h2)", [], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_in_range.py0000644000175100001710000002532300000000000021673 0ustar00runnerdocker"""Test in range selectors.""" from .. import util class TestInRange(util.TestCase): """Test in range selectors.""" def test_in_range_number(self): """Test in range number.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6', '7', '8'], flags=util.HTML ) def test_in_range_range(self): """Test in range range.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6', '7', '8'], flags=util.HTML ) def test_in_range_month(self): """Test in range month.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_in_range_week(self): """Test in range week.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6', '7'], flags=util.HTML ) def test_in_range_date(self): """Test in range date.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_in_range_date_time(self): """Test in range date_time.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_in_range_time(self): """Test in range time.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6', '7'], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_indeterminate.py0000644000175100001710000000454700000000000022746 0ustar00runnerdocker"""Test indeterminate selectors.""" from .. 
import util class TestIndeterminate(util.TestCase): """Test indeterminate selectors.""" def test_indeterminate(self): """Test indeterminate.""" markup = """
""" self.assert_selector( markup, ":indeterminate", ['checkbox', 'radio1', 'radio6', 'radio4', 'radio5', 'radio-no-name1'], flags=util.HTML ) def test_iframe(self): """Test indeterminate when `iframe` is involved.""" markup = """
""" self.assert_selector( markup, ":indeterminate", ['radio1', 'radio3'], flags=util.PYHTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_is.py0000644000175100001710000000647500000000000020533 0ustar00runnerdocker"""Test is selectors.""" from .. import util from soupsieve import SelectorSyntaxError class TestIs(util.TestCase): """Test is selectors.""" MARKUP = """

Some text in a paragraph. Link

""" def test_is(self): """Test multiple selectors with "is".""" self.assert_selector( self.MARKUP, ":is(span, a)", ["1", "2"], flags=util.HTML ) def test_is_multi_comma(self): """Test multiple selectors but with an empty slot due to multiple commas.""" self.assert_selector( self.MARKUP, ":is(span, , a)", ["1", "2"], flags=util.HTML ) def test_is_leading_comma(self): """Test multiple selectors but with an empty slot due to leading commas.""" self.assert_selector( self.MARKUP, ":is(, span, a)", ["1", "2"], flags=util.HTML ) def test_is_trailing_comma(self): """Test multiple selectors but with an empty slot due to trailing commas.""" self.assert_selector( self.MARKUP, ":is(span, a, )", ["1", "2"], flags=util.HTML ) def test_is_empty(self): """Test empty `:is()` selector list.""" self.assert_selector( self.MARKUP, ":is()", [], flags=util.HTML ) def test_nested_is(self): """Test multiple nested selectors.""" self.assert_selector( self.MARKUP, ":is(span, a:is(#\\32))", ["1", "2"], flags=util.HTML ) self.assert_selector( self.MARKUP, ":is(span, a:is(#\\32))", ["1", "2"], flags=util.HTML ) def test_is_with_other_pseudo(self): """Test `:is()` behavior when paired with `:not()`.""" # Each pseudo class is evaluated separately # So this will not match self.assert_selector( self.MARKUP, ":is(span):not(span)", [], flags=util.HTML ) def test_multiple_is(self): """Test `:is()` behavior when paired with `:not()`.""" # Each pseudo class is evaluated separately # So this will not match self.assert_selector( self.MARKUP, ":is(span):is(div)", [], flags=util.HTML ) # Each pseudo class is evaluated separately # So this will match self.assert_selector( self.MARKUP, ":is(a):is(#\\32)", ['2'], flags=util.HTML ) def test_invalid_pseudo_class_start_combinator(self): """Test invalid start combinator in pseudo-classes other than `:has()`.""" self.assert_raises(':is(> div)', SelectorSyntaxError) self.assert_raises(':is(div, > div)', SelectorSyntaxError) def 
test_invalid_pseudo_orphan_close(self): """Test invalid, orphaned pseudo close.""" self.assert_raises('div)', SelectorSyntaxError) def test_invalid_pseudo_open(self): """Test invalid pseudo close.""" self.assert_raises(':is(div', SelectorSyntaxError) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_lang.py0000644000175100001710000002174700000000000021040 0ustar00runnerdocker"""Test language selectors.""" from .. import util class TestLang(util.TestCase): """Test language selectors.""" MARKUP = """

""" def test_lang(self): """Test language and that it uses implicit wildcard.""" # Implicit wild self.assert_selector( self.MARKUP, "p:lang(de-DE)", ['1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_lang_missing_range(self): """Test language range with a missing range.""" # Implicit wild self.assert_selector( self.MARKUP, "p:lang(de--DE)", [], flags=util.HTML ) def test_explicit_wildcard(self): """Test language with explicit wildcard (same as implicit).""" # Explicit wild self.assert_selector( self.MARKUP, "p:lang(de-\\*-DE)", ['1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_only_wildcard(self): """Test language with only a wildcard.""" self.assert_selector( self.MARKUP, "p:lang('*')", ['1', '2', '3', '4', '5', '6', '7', '8', '9'], flags=util.HTML ) def test_wildcard_start_no_match(self): """Test language with a wildcard at start, but it matches nothing.""" self.assert_selector( self.MARKUP, "p:lang('*-de-DE')", [], flags=util.HTML ) def test_wildcard_start_collapse(self): """Test that language with multiple wildcard patterns at start collapse.""" self.assert_selector( self.MARKUP, "p:lang('*-*-*-DE')", ['1', '2', '3', '4', '5', '6', '7'], flags=util.HTML ) def test_wildcard_at_start_escaped(self): """ Test language with wildcard at start (escaped). Wildcard in the middle is same as implicit, but at the start, it has specific meaning. 
""" self.assert_selector( self.MARKUP, "p:lang(\\*-DE)", ['1', '2', '3', '4', '5', '6', '7'], flags=util.HTML ) def test_language_quoted(self): """Test language (quoted).""" # Normal quoted self.assert_selector( self.MARKUP, "p:lang('de-DE')", ['1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_language_quoted_with_escaped_newline(self): """Test language (quoted) with escaped new line.""" # Normal quoted self.assert_selector( self.MARKUP, "p:lang('de-\\\nDE')", ['1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_wildcard_at_start_quoted(self): """Test language with wildcard at start (quoted).""" # First wild quoted self.assert_selector( self.MARKUP, "p:lang('*-DE')", ['1', '2', '3', '4', '5', '6', '7'], flags=util.HTML ) def test_avoid_implicit_language(self): """Test that we can narrow language selection to elements that match and explicitly state language.""" # Target element with language and language attribute self.assert_selector( self.MARKUP, "p[lang]:lang(de-DE)", ['6'], flags=util.HTML ) def test_language_list(self): """Test language list.""" # Multiple languages markup = """

""" self.assert_selector( markup, "p:lang(de-DE, '*-US')", ['1', '3', '4', '5', '6'], flags=util.HTML ) def test_undetermined_language(self): """Test undetermined language.""" markup = """

""" self.assert_selector( markup, "p:lang(en)", [], flags=util.HTML ) def test_language_in_header(self): """Test that we can find language in header.""" markup = """

""" self.assert_selector( markup, "p:lang('*-US')", ['1', '2'], flags=util.HTML ) def test_xml_style_language_in_html5(self): """Test XML style language when out of HTML5 namespace.""" markup = """
""" self.assert_selector( markup, "mtext:lang(en)", ['1'], flags=util.HTML5 ) def test_xml_style_language(self): """Test XML style language.""" # XML style language markup = """

""" self.assert_selector( markup, "p:lang(de-DE)", ['1', '2', '3', '4', '5', '6'], flags=util.XML ) def test_language_in_xhtml(self): """Test language in XHTML.""" markup = """

""" self.assert_selector( markup, "p:lang(de-DE)", ['1', '2', '3', '4', '5', '6'], flags=util.XML ) def test_language_in_xhtml_without_html_style_lang(self): """ Test language in XHTML. HTML namespace elements must use HTML style language. """ # XHTML language: `lang` markup = """

""" self.assert_selector( markup, "p:lang(de-DE)", [], flags=util.XHTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_local_link.py0000644000175100001710000000133100000000000022211 0ustar00runnerdocker"""Test local link selectors.""" from .. import util class TestLocalLink(util.TestCase): """Test local link selectors.""" MARKUP = """ Link Another link """ def test_local_link(self): """Test local link (matches nothing).""" self.assert_selector( self.MARKUP, "a:local-link", [], flags=util.HTML ) def test_not_local_link(self): """Test not local link.""" self.assert_selector( self.MARKUP, "a:not(:local-link)", ["1", "2"], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_matches.py0000644000175100001710000000140700000000000021532 0ustar00runnerdocker"""Test matches selectors.""" from .. import util class TestMatches(util.TestCase): """Test matches selectors.""" MARKUP = """

Some text in a paragraph. Link

""" def test_matches(self): """Test multiple selectors with "matches".""" self.assert_selector( self.MARKUP, ":matches(span, a)", ["1", "2"], flags=util.HTML ) def test_nested_matches(self): """Test multiple nested selectors with "matches".""" self.assert_selector( self.MARKUP, ":matches(span, a:matches(#\\32))", ["1", "2"], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_not.py0000644000175100001710000000132300000000000020703 0ustar00runnerdocker"""Test not selectors.""" from .. import util class TestNot(util.TestCase): """Test not selectors.""" def test_multi_nested_not(self): """Test nested not and multiple selectors.""" markup = """

Some text in a paragraph.

Link Direct child
        Child 1
        Child 2
        Child 3
        
""" self.assert_selector( markup, 'div :not(p, :not([id=\\35]))', ['5'], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_nth_child.py0000644000175100001710000000226200000000000022042 0ustar00runnerdocker"""Test `nth` child selectors.""" from .. import util class TestNthChild(util.TestCase): """Test `nth` child selectors.""" MARKUP = """

""" def test_nth_child_of_s_simple(self): """Test `nth` child with selector (simple).""" self.assert_selector( self.MARKUP, ":nth-child(-n+3 of p)", ['0', '1', '7'], flags=util.HTML ) def test_nth_child_of_s_complex(self): """Test `nth` child with selector (complex).""" self.assert_selector( self.MARKUP, ":nth-child(2n + 1 of :is(p, span).test)", ['2', '6', '10'], flags=util.HTML ) self.assert_selector( self.MARKUP, ":nth-child(2n + 1 OF :is(p, span).test)", ['2', '6', '10'], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_optional.py0000644000175100001710000000152100000000000021730 0ustar00runnerdocker"""Test optional selectors.""" from .. import util class TestOptional(util.TestCase): """Test optional selectors.""" MARKUP = """
""" def test_optional(self): """Test optional.""" self.assert_selector( self.MARKUP, ":optional", ['3', '4', '5'], flags=util.HTML ) def test_specific_optional(self): """Test specific optional.""" self.assert_selector( self.MARKUP, "input:optional", ['3'], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_out_of_range.py0000644000175100001710000002526000000000000022560 0ustar00runnerdocker"""Test out of range selectors.""" from .. import util class TestOutOfRange(util.TestCase): """Test out of range selectors.""" def test_out_of_range_number(self): """Test in range number.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['9', '10', '11'], flags=util.HTML ) def test_out_of_range_range(self): """Test in range range.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['9', '10'], flags=util.HTML ) def test_out_of_range_month(self): """Test in range month.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['7', '8', '9', '10'], flags=util.HTML ) def test_out_of_range_week(self): """Test in range week.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['8', '9', '10', '11'], flags=util.HTML ) def test_out_of_range_date(self): """Test in range date.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['7', '8', '9', '10', '11', '12'], flags=util.HTML ) def test_out_of_range_date_time(self): """Test in range date time.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['7', '8', '9', '10', '11', '12', '13', '14', '15', '16'], flags=util.HTML ) def test_out_of_range_time(self): """Test in range time.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['8', '9', '10', '11', '12', '13', '14'], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 
soupsieve-2.3.1/tests/test_level4/test_past.py0000644000175100001710000000142100000000000021051 0ustar00runnerdocker"""Test past selectors.""" from .. import util class TestPast(util.TestCase): """Test past selectors.""" MARKUP = """

Some text in a paragraph. Link Placeholder text.

""" def test_past(self): """Test past (should match nothing).""" self.assert_selector( self.MARKUP, "p:past", [], flags=util.HTML ) def test_not_past(self): """Test not past.""" self.assert_selector( self.MARKUP, "p:not(:past)", ["0"], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_paused.py0000644000175100001710000000164300000000000021371 0ustar00runnerdocker"""Test paused selectors.""" from .. import util class TestPaused(util.TestCase): """Test paused selectors.""" MARKUP = """ """ def test_paused(self): """Test paused (matches nothing).""" # Not actually sure how this is used, but it won't match anything anyways self.assert_selector( self.MARKUP, "video:paused", [], flags=util.HTML ) def test_not_paused(self): """Test not paused.""" self.assert_selector( self.MARKUP, "video:not(:paused)", ["vid"], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_placeholder_shown.py0000644000175100001710000000620200000000000023604 0ustar00runnerdocker"""Test placeholder shown selectors.""" from .. import util class TestPlaceholderShown(util.TestCase): """Test placeholder shown selectors.""" def test_placeholder_shown(self): """Test placeholder shown.""" markup = """ /* These have a placeholder. */ /* These do not have a placeholder. */ /* All types that should register has having a placeholder. */ /* Types that should not register has having a placeholder. */ /* Value will not override this instance as value is empty. */ /* Value will override this input */ /* Text area content overides the placeholder /* Text area is still considered empty with a single new line (does not include carriage return). 
*/ """ self.assert_selector( markup, ":placeholder-shown", ['0', '1', '4', '5', '6', '7', '8', '9', '10', '11', '12', '28', '32'], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_playing.py0000644000175100001710000000165400000000000021555 0ustar00runnerdocker"""Test playing selectors.""" from .. import util class TestPlaying(util.TestCase): """Test playing selectors.""" MARKUP = """ """ def test_playing(self): """Test playing (matches nothing).""" # Not actually sure how this is used, but it won't match anything anyways self.assert_selector( self.MARKUP, "video:playing", [], flags=util.HTML ) def test_not_playing(self): """Test not playing.""" self.assert_selector( self.MARKUP, "video:not(:playing)", ["vid"], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_read_only.py0000644000175100001710000000337600000000000022071 0ustar00runnerdocker"""Test read only selectors.""" from .. import util class TestReadOnly(util.TestCase): """Test read only selectors.""" def test_read_only(self): """Test read only.""" markup = """

Text

Text

Text

Text

Text

""" self.assert_selector( markup, "body :read-only", [ '3', '13', '14', '15', '18', '19', '20', '22', '23', '24', '25', '31', '32', '33' ], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_read_write.py0000644000175100001710000000337400000000000022240 0ustar00runnerdocker"""Test read write selectors.""" from .. import util class TestReadWrite(util.TestCase): """Test read write selectors.""" def test_read_write(self): """Test read write.""" markup = """

Text

Text

Text

Text

Text

""" self.assert_selector( markup, ":read-write", [ '0', '1', '2', '4', '5', '6', '7', '8', '9', '10', '11', '12', '16', '17', '21', '26', '27', '28', '29', '30' ], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_required.py0000644000175100001710000000155500000000000021732 0ustar00runnerdocker"""Test required selectors.""" from .. import util class TestRequired(util.TestCase): """Test required selectors.""" MARKUP = """
""" def test_required(self): """Test required.""" self.assert_selector( self.MARKUP, ":required", ['1', '2', '4', '5'], flags=util.HTML ) def test_specific_required(self): """Test specific required.""" self.assert_selector( self.MARKUP, "input:required", ['1', '2'], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_scope.py0000644000175100001710000000555600000000000021230 0ustar00runnerdocker"""Test scope selectors.""" from .. import util import soupsieve as sv class TestScope(util.TestCase): """Test scope selectors.""" MARKUP = """

Some text in a paragraph.

Link Direct child
    Child 1
    Child 2
    Child 3
    
""" def test_scope_is_root(self): """Test scope is the root when the a specific element is not the target of the select call.""" # Scope is root when applied to a document node self.assert_selector( self.MARKUP, ":scope", ["root"], flags=util.HTML ) self.assert_selector( self.MARKUP, ":scope > body > div", ["div"], flags=util.HTML ) def test_scope_cannot_select_target(self): """Test that scope, the element which scope is called on, cannot be selected.""" for parser in util.available_parsers( 'html.parser', 'lxml', 'html5lib', 'xml'): soup = self.soup(self.MARKUP, parser) el = soup.html # Scope is the element we are applying the select to, and that element is never returned self.assertTrue(len(sv.select(':scope', el, flags=sv.DEBUG)) == 0) def test_scope_is_select_target(self): """Test that scope is the element which scope is called on.""" for parser in util.available_parsers( 'html.parser', 'lxml', 'html5lib', 'xml'): soup = self.soup(self.MARKUP, parser) el = soup.html # Scope here means the current element under select ids = [] for el in sv.select(':scope div', el, flags=sv.DEBUG): ids.append(el.attrs['id']) self.assertEqual(sorted(ids), sorted(['div'])) el = soup.body ids = [] for el in sv.select(':scope div', el, flags=sv.DEBUG): ids.append(el.attrs['id']) self.assertEqual(sorted(ids), sorted(['div'])) # `div` is the current element under select, and it has no `div` elements. 
el = soup.div ids = [] for el in sv.select(':scope div', el, flags=sv.DEBUG): ids.append(el.attrs['id']) self.assertEqual(sorted(ids), sorted([])) # `div` does have an element with the class `.wordshere` ids = [] for el in sv.select(':scope .wordshere', el, flags=sv.DEBUG): ids.append(el.attrs['id']) self.assertEqual(sorted(ids), sorted(['pre'])) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_target_within.py0000644000175100001710000000147200000000000022760 0ustar00runnerdocker"""Test target within selectors.""" from .. import util class TestTargetWithin(util.TestCase): """Test target within selectors.""" MARKUP = """ Jump

Header 1

content

Header 2

content

""" def test_target_within(self): """Test target within.""" self.assert_selector( self.MARKUP, "article:target-within", [], flags=util.HTML ) def test_not_target_within(self): """Test inverse of target within.""" self.assert_selector( self.MARKUP, "article:not(:target-within)", ["article"], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_user_invalid.py0000644000175100001710000000113400000000000022567 0ustar00runnerdocker"""Test invalid selectors.""" from .. import util class TestInvalid(util.TestCase): """Test invalid selectors.""" def test_user_invalid(self): """Test user invalid (matches nothing).""" markup = """
""" self.assert_selector( markup, "input:user-invalid", [], flags=util.HTML ) self.assert_selector( markup, "input:not(:user-invalid)", ["1"], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_level4/test_where.py0000644000175100001710000000136300000000000021221 0ustar00runnerdocker"""Test where selectors.""" from .. import util class TestWhere(util.TestCase): """Test where selectors.""" MARKUP = """

Some text in a paragraph. Link

""" def test_where(self): """Test multiple selectors with "where".""" self.assert_selector( self.MARKUP, ":where(span, a)", ["1", "2"], flags=util.HTML ) def test_nested_where(self): """Test multiple nested selectors with "where".""" self.assert_selector( self.MARKUP, ":where(span, a:where(#\\32))", ["1", "2"], flags=util.HTML ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_quirks.py0000644000175100001710000000101600000000000017166 0ustar00runnerdocker"""Test quirky behaviors.""" from . import util from bs4 import BeautifulSoup as BS class TestQuirks(util.TestCase): """Test quirky behaviors.""" def test_quirky_user_attrs(self): """Test cases where a user creates weird attributes: nested sequences.""" html = """
test
""" soup = BS(html, 'html.parser') soup.div.attrs['user'] = [['a']] print(soup.div.attrs) self.assertTrue(soup.select_one('div[user="[\'a\']"]') is not None) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/test_versions.py0000644000175100001710000000772400000000000017534 0ustar00runnerdocker"""Version tests.""" import unittest from soupsieve.__meta__ import Version, parse_version class TestVersion(unittest.TestCase): """Test versions.""" def test_version_output(self): """Test that versions generate proper strings.""" assert Version(1, 0, 0, "final")._get_canonical() == "1.0" assert Version(1, 2, 0, "final")._get_canonical() == "1.2" assert Version(1, 2, 3, "final")._get_canonical() == "1.2.3" assert Version(1, 2, 0, "alpha", pre=4)._get_canonical() == "1.2a4" assert Version(1, 2, 0, "beta", pre=4)._get_canonical() == "1.2b4" assert Version(1, 2, 0, "candidate", pre=4)._get_canonical() == "1.2rc4" assert Version(1, 2, 0, "final", post=1)._get_canonical() == "1.2.post1" assert Version(1, 2, 3, ".dev-alpha", pre=1)._get_canonical() == "1.2.3a1.dev0" assert Version(1, 2, 3, ".dev")._get_canonical() == "1.2.3.dev0" assert Version(1, 2, 3, ".dev", dev=1)._get_canonical() == "1.2.3.dev1" def test_version_comparison(self): """Test that versions compare proper.""" assert Version(1, 0, 0, "final") < Version(1, 2, 0, "final") assert Version(1, 2, 0, "alpha", pre=4) < Version(1, 2, 0, "final") assert Version(1, 2, 0, "final") < Version(1, 2, 0, "final", post=1) assert Version(1, 2, 3, ".dev-beta", pre=2) < Version(1, 2, 3, "beta", pre=2) assert Version(1, 2, 3, ".dev") < Version(1, 2, 3, ".dev-beta", pre=2) assert Version(1, 2, 3, ".dev") < Version(1, 2, 3, ".dev", dev=1) def test_version_parsing(self): """Test version parsing.""" assert parse_version( Version(1, 0, 0, "final")._get_canonical() ) == Version(1, 0, 0, "final") assert parse_version( Version(1, 2, 0, "final")._get_canonical() ) == Version(1, 2, 0, 
"final") assert parse_version( Version(1, 2, 3, "final")._get_canonical() ) == Version(1, 2, 3, "final") assert parse_version( Version(1, 2, 0, "alpha", pre=4)._get_canonical() ) == Version(1, 2, 0, "alpha", pre=4) assert parse_version( Version(1, 2, 0, "beta", pre=4)._get_canonical() ) == Version(1, 2, 0, "beta", pre=4) assert parse_version( Version(1, 2, 0, "candidate", pre=4)._get_canonical() ) == Version(1, 2, 0, "candidate", pre=4) assert parse_version( Version(1, 2, 0, "final", post=1)._get_canonical() ) == Version(1, 2, 0, "final", post=1) assert parse_version( Version(1, 2, 3, ".dev-alpha", pre=1)._get_canonical() ) == Version(1, 2, 3, ".dev-alpha", pre=1) assert parse_version( Version(1, 2, 3, ".dev")._get_canonical() ) == Version(1, 2, 3, ".dev") assert parse_version( Version(1, 2, 3, ".dev", dev=1)._get_canonical() ) == Version(1, 2, 3, ".dev", dev=1) def test_asserts(self): """Test asserts.""" with self.assertRaises(ValueError): Version("1", "2", "3") with self.assertRaises(ValueError): Version(1, 2, 3, 1) with self.assertRaises(ValueError): Version("1", "2", "3") with self.assertRaises(ValueError): Version(1, 2, 3, "bad") with self.assertRaises(ValueError): Version(1, 2, 3, "alpha") with self.assertRaises(ValueError): Version(1, 2, 3, "alpha", pre=1, dev=1) with self.assertRaises(ValueError): Version(1, 2, 3, "alpha", pre=1, post=1) with self.assertRaises(ValueError): Version(1, 2, 3, ".dev-alpha") with self.assertRaises(ValueError): Version(1, 2, 3, ".dev-alpha", pre=1, post=1) with self.assertRaises(ValueError): Version(1, 2, 3, pre=1) with self.assertRaises(ValueError): Version(1, 2, 3, dev=1) with self.assertRaises(ValueError): parse_version('bad&version') ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tests/util.py0000644000175100001710000001012600000000000015570 0ustar00runnerdocker"""Test utilities.""" import unittest import bs4 import textwrap import soupsieve as sv import pytest 
try: from bs4.builder import HTML5TreeBuilder # noqa: F401 HTML5LIB_PRESENT = True except ImportError: HTML5LIB_PRESENT = False try: from bs4.builder import LXMLTreeBuilderForXML, LXMLTreeBuilder # noqa: F401 LXML_PRESENT = True except ImportError: LXML_PRESENT = False HTML5 = 0x1 HTML = 0x2 XHTML = 0x4 XML = 0x8 PYHTML = 0x10 LXML_HTML = 0x20 def skip_no_lxml(func): """Decorator that skips lxml is not available.""" def skip_if(self, *args, **kwargs): """Skip conditional wrapper.""" if LXML_PRESENT: return func(self, *args, **kwargs) else: raise pytest.skip('lxml is not found') class TestCase(unittest.TestCase): """Test case.""" def wrap_xhtml(self, html): """Wrap HTML content with XHTML header and body.""" return """ {} """.format(html) def setUp(self): """Setup.""" sv.purge() def purge(self): """Purge cache.""" sv.purge() def compile_pattern(self, selectors, namespaces=None, custom=None, flags=0): """Compile pattern.""" print('PATTERN: ', selectors) flags |= sv.DEBUG return sv.compile(selectors, namespaces=namespaces, custom=custom, flags=flags) def soup(self, markup, parser): """Get soup.""" print('\n====PARSER: ', parser) return bs4.BeautifulSoup(textwrap.dedent(markup.replace('\r\n', '\n')), parser) def get_parsers(self, flags): """Get parsers.""" mode = flags & 0x3F if mode == HTML: parsers = ('html5lib', 'lxml', 'html.parser') elif mode == PYHTML: parsers = ('html.parser',) elif mode == LXML_HTML: parsers = ('lxml',) elif mode in (HTML5, 0): parsers = ('html5lib',) elif mode in (XHTML, XML): parsers = ('xml',) return parsers def assert_raises(self, pattern, exception, namespace=None, custom=None): """Assert raises.""" print('----Running Assert Test----') with self.assertRaises(exception): self.compile_pattern(pattern, namespaces=namespace, custom=custom) def assert_selector(self, markup, selectors, expected_ids, namespaces={}, custom=None, flags=0): """Assert selector.""" parsers = self.get_parsers(flags) print('----Running Selector Test----') selector = 
self.compile_pattern(selectors, namespaces, custom) for parser in available_parsers(*parsers): soup = self.soup(markup, parser) # print(soup) ids = [] for el in selector.select(soup): print('TAG: ', el.name) ids.append(el.attrs['id']) self.assertEqual(sorted(ids), sorted(expected_ids)) def available_parsers(*parsers): """Filter a list of parsers, down to the available ones. If there are none, report the test as skipped to pytest. """ ran_test = False for parser in parsers: if ( (parser in ('xml', 'lxml') and not LXML_PRESENT) or (parser == 'html5lib' and not HTML5LIB_PRESENT) ): print('SKIPPED {}, not installed'.format(parser)) else: ran_test = True yield parser if not ran_test: raise pytest.skip('no available parsers') def requires_lxml(test): """Decorator that marks a test as requiring LXML.""" return pytest.mark.skipif( not LXML_PRESENT, reason='test requires lxml')(test) def requires_html5lib(test): """Decorator that marks a test as requiring html5lib.""" return pytest.mark.skipif( not HTML5LIB_PRESENT, reason='test requires html5lib')(test) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1636638786.0 soupsieve-2.3.1/tox.ini0000644000175100001710000000200500000000000014407 0ustar00runnerdocker[tox] envlist = py36,py37,py38,py39,py310, lint, nolxml, nohtml5lib [testenv] passenv = * deps = -rrequirements/tests.txt commands = mypy py.test --cov soupsieve --cov-append {toxinidir} coverage html -d {envtmpdir}/coverage coverage xml coverage report --show-missing [testenv:documents] passenv = * deps = -rrequirements/docs.txt -rrequirements/project.txt commands = mkdocs build --clean --verbose --strict pyspelling [testenv:lint] passenv = * deps = -rrequirements/project.txt -rrequirements/lint.txt commands = flake8 {toxinidir} [testenv:nolxml] passenv = * deps = -rrequirements/tests.txt commands = pip uninstall -y lxml py.test {toxinidir} [testenv:nohtml5lib] passenv = * deps = -rrequirements/tests.txt commands = pip uninstall -y 
html5lib py.test {toxinidir} [flake8] exclude=build/*,.tox/* max-line-length=120 ignore=D202,D203,D401,E741,W504,N817,N818 [pytest] filterwarnings = ignore:\nCSS selector pattern:UserWarning