pax_global_header 0000666 0000000 0000000 00000000064 15200550336 0014511 g ustar 00root root 0000000 0000000 52 comment=b1d55db4d16a5425fc68576d63519bbfe26dd9c0
scrapy-sphinx-scrapy-615b009/ 0000775 0000000 0000000 00000000000 15200550336 0016070 5 ustar 00root root 0000000 0000000 scrapy-sphinx-scrapy-615b009/.github/ 0000775 0000000 0000000 00000000000 15200550336 0017430 5 ustar 00root root 0000000 0000000 scrapy-sphinx-scrapy-615b009/.github/workflows/ 0000775 0000000 0000000 00000000000 15200550336 0021465 5 ustar 00root root 0000000 0000000 scrapy-sphinx-scrapy-615b009/.github/workflows/publish.yml 0000664 0000000 0000000 00000001064 15200550336 0023657 0 ustar 00root root 0000000 0000000 name: Publish
on:
push:
tags:
- '[0-9]+.[0-9]+.[0-9]+'
jobs:
publish:
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/${{ github.event.repository.name }}
permissions:
id-token: write
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.14
- run: |
python -m pip install --upgrade build
python -m build
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true
scrapy-sphinx-scrapy-615b009/.pre-commit-config.yaml 0000664 0000000 0000000 00000000742 15200550336 0022354 0 ustar 00root root 0000000 0000000 repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.13
hooks:
- id: ruff-check
args: [ --fix ]
- id: ruff-format
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.16.0
hooks:
- id: mypy
- repo: https://github.com/adamchainz/blacken-docs
rev: 1.19.1
hooks:
- id: blacken-docs
additional_dependencies:
- black==25.1.0
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
scrapy-sphinx-scrapy-615b009/.pre-commit-hooks.yaml 0000664 0000000 0000000 00000000176 15200550336 0022233 0 ustar 00root root 0000000 0000000 - id: sphinx-scrapy
name: sphinx-scrapy
entry: sphinx-scrapy update-rtd-config
language: python
pass_filenames: false
scrapy-sphinx-scrapy-615b009/CHANGES.rst 0000664 0000000 0000000 00000010322 15200550336 0017670 0 ustar 00root root 0000000 0000000 =============
Release notes
=============
0.8.6 (2026-05-12)
==================
- Add index templates for extension-defined cross-reference types so related
objects appear in generated Sphinx indexes.
0.8.5 (2026-04-29)
==================
- Normalize project IDs loaded from :file:`pyproject.toml` and inferred from
Sphinx metadata.
0.8.4 (2026-04-28)
==================
- Fix links in :file:`llms.txt`.
0.8.3 (2026-04-22)
==================
- Rewrite generated Markdown links for supported Intersphinx targets from
``.html`` to ``.md``.
- | Extended easy Intersphinx configuration to
| `aiohttp `_
| `curl-cffi `_
| `niquests `_
- Document installation from Git for ``sphinx-scrapy`` and
``sphinx-scrapy[tox]``.
- Upgrade sphinx-markdown-builder.
0.8.2 (2026-04-10)
==================
- Upgrade sphinx-markdown-builder.
- Make the copy-as-Markdown button smaller and place it to the right of the
page title.
0.8.1 (2026-04-10)
==================
- Addressed warnings affecting sitemap generation due to missing ``/`` at the
end of Intersphinx URLs.
0.8.0 (2026-04-09)
==================
- Now generates a sitemap.
0.7.2 (2026-04-06)
==================
- Added a ``tox`` extra, to be used in the ``requires`` section of
``tox.ini`` to ensure a supported version of ``tox`` is used.
0.7.1 (2026-04-06)
==================
- Fixed the generated Read the Docs configuration, which was causing builds
to fail.
- When the pre-commit hook fails, it now reports that it updated
:file:`.readthedocs.yml`.
0.7.0 (2026-04-02)
==================
- Dropped support for Python 3.8 and 3.9, added official support for Python
3.14.
- Added a tox plugin that provides a ``docs`` environment.
- Added a pre-commit hook to create or update ``.readthedocs.yml``.
- Enabling the ``sphinx_scrapy`` extension now automatically enables
``sphinx.ext.autodoc``, ``sphinx.ext.viewcode``, ``sphinx_copybutton``, and
``sphinx_llms_txt``.
- Added a "Copy as Markdown" button on HTML pages that copies each page's
corresponding ``.md`` counterpart.
- | Extended easy Intersphinx configuration to
| `coverage `_
| `cryptography `_
| `multidict `_
| `sphinx `_
| `tox `_
0.6.1 (2025-09-16)
==================
Fixed Python 3.8 support.
0.6.0 (2025-09-16)
==================
| Extended easy Intersphinx configuration to:
| `pydantic `_
0.5.0 (2025-09-16)
==================
Change the minimum version of Python from 3.9 to 3.8.
0.4.0 (2025-09-16)
==================
| Extended easy Intersphinx configuration to:
| `packaging `_
| `pytest `_
0.3.0 (2025-09-02)
==================
| Extended easy Intersphinx configuration to:
| `jinja `_
| `lxml `_
0.2.0 (2025-06-16)
==================
| Extended easy Intersphinx configuration to:
| `dateparser `_
| `form2request `_
| `formasaurus `_
| `python-scrapinghub `_
| `scrapy-spider-metadata `_
| `scrapy-zyte-api `_
| `scrapy-zyte-smartproxy `_
| `scrapyd `_
| `shub `_
| `shub-image `_
| `spidermon `_
| `url-matcher `_
| `zyte-parsers `_
0.1.0 (2025-06-14)
==================
Initial release.
scrapy-sphinx-scrapy-615b009/LICENSE 0000664 0000000 0000000 00000002755 15200550336 0017106 0 ustar 00root root 0000000 0000000 Copyright (c) Scrapy developers.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of Scrapy nor the names of its contributors may be used
to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
scrapy-sphinx-scrapy-615b009/README.rst 0000664 0000000 0000000 00000015335 15200550336 0017566 0 ustar 00root root 0000000 0000000 =============
sphinx-scrapy
=============
|version| |python_version|
.. |version| image:: https://img.shields.io/pypi/v/sphinx-scrapy.svg
:target: https://pypi.org/pypi/sphinx-scrapy
:alt: PyPI version
.. |python_version| image:: https://img.shields.io/pypi/pyversions/sphinx-scrapy.svg
:target: https://pypi.org/pypi/sphinx-scrapy
:alt: Supported Python versions
`Sphinx `_ extension for documentation in the
`Scrapy `_ ecosystem.
Features
========
- Provides a ``docs`` `tox `_
environment.
- Generates a `Read the Docs `_ configuration.
- Enables `commonly-used Sphinx extensions <#sphinx-extensions>`_.
- Configures `sphinx.ext.intersphinx`_ for `Python
`_ and Scrapy_, and streamlines configuration for
`additional packages <#intersphinx-packages>`_.
- Allows you to easily link to Scrapy settings, request metadata keys,
signals and commands:
.. code-block:: rst
:setting:`BOT_NAME`
:setting:`LOG_LEVEL `
:reqmeta:`download_slot`
:signal:`spider_opened`
:command:`crawl`
Setup
=====
#. Configure in ``pyproject.toml`` the Python version for documentation
builds, e.g.:
.. code-block:: toml
[tool.sphinx-scrapy]
python-version = "3.14"
It must be `supported by Read the Docs
`_.
#. Add to ``docs/requirements.txt``:
Install from Git, because this project currently depends on Git-based
packages and cannot be published to PyPI:
.. code-block:: text
sphinx-scrapy @ git+https://github.com/scrapy/sphinx-scrapy.git@0.8.6
#. Add to ``docs/conf.py``:
.. code-block:: python
extensions = [
"sphinx_scrapy",
]
To automatically configure `sphinx.ext.intersphinx`_ for installed
`supported packages <#intersphinx-packages>`_, set:
.. code-block:: python
scrapy_intersphinx_enable_installed = True
You can also enable or disable the automatic `sphinx.ext.intersphinx`_
configuration of packages manually:
.. code-block:: python
scrapy_intersphinx_enable = [
"parsel",
"w3lib",
]
scrapy_intersphinx_disable = [
"scrapy",
]
The ``html_baseurl`` option for `sitemap generation
`_
is generated automatically based on the project name (``pyproject.toml`` or
``conf.py``) and known documentation URLs (the same used for easy
intersphinx configuration) with a fallback to
``https://<project-id>.readthedocs.io/en/latest/``. You can alternatively define
the setting yourself in ``conf.py``.
#. Add to ``docs/.gitignore``:
.. code-block::
/_build/
#. Add to ``.pre-commit-config.yaml``:
.. code-block:: yaml
repos:
- repo: https://github.com/scrapy/sphinx-scrapy
rev: 0.8.6
hooks:
- id: sphinx-scrapy
#. Add to ``tox.ini``:
.. code-block:: ini
[tox]
requires =
sphinx-scrapy[tox] @ git+https://github.com/scrapy/sphinx-scrapy.git@0.8.6
envlist = …,docs
.. note:: ``docs`` in ``envlist`` is required.
You can now build the docs with:
.. code-block:: bash
tox -e docs
.. _sphinx-extensions:
Sphinx extensions
=================
The following Sphinx extensions are automatically enabled when you enable
``sphinx_scrapy``:
- `sphinx.ext.autodoc
`_
- `sphinx.ext.intersphinx`_
- `sphinx.ext.viewcode
`_
- `sphinx_copybutton `_
- `sphinx_llms_txt `_
.. _intersphinx-packages:
Intersphinx packages
====================
``scrapy_intersphinx_enable`` supports the following packages:
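| `aiohttp <https://docs.aiohttp.org/en/stable/>`_
| `attrs <https://www.attrs.org/en/stable/>`_
| `coverage <https://coverage.readthedocs.io/en/latest/>`_
| `cryptography <https://cryptography.io/en/latest/>`_
| `cssselect <https://cssselect.readthedocs.io/en/latest/>`_
| `curl-cffi <https://curl-cffi.readthedocs.io/en/latest/>`_
| `dateparser <https://dateparser.readthedocs.io/en/latest/>`_
| `form2request <https://form2request.readthedocs.io/en/latest/>`_
| `formasaurus <https://formasaurus.readthedocs.io/en/latest/>`_
| `itemloaders <https://itemloaders.readthedocs.io/en/latest/>`_
| `jinja <https://jinja.palletsprojects.com/en/latest/>`_
| `lxml <https://lxml.de/apidoc/>`_
| `multidict <https://multidict.aio-libs.org/en/latest/>`_
| `niquests <https://niquests.readthedocs.io/en/latest/>`_
| `packaging <https://packaging.pypa.io/en/stable/>`_
| `parsel <https://parsel.readthedocs.io/en/latest/>`_
| `pydantic <https://pydantic.dev/docs/validation/latest/>`_
| `pytest <https://docs.pytest.org/en/stable/>`_
| `python-scrapinghub <https://python-scrapinghub.readthedocs.io/en/latest/>`_
| `python-zyte-api <https://python-zyte-api.readthedocs.io/en/stable/>`_
| `scrapy-poet <https://scrapy-poet.readthedocs.io/en/stable/>`_
| `scrapy-spider-metadata <https://scrapy-spider-metadata.readthedocs.io/en/latest/>`_
| `scrapy-zyte-api <https://scrapy-zyte-api.readthedocs.io/en/latest/>`_
| `scrapy-zyte-smartproxy <https://scrapy-zyte-smartproxy.readthedocs.io/en/latest/>`_
| `scrapyd <https://scrapyd.readthedocs.io/en/latest/>`_
| `shub <https://shub.readthedocs.io/en/latest/>`_
| `shub-image <https://shub-image.readthedocs.io/en/latest/>`_
| `sphinx <https://www.sphinx-doc.org/en/master/>`_
| `spidermon <https://spidermon.readthedocs.io/en/latest/>`_
| `tenacity <https://tenacity.readthedocs.io/en/latest/>`_
| `tox <https://tox.wiki/en/latest/>`_
| `twisted <https://docs.twisted.org/en/stable/>`_ (and `twistedapi <https://docs.twisted.org/en/stable/api/>`_)
| `url-matcher <https://url-matcher.readthedocs.io/en/latest/>`_
| `w3lib <https://w3lib.readthedocs.io/en/latest/>`_
| `web-poet <https://web-poet.readthedocs.io/en/stable/>`_
| `zyte <https://docs.zyte.com/>`_
| `zyte-common-items <https://zyte-common-items.readthedocs.io/en/latest/>`_
| `zyte-parsers <https://zyte-parsers.readthedocs.io/en/latest/>`_
| `zyte-spider-templates <https://zyte-spider-templates.readthedocs.io/en/latest/>`_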
Release notes
=============
See the `release notes
`_ for a list of
releases and their changes.
.. _sphinx.ext.intersphinx: https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html scrapy-sphinx-scrapy-615b009/pyproject.toml 0000664 0000000 0000000 00000007412 15200550336 0021010 0 ustar 00root root 0000000 0000000 [build-system]
requires = ["hatchling>=1.27.0"]
build-backend = "hatchling.build"
[project]
name = "sphinx-scrapy"
version = "0.8.6"
description = "Sphinx extension for documentation in the Scrapy ecosystem"
dependencies = [
"packaging",
"sphinx",
"sphinx-copybutton",
"sphinx-llms-txt @ git+https://github.com/zytedata/sphinx-llms-txt.git@5e8866cb0cc249aa2017ad9050b3b83a7ca16f69",
"sphinx-markdown-builder @ git+https://github.com/zytedata/sphinx-markdown-builder.git@cfe4c0bfd7b4542f7e6b65a58cdf9ec765829940",
"sphinx-sitemap",
"tomli; python_version < '3.11'",
]
classifiers = [
"Development Status :: 4 - Beta",
"Framework :: Scrapy",
"Intended Audience :: Developers",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Topic :: Internet :: WWW/HTTP",
"Topic :: Software Development :: Libraries :: Application Frameworks",
"Topic :: Software Development :: Libraries :: Python Modules",
]
license = "BSD-3-Clause"
license-files = ["LICENSE"]
readme = "README.rst"
requires-python = ">=3.10"
authors = [{ name = "Scrapy developers", email = "opensource@zyte.com" }]
maintainers = [{ name = "Scrapy developers", email = "opensource@zyte.com" }]
[project.scripts]
sphinx-scrapy = "sphinx_scrapy.cli:main"
[project.entry-points.tox]
sphinx_scrapy = "sphinx_scrapy.tox_plugin"
[project.urls]
Source = "https://github.com/scrapy/sphinx-scrapy"
Tracker = "https://github.com/scrapy/sphinx-scrapy/issues"
"Release notes" = "https://github.com/scrapy/sphinx-scrapy/blob/main/CHANGES.rst"
[project.optional-dependencies]
tox = ["tox>=4.29.0"]
[tool.hatch.metadata]
allow-direct-references = true
[tool.mypy]
strict = true
[tool.bumpversion]
current_version = "0.8.6"
commit = true
tag = true
tag_name = "{new_version}"
[[tool.bumpversion.files]]
filename = "CHANGES.rst"
search = "\\(unreleased\\)$"
replace = "({now:%Y-%m-%d})"
regex = true
[[tool.bumpversion.files]]
filename = "README.rst"
search = "rev: {current_version}"
replace = "rev: {new_version}"
[[tool.bumpversion.files]]
filename = "README.rst"
search = "sphinx-scrapy @ git+https://github.com/scrapy/sphinx-scrapy.git@{current_version}"
replace = "sphinx-scrapy @ git+https://github.com/scrapy/sphinx-scrapy.git@{new_version}"
[[tool.bumpversion.files]]
filename = "README.rst"
search = "sphinx-scrapy[tox] @ git+https://github.com/scrapy/sphinx-scrapy.git@{current_version}"
replace = "sphinx-scrapy[tox] @ git+https://github.com/scrapy/sphinx-scrapy.git@{new_version}"
[tool.ruff.lint]
extend-select = [
# flake8-bugbear
"B",
# flake8-comprehensions
"C4",
# pydocstyle
"D",
# flake8-future-annotations
"FA",
# flynt
"FLY",
# refurb
"FURB",
# isort
"I",
# flake8-implicit-str-concat
"ISC",
# flake8-logging
"LOG",
# Perflint
"PERF",
# pygrep-hooks
"PGH",
# flake8-pie
"PIE",
# pylint
"PL",
# flake8-pytest-style
"PT",
# flake8-use-pathlib
"PTH",
# flake8-pyi
"PYI",
# flake8-quotes
"Q",
# flake8-return
"RET",
# flake8-raise
"RSE",
# Ruff-specific rules
"RUF",
# flake8-bandit
"S",
# flake8-simplify
"SIM",
# flake8-slots
"SLOT",
# flake8-debugger
"T10",
# flake8-type-checking
"TC",
# flake8-tidy-imports
"TID",
# pyupgrade
"UP",
# pycodestyle warnings
"W",
# flake8-2020
"YTT",
]
ignore = [
# No public API to document
"D",
]
[tool.ruff.lint.pydocstyle]
convention = "pep257"
scrapy-sphinx-scrapy-615b009/sphinx_scrapy/ 0000775 0000000 0000000 00000000000 15200550336 0020762 5 ustar 00root root 0000000 0000000 scrapy-sphinx-scrapy-615b009/sphinx_scrapy/__init__.py 0000664 0000000 0000000 00000030661 15200550336 0023101 0 ustar 00root root 0000000 0000000 from __future__ import annotations
import re
from logging import getLogger
from typing import TYPE_CHECKING
from .config import load_project_config, normalize_project_id
if TYPE_CHECKING:
from collections.abc import Generator
from sphinx.application import Sphinx
from sphinx.config import Config
logger = getLogger(__name__)
# Known documentation sites, keyed by intersphinx entry name.
#
# Each value is the ``(base URL, inventory)`` pair that gets copied verbatim
# into ``intersphinx_mapping``. Every base URL ends with a slash; the first
# element is also used as the default ``html_baseurl`` when the project being
# documented is itself one of these entries.
INTERSPHINX_MAPPING = {
    "aiohttp": ("https://docs.aiohttp.org/en/stable/", None),
    "attrs": ("https://www.attrs.org/en/stable/", None),
    "coverage": ("https://coverage.readthedocs.io/en/latest/", None),
    "cryptography": ("https://cryptography.io/en/latest/", None),
    "cssselect": ("https://cssselect.readthedocs.io/en/latest/", None),
    "curl-cffi": ("https://curl-cffi.readthedocs.io/en/latest/", None),
    "dateparser": ("https://dateparser.readthedocs.io/en/latest/", None),
    "form2request": ("https://form2request.readthedocs.io/en/latest/", None),
    "formasaurus": ("https://formasaurus.readthedocs.io/en/latest/", None),
    "itemloaders": ("https://itemloaders.readthedocs.io/en/latest/", None),
    "jinja": ("https://jinja.palletsprojects.com/en/latest/", None),
    "lxml": ("https://lxml.de/apidoc/", None),
    "multidict": ("https://multidict.aio-libs.org/en/latest/", None),
    "niquests": ("https://niquests.readthedocs.io/en/latest/", None),
    "packaging": ("https://packaging.pypa.io/en/stable/", None),
    "parsel": ("https://parsel.readthedocs.io/en/latest/", None),
    "pydantic": ("https://pydantic.dev/docs/validation/latest/", None),
    "pytest": ("https://docs.pytest.org/en/stable/", None),
    "python": ("https://docs.python.org/3/", None),
    "python-scrapinghub": (
        "https://python-scrapinghub.readthedocs.io/en/latest/",
        None,
    ),
    "python-zyte-api": ("https://python-zyte-api.readthedocs.io/en/stable/", None),
    "scrapy": ("https://docs.scrapy.org/en/latest/", None),
    "scrapy-poet": ("https://scrapy-poet.readthedocs.io/en/stable/", None),
    "scrapy-spider-metadata": (
        "https://scrapy-spider-metadata.readthedocs.io/en/latest/",
        None,
    ),
    "scrapy-zyte-api": ("https://scrapy-zyte-api.readthedocs.io/en/latest/", None),
    "scrapy-zyte-smartproxy": (
        "https://scrapy-zyte-smartproxy.readthedocs.io/en/latest/",
        None,
    ),
    "scrapyd": ("https://scrapyd.readthedocs.io/en/latest/", None),
    "shub": ("https://shub.readthedocs.io/en/latest/", None),
    "shub-image": ("https://shub-image.readthedocs.io/en/latest/", None),
    "sphinx": ("https://www.sphinx-doc.org/en/master/", None),
    "spidermon": ("https://spidermon.readthedocs.io/en/latest/", None),
    "tenacity": ("https://tenacity.readthedocs.io/en/latest/", None),
    "tox": ("https://tox.wiki/en/latest/", None),
    "twisted": ("https://docs.twisted.org/en/stable/", None),
    "twistedapi": ("https://docs.twisted.org/en/stable/api/", None),
    "url-matcher": ("https://url-matcher.readthedocs.io/en/latest/", None),
    "w3lib": ("https://w3lib.readthedocs.io/en/latest/", None),
    "web-poet": ("https://web-poet.readthedocs.io/en/stable/", None),
    "zyte": ("https://docs.zyte.com/", None),
    "zyte-common-items": ("https://zyte-common-items.readthedocs.io/en/latest/", None),
    "zyte-parsers": ("https://zyte-parsers.readthedocs.io/en/latest/", None),
    "zyte-spider-templates": (
        "https://zyte-spider-templates.readthedocs.io/en/latest/",
        None,
    ),
}
# By default, intersphinx entries are configured if a same-name module is
# installed. Here you can set True to always configure the entry unless
# explicitly disabled, False to never configure the entry unless explicitly
# requested, or a string to use as the package name to check for.
#
# Keys are INTERSPHINX_MAPPING entry names; entries without an override use
# the default same-name check.
PACKAGE_OVERRIDES: dict[str, bool | str] = {
    "python": True,
    "python-zyte-api": "zyte-api",
    "scrapy": True,
    "twistedapi": "twisted",
    "zyte": False,
}
# Inline JavaScript injected into HTML pages by add_copy_as_markdown_button().
#
# The script adds a small button (label "M" + down arrow) next to the first
# ``h1`` of the page (``#main h1`` preferred). Clicking it fetches the ``.md``
# counterpart of the current URL (``foo.html`` -> ``foo.md``, ``dir/`` ->
# ``dir/index.md``), copies the text to the clipboard and shows "Copied" or
# "Error" on the button for one second.
#
# This is a regular (non-raw) Python string: ``\u2193`` is resolved by Python
# and ``\\n`` reaches the browser as the JavaScript escape ``\n``.
COPY_AS_MARKDOWN_BUTTON_JS = """
(function () {
var DEFAULT_LABEL = 'M\u2193';
var SUCCESS_LABEL = 'Copied';
var ERROR_LABEL = 'Error';
function markdownPathFromCurrentPage(pathname) {
if (pathname.endsWith('.html')) {
return pathname.slice(0, -5) + '.md';
}
if (pathname.endsWith('/')) {
return pathname + 'index.md';
}
var lastPart = pathname.split('/').pop() || '';
if (!lastPart.includes('.')) {
return pathname + '.md';
}
return pathname + '.md';
}
async function copyToClipboard(text) {
if (navigator.clipboard && window.isSecureContext) {
await navigator.clipboard.writeText(text);
return;
}
var textarea = document.createElement('textarea');
textarea.value = text;
textarea.setAttribute('readonly', 'readonly');
textarea.style.position = 'fixed';
textarea.style.left = '-9999px';
document.body.appendChild(textarea);
textarea.select();
document.execCommand('copy');
document.body.removeChild(textarea);
}
function setTemporaryLabel(button, label, previousLabel) {
var prev = typeof previousLabel !== 'undefined' ? previousLabel : button.textContent;
button.textContent = label;
window.setTimeout(function () {
button.textContent = prev;
button.disabled = false;
}, 1000);
}
async function onButtonClick(button) {
var previousLabel = button.textContent;
button.disabled = true;
button.textContent = '...';
try {
var mdPath = markdownPathFromCurrentPage(window.location.pathname);
var response = await fetch(mdPath, { credentials: 'same-origin' });
if (!response.ok) {
throw new Error('Unable to fetch markdown source');
}
var markdown = await response.text();
await copyToClipboard(markdown);
setTemporaryLabel(button, SUCCESS_LABEL, previousLabel);
} catch (_error) {
setTemporaryLabel(button, ERROR_LABEL, previousLabel);
}
}
function addStyles() {
var style = document.createElement('style');
style.textContent = [
'.scrapy-copy-as-markdown {',
' display: inline-block;',
' margin-left: 0.25rem;',
' border: 1px solid #c9d4de;',
' border-radius: 0.45rem;',
' background: #ffffff;',
' color: #233a50;',
' font: inherit;',
' font-size: 0.875rem;',
' line-height: 1;',
' padding: 0.25rem 0.25rem;',
' box-shadow: 0 2px 10px rgba(0, 0, 0, 0.08);',
' cursor: pointer;',
'}',
'.scrapy-copy-as-markdown:hover {',
' background: #f3f7fb;',
'}',
'.scrapy-copy-as-markdown:disabled {',
' opacity: 0.75;',
' cursor: default;',
'}',
'.scrapy-copy-as-markdown-title-wrapper {',
' display: flex;',
' align-items: center;',
' justify-content: space-between;',
' gap: 1rem;',
' width: 100%;',
'}',
'.scrapy-copy-as-markdown-title-wrapper h1 {',
' margin: 0;',
'}',
].join('\\n');
document.head.appendChild(style);
}
function addButton() {
if (!document.body || document.querySelector('.scrapy-copy-as-markdown')) {
return;
}
addStyles();
var button = document.createElement('button');
button.type = 'button';
button.className = 'scrapy-copy-as-markdown';
button.title = 'Copy this page as Markdown';
button.setAttribute('aria-label', 'Copy this page as Markdown');
button.textContent = DEFAULT_LABEL;
button.addEventListener('click', function () {
onButtonClick(button);
});
var h1 = document.querySelector('#main h1') || document.querySelector('h1');
if (h1 && h1.parentNode) {
var parent = h1.parentNode;
var wrapper = document.createElement('div');
wrapper.className = 'scrapy-copy-as-markdown-title-wrapper';
parent.replaceChild(wrapper, h1);
wrapper.appendChild(h1);
wrapper.appendChild(button);
} else {
// fallback: insert at top of body but keep within the first container
var container = document.body.firstElementChild || document.body;
container.insertBefore(button, container.firstChild);
}
}
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', addButton);
} else {
addButton();
}
})();
"""
def setup(app: Sphinx) -> None:
    """Register the sphinx-scrapy extension on *app*.

    Declares the ``scrapy_intersphinx_*`` options, loads the companion
    extensions, hooks the configuration/builder event handlers and adds the
    ``setting``, ``signal``, ``command`` and ``reqmeta`` cross-reference types.
    """
    list_only = frozenset({list})
    options = (
        ("scrapy_intersphinx_enable", [], list_only),
        ("scrapy_intersphinx_enable_installed", False, frozenset({bool})),
        ("scrapy_intersphinx_disable", [], list_only),
    )
    for option_name, default, allowed_types in options:
        app.add_config_value(option_name, default, "env", types=allowed_types)

    companion_extensions = (
        "sphinx.ext.autodoc",
        "sphinx.ext.intersphinx",
        "sphinx.ext.viewcode",
        "sphinx_copybutton",
        "sphinx_llms_txt",
        "sphinx_sitemap",
    )
    for companion in companion_extensions:
        app.setup_extension(companion)

    # Registration order is kept: both builder-inited handlers first, then the
    # config-inited one.
    handlers = (
        ("builder-inited", add_copy_as_markdown_button),
        ("builder-inited", set_better_defaults),
        ("config-inited", update_config),
    )
    for event_name, handler in handlers:
        app.connect(event_name, handler)

    # https://github.com/scrapy/scrapy/blob/dba37674e6eaa6c2030c8eb35ebf8127cd488062/docs/_ext/scrapydocs.py#L90C16-L110C6
    for kind in ("setting", "signal", "command", "reqmeta"):
        app.add_crossref_type(
            directivename=kind,
            rolename=kind,
            indextemplate=f"pair: %s; {kind}",
        )
def add_copy_as_markdown_button(app: Sphinx) -> None:
    """Inject the copy-as-Markdown script when the active builder emits HTML.

    Builders whose ``format`` is anything other than ``"html"`` are left
    untouched.
    """
    emits_html = app.builder.format == "html"
    if emits_html:
        # ``None`` as filename plus ``body`` makes Sphinx inline the script.
        app.add_js_file(None, body=COPY_AS_MARKDOWN_BUTTON_JS)
def update_config(app: Sphinx, config: Config) -> None:
    """Handle ``config-inited``: fill in intersphinx and sitemap settings.

    *app* is part of the event signature and is not used.
    """
    for configure in (configure_intersphinx, configure_sitemap):
        configure(config)
def set_better_defaults(app: Sphinx) -> None:
    """Apply sphinx-scrapy's preferred sitemap defaults on ``builder-inited``.

    ``sitemap_excludes`` and ``sitemap_url_scheme`` are written to
    ``app.config`` unless the project configured them itself.
    """
    better_defaults = {
        "sitemap_excludes": ["genindex.html", "search.html"],
        "sitemap_url_scheme": "{link}",
    }
    # The previous lookup only read ``app._raw_config``.
    # NOTE(review): Sphinx appears to keep the values read from conf.py on the
    # Config object (``app.config._raw_config``), not on the application, in
    # which case the old lookup was always empty and manual settings were
    # overwritten. Confirm against the supported Sphinx versions. Both
    # objects are consulted so a manual setting is honoured wherever it is
    # recorded.
    manual_keys: set[str] = set()
    for holder in (app, app.config):
        manual_keys.update(getattr(holder, "_raw_config", None) or {})
    for key, value in better_defaults.items():
        if key not in manual_keys:
            setattr(app.config, key, value)
def configure_intersphinx(config: Config) -> None:
    """Add the selected INTERSPHINX_MAPPING entries to ``intersphinx_mapping``.

    An entry is selected when it is listed in ``scrapy_intersphinx_enable``,
    when it is always-on (override ``True``) and not listed in
    ``scrapy_intersphinx_disable``, or, if
    ``scrapy_intersphinx_enable_installed`` is set, when ``installed()``
    reports it among the remaining non-disabled entries.
    """
    disabled = set(config.scrapy_intersphinx_disable)
    requested = set(config.scrapy_intersphinx_enable)
    always_on = {
        name for name in INTERSPHINX_MAPPING if PACKAGE_OVERRIDES.get(name) is True
    }
    selected = requested | (always_on - disabled)
    if config.scrapy_intersphinx_enable_installed:
        # Only probe entries that are not already decided one way or the other.
        undecided = set(INTERSPHINX_MAPPING) - disabled - requested - always_on
        selected = selected | set(installed(undecided))
    for name in selected:
        config.intersphinx_mapping[name] = INTERSPHINX_MAPPING[name]
def configure_sitemap(config: Config) -> None:
    """Make sure ``html_baseurl`` is set and ends with a slash.

    When the option is empty, the URL is derived from the project ID in the
    project configuration or, failing that, from ``config.project`` with
    whitespace runs replaced by ``-``. Known projects use their
    INTERSPHINX_MAPPING URL, others fall back to Read the Docs. If no project
    name can be determined the option is left untouched.
    """
    if not config.html_baseurl:
        package: str | None
        project_id = load_project_config().project_id
        if project_id:
            package = normalize_project_id(project_id)
        elif hasattr(config, "project"):
            dashed = re.sub(r"\s+", "-", str(config.project))
            package = normalize_project_id(dashed)
        else:
            package = None
        if not package:
            return
        config.html_baseurl = (
            INTERSPHINX_MAPPING[package][0]
            if package in INTERSPHINX_MAPPING
            else f"https://{package}.readthedocs.io/en/latest/"
        )
    if config.html_baseurl.endswith("/"):
        return
    config.html_baseurl = config.html_baseurl + "/"
    logger.warning("html_baseurl should end with a slash; automatically fixed to %r", config.html_baseurl)
def installed(names: set[str]) -> Generator[str, None, None]:
    """Yield the entries of *names* whose backing Python package is importable.

    The package probed for an entry is taken from ``PACKAGE_OVERRIDES`` when
    the override is a string, and is the entry name itself otherwise. Entries
    overridden with ``False`` are never yielded. Dashes are turned into
    underscores to obtain the module name passed to ``__import__``.
    """
    # Keyed by module name so entries that share a package (for example an
    # entry overridden to another entry's package) are imported only once.
    checked: dict[str, bool] = {}
    for name in names:
        override = PACKAGE_OVERRIDES.get(name)
        if override is False:
            continue
        # A string override names the package to probe instead of the entry.
        package = override if isinstance(override, str) else name
        module_name = package.replace("-", "_")
        if module_name not in checked:
            try:
                __import__(module_name)
            except ImportError:
                checked[module_name] = False
            else:
                checked[module_name] = True
        if checked[module_name]:
            yield name
scrapy-sphinx-scrapy-615b009/sphinx_scrapy/cli.py 0000664 0000000 0000000 00000027070 15200550336 0022111 0 ustar 00root root 0000000 0000000 from __future__ import annotations
import argparse
from collections.abc import Callable
import concurrent.futures
import re
import shutil
import sys
from http import HTTPStatus
from typing import TYPE_CHECKING
from urllib.error import HTTPError, URLError
from urllib.parse import urlsplit, urlunsplit
from urllib.request import Request, urlopen
from packaging.version import InvalidVersion, Version
from sphinx.cmd.build import main as sphinx_build_main
from . import INTERSPHINX_MAPPING
from .config import LATEST_RTD_PYTHON_VERSION, load_project_config, normalize_project_id
if TYPE_CHECKING:
from pathlib import Path
# Absolute http(s) URL, ending at whitespace, brackets, braces or quotes.
URL_PATTERN = re.compile(r"https?://[^\s<>()\[\]{}\"']+")
# Inline Markdown link or image, ``[text](target "title")``. The named groups
# are read by _rewrite_markdown_links(): ``prefix`` is everything up to and
# including the opening parenthesis, ``target`` the link destination,
# ``title`` the optional quoted title with its leading whitespace, and
# ``suffix`` the closing parenthesis.
MARKDOWN_LINK_PATTERN = re.compile(
    r"(?P<prefix>!?\[[^\]]*\]\()"
    r"(?P<target>[^)\s]+)"
    r"(?P<title>\s+\"[^\"]*\")?"
    r"(?P<suffix>\))"
)
def _intersphinx_base_urls() -> tuple[str, ...]:
bases = {url for url, _inventory in INTERSPHINX_MAPPING.values()}
return tuple(sorted(bases, key=len, reverse=True))
def _project_docs_base_url(project_id: str | None) -> str | None:
if not project_id:
return None
normalized_project_id = normalize_project_id(project_id)
if not normalized_project_id:
return None
if normalized_project_id in INTERSPHINX_MAPPING:
return INTERSPHINX_MAPPING[normalized_project_id][0]
return f"https://{normalized_project_id}.readthedocs.io/en/latest/"
def _split_wrapped_markdown_target(target: str) -> tuple[str, str, str]:
if len(target) >= 2 and target.startswith("<") and target.endswith(">"):
return "<", target[1:-1], ">"
return "", target, ""
def _rewrite_markdown_links(
content: str,
rewrite_target: Callable[[str], str],
) -> str:
in_fence = False
fence_char = ""
rewritten_lines: list[str] = []
def replacement(match: re.Match[str]) -> str:
target = match.group("target")
rewritten_target = rewrite_target(target)
if rewritten_target == target:
return match.group(0)
title = match.group("title") or ""
return (
f"{match.group('prefix')}"
f"{rewritten_target}"
f"{title}"
f"{match.group('suffix')}"
)
for line in content.splitlines(keepends=True):
stripped = line.lstrip()
if stripped.startswith("```") or stripped.startswith("~~~"):
marker = stripped[0]
if not in_fence:
in_fence = True
fence_char = marker
elif marker == fence_char:
in_fence = False
fence_char = ""
rewritten_lines.append(line)
continue
if in_fence:
rewritten_lines.append(line)
continue
rewritten_lines.append(MARKDOWN_LINK_PATTERN.sub(replacement, line))
return "".join(rewritten_lines)
def _rewrite_llms_link_target(
target: str,
docs_host: str,
docs_path_prefix: str,
) -> str:
wrapper_prefix, wrapped_target, wrapper_suffix = (
_split_wrapped_markdown_target(target)
)
parts = urlsplit(wrapped_target)
if parts.scheme or parts.netloc:
if parts.scheme not in {"http", "https"}:
return target
if parts.netloc.lower() != docs_host:
return target
if not parts.path.endswith(".md"):
return target
clean_path = parts.path.lstrip("/")
while clean_path.startswith("./"):
clean_path = clean_path[2:]
if clean_path.startswith("../"):
return target
if docs_path_prefix and not clean_path.startswith(docs_path_prefix):
clean_path = f"{docs_path_prefix}{clean_path}"
rewritten_target = urlunsplit(("", "", clean_path, parts.query, parts.fragment))
return f"{wrapper_prefix}{rewritten_target}{wrapper_suffix}"
def _rewrite_llms_links_to_docs_path(
output_dir: Path,
docs_base_url: str | None,
) -> None:
llms_path = output_dir / "llms.txt"
if docs_base_url is None or not llms_path.is_file():
return
docs_parts = urlsplit(docs_base_url)
docs_host = docs_parts.netloc.lower()
docs_path = docs_parts.path.strip("/")
docs_path_prefix = f"{docs_path}/" if docs_path else ""
content = llms_path.read_text(encoding="utf-8")
rewritten = _rewrite_markdown_links(
content,
lambda target: _rewrite_llms_link_target(target, docs_host, docs_path_prefix),
)
if rewritten != content:
llms_path.write_text(rewritten, encoding="utf-8")
def _iter_markdown_outputs(output_dir: Path) -> list[Path]:
files = sorted(output_dir.rglob("*.md"))
for filename in ("llms.txt", "llms-full.txt"):
path = output_dir / filename
if path.is_file():
files.append(path)
return files
def _matching_base_url(url: str, base_urls: tuple[str, ...]) -> str | None:
for base_url in base_urls:
if url.startswith(base_url):
return base_url
return None
def _supports_markdown(base_url: str) -> bool:
probe_url = f"{base_url.rstrip('/')}/index.md"
headers = {"User-Agent": "sphinx-scrapy"}
for method in ("HEAD", "GET"):
request = Request(probe_url, headers=headers, method=method) # noqa: S310
try:
with urlopen(request, timeout=5) as response: # noqa: S310
return HTTPStatus.OK <= response.status < HTTPStatus.BAD_REQUEST
except HTTPError as error:
if method == "HEAD" and error.code in {
HTTPStatus.METHOD_NOT_ALLOWED,
HTTPStatus.NOT_IMPLEMENTED,
}:
continue
return False
except URLError:
return False
return False
def _rewrite_url_to_markdown(url: str, enabled_base_urls: set[str], base_urls: tuple[str, ...]) -> str:
base_url = _matching_base_url(url, base_urls)
if not base_url or base_url not in enabled_base_urls:
return url
parts = urlsplit(url)
path = parts.path
if not path.endswith(".html"):
return url
new_path = f"{path[:-5]}.md"
return urlunsplit((parts.scheme, parts.netloc, new_path, parts.query, parts.fragment))
def _rewrite_intersphinx_links_to_markdown(output_dir: Path) -> None:
    """Point ``.html`` links to known documentation sites at ``.md`` pages.

    Scans the Markdown outputs in *output_dir* for URLs under an intersphinx
    base URL, probes each such site once (concurrently) for Markdown support,
    and rewrites the links of supporting sites in place. Files are only
    written back when their content changed.
    """
    outputs = _iter_markdown_outputs(output_dir)
    if not outputs:
        return
    base_urls = _intersphinx_base_urls()

    # First pass: read every file once and collect the sites worth probing.
    texts: dict[Path, str] = {}
    candidates: set[str] = set()
    for path in outputs:
        text = path.read_text(encoding="utf-8")
        texts[path] = text
        for found in URL_PATTERN.finditer(text):
            url = found.group(0)
            if ".html" in url:
                matched = _matching_base_url(url, base_urls)
                if matched is not None:
                    candidates.add(matched)
    if not candidates:
        return

    pool_size = min(8, len(candidates))
    with concurrent.futures.ThreadPoolExecutor(max_workers=pool_size) as executor:
        probes = {
            base: executor.submit(_supports_markdown, base) for base in candidates
        }
    enabled = {base for base, probe in probes.items() if probe.result()}
    if not enabled:
        return

    def _to_markdown(found: re.Match[str]) -> str:
        return _rewrite_url_to_markdown(found.group(0), enabled, base_urls)

    # Second pass: rewrite from the cached contents.
    for path, text in texts.items():
        updated = URL_PATTERN.sub(_to_markdown, text)
        if updated != text:
            path.write_text(updated, encoding="utf-8")
def _builder_settings(builder: str) -> list[str]:
if builder == "html":
return ["-D", "llms_txt_uri_template={docname}.md"]
if builder == "singlemarkdown":
return [
"-D", "llms_txt_uri_template={docname}.md",
"-D", "singlemarkdown_flavor=llm",
]
return []
def _run_builder(builder: str, source_dir: Path, build_dir: Path) -> None:
    """Run one Sphinx builder, writing its output to ``build_dir / builder``.

    The argument list is ``-b <builder>``, followed by the overrides from
    ``_builder_settings``, the source directory and the output directory.

    Raises:
        RuntimeError: if ``sphinx_build_main`` returns a non-zero exit code.
    """
    output_dir = build_dir / builder
    argv = ["-b", builder]
    argv.extend(_builder_settings(builder))
    argv.extend([str(source_dir), str(output_dir)])

    status = sphinx_build_main(argv)
    if status == 0:
        return
    msg = f"sphinx builder failed for '{builder}' with exit code {status}"
    raise RuntimeError(msg)
def build_docs() -> int:
    """Build the project documentation and merge the results in one folder.

    The ``html``, ``markdown`` and ``singlemarkdown`` builders are run in
    parallel, one worker process each, with output under
    ``docs/_build/<builder>``.  The HTML and Markdown trees are then copied
    into ``docs/_build/all``, the single-file Markdown output is copied there
    as ``llms-full.txt``, and links in the merged tree are post-processed.

    Returns:
        ``0`` on success, ``1`` if the project has no ``docs`` directory.

    Raises:
        RuntimeError: propagated from ``_run_builder`` when a builder fails.
    """
    config = load_project_config()
    docs_base_url = _project_docs_base_url(config.project_id)

    docs_dir = config.root / "docs"
    if not docs_dir.is_dir():
        print("docs directory not found", file=sys.stderr)
        return 1

    build_root = docs_dir / "_build"
    build_root.mkdir(parents=True, exist_ok=True)

    builders = ["html", "markdown", "singlemarkdown"]
    with concurrent.futures.ProcessPoolExecutor(max_workers=len(builders)) as pool:
        jobs = [
            pool.submit(_run_builder, name, docs_dir, build_root)
            for name in builders
        ]
        # Calling result() re-raises any exception from the worker process.
        for finished in concurrent.futures.as_completed(jobs):
            finished.result()

    merged_dir = build_root / "all"
    merged_dir.mkdir(parents=True, exist_ok=True)
    # HTML first, then Markdown, both layered into the same directory.
    for name in ("html", "markdown"):
        shutil.copytree(build_root / name, merged_dir, dirs_exist_ok=True)
    shutil.copy2(
        build_root / "singlemarkdown" / "index.md",
        merged_dir / "llms-full.txt",
    )

    _rewrite_intersphinx_links_to_markdown(merged_dir)
    _rewrite_llms_links_to_docs_path(merged_dir, docs_base_url)

    print("\nDocumentation generated in docs/_build/all.")
    return 0
def update_rtd_config() -> int:
    """Write ``.readthedocs.yml`` at the project root.

    The Python version comes from the loaded project configuration.  When
    the configuration carries no version (``python_version`` is ``None``),
    ``LATEST_RTD_PYTHON_VERSION`` is used instead of failing.

    Returns:
        ``0`` after the file has been written; ``1`` (with a message on
        stderr and no file written) when the configured version cannot be
        parsed or is newer than ``LATEST_RTD_PYTHON_VERSION``.
    """
    config = load_project_config()

    # A missing version would make ``Version(None)`` raise TypeError, which
    # the ``InvalidVersion`` handler below does not cover.
    python_version = config.python_version
    if python_version is None:
        python_version = LATEST_RTD_PYTHON_VERSION

    try:
        parsed_version = Version(python_version)
    except InvalidVersion:
        print(
            f"Invalid Python version in pyproject.toml: {python_version}",
            file=sys.stderr,
        )
        return 1

    if parsed_version > Version(LATEST_RTD_PYTHON_VERSION):
        print(
            f"Configured Python ({python_version}) is newer than the "
            f"latest Read the Docs supported Python version known to "
            f"sphinx-scrapy ({LATEST_RTD_PYTHON_VERSION}).",
            file=sys.stderr,
        )
        return 1

    # Indentation is significant in YAML: ``python`` must sit under
    # ``tools`` and the command list under ``commands``.
    lines = [
        "version: 2",
        "build:",
        "  os: ubuntu-24.04",
        "  tools:",
        f'    python: "{python_version}"',
        "  commands:",
        "    - pip install tox",
        "    - tox -e docs",
        "    - mkdir -p $READTHEDOCS_OUTPUT/html",
        "    - cp -a docs/_build/all/. $READTHEDOCS_OUTPUT/html/",
        "",  # yields the trailing newline
    ]
    output = config.root / ".readthedocs.yml"
    output.write_text("\n".join(lines), encoding="utf-8")
    print("Updated .readthedocs.yml")
    return 0
def build_parser() -> argparse.ArgumentParser:
    """Create the ``sphinx-scrapy`` argument parser.

    The parser requires exactly one subcommand, ``build`` or
    ``update-rtd-config``, and stores its name in the ``command`` attribute
    of the parsed namespace.
    """
    parser = argparse.ArgumentParser(prog="sphinx-scrapy")
    commands = parser.add_subparsers(dest="command", required=True)
    for name in ("build", "update-rtd-config"):
        commands.add_parser(name)
    return parser
def main(argv: list[str] | None = None) -> int:
    """Parse *argv* and run the selected subcommand.

    Args:
        argv: Arguments to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The status code returned by the subcommand.
    """
    parser = build_parser()
    command = parser.parse_args(argv).command
    if command == "build":
        return build_docs()
    if command != "update-rtd-config":
        # parser.error() exits the process; the return only satisfies the
        # declared return type.
        parser.error(f"Unknown command: {command}")
        return 2
    return update_rtd_config()
if __name__ == "__main__":
    # Use the CLI's integer status as the process exit code.
    sys.exit(main())
scrapy-sphinx-scrapy-615b009/sphinx_scrapy/config.py 0000664 0000000 0000000 00000004731 15200550336 0022606 0 ustar 00root root 0000000 0000000 from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from packaging.utils import canonicalize_name
try:
import tomllib
except ModuleNotFoundError:
import tomli as tomllib
# Newest Python version that sphinx-scrapy knows Read the Docs to support.
# Used as the default when pyproject.toml does not set
# ``[tool.sphinx-scrapy] python-version``.
LATEST_RTD_PYTHON_VERSION = "3.14"
@dataclass(frozen=True)
class ProjectConfig:
    """Immutable snapshot of the project settings used by sphinx-scrapy.

    Only ``root`` is mandatory; every other field has an "unset" default.
    """

    # Directory treated as the project root.
    root: Path
    # Python version string for the docs build, or None when not configured.
    python_version: str | None = None
    # Keys of ``[project.optional-dependencies]``; a fresh set per instance.
    extras: set[str] = field(default_factory=set)
    # Normalized project name, or None when the project declares no name.
    project_id: str | None = None
def find_project_root(start: Path | None = None) -> Path:
    """Locate the nearest directory that contains a ``pyproject.toml``.

    The search begins at *start* (the current working directory when
    omitted), resolved to an absolute path, and walks up through its parents.

    Raises:
        FileNotFoundError: if neither the starting directory nor any of its
            ancestors contains a ``pyproject.toml`` file.
    """
    origin = (start if start else Path.cwd()).resolve()
    lineage = [origin, *origin.parents]
    root = next(
        (folder for folder in lineage if (folder / "pyproject.toml").is_file()),
        None,
    )
    if root is None:
        msg = "Could not find pyproject.toml in the current directory or parent directories"
        raise FileNotFoundError(msg)
    return root
def get_extras(pyproject_data: dict[str, object]) -> set[str]:
    """Return the names of the extras declared in parsed pyproject data.

    The names are the keys of ``[project.optional-dependencies]``, each
    converted to ``str``.  A missing ``project`` table or a missing
    ``optional-dependencies`` table yields an empty set.
    """
    optional = pyproject_data.get("project", {}).get("optional-dependencies", {})
    return set(map(str, optional))
def normalize_project_id(project_id: str) -> str:
    """Strip surrounding whitespace from *project_id* and canonicalize it.

    Canonicalization is delegated to ``packaging.utils.canonicalize_name``.
    """
    trimmed = project_id.strip()
    return canonicalize_name(trimmed)
def load_project_config(root: Path | None = None) -> ProjectConfig:
    """Load project configuration from a pyproject.toml if available.

    When *root* is given and directly contains a pyproject.toml, that
    directory is used as the project root as-is.  Otherwise the file is
    searched for upwards, starting from *root* (or from the current working
    directory when *root* is omitted).

    If no pyproject.toml can be found, return a minimal ``ProjectConfig``
    containing only the resolved starting directory as ``root`` and leaving
    the other fields empty/None.

    Args:
        root: Directory to load from or to start searching from.

    Returns:
        The configuration read from pyproject.toml:

        * ``python_version`` comes from ``[tool.sphinx-scrapy]
          python-version``, defaulting to ``LATEST_RTD_PYTHON_VERSION``.
        * ``extras`` holds the keys of ``[project.optional-dependencies]``.
        * ``project_id`` is the normalized ``[project] name``, or ``None``
          if the name is absent or normalizes to an empty string.
    """
    if root is not None and (root / "pyproject.toml").is_file():
        project_root = root
    else:
        # Previously an explicit root without a pyproject.toml skipped the
        # search and crashed with FileNotFoundError when opening the file.
        try:
            project_root = find_project_root(root)
        except FileNotFoundError:
            return ProjectConfig(root=(root or Path.cwd()).resolve())

    with (project_root / "pyproject.toml").open("rb") as fp:
        pyproject_data = tomllib.load(fp)

    scrapy_data = pyproject_data.get("tool", {}).get("sphinx-scrapy", {})
    python_version = scrapy_data.get("python-version", LATEST_RTD_PYTHON_VERSION)

    project_id = None
    raw_project_id = pyproject_data.get("project", {}).get("name")
    if raw_project_id is not None:
        # A name that normalizes to an empty string counts as "no project id".
        project_id = normalize_project_id(str(raw_project_id)) or None

    return ProjectConfig(
        root=project_root,
        python_version=str(python_version) if python_version is not None else None,
        extras=get_extras(pyproject_data),
        project_id=project_id,
    )
scrapy-sphinx-scrapy-615b009/sphinx_scrapy/tox_plugin.py 0000664 0000000 0000000 00000001616 15200550336 0023530 0 ustar 00root root 0000000 0000000 from __future__ import annotations
from typing import TYPE_CHECKING
from tox.config.loader.memory import MemoryLoader
from tox.plugin import impl
from .config import load_project_config
if TYPE_CHECKING:
from tox.config.sets import ConfigSet
from tox.session.state import State
def _python_executable(version: str) -> str:
return f"python{version}"
@impl
def tox_extend_envs() -> tuple[str, ...]:
    """tox hook: return the extra environment names this plugin provides.

    Only one environment, ``docs``, is declared.
    """
    extra_envs = ("docs",)
    return extra_envs
@impl
def tox_add_core_config(core_conf: ConfigSet, state: State) -> None:
    """tox hook: seed the ``docs`` environment with in-memory settings.

    A ``MemoryLoader`` is appended to ``state.conf.memory_seed_loaders["docs"]``.
    It carries the description, dependencies, extras and build command, all
    derived from the loaded project configuration.

    ``base_python`` is pinned only when the project configuration carries a
    Python version.  Without one (no pyproject.toml found) the setting is
    left out rather than set to the nonexistent interpreter ``pythonNone``.

    Args:
        core_conf: Core tox configuration set; unused, required by the hook
            signature.
        state: tox session state whose configuration receives the loader.
    """
    project_config = load_project_config()
    settings: dict[str, object] = {
        "description": "build documentation",
        "deps": ["-rdocs/requirements.txt"],
        # Sorted so the configuration does not depend on set iteration order.
        "extras": tuple(sorted(project_config.extras)),
        "commands": ["sphinx-scrapy build"],
    }
    if project_config.python_version is not None:
        settings["base_python"] = _python_executable(project_config.python_version)
    state.conf.memory_seed_loaders["docs"].append(MemoryLoader(**settings))