././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1763461527.882978 pysolr-3.11.0/0000755000175100017510000000000015107044630012572 5ustar00runnerrunner././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/.editorconfig0000644000175100017510000000077415107044623015261 0ustar00runnerrunner# See https://editorconfig.org for format details and # https://editorconfig.org/#download for editor / IDE integration root = true [*] indent_style = space indent_size = 4 insert_final_newline = true trim_trailing_whitespace = true end_of_line = lf charset = utf-8 # Makefiles always use tabs for indentation [Makefile] indent_style = tab # We don't want to apply our defaults to third-party code or minified bundles: [**/{external,vendor}/**,**.min.{js,css}] indent_style = ignore indent_size = ignore ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/.gitchangelog.rc0000644000175100017510000001413215107044623015634 0ustar00runnerrunner## ## Format ## ## ACTION: [AUDIENCE:] COMMIT_MSG [!TAG ...] ## ## Description ## ## ACTION is one of 'chg', 'fix', 'new' ## ## Is WHAT the change is about. ## ## 'chg' is for refactor, small improvement, cosmetic changes... ## 'fix' is for bug fixes ## 'new' is for new features, big improvement ## ## AUDIENCE is optional and one of 'dev', 'usr', 'pkg', 'test', 'doc' ## ## Is WHO is concerned by the change. ## ## 'dev' is for developpers (API changes, refactors...) ## 'usr' is for final users (UI changes) ## 'pkg' is for packagers (packaging changes) ## 'test' is for testers (test only related changes) ## 'doc' is for doc guys (doc only changes) ## ## COMMIT_MSG is ... well ... the commit message itself. ## ## TAGs are additionnal adjective as 'refactor' 'minor' 'cosmetic' ## ## They are preceded with a '!' or a '@' (prefer the former, as the ## latter is wrongly interpreted in github.) 
Commonly used tags are: ## ## 'refactor' is obviously for refactoring code only ## 'minor' is for a very meaningless change (a typo, adding a comment) ## 'cosmetic' is for cosmetic driven change (re-indentation, 80-col...) ## 'wip' is for partial functionality but complete subfunctionality. ## ## Example: ## ## new: usr: support of bazaar implemented ## chg: re-indentend some lines !cosmetic ## new: dev: updated code to be compatible with last version of killer lib. ## fix: pkg: updated year of licence coverage. ## new: test: added a bunch of test around user usability of feature X. ## fix: typo in spelling my name in comment. !minor ## ## Please note that multi-line commit message are supported, and only the ## first line will be considered as the "summary" of the commit message. So ## tags, and other rules only applies to the summary. The body of the commit ## message will be displayed in the changelog without reformatting. ## ## ``ignore_regexps`` is a line of regexps ## ## Any commit having its full commit message matching any regexp listed here ## will be ignored and won't be reported in the changelog. ## ignore_regexps = [ r'@minor', r'!minor', r'@cosmetic', r'!cosmetic', r'@refactor', r'!refactor', r'@wip', r'!wip', r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*[p|P]kg:', r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*[d|D]ev:', r'^(.{3,3}\s*:)?\s*[fF]irst commit.?\s*$', ] ## ``section_regexps`` is a list of 2-tuples associating a string label and a ## list of regexp ## ## Commit messages will be classified in sections thanks to this. Section ## titles are the label, and a commit is classified under this section if any ## of the regexps associated is matching. 
## section_regexps = [ ('New', [ r'^[nN]ew\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', ]), ('Changes', [ r'^[cC]hg\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', ]), ('Fix', [ r'^[fF]ix\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', ]), ('Other', None ## Match all lines ), ] ## ``body_process`` is a callable ## ## This callable will be given the original body and result will ## be used in the changelog. ## ## Available constructs are: ## ## - any python callable that take one txt argument and return txt argument. ## ## - ReSub(pattern, replacement): will apply regexp substitution. ## ## - Indent(chars=" "): will indent the text with the prefix ## Please remember that template engines gets also to modify the text and ## will usually indent themselves the text if needed. ## ## - Wrap(regexp=r"\n\n"): re-wrap text in separate paragraph to fill 80-Columns ## ## - noop: do nothing ## ## - ucfirst: ensure the first letter is uppercase. ## (usually used in the ``subject_process`` pipeline) ## ## - final_dot: ensure text finishes with a dot ## (usually used in the ``subject_process`` pipeline) ## ## - strip: remove any spaces before or after the content of the string ## ## Additionally, you can `pipe` the provided filters, for instance: #body_process = Wrap(regexp=r'\n(?=\w+\s*:)') | Indent(chars=" ") #body_process = Wrap(regexp=r'\n(?=\w+\s*:)') #body_process = noop body_process = ReSub(r'((^|\n)[A-Z]\w+(-\w+)*: .*(\n\s+.*)*)+$', r'') | strip ## ``subject_process`` is a callable ## ## This callable will be given the original subject and result will ## be used in the changelog. ## ## Available constructs are those listed in ``body_process`` doc. subject_process = (strip | ReSub(r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n@]*)(@[a-z]+\s+)*$', r'\4') | ucfirst | final_dot) ## ``tag_filter_regexp`` is a regexp ## ## Tags that will be used for the changelog must match this regexp. 
## tag_filter_regexp = r'^v[0-9]+\.[0-9]+(\.[0-9]+)?$' ## ``unreleased_version_label`` is a string ## ## This label will be used as the changelog Title of the last set of changes ## between last valid tag and HEAD if any. unreleased_version_label = "%%version%% (unreleased)" ## ``output_engine`` is a callable ## ## This will change the output format of the generated changelog file ## ## Available choices are: ## ## - rest_py ## ## Legacy pure python engine, outputs ReSTructured text. ## This is the default. ## ## - mustache() ## ## Template name could be any of the available templates in ## ``templates/mustache/*.tpl``. ## Requires python package ``pystache``. ## Examples: ## - mustache("markdown") ## - mustache("restructuredtext") ## ## - makotemplate() ## ## Template name could be any of the available templates in ## ``templates/mako/*.tpl``. ## Requires python package ``mako``. ## Examples: ## - makotemplate("restructuredtext") ## output_engine = rest_py #output_engine = mustache("restructuredtext") #output_engine = mustache("markdown") #output_engine = makotemplate("restructuredtext") ## ``include_merge`` is a boolean ## ## This option tells git-log whether to include merge commits in the log. ## The default is to include them. include_merge = True ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1763461527.8809779 pysolr-3.11.0/.github/0000755000175100017510000000000015107044630014132 5ustar00runnerrunner././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/.github/dependabot.yml0000644000175100017510000000044715107044623016771 0ustar00runnerrunner# Keep GitHub Actions up to date with Dependabot... 
# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "daily" ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/.github/issue_template.md0000644000175100017510000000050615107044623017502 0ustar00runnerrunner# I have * [ ] Tested with the latest release * [ ] Tested with the current master branch * [ ] Searched for similar existing issues ## Expected behaviour ## Actual behaviour ## Steps to reproduce the behaviour 1. ## Configuration * Operating system version: * Search engine version: * Python version: * pysolr version: ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/.github/pull_request_template.md0000644000175100017510000000061115107044623021073 0ustar00runnerrunner# Hey, thanks for contributing to pysolr. Please confirm that [the tests pass](https://github.com/django-haystack/pysolr/blob/master/README.rst#running-tests) locally # Once your pull request has been submitted, the full test suite will be executed on https://travis-ci.org/django-haystack/pysolr/pull_requests. Pull requests with passing tests are far more likely to be reviewed and merged. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/.github/stale.yml0000644000175100017510000000125315107044623015770 0ustar00runnerrunner# Number of days of inactivity before an issue becomes stale daysUntilStale: 180 # Number of days of inactivity before a stale issue is closed daysUntilClose: 30 # Issues with these labels will never be considered stale exemptLabels: - pinned - security # Label to use when marking an issue as stale staleLabel: stale # Comment to post when marking an issue as stale. 
Set to `false` to disable markComment: > This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions. # Comment to post when closing a stale issue. Set to `false` to disable closeComment: true ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1763461527.8809779 pysolr-3.11.0/.github/workflows/0000755000175100017510000000000015107044630016167 5ustar00runnerrunner././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/.github/workflows/codeql-analysis.yml0000644000175100017510000000207115107044623022004 0ustar00runnerrunnername: "CodeQL" on: push: branches: [master, ] pull_request: # The branches below must be a subset of the branches above branches: [master] schedule: - cron: '0 19 * * 6' jobs: analyze: name: Analyze runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v5 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. fetch-depth: 2 # If this run was triggered by a pull request event, then checkout # the head of the pull request instead of the merge commit. - run: git checkout HEAD^2 if: ${{ github.event_name == 'pull_request' }} # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL uses: github/codeql-action/init@v4 # Override language selection by uncommenting this and choosing your languages # with: # languages: go, javascript, csharp, python, cpp, java - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v4 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/.github/workflows/pypi-release.yml0000644000175100017510000000215115107044623021312 0ustar00runnerrunnername: "PyPI releases" on: release jobs: build_sdist: name: Build Python source distribution runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - name: Build sdist run: pipx run build --sdist - uses: actions/upload-artifact@v5 with: path: dist/*.tar.gz pypi-publish: name: Upload release to PyPI if: github.event_name == 'release' && github.event.action == 'published' needs: - build_sdist runs-on: ubuntu-latest environment: name: pypi url: https://pypi.org/p/pysolr permissions: id-token: write steps: - uses: actions/download-artifact@v6 with: # unpacks default artifact into dist/ # if `name: artifact` is omitted, the action will create extra parent dir name: artifact path: dist - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/.github/workflows/tox.yml0000644000175100017510000000077515107044623017537 0ustar00runnerrunnername: ci on: push: branches: [master] pull_request: branches: [master] jobs: tox: strategy: fail-fast: false matrix: python: ["3.10", "3.11", "3.12", "3.13", "3.14", "3.14t"] runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} allow-prereleases: true - run: pip install --upgrade pip - run: pip install tox - run: tox -e py ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 
pysolr-3.11.0/.gitignore0000644000175100017510000000007215107044623014563 0ustar00runnerrunner.tox solr*.tgz solr-app solr solr.* venv logs __pycache__ ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/.pre-commit-config.yaml0000644000175100017510000000302315107044623017053 0ustar00runnerrunnerci: autoupdate_schedule: monthly repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: - id: check-added-large-files args: ["--maxkb=128"] - id: check-ast - id: check-case-conflict - id: check-docstring-first - id: check-executables-have-shebangs - id: check-json - id: check-merge-conflict - id: check-symlinks - id: check-toml - id: check-xml - id: check-yaml - id: debug-statements - id: detect-aws-credentials args: ["--allow-missing-credentials"] - id: detect-private-key - id: end-of-file-fixer - id: fix-byte-order-marker - id: mixed-line-ending args: ["--fix=lf"] - id: pretty-format-json args: ["--autofix", "--no-sort-keys", "--indent=4"] - id: trailing-whitespace - repo: https://github.com/codespell-project/codespell rev: v2.4.1 hooks: - id: codespell additional_dependencies: - tomli - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.14.3 hooks: - id: ruff-check args: [ --fix ] - id: ruff-format - repo: https://github.com/tox-dev/pyproject-fmt rev: v2.11.0 hooks: - id: pyproject-fmt - repo: https://github.com/abravalheri/validate-pyproject rev: v0.24.1 hooks: - id: validate-pyproject ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/AUTHORS0000644000175100017510000000303415107044623013644 0ustar00runnerrunnerPrimaries: * Joseph Kocherhans * Daniel Lindsley * Jacob Kaplan-Moss * Chris Adams Contributors: * initcrash for a patch regarding datetime formatting. * maciekp.lists for a patch correcting URL construction. * jarek & dekstop for a patch regarding sending Unicode documents. 
* Tomasz.Wegrzanowski for a patch to enable document boosting. * thomas.j.lee for a patch to add stats support. * Chak for a patch regarding empty string being unnecessarily sent. * james.colin.brady for a patch to enable working with the cores. * anti-social for a patch on charset sending. * akaihola for a patch regarding long queries. * bochecha for various patches. * stugots for an invalid character patch. * notanumber for a field boosting patch. * acdha for various patches. * zyegfryed for various patches. * girasquid for a patch related to server string. * David Cramer (dcramer) for various patches. * dourvais for a query time patch. * soypunk for a debug patch. * cordmata for a patch to handle how Solr 3.X returns suggestions. * pabluk for Tika integration improvements. * gthb for a patch to add grouping support. * timsavage for a patch making add() compatible with generators * Karol Sikora (@sicarrots) for Solr 4 softCommit support * Çağatay Çallı (@faraday) for Solr 4 field update support * Emmanuel Leblond (@touilleMan) for fixing error handling on Python 3 * Michał Jaworski (@swistakm) for improved Sentry-friendly logging * Upayavira (@upayavira) for SolrCloud support * Kwame Porter Robinson (@robinsonkwame) for adding overwrite support to Solr.add ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/CHANGELOG.rst0000644000175100017510000007567115107044623014635 0ustar00runnerrunnerChangelog ========= %%version%% (unreleased) ------------------------ New ~~~ - Support for nested documents (closes #170) [Chris Adams] This adds support for Solr's nested documents in `Solr.add` Thanks to @skirsdeda for the patch - ZooKeeper can receive an existing KazooClient instance. [Chris Adams] This simplifies advanced customization by allowing you to pass in an existing instance which is configured in whatever manner necessary. Changes ~~~~~~~ - Logging: pass full request body + headers as extra data. 
[Chris Adams] This doesn't affect the normal logging output but is helpful for aggregation systems such as Sentry where the full request information may be displayed for debugging purposes - Basic max_retries for ZooKeeper. [Chris Adams] Kazoo sets the default to hang forever which is frequently not the desired error-handling behavior. This makes it easier to set a limit on the number of retries and we use it in testing to avoid the suite hanging endlessly. - Better error message for Solr failures. [Chris Adams] Previously when ZooKeeper had no active shards pysolr would return an error when `random.shuffle` received an empty list. Now it will raise an exception which will hopefully indicate just how bad the situation is. - Remove __del__ methods. [Chris Adams] The __del__ methods were added in an attempt to avoid Kazoo-related failures as part of the SolrCloud support but can cause other problems on different versions of Python (see #193). Since the errors in question were observed during testing this commit removes the __del__ methods and we will have to find an alternative for making tests fail safely. - Custom Commit Policy . [Evan Fagerberg] Previously a ``Solr`` object assumed that an operation should commit by default. It is generally good practice to limit the amount of commits to solr. Excessive commits risk opening too many searcher or using too many system resources. Therefore the commit policy is configurable via the ``always_commit`` attribute of the ``Solr`` object. Most solr configurations should already have an interval that defines how long to wait before doing a commit to solr anyway. (Measured either in time or number of documents) IMPORTANT: If you are upgrading to this version and need to keep committing by default, change the Solr objects to have ``always_commit=True``. - `pysolr.version_info` and `pysolr.pkg_distribution` have been removed. 
[Craig de Stigter] - Added dependency on `importlib_metadata` for Python < 3.8 [Craig de Stigter] Fix ~~~ - Set KazooClient timeout. [Chris Adams] `__init__` was not actually passing this to the ZooKeeper client Other ~~~~~ - Better docstring for SolrCoreAdmin. [Chris Adams] Thanks to Patricio Del Boca (@pdelboca) for the patch. Closes #185 - Require requests >= 2.9.1 (closes #177) [Chris Adams] This will avoid compatibility issues on Python 3 which can produce confusing errors. - Merge pull request #203 from bendemott/documentation. [Chris Adams] updated typo in documentation example - Updated typo in documentation example. [Ben DeMott] "Zookeeper" should be "ZooKeeper" on line 104 in README.rst - Docs: note that add has commit=True by default (see #46) [Chris Adams] Thanks to @mlissner - Adds note about commit=True being the default. [Mike Lissner] - Correctly handle time-zone aware dates (#201) [Andrew Kuchling] Thanks to Andrew Kuchling (@akuchling) for the patch. Closes #197, #198 - Oops.. Add a missing assert in tests. [Tadas Dailyda] - Refactor _build_doc to be recursive and allow deeper document nesting, fix tests accordingly. [Tadas Dailyda] - Add some block join queries to test_search. [Tadas Dailyda] - Add some nested docs to the tests. [Tadas Dailyda] - Implement nested documents functionality. [Tadas Dailyda] - ZooKeeper: by default use the same timeout for commands and connections. [Chris Adams] - Tox: run SolrCloud tests (parity with Travis CI) [Chris Adams] - Update project URL. [Chris Adams] - Fixed DeprecationWarning from `pkg_resources` on Python 3.10+ [Craig de Stigter] Closes #464 v3.5.0 (2016-05-24) ------------------- New ~~~ - Expose the full Solr response in `Results` [Chris Adams] This makes life easier for anyone using custom extensions by removing the need to create a `Results` subclass just to get access to an extra dictionary key. - More flexible control of request handlers. 
[nuarhu] This allows configuring the default search handler and overriding it for every query method Thanks to @nuarhu for the patch - Start maintaining a changelog from gitchangelog. [Chris Adams] - Overwrite flag for Solr.add (closes #182) [Chris Adams] Thanks to @robinsonkwame for the patch - SolrCloud support (see #138) [Chris Adams] This optionally adds support for SolrCloud using the Kazoo client library. Thanks to @upayavira Other ~~~~~ - V3.5.0. [Chris Adams] - Merge pull request #192 from dhruvpathak/optimize_commit_flag. [Chris Adams] chg: `optimize()` also accepts `commit` flag - Included commit flag in optimize() to let optimize call run with or without commit. [dhruv.pathak] - Merge pull request #188 from TigorC/master. [Chris Adams] Removed py26 from tox.ini - Removed py26 from tox.ini. [Igor Tokarev] - Tests: avoid timeout-based CI failures. [Chris Adams] These caused sporadic CI build failures and weren’t otherwise testing actual functionality since we don’t have a test which does something like SIGSTOP the test Solr server long enough to confirm a timeout. We’ll confirm that the timeout is passed through but otherwise use the defaults. - Update Travis CI badge in the README. [Chris Adams] - Merge pull request #184 from atuljangra/master. [Chris Adams] Correct documentation for `_update` Thanks to @atuljangra for the patch! - Merge branch 'master' of https://github.com/atuljangra/pysolr. [atuljangra] - Misleading comments. [atuljangra] - Travis: use build matrix for regular and SolrCloud tests. [Chris Adams] - Test_cloud: remove dead code. [Chris Adams] The first instance of test_custom_results_class was broken because it used the wrong port but this wasn’t failing because the same method name was redefined further down in the file and that used the updated port config. - PEP-8. [Chris Adams] - ZooKeeper: log unexpected format changes to watched aliases. [Chris Adams] - ZooKeeper: restore JSON blob decoding. [Chris Adams] - PEP-8. 
[Chris Adams] - PEP-8 unused imports. [Chris Adams] - PEP-8. [Chris Adams] - PEP-8. [Chris Adams] - PEP-8. [Chris Adams] - Setup.cfg: add pep8 and isort config. [Chris Adams] - Tear down requests.Session instance at close. [Chris Adams] This avoids log-spew on modern unittest implementations which report unclosed file handles at the end of a run. - Remove Python 2.6 from Travis test matrix. [Chris Adams] - Add __future__ absolute_import. [Chris Adams] This is currently moot but avoids any chance of regression between Python 2.x and 3.x. - PEP-8. [Chris Adams] - Drop support for Python 2.6. [Chris Adams] We have some old import dances and other overhead for Python 2.6 support, which the CPython developers dropped support for in 2013: http://www.curiousefficiency.org/posts/2015/04/stop-supporting-python26.html - Allow queries to be directed to different search handlers. [Chris Adams] The `search` method now allows you override the default `select` handler when your Solr instance has multiple search handlers. Thanks to @k-patel for the patch. Closes #174 Closes #175 v3.4.0 (2016-02-02) ------------------- - Update version numbers for v3.4.0. [Chris Adams] - Logging: better message for HTTP status != 200. [Chris Adams] We already extract error message from Solr responses and that is great. Unfortunately it can contain the data that may change with every request (like document id). This creates an issue when user uses Sentry or other solution that captures logging or exceptions. Previous implementation causes many duplicated events in Sentry if message extracted using `self._extract_error(resp)` contained such variable data. This change uses 'non-mutable' message that is complemented with extracted data that using string formatting option supplied by Python logging. Thanks to this, Sentry and other solutions can perform better grouping of logging messages (by status code). This is approach that is already used in handling other errors. 
- Fix response error handling on Python 3 (closes #162) [Chris Adams] Previously the error handling did not work correctly on Python 3 because a byte-string response wasn't decoded before processing. Thanks to Emmanuel Leblond (@touilleMan) for the patch. - Merge pull request #167 from swistakm/master. [Chris Adams] Refactor common response processing to Results class - Move response manipulation responsibility to Results class and allow custom results classes. [mjaworski] - Add Python 3.5 to automated test matrix. [Chris Adams] v3.3.3 (2015-10-24) ------------------- - V3.3.3. [Chris Adams] - Fix response error handling on Python 3 (closes #162) [Chris Adams] Previously the error handling did not work correctly on Python 3 because a byte-string response wasn't decoded before processing. Thanks to Emmanuel Leblond (@touilleMan) for the patch. - Tests: upgrade Solr to 4.10.4. [Chris Adams] * Resync test Solar script with django-haystack These are still not quite the same; at some point it would be nice to look into a common tool which both projects could use * Update Solr configuration script to set correct libpath for solr-cell to avoid lazy-load failures during testing as was reported on e.g. #162 - Tests: update Solr download script for recent API change. [Chris Adams] - Merge pull request #142 from yspanchal/master. [Chris Adams] Add support for cursormark pagination - Added cursormark deep pagination support. [Yogesh Panchal] v3.3.2 (2015-05-26) ------------------- - Version 3.3.2. [Chris Adams] - Python 2.6 backwards compatibility. [Chris Adams] Python 2.6 shipped with ElementTree 1.2.x. Among other differences, it lacks support for the attribute selectors used to process valid XML error messages, which was added in ElementTree 1.3. - Merge pull request #155 from domenkozar/solr4/exceptions. [Chris Adams] Support Solr 4 XML error format parsing Thanks @domenkozar for the patch - Overhaul Travis config. 
[Chris Adams] * Sidestep use of Tox in favor of Travis-managed Python versions * Enable container-based builds * Enable caching for Solr server downloads - Use builtin unittest2 runner on Python 2.7 as well. [Chris Adams] - Simple error extraction. [Chris Adams] Previously pysolr depended on lxml and cssselect to extract text from Tomcat’s error messages, which was unreliable. This change uses regular expressions to deal with invalid XML rather than lxml’s salvaging parser and avoids having to maintain the code which attempted to find the main error message in tag soup Closes #149 - Update test Solr download script to work with default Python 3. [Chris Adams] v3.3.1 (2015-05-12) ------------------- - Version 3.3.1. [Chris Adams] - Prepare for 3.3.1 release. [Chris Adams] - Convert all HTTP client errors to SolrError. [Chris Adams] This commit ensures that an outside caller can handle all HTTP-related errors by catching SolrError without knowing whether the exception class is owned by requests, urllib3, or httplib. - Merge pull request #146 from gryphius/fix_doc_typo. [Chris Adams] Fix typo in ExtractingRequestHandler documentation Thanks @gryphius - Doc fix: a very simply model -> a very simple model. [Oli] - Merge pull request #139 from upayavira/feature/no-optimize. [Daniel Lindsley] Optimize is no longer recommended - Optimize is no longer recommended. [Upayavira] Since Solr 3.6, Solr has used the TieredMergePolicy which makes, in most scenarios, optimization a harmful rather than beneficial step. v3.3.0 (2015-02-03) ------------------- - Bumped to v3.3.0! [Daniel Lindsley] - Added @acdha to primaries for all his hard work. [Daniel Lindsley] - Support Solr 4+ individual field updates (closes #129) [Chris Adams] Now fields can be updated individually: conn.add(docs, fieldUpdates={'myfield1_ss': 'add', 'myfield2_s': 'set', 'myfield3_i': 'inc'}) Thanks to Çağatay Çallı (@faraday) for the patch. - Merge pull request #137 from LuRsT/patch-1. 
[Chris Adams] Fixed syntax error in README.rst example (thanks @LuRsT) - Fixed syntax error in README.rst example. [Gil Gonçalves] - Add softCommit support (closes #98) [Chris Adams] add() and commit() may now be called with softCommit=True Thanks to @sicarrots for the patch - Merge pull request #123 from ulivedit/master. [Chris Adams] Python 3 compatibility for error message extraction (thanks @ulivedit) - Fix python 3.4 error with forcing unicode strings. [Eric Hagman] - Merge pull request #135 from Grokzen/master. [Chris Adams] Use DEBUG_PYSOLR environmental variable to configure logging This offers an alternative to editing pysolr.py or reconfiguring logging elsewhere - Make it easier to debug pysolr via environment variable. [Johan Andersson] - Merge pull request #131 from andreif/highlighted-readme. [Chris Adams] Highlight Python code in README.rst (thanks @andreif) - Highlight Python code in README.rst. [Andrei Fokau] - Add support for error responses in JSON format (closes #113) [Chris Adams] Thanks to @andreif for the patch and tests - Merge pull request #125 from phill-tornroth/patch-1. [Chris Adams] Fix get-solr-download-url.py for Python 2.6 - Fixes 'zero field length' error from `format()` [Phill Tornroth] Unless I'm missing something... :) - Travis: download Solr before starting tests. [Chris Adams] This should avoid download errors being presented as test failures - Tests: increase Solr startup timeout. [Chris Adams] - Add test Solr tarball downloads to .gitignore. [Chris Adams] - Tests: add Python 3.4 targets. [Chris Adams] - Tests: use Solr 4.7.2 from nearest mirror (closes #115) [Chris Adams] - Tests: add a script to retrieve the closest Apache mirror. [Chris Adams] See #115 - Merge pull request #111 from redjack/py26-tests. [Chris Adams] Update 'run-tests.py' to invoke unittest2 correctly on Python 2.6 - Update 'run-tests.py' to invoke unittest2 correctly on Python 2.6. [Andy Freeland] - Expanded testing section of the README. 
[Chris Adams] - Merge pull request #36 from glenbot/master. [Chris Adams] Update to SolrCoreAdmin.create to use correct action - Updated create command in SolrCoreAdmin to use correct action. [glenbot] - Fix type in SolrAdmin.create default parameter. [Chris Adams] See #36 - Updated ignores. [Daniel Lindsley] v3.2.0 (2014-01-27) ------------------- - Bumped to v3.2.0! [Daniel Lindsley] - Merge pull request #104 from tongwang/master. [Chris Adams] Fix content extraction (thanks @tongwang) - Remove unnecessary comment. [Tong Wang] - Fixed both issues https://github.com/toastdriven/pysolr/issues/96 and https://github.com/toastdriven/pysolr/issues/90 and updated test solr sever from 4.1.0 to 4.6.0. All tests pass. [Tong Wang] - Tests: set Tox basepython versions for tomcat tests. [Chris Adams] - Tests: update test_full_url for multi-core config. [Chris Adams] - Tests: expect content extraction to fail. [Chris Adams] Once https://github.com/toastdriven/pysolr/issues/90 is fixed we can re-enable this test - Skip tomcat error tests when lxml is unavailable. [Chris Adams] Until _scrap_response has a Tomcat path which doesn't depend on lxml.html there's no point in running these tests on a different config - Enable Travis CI. [Chris Adams] - Use tox for testing multiple versions. [Chris Adams] * Add a simple test-runner which handles starting and stopping Solr * Added a basic tox.ini for Python 2.6, 2.7 and 3.3 with and without Tomcat to keep us honest about extra_requires… - Move test setup to script & update README. [Chris Adams] This avoids the README drifting out of sync - Bump requests dependency to 2.x for Unicode handling. [Chris Adams] - Update testing instructions in the README after the Solr mirror went away. [Chris Adams] This uses the canonical Apache archive which should be more stable than the mirror we were using - Merge remote-tracking branch 'anti-social/clean_xml' [Daniel Lindsley] - Fixed error when invalid xml chars present in document. 
[Alexander Koval] - Merge remote-tracking branch 'anti-social/absolute_import' [Daniel Lindsley] - Added absolute_import. [Alexander Koval] - Ignored env3. [Daniel Lindsley] v3.1.0 (2013-07-17) ------------------- - Bumped to v3.1.0! [Daniel Lindsley] - Better Unicode behavior under Python 3. [Daniel Lindsley] - Merge pull request #69 from zyegfryed/patch-1. [Daniel Lindsley] Added MoreLikeThis handler to solrconfig.xml test cores. - Added MoreLikeThis handler to solrconfig.xml test cores. [Sébastien Fievet] - README tweaks. Thanks to @msabramo for the original patch! [Daniel Lindsley] - Slightly better tomcat errors. [Daniel Lindsley] - Improved scraping of tomcat error. [Dougal Matthews] When scraping for the HTML error message include the description if found. - Merge pull request #86 from anti-social/fix_eval. [Chris Adams] Fixed eval in the _to_python method (thanks @anti-social) Ah, nice: since we no longer support Python 2.5 this is a great move. - Fixed eval in the _to_python method. [Alexander Koval] - Solr.add generator expression support (closes #81) [Chris Adams] The only compatibility issue before was a logging statement using len() on the input docs variable, which fails on generator expressions. Thanks to @timsavage for a patch changing this to measuring the message which is actually sent to Solr instead - Enable request's session pooling (closes #82) [Chris Adams] Performing requests using a session enables urllib3's connection pooling, reducing connection latency. Thanks @cody-young for the patch Closes #83 v3.0.6 (2013-04-13) ------------------- - Setup.py: require lxml 3.0+ for tomcat error messages. [Chris Adams] * Bumped version to 3.0.6 - Merge pull request #71 from mjumbewu/master. [Daniel Lindsley] Trailing slash in the base URL will break requests - Make sure trailing and leading slashes do not collide. 
[Mjumbe Wawatu Ukweli] v3.0.5 (2013-02-16) ------------------- - Update error message string interpolation (closes #70) [Chris Adams] Python's string interpolation requires a tuple, not a list v3.0.4 (2013-02-11) ------------------- - Tag version 3.0.4 for PyPI. [Chris Adams] 3.x had a minor bug (see SHA:74b0a36) but it broke logging for Solr errors which seems worth an easily deployed fix - Correct log.error syntax on timeouts. [Chris Adams] v3.0.3 (2013-01-24) ------------------- - Update version to 3.0.3. [Chris Adams] Since python 2.6 compatibility was broken in 3.0+ this seems worth an update - Force_unicode: backwards compatibility with Python 2.6. [Chris Adams] v3.0.2 (2013-01-24) ------------------- - Update version to 3.0.2. [Chris Adams] - Fix rich content extraction method & tests. [Chris Adams] * Update test setup instructions with content extraction handler dependencies * Enable file upload support to _send_request * Added simple extract test - Fix field boosting, simplify _build_doc. [Chris Adams] * Ensure that numbers are converted to strings to avoid lxml choking when asked to serialize a number (in 2013!). * Refactor logic to have a single code-path for both single and multi-value fields * Refactor use **kwargs style so there's a single Element() create call - Force_unicode support for non-string types. [Chris Adams] Now force_unicode(1.0) will return u"1.0" for consistency and to avoid confusion with the Django function of the same name v3.0.1 (2013-01-23) ------------------- - Bumped to v3.0.1! [Daniel Lindsley] - Updated README to include testing info & made sure the README gets included in the package. [Daniel Lindsley] - Updated ignores. [Daniel Lindsley] v3.0.0 (2013-01-23) ------------------- - Bumped to v3.0.0, adding Python3 support! [Daniel Lindsley] Dependencies have changed & been slimmed down. - Bumped to v2.1.0! [Daniel Lindsley] - Catch socket errors for httplib fallback path. [Chris Adams] - Catch IOError in _send_request.
[Chris Adams] httplib2 can raise a bare socket.error in _send_request, which handles only AttributeError. This change catches all IOError subclasses, tells logging to include exception information and moves logging code outside of the try/except block to avoid any possibility of an exception in a log handler being caught by mistake. - Fall back to HTML title when scraping error messages. [Chris Adams] Solr 3.6 + Jetty is not reliably detected by the existing approach but it does return a reasonably useful message in the title which is a lot more informative than "None" - Provide full headers & response to logging handlers. [Chris Adams] This allows handlers such as Raven / Sentry to do something smart with the full HTTP headers and/or response body. Among other things this should provide more insight in situations when pysolr currently logs "Response: None" - Full exception logging for basic connection failures. [Chris Adams] - Logging: use obvious exc_info= syntax. [Chris Adams] As per the documentation, logging exc_info just needs to evaluate to True. This change makes it obvious that the passed in value is not actually used in any other way - Added gthb to AUTHORS. [Daniel Lindsley] - PEP-8 nitpicks. [Chris Adams] - Don't bork on response with no "response" attr. [Gunnlaugur Þór Briem] (happens e.g. in grouped queries) - Support 'grouped' in Solr results. [Gunnlaugur Þór Briem] - Added ``extra_requires`` to cover the ``BeautifulSoup`` dependency. Thanks to kylemacfarlane for the report! [Daniel Lindsley] - Added pabluk to AUTHORS. [Daniel Lindsley] - Updated README file with optional requirement. [Pablo SEMINARIO] - Added kwargs to extract() method. [Pablo SEMINARIO] - Avoid forcing string interpolation when logging. [Chris Adams] This allows aggregators like Sentry and other consumers to see the raw, unformatted string and variables so they can e.g. group all instances of the same message even if the specific request values differ. 
- Added HTTPS support for httplib. [Richard Mitchell] - Added a long description for PyPI. [Daniel Lindsley] - Added support for Solr rich-content extraction. [Chris Adams] This exposes Solr's http://wiki.apache.org/solr/ExtractingRequestHandler which allows you to index text content from structured file formats like PDF, Microsoft Office, etc. - Bumped for the next round of beta. [Daniel Lindsley] - Added cordmata to AUTHORS. [Daniel Lindsley] - Updated suggest_terms so that it correctly handles response from Solr 3.x releases. [Matt Cordial] - Edited README via GitHub. [Daniel Lindsley] - Bumped to v2.0.15! [Daniel Lindsley] - Fixed a bug where ``server_string`` could come back as ``None``. Thanks to croddy for the report! [Daniel Lindsley] - Added dourvais & soypunk to AUTHORS. [Daniel Lindsley] - Unescape html entities in error messages. [David Cramer] - Added support for getting at the Solr querying debug data when using search(). [Shawn Medero] Passing ``debug=True`` as kwarg, the ``search()`` method will activate this property in the JSON results. - Fixed bug, qtime wasn't set when it was 0. [Daniel Dourvaris] - Added query time to results as attribute. [Daniel Dourvaris] - Bumped revision for dev on the next release. [Daniel Lindsley] v2.0.14 (2011-04-29) -------------------- - V2.0.14. [Daniel Lindsley] - Always send commit if its not-null. [David Cramer] - Add support for waitFlush and waitSearcher on update queries. Added support for expungeDeletes on commit(). Added support for maxSegments on optimize() [David Cramer] - Ensure port is coerced to an integer as (at least some version of) socket does not handle unicode ports nicely. [David Cramer] - Add support for commitWithin on Solr.add. [David Cramer] - Better compatibility with the latest revisions of lxml. Thanks to ghostmob for pointing this out! [Daniel Lindsley] - Fixed occasionally trying to call ``lower`` on ``None``. Thanks to girasquid for the report & original patch! 
[Daniel Lindsley] v2.0.13 (2010-09-15) -------------------- - Cleaned up how parameters are checked. Thanks to zyegfryed for the patch. v2.0.13. [Daniel Lindsley] - Fixed a bug in the weighting when given a string field that's weighted. Thanks to akaihola for the report. [Daniel Lindsley] - Fixed the case where the data being converted would be clean unicode. Thanks to acdha for submitting another version of this patch. [Daniel Lindsley] - Fixed the long URL support to correctly deal with sequences. [Daniel Lindsley] - Fixed a bug where additional parameters could cause the URL to be longer than 1024 even if the query is not. Thanks to zyegfryed for the report & patch! [Daniel Lindsley] - Boost values are now coerced into a string. Thanks to notanumber for the patch! [Daniel Lindsley] - All params are now safely encoded. Thanks to acdha for the patch! [Daniel Lindsley] - Added term suggestion. Requires Solr 1.4+. Thanks to acdha for the patch! [Daniel Lindsley] - If invalid characters are found, replace them. Thanks to stugots for the report and fix. [Daniel Lindsley] - Slicing ``None`` doesn't work. Make it a string... [Daniel Lindsley] - Added basic logging support. Thanks to sjaday for the suggestion. [Daniel Lindsley] v2.0.12 (2010-06-20) -------------------- - Releasing version v2.0.12. [Daniel Lindsley] - Added a more helpful message for the ever classic "'NoneType' object has no attribute 'makefile'" error when providing an incorrect URL. [Daniel Lindsley] - Added better error support when using Tomcat. Thanks to bochecha for the original patch. [Daniel Lindsley] - Fixed a long-standing TODO, allowing commits to happen without a second request. Thanks to lyblandin for finally chiding me into fixing it. [Daniel Lindsley] - Fixed a bug when sending long queries. Thanks to akaihola & gthb for the report and patch. [Daniel Lindsley] - Corrected a bug where Unicode character might not transmit correctly. Thanks to anti-social for the initial patch. 
[Daniel Lindsley] - Added field-based boost support. Thanks to notanumber for the patch. [David Sauve] - Better error messages are now provided when things go south. Thanks to bochecha for the patch. [Daniel Lindsley] - Added support for working with Solr cores. Thanks to james.colin.brady for the original patch. [Daniel Lindsley] - Fixed a bug where empty strings/``None`` would be erroneously sent. Thanks to Chak for the patch. [Daniel Lindsley] - Added support for the Stats component. Thanks to thomas.j.lee for the original patch. [Daniel Lindsley] - Fixed datetime/date handling to use ``isoformat`` instead of manually constructing the string. Thanks to joegermuska for the suggestion. [Daniel Lindsley] - Added document boost support. Thanks to Tomasz.Wegrzanowski for the patch. [Daniel Lindsley] - Fixed pysolr to add documents explicitly using UTF-8. Thanks to jarek & dekstop for the patch. [Daniel Lindsley] v2.0.11 (2010-04-28) -------------------- - Fixed initialization parameters on ``Results``. Thanks to jonathan.slenders for pointing this out. v2.0.11. [Daniel Lindsley] - Added a sane .gitignore. [Daniel Lindsley] v2.0.10 (2010-04-28) -------------------- - Fixed a bug in URL construction with httplib2. Thanks to maciekp.lists for the patch. v2.0.10. [Daniel Lindsley] - Added a way to handle queries longer than 1024. Adapted from cogtree's Python Solr fork. [Daniel Lindsley] - Fixed isinstance bug that can occur with the now potentially different datetime/date objects. [Daniel Lindsley] - Altered pysolr to use, if available, Django's implementation of datetime for dates before 1900. Falls back to the default implementation of datetime. [Daniel Lindsley] - If MLT was enabled but no reindexing was performed, Solr returns null instead of no docs. Handle this slightly more gracefully. [Daniel Lindsley] - Corrected a regression when errors occur while using httplib. [Daniel Lindsley] - Bumped version number for previous commit. 
[Daniel Lindsley] - Altered the '_extract_error' method to be a little more useful when things go south. [Daniel Lindsley] - Bumped version for previous commit. [polarcowz] - Added (optional but default) sanitizing for updates. This cleans the XML sent of control characters which cause Solr's XML parser to break. [polarcowz] - Fixed up a couple distribution bits. [polarcowz] - Added spellchecking support. [polarcowz] - Added timeouts (optional if httplib2 is installed). [polarcowz] - Fixed DATETIME_REGEX & _from_python to match Solr documentation. Thanks initcrash! [polarcowz] - Under some circumstances, Solr returns a regular data type instead of a string. Deal with it in _to_python as best as possible. [polarcowz] - Added '_to_python' method for converting data back to its native Python type. Backward compatible (requires manually calling). [polarcowz] - Updated pysolr to version 2.0. [polarcowz] New bits: * Now uses JSON instead of parsing XML. (jkocherhans) * Added support for passing many types of query parameters to Solr. (daniellindsley) * Added support for More Like This (requires Solr 1.3+). (daniellindsley) * Added support for highlighting. (daniellindsley) * Added support for faceting. (daniellindsley) Ought to be fairly backward-compatible (no known issues) but caution is advised when upgrading. Newly requires either the 'json' or 'simplejson' modules. - Added the stuff needed to easy_install pysolr. And a LICENSE, since I just made fun of another project for not having one. [jacob.kaplanmoss] - It would probably help if I imported the correct thing. [jkocherhans] - This is getting a bit hairy, but try to import ElementTree from lxml as well. [jkocherhans] - Use cElementTree if it's available. [jkocherhans] - Removed unused import. Thanks, jarek.zgoda. [jkocherhans] - Removed default values for start and rows from the search method. Thanks, jarek.zgoda. This will allow people to let solr determine what the default for those should be. 
[jkocherhans] - Added converters for float and decimal. This references Issue 1. Thanks, jarek.zgoda. [jkocherhans] - Fixed a bug for connections that don't specify a port number. [jkocherhans] - Fixed Python 2.5-ism. [jkocherhans] - Allowed for connections to solr instances that don't live at /solr. [jkocherhans] - Added multiValue field handling support. [jkocherhans] - Broke results out into a separate object with docs and hits attributes. [jkocherhans] - Fixed typo that caused breakage with python < 2.5. [jkocherhans] - Fixed a small typo. [jkocherhans] - Initial import of pysolr. [jkocherhans] - Initial directory structure. [(no author)] ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/LICENSE0000644000175100017510000000301715107044623013602 0ustar00runnerrunnerCopyright (c) Joseph Kocherhans, Jacob Kaplan-Moss, Daniel Lindsley. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of pysolr nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/MANIFEST.in0000644000175100017510000000007115107044623014330 0ustar00runnerrunnerinclude LICENSE include README.rst include CHANGELOG.rst ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1763461527.882978 pysolr-3.11.0/PKG-INFO0000644000175100017510000002235515107044630013676 0ustar00runnerrunnerMetadata-Version: 2.4 Name: pysolr Version: 3.11.0 Summary: Lightweight Python client for Apache Solr Home-page: https://github.com/django-haystack/pysolr/ Author: Daniel Lindsley Author-email: daniel@toastdriven.com License: BSD Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: BSD License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search Classifier: Programming Language :: Python :: 3 Requires-Python: >=3.10 License-File: LICENSE License-File: AUTHORS Requires-Dist: requests>=2.32.5 Requires-Dist: setuptools Provides-Extra: solrcloud Requires-Dist: kazoo>=2.5.0; extra == "solrcloud" Dynamic: author Dynamic: author-email Dynamic: classifier Dynamic: description Dynamic: home-page Dynamic: license Dynamic: license-file Dynamic: provides-extra Dynamic: requires-dist Dynamic: requires-python Dynamic: summary ====== pysolr ====== ``pysolr`` is a 
lightweight Python client for `Apache Solr`_. It provides an interface that queries the server and returns results based on the query. .. _`Apache Solr`: https://solr.apache.org/ Status ====== `Changelog `_ Features ======== * Basic operations such as selecting, updating & deleting. * Index optimization. * `"More Like This" `_ support (if set up in Solr). * `Spelling correction `_ (if set up in Solr). * Timeout support. * SolrCloud awareness Requirements ============ * Python 3.10+ * Requests 2.32.5+ * **Optional** - ``simplejson`` * **Optional** - ``kazoo`` for SolrCloud mode Installation ============ pysolr is on PyPI: .. code-block:: console $ pip install pysolr Or if you want to install directly from the repository: .. code-block:: console $ python setup.py install Usage ===== Basic usage looks like: .. code-block:: python import pysolr # Create a client instance. The timeout and authentication options are not required. solr = pysolr.Solr('http://localhost:8983/solr/', always_commit=True, [timeout=10], [auth=]) # Note that always_commit defaults to False for performance. You can set # `always_commit=True` to have commands always update the index immediately, make # an update call with `commit=True`, or use Solr's `autoCommit` / `commitWithin` # to have your data be committed following a particular policy. # Do a health check. solr.ping() # How you'd index data. solr.add([ { "id": "doc_1", "title": "A test document", }, { "id": "doc_2", "title": "The Banana: Tasty or Dangerous?", "_doc": [ { "id": "child_doc_1", "title": "peel" }, { "id": "child_doc_2", "title": "seed" }, ] }, ]) # You can index a parent/child document relationship by # associating a list of child documents with the special key '_doc'. This # is helpful for queries that join together conditions on children and parent # documents. # Later, searching is easy. In the simple case, just a plain Lucene-style # query is fine.
results = solr.search('bananas') # The ``Results`` object stores total results found, by default the top # ten most relevant results and any additional data like # facets/highlighting/spelling/etc. print("Saw {0} result(s).".format(len(results))) # Just loop over it to access the results. for result in results: print("The title is '{0}'.".format(result['title'])) # For a more advanced query, say involving highlighting, you can pass # additional options to Solr. results = solr.search('bananas', **{ 'hl': 'true', 'hl.fragsize': 10, }) # Traverse a cursor using its iterator: for doc in solr.search('*:*',fl='id',sort='id ASC',cursorMark='*'): print(doc['id']) # You can also perform More Like This searches, if your Solr is configured # correctly. similar = solr.more_like_this(q='id:doc_2', mltfl='text') # Finally, you can delete either individual documents, solr.delete(id='doc_1') # also in batches... solr.delete(id=['doc_1', 'doc_2']) # ...or all documents. solr.delete(q='*:*') .. code-block:: python # For SolrCloud mode, initialize your Solr like this: zookeeper = pysolr.ZooKeeper("zkhost1:2181,zkhost2:2181,zkhost3:2181") solr = pysolr.SolrCloud(zookeeper, "collection1", auth=) Multicore Index ~~~~~~~~~~~~~~~ Simply point the URL to the index core: .. code-block:: python # Setup a Solr instance. The timeout is optional. solr = pysolr.Solr('http://localhost:8983/solr/core_0/', timeout=10) Custom Request Handlers ~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python # Setup a Solr instance. The trailing slash is optional. solr = pysolr.Solr('http://localhost:8983/solr/core_0/', search_handler='/autocomplete', use_qt_param=False) If ``use_qt_param`` is ``True`` it is essential that the name of the handler is exactly what is configured in ``solrconfig.xml``, including the leading slash if any. If ``use_qt_param`` is ``False`` (default), the leading and trailing slashes can be omitted. If ``search_handler`` is not specified, pysolr will default to ``/select``. 
The handlers for MoreLikeThis, Update, Terms etc. all default to the values set in the ``solrconfig.xml`` SOLR ships with: ``mlt``, ``update``, ``terms`` etc. The specific methods of pysolr's ``Solr`` class (like ``more_like_this``, ``suggest_terms`` etc.) allow for a kwarg ``handler`` to override that value. This includes the ``search`` method. Setting a handler in ``search`` explicitly overrides the ``search_handler`` setting (if any). Custom Authentication ~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python # Setup a Solr instance in a kerberized environment from requests_kerberos import HTTPKerberosAuth, OPTIONAL kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL, sanitize_mutual_error_response=False) solr = pysolr.Solr('http://localhost:8983/solr/', auth=kerberos_auth) .. code-block:: python # Setup a SolrCloud instance in a kerberized environment from requests_kerberos import HTTPKerberosAuth, OPTIONAL kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL, sanitize_mutual_error_response=False) zookeeper = pysolr.ZooKeeper("zkhost1:2181/solr, zkhost2:2181,...,zkhostN:2181") solr = pysolr.SolrCloud(zookeeper, "collection", auth=kerberos_auth) If your Solr servers run off https ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python # Setup a Solr instance in an https environment solr = pysolr.Solr('http://localhost:8983/solr/', verify=path/to/cert.pem) .. code-block:: python # Setup a SolrCloud instance in an https environment zookeeper = pysolr.ZooKeeper("zkhost1:2181/solr, zkhost2:2181,...,zkhostN:2181") solr = pysolr.SolrCloud(zookeeper, "collection", verify=path/to/cert.pem) Custom Commit Policy ~~~~~~~~~~~~~~~~~~~~ .. code-block:: python # Setup a Solr instance. The trailing slash is optional.
# All requests to Solr will be immediately committed because `always_commit=True`: solr = pysolr.Solr('http://localhost:8983/solr/core_0/', search_handler='/autocomplete', always_commit=True) ``always_commit`` signals to the Solr object to either commit or not commit by default for any solr request. Be sure to change this to ``True`` if you are upgrading from a version where the default policy was always commit by default. Functions like ``add`` and ``delete`` also still provide a way to override the default by passing the ``commit`` kwarg. It is generally good practice to limit the amount of commits to Solr as excessive commits risk opening too many searchers or excessive system resource consumption. See the Solr documentation for more information and details about the ``autoCommit`` and ``commitWithin`` options: https://lucene.apache.org/solr/guide/7_7/updatehandlers-in-solrconfig.html#UpdateHandlersinSolrConfig-autoCommit LICENSE ======= ``pysolr`` is licensed under the New BSD license. Contributing to pysolr ====================== For consistency, this project uses `pre-commit `_ to manage Git commit hooks: #. Install the `pre-commit` package: e.g. `brew install pre-commit`, `pip install pre-commit`, etc. #. Run `pre-commit install` each time you check out a new copy of this Git repository to ensure that every subsequent commit will be processed by running `pre-commit run`, which you may also do as desired. To test the entire repository or in a CI scenario, you can check every file rather than just the staged ones using `pre-commit run --all`. Running Tests ============= The ``run-tests.py`` script will automatically perform the steps below and is recommended for testing by default unless you need more control. Running a test Solr instance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Downloading, configuring and running Solr 4 looks like this:: ./start-solr-test-server.sh Running the tests ~~~~~~~~~~~~~~~~~ .. 
code-block:: console $ python -m unittest tests ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/Pipfile0000644000175100017510000000026515107044623014112 0ustar00runnerrunner[[source]] name = "pypi" url = "https://pypi.org/simple" verify_ssl = true [dev-packages] coverage = "*" [packages] requests = "*" kazoo = "*" [requires] python_version = "3.10" ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/Pipfile.lock0000644000175100017510000005123715107044623015046 0ustar00runnerrunner{ "_meta": { "hash": { "sha256": "dd4fe0e32363dea806c1a7a0af5ebe279e8f25f04a44837a6b8c045ab739aff8" }, "pipfile-spec": 6, "requires": { "python_version": "3.10" }, "sources": [ { "name": "pypi", "url": "https://pypi.org/simple", "verify_ssl": true } ] }, "default": { "certifi": { "hashes": [ "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316" ], "markers": "python_version >= '3.7'", "version": "==2025.11.12" }, "charset-normalizer": { "hashes": [ "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad", "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93", "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", "sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc", "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", "sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63", "sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d", "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0", 
"sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505", "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", "sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af", "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", "sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318", "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", "sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576", "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", "sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1", "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8", "sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1", "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", "sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88", "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", "sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a", "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84", "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db", 
"sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", "sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7", "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", "sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2", "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d", "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", "sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf", "sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6", "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", "sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa", "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", 
"sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e", "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313", "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d", "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", "sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3", "sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9", "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", "sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50", "sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf", "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", "sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac", "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", "sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c", "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6", "sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e", "sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4", "sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84", 
"sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69", "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d", "sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074", "sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3", "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", "sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d", "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f", "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8", "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", "sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966", "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", "sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3", "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e", "sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608" ], "markers": "python_version >= '3.7'", "version": "==3.4.4" }, "idna": { "hashes": [ "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", 
"sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902" ], "markers": "python_version >= '3.8'", "version": "==3.11" }, "kazoo": { "hashes": [ "sha256:905796ae4f4c12bd4e4ae92e6e5d018439e6b56c8cfbb24825362e79b230dab1", "sha256:de2d69168de432ff66b457a26c727a5bf7ff53af5806653fd1df7f04b6a5483c" ], "index": "pypi", "version": "==2.10.0" }, "requests": { "hashes": [ "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf" ], "index": "pypi", "markers": "python_version >= '3.9'", "version": "==2.32.5" }, "urllib3": { "hashes": [ "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc" ], "markers": "python_version >= '3.9'", "version": "==2.5.0" } }, "develop": { "coverage": { "hashes": [ "sha256:004cdcea3457c0ea3233622cd3464c1e32ebba9b41578421097402bee6461b63", "sha256:0542ddf6107adbd2592f29da9f59f5d9cff7947b5bb4f734805085c327dcffaa", "sha256:05fd3fb6edff0c98874d752013588836f458261e5eba587afe4c547bba544afd", "sha256:074e6a5cd38e06671580b4d872c1a67955d4e69639e4b04e87fc03b494c1f060", "sha256:07bc7745c945a6d95676953e86ba7cebb9f11de7773951c387f4c07dc76d03f5", "sha256:08c0bcf932e47795c49f0406054824b9d45671362dfc4269e0bc6e4bff010704", "sha256:097acc18bedf2c6e3144eaf09b5f6034926c3c9bb9e10574ffd0942717232507", "sha256:0c986537abca9b064510f3fd104ba33e98d3036608c7f2f5537f869bc10e1ee5", "sha256:0dba4da36730e384669e05b765a2c49f39514dd3012fcc0398dd66fba8d746d5", "sha256:0e920567f8c3a3ce68ae5a42cf7c2dc4bb6cc389f18bff2235dd8c03fa405de5", "sha256:0f59387f5e6edbbffec2281affb71cdc85e0776c1745150a3ab9b6c1d016106b", "sha256:12d821de7408292530b0d241468b698bce18dd12ecaf45316149f53877885f8c", "sha256:13b2066303a1c1833c654d2af0455bb009b6e1727b3883c9964bc5c2f643c1d0", "sha256:1410bac9e98afd9623f53876fae7d8a5db9f5a0ac1c9e7c5188463cb4b3212e2", 
"sha256:1451464fd855d9bd000c19b71bb7dafea9ab815741fb0bd9e813d9b671462d6f", "sha256:149eccc85d48c8f06547534068c41d69a1a35322deaa4d69ba1561e2e9127e75", "sha256:1e33d0bebf895c7a0905fcfaff2b07ab900885fc78bba2a12291a2cfbab014cc", "sha256:200bb89fd2a8a07780eafcdff6463104dec459f3c838d980455cfa84f5e5e6e1", "sha256:2376e8a9c889016f25472c452389e98bc6e54a19570b107e27cde9d47f387b64", "sha256:28c5251b3ab1d23e66f1130ca0c419747edfbcb4690de19467cd616861507af7", "sha256:2ec27a7a991d229213c8070d31e3ecf44d005d96a9edc30c78eaeafaa421c001", "sha256:305716afb19133762e8cf62745c46c4853ad6f9eeba54a593e373289e24ea237", "sha256:31663572f20bf3406d7ac00d6981c7bbbcec302539d26b5ac596ca499664de31", "sha256:3224c7baf34e923ffc78cb45e793925539d640d42c96646db62dbd61bbcfa131", "sha256:351511ae28e2509c8d8cae5311577ea7dd511ab8e746ffc8814a0896c3d33fbe", "sha256:385977d94fc155f8731c895accdfcc3dd0d9dd9ef90d102969df95d3c637ab80", "sha256:39764c6167c82d68a2d8c97c33dba45ec0ad9172570860e12191416f4f8e6e1b", "sha256:3e33a968672be1394eded257ec10d4acbb9af2ae263ba05a99ff901bb863557e", "sha256:4234914b8c67238a3c4af2bba648dc716aa029ca44d01f3d51536d44ac16854f", "sha256:426559f105f644b69290ea414e154a0d320c3ad8a2bb75e62884731f69cf8e2c", "sha256:465695268414e149bab754c54b0c45c8ceda73dd4a5c3ba255500da13984b16d", "sha256:4bec8c7160688bd5a34e65c82984b25409563134d63285d8943d0599efbc448e", "sha256:4c5627429f7fbff4f4131cfdd6abd530734ef7761116811a707b88b7e205afd7", "sha256:4ca5f876bf41b24378ee67c41d688155f0e54cdc720de8ef9ad6544005899240", "sha256:4d4ca49f5ba432b0755ebb0fc3a56be944a19a16bb33802264bbc7311622c0d1", "sha256:4ebcddfcdfb4c614233cff6e9a3967a09484114a8b2e4f2c7a62dc83676ba13f", "sha256:4f2bb4ee8dd40f9b2a80bb4adb2aecece9480ba1fa60d9382e8c8e0bd558e2eb", "sha256:56f909a40d68947ef726ce6a34eb38f0ed241ffbe55c5007c64e616663bcbafc", "sha256:5b771b59ac0dfb7f139f70c85b42717ef400a6790abb6475ebac1ecee8de782f", "sha256:603c4414125fc9ae9000f17912dcfd3d3eb677d4e360b85206539240c96ea76e", 
"sha256:60ca149a446da255d56c2a7a813b51a80d9497a62250532598d249b3cdb1a926", "sha256:68c4eb92997dbaaf839ea13527be463178ac0ddd37a7ac636b8bc11a51af2428", "sha256:6bb599052a974bb6cedfa114f9778fedfad66854107cf81397ec87cb9b8fbcf2", "sha256:6f033dec603eea88204589175782290a038b436105a8f3637a81c4359df27832", "sha256:72c8b494bd20ae1c58528b97c4a67d5cfeafcb3845c73542875ecd43924296de", "sha256:77ffb3b7704eb7b9b3298a01fe4509cef70117a52d50bcba29cffc5f53dd326a", "sha256:84b892e968164b7a0498ddc5746cdf4e985700b902128421bb5cec1080a6ee36", "sha256:86d27d2dd7c7c5a44710565933c7dc9cd70e65ef97142e260d16d555667deef7", "sha256:876a3ee7fd2613eb79602e4cdb39deb6b28c186e76124c3f29e580099ec21a87", "sha256:8bba7e4743e37484ae17d5c3b8eb1ce78b564cb91b7ace2e2182b25f0f764cb5", "sha256:8d16bbe566e16a71d123cd66382c1315fcd520c7573652a8074a8fe281b38c6a", "sha256:8d264402fc179776d43e557e1ca4a7d953020d3ee95f7ec19cc2c9d769277f06", "sha256:8f067ada2c333609b52835ca4d4868645d3b63ac04fb2b9a658c55bba7f667d3", "sha256:8f4cbfff5cf01fa07464439a8510affc9df281535f41a1f5312fbd2b59b4ab5c", "sha256:900580bc99c145e2561ea91a2d207e639171870d8a18756eb57db944a017d4bb", "sha256:9061a3e3c92b27fd8036dafa26f25d95695b6aa2e4514ab16a254f297e664f83", "sha256:90a96fcd824564eae6137ec2563bd061d49a32944858d4bdbae5c00fb10e76ac", "sha256:9245bd392572b9f799261c4c9e7216bafc9405537d0f4ce3ad93afe081a12dc9", "sha256:9799bd6a910961cb666196b8583ed0ee125fa225c6fdee2cbf00232b861f29d2", "sha256:9a1d577c20b4334e5e814c3d5fe07fa4a8c3ae42a601945e8d7940bab811d0bd", "sha256:a6b17c2b5e0b9bb7702449200f93e2d04cb04b1414c41424c08aa1e5d352da76", "sha256:a730cd0824e8083989f304e97b3f884189efb48e2151e07f57e9e138ab104200", "sha256:a8258f10059b5ac837232c589a350a2df4a96406d6d5f2a09ec587cbdd539655", "sha256:ab6212e62ea0e1006531a2234e209607f360d98d18d532c2fa8e403c1afbdd71", "sha256:abb903ffe46bd319d99979cdba350ae7016759bb69f47882242f7b93f3356055", "sha256:abcea3b5f0dc44e1d01c27090bc32ce6ffb7aa665f884f1890710454113ea902", 
"sha256:ac5d5329c9c942bbe6295f4251b135d860ed9f86acd912d418dce186de7c19ac", "sha256:adb9b7b42c802bd8cb3927de8c1c26368ce50c8fdaa83a9d8551384d77537044", "sha256:ae12fe90b00b71a71b69f513773310782ce01d5f58d2ceb2b7c595ab9d222094", "sha256:b5cd111d3ab7390be0c07ad839235d5ad54d2ca497b5f5db86896098a77180a4", "sha256:bb9d7efdb063903b3fdf77caec7b77c3066885068bdc0d44bc1b0c171033f944", "sha256:c0a3b6e32457535df0d41d2d895da46434706dd85dbaf53fbc0d3bd7d914b362", "sha256:c381a252317f63ca0179d2c7918e83b99a4ff3101e1b24849b999a00f9cd4f86", "sha256:c713c1c528284d636cd37723b0b4c35c11190da6f932794e145fc40f8210a14a", "sha256:c8be5bfcdc7832011b2652db29ed7672ce9d353dd19bce5272ca33dbcf60aaa8", "sha256:c8f563b245b4ddb591e99f28e3cd140b85f114b38b7f95b2e42542f0603eb7d7", "sha256:ca90ef33a152205fb6f2f0c1f3e55c50df4ef049bb0940ebba666edd4cdebc55", "sha256:d60bf4d7f886989ddf80e121a7f4d140d9eac91f1d2385ce8eb6bda93d563297", "sha256:d8750dd20362a1b80e3cf84f58013d4672f89663aee457ea59336df50fab6739", "sha256:dd9ca2d44ed8018c90efb72f237a2a140325a4c3339971364d758e78b175f58e", "sha256:e22539b676fafba17f0a90ac725f029a309eb6e483f364c86dcadee060429d46", "sha256:e2a96fdc7643c9517a317553aca13b5cae9bad9a5f32f4654ce247ae4d321405", "sha256:e5f4bfac975a2138215a38bda599ef00162e4143541cf7dd186da10a7f8e69f1", "sha256:e8feeb5e8705835f0622af0fe7ff8d5cb388948454647086494d6c41ec142c2e", "sha256:eb5069074db19a534de3859c43eec78e962d6d119f637c41c8e028c5ab3f59dd", "sha256:f0b4101e2b3c6c352ff1f70b3a6fcc7c17c1ab1a91ccb7a33013cb0782af9820", "sha256:f761dbcf45e9416ec4698e1a7649248005f0064ce3523a47402d1bff4af2779e", "sha256:f9c96a29c6d65bd36a91f5634fef800212dff69dacdb44345c4c9783943ab0df", "sha256:fb58da65e3339b3dbe266b607bb936efb983d86b00b03eb04c4ad5b442c58428", "sha256:fbffc22d80d86fbe456af9abb17f7a7766e7b2101f7edaacc3535501691563f7", "sha256:fdc5255eb4815babcdf236fa1a806ccb546724c8a9b129fd1ea4a5448a0bf07c", "sha256:fe3425dc6021f906c6325d3c415e048e7cdb955505a94f1eb774dafc779ba203" ], "index": "pypi", "markers": "python_version >= 
'3.10'", "version": "==7.11.3" } } } ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/README.rst0000644000175100017510000002037415107044623014271 0ustar00runnerrunner====== pysolr ====== ``pysolr`` is a lightweight Python client for `Apache Solr`_. It provides an interface that queries the server and returns results based on the query. .. _`Apache Solr`: https://solr.apache.org/ Status ====== `Changelog `_ Features ======== * Basic operations such as selecting, updating & deleting. * Index optimization. * `"More Like This" `_ support (if set up in Solr). * `Spelling correction `_ (if set up in Solr). * Timeout support. * SolrCloud awareness Requirements ============ * Python 3.10+ * Requests 2.32.5+ * **Optional** - ``simplejson`` * **Optional** - ``kazoo`` for SolrCloud mode Installation ============ pysolr is on PyPI: .. code-block:: console $ pip install pysolr Or if you want to install directly from the repository: .. code-block:: console $ python setup.py install Usage ===== Basic usage looks like: .. code-block:: python import pysolr # Create a client instance. The timeout and authentication options are not required. solr = pysolr.Solr('http://localhost:8983/solr/', always_commit=True, [timeout=10], [auth=<type of authentication>]) # Note that always_commit defaults to False for performance. You can set # `always_commit=True` to have commands always update the index immediately, make # an update call with `commit=True`, or use Solr's `autoCommit` / `commitWithin` # to have your data be committed following a particular policy. # Do a health check. solr.ping() # How you'd index data. solr.add([ { "id": "doc_1", "title": "A test document", }, { "id": "doc_2", "title": "The Banana: Tasty or Dangerous?", "_doc": [ { "id": "child_doc_1", "title": "peel" }, { "id": "child_doc_2", "title": "seed" }, ] }, ]) # You can index a parent/child document relationship by # associating a list of child documents with the special key '_doc'.
This # is helpful for queries that join together conditions on children and parent # documents. # Later, searching is easy. In the simple case, just a plain Lucene-style # query is fine. results = solr.search('bananas') # The ``Results`` object stores total results found, by default the top # ten most relevant results and any additional data like # facets/highlighting/spelling/etc. print("Saw {0} result(s).".format(len(results))) # Just loop over it to access the results. for result in results: print("The title is '{0}'.".format(result['title'])) # For a more advanced query, say involving highlighting, you can pass # additional options to Solr. results = solr.search('bananas', **{ 'hl': 'true', 'hl.fragsize': 10, }) # Traverse a cursor using its iterator: for doc in solr.search('*:*',fl='id',sort='id ASC',cursorMark='*'): print(doc['id']) # You can also perform More Like This searches, if your Solr is configured # correctly. similar = solr.more_like_this(q='id:doc_2', mltfl='text') # Finally, you can delete either individual documents, solr.delete(id='doc_1') # also in batches... solr.delete(id=['doc_1', 'doc_2']) # ...or all documents. solr.delete(q='*:*') .. code-block:: python # For SolrCloud mode, initialize your Solr like this: zookeeper = pysolr.ZooKeeper("zkhost1:2181,zkhost2:2181,zkhost3:2181") solr = pysolr.SolrCloud(zookeeper, "collection1", auth=<type of authentication>) Multicore Index ~~~~~~~~~~~~~~~ Simply point the URL to the index core: .. code-block:: python # Set up a Solr instance. The timeout is optional. solr = pysolr.Solr('http://localhost:8983/solr/core_0/', timeout=10) Custom Request Handlers ~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python # Set up a Solr instance. The trailing slash is optional. solr = pysolr.Solr('http://localhost:8983/solr/core_0/', search_handler='/autocomplete', use_qt_param=False) If ``use_qt_param`` is ``True`` it is essential that the name of the handler is exactly what is configured in ``solrconfig.xml``, including the leading slash if any.
If ``use_qt_param`` is ``False`` (default), the leading and trailing slashes can be omitted. If ``search_handler`` is not specified, pysolr will default to ``/select``. The handlers for MoreLikeThis, Update, Terms etc. all default to the values set in the ``solrconfig.xml`` Solr ships with: ``mlt``, ``update``, ``terms`` etc. The specific methods of pysolr's ``Solr`` class (like ``more_like_this``, ``suggest_terms`` etc.) allow for a kwarg ``handler`` to override that value. This includes the ``search`` method. Setting a handler in ``search`` explicitly overrides the ``search_handler`` setting (if any). Custom Authentication ~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python # Set up a Solr instance in a kerberized environment from requests_kerberos import HTTPKerberosAuth, OPTIONAL kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL, sanitize_mutual_error_response=False) solr = pysolr.Solr('http://localhost:8983/solr/', auth=kerberos_auth) .. code-block:: python # Set up a SolrCloud instance in a kerberized environment from requests_kerberos import HTTPKerberosAuth, OPTIONAL kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL, sanitize_mutual_error_response=False) zookeeper = pysolr.ZooKeeper("zkhost1:2181/solr, zkhost2:2181,...,zkhostN:2181") solr = pysolr.SolrCloud(zookeeper, "collection", auth=kerberos_auth) If your Solr servers run off https ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python # Set up a Solr instance in an https environment solr = pysolr.Solr('https://localhost:8983/solr/', verify='path/to/cert.pem') .. code-block:: python # Set up a SolrCloud instance in an https environment zookeeper = pysolr.ZooKeeper("zkhost1:2181/solr, zkhost2:2181,...,zkhostN:2181") solr = pysolr.SolrCloud(zookeeper, "collection", verify='path/to/cert.pem') Custom Commit Policy ~~~~~~~~~~~~~~~~~~~~ .. code-block:: python # Set up a Solr instance. The trailing slash is optional.
# All requests to Solr will be immediately committed because `always_commit=True`: solr = pysolr.Solr('http://localhost:8983/solr/core_0/', search_handler='/autocomplete', always_commit=True) ``always_commit`` signals to the Solr object to either commit or not commit by default for any Solr request. Be sure to change this to ``True`` if you are upgrading from a version where the default policy was to always commit. Functions like ``add`` and ``delete`` also still provide a way to override the default by passing the ``commit`` kwarg. It is generally good practice to limit the number of commits to Solr as excessive commits risk opening too many searchers or consuming excessive system resources. See the Solr documentation for more information and details about the ``autoCommit`` and ``commitWithin`` options: https://lucene.apache.org/solr/guide/7_7/updatehandlers-in-solrconfig.html#UpdateHandlersinSolrConfig-autoCommit LICENSE ======= ``pysolr`` is licensed under the New BSD license. Contributing to pysolr ====================== For consistency, this project uses `pre-commit <https://pre-commit.com/>`_ to manage Git commit hooks: #. Install the `pre-commit` package: e.g. `brew install pre-commit`, `pip install pre-commit`, etc. #. Run `pre-commit install` each time you check out a new copy of this Git repository to ensure that every subsequent commit will be processed by running `pre-commit run`, which you may also do as desired. To test the entire repository or in a CI scenario, you can check every file rather than just the staged ones using `pre-commit run --all-files`. Running Tests ============= The ``run-tests.py`` script will automatically perform the steps below and is recommended for testing by default unless you need more control. Running a test Solr instance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Downloading, configuring and running Solr 4 looks like this:: ./start-solr-test-server.sh Running the tests ~~~~~~~~~~~~~~~~~ ..
code-block:: console $ python -m unittest tests ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/get-solr-download-url.py0000755000175100017510000000310515107044623017311 0ustar00runnerrunner#!/usr/bin/env python import sys from itertools import chain from urllib.parse import urljoin import requests if len(sys.argv) != 2: print("Usage: %s SOLR_VERSION" % sys.argv[0], file=sys.stderr) sys.exit(1) solr_version = sys.argv[1] tarball = "solr-{0}.tgz".format(solr_version) dist_path = "lucene/solr/{0}/{1}".format(solr_version, tarball) download_url = urljoin("https://archive.apache.org/dist/", dist_path) mirror_response = requests.get( "https://www.apache.org/dyn/mirrors/mirrors.cgi/%s?asjson=1" % dist_path ) if not mirror_response.ok: print( "Apache mirror request returned HTTP %d" % mirror_response.status_code, file=sys.stderr, ) sys.exit(1) mirror_data = mirror_response.json() # Since the Apache mirrors are often unreliable and releases may disappear # without notice we'll try the preferred mirror, all of the alternates and # backups, and fall back to the main Apache archive server: for base_url in chain( (mirror_data["preferred"],), mirror_data["http"], mirror_data["backup"], ("https://archive.apache.org/dist/",), ): test_url = urljoin(base_url, mirror_data["path_info"]) # The Apache mirror script's response format has recently changed to exclude # the actual file paths: if not test_url.endswith(tarball): test_url = urljoin(test_url, dist_path) if requests.head(test_url, allow_redirects=True).status_code == 200: download_url = test_url break else: print("None of the Apache mirrors have %s" % dist_path, file=sys.stderr) sys.exit(1) print(download_url) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/pyproject.toml0000644000175100017510000000376015107044623015516 0ustar00runnerrunner[tool.ruff] target-version = "py37" lint.select = [ "A", # 
flake8-builtins "AIR", # Airflow "ASYNC", # flake8-async "B", # flake8-bugbear "BLE", # flake8-blind-except "C4", # flake8-comprehensions "C90", # McCabe cyclomatic complexity "E", # pycodestyle "ERA", # eradicate "EXE", # flake8-executable "F", # Pyflakes "G", # flake8-logging-format "I", # isort "ICN", # flake8-import-conventions "INP", # flake8-no-pep420 "INT", # flake8-gettext "ISC", # flake8-implicit-str-concat "N", # pep8-naming "PGH", # pygrep-hooks "PIE", # flake8-pie "PL", # Pylint "PT", # flake8-pytest-style "PTH", # flake8-use-pathlib "PYI", # flake8-pyi "RET", # flake8-return "RSE", # flake8-raise "RUF", # Ruff-specific rules "S", # flake8-bandit "T10", # flake8-debugger "TCH", # flake8-type-checking "TID", # flake8-tidy-imports "W", # pycodestyle "YTT", # flake8-2020 # "ANN", # flake8-annotations # "ARG", # flake8-unused-arguments # "COM", # flake8-commas # "D", # pydocstyle # "DJ", # flake8-django # "DTZ", # flake8-datetimez # "EM", # flake8-errmsg # "FBT", # flake8-boolean-trap # "NPY", # NumPy-specific rules # "PD", # pandas-vet # "Q", # flake8-quotes # "SIM", # flake8-simplify # "SLF", # flake8-self # "T20", # flake8-print # "TRY", # tryceratops # "UP", # pyupgrade ] lint.ignore = [ "B018", "B026", "B904", "N802", "N803", "N806", "PGH004", "PLR5501", "PLW2901", "PT009", "PT027", "PTH123", "RET505", "RET506", "RUF100", "S113", "S314", "S603", ] lint.per-file-ignores."tests/*" = [ "S101", ] lint.mccabe.max-complexity = 16 lint.pylint.allow-magic-value-types = [ "int", "str", ] lint.pylint.max-args = 12 lint.pylint.max-branches = 20 lint.pylint.max-returns = 7 lint.pylint.max-statements = 54 [tool.codespell] ignore-words-list = "dekstop,assertin,nwe,wahtever,yello" skip = "./.*" ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1763461527.882978 pysolr-3.11.0/pysolr.egg-info/0000755000175100017510000000000015107044630015614 5ustar00runnerrunner././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 
mtime=1763461527.0 pysolr-3.11.0/pysolr.egg-info/PKG-INFO0000644000175100017510000002235515107044627016726 0ustar00runnerrunnerMetadata-Version: 2.4 Name: pysolr Version: 3.11.0 Summary: Lightweight Python client for Apache Solr Home-page: https://github.com/django-haystack/pysolr/ Author: Daniel Lindsley Author-email: daniel@toastdriven.com License: BSD Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: BSD License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search Classifier: Programming Language :: Python :: 3 Requires-Python: >=3.10 License-File: LICENSE License-File: AUTHORS Requires-Dist: requests>=2.32.5 Requires-Dist: setuptools Provides-Extra: solrcloud Requires-Dist: kazoo>=2.5.0; extra == "solrcloud" Dynamic: author Dynamic: author-email Dynamic: classifier Dynamic: description Dynamic: home-page Dynamic: license Dynamic: license-file Dynamic: provides-extra Dynamic: requires-dist Dynamic: requires-python Dynamic: summary ====== pysolr ====== ``pysolr`` is a lightweight Python client for `Apache Solr`_. It provides an interface that queries the server and returns results based on the query. .. _`Apache Solr`: https://solr.apache.org/ Status ====== `Changelog `_ Features ======== * Basic operations such as selecting, updating & deleting. * Index optimization. * `"More Like This" `_ support (if set up in Solr). * `Spelling correction `_ (if set up in Solr). * Timeout support. * SolrCloud awareness Requirements ============ * Python 3.10+ * Requests 2.32.5+ * **Optional** - ``simplejson`` * **Optional** - ``kazoo`` for SolrCloud mode Installation ============ pysolr is on PyPI: .. code-block:: console $ pip install pysolr Or if you want to install directly from the repository: .. code-block:: console $ python setup.py install Usage ===== Basic usage looks like: .. 
code-block:: python import pysolr # Create a client instance. The timeout and authentication options are not required. solr = pysolr.Solr('http://localhost:8983/solr/', always_commit=True, [timeout=10], [auth=]) # Note that auto_commit defaults to False for performance. You can set # `auto_commit=True` to have commands always update the index immediately, make # an update call with `commit=True`, or use Solr's `autoCommit` / `commitWithin` # to have your data be committed following a particular policy. # Do a health check. solr.ping() # How you'd index data. solr.add([ { "id": "doc_1", "title": "A test document", }, { "id": "doc_2", "title": "The Banana: Tasty or Dangerous?", "_doc": [ { "id": "child_doc_1", "title": "peel" }, { "id": "child_doc_2", "title": "seed" }, ] }, ]) # You can index a parent/child document relationship by # associating a list of child documents with the special key '_doc'. This # is helpful for queries that join together conditions on children and parent # documents. # Later, searching is easy. In the simple case, just a plain Lucene-style # query is fine. results = solr.search('bananas') # The ``Results`` object stores total results found, by default the top # ten most relevant results and any additional data like # facets/highlighting/spelling/etc. print("Saw {0} result(s).".format(len(results))) # Just loop over it to access the results. for result in results: print("The title is '{0}'.".format(result['title'])) # For a more advanced query, say involving highlighting, you can pass # additional options to Solr. results = solr.search('bananas', **{ 'hl': 'true', 'hl.fragsize': 10, }) # Traverse a cursor using its iterator: for doc in solr.search('*:*',fl='id',sort='id ASC',cursorMark='*'): print(doc['id']) # You can also perform More Like This searches, if your Solr is configured # correctly. 
similar = solr.more_like_this(q='id:doc_2', mltfl='text') # Finally, you can delete either individual documents, solr.delete(id='doc_1') # also in batches... solr.delete(id=['doc_1', 'doc_2']) # ...or all documents. solr.delete(q='*:*') .. code-block:: python # For SolrCloud mode, initialize your Solr like this: zookeeper = pysolr.ZooKeeper("zkhost1:2181,zkhost2:2181,zkhost3:2181") solr = pysolr.SolrCloud(zookeeper, "collection1", auth=) Multicore Index ~~~~~~~~~~~~~~~ Simply point the URL to the index core: .. code-block:: python # Setup a Solr instance. The timeout is optional. solr = pysolr.Solr('http://localhost:8983/solr/core_0/', timeout=10) Custom Request Handlers ~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python # Setup a Solr instance. The trailing slash is optional. solr = pysolr.Solr('http://localhost:8983/solr/core_0/', search_handler='/autocomplete', use_qt_param=False) If ``use_qt_param`` is ``True`` it is essential that the name of the handler is exactly what is configured in ``solrconfig.xml``, including the leading slash if any. If ``use_qt_param`` is ``False`` (default), the leading and trailing slashes can be omitted. If ``search_handler`` is not specified, pysolr will default to ``/select``. The handlers for MoreLikeThis, Update, Terms etc. all default to the values set in the ``solrconfig.xml`` SOLR ships with: ``mlt``, ``update``, ``terms`` etc. The specific methods of pysolr's ``Solr`` class (like ``more_like_this``, ``suggest_terms`` etc.) allow for a kwarg ``handler`` to override that value. This includes the ``search`` method. Setting a handler in ``search`` explicitly overrides the ``search_handler`` setting (if any). Custom Authentication ~~~~~~~~~~~~~~~~~~~~~ .. 
code-block:: python # Setup a Solr instance in a kerborized environment from requests_kerberos import HTTPKerberosAuth, OPTIONAL kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL, sanitize_mutual_error_response=False) solr = pysolr.Solr('http://localhost:8983/solr/', auth=kerberos_auth) .. code-block:: python # Setup a CloudSolr instance in a kerborized environment from requests_kerberos import HTTPKerberosAuth, OPTIONAL kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL, sanitize_mutual_error_response=False) zookeeper = pysolr.ZooKeeper("zkhost1:2181/solr, zkhost2:2181,...,zkhostN:2181") solr = pysolr.SolrCloud(zookeeper, "collection", auth=kerberos_auth) If your Solr servers run off https ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python # Setup a Solr instance in an https environment solr = pysolr.Solr('http://localhost:8983/solr/', verify=path/to/cert.pem) .. code-block:: python # Setup a CloudSolr instance in a kerborized environment zookeeper = pysolr.ZooKeeper("zkhost1:2181/solr, zkhost2:2181,...,zkhostN:2181") solr = pysolr.SolrCloud(zookeeper, "collection", verify=path/to/cert.perm) Custom Commit Policy ~~~~~~~~~~~~~~~~~~~~ .. code-block:: python # Setup a Solr instance. The trailing slash is optional. # All requests to Solr will be immediately committed because `always_commit=True`: solr = pysolr.Solr('http://localhost:8983/solr/core_0/', search_handler='/autocomplete', always_commit=True) ``always_commit`` signals to the Solr object to either commit or not commit by default for any solr request. Be sure to change this to ``True`` if you are upgrading from a version where the default policy was always commit by default. Functions like ``add`` and ``delete`` also still provide a way to override the default by passing the ``commit`` kwarg. It is generally good practice to limit the amount of commits to Solr as excessive commits risk opening too many searchers or excessive system resource consumption. 
See the Solr documentation for more information and details about the ``autoCommit`` and ``commitWithin`` options: https://lucene.apache.org/solr/guide/7_7/updatehandlers-in-solrconfig.html#UpdateHandlersinSolrConfig-autoCommit LICENSE ======= ``pysolr`` is licensed under the New BSD license. Contributing to pysolr ====================== For consistency, this project uses `pre-commit `_ to manage Git commit hooks: #. Install the `pre-commit` package: e.g. `brew install pre-commit`, `pip install pre-commit`, etc. #. Run `pre-commit install` each time you check out a new copy of this Git repository to ensure that every subsequent commit will be processed by running `pre-commit run`, which you may also do as desired. To test the entire repository or in a CI scenario, you can check every file rather than just the staged ones using `pre-commit run --all`. Running Tests ============= The ``run-tests.py`` script will automatically perform the steps below and is recommended for testing by default unless you need more control. Running a test Solr instance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Downloading, configuring and running Solr 4 looks like this:: ./start-solr-test-server.sh Running the tests ~~~~~~~~~~~~~~~~~ .. 
code-block:: console $ python -m unittest tests ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461527.0 pysolr-3.11.0/pysolr.egg-info/SOURCES.txt0000644000175100017510000000131015107044627017501 0ustar00runnerrunner.editorconfig .gitchangelog.rc .gitignore .pre-commit-config.yaml AUTHORS CHANGELOG.rst LICENSE MANIFEST.in Pipfile Pipfile.lock README.rst get-solr-download-url.py pyproject.toml pysolr.py run-tests.py setup.py sonar-project.properties start-solr-test-server.sh tox.ini .github/dependabot.yml .github/issue_template.md .github/pull_request_template.md .github/stale.yml .github/workflows/codeql-analysis.yml .github/workflows/pypi-release.yml .github/workflows/tox.yml pysolr.egg-info/PKG-INFO pysolr.egg-info/SOURCES.txt pysolr.egg-info/dependency_links.txt pysolr.egg-info/requires.txt pysolr.egg-info/top_level.txt tests/__init__.py tests/test_admin.py tests/test_client.py tests/test_cloud.py tests/utils.py././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461527.0 pysolr-3.11.0/pysolr.egg-info/dependency_links.txt0000644000175100017510000000000115107044627021670 0ustar00runnerrunner ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461527.0 pysolr-3.11.0/pysolr.egg-info/requires.txt0000644000175100017510000000006615107044627020224 0ustar00runnerrunnerrequests>=2.32.5 setuptools [solrcloud] kazoo>=2.5.0 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461527.0 pysolr-3.11.0/pysolr.egg-info/top_level.txt0000644000175100017510000000000715107044627020351 0ustar00runnerrunnerpysolr ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/pysolr.py0000644000175100017510000014176115107044623014510 0ustar00runnerrunnerimport ast import datetime import logging import os import random import re import time from importlib.metadata import PackageNotFoundError from 
importlib.metadata import version as _get_version from xml.etree import ElementTree # noqa: ICN001 import requests try: from kazoo.client import KazooClient, KazooState except ImportError: KazooClient = KazooState = None try: # Prefer simplejson, if installed. import simplejson as json except ImportError: import json import html.entities as htmlentities from http.client import HTTPException from urllib.parse import quote, urlencode __author__ = "Daniel Lindsley, Joseph Kocherhans, Jacob Kaplan-Moss, Thomas Rieder" __all__ = ["Solr"] try: __version__ = _get_version(__name__) except PackageNotFoundError: __version__ = "0.0.dev0" def get_version(): return __version__ DATETIME_REGEX = re.compile( r"^(?P\d{4})-(?P\d{2})-(?P\d{2})T(?P\d{2}):(?P\d{2}):(?P\d{2})(\.\d+)?Z$" # NOQA: E501 ) # dict key used to add nested documents to a document NESTED_DOC_KEY = "_childDocuments_" VALID_XML_CHARS_REGEX = re.compile( "[^\u0020-\ud7ff\u0009\u000a\u000d\ue000-\ufffd\U00010000-\U0010ffff]+" ) class NullHandler(logging.Handler): def emit(self, record): pass # Add the ``NullHandler`` to avoid logging by default while still allowing # others to attach their own handlers. LOG = logging.getLogger("pysolr") h = NullHandler() LOG.addHandler(h) # For debugging... if os.environ.get("DEBUG_PYSOLR", "").lower() in ("true", "1"): LOG.setLevel(logging.DEBUG) stream = logging.StreamHandler() LOG.addHandler(stream) def force_unicode(value): """ Forces a bytestring to become a Unicode string. """ if isinstance(value, bytes): value = value.decode("utf-8", errors="replace") elif not isinstance(value, str): value = str(value) return value def force_bytes(value): """ Forces a Unicode string to become a bytestring. """ if isinstance(value, str): value = value.encode("utf-8", "backslashreplace") return value def unescape_html(text): """ Removes HTML or XML character references and entities from a text string. @param text The HTML (or XML) source text. 
@return The plain text, as a Unicode string, if necessary. Source: http://effbot.org/zone/re-sub.htm#unescape-html """ def fixup(m): text = m.group(0) if text[:2] == "&#": # character reference try: if text[:3] == "&#x": return chr(int(text[3:-1], 16)) else: return chr(int(text[2:-1])) except ValueError: pass else: # named entity try: text = chr(htmlentities.name2codepoint[text[1:-1]]) except KeyError: pass return text # leave as is return re.sub(r"&#?\w+;", fixup, text) def safe_urlencode(params, doseq=False): """ URL-encode parameters using UTF-8 encoding. This is a wrapper around `urllib.parse.urlencode` that ensures consistent UTF-8 handling for all parameter values. """ return urlencode(params, doseq) def clean_xml_string(s): """ Cleans string from invalid xml chars Solution was found there:: http://stackoverflow.com/questions/8733233/filtering-out-certain-bytes-in-python """ return VALID_XML_CHARS_REGEX.sub("", s) class SolrError(Exception): pass class Results(object): """ Default results class for wrapping decoded (from JSON) solr responses. Required ``decoded`` argument must be a Solr response dictionary. Individual documents can be retrieved either through ``docs`` attribute or by iterating over results instance. Optional ``next_page_query`` argument is a callable to be invoked when iterating over the documents from the result. Example:: results = Results({ 'response': { 'docs': [{'id': 1}, {'id': 2}, {'id': 3}], 'numFound': 3, } }) # this: for doc in results: print doc # ... is equivalent to: for doc in results.docs: print doc # also: list(results) == results.docs Note that ``Results`` object does not support indexing and slicing. If you need to retrieve documents by index just use ``docs`` attribute. Other common response metadata (debug, highlighting, qtime, etc.) are available as attributes. The full response from Solr is provided as the `raw_response` dictionary for use with features which change the response format. 
""" def __init__(self, decoded, next_page_query=None): self.raw_response = decoded # main response part of decoded Solr response response_part = decoded.get("response") or {} self.docs = response_part.get("docs", ()) self.hits = response_part.get("numFound", 0) # other response metadata self.debug = decoded.get("debug", {}) self.highlighting = decoded.get("highlighting", {}) self.facets = decoded.get("facet_counts", {}) self.spellcheck = decoded.get("spellcheck", {}) self.stats = decoded.get("stats", {}) self.qtime = decoded.get("responseHeader", {}).get("QTime", None) self.grouped = decoded.get("grouped", {}) self.nextCursorMark = decoded.get("nextCursorMark", None) self._next_page_query = ( self.nextCursorMark is not None and next_page_query ) or None def __len__(self): if self._next_page_query: return self.hits else: return len(self.docs) def __iter__(self): result = self while result: for d in result.docs: yield d result = result._next_page_query and result._next_page_query() class Solr(object): """ The main object for working with Solr. Optionally accepts ``decoder`` for an alternate JSON decoder instance. Default is ``json.JSONDecoder()``. Optionally accepts ``encoder`` for an alternate JSON Encoder instance. Default is ``json.JSONEncoder()``. Optionally accepts ``timeout`` for wait seconds until giving up on a request. Default is ``60`` seconds. Optionally accepts ``results_cls`` that specifies class of results object returned by ``.search()`` and ``.more_like_this()`` methods. Default is ``pysolr.Results``. Usage:: solr = pysolr.Solr('http://localhost:8983/solr') # With a 10 second timeout. 
solr = pysolr.Solr('http://localhost:8983/solr', timeout=10) # with a dict as a default results class instead of pysolr.Results solr = pysolr.Solr('http://localhost:8983/solr', results_cls=dict) """ def __init__( self, url, decoder=None, encoder=None, timeout=60, results_cls=Results, search_handler="select", use_qt_param=False, always_commit=False, auth=None, verify=True, session=None, ): self.decoder = decoder or json.JSONDecoder() self.encoder = encoder or json.JSONEncoder() self.url = url self.timeout = timeout self.log = self._get_log() self.session = session self.results_cls = results_cls self.search_handler = search_handler self.use_qt_param = use_qt_param self.auth = auth self.verify = verify self.always_commit = always_commit def get_session(self): if self.session is None: self.session = requests.Session() self.session.stream = False self.session.verify = self.verify return self.session def _get_log(self): return LOG def _create_full_url(self, path=""): if len(path): return "/".join([self.url.rstrip("/"), path.lstrip("/")]) # No path? No problem. return self.url def _send_request(self, method, path="", body=None, headers=None, files=None): url = self._create_full_url(path) method = method.lower() log_body = body if headers is None: headers = {} if log_body is None: log_body = "" elif not isinstance(log_body, str): log_body = repr(body) self.log.debug( "Starting request to '%s' (%s) with body '%s'...", url, method, log_body[:10], ) start_time = time.time() session = self.get_session() try: requests_method = getattr(session, method) except AttributeError: raise SolrError("Unable to use unknown HTTP method '{0}.".format(method)) # Everything except the body can be Unicode. The body must be # encoded to bytes to work properly on Py3. 
bytes_body = body if bytes_body is not None: bytes_body = force_bytes(body) try: resp = requests_method( url, data=bytes_body, headers=headers, files=files, timeout=self.timeout, auth=self.auth, ) except requests.exceptions.Timeout as err: error_message = "Connection to server '%s' timed out: %s" self.log.exception(error_message, url, err) # NOQA: G200 raise SolrError(error_message % (url, err)) except requests.exceptions.ConnectionError as err: error_message = "Failed to connect to server at %s: %s" self.log.exception(error_message, url, err) # NOQA: G200 raise SolrError(error_message % (url, err)) except HTTPException as err: error_message = "Unhandled error: %s %s: %s" self.log.exception(error_message, method, url, err) # NOQA: G200 raise SolrError(error_message % (method, url, err)) end_time = time.time() self.log.info( "Finished '%s' (%s) with body '%s' in %0.3f seconds, with status %s", url, method, log_body[:10], end_time - start_time, resp.status_code, ) if int(resp.status_code) != 200: error_message = "Solr responded with an error (HTTP %s): %s" solr_message = self._extract_error(resp) self.log.error( error_message, resp.status_code, solr_message, extra={ "data": { "headers": resp.headers, "response": resp.content, "request_body": bytes_body, "request_headers": headers, } }, ) raise SolrError(error_message % (resp.status_code, solr_message)) return force_unicode(resp.content) def _select(self, params, handler=None): """ :param params: :param handler: defaults to self.search_handler (fallback to 'select') :return: """ # Returns json docs unless otherwise specified params.setdefault("wt", "json") custom_handler = handler or self.search_handler handler = "select" if custom_handler: if self.use_qt_param: params["qt"] = custom_handler else: handler = custom_handler params_encoded = safe_urlencode(params, True) if len(params_encoded) < 1024: # Typical case. 
path = "%s/?%s" % (handler, params_encoded) return self._send_request("get", path) else: # Handles very long queries by submitting as a POST. path = "%s/" % handler headers = { "Content-type": "application/x-www-form-urlencoded; charset=utf-8" } return self._send_request( "post", path, body=params_encoded, headers=headers ) def _mlt(self, params, handler="mlt"): return self._select(params, handler) def _suggest_terms(self, params, handler="terms"): return self._select(params, handler) def _update( self, message, clean_ctrl_chars=True, commit=None, softCommit=False, commitWithin=None, waitFlush=None, waitSearcher=None, overwrite=None, handler="update", solrapi="XML", min_rf=None, ): """ Posts the given xml or json message to http:///update and returns the result. Passing `clean_ctrl_chars` as False will prevent the message from being cleaned of control characters (default True). This is done by default because these characters would cause Solr to fail to parse the XML. Only pass False if you're positive your data is clean. """ # Per http://wiki.apache.org/solr/UpdateXmlMessages, we can append a # ``commit=true`` to the URL and have the commit happen without a # second request. 
query_vars = [] path_handler = handler if self.use_qt_param: path_handler = "select" query_vars.append("qt=%s" % safe_urlencode(handler, True)) path = "%s/" % path_handler if commit is None: commit = self.always_commit if min_rf: query_vars.append("min_rf=%i" % min_rf) if commit: query_vars.append("commit=%s" % str(bool(commit)).lower()) elif softCommit: query_vars.append("softCommit=%s" % str(bool(softCommit)).lower()) elif commitWithin is not None: query_vars.append("commitWithin=%s" % str(int(commitWithin))) if waitFlush is not None: query_vars.append("waitFlush=%s" % str(bool(waitFlush)).lower()) if overwrite is not None: query_vars.append("overwrite=%s" % str(bool(overwrite)).lower()) if waitSearcher is not None: query_vars.append("waitSearcher=%s" % str(bool(waitSearcher)).lower()) if query_vars: path = "%s?%s" % (path, "&".join(query_vars)) # Clean the message of ctrl characters. if clean_ctrl_chars: message = sanitize(message) if solrapi == "XML": return self._send_request( "post", path, message, {"Content-type": "text/xml; charset=utf-8"} ) elif solrapi == "JSON": return self._send_request( "post", path, message, {"Content-type": "application/json; charset=utf-8"}, ) else: raise ValueError("unknown solrapi {}".format(solrapi)) def _extract_error(self, resp): """ Extract the actual error message from a solr response. """ reason = resp.headers.get("reason", None) full_response = None if reason is None: try: # if response is in json format reason = resp.json()["error"]["msg"] except KeyError: # if json response has unexpected structure full_response = resp.content except ValueError: # otherwise we assume it's html reason, full_html = self._scrape_response(resp.headers, resp.content) full_response = unescape_html(full_html) msg = "[Reason: %s]" % reason if reason is None: msg += "\n%s" % full_response return msg def _scrape_response(self, headers, response): """ Scrape the html response. 
""" # identify the responding server server_type = None server_string = headers.get("server", "") if server_string and "jetty" in server_string.lower(): server_type = "jetty" if server_string and "coyote" in server_string.lower(): server_type = "tomcat" reason = None full_html = "" dom_tree = None if hasattr(response, "decode"): response = response.decode() if response.startswith("]*>\s*(.+?)\s*", response, re.IGNORECASE) if m: reason = m.group(2) else: full_html = "%s" % response else: # Let's assume others do produce a valid XML response try: dom_tree = ElementTree.fromstring(response) reason_node = None # html page might be different for every server if server_type == "jetty": reason_node = dom_tree.find("body/pre") else: reason_node = dom_tree.find("head/title") if reason_node is not None: reason = reason_node.text if reason is None: full_html = ElementTree.tostring(dom_tree) except SyntaxError as err: LOG.warning( # NOQA: G200 "Unable to extract error message from invalid XML: %s", err, extra={"data": {"response": response}}, ) full_html = "%s" % response full_html = force_unicode(full_html) full_html = full_html.replace("\n", "") full_html = full_html.replace("\r", "") full_html = full_html.replace("
", "") full_html = full_html.replace("
", "") full_html = full_html.strip() return reason, full_html # Conversion ############################################################# def _from_python(self, value): """ Converts python values to a form suitable for insertion into the xml we send to solr. """ if hasattr(value, "strftime"): if hasattr(value, "hour"): offset = value.utcoffset() if offset: value = value - offset value = value.replace(tzinfo=None).isoformat() + "Z" else: value = "%sT00:00:00Z" % value.isoformat() elif isinstance(value, bool): if value: value = "true" else: value = "false" else: if isinstance(value, bytes): value = str(value, errors="replace") # NOQA: F821 value = "{0}".format(value) return clean_xml_string(value) def _to_python(self, value): """ Converts values from Solr to native Python values. """ if isinstance(value, (int, float, complex)): return value if isinstance(value, (list, tuple)): result = [self._to_python(v) for v in value] if isinstance(value, tuple): result = tuple(result) return result if value == "true": return True elif value == "false": return False is_string = False if isinstance(value, bytes): value = force_unicode(value) if isinstance(value, str): is_string = True if is_string: possible_datetime = DATETIME_REGEX.search(value) if possible_datetime: date_values = possible_datetime.groupdict() for dk, dv in date_values.items(): date_values[dk] = int(dv) return datetime.datetime( date_values["year"], date_values["month"], date_values["day"], date_values["hour"], date_values["minute"], date_values["second"], ) try: # This is slightly gross but it's hard to tell otherwise what the # string's original type might have been. return ast.literal_eval(value) except (ValueError, SyntaxError): # If it fails, continue on. pass return value def _is_null_value(self, value): """ Check if a given value is ``null``. Criteria for this is based on values that shouldn't be included in the Solr ``add`` request at all. 
""" if value is None: return True if isinstance(value, str) and len(value) == 0: return True # TODO: This should probably be removed when solved in core Solr level? return False # API Methods ############################################################ def search(self, q, search_handler=None, **kwargs): """ Performs a search and returns the results. Requires a ``q`` for a string version of the query to run. Optionally accepts ``**kwargs`` for additional options to be passed through the Solr URL. Returns ``self.results_cls`` class object (defaults to ``pysolr.Results``) Usage:: # All docs. results = solr.search('*:*') # Search with highlighting. results = solr.search('ponies', **{ 'hl': 'true', 'hl.fragsize': 10, }) """ params = {"q": q} params.update(kwargs) response = self._select(params, handler=search_handler) decoded = self.decoder.decode(response) self.log.debug( "Found '%s' search results.", # cover both cases: there is no response key or value is None (decoded.get("response", {}) or {}).get("numFound", 0), ) cursorMark = params.get("cursorMark", None) if cursorMark != decoded.get("nextCursorMark", cursorMark): def next_page_query(): nextParams = params.copy() nextParams["cursorMark"] = decoded["nextCursorMark"] return self.search(search_handler=search_handler, **nextParams) return self.results_cls(decoded, next_page_query) else: return self.results_cls(decoded) def more_like_this(self, q, mltfl, handler="mlt", **kwargs): """ Finds and returns results similar to the provided query. Returns ``self.results_cls`` class object (defaults to ``pysolr.Results``) Requires Solr 1.3+. 
Usage:: similar = solr.more_like_this('id:doc_234', 'text') """ params = {"q": q, "mlt.fl": mltfl} params.update(kwargs) response = self._mlt(params, handler=handler) decoded = self.decoder.decode(response) self.log.debug( "Found '%s' MLT results.", # cover both cases: there is no response key or value is None (decoded.get("response", {}) or {}).get("numFound", 0), ) return self.results_cls(decoded) def suggest_terms(self, fields, prefix, handler="terms", **kwargs): """ Accepts a list of field names and a prefix Returns a dictionary keyed on field name containing a list of ``(term, count)`` pairs Requires Solr 1.4+. """ params = {"terms.fl": fields, "terms.prefix": prefix} params.update(kwargs) response = self._suggest_terms(params, handler=handler) result = self.decoder.decode(response) terms = result.get("terms", {}) res = {} # in Solr 1.x the value of terms is list of elements with the field name # and a flat list of value, count pairs: # ["field_name", ["dance", 23, "dancers", 10, …]] # # in Solr 3+ the value of terms is a dict of field name and a flat list of # value, count pairs: {"field_name": ["dance", 23, "dancers", 10, …]} if isinstance(terms, (list, tuple)): terms = dict(zip(terms[0::2], terms[1::2])) for field, values in terms.items(): tmp = [] while values: tmp.append((values.pop(0), values.pop(0))) res[field] = tmp self.log.debug( "Found '%d' Term suggestions results.", sum(len(j) for i, j in res.items()) ) return res def _build_docs(self, docs, boost=None, fieldUpdates=None): # if no boost needed use json multidocument api # The JSON API skips the XML conversion and speedup load from 15 to 20 times. # CPU Usage is drastically lower. 
if boost is None: solrapi = "JSON" message = docs # single doc convert to array of docs if isinstance(message, dict): # convert dict to list message = [message] # json array of docs if isinstance(message, list): # convert to string cleaned_message = [ self._build_json_doc(doc, fieldUpdates=fieldUpdates) for doc in message ] m = self.encoder.encode(cleaned_message).encode("utf-8") else: raise ValueError("wrong message type") else: solrapi = "XML" message = ElementTree.Element("add") for doc in docs: el = self._build_xml_doc(doc, boost=boost, fieldUpdates=fieldUpdates) message.append(el) # This returns a bytestring. Ugh. m = ElementTree.tostring(message, encoding="utf-8") # Convert back to Unicode please. m = force_unicode(m) return (solrapi, m, len(message)) def _build_json_doc(self, doc, fieldUpdates=None): if fieldUpdates is None: cleaned_doc = {k: v for k, v in doc.items() if not self._is_null_value(v)} else: # id must be added without a modifier # if using field updates, all other fields should have a modifier cleaned_doc = { k: {fieldUpdates[k]: v} if k in fieldUpdates else v for k, v in doc.items() } return cleaned_doc def _build_xml_doc(self, doc, boost=None, fieldUpdates=None): doc_elem = ElementTree.Element("doc") for key, value in doc.items(): if key == NESTED_DOC_KEY: for child in value: doc_elem.append(self._build_xml_doc(child, boost, fieldUpdates)) continue if key == "boost": doc_elem.set("boost", force_unicode(value)) continue # To avoid multiple code-paths we'd like to treat all of our values # as iterables: if isinstance(value, (list, tuple, set)): values = value else: values = (value,) use_field_updates = fieldUpdates and key in fieldUpdates if use_field_updates and not values: values = ("",) for bit in values: attrs = {"name": key} if self._is_null_value(bit): if use_field_updates: bit = "" attrs["null"] = "true" else: continue if key == "_doc": child = self._build_xml_doc(bit, boost) doc_elem.append(child) continue if use_field_updates: 
attrs["update"] = fieldUpdates[key] if boost and key in boost: attrs["boost"] = force_unicode(boost[key]) field = ElementTree.Element("field", **attrs) field.text = self._from_python(bit) doc_elem.append(field) return doc_elem def add( self, docs, boost=None, fieldUpdates=None, commit=None, softCommit=False, commitWithin=None, waitFlush=None, waitSearcher=None, overwrite=None, handler="update", min_rf=None, ): """ Adds or updates documents. Requires ``docs``, which is a list of dictionaries. Each key is the field name and each value is the value to index. Optionally accepts ``commit``. Default is ``None``. None signals to use default Optionally accepts ``softCommit``. Default is ``False``. Optionally accepts ``boost``. Default is ``None``. Optionally accepts ``fieldUpdates``. Default is ``None``. Optionally accepts ``commitWithin``. Default is ``None``. Optionally accepts ``waitFlush``. Default is ``None``. Optionally accepts ``waitSearcher``. Default is ``None``. Optionally accepts ``overwrite``. Default is ``None``. Optionally accepts ``min_rf``. Default is ``None``. Usage:: solr.add([ { "id": "doc_1", "title": "A test document", }, { "id": "doc_2", "title": "The Banana: Tasty or Dangerous?", }, ]) """ start_time = time.time() self.log.debug("Starting to build add request...") solrapi, m, len_message = self._build_docs( docs, boost, fieldUpdates, ) end_time = time.time() self.log.debug( "Built add request of %s docs in %0.2f seconds.", len_message, end_time - start_time, ) return self._update( m, commit=commit, softCommit=softCommit, commitWithin=commitWithin, waitFlush=waitFlush, waitSearcher=waitSearcher, overwrite=overwrite, handler=handler, solrapi=solrapi, min_rf=min_rf, ) def delete( self, id=None, # NOQA: A002 q=None, commit=None, softCommit=False, waitFlush=None, waitSearcher=None, handler="update", ): # NOQA: A002 """ Deletes documents. Requires *either* ``id`` or ``query``. ``id`` is if you know the specific document id to remove. 
Note that ``id`` can also be a list of document ids to be deleted. ``query`` is a Lucene-style query indicating a collection of documents to delete. Optionally accepts ``commit``. Default is ``True``. Optionally accepts ``softCommit``. Default is ``False``. Optionally accepts ``waitFlush``. Default is ``None``. Optionally accepts ``waitSearcher``. Default is ``None``. Usage:: solr.delete(id='doc_12') solr.delete(id=['doc_1', 'doc_3']) solr.delete(q='*:*') """ if id is None and q is None: raise ValueError('You must specify "id" or "q".') elif id is not None and q is not None: raise ValueError('You many only specify "id" OR "q", not both.') elif id is not None: if not isinstance(id, (list, set, tuple)): doc_id = [id] else: doc_id = list(filter(None, id)) if doc_id: et = ElementTree.Element("delete") for one_doc_id in doc_id: subelem = ElementTree.SubElement(et, "id") subelem.text = one_doc_id m = ElementTree.tostring(et) else: raise ValueError("The list of documents to delete was empty.") elif q is not None: et = ElementTree.Element("delete") subelem = ElementTree.SubElement(et, "query") subelem.text = q m = ElementTree.tostring(et) return self._update( m, commit=commit, softCommit=softCommit, waitFlush=waitFlush, waitSearcher=waitSearcher, handler=handler, ) def commit( self, softCommit=False, waitFlush=None, waitSearcher=None, expungeDeletes=None, handler="update", ): """ Forces Solr to write the index data to disk. Optionally accepts ``expungeDeletes``. Default is ``None``. Optionally accepts ``waitFlush``. Default is ``None``. Optionally accepts ``waitSearcher``. Default is ``None``. Optionally accepts ``softCommit``. Default is ``False``. 
Usage:: solr.commit() """ if expungeDeletes is not None: msg = '' % str(bool(expungeDeletes)).lower() else: msg = "" return self._update( msg, commit=not softCommit, softCommit=softCommit, waitFlush=waitFlush, waitSearcher=waitSearcher, handler=handler, ) def optimize( self, commit=True, waitFlush=None, waitSearcher=None, maxSegments=None, handler="update", ): """ Tells Solr to streamline the number of segments used, essentially a defragmentation operation. Optionally accepts ``maxSegments``. Default is ``None``. Optionally accepts ``waitFlush``. Default is ``None``. Optionally accepts ``waitSearcher``. Default is ``None``. Usage:: solr.optimize() """ if maxSegments: msg = '' % maxSegments else: msg = "" return self._update( msg, commit=commit, waitFlush=waitFlush, waitSearcher=waitSearcher, handler=handler, ) def extract(self, file_obj, extractOnly=True, handler="update/extract", **kwargs): """ POSTs a file to the Solr ExtractingRequestHandler so rich content can be processed using Apache Tika. See the Solr wiki for details: http://wiki.apache.org/solr/ExtractingRequestHandler The ExtractingRequestHandler has a very simple model: it extracts contents and metadata from the uploaded file and inserts it directly into the index. This is rarely useful as it allows no way to store additional data or otherwise customize the record. Instead, by default we'll use the extract-only mode to extract the data without indexing it so the caller has the opportunity to process it as appropriate; call with ``extractOnly=False`` if you want to insert with no additional processing. 
Returns None if metadata cannot be extracted; otherwise returns a dictionary containing at least two keys: :contents: Extracted full-text content, if applicable :metadata: key:value pairs of text strings """ if not hasattr(file_obj, "name"): raise ValueError( "extract() requires file-like objects which have a defined name" ) params = { "extractOnly": "true" if extractOnly else "false", "lowernames": "true", "wt": "json", } params.update(kwargs) filename = quote(file_obj.name.encode("utf-8")) try: # We'll provide the file using its true name as Tika may use that # as a file type hint: resp = self._send_request( "post", handler, body=params, files={"file": (filename, file_obj)} ) except (IOError, SolrError): self.log.exception("Failed to extract document metadata") raise try: data = self.decoder.decode(resp) except ValueError: self.log.exception("Failed to load JSON response") raise data["contents"] = data.pop(filename, None) data["metadata"] = metadata = {} raw_metadata = data.pop("%s_metadata" % filename, None) if raw_metadata: # The raw format is somewhat annoying: it's a flat list of # alternating keys and value lists while raw_metadata: metadata[raw_metadata.pop()] = raw_metadata.pop() return data def ping(self, handler="admin/ping", **kwargs): """ Sends a ping request. Usage:: solr.ping() """ params = kwargs params_encoded = safe_urlencode(params, True) if len(params_encoded) < 1024: # Typical case. path = "%s/?%s" % (handler, params_encoded) return self._send_request("get", path) else: # Handles very long queries by submitting as a POST. 
path = "%s/" % handler headers = { "Content-type": "application/x-www-form-urlencoded; charset=utf-8" } return self._send_request( "post", path, body=params_encoded, headers=headers ) class SolrCoreAdmin(object): """ Handles core admin operations: see http://wiki.apache.org/solr/CoreAdmin This must be initialized with the full admin cores URL:: solr_admin = SolrCoreAdmin('http://localhost:8983/solr/admin/cores') status = solr_admin.status() Operations offered by Solr are: 1. STATUS 2. CREATE 3. RELOAD 4. RENAME 5. ALIAS 6. SWAP 7. UNLOAD 8. LOAD (not currently implemented) """ def __init__(self, url, *args, **kwargs): super(SolrCoreAdmin, self).__init__(*args, **kwargs) self.url = url def _get_url(self, url, params=None, headers=None): if params is None: params = {} if headers is None: headers = {"Content-Type": "application/x-www-form-urlencoded"} resp = requests.get(url, data=safe_urlencode(params), headers=headers) return force_unicode(resp.content) def status(self, core=None): """ Get core status information See https://wiki.apache.org/solr/CoreAdmin#STATUS """ params = {"action": "STATUS"} if core is not None: params.update(core=core) return self._get_url(self.url, params=params) def create( self, name, instance_dir=None, config="solrconfig.xml", schema="schema.xml" ): """ Create a new core See https://wiki.apache.org/solr/CoreAdmin#CREATE """ params = {"action": "CREATE", "name": name, "config": config, "schema": schema} if instance_dir is None: params.update(instanceDir=name) else: params.update(instanceDir=instance_dir) return self._get_url(self.url, params=params) def reload(self, core): # NOQA: A003 """ Reload a core See https://wiki.apache.org/solr/CoreAdmin#RELOAD """ params = {"action": "RELOAD", "core": core} return self._get_url(self.url, params=params) def rename(self, core, other): """ Rename a core See http://wiki.apache.org/solr/CoreAdmin#RENAME """ params = {"action": "RENAME", "core": core, "other": other} return self._get_url(self.url, 
params=params) def swap(self, core, other): """ Swap a core See http://wiki.apache.org/solr/CoreAdmin#SWAP """ params = {"action": "SWAP", "core": core, "other": other} return self._get_url(self.url, params=params) def unload(self, core): """ Unload a core See http://wiki.apache.org/solr/CoreAdmin#UNLOAD """ params = {"action": "UNLOAD", "core": core} return self._get_url(self.url, params=params) def load(self, core): raise NotImplementedError("Solr 1.4 and below do not support this operation.") # Using two-tuples to preserve order. REPLACEMENTS = ( # Nuke nasty control characters. (b"\x00", b""), # Start of heading (b"\x01", b""), # Start of heading (b"\x02", b""), # Start of text (b"\x03", b""), # End of text (b"\x04", b""), # End of transmission (b"\x05", b""), # Enquiry (b"\x06", b""), # Acknowledge (b"\x07", b""), # Ring terminal bell (b"\x08", b""), # Backspace (b"\x0b", b""), # Vertical tab (b"\x0c", b""), # Form feed (b"\x0e", b""), # Shift out (b"\x0f", b""), # Shift in (b"\x10", b""), # Data link escape (b"\x11", b""), # Device control 1 (b"\x12", b""), # Device control 2 (b"\x13", b""), # Device control 3 (b"\x14", b""), # Device control 4 (b"\x15", b""), # Negative acknowledge (b"\x16", b""), # Synchronous idle (b"\x17", b""), # End of transmission block (b"\x18", b""), # Cancel (b"\x19", b""), # End of medium (b"\x1a", b""), # Substitute character (b"\x1b", b""), # Escape (b"\x1c", b""), # File separator (b"\x1d", b""), # Group separator (b"\x1e", b""), # Record separator (b"\x1f", b""), # Unit separator ) def sanitize(data): fixed_string = force_bytes(data) for bad, good in REPLACEMENTS: fixed_string = fixed_string.replace(bad, good) return force_unicode(fixed_string) class SolrCloud(Solr): def __init__( self, zookeeper, collection, decoder=None, encoder=None, timeout=60, retry_count=5, retry_timeout=0.2, auth=None, verify=True, *args, **kwargs, ): url = zookeeper.getRandomURL(collection) self.auth = auth self.collection = collection self.retry_count 
= retry_count self.retry_timeout = retry_timeout self.verify = verify self.zookeeper = zookeeper super(SolrCloud, self).__init__( url, decoder=decoder, encoder=encoder, timeout=timeout, auth=self.auth, verify=self.verify, *args, **kwargs, ) def _send_request(self, method, path="", body=None, headers=None, files=None): for retry_number in range(self.retry_count): try: self.url = self.zookeeper.getRandomURL(self.collection) return Solr._send_request(self, method, path, body, headers, files) except (SolrError, requests.exceptions.RequestException): LOG.exception( "%s %s failed on retry %s, will retry after %0.1fs", method, self.url, retry_number, self.retry_timeout, ) time.sleep(self.retry_timeout) raise SolrError( "Request %s %s failed after %d attempts" % (method, path, self.retry_count) ) def _update(self, *args, **kwargs): self.url = self.zookeeper.getLeaderURL(self.collection) LOG.debug("Using leader URL: %s", self.url) return Solr._update(self, *args, **kwargs) class ZooKeeper(object): # Constants used by the REST API: LIVE_NODES_ZKNODE = "/live_nodes" ALIASES = "/aliases.json" CLUSTER_STATE = "/clusterstate.json" COLLECTION_STATUS = "/collections" COLLECTION_STATE = "/collections/%s/state.json" SHARDS = "shards" REPLICAS = "replicas" STATE = "state" ACTIVE = "active" LEADER = "leader" BASE_URL = "base_url" TRUE = "true" FALSE = "false" COLLECTION = "collection" def __init__(self, zkServerAddress, timeout=15, max_retries=-1, kazoo_client=None): if KazooClient is None: logging.error("ZooKeeper requires the `kazoo` library to be installed") raise RuntimeError self.collections = {} self.liveNodes = {} self.aliases = {} self.state = None if kazoo_client is None: self.zk = KazooClient( zkServerAddress, read_only=True, timeout=timeout, command_retry={"max_tries": max_retries}, connection_retry={"max_tries": max_retries}, ) else: self.zk = kazoo_client self.zk.start() def connectionListener(state): if state == KazooState.LOST: self.state = state elif state == 
KazooState.SUSPENDED: self.state = state self.zk.add_listener(connectionListener) @self.zk.DataWatch(ZooKeeper.CLUSTER_STATE) def watchClusterState(data, *args, **kwargs): if not data: LOG.warning("No cluster state available: no collections defined?") else: self.collections = json.loads(data.decode("utf-8")) LOG.info("Updated collections: %s", self.collections) @self.zk.ChildrenWatch(ZooKeeper.LIVE_NODES_ZKNODE) def watchLiveNodes(children): self.liveNodes = children LOG.info("Updated live nodes: %s", children) @self.zk.DataWatch(ZooKeeper.ALIASES) def watchAliases(data, stat): if data: json_data = json.loads(data.decode("utf-8")) if ZooKeeper.COLLECTION in json_data: self.aliases = json_data[ZooKeeper.COLLECTION] else: LOG.warning( "Expected to find %s in alias update %s", ZooKeeper.COLLECTION, json_data.keys(), ) else: self.aliases = None LOG.info("Updated aliases: %s", self.aliases) def watchCollectionState(data, *args, **kwargs): if not data: LOG.warning("No cluster state available: no collections defined?") else: self.collections.update(json.loads(data.decode("utf-8"))) LOG.info("Updated collections: %s", self.collections) @self.zk.ChildrenWatch(ZooKeeper.COLLECTION_STATUS) def watchCollectionStatus(children): LOG.info("Updated collection: %s", children) for c in children: self.zk.DataWatch(self.COLLECTION_STATE % c, watchCollectionState) def getHosts(self, collname, only_leader=False, seen_aliases=None): if self.aliases and collname in self.aliases: return self.getAliasHosts(collname, only_leader, seen_aliases) hosts = [] if collname not in self.collections: raise SolrError("Unknown collection: %s" % collname) collection = self.collections[collname] shards = collection[ZooKeeper.SHARDS] for shardname in shards.keys(): shard = shards[shardname] if shard[ZooKeeper.STATE] == ZooKeeper.ACTIVE: replicas = shard[ZooKeeper.REPLICAS] for replicaname in replicas.keys(): replica = replicas[replicaname] if replica[ZooKeeper.STATE] == ZooKeeper.ACTIVE: if not only_leader 
or ( replica.get(ZooKeeper.LEADER, None) == ZooKeeper.TRUE ): base_url = replica[ZooKeeper.BASE_URL] if base_url not in hosts: hosts.append(base_url) return hosts def getAliasHosts(self, collname, only_leader, seen_aliases): if seen_aliases: if collname in seen_aliases: LOG.warning("%s in circular alias definition - ignored", collname) return [] else: seen_aliases = [] seen_aliases.append(collname) collections = self.aliases[collname].split(",") hosts = [] for collection in collections: for host in self.getHosts(collection, only_leader, seen_aliases): if host not in hosts: hosts.append(host) return hosts def getRandomURL(self, collname, only_leader=False): hosts = self.getHosts(collname, only_leader=only_leader) if not hosts: raise SolrError("ZooKeeper returned no active shards!") return "%s/%s" % (random.choice(hosts), collname) # NOQA: S311 def getLeaderURL(self, collname): return self.getRandomURL(collname, only_leader=True) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/run-tests.py0000755000175100017510000000102115107044623015107 0ustar00runnerrunner#!/usr/bin/env python import faulthandler import signal import unittest from tests import utils as test_utils def main(): faulthandler.register(signal.SIGUSR1, all_threads=True) print("Installed SIGUSR1 handler to print stack traces: pkill -USR1 -f run-tests") test_utils.prepare() test_utils.start_solr() try: unittest.main(module="tests", verbosity=1) finally: print("Tests complete; halting Solr servers…") test_utils.stop_solr() if __name__ == "__main__": main() ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1763461527.882978 pysolr-3.11.0/setup.cfg0000644000175100017510000000004615107044630014413 0ustar00runnerrunner[egg_info] tag_build = tag_date = 0 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/setup.py0000644000175100017510000000173015107044623014307 
0ustar00runnerrunnertry: from setuptools import setup except ImportError: from distutils.core import setup setup( name="pysolr", use_scm_version=True, description="Lightweight Python client for Apache Solr", author="Daniel Lindsley", author_email="daniel@toastdriven.com", long_description=open("README.rst", "r").read(), py_modules=["pysolr"], classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Programming Language :: Python", "Topic :: Internet :: WWW/HTTP :: Indexing/Search", "Programming Language :: Python :: 3", ], url="https://github.com/django-haystack/pysolr/", license="BSD", install_requires=["requests>=2.32.5", "setuptools"], python_requires=">=3.10", extras_require={"solrcloud": ["kazoo>=2.5.0"]}, setup_requires=["setuptools_scm"], ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/sonar-project.properties0000644000175100017510000000040015107044623017472 0ustar00runnerrunnersonar.exclusions=coverage.xml,solr/** sonar.sources=. sonar.projectKey=django-haystack_pysolr sonar.organization=django-haystack sonar.host.url=https://sonarcloud.io sonar.python.coverage.reportPaths=coverage.xml sonar.coverage.exclusions=setup.py,solr/** ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1763461523.0 pysolr-3.11.0/start-solr-test-server.sh0000755000175100017510000001357115107044623017535 0ustar00runnerrunner#!/bin/bash set -e # Redirect output to log files when stdin is not a TTY: if [ ! 
-t 0 ]; then exec 1>test-solr.stdout.log 2>test-solr.stderr.log fi SOLR_VERSION=4.10.4 ROOT=$(cd `dirname $0`; pwd) APP=$ROOT/solr-app PIDS=$ROOT/solr.pids export SOLR_ARCHIVE="solr-${SOLR_VERSION}.tgz" LOGS=$ROOT/logs cd $ROOT function download_solr() { if [ -d "${HOME}/download-cache/" ]; then export SOLR_ARCHIVE="${HOME}/download-cache/${SOLR_ARCHIVE}" fi if [ -f ${SOLR_ARCHIVE} ]; then # If the tarball doesn't extract cleanly, remove it so it'll download again: tar -tf ${SOLR_ARCHIVE} > /dev/null || rm ${SOLR_ARCHIVE} fi if [ ! -f ${SOLR_ARCHIVE} ]; then SOLR_DOWNLOAD_URL=$(python get-solr-download-url.py $SOLR_VERSION) curl -Lo $SOLR_ARCHIVE ${SOLR_DOWNLOAD_URL} || (echo "Unable to download ${SOLR_DOWNLOAD_URL}"; exit 2) fi } function extract_solr() { APP=solr-app echo "Extracting Solr ${SOLR_VERSION} to `pwd`/$APP" rm -rf $APP mkdir $APP tar -C $APP -xf ${SOLR_ARCHIVE} --strip-components 1 solr-${SOLR_VERSION} } function prepare_solr_home() { SOLR_HOME=$1 HOST=$2 echo "Preparing SOLR_HOME at $SOLR_HOME for host $HOST" APP=$(pwd)/solr-app mkdir -p ${SOLR_HOME} cp solr-app/example/solr/solr.xml ${SOLR_HOME}/ cp solr-app/example/solr/zoo.cfg ${SOLR_HOME}/ } function prepare_core() { SOLR_HOME=$1 CORE=$2 echo "Preparing core $CORE" CORE_DIR=${SOLR_HOME}/${CORE} mkdir -p ${CORE_DIR} cp -r solr-app/example/solr/collection1/conf ${CORE_DIR}/ perl -p -i -e 's|\n \n\n\n