toolz-0.11.2 source tree (git commit 294e981edad035a7ac6f0e2b48f1738368fa4b34)

===== toolz-0.11.2/.gitattributes =====

toolz/_version.py export-subst

===== toolz-0.11.2/.github/workflows/test.yml =====

name: Test

on:
  push:
    branches: [ master ]
  pull_request:

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        python-version: [3.5, 3.6, 3.7, 3.8, 3.9, "3.10-dev", "pypy-3.6", "pypy-3.7"]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install coverage pep8 pytest
          pip install -e .
      - name: PyTest
        run: |
          coverage run -m pytest --doctest-modules toolz/
          pytest bench/
          pep8 --ignore="E731,W503,E402" --exclude=conf.py,tests,examples,bench -r --show-source .
      - name: Coverage
        env:
          GITHUB_TOKEN: ${{ secrets.github_token }}
          COVERALLS_FLAG_NAME: ${{ matrix.python-version }}
          COVERALLS_PARALLEL: true
        if: (! contains(matrix.python-version, 'pypy'))
        run: |
          coverage report --show-missing --fail-under=100
          pip install coveralls
          coverage report --show-missing
          coveralls --service=github

  finish:
    needs: test
    runs-on: ubuntu-latest
    steps:
      - name: Coveralls Finished
        uses: coverallsapp/github-action@master
        with:
          github-token: ${{ secrets.github_token }}
          parallel-finished: true

===== toolz-0.11.2/.gitignore =====

*.pyc
build/
dist/
*.egg-info/
bench/shakespeare.txt
.coverage
*.sw?
.DS_STORE
\.tox/

===== toolz-0.11.2/AUTHORS.md =====

[Matthew Rocklin](http://matthewrocklin.com) [@mrocklin](http://github.com/mrocklin/)

[John Jacobsen](http://eigenhombre.com) [@eigenhombre](http://github.com/eigenhombre/)

Erik Welch [@eriknw](https://github.com/eriknw/)

John Crichton [@jcrichton](https://github.com/jcrichton/)

Han Semaj [@microamp](https://github.com/microamp/)

[Graeme Coupar](https://twitter.com/obmarg) [@obmarg](https://github.com/obmarg/)

[Leonid Shvechikov](http://brainstorage.me/shvechikov) [@shvechikov](https://github.com/shvechikov)

Lars Buitinck [@larsmans](http://github.com/larsmans)

José Ricardo [@josericardo](https://github.com/josericardo)

Tom Prince [@tomprince](https://github.com/tomprince)

Bart van Merriënboer [@bartvm](https://github.com/bartvm)

Nikolaos-Digenis Karagiannis [@digenis](https://github.com/digenis/)

[Antonio Lima](https://twitter.com/themiurgo) [@themiurgo](https://github.com/themiurgo/)

Joe Jevnik [@llllllllll](https://github.com/llllllllll)

Rory Kirchner [@roryk](https://github.com/roryk)

[Steven Cutting](http://steven-cutting.github.io) [@steven_cutting](https://github.com/steven-cutting)

Aric Coady [@coady](https://github.com/coady)

===== toolz-0.11.2/LICENSE.txt =====

Copyright (c) 2013 Matthew Rocklin

All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

a. Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.
b. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
c. Neither the name of toolz nor the names of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.

===== toolz-0.11.2/MANIFEST.in =====

include LICENSE.txt
include toolz/tests/*.py
include versioneer.py
include toolz/_version.py

===== toolz-0.11.2/README.rst =====

Toolz
=====

|Build Status| |Coverage Status| |Version Status|

A set of utility functions for iterators, functions, and dictionaries.

See the PyToolz documentation at https://toolz.readthedocs.io

LICENSE
-------

New BSD. See `License File <https://github.com/pytoolz/toolz/blob/master/LICENSE.txt>`__.

Install
-------

``toolz`` is on the Python Package Index (PyPI):

::

    pip install toolz

Structure and Heritage
----------------------

``toolz`` is implemented in three parts:

|literal itertoolz|_, for operations on iterables. Examples: ``groupby``,
``unique``, ``interpose``,

|literal functoolz|_, for higher-order functions. Examples: ``memoize``,
``curry``, ``compose``,

|literal dicttoolz|_, for operations on dictionaries. Examples: ``assoc``,
``update_in``, ``merge``.

.. |literal itertoolz| replace:: ``itertoolz``
.. _literal itertoolz: https://github.com/pytoolz/toolz/blob/master/toolz/itertoolz.py

.. |literal functoolz| replace:: ``functoolz``
.. _literal functoolz: https://github.com/pytoolz/toolz/blob/master/toolz/functoolz.py

.. |literal dicttoolz| replace:: ``dicttoolz``
.. _literal dicttoolz: https://github.com/pytoolz/toolz/blob/master/toolz/dicttoolz.py

These functions come from the legacy of functional languages for list
processing.  They interoperate well to accomplish common complex tasks.

Read our `API Documentation <https://toolz.readthedocs.io/en/latest/api.html>`__ for
more details.

Example
-------

This builds a standard wordcount function from pieces within ``toolz``:

.. code:: python

    >>> def stem(word):
    ...     """ Stem word to primitive form """
    ...     return word.lower().rstrip(",.!:;'-\"").lstrip("'\"")

    >>> from toolz import compose, frequencies
    >>> from toolz.curried import map
    >>> wordcount = compose(frequencies, map(stem), str.split)

    >>> sentence = "This cat jumped over this other cat!"
    >>> wordcount(sentence)
    {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1}

Dependencies
------------

``toolz`` supports Python 3.5+ with a common codebase.
It is pure Python and requires no dependencies beyond the standard
library.

It is, in short, a lightweight dependency.

CyToolz
-------

The ``toolz`` project has been reimplemented in `Cython <https://cython.org>`__.
The ``cytoolz`` project is a drop-in replacement for the Pure Python
implementation.  See `CyToolz GitHub Page <https://github.com/pytoolz/cytoolz>`__
for more details.

See Also
--------

-  `Underscore.js <https://underscorejs.org>`__: A similar library for
   JavaScript
-  `Enumerable <https://ruby-doc.org/core-2.0.0/Enumerable.html>`__: A
   similar library for Ruby
-  `Clojure <https://clojure.org>`__: A functional language whose standard
   library has several counterparts in ``toolz``
-  `itertools <https://docs.python.org/3/library/itertools.html>`__: The
   Python standard library for iterator tools
-  `functools <https://docs.python.org/3/library/functools.html>`__: The
   Python standard library for function tools

Contributions Welcome
---------------------

``toolz`` aims to be a repository for utility functions, particularly
those that come from the functional programming and list processing
traditions.  We welcome contributions that fall within this scope.

We also try to keep the API small to keep ``toolz`` manageable.  The ideal
contribution is significantly different from existing functions and has
precedent in a few other functional systems.

Please take a look at our `issue page <https://github.com/pytoolz/toolz/issues>`__
for contribution ideas.

Community
---------

See our `mailing list <https://groups.google.com/forum/#!forum/pytoolz>`__.
We're friendly.

.. |Build Status| image:: https://github.com/pytoolz/toolz/workflows/Test/badge.svg
   :target: https://github.com/pytoolz/toolz/actions
.. |Coverage Status| image:: https://coveralls.io/repos/pytoolz/toolz/badge.svg?branch=master
   :target: https://coveralls.io/r/pytoolz/toolz
.. |Version Status| image:: https://badge.fury.io/py/toolz.svg
   :target: https://badge.fury.io/py/toolz

===== toolz-0.11.2/bench/test_curry.py =====

from toolz.curried import get

pairs = [(1, 2) for i in range(100000)]


def test_get_curried():
    first = get(0)
    for p in pairs:
        first(p)

===== toolz-0.11.2/bench/test_curry_baseline.py =====

from toolz import get
from functools import partial

pairs = [(1, 2) for i in range(100000)]


def test_get():
    first = partial(get, 0)
    for p in pairs:
        first(p)

===== toolz-0.11.2/bench/test_first.py =====

from toolz import first, second

pairs = [(1, 2) for i in range(1000000)]


def test_first():
    for p in pairs:
        first(p)


def test_second():
    for p in pairs:
        second(p)

===== toolz-0.11.2/bench/test_first_iter.py =====

import itertools

from toolz import first, second


def test_first_iter():
    iters = map(iter, [(1, 2) for i in range(1000000)])
    for p in iters:
        first(p)


def test_second_iter():
    iters = map(iter, [(1, 2) for i in range(1000000)])
    for p in iters:
        second(p)

===== toolz-0.11.2/bench/test_frequencies.py =====

from toolz import frequencies, identity

big_data = list(range(1000)) * 1000
small_data = list(range(100))


def test_frequencies():
    frequencies(big_data)


def test_frequencies_small():
    for i in range(1000):
        frequencies(small_data)
===== toolz-0.11.2/bench/test_get.py =====

from toolz import get

tuples = [(1, 2, 3) for i in range(100000)]


def test_get():
    for tup in tuples:
        get(1, tup)

===== toolz-0.11.2/bench/test_get_list.py =====

from toolz import get

tuples = [(1, 2, 3) for i in range(100000)]


def test_get():
    for tup in tuples:
        get([1, 2], tup)

===== toolz-0.11.2/bench/test_groupby.py =====

from toolz import groupby, identity

data = list(range(1000)) * 1000


def test_groupby():
    groupby(identity, data)

===== toolz-0.11.2/bench/test_join.py =====

from toolz.curried import *
import random

try:
    xrange
except NameError:
    xrange = range


def burn(seq):
    for item in seq:
        pass


small = [(i, str(i)) for i in range(100)] * 10
big = pipe([110]*10000, map(range), concat, list)


def test_many_to_many_large():
    burn(join(get(0), small, identity, big))


def test_one_to_one_tiny():
    A = list(range(20))
    B = A[::2] + A[1::2][::-1]

    for i in xrange(50000):
        burn(join(identity, A, identity, B))


def test_one_to_many():
    A = list(range(20))
    B = pipe([20]*1000, map(range), concat, list)

    for i in xrange(100):
        burn(join(identity, A, identity, B))

===== toolz-0.11.2/bench/test_memoize.py =====

from toolz import memoize


def test_memoize_no_kwargs():
    @memoize
    def f(x):
        return x

    for i in range(100000):
        f(3)

===== toolz-0.11.2/bench/test_memoize_kwargs.py =====

from toolz import memoize


def test_memoize_kwargs():
    @memoize
    def f(x, y=3):
        return x

    for i in range(100000):
        f(3)

===== toolz-0.11.2/bench/test_pluck.py =====

from toolz import pluck

tuples = [(1, 2, 3) for i in range(100000)]
less_tuples = [(1, 2, 3) for i in range(100)]


def test_pluck():
    for i in pluck(2, tuples):
        pass
    for i in range(1000):
        tuple(pluck(2, less_tuples))

===== toolz-0.11.2/bench/test_sliding_window.py =====

from toolz import sliding_window

seq = range(1000000)


def test_sliding_window():
    list(sliding_window(3, seq))

===== toolz-0.11.2/bench/test_wordcount.py =====

from toolz.curried import *

import os

if not os.path.exists('bench/shakespeare.txt'):
    os.system('wget http://www.gutenberg.org/files/100/100-0.txt'
              ' -O bench/shakespeare.txt')


def stem(word):
    """ Stem word to primitive form """
    return word.lower().rstrip(",.!:;'-\"").lstrip("'\"")


wordcount = comp(frequencies, map(stem), concat, map(str.split))


def test_shakespeare():
    with open('bench/shakespeare.txt') as f:
        counts = wordcount(f)

===== toolz-0.11.2/conda.recipe/bld.bat =====

cd %RECIPE_DIR%\..
%PYTHON% setup.py install

===== toolz-0.11.2/conda.recipe/build.sh =====

cd $RECIPE_DIR/..
$PYTHON setup.py install

===== toolz-0.11.2/conda.recipe/meta.yaml =====

package:
  name: toolz
  version: "0.10.0"

build:
  number: {{environ.get('BINSTAR_BUILD', 1)}}

requirements:
  build:
    - setuptools
    - python
  run:
    - python

test:
  requires:
    - pytest
  imports:
    - toolz
  commands:
    - py.test -x --doctest-modules --pyargs toolz

about:
  home: https://toolz.readthedocs.io/
  license: BSD

===== toolz-0.11.2/doc/Makefile =====

# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = build

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	-rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Toolz.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Toolz.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/Toolz"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Toolz"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	make -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

===== toolz-0.11.2/doc/make.bat =====

@ECHO OFF

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set BUILDDIR=build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
set I18NSPHINXOPTS=%SPHINXOPTS% source
if NOT "%PAPER%" == "" (
	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)

if "%1" == "" goto help

if "%1" == "help" (
	:help
	echo.Please use `make ^<target^>` where ^<target^> is one of
	echo.  html       to make standalone HTML files
	echo.  dirhtml    to make HTML files named index.html in directories
	echo.  singlehtml to make a single large HTML file
	echo.  pickle     to make pickle files
	echo.  json       to make JSON files
	echo.  htmlhelp   to make HTML files and a HTML help project
	echo.  qthelp     to make HTML files and a qthelp project
	echo.  devhelp    to make HTML files and a Devhelp project
	echo.  epub       to make an epub
	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
	echo.  text       to make text files
	echo.  man        to make manual pages
	echo.  texinfo    to make Texinfo files
	echo.  gettext    to make PO message catalogs
	echo.  changes    to make an overview over all changed/added/deprecated items
	echo.  linkcheck  to check all external links for integrity
	echo.  doctest    to run all doctests embedded in the documentation if enabled
	goto end
)

if "%1" == "clean" (
	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
	del /q /s %BUILDDIR%\*
	goto end
)

if "%1" == "html" (
	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
	goto end
)

if "%1" == "dirhtml" (
	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
	goto end
)

if "%1" == "singlehtml" (
	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
	goto end
)

if "%1" == "pickle" (
	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the pickle files.
	goto end
)

if "%1" == "json" (
	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the JSON files.
	goto end
)

if "%1" == "htmlhelp" (
	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
	goto end
)

if "%1" == "qthelp" (
	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Toolz.qhcp
	echo.To view the help file:
	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Toolz.qhc
	goto end
)

if "%1" == "devhelp" (
	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished.
	goto end
)

if "%1" == "epub" (
	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The epub file is in %BUILDDIR%/epub.
	goto end
)

if "%1" == "latex" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
	goto end
)

if "%1" == "text" (
	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The text files are in %BUILDDIR%/text.
	goto end
)

if "%1" == "man" (
	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The manual pages are in %BUILDDIR%/man.
	goto end
)

if "%1" == "texinfo" (
	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
	goto end
)

if "%1" == "gettext" (
	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
	goto end
)

if "%1" == "changes" (
	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
	if errorlevel 1 exit /b 1
	echo.
	echo.The overview file is in %BUILDDIR%/changes.
	goto end
)

if "%1" == "linkcheck" (
	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
	if errorlevel 1 exit /b 1
	echo.
	echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
	goto end
)

if "%1" == "doctest" (
	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
	if errorlevel 1 exit /b 1
	echo.
	echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
	goto end
)

:end

===== toolz-0.11.2/doc/source/api.rst =====

API
===

This page contains a comprehensive list of all functions within ``toolz``.
Docstrings should provide sufficient understanding for any individual
function.

Itertoolz
---------

.. currentmodule:: toolz.itertoolz

.. autosummary::
   accumulate
   concat
   concatv
   cons
   count
   diff
   drop
   first
   frequencies
   get
   groupby
   interleave
   interpose
   isdistinct
   isiterable
   iterate
   join
   last
   mapcat
   merge_sorted
   nth
   partition
   partition_all
   peek
   peekn
   pluck
   random_sample
   reduceby
   remove
   second
   sliding_window
   tail
   take
   take_nth
   topk
   unique

.. currentmodule:: toolz.recipes

.. autosummary::
   countby
   partitionby

Functoolz
---------

.. currentmodule:: toolz.functoolz

.. autosummary::
   apply
   complement
   compose
   compose_left
   curry
   do
   excepts
   flip
   identity
   juxt
   memoize
   pipe
   thread_first
   thread_last

Dicttoolz
---------

.. currentmodule:: toolz.dicttoolz

.. autosummary::
   assoc
   assoc_in
   dissoc
   get_in
   itemfilter
   itemmap
   keyfilter
   keymap
   merge
   merge_with
   update_in
   valfilter
   valmap

Sandbox
-------

.. currentmodule:: toolz.sandbox

.. autosummary::
   parallel.fold
   core.EqualityHashKey
   core.unzip

Definitions
-----------

.. automodule:: toolz.itertoolz
   :members:

.. automodule:: toolz.recipes
   :members:

.. automodule:: toolz.functoolz
   :members:

.. automodule:: toolz.dicttoolz
   :members:

.. automodule:: toolz.sandbox.core
   :members:

.. automodule:: toolz.sandbox.parallel
   :members:

===== toolz-0.11.2/doc/source/composition.rst =====

Composability
=============

Toolz functions interoperate because they consume and produce only a
small set of common, core data structures.  Each ``toolz`` function
consumes just iterables, dictionaries, and functions and each ``toolz``
function produces just iterables, dictionaries, and functions.  This
standardized interface enables us to compose several general purpose
functions to solve custom problems.

Standard interfaces enable us to use many tools together, even if those
tools were not designed with each other in mind.  We call this "using
together" composition.

Standard Interface
------------------

This is best explained by two examples; the automobile industry and
LEGOs.

Autos
^^^^^

Automobile pieces are not widely composable because they do not adhere
to a standard interface.  You can't connect a Porsche engine to the body
of a Volkswagen Beetle but include the safety features of your favorite
luxury car.  As a result when something breaks you need to find a
specialist who understands exactly your collection of components and,
depending on the popularity of your model, replacement parts may be
difficult to find.
While the customization provides a number of efficiencies important for
automobiles, it limits the ability of downstream tinkerers.  This
ability for future developers to tinker is paramount in good software
design.

Lego
^^^^

Contrast this with Lego toys.  With Lego you *can* connect a rocket
engine and skis to a rowboat.  This is a perfectly natural thing to do
because every piece adheres to a simple interface - those simple and
regular 5mm circular bumps.  This freedom to connect pieces at will lets
children unleash their imagination in such varied ways (like going
arctic shark hunting with a rocket-ski-boat).

The abstractions in programming make it far more like Lego than like
building cars.  This breaks down a little when we start to be
constrained by performance or memory issues but this affects only a very
small fraction of applications.  Most of the time we have the freedom to
operate in the Lego model if we choose to give up customization and
embrace simple core standards.

Other Standard Interfaces
-------------------------

The Toolz project builds off of a standard interface -- this choice is
not unique.  Other standard interfaces exist and provide immeasurable
benefit to their application areas.

The NumPy array serves as a foundational object for numeric and
scientific computing within Python.  The ability of any project to
consume and produce NumPy arrays is largely responsible for the broad
success of the various SciPy projects.  We see similar development today
with the Pandas DataFrame.

The UNIX toolset relies on files and streams of text.

JSON emerged as the standard interface for communication over the web.
The virtues of standardization become glaringly apparent when we
contrast JSON with its predecessor, XML.  XML was designed to be
extensible/customizable, allowing each application to design its own
interface.  This resulted in a sea of difficult to understand custom
data languages that failed to develop a common analytic and data
processing infrastructure.  In contrast JSON is very restrictive and
allows only a fixed set of data structures, namely lists, dictionaries,
numbers, strings.  Fortunately this set is common to most modern
languages and so JSON is extremely widely supported, perhaps falling
second only to CSV.

Standard interfaces permeate physical reality as well.  Examples range
from supra-national currencies to drill bits and electrical circuitry.
In all cases the interoperation that results becomes a defining and
invaluable feature of each solution.

===== toolz-0.11.2/doc/source/conf.py =====

# -*- coding: utf-8 -*-
#
# Toolz documentation build configuration file, created by
# sphinx-quickstart on Sun Sep 22 18:06:00 2013.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys, os

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath('.'))

# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc',
              'sphinx.ext.coverage',
              'sphinx.ext.viewcode',
              'sphinx.ext.autosummary']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'Toolz'
copyright = u'2013, Matthew Rocklin, John Jacobsen'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
import toolz
version = toolz.__version__
# The full version, including alpha/beta/rc tags.
release = toolz.__version__

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []


# -- Options for HTML output ---------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = 'default'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents.  If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar.  Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it.  The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'Toolzdoc'


# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
  ('index', 'Toolz.tex', u'Toolz Documentation',
   u'Matthew Rocklin, John Jacobsen', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'toolz', u'Toolz Documentation',
     [u'Matthew Rocklin, John Jacobsen'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
  ('index', 'Toolz', u'Toolz Documentation',
   u'Matthew Rocklin, John Jacobsen', 'Toolz',
   'One line description of project.', 'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'


# -- Options for Epub output ---------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = u'Toolz'
epub_author = u'Matthew Rocklin, John Jacobsen'
epub_publisher = u'Matthew Rocklin, John Jacobsen'
epub_copyright = u'2013, Matthew Rocklin, John Jacobsen'

# The language of the text. It defaults to the language option
# or en if the language is not set.
#epub_language = ''

# The scheme of the identifier. Typical schemes are ISBN or URL.
#epub_scheme = ''

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#epub_identifier = ''

# A unique identification for the text.
#epub_uid = ''

# A tuple containing the cover image and cover page html template filenames.
#epub_cover = ()

# HTML files that should be inserted before the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_pre_files = []

# HTML files that should be inserted after the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_post_files = []

# A list of files that should not be packed into the epub file.
#epub_exclude_files = []

# The depth of the table of contents in toc.ncx.
#epub_tocdepth = 3

# Allow duplicate toc entries.
#epub_tocdup = True

===== toolz-0.11.2/doc/source/control.rst =====

Control Flow
============

Programming is hard when we think simultaneously about several concepts.

Good programming breaks down big problems into small problems and builds
up small solutions into big solutions.  By this practice the need for
simultaneous thought is restricted to only a few elements at a time.

All modern languages provide mechanisms to build data into data
structures and to build functions out of other functions.  The third
element of programming, besides data and functions, is control flow.
Building complex control flow out of simple control flow presents deeper
challenges.

What?
-----

Each element in a computer program is either

-  A variable or value literal like ``x``, ``total``, or ``5``
-  A function or computation like the ``+`` in ``x + 1``, the function
   ``fib`` in ``fib(3)``, the method ``split`` in ``line.split(',')``, or
   the ``=`` in ``x = 0``
-  Control flow like ``if``, ``for``, or ``return``

Here is a piece of code; see if you can label each term as either
variable/value, function/computation, or control flow

.. code::

    def fib(n):
        a, b = 0, 1
        for i in range(n):
            a, b = b, a + b
        return b

Programming is hard when we have to juggle many code elements of each
type at the same time.  Good programming is about managing these three
elements so that the developer is only required to think about a handful
of them at a time.  For example we might collect many integer variables
into a list of integers or build a big function out of smaller ones.

We organize our data into **data structures** like lists, dictionaries,
or objects in order to group related data together -- this allows us to
manipulate large collections of related data as if we were only
manipulating a single entity.

We **build large functions out of smaller ones**, enabling us to break
up a complex task like doing laundry into a sequence of simpler tasks.

.. code::

    def do_laundry(clothes):
        wet_clothes = wash(clothes)
        dry_clothes = dry(wet_clothes)
        return fold(dry_clothes)

While we have natural ways to manage data and functions, **control flow
presents more of a challenge**.  How do we break down complex control
flow into simpler pieces that fit in our brain?  How do we encapsulate
commonly recurring patterns?
Let's motivate this with an example of a common control structure,
applying a function to each element in a list.  Imagine we want to
download the HTML source for a number of webpages.

.. code::

    from urllib.request import urlopen

    urls = ['http://www.google.com', 'http://www.wikipedia.com', 'http://www.apple.com']

    html_texts = []
    for item in urls:
        html_texts.append(urlopen(item))

Or maybe we want to compute the Fibonacci numbers on a particular set of
integers

.. code::

    integers = [1, 2, 3, 4, 5]

    fib_integers = []
    for item in integers:
        fib_integers.append(fib(item))

These two unrelated applications share an identical control flow
pattern.  They apply a function (``urlopen`` or ``fib``) onto each
element of an input list (``urls``, or ``integers``), appending the
result onto an output list.  Because this control flow pattern is so
common we give it a name, ``map``, and say that we map a function (like
``urlopen``) onto a list (like ``urls``).

Because Python can treat functions like variables we can encode this
control pattern into a higher-order-function as follows:

.. code::

    def map(function, sequence):
        output = []
        for item in sequence:
            output.append(function(item))
        return output

This allows us to simplify our code above to the following, pithy
solutions

.. code::

    html_texts = map(urlopen, urls)
    fib_integers = map(fib, integers)

Experienced Python programmers know that this control pattern is so
popular that it has been elevated to the status of **syntax** with the
popular list comprehension

.. code::

    html_texts = [urlopen(url) for url in urls]

Why?
----

So maybe you already knew about ``map`` and don't use it or maybe you
just prefer list comprehensions.  Why should you keep reading?

Managing Complexity
^^^^^^^^^^^^^^^^^^^

The higher order function ``map`` gives us a name to call a particular
control pattern.  Regardless of whether or not you use a for loop, a
list comprehension, or ``map`` itself, it is useful to recognize the
operation and to give it a name.

Naming control patterns lets us tackle complex problems at larger scale
without burdening our mind with rote details.  It is just as important
as bundling data into data structures or building complex functions out
of simple ones.

*Naming control flow patterns enables programmers to manipulate
increasingly complex operations.*

Other Patterns
^^^^^^^^^^^^^^

The function ``map`` has friends.  Advanced programmers may know about
``map``'s siblings, ``filter`` and ``reduce``.  The ``filter`` control
pattern is also handled by list comprehension syntax and ``reduce`` is
often replaced by straight for loops, so if you don't want to use them
there is no immediately practical reason why you would care.

Most programmers however don't know about the many cousins of
``map``/``filter``/``reduce``.  Consider for example the unsung heroine,
``groupby``.  A brief example grouping names by their length follows:

.. code::

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> groupby(len, names)
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

``groupby`` collects each element of a list into sublists determined by
the value of a function.  Let's see ``groupby`` in action again, grouping
numbers by evenness.

.. code::

    >>> def iseven(n):
    ...     return n % 2 == 0

    >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7])
    {True: [2, 4, 6], False: [1, 3, 5, 7]}

If we were to write this second operation out by hand it might look
something like the following:

.. code::

    evens = []
    odds = []
    for item in numbers:
        if iseven(item):
            evens.append(item)
        else:
            odds.append(item)

Most programmers have written code exactly like this over and over
again, just like they may have repeated the ``map`` control pattern.
When we identify code as a ``groupby`` operation we mentally collapse
the detailed manipulation into a single concept.
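Just as the ``map`` pattern above was captured in a reusable
higher-order function, we can write this pattern down once and for all.
The following is a minimal sketch of how such a ``groupby`` could be
implemented; the version shipped in ``toolz`` is more general and
heavily tuned, so treat this as illustration rather than the library's
actual definition:

.. code::

    def groupby(function, sequence):
        """ Group items in `sequence` by the value of `function` on each item

        This is an illustrative sketch, not the toolz implementation.
        """
        output = {}
        for item in sequence:
            key = function(item)
            if key not in output:
                output[key] = []        # first member of a new group
            output[key].append(item)
        return output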
Additional Considerations
^^^^^^^^^^^^^^^^^^^^^^^^^

The Toolz library contains dozens of patterns like ``map`` and
``groupby``.  Learning a core set (maybe a dozen) covers the vast
majority of common programming tasks often done by hand.

*A rich vocabulary of core control functions conveys the following
benefits:*

-  You identify new patterns
-  You make fewer errors in rote coding
-  You can depend on well tested and benchmarked implementations

But this does not come for free.  As in spoken language the use of a
rich vocabulary can alienate new practitioners.  Most functional
languages have fallen into this trap and are seen as unapproachable and
smug.  Python maintains a low-brow reputation and benefits from it.
Just as with spoken language the value of using just-the-right-word must
be moderated with the comprehension of the intended audience.

===== toolz-0.11.2/doc/source/curry.rst =====

Curry
=====

Traditionally partial evaluation of functions is handled with the
``partial`` higher order function from ``functools``.  Currying provides
syntactic sugar.

.. code::

    >>> double = partial(mul, 2)    # Partial evaluation
    >>> doubled = double(5)         # Currying

This syntactic sugar is valuable when developers chain several higher
order functions together.

Partial Evaluation
------------------

Often when composing smaller functions to form big ones we need partial
evaluation.  We do this in the word counting example:

.. code::

    >>> def stem(word):
    ...     """ Stem word to primitive form """
    ...     return word.lower().rstrip(",.!:;'-\"").lstrip("'\"")

    >>> wordcount = compose(frequencies, partial(map, stem), str.split)

Here we want to map the ``stem`` function onto each of the words produced
by ``str.split``.  We want a ``stem_many`` function that takes a list of
words, stems them, and returns a list back.  In full form this would look
like the following:

.. code::

    >>> def stem_many(words):
    ...     return map(stem, words)

The ``partial`` function lets us create this function more naturally.

.. code::

    >>> stem_many = partial(map, stem)

In general

.. code::

    >>> def f(x, y, z):
    ...     # Do stuff with x, y, and z

    >>> # partially evaluate f with known values a and b
    >>> def g(z):
    ...     return f(a, b, z)

    >>> # alternatively we could use `partial`
    >>> g = partial(f, a, b)

Curry
-----

In this context currying is just syntactic sugar for partial evaluation.
A curried function partially evaluates if it does not receive enough
arguments to compute a result.

.. code::

    >>> from toolz import curry

    >>> @curry              # We can use curry as a decorator
    ... def mul(x, y):
    ...     return x * y

    >>> double = mul(2)     # mul didn't receive enough arguments to evaluate
    ...                     # so it holds onto the 2 and waits, returning a
    ...                     # partially evaluated function `double`

    >>> double(5)
    10

So if ``map`` was curried...

.. code::

    >>> map = curry(map)

Then we could replace the ``partial`` with a function evaluation

.. code::

    >>> # wordcount = compose(frequencies, partial(map, stem), str.split)
    >>> wordcount = compose(frequencies, map(stem), str.split)

In this particular example it's probably simpler to stick with
``partial``.  Once ``partial`` starts occurring several times in your
code it may be time to switch to the ``curried`` namespace.

The Curried Namespace
---------------------

All functions present in the ``toolz`` namespace are curried in the
``toolz.curried`` namespace.

So you can exchange an import line like the following

.. code::

    >>> from toolz import *

For the following

.. code::

    >>> from toolz.curried import *

And all of your favorite ``toolz`` functions will curry automatically.
We've also included curried versions of the standard Python higher order
functions like ``map``, ``filter``, ``reduce`` so you'll get them too
(whether you like it or not.)
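To make the payoff concrete, here is the wordcount example from earlier
assembled entirely from the curried namespace.  This is just a sketch
combining pieces already shown above; no new functions are introduced:

.. code::

    >>> from toolz.curried import compose, frequencies, map

    >>> def stem(word):
    ...     """ Stem word to primitive form """
    ...     return word.lower().rstrip(",.!:;'-\"").lstrip("'\"")

    >>> wordcount = compose(frequencies, map(stem), str.split)
    >>> wordcount("This cat jumped over this other cat!")
    {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1}

Because ``map`` here is curried, ``map(stem)`` partially evaluates and
waits for a sequence, with no ``partial`` in sight.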
===== toolz-0.11.2/doc/source/heritage.rst =====

Heritage
========

While Python was originally intended as an imperative language [Guido_],
it contains all elements necessary to support a rich set of features
from the functional paradigm.  In particular its core data structures,
lazy iterators, and functions as first class objects can be combined to
implement a common standard library of functions shared among many
functional languages.

This was first recognized and supported through the standard libraries
itertools_ and functools_ which contain functions like ``permutations``,
``chain`` and ``partial`` to complement the standard ``map``,
``filter``, ``reduce`` already found in the core language.

While these libraries contain substantial functionality they do not
achieve the same level of adoption found in similar projects in other
languages.  This may be because they are incomplete and lack a number of
commonly related functions like ``compose`` and ``groupby`` which often
complement these core operations.

A completion of this set of functions was first attempted in the
projects itertoolz_ and functoolz_ (note the z).  These libraries
contained several functions that were absent in the standard
itertools_/functools_ libraries.  The ``itertoolz``/``functoolz``
libraries were eventually merged into the monolithic ``toolz`` project
described here.

Most contemporary functional languages (Haskell, Scala, Clojure, ...)
contain some variation of the functions found in ``toolz``.  The
``toolz`` project generally adheres closely to the API found in the
Clojure standard library (see cheatsheet_) and where disagreements occur
that API usually dominates.  The ``toolz`` API is also strongly affected
by the principles of the Python language itself, and often makes
deviations in order to be more approachable to that community.

The development of a functional standard library within a popular
imperative language is not unique.  Similar projects have arisen in
other imperative-by-design languages that contain the necessary elements
to support a functional standard library.

Underscore.js_ in JavaScript has attained notable popularity in the web
community.  ``LINQ`` in C# follows a similar philosophy but mimics
declarative database languages rather than functional ones.
Enumerable_ is the closest project in Ruby.  Other excellent projects
also exist within the Python ecosystem, most notably Fn.py_ and Funcy_.

.. _itertools: https://docs.python.org/2/library/itertools.html
.. _functools: https://docs.python.org/2/library/functools.html
.. _itertoolz: https://github.com/mrocklin/itertoolz
.. _functoolz: https://github.com/mrocklin/functoolz
.. _Underscore.js: https://underscorejs.org
.. _cheatsheet: https://clojure.org/cheatsheet
.. _Guido: https://python-history.blogspot.com/2009/04/origins-of-pythons-functional-features.html
.. _Enumerable: https://ruby-doc.org/core-2.0.0/Enumerable.html
.. _Funcy: https://github.com/suor/funcy/
.. _Fn.py: https://github.com/kachayev/fn.py

===== toolz-0.11.2/doc/source/index.rst =====

PyToolz API Documentation
=========================

Toolz provides a set of utility functions for iterators, functions,
and dictionaries.  These functions interoperate well and form the
building blocks of common data analytic operations.  They extend the
standard libraries `itertools` and `functools` and borrow heavily from
the standard libraries of contemporary functional languages.

Toolz provides a suite of functions which have the following functional
virtues:

-  **Composable:** They interoperate due to their use of core data
   structures.
-  **Pure:** They don't change their inputs or rely on external state.
-  **Lazy:** They don't run until absolutely necessary, allowing them to
   support large streaming data sets.

Toolz functions are *pragmatic*.  They understand that most programmers
have deadlines.

-  **Low Tech:** They're just functions, no syntax or magic tricks to learn
-  **Tuned:** They're profiled and optimized
-  **Serializable:** They support common solutions for parallel computing

This gives developers the power to write *powerful* programs to solve
*complex problems* with relatively *simple code*.  This code can be
*easy to understand* without sacrificing *performance*.  Toolz enables
this approach, commonly associated with functional programming, within a
natural Pythonic style suitable for most developers.

BSD licensed source code is available at
http://github.com/pytoolz/toolz/ .

Contents
^^^^^^^^

.. toctree::
   :maxdepth: 2

   heritage.rst
   install.rst
   composition.rst
   purity.rst
   laziness.rst
   control.rst
   curry.rst
   streaming-analytics.rst
   parallelism.rst
   api.rst
   tips-and-tricks.rst
   references.rst

===== toolz-0.11.2/doc/source/install.rst =====

Installation and Dependencies
=============================

Toolz is pure Python and so is easily installable by the standard
dependency manager ``pip``::

    pip install toolz

Toolz endeavors to be a very light dependency.  It accomplishes this in
three ways:

1.  Toolz is pure Python
2.  Toolz relies only on the standard library
3.  Toolz simultaneously supports Python versions 3.5+ and PyPy

===== toolz-0.11.2/doc/source/laziness.rst =====

Laziness
========

Lazy iterators evaluate only when necessary.  They allow us to
semantically manipulate large amounts of data while keeping very little
of it actually in memory.  They act like lists but don't take up space.

Example - A Tale of Two Cities
------------------------------

We open a file containing the text of the classic text "A Tale of Two
Cities" by Charles Dickens [1]_.

.. code::

    >>> book = open('tale-of-two-cities.txt')

Much like a secondary school student, Python owns and opens the book
without reading a single line of the text.  The object ``book`` is a
lazy iterator!  Python will give us a line of the text only when we
explicitly ask it to do so

.. code::

    >>> next(book)
    "It was the best of times,"

    >>> next(book)
    "it was the worst of times,"

and so on.  Each time we call ``next`` on ``book`` we burn through
another line of the text and the ``book`` iterator marches slowly
onwards through the text.
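If you don't have the text on disk you can follow along with a small
in-memory stand-in; any iterator obeys the same ``next`` protocol, so
the behavior below is identical (the four lines are just a sample of the
opening of the novel):

.. code::

    >>> book = iter(["It was the best of times,",
    ...              "it was the worst of times,",
    ...              "it was the age of wisdom,",
    ...              "it was the age of foolishness,"])

    >>> next(book)
    'It was the best of times,'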
Computation
-----------

We can lazily operate on lazy iterators without doing any actual
computation.  For example let's read the book in upper case

.. code::

    >>> from toolz import map  # toolz' map is lazy by default

    >>> loud_book = map(str.upper, book)

    >>> next(loud_book)
    "IT WAS THE AGE OF WISDOM,"

    >>> next(loud_book)
    "IT WAS THE AGE OF FOOLISHNESS,"

It is as if we applied the function ``str.upper`` onto every line of the
book; yet the first line completes instantaneously.  Instead Python does
the uppercasing work only when it becomes necessary, i.e. when you call
``next`` to ask for another line.

Reductions
----------

You can operate on lazy iterators just as you would with lists, tuples,
or sets.  You can use them in for loops as in

.. code::

    for line in loud_book:
        ...

You can instantiate them all into memory by calling them with the
constructors ``list``, or ``tuple``.

.. code::

    loud_book = list(loud_book)

Of course if they are very large then this might be unwise.  Often we
use laziness to avoid loading large datasets into memory at once.  Many
computations on large datasets don't require access to all of the data
at a single time.

In particular *reductions* (like sum) often take large amounts of
sequential data (like [1, 2, 3, 4]) and produce much more manageable
results (like 10) and can do so just by viewing the data a little bit at
a time.  For example we can count all of the letters in the Tale of Two
Cities trivially using functions from ``toolz``

.. code::

    >>> from toolz import concat, frequencies

    >>> letters = frequencies(concat(loud_book))
    {'A': 48036,
     'B': 8402,
     'C': 13812,
     'D': 28000,
     'E': 74624,
     ...

In this case ``frequencies`` is a sort of reduction.  At no time were
more than a few hundred bytes of Tale of Two Cities necessarily in
memory.  We could just have easily done this computation on the entire
Gutenberg collection or on Wikipedia.  In this case we are limited by
the size and speed of our hard drive and not by the capacity of our
memory.

.. [1] http://www.gutenberg.org/cache/epub/98/pg98.txt

===== toolz-0.11.2/doc/source/parallelism.rst =====

Parallelism
===========

PyToolz tries to support other parallel processing libraries.  It does
this by ensuring easy serialization of ``toolz`` functions and providing
architecture-agnostic parallel algorithms.

In practice ``toolz`` is developed against ``multiprocessing`` and
``ipyparallel``.

Serialization
-------------

Multiprocessing or distributed computing requires the transmission of
functions between different processes or computers.  This is done
through serializing the function into text, sending that text over a
wire, and deserializing the text back into a function.

To the extent possible PyToolz functions are compatible with the
standard serialization library ``pickle``.

The ``pickle`` library often fails for complex functions including
lambdas, closures, and class methods.  When this occurs we recommend the
alternative serialization library ``dill``.

Example with parallel map
-------------------------

Most parallel processing tasks may be significantly accelerated using
only a parallel map operation.  A number of high quality parallel map
operations exist in other libraries, notably ``multiprocessing``,
``ipyparallel``, and ``threading`` (if your operation is not processor
bound).

In the example below we extend our wordcounting solution with a parallel
map.
We show how one can progress in development from sequential, to multiprocessing, to distributed computation all with the same domain code. .. code:: from toolz.curried import map from toolz import frequencies, compose, concat, merge_with def stem(word): """ Stem word to primitive form >>> stem("Hello!") 'hello' """ return word.lower().rstrip(",.!)-*_?:;$'-\"").lstrip("-*'\"(_$'") wordcount = compose(frequencies, map(stem), concat, map(str.split), open) if __name__ == '__main__': # Filenames for thousands of books from which we'd like to count words filenames = ['Book_%d.txt'%i for i in range(10000)] # Start with sequential map for development # pmap = map # Advance to Multiprocessing map for heavy computation on single machine # from multiprocessing import Pool # p = Pool(8) # pmap = p.map # Finish with distributed parallel map for big data from ipyparallel import Client p = Client()[:] pmap = p.map_sync total = merge_with(sum, pmap(wordcount, filenames)) This smooth transition is possible because 1. The ``map`` abstraction is a simple function call and so can be replaced. By contrast, this transformation would be difficult if we had written our code with a for loop or list comprehension. 2. The operation ``wordcount`` is separate from the parallel solution. 3. The task is embarrassingly parallel, needing only a very simple parallel strategy. Fortunately this is the common case. Parallel Algorithms ------------------- PyToolz does not implement parallel processing systems. It does however provide parallel algorithms that can extend existing parallel systems. Our general solution is to build algorithms that operate around a user-supplied parallel map function. In particular we provide a parallel ``fold`` in ``toolz.sandbox.parallel.fold``. This fold can work equally well with ``multiprocessing.Pool.map``, ``threading.Pool.map``, or ``ipyparallel``'s ``map_async``. toolz-0.11.2/doc/source/purity.rst000066400000000000000000000055231414140660100170770ustar00rootroot00000000000000Function Purity =============== We call a function *pure* if it meets the following criteria 1. It does not depend on hidden state, or equivalently it only depends on its inputs. 2. Evaluation of the function does not cause side effects In short the internal work of a pure function is isolated from the rest of the program. Examples -------- This is made clear by two examples: .. code:: # A pure function def min(x, y): if x < y: return x else: return y # An impure function exponent = 2 def powers(L): for i in range(len(L)): L[i] = L[i]**exponent return L The function ``min`` is pure. It always produces the same result given the same inputs and it doesn't affect any external variable. The function ``powers`` is impure for two reasons. First, it depends on a global variable, ``exponent``, which can change [*]_. Second, it changes the input ``L`` which may have external state. Consider the following execution: .. code:: >>> data = [1, 2, 3] >>> result = powers(data) >>> print(result) [1, 4, 9] >>> print(data) [1, 4, 9] We see that ``powers`` affected the variable ``data``. Users of our function might be surprised by this. Usually we expect our inputs to be unchanged. Another problem occurs when we run this code in a different context: .. code:: >>> data = [1, 2, 3] >>> result = powers(data) >>> print(result) [1, 8, 27] When we give ``powers`` the same inputs we receive different outputs; how could this be? Someone must have changed the value of ``exponent`` to be ``3``, producing cubes rather than squares. 
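A pure variant of ``powers`` avoids both surprises: make ``exponent`` an explicit argument and return a new list instead of mutating ``L``. A minimal sketch:

.. code::

    # A pure version of powers: every input is explicit
    # and the input list is left unmodified.
    def powers(L, exponent):
        return [item ** exponent for item in L]

Now ``powers(data, 2)`` always returns squares, ``powers(data, 3)`` always returns cubes, and ``data`` itself is never changed.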
At first this flexibility may seem like a feature and indeed in many cases it may be. The cost of this flexibility is that we need to keep track of the ``exponent`` variable separately whenever we use ``powers``. As we use more functions these extra variables become a burden. .. [*] A function depending on a global value can be pure if the value never changes, i.e. is immutable. State ----- Impure functions are often more efficient but also require that the programmer "keep track" of the state of several variables. Keeping track of this state becomes increasingly difficult as programs grow in size. By eschewing state, programmers are able to conceptually scale out to solve much larger problems. The loss of performance is often negligible compared to the freedom to trust that your functions work as expected on your inputs. Maintaining state provides efficiency at the cost of surprises. Pure functions produce no surprises and so lighten the mental load of the programmer. Testing ------- As an added bonus, testing pure functions is substantially simpler than testing impure ones. A programmer who has tried to test functions that include randomness will know this first-hand. toolz-0.11.2/doc/source/references.rst000066400000000000000000000021041414140660100176540ustar00rootroot00000000000000References ========== - `Underscore.js <https://underscorejs.org/>`__: A similar library for JavaScript - `Enumerable <https://ruby-doc.org/core-2.0.0/Enumerable.html>`__: A similar library for Ruby - `Clojure <https://clojure.org/>`__: A functional language whose standard library has several counterparts in ``toolz`` - `itertools <https://docs.python.org/3/library/itertools.html>`__: The Python standard library for iterator tools - `functools <https://docs.python.org/3/library/functools.html>`__: The Python standard library for function tools - `Functional Programming HOWTO <https://docs.python.org/3/howto/functional.html>`__: The description of functional programming features from the official Python docs. Contemporary Projects --------------------- These projects also provide iterator and functional utilities within Python. Their functionality overlaps substantially with that of PyToolz. - `funcy <https://github.com/suor/funcy/>`__ - `fn.py <https://github.com/kachayev/fn.py>`__ - `more\_itertools <https://github.com/erikrose/more-itertools>`__ toolz-0.11.2/doc/source/streaming-analytics.rst000066400000000000000000000243111414140660100215150ustar00rootroot00000000000000Streaming Analytics =================== The toolz functions can be composed to analyze large streaming datasets. Toolz supports common analytics patterns like the selection, grouping, reduction, and joining of data through pure composable functions. These functions often have analogs to familiar operations in other data analytics platforms like SQL or Pandas. Throughout this document we'll use this simple dataset of accounts .. code:: >>> accounts = [(1, 'Alice', 100, 'F'), # id, name, balance, gender ... (2, 'Bob', 200, 'M'), ... (3, 'Charlie', 150, 'M'), ... (4, 'Dennis', 50, 'M'), ... (5, 'Edith', 300, 'F')] Selecting with ``map`` and ``filter`` ------------------------------------- Simple projection and linear selection from a sequence is achieved through the standard functions ``map`` and ``filter``. .. code:: SELECT name, balance FROM accounts WHERE balance > 150; These functions correspond to the SQL commands ``SELECT`` and ``WHERE``. .. code:: >>> from toolz.curried import pipe, map, filter, get >>> pipe(accounts, filter(lambda acc: acc[2] > 150), ... map(get([1, 2])), ... list) [('Bob', 200), ('Edith', 300)] Note: this uses the curried_ versions of ``map`` and ``filter``. Of course, these operations are also well supported with standard list/generator comprehension syntax. This syntax is more often used and generally considered to be more Pythonic. ..
code:: >>> [(name, balance) for (id, name, balance, gender) in accounts ... if balance > 150] [('Bob', 200), ('Edith', 300)] Split-apply-combine with ``groupby`` and ``reduceby`` ----------------------------------------------------- We separate split-apply-combine operations into the following two concepts: 1. Split the dataset into groups by some property 2. Reduce each of the groups with some synopsis function Toolz supports this common workflow with 1. a simple in-memory solution 2. a more sophisticated streaming solution. In Memory Split-Apply-Combine ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The in-memory solution depends on the functions `groupby`_ to split, and `valmap`_ to apply/combine. .. code:: SELECT gender, SUM(balance) FROM accounts GROUP BY gender; We first show these two functions piece by piece to show the intermediate groups. .. code:: >>> from toolz import compose >>> from toolz.curried import get, pluck, groupby, valmap >>> groupby(get(3), accounts) {'F': [(1, 'Alice', 100, 'F'), (5, 'Edith', 300, 'F')], 'M': [(2, 'Bob', 200, 'M'), (3, 'Charlie', 150, 'M'), (4, 'Dennis', 50, 'M')]} >>> valmap(compose(sum, pluck(2)), ... _) # The underscore captures results from the previous prompt {'F': 400, 'M': 400} Then we chain them together into a single computation .. code:: >>> pipe(accounts, groupby(get(3)), ... valmap(compose(sum, pluck(2)))) {'F': 400, 'M': 400} Streaming Split-Apply-Combine ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``groupby`` function collects the entire dataset in memory into a dictionary. While convenient, the ``groupby`` operation is *not streaming* and so this approach is limited to datasets that can fit comfortably into memory. Toolz achieves streaming split-apply-combine with `reduceby`_, a function that performs a simultaneous reduction on each group as the elements stream in. To understand this section you should first be familiar with the builtin function ``reduce``. The ``reduceby`` operation takes a key function, like ``get(3)`` or ``lambda x: x[3]``, and a binary operator like ``add`` or ``lesser = lambda acc, x: acc if acc < x else x``. It applies the key function to each item in succession, accumulating a running total for each key by combining each new value with the previous total using the binary operator. It can't accept full reduction operations like ``sum`` or ``min`` as these require access to the entire group at once. Here is a simple example: .. code:: >>> from toolz import reduceby >>> def iseven(n): ... return n % 2 == 0 >>> def add(x, y): ... return x + y >>> reduceby(iseven, add, [1, 2, 3, 4]) {True: 6, False: 4} The even numbers are added together ``(2 + 4 = 6)`` into group ``True``, and the odd numbers are added together ``(1 + 3 = 4)`` into group ``False``. Note that we have to replace the reduction ``sum`` with the binary operator ``add``. The incremental nature of ``add`` allows us to do the summation work as new data comes in. The use of binary operators like ``add`` over full reductions like ``sum`` enables computation on very large streaming datasets. The challenge to using ``reduceby`` often lies in the construction of a suitable binary operator. Here is the solution for our accounts example that adds up the balances for each group: .. code:: >>> binop = lambda total, account: total + account[2] >>> reduceby(get(3), binop, accounts, 0) {'F': 400, 'M': 400} This construction supports datasets that are much larger than available memory. Only the output must be able to fit comfortably in memory and this is rarely an issue, even for very large split-apply-combine computations.
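Because only the per-group results are held in memory, ``reduceby`` works just as well when the records arrive from a generator. A small sketch, in which a generator that merely replays ``accounts`` stands in for a genuinely lazy data source:

.. code::

    >>> def account_stream():  # stand-in for, e.g., rows parsed lazily from disk
    ...     for account in accounts:
    ...         yield account
    >>> count_binop = lambda total, account: total + 1  # count members per group
    >>> reduceby(get(3), count_binop, account_stream(), 0)
    {'F': 2, 'M': 3}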
Semi-Streaming ``join`` ----------------------- We register multiple datasets together with `join`_. Consider a second dataset storing addresses by ID .. code:: >>> addresses = [(1, '123 Main Street'), # id, address ... (2, '5 Adams Way'), ... (5, '34 Rue St Michel')] We can join this dataset against our accounts dataset by specifying attributes which register different elements with each other; in this case they share a common first column, id. .. code:: SELECT accounts.name, addresses.address FROM accounts JOIN addresses ON accounts.id = addresses.id; .. code:: >>> from toolz import join, first >>> result = join(first, accounts, ... first, addresses) >>> for ((id, name, bal, gender), (id, address)) in result: ... print((name, address)) ('Alice', '123 Main Street') ('Bob', '5 Adams Way') ('Edith', '34 Rue St Michel') Join takes four main arguments: a left and right key function and a left and right sequence. It returns a sequence of pairs of matching items. In our case the return value of ``join`` is a sequence of pairs of tuples such that the first element of each tuple (the ID) is the same. In the example above we unpack this pair of tuples to get the fields that we want (``name`` and ``address``) from the result. Join on arbitrary functions / data ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Those familiar with SQL are accustomed to this kind of join on columns. However a functional join is more general than this; it doesn't need to operate on tuples, and key functions do not need to get particular columns. In the example below we match numbers from two collections so that exactly one is even and one is odd. .. code:: >>> def iseven(x): ... return x % 2 == 0 >>> def isodd(x): ... return x % 2 == 1 >>> list(join(iseven, [1, 2, 3, 4], ... isodd, [7, 8, 9])) [(2, 7), (4, 7), (1, 8), (3, 8), (2, 9), (4, 9)] Semi-Streaming Join ^^^^^^^^^^^^^^^^^^^ The Toolz Join operation fully evaluates the *left* sequence and streams the *right* sequence through memory. Thus, if streaming support is desired the larger of the two sequences should always occupy the right side of the join. Algorithmic Details ^^^^^^^^^^^^^^^^^^^ The semi-streaming join operation in ``toolz`` is asymptotically optimal. Computationally it is linear in the size of the input + output. In terms of storage the left sequence must fit in memory but the right sequence is free to stream. The results are not normalized, as in SQL, in that they permit repeated values. If normalization is desired, consider composing with the function ``unique`` (note that ``unique`` is not fully streaming). More Complex Example ^^^^^^^^^^^^^^^^^^^^ The accounts example above connects two one-to-one relationships, ``accounts`` and ``addresses``; there was exactly one name per ID and one address per ID. This need not be the case. The join abstraction is sufficiently flexible to join one-to-many or even many-to-many relationships. The following example finds city/person pairs where that person has a friend who has a residence in that city. This is an example of joining two many-to-many relationships, because a person may have many friends and because a friend may have many residences. .. code:: >>> friends = [('Alice', 'Edith'), ... ('Alice', 'Zhao'), ... ('Edith', 'Alice'), ... ('Zhao', 'Alice'), ... ('Zhao', 'Edith')] >>> cities = [('Alice', 'NYC'), ... ('Alice', 'Chicago'), ... ('Dan', 'Sydney'), ... ('Edith', 'Paris'), ... ('Edith', 'Berlin'), ... ('Zhao', 'Shanghai')] >>> # Vacation opportunities >>> # In what cities do people have friends?
>>> result = join(second, friends, ... first, cities) >>> for ((name, friend), (friend, city)) in sorted(unique(result)): ... print((name, city)) ('Alice', 'Berlin') ('Alice', 'Paris') ('Alice', 'Shanghai') ('Edith', 'Chicago') ('Edith', 'NYC') ('Zhao', 'Chicago') ('Zhao', 'NYC') ('Zhao', 'Berlin') ('Zhao', 'Paris') Join is computationally powerful: * It is expressive enough to cover a wide set of analytics operations * It runs in linear time relative to the size of the input and output * Only the left sequence must fit in memory Disclaimer ---------- Toolz is a general purpose functional standard library, not a library specifically for data analytics. While there are obvious benefits (streaming, composition, ...) users interested in data analytics might be better served by using projects specific to data analytics like Pandas_ or SQLAlchemy. .. _groupby: https://toolz.readthedocs.io/en/latest/api.html#toolz.itertoolz.groupby .. _join: https://toolz.readthedocs.io/en/latest/api.html#toolz.itertoolz.join .. _reduceby: https://toolz.readthedocs.io/en/latest/api.html#toolz.itertoolz.reduceby .. _valmap: https://toolz.readthedocs.io/en/latest/api.html#toolz.dicttoolz.valmap .. _Pandas: http://pandas.pydata.org/pandas-docs/stable/groupby.html .. _curried: https://toolz.readthedocs.io/en/latest/curry.html toolz-0.11.2/doc/source/tips-and-tricks.rst000066400000000000000000000053771414140660100205660ustar00rootroot00000000000000Tips and Tricks =============== Toolz functions can be combined to make functions that, while common, aren't a part of toolz's standard offerings. This section presents a few of these recipes. * .. function:: pick(allowlist, dictionary) Return a subset of the provided dictionary with keys contained in the allowlist. :: from toolz import keyfilter def pick(allowlist, d): return keyfilter(lambda k: k in allowlist, d) Example: >>> alphabet = {'a': 1, 'b': 2, 'c': 3, 'd': 4} >>> pick(['a', 'b'], alphabet) {'a': 1, 'b': 2} * .. function:: omit(denylist, dictionary) Return a subset of the provided dictionary with keys *not* contained in the denylist. :: from toolz import keyfilter def omit(denylist, d): return keyfilter(lambda k: k not in denylist, d) Example: >>> alphabet = {'a': 1, 'b': 2, 'c': 3, 'd': 4} >>> omit(['a', 'b'], alphabet) {'c': 3, 'd': 4} * .. function:: compact(iterable) Filter an iterable on "truthy" values. :: from toolz import filter def compact(iter): return filter(None, iter) Example: >>> results = [0, 1, 2, None, 3, False] >>> list(compact(results)) [1, 2, 3] * .. function:: keyjoin(leftkey, leftseq, rightkey, rightseq) Inner join two sequences of dictionaries on specified keys, merging matches with right value precedence. :: from itertools import starmap from toolz import join, merge def keyjoin(leftkey, leftseq, rightkey, rightseq): return starmap(merge, join(leftkey, leftseq, rightkey, rightseq)) Example: >>> people = [{'id': 0, 'name': 'Anonymous Guy', 'location': 'Unknown'}, {'id': 1, 'name': 'Karan', 'location': 'San Francisco'}, {'id': 2, 'name': 'Matthew', 'location': 'Oakland'}] >>> hobbies = [{'person_id': 1, 'hobby': 'Tennis'}, {'person_id': 1, 'hobby': 'Acting'}, {'person_id': 2, 'hobby': 'Biking'}] >>> list(keyjoin('id', people, 'person_id', hobbies)) [{'hobby': 'Tennis', 'id': 1, 'location': 'San Francisco', 'name': 'Karan', 'person_id': 1}, {'hobby': 'Acting', 'id': 1, 'location': 'San Francisco', 'name': 'Karan', 'person_id': 1}, {'hobby': 'Biking', 'id': 2, 'location': 'Oakland', 'name': 'Matthew', 'person_id': 2}] * .. 
function:: areidentical(\*seqs) Determine if sequences are identical element-wise. This lazily evaluates the sequences and stops as soon as the result is determined. :: from toolz import diff def areidentical(*seqs): return not any(diff(*seqs, default=object())) Example: >>> areidentical([1, 2, 3], (1, 2, 3)) True >>> areidentical([1, 2, 3], [1, 2]) False toolz-0.11.2/examples/000077500000000000000000000000001414140660100145555ustar00rootroot00000000000000toolz-0.11.2/examples/fib.py000066400000000000000000000015311414140660100156670ustar00rootroot00000000000000# / 0 if i is 0 # fib(i) = | 1 if i is 1 # \ fib(i - 1) + fib(i - 2) otherwise def fib(n): """ Imperative definition of Fibonacci numbers """ a, b = 0, 1 for i in range(n): a, b = b, a + b return a # This is intuitive but VERY slow def fib(n): """ Functional definition of Fibonacci numbers """ if n == 0 or n == 1: return n else: return fib(n - 1) + fib(n - 2) from toolz import memoize # Oh wait, it's fast again fib = memoize(fib) # Provide a cache with initial values to `memoize` @memoize(cache={0: 0, 1: 1}) def fib(n): """ Functional definition of Fibonacci numbers with initial terms cached. fib(0) == 0 fib(1) == 1 ... fib(n) == fib(n - 1) + fib(n - 2) """ return fib(n - 1) + fib(n - 2) toolz-0.11.2/examples/graph.py000066400000000000000000000014031414140660100162260ustar00rootroot00000000000000from toolz.curried import * a, b, c, d, e, f, g = 'abcdefg' edges = [(a, b), (b, a), (a, c), (a, d), (d, a), (d, e), (e, f), (d, f), (f, d), (d, g), (e, g)] out_degrees = countby(first, edges) # {'a': 3, 'b': 1, 'd': 4, 'e': 2, 'f': 1} in_degrees = countby(second, edges) # {'a': 2, 'b': 1, 'c': 1, 'd': 2, 'e': 1, 'f': 2, 'g': 2} out_neighbors = valmap(comp(tuple, map(second)), groupby(first, edges)) # {'a': ('b', 'c', 'd'), # 'b': ('a',), # 'd': ('a', 'e', 'f', 'g'), # 'e': ('f', 'g'), # 'f': ('d',)} in_neighbors = valmap(comp(tuple, map(first)), groupby(second, edges)) # {'a': ('b', 'd'), # 'b': ('a',), # 'c': ('a',), # 'd': ('a', 'f'), # 'e': ('d',), # 'f': ('e', 'd'), # 'g': ('d', 'e')} toolz-0.11.2/examples/wordcount.py000066400000000000000000000005461414140660100171600ustar00rootroot00000000000000from toolz import * def stem(word): """ Stem word to primitive form """ return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") wordcount = comp(frequencies, partial(map, stem), str.split) if __name__ == '__main__': print(wordcount("This cat jumped over this other cat!")) # prints {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1} toolz-0.11.2/release-notes000066400000000000000000000027251414140660100154360ustar00rootroot00000000000000New in 0.4.2 Removed intersection New in 0.5.3 * get_in function * add itervalues, iterkeys, iteritems to compatibility * Add do function, remove side_effects from sandbox * Add juxt, partner to map * Performance improvements to merge_with * Errors from curried functions propagate upwards * keyfilter, valfilter * do New Authors: Graeme Coupar, @obmarg New in 0.6.0 * memoize is curried by default * memoize support `key` keyword argument * Cleaned up issues in curried namespace * Unary functions memoize with just the single argument, not a tuple * Flattened directory structure * Add `pluck` function from underscore.js * Remove `sandbox.jackknife` New in 0.6.1 * Python 3.4 support * New `join` operation * `join`, `groupby`, ... accept non-callable key functions. 
* Many speed improvements: * Cache method lookup * Faster `merge_sorted` without key * An additional round of tuning on `groupby` * Toolz builds on binstar build under mrocklin channel * Avoid generators, favor map. Assists in debugging. * Cleaner `curry` implementation * Fix serialization issues for `juxt`, `complement` * `reduceby` no longer requires `default` keyword argument * Fix bug in `get` where `get([1], coll)` used to return element rather than length-one tuple * `EqualityHashKey` added to sandbox * `juxt` returns a tuple, not a generator New Authors: Leonid Shvechikov, José Ricardo, Lars Buitinck, Tom Prince toolz-0.11.2/setup.cfg000066400000000000000000000005271414140660100145640ustar00rootroot00000000000000[versioneer] VCS = git style = pep440 versionfile_source = toolz/_version.py versionfile_build = toolz/_version.py tag_prefix = parentdir_prefix = toolz- [coverage:run] source = toolz omit = toolz/tests/test* toolz/*/tests/test* toolz/compatibility.py toolz/_version.py [coverage:report] exclude_lines = pragma: no cover toolz-0.11.2/setup.py000077500000000000000000000030031414140660100144500ustar00rootroot00000000000000#!/usr/bin/env python from os.path import exists from setuptools import setup import versioneer setup(name='toolz', version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), description='List processing tools and functional utilities', url='https://github.com/pytoolz/toolz/', author='https://raw.github.com/pytoolz/toolz/master/AUTHORS.md', maintainer='Erik Welch', maintainer_email='erik.n.welch@gmail.com', license='BSD', keywords='functional utility itertools functools', packages=['toolz', 'toolz.sandbox', 'toolz.curried', 'tlz'], package_data={'toolz': ['tests/*.py']}, long_description=(open('README.rst').read() if exists('README.rst') else ''), zip_safe=False, python_requires=">=3.5", classifiers=[ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: BSD License", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy"]) toolz-0.11.2/tlz/000077500000000000000000000000001414140660100135505ustar00rootroot00000000000000toolz-0.11.2/tlz/__init__.py000066400000000000000000000005221414140660100156600ustar00rootroot00000000000000"""``tlz`` mirrors the ``toolz`` API and uses ``cytoolz`` if possible. The ``tlz`` package is installed when ``toolz`` is installed. It provides a convenient way to use functions from ``cytoolz``--a faster Cython implementation of ``toolz``--if it is installed, otherwise it uses functions from ``toolz``. """ from . 
import _build_tlz toolz-0.11.2/tlz/_build_tlz.py000066400000000000000000000065241414140660100162600ustar00rootroot00000000000000import sys import types import toolz from importlib import import_module class TlzLoader(object): """ Finds and loads ``tlz`` modules when added to sys.meta_path""" def __init__(self): self.always_from_toolz = { toolz.pipe, } def _load_toolz(self, fullname): rv = {} package, dot, submodules = fullname.partition('.') try: module_name = ''.join(['cytoolz', dot, submodules]) rv['cytoolz'] = import_module(module_name) except ImportError: pass try: module_name = ''.join(['toolz', dot, submodules]) rv['toolz'] = import_module(module_name) except ImportError: pass if not rv: raise ImportError(fullname) return rv def find_module(self, fullname, path=None): # pragma: py3 no cover package, dot, submodules = fullname.partition('.') if package == 'tlz': return self def load_module(self, fullname): # pragma: py3 no cover if fullname in sys.modules: # pragma: no cover return sys.modules[fullname] spec = TlzSpec(fullname, self) module = self.create_module(spec) sys.modules[fullname] = module self.exec_module(module) return module def find_spec(self, fullname, path, target=None): # pragma: no cover package, dot, submodules = fullname.partition('.') if package == 'tlz': return TlzSpec(fullname, self) def create_module(self, spec): return types.ModuleType(spec.name) def exec_module(self, module): toolz_mods = self._load_toolz(module.__name__) fast_mod = toolz_mods.get('cytoolz') or toolz_mods['toolz'] slow_mod = toolz_mods.get('toolz') or toolz_mods['cytoolz'] module.__dict__.update(toolz.merge(fast_mod.__dict__, module.__dict__)) package = fast_mod.__package__ if package is not None: package, dot, submodules = package.partition('.') module.__package__ = ''.join(['tlz', dot, submodules]) if not module.__doc__: module.__doc__ = fast_mod.__doc__ # show file from toolz during introspection try: module.__file__ = slow_mod.__file__ except AttributeError: pass for k, v in fast_mod.__dict__.items(): tv = slow_mod.__dict__.get(k) try: hash(tv) except TypeError: tv = None if tv in self.always_from_toolz: module.__dict__[k] = tv elif ( isinstance(v, types.ModuleType) and v.__package__ == fast_mod.__name__ ): package, dot, submodules = v.__name__.partition('.') module_name = ''.join(['tlz', dot, submodules]) submodule = import_module(module_name) module.__dict__[k] = submodule class TlzSpec(object): def __init__(self, name, loader): self.name = name self.loader = loader self.origin = None self.submodule_search_locations = [] self.loader_state = None self.cached = None self.parent = None self.has_location = False tlz_loader = TlzLoader() sys.meta_path.append(tlz_loader) tlz_loader.exec_module(sys.modules['tlz']) toolz-0.11.2/toolz/000077500000000000000000000000001414140660100141065ustar00rootroot00000000000000toolz-0.11.2/toolz/__init__.py000066400000000000000000000005751414140660100162260ustar00rootroot00000000000000from .itertoolz import * from .functoolz import * from .dicttoolz import * from .recipes import * from functools import partial, reduce sorted = sorted map = map filter = filter # Aliases comp = compose from . import curried, sandbox functoolz._sigs.create_signature_registry() from ._version import get_versions __version__ = get_versions()['version'] del get_versions toolz-0.11.2/toolz/_signatures.py000066400000000000000000000500761414140660100170130ustar00rootroot00000000000000"""Internal module for better introspection of builtins. 
The main functions are ``is_builtin_valid_args``, ``is_builtin_partial_args``, and ``has_unknown_args``. Other functions in this module support these three. Notably, we create a ``signatures`` registry to enable introspection of builtin functions in any Python version. This includes builtins that have more than one valid signature. Currently, the registry includes builtins from ``builtins``, ``functools``, ``itertools``, and ``operator`` modules. More can be added as requested. We don't guarantee full coverage. Everything in this module should be regarded as implementation details. Users should try to not use this module directly. """ import functools import inspect import itertools import operator from importlib import import_module from .functoolz import (is_partial_args, is_arity, has_varargs, has_keywords, num_required_args) import builtins # We mock builtin callables using lists of tuples with lambda functions. # # The tuple spec is (num_position_args, lambda_func, keyword_only_args). # # num_position_args: # - The number of positional-only arguments. If not specified, # all positional arguments are considered positional-only. # # lambda_func: # - lambda function that matches a signature of a builtin, but does # not include keyword-only arguments. # # keyword_only_args: (optional) # - Tuple of keyword-only argumemts. module_info = {} module_info[builtins] = dict( abs=[ lambda x: None], all=[ lambda iterable: None], anext=[ lambda aiterator: None, lambda aiterator, default: None], any=[ lambda iterable: None], apply=[ lambda object: None, lambda object, args: None, lambda object, args, kwargs: None], ascii=[ lambda obj: None], bin=[ lambda number: None], bool=[ lambda x=False: None], buffer=[ lambda object: None, lambda object, offset: None, lambda object, offset, size: None], bytearray=[ lambda: None, lambda int: None, lambda string, encoding='utf8', errors='strict': None], callable=[ lambda obj: None], chr=[ lambda i: None], classmethod=[ lambda function: None], cmp=[ lambda x, y: None], coerce=[ lambda x, y: None], complex=[ lambda real=0, imag=0: None], delattr=[ lambda obj, name: None], dict=[ lambda **kwargs: None, lambda mapping, **kwargs: None], dir=[ lambda: None, lambda object: None], divmod=[ lambda x, y: None], enumerate=[ (0, lambda iterable, start=0: None)], eval=[ lambda source: None, lambda source, globals: None, lambda source, globals, locals: None], execfile=[ lambda filename: None, lambda filename, globals: None, lambda filename, globals, locals: None], file=[ (0, lambda name, mode='r', buffering=-1: None)], filter=[ lambda function, iterable: None], float=[ lambda x=0.0: None], format=[ lambda value: None, lambda value, format_spec: None], frozenset=[ lambda: None, lambda iterable: None], getattr=[ lambda object, name: None, lambda object, name, default: None], globals=[ lambda: None], hasattr=[ lambda obj, name: None], hash=[ lambda obj: None], hex=[ lambda number: None], id=[ lambda obj: None], input=[ lambda: None, lambda prompt: None], int=[ lambda x=0: None, (0, lambda x, base=10: None)], intern=[ lambda string: None], isinstance=[ lambda obj, class_or_tuple: None], issubclass=[ lambda cls, class_or_tuple: None], iter=[ lambda iterable: None, lambda callable, sentinel: None], len=[ lambda obj: None], list=[ lambda: None, lambda iterable: None], locals=[ lambda: None], long=[ lambda x=0: None, (0, lambda x, base=10: None)], map=[ lambda func, sequence, *iterables: None], memoryview=[ (0, lambda object: None)], next=[ lambda iterator: None, lambda iterator, 
default: None], object=[ lambda: None], oct=[ lambda number: None], ord=[ lambda c: None], pow=[ lambda x, y: None, lambda x, y, z: None], property=[ lambda fget=None, fset=None, fdel=None, doc=None: None], range=[ lambda stop: None, lambda start, stop: None, lambda start, stop, step: None], raw_input=[ lambda: None, lambda prompt: None], reduce=[ lambda function, sequence: None, lambda function, sequence, initial: None], reload=[ lambda module: None], repr=[ lambda obj: None], reversed=[ lambda sequence: None], round=[ (0, lambda number, ndigits=0: None)], set=[ lambda: None, lambda iterable: None], setattr=[ lambda obj, name, value: None], slice=[ lambda stop: None, lambda start, stop: None, lambda start, stop, step: None], staticmethod=[ lambda function: None], sum=[ lambda iterable: None, lambda iterable, start: None], super=[ lambda type: None, lambda type, obj: None], tuple=[ lambda: None, lambda iterable: None], type=[ lambda object: None, lambda name, bases, dict: None], unichr=[ lambda i: None], unicode=[ lambda object: None, lambda string='', encoding='utf8', errors='strict': None], vars=[ lambda: None, lambda object: None], xrange=[ lambda stop: None, lambda start, stop: None, lambda start, stop, step: None], zip=[ lambda *iterables: None], __build_class__=[ (2, lambda func, name, *bases, **kwds: None, ('metaclass',))], __import__=[ (0, lambda name, globals=None, locals=None, fromlist=None, level=None: None)], ) module_info[builtins]['exec'] = [ lambda source: None, lambda source, globals: None, lambda source, globals, locals: None] module_info[builtins].update( breakpoint=[ lambda *args, **kws: None], bytes=[ lambda: None, lambda int: None, lambda string, encoding='utf8', errors='strict': None], compile=[ (0, lambda source, filename, mode, flags=0, dont_inherit=False, optimize=-1: None)], max=[ (1, lambda iterable: None, ('default', 'key',)), (1, lambda arg1, arg2, *args: None, ('key',))], min=[ (1, lambda iterable: None, ('default', 'key',)), (1, lambda arg1, arg2, *args: None, ('key',))], open=[ (0, lambda file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None: None)], sorted=[ (1, lambda iterable: None, ('key', 'reverse'))], str=[ lambda object='', encoding='utf', errors='strict': None], ) module_info[builtins]['print'] = [ (0, lambda *args: None, ('sep', 'end', 'file', 'flush',))] module_info[functools] = dict( cmp_to_key=[ (0, lambda mycmp: None)], partial=[ lambda func, *args, **kwargs: None], partialmethod=[ lambda func, *args, **kwargs: None], reduce=[ lambda function, sequence: None, lambda function, sequence, initial: None], ) module_info[itertools] = dict( accumulate=[ (0, lambda iterable, func=None: None)], chain=[ lambda *iterables: None], combinations=[ (0, lambda iterable, r: None)], combinations_with_replacement=[ (0, lambda iterable, r: None)], compress=[ (0, lambda data, selectors: None)], count=[ lambda start=0, step=1: None], cycle=[ lambda iterable: None], dropwhile=[ lambda predicate, iterable: None], filterfalse=[ lambda function, sequence: None], groupby=[ (0, lambda iterable, key=None: None)], ifilter=[ lambda function, sequence: None], ifilterfalse=[ lambda function, sequence: None], imap=[ lambda func, sequence, *iterables: None], islice=[ lambda iterable, stop: None, lambda iterable, start, stop: None, lambda iterable, start, stop, step: None], izip=[ lambda *iterables: None], izip_longest=[ (0, lambda *iterables: None, ('fillvalue',))], permutations=[ (0, lambda iterable, r=0: None)], repeat=[ (0, lambda 
object, times=0: None)], starmap=[ lambda function, sequence: None], takewhile=[ lambda predicate, iterable: None], tee=[ lambda iterable: None, lambda iterable, n: None], zip_longest=[ (0, lambda *iterables: None, ('fillvalue',))], ) module_info[itertools].update( product=[ (0, lambda *iterables: None, ('repeat',))], ) module_info[operator] = dict( __abs__=[ lambda a: None], __add__=[ lambda a, b: None], __and__=[ lambda a, b: None], __concat__=[ lambda a, b: None], __contains__=[ lambda a, b: None], __delitem__=[ lambda a, b: None], __delslice__=[ lambda a, b, c: None], __div__=[ lambda a, b: None], __eq__=[ lambda a, b: None], __floordiv__=[ lambda a, b: None], __ge__=[ lambda a, b: None], __getitem__=[ lambda a, b: None], __getslice__=[ lambda a, b, c: None], __gt__=[ lambda a, b: None], __iadd__=[ lambda a, b: None], __iand__=[ lambda a, b: None], __iconcat__=[ lambda a, b: None], __idiv__=[ lambda a, b: None], __ifloordiv__=[ lambda a, b: None], __ilshift__=[ lambda a, b: None], __imatmul__=[ lambda a, b: None], __imod__=[ lambda a, b: None], __imul__=[ lambda a, b: None], __index__=[ lambda a: None], __inv__=[ lambda a: None], __invert__=[ lambda a: None], __ior__=[ lambda a, b: None], __ipow__=[ lambda a, b: None], __irepeat__=[ lambda a, b: None], __irshift__=[ lambda a, b: None], __isub__=[ lambda a, b: None], __itruediv__=[ lambda a, b: None], __ixor__=[ lambda a, b: None], __le__=[ lambda a, b: None], __lshift__=[ lambda a, b: None], __lt__=[ lambda a, b: None], __matmul__=[ lambda a, b: None], __mod__=[ lambda a, b: None], __mul__=[ lambda a, b: None], __ne__=[ lambda a, b: None], __neg__=[ lambda a: None], __not__=[ lambda a: None], __or__=[ lambda a, b: None], __pos__=[ lambda a: None], __pow__=[ lambda a, b: None], __repeat__=[ lambda a, b: None], __rshift__=[ lambda a, b: None], __setitem__=[ lambda a, b, c: None], __setslice__=[ lambda a, b, c, d: None], __sub__=[ lambda a, b: None], __truediv__=[ lambda a, b: None], __xor__=[ lambda a, b: None], _abs=[ lambda x: None], _compare_digest=[ lambda a, b: None], abs=[ lambda a: None], add=[ lambda a, b: None], and_=[ lambda a, b: None], attrgetter=[ lambda attr, *args: None], concat=[ lambda a, b: None], contains=[ lambda a, b: None], countOf=[ lambda a, b: None], delitem=[ lambda a, b: None], delslice=[ lambda a, b, c: None], div=[ lambda a, b: None], eq=[ lambda a, b: None], floordiv=[ lambda a, b: None], ge=[ lambda a, b: None], getitem=[ lambda a, b: None], getslice=[ lambda a, b, c: None], gt=[ lambda a, b: None], iadd=[ lambda a, b: None], iand=[ lambda a, b: None], iconcat=[ lambda a, b: None], idiv=[ lambda a, b: None], ifloordiv=[ lambda a, b: None], ilshift=[ lambda a, b: None], imatmul=[ lambda a, b: None], imod=[ lambda a, b: None], imul=[ lambda a, b: None], index=[ lambda a: None], indexOf=[ lambda a, b: None], inv=[ lambda a: None], invert=[ lambda a: None], ior=[ lambda a, b: None], ipow=[ lambda a, b: None], irepeat=[ lambda a, b: None], irshift=[ lambda a, b: None], is_=[ lambda a, b: None], is_not=[ lambda a, b: None], isCallable=[ lambda a: None], isMappingType=[ lambda a: None], isNumberType=[ lambda a: None], isSequenceType=[ lambda a: None], isub=[ lambda a, b: None], itemgetter=[ lambda item, *args: None], itruediv=[ lambda a, b: None], ixor=[ lambda a, b: None], le=[ lambda a, b: None], length_hint=[ lambda obj: None, lambda obj, default: None], lshift=[ lambda a, b: None], lt=[ lambda a, b: None], matmul=[ lambda a, b: None], methodcaller=[ lambda name, *args, **kwargs: None], mod=[ lambda a, b: 
None], mul=[ lambda a, b: None], ne=[ lambda a, b: None], neg=[ lambda a: None], not_=[ lambda a: None], or_=[ lambda a, b: None], pos=[ lambda a: None], pow=[ lambda a, b: None], repeat=[ lambda a, b: None], rshift=[ lambda a, b: None], sequenceIncludes=[ lambda a, b: None], setitem=[ lambda a, b, c: None], setslice=[ lambda a, b, c, d: None], sub=[ lambda a, b: None], truediv=[ lambda a, b: None], truth=[ lambda a: None], xor=[ lambda a, b: None], ) module_info['toolz'] = dict( curry=[ (0, lambda *args, **kwargs: None)], excepts=[ (0, lambda exc, func, handler=None: None)], flip=[ (0, lambda func=None, a=None, b=None: None)], juxt=[ (0, lambda *funcs: None)], memoize=[ (0, lambda func=None, cache=None, key=None: None)], ) module_info['toolz.functoolz'] = dict( Compose=[ (0, lambda funcs: None)], InstanceProperty=[ (0, lambda fget=None, fset=None, fdel=None, doc=None, classval=None: None)], ) def num_pos_args(sigspec): """ Return the number of positional arguments. ``f(x, y=1)`` has 1""" return sum(1 for x in sigspec.parameters.values() if x.kind == x.POSITIONAL_OR_KEYWORD and x.default is x.empty) def get_exclude_keywords(num_pos_only, sigspec): """ Return the names of position-only arguments if func has **kwargs""" if num_pos_only == 0: return () has_kwargs = any(x.kind == x.VAR_KEYWORD for x in sigspec.parameters.values()) if not has_kwargs: return () pos_args = list(sigspec.parameters.values())[:num_pos_only] return tuple(x.name for x in pos_args) def signature_or_spec(func): try: return inspect.signature(func) except (ValueError, TypeError): return None def expand_sig(sig): """ Convert the signature spec in ``module_info`` to add to ``signatures`` The input signature spec is one of: - ``lambda_func`` - ``(num_position_args, lambda_func)`` - ``(num_position_args, lambda_func, keyword_only_args)`` The output signature spec is: ``(num_position_args, lambda_func, keyword_exclude, sigspec)`` where ``keyword_exclude`` includes keyword only arguments and, if variadic keywords is present, the names of position-only argument. The latter is included to support builtins such as ``partial(func, *args, **kwargs)``, which allows ``func=`` to be used as a keyword even though it's the name of a positional argument. 
""" if isinstance(sig, tuple): if len(sig) == 3: num_pos_only, func, keyword_only = sig assert isinstance(sig[-1], tuple) else: num_pos_only, func = sig keyword_only = () sigspec = signature_or_spec(func) else: func = sig sigspec = signature_or_spec(func) num_pos_only = num_pos_args(sigspec) keyword_only = () keyword_exclude = get_exclude_keywords(num_pos_only, sigspec) return num_pos_only, func, keyword_only + keyword_exclude, sigspec signatures = {} def create_signature_registry(module_info=module_info, signatures=signatures): for module, info in module_info.items(): if isinstance(module, str): module = import_module(module) for name, sigs in info.items(): if hasattr(module, name): new_sigs = tuple(expand_sig(sig) for sig in sigs) signatures[getattr(module, name)] = new_sigs def check_valid(sig, args, kwargs): """ Like ``is_valid_args`` for the given signature spec""" num_pos_only, func, keyword_exclude, sigspec = sig if len(args) < num_pos_only: return False if keyword_exclude: kwargs = dict(kwargs) for item in keyword_exclude: kwargs.pop(item, None) try: func(*args, **kwargs) return True except TypeError: return False def _is_valid_args(func, args, kwargs): """ Like ``is_valid_args`` for builtins in our ``signatures`` registry""" if func not in signatures: return None sigs = signatures[func] return any(check_valid(sig, args, kwargs) for sig in sigs) def check_partial(sig, args, kwargs): """ Like ``is_partial_args`` for the given signature spec""" num_pos_only, func, keyword_exclude, sigspec = sig if len(args) < num_pos_only: pad = (None,) * (num_pos_only - len(args)) args = args + pad if keyword_exclude: kwargs = dict(kwargs) for item in keyword_exclude: kwargs.pop(item, None) return is_partial_args(func, args, kwargs, sigspec) def _is_partial_args(func, args, kwargs): """ Like ``is_partial_args`` for builtins in our ``signatures`` registry""" if func not in signatures: return None sigs = signatures[func] return any(check_partial(sig, args, kwargs) for sig in sigs) def check_arity(n, sig): num_pos_only, func, keyword_exclude, sigspec = sig if keyword_exclude or num_pos_only > n: return False return is_arity(n, func, sigspec) def _is_arity(n, func): if func not in signatures: return None sigs = signatures[func] checks = [check_arity(n, sig) for sig in sigs] if all(checks): return True elif any(checks): return None return False def check_varargs(sig): num_pos_only, func, keyword_exclude, sigspec = sig return has_varargs(func, sigspec) def _has_varargs(func): if func not in signatures: return None sigs = signatures[func] checks = [check_varargs(sig) for sig in sigs] if all(checks): return True elif any(checks): return None return False def check_keywords(sig): num_pos_only, func, keyword_exclude, sigspec = sig if keyword_exclude: return True return has_keywords(func, sigspec) def _has_keywords(func): if func not in signatures: return None sigs = signatures[func] checks = [check_keywords(sig) for sig in sigs] if all(checks): return True elif any(checks): return None return False def check_required_args(sig): num_pos_only, func, keyword_exclude, sigspec = sig return num_required_args(func, sigspec) def _num_required_args(func): if func not in signatures: return None sigs = signatures[func] vals = [check_required_args(sig) for sig in sigs] val = vals[0] if all(x == val for x in vals): return val return None toolz-0.11.2/toolz/_version.py000066400000000000000000000441151414140660100163110ustar00rootroot00000000000000 # This file helps to compute a version number in source trees obtained from 
# git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.18 (https://github.com/warner/python-versioneer) """Git implementation of _version.py.""" import errno import os import re import subprocess import sys def get_keywords(): """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). git_refnames = " (HEAD -> master, tag: 0.11.2)" git_full = "294e981edad035a7ac6f0e2b48f1738368fa4b34" git_date = "2021-11-06 00:07:13 -0500" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_config(): """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440" cfg.tag_prefix = "" cfg.parentdir_prefix = "toolz-" cfg.versionfile_source = "toolz/_version.py" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) return None, p.returncode return stdout, p.returncode def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. 
We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." 
return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): """TAG[.post.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post.dev%d" % pieces["distance"] else: # exception #1 rendered = "0.post.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Eexceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions(): """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for i in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} toolz-0.11.2/toolz/compatibility.py000066400000000000000000000017451414140660100173400ustar00rootroot00000000000000import warnings warnings.warn("The toolz.compatibility module is no longer " "needed in Python 3 and has been deprecated. Please " "import these utilities directly from the standard library. 
" "This module will be removed in a future release.", category=DeprecationWarning, stacklevel=2) import operator import sys PY3 = sys.version_info[0] > 2 PY34 = sys.version_info[0] == 3 and sys.version_info[1] == 4 PYPY = hasattr(sys, 'pypy_version_info') and PY3 __all__ = ('map', 'filter', 'range', 'zip', 'reduce', 'zip_longest', 'iteritems', 'iterkeys', 'itervalues', 'filterfalse', 'PY3', 'PY34', 'PYPY') map = map filter = filter range = range zip = zip from functools import reduce from itertools import zip_longest from itertools import filterfalse iteritems = operator.methodcaller('items') iterkeys = operator.methodcaller('keys') itervalues = operator.methodcaller('values') from collections.abc import Sequence toolz-0.11.2/toolz/curried/000077500000000000000000000000001414140660100155435ustar00rootroot00000000000000toolz-0.11.2/toolz/curried/__init__.py000066400000000000000000000052141414140660100176560ustar00rootroot00000000000000""" Alternate namespace for toolz such that all functions are curried Currying provides implicit partial evaluation of all functions Example: Get usually requires two arguments, an index and a collection >>> from toolz.curried import get >>> get(0, ('a', 'b')) 'a' When we use it in higher order functions we often want to pass a partially evaluated form >>> data = [(1, 2), (11, 22), (111, 222)] >>> list(map(lambda seq: get(0, seq), data)) [1, 11, 111] The curried version allows simple expression of partial evaluation >>> list(map(get(0), data)) [1, 11, 111] See Also: toolz.functoolz.curry """ import toolz from . import operator from toolz import ( apply, comp, complement, compose, compose_left, concat, concatv, count, curry, diff, first, flip, frequencies, identity, interleave, isdistinct, isiterable, juxt, last, memoize, merge_sorted, peek, pipe, second, thread_first, thread_last, ) from .exceptions import merge, merge_with accumulate = toolz.curry(toolz.accumulate) assoc = toolz.curry(toolz.assoc) assoc_in = toolz.curry(toolz.assoc_in) cons = toolz.curry(toolz.cons) countby = toolz.curry(toolz.countby) dissoc = toolz.curry(toolz.dissoc) do = toolz.curry(toolz.do) drop = toolz.curry(toolz.drop) excepts = toolz.curry(toolz.excepts) filter = toolz.curry(toolz.filter) get = toolz.curry(toolz.get) get_in = toolz.curry(toolz.get_in) groupby = toolz.curry(toolz.groupby) interpose = toolz.curry(toolz.interpose) itemfilter = toolz.curry(toolz.itemfilter) itemmap = toolz.curry(toolz.itemmap) iterate = toolz.curry(toolz.iterate) join = toolz.curry(toolz.join) keyfilter = toolz.curry(toolz.keyfilter) keymap = toolz.curry(toolz.keymap) map = toolz.curry(toolz.map) mapcat = toolz.curry(toolz.mapcat) nth = toolz.curry(toolz.nth) partial = toolz.curry(toolz.partial) partition = toolz.curry(toolz.partition) partition_all = toolz.curry(toolz.partition_all) partitionby = toolz.curry(toolz.partitionby) peekn = toolz.curry(toolz.peekn) pluck = toolz.curry(toolz.pluck) random_sample = toolz.curry(toolz.random_sample) reduce = toolz.curry(toolz.reduce) reduceby = toolz.curry(toolz.reduceby) remove = toolz.curry(toolz.remove) sliding_window = toolz.curry(toolz.sliding_window) sorted = toolz.curry(toolz.sorted) tail = toolz.curry(toolz.tail) take = toolz.curry(toolz.take) take_nth = toolz.curry(toolz.take_nth) topk = toolz.curry(toolz.topk) unique = toolz.curry(toolz.unique) update_in = toolz.curry(toolz.update_in) valfilter = toolz.curry(toolz.valfilter) valmap = toolz.curry(toolz.valmap) del exceptions del toolz 
toolz-0.11.2/toolz/curried/exceptions.py000066400000000000000000000005211414140660100202740ustar00rootroot00000000000000import toolz __all__ = ['merge_with', 'merge'] @toolz.curry def merge_with(func, d, *dicts, **kwargs): return toolz.merge_with(func, d, *dicts, **kwargs) @toolz.curry def merge(d, *dicts, **kwargs): return toolz.merge(d, *dicts, **kwargs) merge_with.__doc__ = toolz.merge_with.__doc__ merge.__doc__ = toolz.merge.__doc__ toolz-0.11.2/toolz/curried/operator.py000066400000000000000000000007521414140660100177540ustar00rootroot00000000000000from __future__ import absolute_import import operator from toolz.functoolz import curry, num_required_args, has_keywords def should_curry(f): num = num_required_args(f) return num is None or num > 1 or num == 1 and has_keywords(f) is not False locals().update( {name: curry(f) if should_curry(f) else f for name, f in vars(operator).items() if callable(f)}, ) # Clean up the namespace. del curry del num_required_args del has_keywords del operator del should_curry toolz-0.11.2/toolz/dicttoolz.py000066400000000000000000000213361414140660100165000ustar00rootroot00000000000000import operator from functools import reduce from collections.abc import Mapping __all__ = ('merge', 'merge_with', 'valmap', 'keymap', 'itemmap', 'valfilter', 'keyfilter', 'itemfilter', 'assoc', 'dissoc', 'assoc_in', 'update_in', 'get_in') def _get_factory(f, kwargs): factory = kwargs.pop('factory', dict) if kwargs: raise TypeError("{}() got an unexpected keyword argument " "'{}'".format(f.__name__, kwargs.popitem()[0])) return factory def merge(*dicts, **kwargs): """ Merge a collection of dictionaries >>> merge({1: 'one'}, {2: 'two'}) {1: 'one', 2: 'two'} Later dictionaries have precedence >>> merge({1: 2, 3: 4}, {3: 3, 4: 4}) {1: 2, 3: 3, 4: 4} See Also: merge_with """ if len(dicts) == 1 and not isinstance(dicts[0], Mapping): dicts = dicts[0] factory = _get_factory(merge, kwargs) rv = factory() for d in dicts: rv.update(d) return rv def merge_with(func, *dicts, **kwargs): """ Merge dictionaries and apply function to combined values A key may occur in more than one dict, and all values mapped from the key will be passed to the function as a list, such as func([val1, val2, ...]). 
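For instance, passing ``list`` as the combining function simply collects the values mapped to each key (an illustrative sketch):

>>> merge_with(list, {1: 1, 2: 2}, {1: 10})
{1: [1, 10], 2: [2]}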
>>> merge_with(sum, {1: 1, 2: 2}, {1: 10, 2: 20}) {1: 11, 2: 22} >>> merge_with(first, {1: 1, 2: 2}, {2: 20, 3: 30}) # doctest: +SKIP {1: 1, 2: 2, 3: 30} See Also: merge """ if len(dicts) == 1 and not isinstance(dicts[0], Mapping): dicts = dicts[0] factory = _get_factory(merge_with, kwargs) result = factory() for d in dicts: for k, v in d.items(): if k not in result: result[k] = [v] else: result[k].append(v) return valmap(func, result, factory) def valmap(func, d, factory=dict): """ Apply function to values of dictionary >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]} >>> valmap(sum, bills) # doctest: +SKIP {'Alice': 65, 'Bob': 45} See Also: keymap itemmap """ rv = factory() rv.update(zip(d.keys(), map(func, d.values()))) return rv def keymap(func, d, factory=dict): """ Apply function to keys of dictionary >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]} >>> keymap(str.lower, bills) # doctest: +SKIP {'alice': [20, 15, 30], 'bob': [10, 35]} See Also: valmap itemmap """ rv = factory() rv.update(zip(map(func, d.keys()), d.values())) return rv def itemmap(func, d, factory=dict): """ Apply function to items of dictionary >>> accountids = {"Alice": 10, "Bob": 20} >>> itemmap(reversed, accountids) # doctest: +SKIP {10: "Alice", 20: "Bob"} See Also: keymap valmap """ rv = factory() rv.update(map(func, d.items())) return rv def valfilter(predicate, d, factory=dict): """ Filter items in dictionary by value >>> iseven = lambda x: x % 2 == 0 >>> d = {1: 2, 2: 3, 3: 4, 4: 5} >>> valfilter(iseven, d) {1: 2, 3: 4} See Also: keyfilter itemfilter valmap """ rv = factory() for k, v in d.items(): if predicate(v): rv[k] = v return rv def keyfilter(predicate, d, factory=dict): """ Filter items in dictionary by key >>> iseven = lambda x: x % 2 == 0 >>> d = {1: 2, 2: 3, 3: 4, 4: 5} >>> keyfilter(iseven, d) {2: 3, 4: 5} See Also: valfilter itemfilter keymap """ rv = factory() for k, v in d.items(): if predicate(k): rv[k] = v return rv def itemfilter(predicate, d, factory=dict): """ Filter items in dictionary by item >>> def isvalid(item): ... k, v = item ... return k % 2 == 0 and v < 4 >>> d = {1: 2, 2: 3, 3: 4, 4: 5} >>> itemfilter(isvalid, d) {2: 3} See Also: keyfilter valfilter itemmap """ rv = factory() for item in d.items(): if predicate(item): k, v = item rv[k] = v return rv def assoc(d, key, value, factory=dict): """ Return a new dict with new key value pair New dict has d[key] set to value. Does not modify the initial dictionary. >>> assoc({'x': 1}, 'x', 2) {'x': 2} >>> assoc({'x': 1}, 'y', 3) # doctest: +SKIP {'x': 1, 'y': 3} """ d2 = factory() d2.update(d) d2[key] = value return d2 def dissoc(d, *keys, **kwargs): """ Return a new dict with the given key(s) removed. New dict has d[key] deleted for each supplied key. Does not modify the initial dictionary. >>> dissoc({'x': 1, 'y': 2}, 'y') {'x': 1} >>> dissoc({'x': 1, 'y': 2}, 'y', 'x') {} >>> dissoc({'x': 1}, 'y') # Ignores missing keys {'x': 1} """ factory = _get_factory(dissoc, kwargs) d2 = factory() if len(keys) < len(d) * .6: d2.update(d) for key in keys: if key in d2: del d2[key] else: remaining = set(d) remaining.difference_update(keys) for k in remaining: d2[k] = d[k] return d2 def assoc_in(d, keys, value, factory=dict): """ Return a new dict with new, potentially nested, key value pair >>> purchase = {'name': 'Alice', ... 'order': {'items': ['Apple', 'Orange'], ... 'costs': [0.50, 1.25]}, ... 
'credit card': '5555-1234-1234-1234'} >>> assoc_in(purchase, ['order', 'costs'], [0.25, 1.00]) # doctest: +SKIP {'credit card': '5555-1234-1234-1234', 'name': 'Alice', 'order': {'costs': [0.25, 1.00], 'items': ['Apple', 'Orange']}} """ return update_in(d, keys, lambda x: value, value, factory) def update_in(d, keys, func, default=None, factory=dict): """ Update value in a (potentially) nested dictionary inputs: d - dictionary on which to operate keys - list or tuple giving the location of the value to be changed in d func - function to operate on that value If keys == [k0,..,kX] and d[k0]..[kX] == v, update_in returns a copy of the original dictionary with v replaced by func(v), but does not mutate the original dictionary. If k0 is not a key in d, update_in creates nested dictionaries to the depth specified by the keys, with the innermost value set to func(default). >>> inc = lambda x: x + 1 >>> update_in({'a': 0}, ['a'], inc) {'a': 1} >>> transaction = {'name': 'Alice', ... 'purchase': {'items': ['Apple', 'Orange'], ... 'costs': [0.50, 1.25]}, ... 'credit card': '5555-1234-1234-1234'} >>> update_in(transaction, ['purchase', 'costs'], sum) # doctest: +SKIP {'credit card': '5555-1234-1234-1234', 'name': 'Alice', 'purchase': {'costs': 1.75, 'items': ['Apple', 'Orange']}} >>> # updating a value when k0 is not in d >>> update_in({}, [1, 2, 3], str, default="bar") {1: {2: {3: 'bar'}}} >>> update_in({1: 'foo'}, [2, 3, 4], inc, 0) {1: 'foo', 2: {3: {4: 1}}} """ ks = iter(keys) k = next(ks) rv = inner = factory() rv.update(d) for key in ks: if k in d: d = d[k] dtemp = factory() dtemp.update(d) else: d = dtemp = factory() inner[k] = inner = dtemp k = key if k in d: inner[k] = func(d[k]) else: inner[k] = func(default) return rv def get_in(keys, coll, default=None, no_default=False): """ Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys. If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless ``no_default`` is specified, then it raises KeyError or IndexError. ``get_in`` is a generalization of ``operator.getitem`` for nested data structures such as dictionaries and lists. >>> transaction = {'name': 'Alice', ... 'purchase': {'items': ['Apple', 'Orange'], ... 'costs': [0.50, 1.25]}, ... 'credit card': '5555-1234-1234-1234'} >>> get_in(['purchase', 'items', 0], transaction) 'Apple' >>> get_in(['name'], transaction) 'Alice' >>> get_in(['purchase', 'total'], transaction) >>> get_in(['purchase', 'items', 'apple'], transaction) >>> get_in(['purchase', 'items', 10], transaction) >>> get_in(['purchase', 'total'], transaction, 0) 0 >>> get_in(['y'], {}, no_default=True) Traceback (most recent call last): ... KeyError: 'y' See Also: itertoolz.get operator.getitem """ try: return reduce(operator.getitem, keys, coll) except (KeyError, IndexError, TypeError): if no_default: raise return default toolz-0.11.2/toolz/functoolz.py000066400000000000000000000721761414140660100165140ustar00rootroot00000000000000from functools import reduce, partial import inspect import sys from operator import attrgetter, not_ from importlib import import_module from textwrap import dedent from types import MethodType from .utils import no_default PYPY = hasattr(sys, 'pypy_version_info') __all__ = ('identity', 'apply', 'thread_first', 'thread_last', 'memoize', 'compose', 'compose_left', 'pipe', 'complement', 'juxt', 'do', 'curry', 'flip', 'excepts') def identity(x): """ Identity function. 
Return x >>> identity(3) 3 """ return x def apply(*func_and_args, **kwargs): """ Applies a function and returns the results >>> def double(x): return 2*x >>> def inc(x): return x + 1 >>> apply(double, 5) 10 >>> tuple(map(apply, [double, inc, double], [10, 500, 8000])) (20, 501, 16000) """ if not func_and_args: raise TypeError('func argument is required') func, args = func_and_args[0], func_and_args[1:] return func(*args, **kwargs) def thread_first(val, *forms): """ Thread value through a sequence of functions/forms >>> def double(x): return 2*x >>> def inc(x): return x + 1 >>> thread_first(1, inc, double) 4 If the function expects more than one input you can specify those inputs in a tuple. The value is used as the first input. >>> def add(x, y): return x + y >>> def pow(x, y): return x**y >>> thread_first(1, (add, 4), (pow, 2)) # pow(add(1, 4), 2) 25 So in general thread_first(x, f, (g, y, z)) expands to g(f(x), y, z) See Also: thread_last """ def evalform_front(val, form): if callable(form): return form(val) if isinstance(form, tuple): func, args = form[0], form[1:] args = (val,) + args return func(*args) return reduce(evalform_front, forms, val) def thread_last(val, *forms): """ Thread value through a sequence of functions/forms >>> def double(x): return 2*x >>> def inc(x): return x + 1 >>> thread_last(1, inc, double) 4 If the function expects more than one input you can specify those inputs in a tuple. The value is used as the last input. >>> def add(x, y): return x + y >>> def pow(x, y): return x**y >>> thread_last(1, (add, 4), (pow, 2)) # pow(2, add(4, 1)) 32 So in general thread_last(x, f, (g, y, z)) expands to g(y, z, f(x)) >>> def iseven(x): ... return x % 2 == 0 >>> list(thread_last([1, 2, 3], (map, inc), (filter, iseven))) [2, 4] See Also: thread_first """ def evalform_back(val, form): if callable(form): return form(val) if isinstance(form, tuple): func, args = form[0], form[1:] args = args + (val,) return func(*args) return reduce(evalform_back, forms, val) def instanceproperty(fget=None, fset=None, fdel=None, doc=None, classval=None): """ Like @property, but returns ``classval`` when used as a class attribute >>> class MyClass(object): ... '''The class docstring''' ... @instanceproperty(classval=__doc__) ... def __doc__(self): ... return 'An object docstring' ... @instanceproperty ... def val(self): ... return 42 ... >>> MyClass.__doc__ 'The class docstring' >>> MyClass.val is None True >>> obj = MyClass() >>> obj.__doc__ 'An object docstring' >>> obj.val 42 """ if fget is None: return partial(instanceproperty, fset=fset, fdel=fdel, doc=doc, classval=classval) return InstanceProperty(fget=fget, fset=fset, fdel=fdel, doc=doc, classval=classval) class InstanceProperty(property): """ Like @property, but returns ``classval`` when used as a class attribute Should not be used directly. Use ``instanceproperty`` instead. """ def __init__(self, fget=None, fset=None, fdel=None, doc=None, classval=None): self.classval = classval property.__init__(self, fget=fget, fset=fset, fdel=fdel, doc=doc) def __get__(self, obj, type=None): if obj is None: return self.classval return property.__get__(self, obj, type) def __reduce__(self): state = (self.fget, self.fset, self.fdel, self.__doc__, self.classval) return InstanceProperty, state class curry(object): """ Curry a callable function Enables partial application of arguments through calling a function with an incomplete set of arguments. >>> def mul(x, y): ... 
return x * y >>> mul = curry(mul) >>> double = mul(2) >>> double(10) 20 Also supports keyword arguments >>> @curry # Can use curry as a decorator ... def f(x, y, a=10): ... return a * (x + y) >>> add = f(a=1) >>> add(2, 3) 5 See Also: toolz.curried - namespace of curried functions https://toolz.readthedocs.io/en/latest/curry.html """ def __init__(self, *args, **kwargs): if not args: raise TypeError('__init__() takes at least 2 arguments (1 given)') func, args = args[0], args[1:] if not callable(func): raise TypeError("Input must be callable") # curry- or functools.partial-like object? Unpack and merge arguments if ( hasattr(func, 'func') and hasattr(func, 'args') and hasattr(func, 'keywords') and isinstance(func.args, tuple) ): _kwargs = {} if func.keywords: _kwargs.update(func.keywords) _kwargs.update(kwargs) kwargs = _kwargs args = func.args + args func = func.func if kwargs: self._partial = partial(func, *args, **kwargs) else: self._partial = partial(func, *args) self.__doc__ = getattr(func, '__doc__', None) self.__name__ = getattr(func, '__name__', '') self.__module__ = getattr(func, '__module__', None) self.__qualname__ = getattr(func, '__qualname__', None) self._sigspec = None self._has_unknown_args = None @instanceproperty def func(self): return self._partial.func @instanceproperty def __signature__(self): sig = inspect.signature(self.func) args = self.args or () keywords = self.keywords or {} if is_partial_args(self.func, args, keywords, sig) is False: raise TypeError('curry object has incorrect arguments') params = list(sig.parameters.values()) skip = 0 for param in params[:len(args)]: if param.kind == param.VAR_POSITIONAL: break skip += 1 kwonly = False newparams = [] for param in params[skip:]: kind = param.kind default = param.default if kind == param.VAR_KEYWORD: pass elif kind == param.VAR_POSITIONAL: if kwonly: continue elif param.name in keywords: default = keywords[param.name] kind = param.KEYWORD_ONLY kwonly = True else: if kwonly: kind = param.KEYWORD_ONLY if default is param.empty: default = no_default newparams.append(param.replace(default=default, kind=kind)) return sig.replace(parameters=newparams) @instanceproperty def args(self): return self._partial.args @instanceproperty def keywords(self): return self._partial.keywords @instanceproperty def func_name(self): return self.__name__ def __str__(self): return str(self.func) def __repr__(self): return repr(self.func) def __hash__(self): return hash((self.func, self.args, frozenset(self.keywords.items()) if self.keywords else None)) def __eq__(self, other): return (isinstance(other, curry) and self.func == other.func and self.args == other.args and self.keywords == other.keywords) def __ne__(self, other): return not self.__eq__(other) def __call__(self, *args, **kwargs): try: return self._partial(*args, **kwargs) except TypeError as exc: if self._should_curry(args, kwargs, exc): return self.bind(*args, **kwargs) raise def _should_curry(self, args, kwargs, exc=None): func = self.func args = self.args + args if self.keywords: kwargs = dict(self.keywords, **kwargs) if self._sigspec is None: sigspec = self._sigspec = _sigs.signature_or_spec(func) self._has_unknown_args = has_varargs(func, sigspec) is not False else: sigspec = self._sigspec if is_partial_args(func, args, kwargs, sigspec) is False: # Nothing can make the call valid return False elif self._has_unknown_args: # The call may be valid and raised a TypeError, but we curry # anyway because the function may have `*args`. 
This is useful # for decorators with signature `func(*args, **kwargs)`. return True elif not is_valid_args(func, args, kwargs, sigspec): # Adding more arguments may make the call valid return True else: # There was a genuine TypeError return False def bind(self, *args, **kwargs): return type(self)(self, *args, **kwargs) def call(self, *args, **kwargs): return self._partial(*args, **kwargs) def __get__(self, instance, owner): if instance is None: return self return curry(self, instance) def __reduce__(self): func = self.func modname = getattr(func, '__module__', None) qualname = getattr(func, '__qualname__', None) if qualname is None: # pragma: no cover qualname = getattr(func, '__name__', None) is_decorated = None if modname and qualname: attrs = [] obj = import_module(modname) for attr in qualname.split('.'): if isinstance(obj, curry): attrs.append('func') obj = obj.func obj = getattr(obj, attr, None) if obj is None: break attrs.append(attr) if isinstance(obj, curry) and obj.func is func: is_decorated = obj is self qualname = '.'.join(attrs) func = '%s:%s' % (modname, qualname) # functools.partial objects can't be pickled userdict = tuple((k, v) for k, v in self.__dict__.items() if k not in ('_partial', '_sigspec')) state = (type(self), func, self.args, self.keywords, userdict, is_decorated) return _restore_curry, state def _restore_curry(cls, func, args, kwargs, userdict, is_decorated): if isinstance(func, str): modname, qualname = func.rsplit(':', 1) obj = import_module(modname) for attr in qualname.split('.'): obj = getattr(obj, attr) if is_decorated: return obj func = obj.func obj = cls(func, *args, **(kwargs or {})) obj.__dict__.update(userdict) return obj @curry def memoize(func, cache=None, key=None): """ Cache a function's result for speedy future evaluation Considerations: Trades memory for speed. Only use on pure functions. >>> def add(x, y): return x + y >>> add = memoize(add) Or use as a decorator >>> @memoize ... def add(x, y): ... return x + y Use the ``cache`` keyword to provide a dict-like object as an initial cache >>> @memoize(cache={(1, 2): 3}) ... def add(x, y): ... return x + y Note that the above works as a decorator because ``memoize`` is curried. It is also possible to provide a ``key(args, kwargs)`` function that calculates keys used for the cache, which receives an ``args`` tuple and ``kwargs`` dict as input, and must return a hashable value. However, the default key function should be sufficient most of the time. >>> # Use key function that ignores extraneous keyword arguments >>> @memoize(key=lambda args, kwargs: args) ... def add(x, y, verbose=False): ... if verbose: ... print('Calculating %s + %s' % (x, y)) ... return x + y """ if cache is None: cache = {} try: may_have_kwargs = has_keywords(func) is not False # Is unary function (single arg, no variadic argument or keywords)? 
is_unary = is_arity(1, func) except TypeError: # pragma: no cover may_have_kwargs = True is_unary = False if key is None: if is_unary: def key(args, kwargs): return args[0] elif may_have_kwargs: def key(args, kwargs): return ( args or None, frozenset(kwargs.items()) if kwargs else None, ) else: def key(args, kwargs): return args def memof(*args, **kwargs): k = key(args, kwargs) try: return cache[k] except TypeError: raise TypeError("Arguments to memoized function must be hashable") except KeyError: cache[k] = result = func(*args, **kwargs) return result try: memof.__name__ = func.__name__ except AttributeError: pass memof.__doc__ = func.__doc__ memof.__wrapped__ = func return memof class Compose(object): """ A composition of functions See Also: compose """ __slots__ = 'first', 'funcs' def __init__(self, funcs): funcs = tuple(reversed(funcs)) self.first = funcs[0] self.funcs = funcs[1:] def __call__(self, *args, **kwargs): ret = self.first(*args, **kwargs) for f in self.funcs: ret = f(ret) return ret def __getstate__(self): return self.first, self.funcs def __setstate__(self, state): self.first, self.funcs = state @instanceproperty(classval=__doc__) def __doc__(self): def composed_doc(*fs): """Generate a docstring for the composition of fs. """ if not fs: # Argument name for the docstring. return '*args, **kwargs' return '{f}({g})'.format(f=fs[0].__name__, g=composed_doc(*fs[1:])) try: return ( 'lambda *args, **kwargs: ' + composed_doc(*reversed((self.first,) + self.funcs)) ) except AttributeError: # One of our callables does not have a `__name__`, whatever. return 'A composition of functions' @property def __name__(self): try: return '_of_'.join( (f.__name__ for f in reversed((self.first,) + self.funcs)) ) except AttributeError: return type(self).__name__ def __repr__(self): return '{.__class__.__name__}{!r}'.format( self, tuple(reversed((self.first, ) + self.funcs))) def __eq__(self, other): if isinstance(other, Compose): return other.first == self.first and other.funcs == self.funcs return NotImplemented def __ne__(self, other): equality = self.__eq__(other) return NotImplemented if equality is NotImplemented else not equality def __hash__(self): return hash(self.first) ^ hash(self.funcs) # Mimic the descriptor behavior of python functions. # i.e. let Compose be called as a method when bound to a class. # adapted from # docs.python.org/3/howto/descriptor.html#functions-and-methods def __get__(self, obj, objtype=None): return self if obj is None else MethodType(self, obj) # introspection with Signature is only possible from py3.3+ @instanceproperty def __signature__(self): base = inspect.signature(self.first) last = inspect.signature(self.funcs[-1]) return base.replace(return_annotation=last.return_annotation) __wrapped__ = instanceproperty(attrgetter('first')) def compose(*funcs): """ Compose functions to operate in series. Returns a function that applies other functions in sequence. Functions are applied from right to left so that ``compose(f, g, h)(x, y)`` is the same as ``f(g(h(x, y)))``. If no arguments are provided, the identity function (f(x) = x) is returned. >>> inc = lambda i: i + 1 >>> compose(str, inc)(3) '4' See Also: compose_left pipe """ if not funcs: return identity if len(funcs) == 1: return funcs[0] else: return Compose(funcs) def compose_left(*funcs): """ Compose functions to operate in series. Returns a function that applies other functions in sequence. Functions are applied from left to right so that ``compose_left(f, g, h)(x, y)`` is the same as ``h(g(f(x, y)))``. 
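For example, a small left-to-right pipeline (illustrative):

>>> compose_left(abs, str)(-3)
'3'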
If no arguments are provided, the identity function (f(x) = x) is returned. >>> inc = lambda i: i + 1 >>> compose_left(inc, str)(3) '4' See Also: compose pipe """ return compose(*reversed(funcs)) def pipe(data, *funcs): """ Pipe a value through a sequence of functions I.e. ``pipe(data, f, g, h)`` is equivalent to ``h(g(f(data)))`` We think of the value as progressing through a pipe of several transformations, much like pipes in UNIX ``$ cat data | f | g | h`` >>> double = lambda i: 2 * i >>> pipe(3, double, str) '6' See Also: compose compose_left thread_first thread_last """ for func in funcs: data = func(data) return data def complement(func): """ Convert a predicate function to its logical complement. In other words, return a function that, for inputs that normally yield True, yields False, and vice-versa. >>> def iseven(n): return n % 2 == 0 >>> isodd = complement(iseven) >>> iseven(2) True >>> isodd(2) False """ return compose(not_, func) class juxt(object): """ Creates a function that calls several functions with the same arguments Takes several functions and returns a function that applies its arguments to each of those functions then returns a tuple of the results. Name comes from juxtaposition: the fact of two things being seen or placed close together with contrasting effect. >>> inc = lambda x: x + 1 >>> double = lambda x: x * 2 >>> juxt(inc, double)(10) (11, 20) >>> juxt([inc, double])(10) (11, 20) """ __slots__ = ['funcs'] def __init__(self, *funcs): if len(funcs) == 1 and not callable(funcs[0]): funcs = funcs[0] self.funcs = tuple(funcs) def __call__(self, *args, **kwargs): return tuple(func(*args, **kwargs) for func in self.funcs) def __getstate__(self): return self.funcs def __setstate__(self, state): self.funcs = state def do(func, x): """ Runs ``func`` on ``x``, returns ``x`` Because the results of ``func`` are not returned, only the side effects of ``func`` are relevant. Logging functions can be made by composing ``do`` with a storage function like ``list.append`` or ``file.write`` >>> from toolz import compose >>> from toolz.curried import do >>> log = [] >>> inc = lambda x: x + 1 >>> inc = compose(inc, do(log.append)) >>> inc(1) 2 >>> inc(11) 12 >>> log [1, 11] """ func(x) return x @curry def flip(func, a, b): """ Call the function call with the arguments flipped This function is curried. >>> def div(a, b): ... return a // b ... >>> flip(div, 2, 6) 3 >>> div_by_two = flip(div, 2) >>> div_by_two(4) 2 This is particularly useful for built in functions and functions defined in C extensions that accept positional only arguments. For example: isinstance, issubclass. >>> data = [1, 'a', 'b', 2, 1.5, object(), 3] >>> only_ints = list(filter(flip(isinstance, int), data)) >>> only_ints [1, 2, 3] """ return func(b, a) def return_none(exc): """ Returns None. """ return None class excepts(object): """A wrapper around a function to catch exceptions and dispatch to a handler. This is like a functional try/except block, in the same way that ifexprs are functional if/else blocks. Examples -------- >>> excepting = excepts( ... ValueError, ... lambda a: [1, 2].index(a), ... lambda _: -1, ... ) >>> excepting(1) 0 >>> excepting(3) -1 Multiple exceptions and default except clause. 
>>> excepting = excepts((IndexError, KeyError), lambda a: a[0]) >>> excepting([]) >>> excepting([1]) 1 >>> excepting({}) >>> excepting({0: 1}) 1 """ def __init__(self, exc, func, handler=return_none): self.exc = exc self.func = func self.handler = handler def __call__(self, *args, **kwargs): try: return self.func(*args, **kwargs) except self.exc as e: return self.handler(e) @instanceproperty(classval=__doc__) def __doc__(self): exc = self.exc try: if isinstance(exc, tuple): exc_name = '(%s)' % ', '.join( map(attrgetter('__name__'), exc), ) else: exc_name = exc.__name__ return dedent( """\ A wrapper around {inst.func.__name__!r} that will except: {exc} and handle any exceptions with {inst.handler.__name__!r}. Docs for {inst.func.__name__!r}: {inst.func.__doc__} Docs for {inst.handler.__name__!r}: {inst.handler.__doc__} """ ).format( inst=self, exc=exc_name, ) except AttributeError: return type(self).__doc__ @property def __name__(self): exc = self.exc try: if isinstance(exc, tuple): exc_name = '_or_'.join(map(attrgetter('__name__'), exc)) else: exc_name = exc.__name__ return '%s_excepting_%s' % (self.func.__name__, exc_name) except AttributeError: return 'excepting' def _check_sigspec(sigspec, func, builtin_func, *builtin_args): if sigspec is None: try: sigspec = inspect.signature(func) except (ValueError, TypeError) as e: sigspec = e if isinstance(sigspec, ValueError): return None, builtin_func(*builtin_args) elif not isinstance(sigspec, inspect.Signature): if ( func in _sigs.signatures and (( hasattr(func, '__signature__') and hasattr(func.__signature__, '__get__') )) ): val = builtin_func(*builtin_args) return None, val return None, False return sigspec, None if PYPY: # pragma: no cover _check_sigspec_orig = _check_sigspec def _check_sigspec(sigspec, func, builtin_func, *builtin_args): # PyPy may lie, so use our registry for builtins instead if func in _sigs.signatures: val = builtin_func(*builtin_args) return None, val return _check_sigspec_orig(sigspec, func, builtin_func, *builtin_args) _check_sigspec.__doc__ = """ \ Private function to aid in introspection compatibly across Python versions. If a callable doesn't have a signature (Python 3) or an argspec (Python 2), the signature registry in toolz._signatures is used. 
""" def num_required_args(func, sigspec=None): sigspec, rv = _check_sigspec(sigspec, func, _sigs._num_required_args, func) if sigspec is None: return rv return sum(1 for p in sigspec.parameters.values() if p.default is p.empty and p.kind in (p.POSITIONAL_OR_KEYWORD, p.POSITIONAL_ONLY)) def has_varargs(func, sigspec=None): sigspec, rv = _check_sigspec(sigspec, func, _sigs._has_varargs, func) if sigspec is None: return rv return any(p.kind == p.VAR_POSITIONAL for p in sigspec.parameters.values()) def has_keywords(func, sigspec=None): sigspec, rv = _check_sigspec(sigspec, func, _sigs._has_keywords, func) if sigspec is None: return rv return any(p.default is not p.empty or p.kind in (p.KEYWORD_ONLY, p.VAR_KEYWORD) for p in sigspec.parameters.values()) def is_valid_args(func, args, kwargs, sigspec=None): sigspec, rv = _check_sigspec(sigspec, func, _sigs._is_valid_args, func, args, kwargs) if sigspec is None: return rv try: sigspec.bind(*args, **kwargs) except TypeError: return False return True def is_partial_args(func, args, kwargs, sigspec=None): sigspec, rv = _check_sigspec(sigspec, func, _sigs._is_partial_args, func, args, kwargs) if sigspec is None: return rv try: sigspec.bind_partial(*args, **kwargs) except TypeError: return False return True def is_arity(n, func, sigspec=None): """ Does a function have only n positional arguments? This function relies on introspection and does not call the function. Returns None if validity can't be determined. >>> def f(x): ... return x >>> is_arity(1, f) True >>> def g(x, y=1): ... return x + y >>> is_arity(1, g) False """ sigspec, rv = _check_sigspec(sigspec, func, _sigs._is_arity, n, func) if sigspec is None: return rv num = num_required_args(func, sigspec) if num is not None: num = num == n if not num: return False varargs = has_varargs(func, sigspec) if varargs: return False keywords = has_keywords(func, sigspec) if keywords: return False if num is None or varargs is None or keywords is None: # pragma: no cover return None return True num_required_args.__doc__ = """ \ Number of required positional arguments This function relies on introspection and does not call the function. Returns None if validity can't be determined. >>> def f(x, y, z=3): ... return x + y + z >>> num_required_args(f) 2 >>> def g(*args, **kwargs): ... pass >>> num_required_args(g) 0 """ has_varargs.__doc__ = """ \ Does a function have variadic positional arguments? This function relies on introspection and does not call the function. Returns None if validity can't be determined. >>> def f(*args): ... return args >>> has_varargs(f) True >>> def g(**kwargs): ... return kwargs >>> has_varargs(g) False """ has_keywords.__doc__ = """ \ Does a function have keyword arguments? This function relies on introspection and does not call the function. Returns None if validity can't be determined. >>> def f(x, y=0): ... return x + y >>> has_keywords(f) True """ is_valid_args.__doc__ = """ \ Is ``func(*args, **kwargs)`` a valid function call? This function relies on introspection and does not call the function. Returns None if validity can't be determined. >>> def add(x, y): ... return x + y >>> is_valid_args(add, (1,), {}) False >>> is_valid_args(add, (1, 2), {}) True >>> is_valid_args(map, (), {}) False **Implementation notes** Python 2 relies on ``inspect.getargspec``, which only works for user-defined functions. Python 3 uses ``inspect.signature``, which works for many more types of callables. Many builtins in the standard library are also supported. 
""" is_partial_args.__doc__ = """ \ Can partial(func, *args, **kwargs)(*args2, **kwargs2) be a valid call? Returns True *only* if the call is valid or if it is possible for the call to become valid by adding more positional or keyword arguments. This function relies on introspection and does not call the function. Returns None if validity can't be determined. >>> def add(x, y): ... return x + y >>> is_partial_args(add, (1,), {}) True >>> is_partial_args(add, (1, 2), {}) True >>> is_partial_args(add, (1, 2, 3), {}) False >>> is_partial_args(map, (), {}) True **Implementation notes** Python 2 relies on ``inspect.getargspec``, which only works for user-defined functions. Python 3 uses ``inspect.signature``, which works for many more types of callables. Many builtins in the standard library are also supported. """ from . import _signatures as _sigs toolz-0.11.2/toolz/itertoolz.py000066400000000000000000000657301414140660100165260ustar00rootroot00000000000000import itertools import heapq import collections import operator from functools import partial from itertools import filterfalse, zip_longest from random import Random from collections.abc import Sequence from toolz.utils import no_default __all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave', 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth', 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample') def remove(predicate, seq): """ Return those items of sequence for which predicate(item) is False >>> def iseven(x): ... return x % 2 == 0 >>> list(remove(iseven, [1, 2, 3, 4])) [1, 3] """ return filterfalse(predicate, seq) def accumulate(binop, seq, initial=no_default): """ Repeatedly apply binary function to a sequence, accumulating results >>> from operator import add, mul >>> list(accumulate(add, [1, 2, 3, 4, 5])) [1, 3, 6, 10, 15] >>> list(accumulate(mul, [1, 2, 3, 4, 5])) [1, 2, 6, 24, 120] Accumulate is similar to ``reduce`` and is good for making functions like cumulative sum: >>> from functools import partial, reduce >>> sum = partial(reduce, add) >>> cumsum = partial(accumulate, add) Accumulate also takes an optional argument that will be used as the first value. This is similar to reduce. >>> list(accumulate(add, [1, 2, 3], -1)) [-1, 0, 2, 5] >>> list(accumulate(add, [], 1)) [1] See Also: itertools.accumulate : In standard itertools for Python 3.2+ """ seq = iter(seq) if initial == no_default: try: result = next(seq) except StopIteration: return else: result = initial yield result for elem in seq: result = binop(result, elem) yield result def groupby(key, seq): """ Group a collection by a key function >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank'] >>> groupby(len, names) # doctest: +SKIP {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']} >>> iseven = lambda x: x % 2 == 0 >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) # doctest: +SKIP {False: [1, 3, 5, 7], True: [2, 4, 6, 8]} Non-callable keys imply grouping on a member. >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'}, ... {'name': 'Bob', 'gender': 'M'}, ... 
{'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP {'F': [{'gender': 'F', 'name': 'Alice'}], 'M': [{'gender': 'M', 'name': 'Bob'}, {'gender': 'M', 'name': 'Charlie'}]} Not to be confused with ``itertools.groupby`` See Also: countby """ if not callable(key): key = getter(key) d = collections.defaultdict(lambda: [].append) for item in seq: d[key(item)](item) rv = {} for k, v in d.items(): rv[k] = v.__self__ return rv def merge_sorted(*seqs, **kwargs): """ Merge and sort a collection of sorted collections This works lazily and only keeps one value from each iterable in memory. >>> list(merge_sorted([1, 3, 5], [2, 4, 6])) [1, 2, 3, 4, 5, 6] >>> ''.join(merge_sorted('abc', 'abc', 'abc')) 'aaabbbccc' The "key" function used to sort the input may be passed as a keyword. >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3)) [2, 1, 3, 3] """ if len(seqs) == 0: return iter([]) elif len(seqs) == 1: return iter(seqs[0]) key = kwargs.get('key', None) if key is None: return _merge_sorted_binary(seqs) else: return _merge_sorted_binary_key(seqs, key) def _merge_sorted_binary(seqs): mid = len(seqs) // 2 L1 = seqs[:mid] if len(L1) == 1: seq1 = iter(L1[0]) else: seq1 = _merge_sorted_binary(L1) L2 = seqs[mid:] if len(L2) == 1: seq2 = iter(L2[0]) else: seq2 = _merge_sorted_binary(L2) try: val2 = next(seq2) except StopIteration: for val1 in seq1: yield val1 return for val1 in seq1: if val2 < val1: yield val2 for val2 in seq2: if val2 < val1: yield val2 else: yield val1 break else: break else: yield val1 else: yield val2 for val2 in seq2: yield val2 return yield val1 for val1 in seq1: yield val1 def _merge_sorted_binary_key(seqs, key): mid = len(seqs) // 2 L1 = seqs[:mid] if len(L1) == 1: seq1 = iter(L1[0]) else: seq1 = _merge_sorted_binary_key(L1, key) L2 = seqs[mid:] if len(L2) == 1: seq2 = iter(L2[0]) else: seq2 = _merge_sorted_binary_key(L2, key) try: val2 = next(seq2) except StopIteration: for val1 in seq1: yield val1 return key2 = key(val2) for val1 in seq1: key1 = key(val1) if key2 < key1: yield val2 for val2 in seq2: key2 = key(val2) if key2 < key1: yield val2 else: yield val1 break else: break else: yield val1 else: yield val2 for val2 in seq2: yield val2 return yield val1 for val1 in seq1: yield val1 def interleave(seqs): """ Interleave a sequence of sequences >>> list(interleave([[1, 2], [3, 4]])) [1, 3, 2, 4] >>> ''.join(interleave(('ABC', 'XY'))) 'AXBYC' Both the individual sequences and the sequence of sequences may be infinite Returns a lazy iterator """ iters = itertools.cycle(map(iter, seqs)) while True: try: for itr in iters: yield next(itr) return except StopIteration: predicate = partial(operator.is_not, itr) iters = itertools.cycle(itertools.takewhile(predicate, iters)) def unique(seq, key=None): """ Return only unique elements of a sequence >>> tuple(unique((1, 2, 3))) (1, 2, 3) >>> tuple(unique((1, 2, 1, 3))) (1, 2, 3) Uniqueness can be defined by key keyword >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len)) ('cat', 'mouse') """ seen = set() seen_add = seen.add if key is None: for item in seq: if item not in seen: seen_add(item) yield item else: # calculate key for item in seq: val = key(item) if val not in seen: seen_add(val) yield item def isiterable(x): """ Is x iterable? 
>>> isiterable([1, 2, 3]) True >>> isiterable('abc') True >>> isiterable(5) False """ try: iter(x) return True except TypeError: return False def isdistinct(seq): """ All values in sequence are distinct >>> isdistinct([1, 2, 3]) True >>> isdistinct([1, 2, 1]) False >>> isdistinct("Hello") False >>> isdistinct("World") True """ if iter(seq) is seq: seen = set() seen_add = seen.add for item in seq: if item in seen: return False seen_add(item) return True else: return len(seq) == len(set(seq)) def take(n, seq): """ The first n elements of a sequence >>> list(take(2, [10, 20, 30, 40, 50])) [10, 20] See Also: drop tail """ return itertools.islice(seq, n) def tail(n, seq): """ The last n elements of a sequence >>> tail(2, [10, 20, 30, 40, 50]) [40, 50] See Also: drop take """ try: return seq[-n:] except (TypeError, KeyError): return tuple(collections.deque(seq, n)) def drop(n, seq): """ The sequence following the first n elements >>> list(drop(2, [10, 20, 30, 40, 50])) [30, 40, 50] See Also: take tail """ return itertools.islice(seq, n, None) def take_nth(n, seq): """ Every nth item in seq >>> list(take_nth(2, [10, 20, 30, 40, 50])) [10, 30, 50] """ return itertools.islice(seq, 0, None, n) def first(seq): """ The first element in a sequence >>> first('ABC') 'A' """ return next(iter(seq)) def second(seq): """ The second element in a sequence >>> second('ABC') 'B' """ seq = iter(seq) next(seq) return next(seq) def nth(n, seq): """ The nth element in a sequence >>> nth(1, 'ABC') 'B' """ if isinstance(seq, (tuple, list, Sequence)): return seq[n] else: return next(itertools.islice(seq, n, None)) def last(seq): """ The last element in a sequence >>> last('ABC') 'C' """ return tail(1, seq)[0] rest = partial(drop, 1) def _get(ind, seq, default): try: return seq[ind] except (KeyError, IndexError): return default def get(ind, seq, default=no_default): """ Get element in a sequence or dict Provides standard indexing >>> get(1, 'ABC') # Same as 'ABC'[1] 'B' Pass a list to get multiple values >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2]) ('B', 'C') Works on any value that supports indexing/getitem For example here we see that it works with dictionaries >>> phonebook = {'Alice': '555-1234', ... 'Bob': '555-5678', ... 'Charlie':'555-9999'} >>> get('Alice', phonebook) '555-1234' >>> get(['Alice', 'Bob'], phonebook) ('555-1234', '555-5678') Provide a default for missing values >>> get(['Alice', 'Dennis'], phonebook, None) ('555-1234', None) See Also: pluck """ try: return seq[ind] except TypeError: # `ind` may be a list if isinstance(ind, list): if default == no_default: if len(ind) > 1: return operator.itemgetter(*ind)(seq) elif ind: return seq[ind[0]], else: return () else: return tuple(_get(i, seq, default) for i in ind) elif default != no_default: return default else: raise except (KeyError, IndexError): # we know `ind` is not a list if default == no_default: raise else: return default def concat(seqs): """ Concatenate zero or more iterables, any of which may be infinite. An infinite sequence will prevent the rest of the arguments from being included. We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs can be a generator. 
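For instance, the argument may itself be a lazy generator of iterables (an illustrative sketch):

>>> list(concat(iter([[1], [2, 3]])))
[1, 2, 3]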
>>> list(concat([[], [1], [2, 3]])) [1, 2, 3] See also: itertools.chain.from_iterable equivalent """ return itertools.chain.from_iterable(seqs) def concatv(*seqs): """ Variadic version of concat >>> list(concatv([], ["a"], ["b", "c"])) ['a', 'b', 'c'] See also: itertools.chain """ return concat(seqs) def mapcat(func, seqs): """ Apply func to each sequence in seqs, concatenating results. >>> list(mapcat(lambda s: [c.upper() for c in s], ... [["a", "b"], ["c", "d", "e"]])) ['A', 'B', 'C', 'D', 'E'] """ return concat(map(func, seqs)) def cons(el, seq): """ Add el to beginning of (possibly infinite) sequence seq. >>> list(cons(1, [2, 3])) [1, 2, 3] """ return itertools.chain([el], seq) def interpose(el, seq): """ Introduce element between each pair of elements in seq >>> list(interpose("a", [1, 2, 3])) [1, 'a', 2, 'a', 3] """ inposed = concat(zip(itertools.repeat(el), seq)) next(inposed) return inposed def frequencies(seq): """ Find number of occurrences of each value in seq >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP {'cat': 3, 'ox': 1, 'pig': 2} See Also: countby groupby """ d = collections.defaultdict(int) for item in seq: d[item] += 1 return dict(d) def reduceby(key, binop, seq, init=no_default): """ Perform a simultaneous groupby and reduction The computation: >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP is equivalent to the following: >>> def reduction(group): # doctest: +SKIP ... return reduce(binop, group, init) # doctest: +SKIP >>> groups = groupby(key, seq) # doctest: +SKIP >>> result = valmap(reduction, groups) # doctest: +SKIP But the former does not build the intermediate groups, allowing it to operate in much less space. This makes it suitable for larger datasets that do not fit comfortably in memory The ``init`` keyword argument is the default initialization of the reduction. This can be either a constant value like ``0`` or a callable like ``lambda : 0`` as might be used in ``defaultdict``. Simple Examples --------------- >>> from operator import add, mul >>> iseven = lambda x: x % 2 == 0 >>> data = [1, 2, 3, 4, 5] >>> reduceby(iseven, add, data) # doctest: +SKIP {False: 9, True: 6} >>> reduceby(iseven, mul, data) # doctest: +SKIP {False: 15, True: 8} Complex Example --------------- >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000}, ... {'name': 'fight crime', 'state': 'IL', 'cost': 100000}, ... {'name': 'help farmers', 'state': 'IL', 'cost': 2000000}, ... {'name': 'help farmers', 'state': 'CA', 'cost': 200000}] >>> reduceby('state', # doctest: +SKIP ... lambda acc, x: acc + x['cost'], ... projects, 0) {'CA': 1200000, 'IL': 2100000} Example Using ``init`` ---------------------- >>> def set_add(s, i): ... s.add(i) ... return s >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set) # doctest: +SKIP {True: set([2, 4]), False: set([1, 3])} """ is_no_default = init == no_default if not is_no_default and not callable(init): _init = init init = lambda: _init if not callable(key): key = getter(key) d = {} for item in seq: k = key(item) if k not in d: if is_no_default: d[k] = item continue else: d[k] = init() d[k] = binop(d[k], item) return d def iterate(func, x): """ Repeatedly apply a function func onto an original input Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc.. 
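Because the result is an infinite iterator, it pairs naturally with ``take`` (an illustrative sketch):

>>> list(take(3, iterate(lambda x: x * 10, 1)))
[1, 10, 100]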
>>> def inc(x): return x + 1 >>> counter = iterate(inc, 0) >>> next(counter) 0 >>> next(counter) 1 >>> next(counter) 2 >>> double = lambda x: x * 2 >>> powers_of_two = iterate(double, 1) >>> next(powers_of_two) 1 >>> next(powers_of_two) 2 >>> next(powers_of_two) 4 >>> next(powers_of_two) 8 """ while True: yield x x = func(x) def sliding_window(n, seq): """ A sequence of overlapping subsequences >>> list(sliding_window(2, [1, 2, 3, 4])) [(1, 2), (2, 3), (3, 4)] This function creates a sliding window suitable for transformations like sliding means / smoothing >>> mean = lambda seq: float(sum(seq)) / len(seq) >>> list(map(mean, sliding_window(2, [1, 2, 3, 4]))) [1.5, 2.5, 3.5] """ return zip(*(collections.deque(itertools.islice(it, i), 0) or it for i, it in enumerate(itertools.tee(seq, n)))) no_pad = '__no__pad__' def partition(n, seq, pad=no_pad): """ Partition sequence into tuples of length n >>> list(partition(2, [1, 2, 3, 4])) [(1, 2), (3, 4)] If the length of ``seq`` is not evenly divisible by ``n``, the final tuple is dropped if ``pad`` is not specified, or filled to length ``n`` by pad: >>> list(partition(2, [1, 2, 3, 4, 5])) [(1, 2), (3, 4)] >>> list(partition(2, [1, 2, 3, 4, 5], pad=None)) [(1, 2), (3, 4), (5, None)] See Also: partition_all """ args = [iter(seq)] * n if pad is no_pad: return zip(*args) else: return zip_longest(*args, fillvalue=pad) def partition_all(n, seq): """ Partition all elements of sequence into tuples of length at most n The final tuple may be shorter to accommodate extra elements. >>> list(partition_all(2, [1, 2, 3, 4])) [(1, 2), (3, 4)] >>> list(partition_all(2, [1, 2, 3, 4, 5])) [(1, 2), (3, 4), (5,)] See Also: partition """ args = [iter(seq)] * n it = zip_longest(*args, fillvalue=no_pad) try: prev = next(it) except StopIteration: return for item in it: yield prev prev = item if prev[-1] is no_pad: try: # If seq defines __len__, then # we can quickly calculate where no_pad starts yield prev[:len(seq) % n] except TypeError: # Get first index of no_pad without using .index() # https://github.com/pytoolz/toolz/issues/387 # Binary search from CPython's bisect module, # modified for identity testing. lo, hi = 0, n while lo < hi: mid = (lo + hi) // 2 if prev[mid] is no_pad: hi = mid else: lo = mid + 1 yield prev[:lo] else: yield prev def count(seq): """ Count the number of items in seq Like the builtin ``len`` but works on lazy sequences. Not to be confused with ``itertools.count`` See also: len """ if hasattr(seq, '__len__'): return len(seq) return sum(1 for i in seq) def pluck(ind, seqs, default=no_default): """ plucks an element or several elements from each item in a sequence. ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more elements of each item in the sequence. This is equivalent to running `map(curried.get(ind), seqs)` ``ind`` can be either a single string/index or a list of strings/indices. ``seqs`` should be sequence containing sequences or dicts. e.g. 
>>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}] >>> list(pluck('name', data)) ['Cheese', 'Pies'] >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]])) [(1, 2), (4, 5)] See Also: get map """ if default == no_default: get = getter(ind) return map(get, seqs) elif isinstance(ind, list): return (tuple(_get(item, seq, default) for item in ind) for seq in seqs) return (_get(ind, seq, default) for seq in seqs) def getter(index): if isinstance(index, list): if len(index) == 1: index = index[0] return lambda x: (x[index],) elif index: return operator.itemgetter(*index) else: return lambda x: () else: return operator.itemgetter(index) def join(leftkey, leftseq, rightkey, rightseq, left_default=no_default, right_default=no_default): """ Join two sequences on common attributes This is a semi-streaming operation. The LEFT sequence is fully evaluated and placed into memory. The RIGHT sequence is evaluated lazily and so can be arbitrarily large. (Note: If right_default is defined, then unique keys of rightseq will also be stored in memory.) >>> friends = [('Alice', 'Edith'), ... ('Alice', 'Zhao'), ... ('Edith', 'Alice'), ... ('Zhao', 'Alice'), ... ('Zhao', 'Edith')] >>> cities = [('Alice', 'NYC'), ... ('Alice', 'Chicago'), ... ('Dan', 'Syndey'), ... ('Edith', 'Paris'), ... ('Edith', 'Berlin'), ... ('Zhao', 'Shanghai')] >>> # Vacation opportunities >>> # In what cities do people have friends? >>> result = join(second, friends, ... first, cities) >>> for ((a, b), (c, d)) in sorted(unique(result)): ... print((a, d)) ('Alice', 'Berlin') ('Alice', 'Paris') ('Alice', 'Shanghai') ('Edith', 'Chicago') ('Edith', 'NYC') ('Zhao', 'Chicago') ('Zhao', 'NYC') ('Zhao', 'Berlin') ('Zhao', 'Paris') Specify outer joins with keyword arguments ``left_default`` and/or ``right_default``. Here is a full outer join in which unmatched elements are paired with None. >>> identity = lambda x: x >>> list(join(identity, [1, 2, 3], ... identity, [2, 3, 4], ... left_default=None, right_default=None)) [(2, 2), (3, 3), (None, 4), (1, None)] Usually the key arguments are callables to be applied to the sequences. If the keys are not obviously callable then it is assumed that indexing was intended, e.g. the following is a legal change. The join is implemented as a hash join and the keys of leftseq must be hashable. Additionally, if right_default is defined, then keys of rightseq must also be hashable. 
>>> # result = join(second, friends, first, cities) >>> result = join(1, friends, 0, cities) # doctest: +SKIP """ if not callable(leftkey): leftkey = getter(leftkey) if not callable(rightkey): rightkey = getter(rightkey) d = groupby(leftkey, leftseq) if left_default == no_default and right_default == no_default: # Inner Join for item in rightseq: key = rightkey(item) if key in d: for left_match in d[key]: yield (left_match, item) elif left_default != no_default and right_default == no_default: # Right Join for item in rightseq: key = rightkey(item) if key in d: for left_match in d[key]: yield (left_match, item) else: yield (left_default, item) elif right_default != no_default: seen_keys = set() seen = seen_keys.add if left_default == no_default: # Left Join for item in rightseq: key = rightkey(item) seen(key) if key in d: for left_match in d[key]: yield (left_match, item) else: # Full Join for item in rightseq: key = rightkey(item) seen(key) if key in d: for left_match in d[key]: yield (left_match, item) else: yield (left_default, item) for key, matches in d.items(): if key not in seen_keys: for match in matches: yield (match, right_default) def diff(*seqs, **kwargs): """ Return those items that differ between sequences >>> list(diff([1, 2, 3], [1, 2, 10, 100])) [(3, 10)] Shorter sequences may be padded with a ``default`` value: >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None)) [(3, 10), (None, 100)] A ``key`` function may also be applied to each item to use during comparisons: >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower)) [('bananas', 'Oranges')] """ N = len(seqs) if N == 1 and isinstance(seqs[0], list): seqs = seqs[0] N = len(seqs) if N < 2: raise TypeError('Too few sequences given (min 2 required)') default = kwargs.get('default', no_default) if default == no_default: iters = zip(*seqs) else: iters = zip_longest(*seqs, fillvalue=default) key = kwargs.get('key', None) if key is None: for items in iters: if items.count(items[0]) != N: yield items else: for items in iters: vals = tuple(map(key, items)) if vals.count(vals[0]) != N: yield items def topk(k, seq, key=None): """ Find the k largest elements of a sequence Operates lazily in ``n*log(k)`` time >>> topk(2, [1, 100, 10, 1000]) (1000, 100) Use a key function to change sorted order >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len) ('Charlie', 'Alice') See also: heapq.nlargest """ if key is not None and not callable(key): key = getter(key) return tuple(heapq.nlargest(k, seq, key=key)) def peek(seq): """ Retrieve the next element of a sequence Returns the first element and an iterable equivalent to the original sequence, still having the element retrieved. >>> seq = [0, 1, 2, 3, 4] >>> first, seq = peek(seq) >>> first 0 >>> list(seq) [0, 1, 2, 3, 4] """ iterator = iter(seq) item = next(iterator) return item, itertools.chain((item,), iterator) def peekn(n, seq): """ Retrieve the next n elements of a sequence Returns a tuple of the first n elements and an iterable equivalent to the original, still having the elements retrieved. >>> seq = [0, 1, 2, 3, 4] >>> first_two, seq = peekn(2, seq) >>> first_two (0, 1) >>> list(seq) [0, 1, 2, 3, 4] """ iterator = iter(seq) peeked = tuple(take(n, iterator)) return peeked, itertools.chain(iter(peeked), iterator) def random_sample(prob, seq, random_state=None): """ Return elements from a sequence with probability of prob Returns a lazy iterator of random items from seq. ``random_sample`` considers each item independently and without replacement. 
See below how the first time it returned 13 items and the next time it returned 6 items. >>> seq = list(range(100)) >>> list(random_sample(0.1, seq)) # doctest: +SKIP [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95] >>> list(random_sample(0.1, seq)) # doctest: +SKIP [6, 44, 54, 61, 69, 94] Providing an integer seed for ``random_state`` will result in deterministic sampling. Given the same seed it will return the same sample every time. >>> list(random_sample(0.1, seq, random_state=2016)) [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] >>> list(random_sample(0.1, seq, random_state=2016)) [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] ``random_state`` can also be any object with a method ``random`` that returns floats between 0.0 and 1.0 (exclusive). >>> from random import Random >>> randobj = Random(2016) >>> list(random_sample(0.1, seq, random_state=randobj)) [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] """ if not hasattr(random_state, 'random'): random_state = Random(random_state) return filter(lambda _: random_state.random() < prob, seq) toolz-0.11.2/toolz/recipes.py000066400000000000000000000023501414140660100161120ustar00rootroot00000000000000import itertools from .itertoolz import frequencies, pluck, getter __all__ = ('countby', 'partitionby') def countby(key, seq): """ Count elements of a collection by a key function >>> countby(len, ['cat', 'mouse', 'dog']) {3: 2, 5: 1} >>> def iseven(x): return x % 2 == 0 >>> countby(iseven, [1, 2, 3]) # doctest:+SKIP {True: 1, False: 2} See Also: groupby """ if not callable(key): key = getter(key) return frequencies(map(key, seq)) def partitionby(func, seq): """ Partition a sequence according to a function Partition `s` into a sequence of lists such that, when traversing `s`, every time the output of `func` changes a new list is started and that and subsequent items are collected into that list. >>> is_space = lambda c: c == " " >>> list(partitionby(is_space, "I have space")) [('I',), (' ',), ('h', 'a', 'v', 'e'), (' ',), ('s', 'p', 'a', 'c', 'e')] >>> is_large = lambda x: x > 10 >>> list(partitionby(is_large, [1, 2, 1, 99, 88, 33, 99, -1, 5])) [(1, 2, 1), (99, 88, 33, 99), (-1, 5)] See also: partition groupby itertools.groupby """ return map(tuple, pluck(1, itertools.groupby(seq, key=func))) toolz-0.11.2/toolz/sandbox/000077500000000000000000000000001414140660100155445ustar00rootroot00000000000000toolz-0.11.2/toolz/sandbox/__init__.py000066400000000000000000000001041414140660100176500ustar00rootroot00000000000000from .core import EqualityHashKey, unzip from .parallel import fold toolz-0.11.2/toolz/sandbox/core.py000066400000000000000000000103601414140660100170460ustar00rootroot00000000000000from toolz.itertoolz import getter, cons, pluck from itertools import tee, starmap # See #166: https://github.com/pytoolz/toolz/issues/166 # See #173: https://github.com/pytoolz/toolz/pull/173 class EqualityHashKey(object): """ Create a hash key that uses equality comparisons between items. This may be used to create hash keys for otherwise unhashable types: >>> from toolz import curry >>> EqualityHashDefault = curry(EqualityHashKey, None) >>> set(map(EqualityHashDefault, [[], (), [1], [1]])) # doctest: +SKIP {=[]=, =()=, =[1]=} **Caution:** adding N ``EqualityHashKey`` items to a hash container may require O(N**2) operations, not O(N) as for typical hashable types. Therefore, a suitable key function such as ``tuple`` or ``frozenset`` is usually preferred over using ``EqualityHashKey`` if possible. 
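    The quadratic behavior arises because, under the default key, every
    wrapped item hashes to the same value, so hash containers must fall
    back to pairwise equality comparisons.  A brief sketch of the
    collision:

    >>> k1 = EqualityHashKey(None, [])
    >>> k2 = EqualityHashKey(None, [1])
    >>> hash(k1) == hash(k2)
    True
    >>> k1 == k2
    False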
    The ``key`` argument to ``EqualityHashKey`` should be a function or
    index that returns a hashable object that effectively distinguishes
    unequal items.  This helps avoid the poor scaling that occurs when
    using the default key.  For example, the above example can be improved
    by using a key function that distinguishes items by length or type:

    >>> EqualityHashLen = curry(EqualityHashKey, len)
    >>> EqualityHashType = curry(EqualityHashKey, type)  # this works too
    >>> set(map(EqualityHashLen, [[], (), [1], [1]]))  # doctest: +SKIP
    {=[]=, =()=, =[1]=}

    ``EqualityHashKey`` is convenient to use when a suitable key function
    is complicated or unavailable.  For example, the following returns all
    unique values based on equality:

    >>> from toolz import unique
    >>> vals = [[], [], (), [1], [1], [2], {}, {}, {}]
    >>> list(unique(vals, key=EqualityHashDefault))
    [[], (), [1], [2], {}]

    **Warning:** don't change the equality value of an item already in a hash
    container.  Unhashable types are unhashable for a reason.  For example:

    >>> L1 = [1] ; L2 = [2]
    >>> s = set(map(EqualityHashDefault, [L1, L2]))
    >>> s  # doctest: +SKIP
    {=[1]=, =[2]=}

    >>> L1[0] = 2  # Don't do this!  ``s`` now has duplicate items!
    >>> s  # doctest: +SKIP
    {=[2]=, =[2]=}

    Although this may appear problematic, immutable data types are a common
    idiom in functional programming, and ``EqualityHashKey`` easily allows
    the same idiom to be used by convention rather than strict requirement.

    See Also:
        identity
    """
    __slots__ = ['item', 'key']
    _default_hashkey = '__default__hashkey__'

    def __init__(self, key, item):
        if key is None:
            self.key = self._default_hashkey
        elif not callable(key):
            self.key = getter(key)
        else:
            self.key = key
        self.item = item

    def __hash__(self):
        if self.key == self._default_hashkey:
            val = self.key
        else:
            val = self.key(self.item)
        return hash(val)

    def __eq__(self, other):
        try:
            return (self._default_hashkey == other._default_hashkey and
                    self.item == other.item)
        except AttributeError:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return '=%s=' % str(self.item)

    def __repr__(self):
        return '=%s=' % repr(self.item)


# See issue #293: https://github.com/pytoolz/toolz/issues/239
def unzip(seq):
    """Inverse of ``zip``

    >>> a, b = unzip([('a', 1), ('b', 2)])
    >>> list(a)
    ['a', 'b']
    >>> list(b)
    [1, 2]

    Unlike the naive implementation ``def unzip(seq): zip(*seq)`` this
    implementation can handle an infinite sequence ``seq``.

    Caveats:

    * The implementation uses ``tee``, and so can use a significant amount
      of auxiliary storage if the resulting iterators are consumed at
      different times.

    * The inner sequence cannot be infinite.  In Python 3 ``zip(*seq)`` can
      be used if ``seq`` is a finite sequence of infinite sequences.

    """
    seq = iter(seq)

    # Check how many iterators we need
    try:
        first = tuple(next(seq))
    except StopIteration:
        return tuple()

    # and create them
    niters = len(first)
    seqs = tee(cons(first, seq), niters)

    return tuple(starmap(pluck, enumerate(seqs)))
toolz-0.11.2/toolz/sandbox/parallel.py000066400000000000000000000053431414140660100177170ustar00rootroot00000000000000import functools

from toolz.itertoolz import partition_all
from toolz.utils import no_default


def _reduce(func, seq, initial=None):
    if initial is None:
        return functools.reduce(func, seq)
    else:
        return functools.reduce(func, seq, initial)


def fold(binop, seq, default=no_default, map=map, chunksize=128,
         combine=None):
    """
    Reduce without guarantee of ordered reduction.

    inputs:

    ``binop``     - associative operator.
The associative property allows us to leverage a parallel map to perform reductions in parallel. ``seq`` - a sequence to be aggregated ``default`` - an identity element like 0 for ``add`` or 1 for mul ``map`` - an implementation of ``map``. This may be parallel and determines how work is distributed. ``chunksize`` - Number of elements of ``seq`` that should be handled within a single function call ``combine`` - Binary operator to combine two intermediate results. If ``binop`` is of type (total, item) -> total then ``combine`` is of type (total, total) -> total Defaults to ``binop`` for common case of operators like add Fold chunks up the collection into blocks of size ``chunksize`` and then feeds each of these to calls to ``reduce``. This work is distributed with a call to ``map``, gathered back and then refolded to finish the computation. In this way ``fold`` specifies only how to chunk up data but leaves the distribution of this work to an externally provided ``map`` function. This function can be sequential or rely on multithreading, multiprocessing, or even distributed solutions. If ``map`` intends to serialize functions it should be prepared to accept and serialize lambdas. Note that the standard ``pickle`` module fails here. Example ------- >>> # Provide a parallel map to accomplish a parallel sum >>> from operator import add >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map) 10 """ assert chunksize > 1 if combine is None: combine = binop chunks = partition_all(chunksize, seq) # Evaluate sequence in chunks via map if default == no_default: results = map( functools.partial(_reduce, binop), chunks) else: results = map( functools.partial(_reduce, binop, initial=default), chunks) results = list(results) # TODO: Support complete laziness if len(results) == 1: # Return completed result return results[0] else: # Recurse to reaggregate intermediate results return fold(combine, results, map=map, chunksize=chunksize) toolz-0.11.2/toolz/sandbox/tests/000077500000000000000000000000001414140660100167065ustar00rootroot00000000000000toolz-0.11.2/toolz/sandbox/tests/test_core.py000066400000000000000000000073671414140660100212640ustar00rootroot00000000000000from toolz import curry, unique, first, take from toolz.sandbox.core import EqualityHashKey, unzip from itertools import count, repeat def test_EqualityHashKey_default_key(): EqualityHashDefault = curry(EqualityHashKey, None) L1 = [1] L2 = [2] data1 = [L1, L1, L2, [], [], [1], [2], {}, ()] set1 = set(map(EqualityHashDefault, data1)) set2 = set(map(EqualityHashDefault, [[], [1], [2], {}, ()])) assert set1 == set2 assert len(set1) == 5 # Test that ``EqualityHashDefault(item)`` is distinct from ``item`` T0 = () T1 = (1,) data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1,)])) data2.extend([T0, T1, (), (1,)]) set3 = set(data2) assert set3 == {(), (1,), EqualityHashDefault(()), EqualityHashDefault((1,))} assert len(set3) == 4 assert EqualityHashDefault(()) in set3 assert EqualityHashDefault((1,)) in set3 # Miscellaneous E1 = EqualityHashDefault(L1) E2 = EqualityHashDefault(L2) assert str(E1) == '=[1]=' assert repr(E1) == '=[1]=' assert E1 != E2 assert not (E1 == E2) assert E1 == EqualityHashDefault(L1) assert not (E1 != EqualityHashDefault(L1)) assert E1 != L1 assert not (E1 == L1) def test_EqualityHashKey_callable_key(): # Common simple hash key functions. 
    EqualityHashLen = curry(EqualityHashKey, len)
    EqualityHashType = curry(EqualityHashKey, type)
    EqualityHashId = curry(EqualityHashKey, id)
    EqualityHashFirst = curry(EqualityHashKey, first)
    data1 = [[], [1], (), (1,), {}, {1: 2}]
    data2 = [[1, 2], (1, 2), (1, 3), [1, 3], [2, 1], {1: 2}]
    assert list(unique(data1*3, key=EqualityHashLen)) == data1
    assert list(unique(data2*3, key=EqualityHashLen)) == data2
    assert list(unique(data1*3, key=EqualityHashType)) == data1
    assert list(unique(data2*3, key=EqualityHashType)) == data2
    assert list(unique(data1*3, key=EqualityHashId)) == data1
    assert list(unique(data2*3, key=EqualityHashId)) == data2
    assert list(unique(data2*3, key=EqualityHashFirst)) == data2


def test_EqualityHashKey_index_key():
    d1 = {'firstname': 'Alice', 'age': 21, 'data': {}}
    d2 = {'firstname': 'Alice', 'age': 34, 'data': {}}
    d3a = {'firstname': 'Bob', 'age': 56, 'data': {}}
    d3b = {'firstname': 'Bob', 'age': 56, 'data': {}}
    EqualityHashFirstname = curry(EqualityHashKey, 'firstname')
    assert list(unique(3*[d1, d2, d3a, d3b],
                       key=EqualityHashFirstname)) == [d1, d2, d3a]
    EqualityHashFirstnameAge = curry(EqualityHashKey, ['firstname', 'age'])
    assert list(unique(3*[d1, d2, d3a, d3b],
                       key=EqualityHashFirstnameAge)) == [d1, d2, d3a]
    list1 = [0] * 10
    list2 = [0] * 100
    list3a = [1] * 10
    list3b = [1] * 10
    EqualityHash0 = curry(EqualityHashKey, 0)
    assert list(unique(3*[list1, list2, list3a, list3b],
                       key=EqualityHash0)) == [list1, list2, list3a]


def test_unzip():
    def _to_lists(seq, n=10):
        """iter of iters -> finite list of finite lists
        """
        def initial(s):
            return list(take(n, s))

        return initial(map(initial, seq))

    def _assert_initial_matches(a, b, n=10):
        assert list(take(n, a)) == list(take(n, b))

    # Unzips a simple list correctly
    assert _to_lists(unzip([('a', 1), ('b', 2), ('c', 3)])) \
        == [['a', 'b', 'c'], [1, 2, 3]]

    # Can handle a finite number of infinite iterators (the naive
    # `zip(*args)` implementation fails on this example).
a, b, c = unzip(zip(count(1), repeat(0), repeat(1))) _assert_initial_matches(a, count(1)) _assert_initial_matches(b, repeat(0)) _assert_initial_matches(c, repeat(1)) # Sensibly handles empty input assert list(unzip(zip([]))) == [] toolz-0.11.2/toolz/sandbox/tests/test_parallel.py000066400000000000000000000015531414140660100221170ustar00rootroot00000000000000from toolz.sandbox.parallel import fold from toolz import reduce from operator import add from pickle import dumps, loads from multiprocessing import Pool # is comparison will fail between this and no_default no_default2 = loads(dumps('__no__default__')) def test_fold(): assert fold(add, range(10), 0) == reduce(add, range(10), 0) assert fold(add, range(10), 0, map=Pool().map) == reduce(add, range(10), 0) assert fold(add, range(10), 0, chunksize=2) == reduce(add, range(10), 0) assert fold(add, range(10)) == fold(add, range(10), 0) def setadd(s, item): s = s.copy() s.add(item) return s assert fold(setadd, [1, 2, 3], set()) == {1, 2, 3} assert (fold(setadd, [1, 2, 3], set(), chunksize=2, combine=set.union) == {1, 2, 3}) assert fold(add, range(10), default=no_default2) == fold(add, range(10)) toolz-0.11.2/toolz/tests/000077500000000000000000000000001414140660100152505ustar00rootroot00000000000000toolz-0.11.2/toolz/tests/test_compatibility.py000066400000000000000000000004051414140660100215310ustar00rootroot00000000000000 import pytest import importlib def test_compat_warn(): with pytest.warns(DeprecationWarning): # something else is importing this, import toolz.compatibility # reload to be sure we warn importlib.reload(toolz.compatibility) toolz-0.11.2/toolz/tests/test_curried.py000066400000000000000000000070771414140660100203310ustar00rootroot00000000000000import toolz import toolz.curried from toolz.curried import (take, first, second, sorted, merge_with, reduce, merge, operator as cop) from collections import defaultdict from importlib import import_module from operator import add def test_take(): assert list(take(2)([1, 2, 3])) == [1, 2] def test_first(): assert first is toolz.itertoolz.first def test_merge(): assert merge(factory=lambda: defaultdict(int))({1: 1}) == {1: 1} assert merge({1: 1}) == {1: 1} assert merge({1: 1}, factory=lambda: defaultdict(int)) == {1: 1} def test_merge_with(): assert merge_with(sum)({1: 1}, {1: 2}) == {1: 3} def test_merge_with_list(): assert merge_with(sum, [{'a': 1}, {'a': 2}]) == {'a': 3} def test_sorted(): assert sorted(key=second)([(1, 2), (2, 1)]) == [(2, 1), (1, 2)] def test_reduce(): assert reduce(add)((1, 2, 3)) == 6 def test_module_name(): assert toolz.curried.__name__ == 'toolz.curried' def test_curried_operator(): for k, v in vars(cop).items(): if not callable(v): continue if not isinstance(v, toolz.curry): try: # Make sure it is unary v(1) except TypeError: try: v('x') except TypeError: pass else: continue raise AssertionError( 'toolz.curried.operator.%s is not curried!' % k, ) # Make sure this isn't totally empty. 
assert len(set(vars(cop)) & {'add', 'sub', 'mul'}) == 3 def test_curried_namespace(): exceptions = import_module('toolz.curried.exceptions') namespace = {} def should_curry(func): if not callable(func) or isinstance(func, toolz.curry): return False nargs = toolz.functoolz.num_required_args(func) if nargs is None or nargs > 1: return True return nargs == 1 and toolz.functoolz.has_keywords(func) def curry_namespace(ns): return { name: toolz.curry(f) if should_curry(f) else f for name, f in ns.items() if '__' not in name } from_toolz = curry_namespace(vars(toolz)) from_exceptions = curry_namespace(vars(exceptions)) namespace.update(toolz.merge(from_toolz, from_exceptions)) namespace = toolz.valfilter(callable, namespace) curried_namespace = toolz.valfilter(callable, toolz.curried.__dict__) if namespace != curried_namespace: missing = set(namespace) - set(curried_namespace) if missing: raise AssertionError('There are missing functions in toolz.curried:\n %s' % ' \n'.join(sorted(missing))) extra = set(curried_namespace) - set(namespace) if extra: raise AssertionError('There are extra functions in toolz.curried:\n %s' % ' \n'.join(sorted(extra))) unequal = toolz.merge_with(list, namespace, curried_namespace) unequal = toolz.valfilter(lambda x: x[0] != x[1], unequal) messages = [] for name, (orig_func, auto_func) in sorted(unequal.items()): if name in from_exceptions: messages.append('%s should come from toolz.curried.exceptions' % name) elif should_curry(getattr(toolz, name)): messages.append('%s should be curried from toolz' % name) else: messages.append('%s should come from toolz and NOT be curried' % name) raise AssertionError('\n'.join(messages)) toolz-0.11.2/toolz/tests/test_curried_doctests.py000066400000000000000000000004221414140660100222240ustar00rootroot00000000000000import doctest import toolz def test_doctests(): toolz.__test__ = {} for name, func in vars(toolz).items(): if isinstance(func, toolz.curry): toolz.__test__[name] = func.func assert doctest.testmod(toolz).failed == 0 del toolz.__test__ toolz-0.11.2/toolz/tests/test_dicttoolz.py000066400000000000000000000215561414140660100207050ustar00rootroot00000000000000from collections import defaultdict as _defaultdict from collections.abc import Mapping import os from toolz.dicttoolz import (merge, merge_with, valmap, keymap, update_in, assoc, dissoc, keyfilter, valfilter, itemmap, itemfilter, assoc_in) from toolz.functoolz import identity from toolz.utils import raises def inc(x): return x + 1 def iseven(i): return i % 2 == 0 class TestDict(object): """Test typical usage: dict inputs, no factory keyword. 
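    Subclasses rerun this entire suite against another mapping type simply
    by overriding the two class attributes described below, roughly as in
    this sketch (``MyMapping`` stands in for any MutableMapping):

        class TestMyMapping(TestDict):
            D = MyMapping
            kw = {'factory': lambda: MyMapping()}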
Class attributes: D: callable that inputs a dict and creates or returns a MutableMapping kw: kwargs dict to specify "factory" keyword (if applicable) """ D = dict kw = {} def test_merge(self): D, kw = self.D, self.kw assert merge(D({1: 1, 2: 2}), D({3: 4}), **kw) == D({1: 1, 2: 2, 3: 4}) def test_merge_iterable_arg(self): D, kw = self.D, self.kw assert merge([D({1: 1, 2: 2}), D({3: 4})], **kw) == D({1: 1, 2: 2, 3: 4}) def test_merge_with(self): D, kw = self.D, self.kw dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20}) assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22}) assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20)}) dicts = D({1: 1, 2: 2, 3: 3}), D({1: 10, 2: 20}) assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22, 3: 3}) assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20), 3: (3,)}) assert not merge_with(sum) def test_merge_with_iterable_arg(self): D, kw = self.D, self.kw dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20}) assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22}) assert merge_with(sum, dicts, **kw) == D({1: 11, 2: 22}) assert merge_with(sum, iter(dicts), **kw) == D({1: 11, 2: 22}) def test_valmap(self): D, kw = self.D, self.kw assert valmap(inc, D({1: 1, 2: 2}), **kw) == D({1: 2, 2: 3}) def test_keymap(self): D, kw = self.D, self.kw assert keymap(inc, D({1: 1, 2: 2}), **kw) == D({2: 1, 3: 2}) def test_itemmap(self): D, kw = self.D, self.kw assert itemmap(reversed, D({1: 2, 2: 4}), **kw) == D({2: 1, 4: 2}) def test_valfilter(self): D, kw = self.D, self.kw assert valfilter(iseven, D({1: 2, 2: 3}), **kw) == D({1: 2}) def test_keyfilter(self): D, kw = self.D, self.kw assert keyfilter(iseven, D({1: 2, 2: 3}), **kw) == D({2: 3}) def test_itemfilter(self): D, kw = self.D, self.kw assert itemfilter(lambda item: iseven(item[0]), D({1: 2, 2: 3}), **kw) == D({2: 3}) assert itemfilter(lambda item: iseven(item[1]), D({1: 2, 2: 3}), **kw) == D({1: 2}) def test_assoc(self): D, kw = self.D, self.kw assert assoc(D({}), "a", 1, **kw) == D({"a": 1}) assert assoc(D({"a": 1}), "a", 3, **kw) == D({"a": 3}) assert assoc(D({"a": 1}), "b", 3, **kw) == D({"a": 1, "b": 3}) # Verify immutability: d = D({'x': 1}) oldd = d assoc(d, 'x', 2, **kw) assert d is oldd def test_dissoc(self): D, kw = self.D, self.kw assert dissoc(D({"a": 1}), "a", **kw) == D({}) assert dissoc(D({"a": 1, "b": 2}), "a", **kw) == D({"b": 2}) assert dissoc(D({"a": 1, "b": 2}), "b", **kw) == D({"a": 1}) assert dissoc(D({"a": 1, "b": 2}), "a", "b", **kw) == D({}) assert dissoc(D({"a": 1}), "a", **kw) == dissoc(dissoc(D({"a": 1}), "a", **kw), "a", **kw) # Verify immutability: d = D({'x': 1}) oldd = d d2 = dissoc(d, 'x', **kw) assert d is oldd assert d2 is not oldd def test_assoc_in(self): D, kw = self.D, self.kw assert assoc_in(D({"a": 1}), ["a"], 2, **kw) == D({"a": 2}) assert (assoc_in(D({"a": D({"b": 1})}), ["a", "b"], 2, **kw) == D({"a": D({"b": 2})})) assert assoc_in(D({}), ["a", "b"], 1, **kw) == D({"a": D({"b": 1})}) # Verify immutability: d = D({'x': 1}) oldd = d d2 = assoc_in(d, ['x'], 2, **kw) assert d is oldd assert d2 is not oldd def test_update_in(self): D, kw = self.D, self.kw assert update_in(D({"a": 0}), ["a"], inc, **kw) == D({"a": 1}) assert update_in(D({"a": 0, "b": 1}), ["b"], str, **kw) == D({"a": 0, "b": "1"}) assert (update_in(D({"t": 1, "v": D({"a": 0})}), ["v", "a"], inc, **kw) == D({"t": 1, "v": D({"a": 1})})) # Handle one missing key. 
assert update_in(D({}), ["z"], str, None, **kw) == D({"z": "None"}) assert update_in(D({}), ["z"], inc, 0, **kw) == D({"z": 1}) assert update_in(D({}), ["z"], lambda x: x+"ar", default="b", **kw) == D({"z": "bar"}) # Same semantics as Clojure for multiple missing keys, ie. recursively # create nested empty dictionaries to the depth specified by the # keys with the innermost value set to f(default). assert update_in(D({}), [0, 1], inc, default=-1, **kw) == D({0: D({1: 0})}) assert update_in(D({}), [0, 1], str, default=100, **kw) == D({0: D({1: "100"})}) assert (update_in(D({"foo": "bar", 1: 50}), ["d", 1, 0], str, 20, **kw) == D({"foo": "bar", 1: 50, "d": D({1: D({0: "20"})})})) # Verify immutability: d = D({'x': 1}) oldd = d update_in(d, ['x'], inc, **kw) assert d is oldd def test_factory(self): D, kw = self.D, self.kw assert merge(defaultdict(int, D({1: 2})), D({2: 3})) == {1: 2, 2: 3} assert (merge(defaultdict(int, D({1: 2})), D({2: 3}), factory=lambda: defaultdict(int)) == defaultdict(int, D({1: 2, 2: 3}))) assert not (merge(defaultdict(int, D({1: 2})), D({2: 3}), factory=lambda: defaultdict(int)) == {1: 2, 2: 3}) assert raises(TypeError, lambda: merge(D({1: 2}), D({2: 3}), factoryy=dict)) class defaultdict(_defaultdict): def __eq__(self, other): return (super(defaultdict, self).__eq__(other) and isinstance(other, _defaultdict) and self.default_factory == other.default_factory) class TestDefaultDict(TestDict): """Test defaultdict as input and factory Class attributes: D: callable that inputs a dict and creates or returns a MutableMapping kw: kwargs dict to specify "factory" keyword (if applicable) """ @staticmethod def D(dict_): return defaultdict(int, dict_) kw = {'factory': lambda: defaultdict(int)} class CustomMapping(object): """Define methods of the MutableMapping protocol required by dicttoolz""" def __init__(self, *args, **kwargs): self._d = dict(*args, **kwargs) def __getitem__(self, key): return self._d[key] def __setitem__(self, key, val): self._d[key] = val def __delitem__(self, key): del self._d[key] def __iter__(self): return iter(self._d) def __len__(self): return len(self._d) def __contains__(self, key): return key in self._d def __eq__(self, other): return isinstance(other, CustomMapping) and self._d == other._d def __ne__(self, other): return not isinstance(other, CustomMapping) or self._d != other._d def keys(self): return self._d.keys() def values(self): return self._d.values() def items(self): return self._d.items() def update(self, *args, **kwargs): self._d.update(*args, **kwargs) # Unused methods that are part of the MutableMapping protocol #def get(self, key, *args): # return self._d.get(key, *args) #def pop(self, key, *args): # return self._d.pop(key, *args) #def popitem(self, key): # return self._d.popitem() #def clear(self): # self._d.clear() #def setdefault(self, key, *args): # return self._d.setdefault(self, key, *args) class TestCustomMapping(TestDict): """Test CustomMapping as input and factory Class attributes: D: callable that inputs a dict and creates or returns a MutableMapping kw: kwargs dict to specify "factory" keyword (if applicable) """ D = CustomMapping kw = {'factory': lambda: CustomMapping()} def test_environ(): # See: https://github.com/pytoolz/cytoolz/issues/127 assert keymap(identity, os.environ) == os.environ assert valmap(identity, os.environ) == os.environ assert itemmap(identity, os.environ) == os.environ def test_merge_with_non_dict_mappings(): class Foo(Mapping): def __init__(self, d): self.d = d def __iter__(self): return iter(self.d) 
def __getitem__(self, key): return self.d[key] def __len__(self): return len(self.d) d = Foo({1: 1}) assert merge(d) is d or merge(d) == {1: 1} assert merge_with(sum, d) == {1: 1} toolz-0.11.2/toolz/tests/test_functoolz.py000066400000000000000000000473551414140660100207220ustar00rootroot00000000000000import inspect import toolz from toolz.functoolz import (thread_first, thread_last, memoize, curry, compose, compose_left, pipe, complement, do, juxt, flip, excepts, apply) from operator import add, mul, itemgetter from toolz.utils import raises from functools import partial def iseven(x): return x % 2 == 0 def isodd(x): return x % 2 == 1 def inc(x): return x + 1 def double(x): return 2 * x class AlwaysEquals(object): """useful to test correct __eq__ implementation of other objects""" def __eq__(self, other): return True def __ne__(self, other): return False class NeverEquals(object): """useful to test correct __eq__ implementation of other objects""" def __eq__(self, other): return False def __ne__(self, other): return True def test_apply(): assert apply(double, 5) == 10 assert tuple(map(apply, [double, inc, double], [10, 500, 8000])) == (20, 501, 16000) assert raises(TypeError, apply) def test_thread_first(): assert thread_first(2) == 2 assert thread_first(2, inc) == 3 assert thread_first(2, inc, inc) == 4 assert thread_first(2, double, inc) == 5 assert thread_first(2, (add, 5), double) == 14 def test_thread_last(): assert list(thread_last([1, 2, 3], (map, inc), (filter, iseven))) == [2, 4] assert list(thread_last([1, 2, 3], (map, inc), (filter, isodd))) == [3] assert thread_last(2, (add, 5), double) == 14 def test_memoize(): fn_calls = [0] # Storage for side effects def f(x, y): """ A docstring """ fn_calls[0] += 1 return x + y mf = memoize(f) assert mf(2, 3) is mf(2, 3) assert fn_calls == [1] # function was only called once assert mf.__doc__ == f.__doc__ assert raises(TypeError, lambda: mf(1, {})) def test_memoize_kwargs(): fn_calls = [0] # Storage for side effects def f(x, y=0): return x + y mf = memoize(f) assert mf(1) == f(1) assert mf(1, 2) == f(1, 2) assert mf(1, y=2) == f(1, y=2) assert mf(1, y=3) == f(1, y=3) def test_memoize_curried(): @curry def f(x, y=0): return x + y f2 = f(y=1) fm2 = memoize(f2) assert fm2(3) == f2(3) assert fm2(3) == f2(3) def test_memoize_partial(): def f(x, y=0): return x + y f2 = partial(f, y=1) fm2 = memoize(f2) assert fm2(3) == f2(3) assert fm2(3) == f2(3) def test_memoize_key_signature(): # Single argument should not be tupled as a key. No keywords. mf = memoize(lambda x: False, cache={1: True}) assert mf(1) is True assert mf(2) is False # Single argument must be tupled if signature has varargs. No keywords. mf = memoize(lambda x, *args: False, cache={(1,): True, (1, 2): 2}) assert mf(1) is True assert mf(2) is False assert mf(1, 1) is False assert mf(1, 2) == 2 assert mf((1, 2)) is False # More than one argument is always tupled. No keywords. mf = memoize(lambda x, y: False, cache={(1, 2): True}) assert mf(1, 2) is True assert mf(1, 3) is False assert raises(TypeError, lambda: mf((1, 2))) # Nullary function (no inputs) uses empty tuple as the key mf = memoize(lambda: False, cache={(): True}) assert mf() is True # Single argument must be tupled if there are keyword arguments, because # keyword arguments may be passed as unnamed args. 
mf = memoize(lambda x, y=0: False, cache={((1,), frozenset((('y', 2),))): 2, ((1, 2), None): 3}) assert mf(1, y=2) == 2 assert mf(1, 2) == 3 assert mf(2, y=2) is False assert mf(2, 2) is False assert mf(1) is False assert mf((1, 2)) is False # Keyword-only signatures must still have an "args" tuple. mf = memoize(lambda x=0: False, cache={(None, frozenset((('x', 1),))): 1, ((1,), None): 2}) assert mf() is False assert mf(x=1) == 1 assert mf(1) == 2 def test_memoize_curry_cache(): @memoize(cache={1: True}) def f(x): return False assert f(1) is True assert f(2) is False def test_memoize_key(): @memoize(key=lambda args, kwargs: args[0]) def f(x, y, *args, **kwargs): return x + y assert f(1, 2) == 3 assert f(1, 3) == 3 def test_memoize_wrapped(): def foo(): """ Docstring """ pass memoized_foo = memoize(foo) assert memoized_foo.__wrapped__ is foo def test_curry_simple(): cmul = curry(mul) double = cmul(2) assert callable(double) assert double(10) == 20 assert repr(cmul) == repr(mul) cmap = curry(map) assert list(cmap(inc)([1, 2, 3])) == [2, 3, 4] assert raises(TypeError, lambda: curry()) assert raises(TypeError, lambda: curry({1: 2})) def test_curry_kwargs(): def f(a, b, c=10): return (a + b) * c f = curry(f) assert f(1, 2, 3) == 9 assert f(1)(2, 3) == 9 assert f(1, 2) == 30 assert f(1, c=3)(2) == 9 assert f(c=3)(1, 2) == 9 def g(a=1, b=10, c=0): return a + b + c cg = curry(g, b=2) assert cg() == 3 assert cg(b=3) == 4 assert cg(a=0) == 2 assert cg(a=0, b=1) == 1 assert cg(0) == 2 # pass "a" as arg, not kwarg assert raises(TypeError, lambda: cg(1, 2)) # pass "b" as arg AND kwarg def h(x, func=int): return func(x) # __init__ must not pick func as positional arg assert curry(h)(0.0) == 0 assert curry(h)(func=str)(0.0) == '0.0' assert curry(h, func=str)(0.0) == '0.0' def test_curry_passes_errors(): @curry def f(a, b): if not isinstance(a, int): raise TypeError() return a + b assert f(1, 2) == 3 assert raises(TypeError, lambda: f('1', 2)) assert raises(TypeError, lambda: f('1')(2)) assert raises(TypeError, lambda: f(1, 2, 3)) def test_curry_docstring(): def f(x, y): """ A docstring """ return x g = curry(f) assert g.__doc__ == f.__doc__ assert str(g) == str(f) assert f(1, 2) == g(1, 2) def test_curry_is_like_partial(): def foo(a, b, c=1): return a + b + c p, c = partial(foo, 1, c=2), curry(foo)(1, c=2) assert p.keywords == c.keywords assert p.args == c.args assert p(3) == c(3) p, c = partial(foo, 1), curry(foo)(1) assert p.keywords == c.keywords assert p.args == c.args assert p(3) == c(3) assert p(3, c=2) == c(3, c=2) p, c = partial(foo, c=1), curry(foo)(c=1) assert p.keywords == c.keywords assert p.args == c.args assert p(1, 2) == c(1, 2) def test_curry_is_idempotent(): def foo(a, b, c=1): return a + b + c f = curry(foo, 1, c=2) g = curry(f) assert isinstance(f, curry) assert isinstance(g, curry) assert not isinstance(g.func, curry) assert not hasattr(g.func, 'func') assert f.func == g.func assert f.args == g.args assert f.keywords == g.keywords def test_curry_attributes_readonly(): def foo(a, b, c=1): return a + b + c f = curry(foo, 1, c=2) assert raises(AttributeError, lambda: setattr(f, 'args', (2,))) assert raises(AttributeError, lambda: setattr(f, 'keywords', {'c': 3})) assert raises(AttributeError, lambda: setattr(f, 'func', f)) assert raises(AttributeError, lambda: delattr(f, 'args')) assert raises(AttributeError, lambda: delattr(f, 'keywords')) assert raises(AttributeError, lambda: delattr(f, 'func')) def test_curry_attributes_writable(): def foo(a, b, c=1): return a + b + c foo.__qualname__ 
= 'this.is.foo' f = curry(foo, 1, c=2) assert f.__qualname__ == 'this.is.foo' f.__name__ = 'newname' f.__doc__ = 'newdoc' f.__module__ = 'newmodule' f.__qualname__ = 'newqualname' assert f.__name__ == 'newname' assert f.__doc__ == 'newdoc' assert f.__module__ == 'newmodule' assert f.__qualname__ == 'newqualname' if hasattr(f, 'func_name'): assert f.__name__ == f.func_name def test_curry_module(): from toolz.curried.exceptions import merge assert merge.__module__ == 'toolz.curried.exceptions' def test_curry_comparable(): def foo(a, b, c=1): return a + b + c f1 = curry(foo, 1, c=2) f2 = curry(foo, 1, c=2) g1 = curry(foo, 1, c=3) h1 = curry(foo, c=2) h2 = h1(c=2) h3 = h1() assert f1 == f2 assert not (f1 != f2) assert f1 != g1 assert not (f1 == g1) assert f1 != h1 assert h1 == h2 assert h1 == h3 # test function comparison works def bar(a, b, c=1): return a + b + c b1 = curry(bar, 1, c=2) assert b1 != f1 assert {f1, f2, g1, h1, h2, h3, b1, b1()} == {f1, g1, h1, b1} # test unhashable input unhash1 = curry(foo, []) assert raises(TypeError, lambda: hash(unhash1)) unhash2 = curry(foo, c=[]) assert raises(TypeError, lambda: hash(unhash2)) def test_curry_doesnot_transmogrify(): # Early versions of `curry` transmogrified to `partial` objects if # only one positional argument remained even if keyword arguments # were present. Now, `curry` should always remain `curry`. def f(x, y=0): return x + y cf = curry(f) assert cf(y=1)(y=2)(y=3)(1) == f(1, 3) def test_curry_on_classmethods(): class A(object): BASE = 10 def __init__(self, base): self.BASE = base @curry def addmethod(self, x, y): return self.BASE + x + y @classmethod @curry def addclass(cls, x, y): return cls.BASE + x + y @staticmethod @curry def addstatic(x, y): return x + y a = A(100) assert a.addmethod(3, 4) == 107 assert a.addmethod(3)(4) == 107 assert A.addmethod(a, 3, 4) == 107 assert A.addmethod(a)(3)(4) == 107 assert a.addclass(3, 4) == 17 assert a.addclass(3)(4) == 17 assert A.addclass(3, 4) == 17 assert A.addclass(3)(4) == 17 assert a.addstatic(3, 4) == 7 assert a.addstatic(3)(4) == 7 assert A.addstatic(3, 4) == 7 assert A.addstatic(3)(4) == 7 # we want this to be of type curry assert isinstance(a.addmethod, curry) assert isinstance(A.addmethod, curry) def test_memoize_on_classmethods(): class A(object): BASE = 10 HASH = 10 def __init__(self, base): self.BASE = base @memoize def addmethod(self, x, y): return self.BASE + x + y @classmethod @memoize def addclass(cls, x, y): return cls.BASE + x + y @staticmethod @memoize def addstatic(x, y): return x + y def __hash__(self): return self.HASH a = A(100) assert a.addmethod(3, 4) == 107 assert A.addmethod(a, 3, 4) == 107 a.BASE = 200 assert a.addmethod(3, 4) == 107 a.HASH = 200 assert a.addmethod(3, 4) == 207 assert a.addclass(3, 4) == 17 assert A.addclass(3, 4) == 17 A.BASE = 20 assert A.addclass(3, 4) == 17 A.HASH = 20 # hashing of class is handled by metaclass assert A.addclass(3, 4) == 17 # hence, != 27 assert a.addstatic(3, 4) == 7 assert A.addstatic(3, 4) == 7 def test_curry_call(): @curry def add(x, y): return x + y assert raises(TypeError, lambda: add.call(1)) assert add(1)(2) == add.call(1, 2) assert add(1)(2) == add(1).call(2) def test_curry_bind(): @curry def add(x=1, y=2): return x + y assert add() == add(1, 2) assert add.bind(10)(20) == add(10, 20) assert add.bind(10).bind(20)() == add(10, 20) assert add.bind(x=10)(y=20) == add(10, 20) assert add.bind(x=10).bind(y=20)() == add(10, 20) def test_curry_unknown_args(): def add3(x, y, z): return x + y + z @curry def f(*args): return 
add3(*args) assert f()(1)(2)(3) == 6 assert f(1)(2)(3) == 6 assert f(1, 2)(3) == 6 assert f(1, 2, 3) == 6 assert f(1, 2)(3, 4) == f(1, 2, 3, 4) def test_curry_bad_types(): assert raises(TypeError, lambda: curry(1)) def test_curry_subclassable(): class mycurry(curry): pass add = mycurry(lambda x, y: x+y) assert isinstance(add, curry) assert isinstance(add, mycurry) assert isinstance(add(1), mycurry) assert isinstance(add()(1), mycurry) assert add(1)(2) == 3 # Should we make `_should_curry` public? """ class curry2(curry): def _should_curry(self, args, kwargs, exc=None): return len(self.args) + len(args) < 2 add = curry2(lambda x, y: x+y) assert isinstance(add(1), curry2) assert add(1)(2) == 3 assert isinstance(add(1)(x=2), curry2) assert raises(TypeError, lambda: add(1)(x=2)(3)) """ def generate_compose_test_cases(): """ Generate test cases for parametrized tests of the compose function. """ def add_then_multiply(a, b, c=10): return (a + b) * c return ( ( (), # arguments to compose() (0,), {}, # positional and keyword args to the Composed object 0 # expected result ), ( (inc,), (0,), {}, 1 ), ( (double, inc), (0,), {}, 2 ), ( (str, iseven, inc, double), (3,), {}, "False" ), ( (str, add), (1, 2), {}, '3' ), ( (str, inc, add_then_multiply), (1, 2), {"c": 3}, '10' ), ) def test_compose(): for (compose_args, args, kw, expected) in generate_compose_test_cases(): assert compose(*compose_args)(*args, **kw) == expected def test_compose_metadata(): # Define two functions with different names def f(a): return a def g(a): return a composed = compose(f, g) assert composed.__name__ == 'f_of_g' assert composed.__doc__ == 'lambda *args, **kwargs: f(g(*args, **kwargs))' # Create an object with no __name__. h = object() composed = compose(f, h) assert composed.__name__ == 'Compose' assert composed.__doc__ == 'A composition of functions' assert repr(composed) == 'Compose({!r}, {!r})'.format(f, h) assert composed == compose(f, h) assert composed == AlwaysEquals() assert not composed == compose(h, f) assert not composed == object() assert not composed == NeverEquals() assert composed != compose(h, f) assert composed != NeverEquals() assert composed != object() assert not composed != compose(f, h) assert not composed != AlwaysEquals() assert hash(composed) == hash(compose(f, h)) assert hash(composed) != hash(compose(h, f)) bindable = compose(str, lambda x: x*2, lambda x, y=0: int(x) + y) class MyClass: def __int__(self): return 8 my_method = bindable my_static_method = staticmethod(bindable) assert MyClass.my_method(3) == '6' assert MyClass.my_method(3, y=2) == '10' assert MyClass.my_static_method(2) == '4' assert MyClass().my_method() == '16' assert MyClass().my_method(y=3) == '22' assert MyClass().my_static_method(0) == '0' assert MyClass().my_static_method(0, 1) == '2' assert compose(f, h).__wrapped__ is h if hasattr(toolz, 'sandbox'): # only test this with Python version (i.e., not Cython) assert compose(f, h).__class__.__wrapped__ is None # __signature__ is python3 only def myfunc(a, b, c, *d, **e): return 4 def otherfunc(f): return 'result: {}'.format(f) # set annotations compatibly with python2 syntax myfunc.__annotations__ = { 'a': int, 'b': str, 'c': float, 'd': int, 'e': bool, 'return': int, } otherfunc.__annotations__ = {'f': int, 'return': str} composed = compose(otherfunc, myfunc) sig = inspect.signature(composed) assert sig.parameters == inspect.signature(myfunc).parameters assert sig.return_annotation == str class MyClass: method = composed assert 
len(inspect.signature(MyClass().method).parameters) == 4 def generate_compose_left_test_cases(): """ Generate test cases for parametrized tests of the compose function. These are based on, and equivalent to, those produced by enerate_compose_test_cases(). """ return tuple( (tuple(reversed(compose_args)), args, kwargs, expected) for (compose_args, args, kwargs, expected) in generate_compose_test_cases() ) def test_compose_left(): for (compose_left_args, args, kw, expected) in generate_compose_left_test_cases(): assert compose_left(*compose_left_args)(*args, **kw) == expected def test_pipe(): assert pipe(1, inc) == 2 assert pipe(1, inc, inc) == 3 assert pipe(1, double, inc, iseven) is False def test_complement(): # No args: assert complement(lambda: False)() assert not complement(lambda: True)() # Single arity: assert complement(iseven)(1) assert not complement(iseven)(2) assert complement(complement(iseven))(2) assert not complement(complement(isodd))(2) # Multiple arities: both_even = lambda a, b: iseven(a) and iseven(b) assert complement(both_even)(1, 2) assert not complement(both_even)(2, 2) # Generic truthiness: assert complement(lambda: "")() assert complement(lambda: 0)() assert complement(lambda: None)() assert complement(lambda: [])() assert not complement(lambda: "x")() assert not complement(lambda: 1)() assert not complement(lambda: [1])() def test_do(): inc = lambda x: x + 1 assert do(inc, 1) == 1 log = [] assert do(log.append, 1) == 1 assert log == [1] def test_juxt_generator_input(): data = list(range(10)) juxtfunc = juxt(itemgetter(2*i) for i in range(5)) assert juxtfunc(data) == (0, 2, 4, 6, 8) assert juxtfunc(data) == (0, 2, 4, 6, 8) def test_flip(): def f(a, b): return a, b assert flip(f, 'a', 'b') == ('b', 'a') def test_excepts(): # These are descriptors, make sure this works correctly. assert excepts.__name__ == 'excepts' assert ( 'A wrapper around a function to catch exceptions and\n' ' dispatch to a handler.\n' ) in excepts.__doc__ def idx(a): """idx docstring """ return [1, 2].index(a) def handler(e): """handler docstring """ assert isinstance(e, ValueError) return -1 excepting = excepts(ValueError, idx, handler) assert excepting(1) == 0 assert excepting(2) == 1 assert excepting(3) == -1 assert excepting.__name__ == 'idx_excepting_ValueError' assert 'idx docstring' in excepting.__doc__ assert 'ValueError' in excepting.__doc__ assert 'handler docstring' in excepting.__doc__ def getzero(a): """getzero docstring """ return a[0] excepting = excepts((IndexError, KeyError), getzero) assert excepting([]) is None assert excepting([1]) == 1 assert excepting({}) is None assert excepting({0: 1}) == 1 assert excepting.__name__ == 'getzero_excepting_IndexError_or_KeyError' assert 'getzero docstring' in excepting.__doc__ assert 'return_none' in excepting.__doc__ assert 'Returns None' in excepting.__doc__ def raise_(a): """A function that raises an instance of the exception type given. 
""" raise a() excepting = excepts((ValueError, KeyError), raise_) assert excepting(ValueError) is None assert excepting(KeyError) is None assert raises(TypeError, lambda: excepting(TypeError)) assert raises(NotImplementedError, lambda: excepting(NotImplementedError)) excepting = excepts(object(), object(), object()) assert excepting.__name__ == 'excepting' assert excepting.__doc__ == excepts.__doc__ toolz-0.11.2/toolz/tests/test_inspect_args.py000066400000000000000000000371301414140660100213460ustar00rootroot00000000000000import functools import inspect import itertools import operator import toolz from toolz.functoolz import (curry, is_valid_args, is_partial_args, is_arity, num_required_args, has_varargs, has_keywords) from toolz._signatures import builtins import toolz._signatures as _sigs from toolz.utils import raises def make_func(param_string, raise_if_called=True): if not param_string.startswith('('): param_string = '(%s)' % param_string if raise_if_called: body = 'raise ValueError("function should not be called")' else: body = 'return True' d = {} exec('def func%s:\n %s' % (param_string, body), globals(), d) return d['func'] def test_make_func(): f = make_func('') assert raises(ValueError, lambda: f()) assert raises(TypeError, lambda: f(1)) f = make_func('', raise_if_called=False) assert f() assert raises(TypeError, lambda: f(1)) f = make_func('x, y=1', raise_if_called=False) assert f(1) assert f(x=1) assert f(1, 2) assert f(x=1, y=2) assert raises(TypeError, lambda: f(1, 2, 3)) f = make_func('(x, y=1)', raise_if_called=False) assert f(1) assert f(x=1) assert f(1, 2) assert f(x=1, y=2) assert raises(TypeError, lambda: f(1, 2, 3)) def test_is_valid(check_valid=is_valid_args, incomplete=False): orig_check_valid = check_valid check_valid = lambda func, *args, **kwargs: orig_check_valid(func, args, kwargs) f = make_func('') assert check_valid(f) assert check_valid(f, 1) is False assert check_valid(f, x=1) is False f = make_func('x') assert check_valid(f) is incomplete assert check_valid(f, 1) assert check_valid(f, x=1) assert check_valid(f, 1, x=2) is False assert check_valid(f, 1, y=2) is False assert check_valid(f, 1, 2) is False assert check_valid(f, x=1, y=2) is False f = make_func('x=1') assert check_valid(f) assert check_valid(f, 1) assert check_valid(f, x=1) assert check_valid(f, 1, x=2) is False assert check_valid(f, 1, y=2) is False assert check_valid(f, 1, 2) is False assert check_valid(f, x=1, y=2) is False f = make_func('*args') assert check_valid(f) assert check_valid(f, 1) assert check_valid(f, 1, 2) assert check_valid(f, x=1) is False f = make_func('**kwargs') assert check_valid(f) assert check_valid(f, x=1) assert check_valid(f, x=1, y=2) assert check_valid(f, 1) is False f = make_func('x, *args') assert check_valid(f) is incomplete assert check_valid(f, 1) assert check_valid(f, 1, 2) assert check_valid(f, x=1) assert check_valid(f, 1, x=1) is False assert check_valid(f, 1, y=1) is False f = make_func('x, y=1, **kwargs') assert check_valid(f) is incomplete assert check_valid(f, 1) assert check_valid(f, x=1) assert check_valid(f, 1, 2) assert check_valid(f, x=1, y=2, z=3) assert check_valid(f, 1, 2, y=3) is False f = make_func('a, b, c=3, d=4') assert check_valid(f) is incomplete assert check_valid(f, 1) is incomplete assert check_valid(f, 1, 2) assert check_valid(f, 1, c=3) is incomplete assert check_valid(f, 1, e=3) is False assert check_valid(f, 1, 2, e=3) is False assert check_valid(f, 1, 2, b=3) is False assert check_valid(1) is False def 
test_is_valid_py3(check_valid=is_valid_args, incomplete=False): orig_check_valid = check_valid check_valid = lambda func, *args, **kwargs: orig_check_valid(func, args, kwargs) f = make_func('x, *, y=1') assert check_valid(f) is incomplete assert check_valid(f, 1) assert check_valid(f, x=1) assert check_valid(f, 1, y=2) assert check_valid(f, 1, 2) is False assert check_valid(f, 1, z=2) is False f = make_func('x, *args, y=1') assert check_valid(f) is incomplete assert check_valid(f, 1) assert check_valid(f, x=1) assert check_valid(f, 1, y=2) assert check_valid(f, 1, 2, y=2) assert check_valid(f, 1, 2) assert check_valid(f, 1, z=2) is False f = make_func('*, y=1') assert check_valid(f) assert check_valid(f, 1) is False assert check_valid(f, y=1) assert check_valid(f, z=1) is False f = make_func('x, *, y') assert check_valid(f) is incomplete assert check_valid(f, 1) is incomplete assert check_valid(f, x=1) is incomplete assert check_valid(f, 1, y=2) assert check_valid(f, x=1, y=2) assert check_valid(f, 1, 2) is False assert check_valid(f, 1, z=2) is False assert check_valid(f, 1, y=1, z=2) is False f = make_func('x=1, *, y, z=3') assert check_valid(f) is incomplete assert check_valid(f, 1, z=3) is incomplete assert check_valid(f, y=2) assert check_valid(f, 1, y=2) assert check_valid(f, x=1, y=2) assert check_valid(f, x=1, y=2, z=3) assert check_valid(f, 1, x=1, y=2) is False assert check_valid(f, 1, 3, y=2) is False f = make_func('w, x=2, *args, y, z=4') assert check_valid(f) is incomplete assert check_valid(f, 1) is incomplete assert check_valid(f, 1, y=3) f = make_func('a, b, c=3, d=4, *args, e=5, f=6, g, h') assert check_valid(f) is incomplete assert check_valid(f, 1) is incomplete assert check_valid(f, 1, 2) is incomplete assert check_valid(f, 1, 2, g=7) is incomplete assert check_valid(f, 1, 2, g=7, h=8) assert check_valid(f, 1, 2, 3, 4, 5, 6, 7, 8, 9) is incomplete f = make_func('a: int, b: float') assert check_valid(f) is incomplete assert check_valid(f, 1) is incomplete assert check_valid(f, b=1) is incomplete assert check_valid(f, 1, 2) f = make_func('(a: int, b: float) -> float') assert check_valid(f) is incomplete assert check_valid(f, 1) is incomplete assert check_valid(f, b=1) is incomplete assert check_valid(f, 1, 2) f.__signature__ = 34 assert check_valid(f) is False class RaisesValueError(object): def __call__(self): pass @property def __signature__(self): raise ValueError('Testing Python 3.4') f = RaisesValueError() assert check_valid(f) is None def test_is_partial(): test_is_valid(check_valid=is_partial_args, incomplete=True) test_is_valid_py3(check_valid=is_partial_args, incomplete=True) def test_is_valid_curry(): def check_curry(func, args, kwargs, incomplete=True): try: curry(func)(*args, **kwargs) curry(func, *args)(**kwargs) curry(func, **kwargs)(*args) curry(func, *args, **kwargs)() if not isinstance(func, type(lambda: None)): return None return incomplete except ValueError: return True except TypeError: return False check_valid = functools.partial(check_curry, incomplete=True) test_is_valid(check_valid=check_valid, incomplete=True) test_is_valid_py3(check_valid=check_valid, incomplete=True) check_valid = functools.partial(check_curry, incomplete=False) test_is_valid(check_valid=check_valid, incomplete=False) test_is_valid_py3(check_valid=check_valid, incomplete=False) def test_func_keyword(): def f(func=None): pass assert is_valid_args(f, (), {}) assert is_valid_args(f, (None,), {}) assert is_valid_args(f, (), {'func': None}) assert is_valid_args(f, (None,), {'func': 
None}) is False assert is_partial_args(f, (), {}) assert is_partial_args(f, (None,), {}) assert is_partial_args(f, (), {'func': None}) assert is_partial_args(f, (None,), {'func': None}) is False def test_has_unknown_args(): assert has_varargs(1) is False assert has_varargs(map) assert has_varargs(make_func('')) is False assert has_varargs(make_func('x, y, z')) is False assert has_varargs(make_func('*args')) assert has_varargs(make_func('**kwargs')) is False assert has_varargs(make_func('x, y, *args, **kwargs')) assert has_varargs(make_func('x, y, z=1')) is False assert has_varargs(make_func('x, y, z=1, **kwargs')) is False f = make_func('*args') f.__signature__ = 34 assert has_varargs(f) is False class RaisesValueError(object): def __call__(self): pass @property def __signature__(self): raise ValueError('Testing Python 3.4') f = RaisesValueError() assert has_varargs(f) is None def test_num_required_args(): assert num_required_args(lambda: None) == 0 assert num_required_args(lambda x: None) == 1 assert num_required_args(lambda x, *args: None) == 1 assert num_required_args(lambda x, **kwargs: None) == 1 assert num_required_args(lambda x, y, *args, **kwargs: None) == 2 assert num_required_args(map) == 2 assert num_required_args(dict) is None def test_has_keywords(): assert has_keywords(lambda: None) is False assert has_keywords(lambda x: None) is False assert has_keywords(lambda x=1: None) assert has_keywords(lambda **kwargs: None) assert has_keywords(int) assert has_keywords(sorted) assert has_keywords(max) assert has_keywords(map) is False assert has_keywords(bytearray) is None def test_has_varargs(): assert has_varargs(lambda: None) is False assert has_varargs(lambda *args: None) assert has_varargs(lambda **kwargs: None) is False assert has_varargs(map) assert has_varargs(max) is None def test_is_arity(): assert is_arity(0, lambda: None) assert is_arity(1, lambda: None) is False assert is_arity(1, lambda x: None) assert is_arity(3, lambda x, y, z: None) assert is_arity(1, lambda x, *args: None) is False assert is_arity(1, lambda x, **kwargs: None) is False assert is_arity(1, all) assert is_arity(2, map) is False assert is_arity(2, range) is None def test_introspect_curry_valid_py3(check_valid=is_valid_args, incomplete=False): orig_check_valid = check_valid check_valid = lambda _func, *args, **kwargs: orig_check_valid(_func, args, kwargs) f = toolz.curry(make_func('x, y, z=0')) assert check_valid(f) assert check_valid(f, 1) assert check_valid(f, 1, 2) assert check_valid(f, 1, 2, 3) assert check_valid(f, 1, 2, 3, 4) is False assert check_valid(f, invalid_keyword=True) is False assert check_valid(f(1)) assert check_valid(f(1), 2) assert check_valid(f(1), 2, 3) assert check_valid(f(1), 2, 3, 4) is False assert check_valid(f(1), x=2) is False assert check_valid(f(1), y=2) assert check_valid(f(x=1), 2) is False assert check_valid(f(x=1), y=2) assert check_valid(f(y=2), 1) assert check_valid(f(y=2), 1, z=3) assert check_valid(f(y=2), 1, 3) is False f = toolz.curry(make_func('x, y, z=0'), 1, x=1) assert check_valid(f) is False assert check_valid(f, z=3) is False f = toolz.curry(make_func('x, y, *args, z')) assert check_valid(f) assert check_valid(f, 0) assert check_valid(f(1), 0) assert check_valid(f(1, 2), 0) assert check_valid(f(1, 2, 3), 0) assert check_valid(f(1, 2, 3, 4), 0) assert check_valid(f(1, 2, 3, 4), z=4) assert check_valid(f(x=1)) assert check_valid(f(x=1), 1) is False assert check_valid(f(x=1), y=2) def test_introspect_curry_partial_py3(): 
test_introspect_curry_valid_py3(check_valid=is_partial_args, incomplete=True) def test_introspect_curry_py3(): f = toolz.curry(make_func('')) assert num_required_args(f) == 0 assert is_arity(0, f) assert has_varargs(f) is False assert has_keywords(f) is False f = toolz.curry(make_func('x')) assert num_required_args(f) == 0 assert is_arity(0, f) is False assert is_arity(1, f) is False assert has_varargs(f) is False assert has_keywords(f) # A side-effect of being curried f = toolz.curry(make_func('x, y, z=0')) assert num_required_args(f) == 0 assert is_arity(0, f) is False assert is_arity(1, f) is False assert is_arity(2, f) is False assert is_arity(3, f) is False assert has_varargs(f) is False assert has_keywords(f) f = toolz.curry(make_func('*args, **kwargs')) assert num_required_args(f) == 0 assert has_varargs(f) assert has_keywords(f) def test_introspect_builtin_modules(): mods = [builtins, functools, itertools, operator, toolz, toolz.functoolz, toolz.itertoolz, toolz.dicttoolz, toolz.recipes] denylist = set() def add_denylist(mod, attr): if hasattr(mod, attr): denylist.add(getattr(mod, attr)) add_denylist(builtins, 'basestring') add_denylist(builtins, 'NoneType') add_denylist(builtins, '__metaclass__') add_denylist(builtins, 'sequenceiterator') def is_missing(modname, name, func): if name.startswith('_') and not name.startswith('__'): return False if name.startswith('__pyx_unpickle_') or name.endswith('_cython__'): return False try: if issubclass(func, BaseException): return False except TypeError: pass try: return (callable(func) and func.__module__ is not None and modname in func.__module__ and is_partial_args(func, (), {}) is not True and func not in denylist) except AttributeError: return False missing = {} for mod in mods: modname = mod.__name__ for name, func in vars(mod).items(): if is_missing(modname, name, func): if modname not in missing: missing[modname] = [] missing[modname].append(name) if missing: messages = [] for modname, names in sorted(missing.items()): msg = '{}:\n {}'.format(modname, '\n '.join(sorted(names))) messages.append(msg) message = 'Missing introspection for the following callables:\n\n' raise AssertionError(message + '\n\n'.join(messages)) def test_inspect_signature_property(): # By adding AddX to our signature registry, we can inspect the class # itself and objects of the class. `inspect.signature` doesn't like # it when `obj.__signature__` is a property. 
class AddX(object): def __init__(self, func): self.func = func def __call__(self, addx, *args, **kwargs): return addx + self.func(*args, **kwargs) @property def __signature__(self): sig = inspect.signature(self.func) params = list(sig.parameters.values()) kind = inspect.Parameter.POSITIONAL_OR_KEYWORD newparam = inspect.Parameter('addx', kind) params = [newparam] + params return sig.replace(parameters=params) addx = AddX(lambda x: x) sig = inspect.signature(addx) assert sig == inspect.Signature(parameters=[ inspect.Parameter('addx', inspect.Parameter.POSITIONAL_OR_KEYWORD), inspect.Parameter('x', inspect.Parameter.POSITIONAL_OR_KEYWORD)]) assert num_required_args(AddX) is False _sigs.signatures[AddX] = (_sigs.expand_sig((0, lambda func: None)),) assert num_required_args(AddX) == 1 del _sigs.signatures[AddX] def test_inspect_wrapped_property(): class Wrapped(object): def __init__(self, func): self.func = func def __call__(self, *args, **kwargs): return self.func(*args, **kwargs) @property def __wrapped__(self): return self.func func = lambda x: x wrapped = Wrapped(func) assert inspect.signature(func) == inspect.signature(wrapped) assert num_required_args(Wrapped) is None _sigs.signatures[Wrapped] = (_sigs.expand_sig((0, lambda func: None)),) assert num_required_args(Wrapped) == 1 toolz-0.11.2/toolz/tests/test_itertoolz.py000066400000000000000000000434051414140660100207220ustar00rootroot00000000000000import itertools from itertools import starmap from toolz.utils import raises from functools import partial from random import Random from pickle import dumps, loads from toolz.itertoolz import (remove, groupby, merge_sorted, concat, concatv, interleave, unique, isiterable, getter, mapcat, isdistinct, first, second, nth, take, tail, drop, interpose, get, rest, last, cons, frequencies, reduceby, iterate, accumulate, sliding_window, count, partition, partition_all, take_nth, pluck, join, diff, topk, peek, peekn, random_sample) from operator import add, mul # is comparison will fail between this and no_default no_default2 = loads(dumps('__no__default__')) def identity(x): return x def iseven(x): return x % 2 == 0 def isodd(x): return x % 2 == 1 def inc(x): return x + 1 def double(x): return 2 * x def test_remove(): r = remove(iseven, range(5)) assert type(r) is not list assert list(r) == list(filter(isodd, range(5))) def test_groupby(): assert groupby(iseven, [1, 2, 3, 4]) == {True: [2, 4], False: [1, 3]} def test_groupby_non_callable(): assert groupby(0, [(1, 2), (1, 3), (2, 2), (2, 4)]) == \ {1: [(1, 2), (1, 3)], 2: [(2, 2), (2, 4)]} assert groupby([0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \ {(1,): [(1, 2), (1, 3)], (2,): [(2, 2), (2, 4)]} assert groupby([0, 0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \ {(1, 1): [(1, 2), (1, 3)], (2, 2): [(2, 2), (2, 4)]} def test_merge_sorted(): assert list(merge_sorted([1, 2, 3], [1, 2, 3])) == [1, 1, 2, 2, 3, 3] assert list(merge_sorted([1, 3, 5], [2, 4, 6])) == [1, 2, 3, 4, 5, 6] assert list(merge_sorted([1], [2, 4], [3], [])) == [1, 2, 3, 4] assert list(merge_sorted([5, 3, 1], [6, 4, 3], [], key=lambda x: -x)) == [6, 5, 4, 3, 3, 1] assert list(merge_sorted([2, 1, 3], [1, 2, 3], key=lambda x: x // 3)) == [2, 1, 1, 2, 3, 3] assert list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3)) == [2, 1, 3, 3] assert ''.join(merge_sorted('abc', 'abc', 'abc')) == 'aaabbbccc' assert ''.join(merge_sorted('abc', 'abc', 'abc', key=ord)) == 'aaabbbccc' assert ''.join(merge_sorted('cba', 'cba', 'cba', key=lambda x: -ord(x))) == 'cccbbbaaa' assert list(merge_sorted([1], [2, 3, 
4], key=identity)) == [1, 2, 3, 4] data = [[(1, 2), (0, 4), (3, 6)], [(5, 3), (6, 5), (8, 8)], [(9, 1), (9, 8), (9, 9)]] assert list(merge_sorted(*data, key=lambda x: x[1])) == [ (9, 1), (1, 2), (5, 3), (0, 4), (6, 5), (3, 6), (8, 8), (9, 8), (9, 9)] assert list(merge_sorted()) == [] assert list(merge_sorted([1, 2, 3])) == [1, 2, 3] assert list(merge_sorted([1, 4, 5], [2, 3])) == [1, 2, 3, 4, 5] assert list(merge_sorted([1, 4, 5], [2, 3], key=identity)) == [ 1, 2, 3, 4, 5] assert list(merge_sorted([1, 5], [2], [4, 7], [3, 6], key=identity)) == [ 1, 2, 3, 4, 5, 6, 7] def test_interleave(): assert ''.join(interleave(('ABC', '123'))) == 'A1B2C3' assert ''.join(interleave(('ABC', '1'))) == 'A1BC' def test_unique(): assert tuple(unique((1, 2, 3))) == (1, 2, 3) assert tuple(unique((1, 2, 1, 3))) == (1, 2, 3) assert tuple(unique((1, 2, 3), key=iseven)) == (1, 2) def test_isiterable(): assert isiterable([1, 2, 3]) is True assert isiterable('abc') is True assert isiterable(5) is False def test_isdistinct(): assert isdistinct([1, 2, 3]) is True assert isdistinct([1, 2, 1]) is False assert isdistinct("Hello") is False assert isdistinct("World") is True assert isdistinct(iter([1, 2, 3])) is True assert isdistinct(iter([1, 2, 1])) is False def test_nth(): assert nth(2, 'ABCDE') == 'C' assert nth(2, iter('ABCDE')) == 'C' assert nth(1, (3, 2, 1)) == 2 assert nth(0, {'foo': 'bar'}) == 'foo' assert raises(StopIteration, lambda: nth(10, {10: 'foo'})) assert nth(-2, 'ABCDE') == 'D' assert raises(ValueError, lambda: nth(-2, iter('ABCDE'))) def test_first(): assert first('ABCDE') == 'A' assert first((3, 2, 1)) == 3 assert isinstance(first({0: 'zero', 1: 'one'}), int) def test_second(): assert second('ABCDE') == 'B' assert second((3, 2, 1)) == 2 assert isinstance(second({0: 'zero', 1: 'one'}), int) def test_last(): assert last('ABCDE') == 'E' assert last((3, 2, 1)) == 1 assert isinstance(last({0: 'zero', 1: 'one'}), int) def test_rest(): assert list(rest('ABCDE')) == list('BCDE') assert list(rest((3, 2, 1))) == list((2, 1)) def test_take(): assert list(take(3, 'ABCDE')) == list('ABC') assert list(take(2, (3, 2, 1))) == list((3, 2)) def test_tail(): assert list(tail(3, 'ABCDE')) == list('CDE') assert list(tail(3, iter('ABCDE'))) == list('CDE') assert list(tail(2, (3, 2, 1))) == list((2, 1)) def test_drop(): assert list(drop(3, 'ABCDE')) == list('DE') assert list(drop(1, (3, 2, 1))) == list((2, 1)) def test_take_nth(): assert list(take_nth(2, 'ABCDE')) == list('ACE') def test_get(): assert get(1, 'ABCDE') == 'B' assert list(get([1, 3], 'ABCDE')) == list('BD') assert get('a', {'a': 1, 'b': 2, 'c': 3}) == 1 assert get(['a', 'b'], {'a': 1, 'b': 2, 'c': 3}) == (1, 2) assert get('foo', {}, default='bar') == 'bar' assert get({}, [1, 2, 3], default='bar') == 'bar' assert get([0, 2], 'AB', 'C') == ('A', 'C') assert get([0], 'AB') == ('A',) assert get([], 'AB') == () assert raises(IndexError, lambda: get(10, 'ABC')) assert raises(KeyError, lambda: get(10, {'a': 1})) assert raises(TypeError, lambda: get({}, [1, 2, 3])) assert raises(TypeError, lambda: get([1, 2, 3], 1, None)) assert raises(KeyError, lambda: get('foo', {}, default=no_default2)) def test_mapcat(): assert (list(mapcat(identity, [[1, 2, 3], [4, 5, 6]])) == [1, 2, 3, 4, 5, 6]) assert (list(mapcat(reversed, [[3, 2, 1, 0], [6, 5, 4], [9, 8, 7]])) == list(range(10))) inc = lambda i: i + 1 assert ([4, 5, 6, 7, 8, 9] == list(mapcat(partial(map, inc), [[3, 4, 5], [6, 7, 8]]))) def test_cons(): assert list(cons(1, [2, 3])) == [1, 2, 3] def test_concat(): assert 
list(concat([[], [], []])) == [] assert (list(take(5, concat([['a', 'b'], range(1000000000)]))) == ['a', 'b', 0, 1, 2]) def test_concatv(): assert list(concatv([], [], [])) == [] assert (list(take(5, concatv(['a', 'b'], range(1000000000)))) == ['a', 'b', 0, 1, 2]) def test_interpose(): assert "a" == first(rest(interpose("a", range(1000000000)))) assert "tXaXrXzXaXn" == "".join(interpose("X", "tarzan")) assert list(interpose(0, itertools.repeat(1, 4))) == [1, 0, 1, 0, 1, 0, 1] assert list(interpose('.', ['a', 'b', 'c'])) == ['a', '.', 'b', '.', 'c'] def test_frequencies(): assert (frequencies(["cat", "pig", "cat", "eel", "pig", "dog", "dog", "dog"]) == {"cat": 2, "eel": 1, "pig": 2, "dog": 3}) assert frequencies([]) == {} assert frequencies("onomatopoeia") == {"a": 2, "e": 1, "i": 1, "m": 1, "o": 4, "n": 1, "p": 1, "t": 1} def test_reduceby(): data = [1, 2, 3, 4, 5] iseven = lambda x: x % 2 == 0 assert reduceby(iseven, add, data, 0) == {False: 9, True: 6} assert reduceby(iseven, mul, data, 1) == {False: 15, True: 8} projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000}, {'name': 'fight crime', 'state': 'IL', 'cost': 100000}, {'name': 'help farmers', 'state': 'IL', 'cost': 2000000}, {'name': 'help farmers', 'state': 'CA', 'cost': 200000}] assert reduceby(lambda x: x['state'], lambda acc, x: acc + x['cost'], projects, 0) == {'CA': 1200000, 'IL': 2100000} assert reduceby('state', lambda acc, x: acc + x['cost'], projects, 0) == {'CA': 1200000, 'IL': 2100000} def test_reduce_by_init(): assert reduceby(iseven, add, [1, 2, 3, 4]) == {True: 2 + 4, False: 1 + 3} assert reduceby(iseven, add, [1, 2, 3, 4], no_default2) == {True: 2 + 4, False: 1 + 3} def test_reduce_by_callable_default(): def set_add(s, i): s.add(i) return s assert reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2], set) == \ {True: {2, 4}, False: {1, 3}} def test_iterate(): assert list(itertools.islice(iterate(inc, 0), 0, 5)) == [0, 1, 2, 3, 4] assert list(take(4, iterate(double, 1))) == [1, 2, 4, 8] def test_accumulate(): assert list(accumulate(add, [1, 2, 3, 4, 5])) == [1, 3, 6, 10, 15] assert list(accumulate(mul, [1, 2, 3, 4, 5])) == [1, 2, 6, 24, 120] assert list(accumulate(add, [1, 2, 3, 4, 5], -1)) == [-1, 0, 2, 5, 9, 14] def binop(a, b): raise AssertionError('binop should not be called') start = object() assert list(accumulate(binop, [], start)) == [start] assert list(accumulate(binop, [])) == [] assert list(accumulate(add, [1, 2, 3], no_default2)) == [1, 3, 6] def test_accumulate_works_on_consumable_iterables(): assert list(accumulate(add, iter((1, 2, 3)))) == [1, 3, 6] def test_sliding_window(): assert list(sliding_window(2, [1, 2, 3, 4])) == [(1, 2), (2, 3), (3, 4)] assert list(sliding_window(3, [1, 2, 3, 4])) == [(1, 2, 3), (2, 3, 4)] def test_sliding_window_of_short_iterator(): assert list(sliding_window(3, [1, 2])) == [] assert list(sliding_window(7, [1, 2])) == [] def test_partition(): assert list(partition(2, [1, 2, 3, 4])) == [(1, 2), (3, 4)] assert list(partition(3, range(7))) == [(0, 1, 2), (3, 4, 5)] assert list(partition(3, range(4), pad=-1)) == [(0, 1, 2), (3, -1, -1)] assert list(partition(2, [])) == [] def test_partition_all(): assert list(partition_all(2, [1, 2, 3, 4])) == [(1, 2), (3, 4)] assert list(partition_all(3, range(5))) == [(0, 1, 2), (3, 4)] assert list(partition_all(2, [])) == [] # Regression test: https://github.com/pytoolz/toolz/issues/387 class NoCompare(object): def __eq__(self, other): if self.__class__ == other.__class__: return True raise ValueError() obj = NoCompare() result = [(obj, 
obj, obj, obj), (obj, obj, obj)] assert list(partition_all(4, [obj]*7)) == result assert list(partition_all(4, iter([obj]*7))) == result def test_count(): assert count((1, 2, 3)) == 3 assert count([]) == 0 assert count(iter((1, 2, 3, 4))) == 4 assert count('hello') == 5 assert count(iter('hello')) == 5 def test_pluck(): assert list(pluck(0, [[0, 1], [2, 3], [4, 5]])) == [0, 2, 4] assert list(pluck([0, 1], [[0, 1, 2], [3, 4, 5]])) == [(0, 1), (3, 4)] assert list(pluck(1, [[0], [0, 1]], None)) == [None, 1] data = [{'id': 1, 'name': 'cheese'}, {'id': 2, 'name': 'pies', 'price': 1}] assert list(pluck('id', data)) == [1, 2] assert list(pluck('price', data, 0)) == [0, 1] assert list(pluck(['id', 'name'], data)) == [(1, 'cheese'), (2, 'pies')] assert list(pluck(['name'], data)) == [('cheese',), ('pies',)] assert list(pluck(['price', 'other'], data, 0)) == [(0, 0), (1, 0)] assert raises(IndexError, lambda: list(pluck(1, [[0]]))) assert raises(KeyError, lambda: list(pluck('name', [{'id': 1}]))) assert list(pluck(0, [[0, 1], [2, 3], [4, 5]], no_default2)) == [0, 2, 4] assert raises(IndexError, lambda: list(pluck(1, [[0]], no_default2))) def test_join(): names = [(1, 'one'), (2, 'two'), (3, 'three')] fruit = [('apple', 1), ('orange', 1), ('banana', 2), ('coconut', 2)] def addpair(pair): return pair[0] + pair[1] result = set(starmap(add, join(first, names, second, fruit))) expected = {(1, 'one', 'apple', 1), (1, 'one', 'orange', 1), (2, 'two', 'banana', 2), (2, 'two', 'coconut', 2)} assert result == expected result = set(starmap(add, join(first, names, second, fruit, left_default=no_default2, right_default=no_default2))) assert result == expected def test_getter(): assert getter(0)('Alice') == 'A' assert getter([0])('Alice') == ('A',) assert getter([])('Alice') == () def test_key_as_getter(): squares = [(i, i**2) for i in range(5)] pows = [(i, i**2, i**3) for i in range(5)] assert set(join(0, squares, 0, pows)) == set(join(lambda x: x[0], squares, lambda x: x[0], pows)) get = lambda x: (x[0], x[1]) assert set(join([0, 1], squares, [0, 1], pows)) == set(join(get, squares, get, pows)) get = lambda x: (x[0],) assert set(join([0], squares, [0], pows)) == set(join(get, squares, get, pows)) def test_join_double_repeats(): names = [(1, 'one'), (2, 'two'), (3, 'three'), (1, 'uno'), (2, 'dos')] fruit = [('apple', 1), ('orange', 1), ('banana', 2), ('coconut', 2)] result = set(starmap(add, join(first, names, second, fruit))) expected = {(1, 'one', 'apple', 1), (1, 'one', 'orange', 1), (2, 'two', 'banana', 2), (2, 'two', 'coconut', 2), (1, 'uno', 'apple', 1), (1, 'uno', 'orange', 1), (2, 'dos', 'banana', 2), (2, 'dos', 'coconut', 2)} assert result == expected def test_join_missing_element(): names = [(1, 'one'), (2, 'two'), (3, 'three')] fruit = [('apple', 5), ('orange', 1)] result = set(starmap(add, join(first, names, second, fruit))) expected = {(1, 'one', 'orange', 1)} assert result == expected def test_left_outer_join(): result = set(join(identity, [1, 2], identity, [2, 3], left_default=None)) expected = {(2, 2), (None, 3)} assert result == expected def test_right_outer_join(): result = set(join(identity, [1, 2], identity, [2, 3], right_default=None)) expected = {(2, 2), (1, None)} assert result == expected def test_outer_join(): result = set(join(identity, [1, 2], identity, [2, 3], left_default=None, right_default=None)) expected = {(2, 2), (1, None), (None, 3)} assert result == expected def test_diff(): assert raises(TypeError, lambda: list(diff())) assert raises(TypeError, lambda: list(diff([1, 2]))) 
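    # `diff` iterates its sequences in lockstep and yields a tuple only at
    # positions where the elements differ; `default` pads shorter inputs
    # and `key` compares transformed values, as the cases below show.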
assert raises(TypeError, lambda: list(diff([1, 2], 3))) assert list(diff([1, 2], (1, 2), iter([1, 2]))) == [] assert list(diff([1, 2, 3], (1, 10, 3), iter([1, 2, 10]))) == [ (2, 10, 2), (3, 3, 10)] assert list(diff([1, 2], [10])) == [(1, 10)] assert list(diff([1, 2], [10], default=None)) == [(1, 10), (2, None)] # non-variadic usage assert raises(TypeError, lambda: list(diff([]))) assert raises(TypeError, lambda: list(diff([[]]))) assert raises(TypeError, lambda: list(diff([[1, 2]]))) assert raises(TypeError, lambda: list(diff([[1, 2], 3]))) assert list(diff([(1, 2), (1, 3)])) == [(2, 3)] data1 = [{'cost': 1, 'currency': 'dollar'}, {'cost': 2, 'currency': 'dollar'}] data2 = [{'cost': 100, 'currency': 'yen'}, {'cost': 300, 'currency': 'yen'}] conversions = {'dollar': 1, 'yen': 0.01} def indollars(item): return conversions[item['currency']] * item['cost'] list(diff(data1, data2, key=indollars)) == [ ({'cost': 2, 'currency': 'dollar'}, {'cost': 300, 'currency': 'yen'})] def test_topk(): assert topk(2, [4, 1, 5, 2]) == (5, 4) assert topk(2, [4, 1, 5, 2], key=lambda x: -x) == (1, 2) assert topk(2, iter([5, 1, 4, 2]), key=lambda x: -x) == (1, 2) assert topk(2, [{'a': 1, 'b': 10}, {'a': 2, 'b': 9}, {'a': 10, 'b': 1}, {'a': 9, 'b': 2}], key='a') == \ ({'a': 10, 'b': 1}, {'a': 9, 'b': 2}) assert topk(2, [{'a': 1, 'b': 10}, {'a': 2, 'b': 9}, {'a': 10, 'b': 1}, {'a': 9, 'b': 2}], key='b') == \ ({'a': 1, 'b': 10}, {'a': 2, 'b': 9}) assert topk(2, [(0, 4), (1, 3), (2, 2), (3, 1), (4, 0)], 0) == \ ((4, 0), (3, 1)) def test_topk_is_stable(): assert topk(4, [5, 9, 2, 1, 5, 3], key=lambda x: 1) == (5, 9, 2, 1) def test_peek(): alist = ["Alice", "Bob", "Carol"] element, blist = peek(alist) assert element == alist[0] assert list(blist) == alist assert raises(StopIteration, lambda: peek([])) def test_peekn(): alist = ("Alice", "Bob", "Carol") elements, blist = peekn(2, alist) assert elements == alist[:2] assert tuple(blist) == alist elements, blist = peekn(len(alist) * 4, alist) assert elements == alist assert tuple(blist) == alist def test_random_sample(): alist = list(range(100)) assert list(random_sample(prob=1, seq=alist, random_state=2016)) == alist mk_rsample = lambda rs=1: list(random_sample(prob=0.1, seq=alist, random_state=rs)) rsample1 = mk_rsample() assert rsample1 == mk_rsample() rsample2 = mk_rsample(1984) randobj = Random(1984) assert rsample2 == mk_rsample(randobj) assert rsample1 != rsample2 assert mk_rsample(hash(object)) == mk_rsample(hash(object)) assert mk_rsample(hash(object)) != mk_rsample(hash(object())) assert mk_rsample(b"a") == mk_rsample(u"a") assert raises(TypeError, lambda: mk_rsample([])) toolz-0.11.2/toolz/tests/test_recipes.py000066400000000000000000000014641414140660100203200ustar00rootroot00000000000000from toolz import first, identity, countby, partitionby def iseven(x): return x % 2 == 0 def test_countby(): assert countby(iseven, [1, 2, 3]) == {True: 1, False: 2} assert countby(len, ['cat', 'dog', 'mouse']) == {3: 2, 5: 1} assert countby(0, ('ab', 'ac', 'bc')) == {'a': 2, 'b': 1} def test_partitionby(): assert list(partitionby(identity, [])) == [] vowels = "aeiou" assert (list(partitionby(vowels.__contains__, "abcdefghi")) == [("a",), ("b", "c", "d"), ("e",), ("f", "g", "h"), ("i",)]) assert (list(map(first, partitionby(identity, [1, 1, 1, 2, 3, 3, 2, 2, 3]))) == [1, 2, 3, 2, 3]) assert ''.join(map(first, partitionby(identity, "Khhhaaaaannnnn!!!!"))) == 'Khan!' 
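# A short usage sketch of the two recipes tested above (the helper name is
# illustrative, not part of the suite): `countby` tallies elements by the
# key function's result, while `partitionby` starts a new group every time
# that result changes.
def _recipes_usage_sketch():
    assert countby(iseven, range(5)) == {True: 3, False: 2}
    assert list(partitionby(iseven, [1, 3, 2, 4, 5])) == [(1, 3), (2, 4), (5,)]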
toolz-0.11.2/toolz/tests/test_serialization.py000066400000000000000000000132371414140660100215440ustar00rootroot00000000000000from toolz import * import toolz import toolz.curried import pickle from toolz.utils import raises def test_compose(): f = compose(str, sum) g = pickle.loads(pickle.dumps(f)) assert f((1, 2)) == g((1, 2)) def test_curry(): f = curry(map)(str) g = pickle.loads(pickle.dumps(f)) assert list(f((1, 2, 3))) == list(g((1, 2, 3))) def test_juxt(): f = juxt(str, int, bool) g = pickle.loads(pickle.dumps(f)) assert f(1) == g(1) assert f.funcs == g.funcs def test_complement(): f = complement(bool) assert f(True) is False assert f(False) is True g = pickle.loads(pickle.dumps(f)) assert f(True) == g(True) assert f(False) == g(False) def test_instanceproperty(): p = toolz.functoolz.InstanceProperty(bool) assert p.__get__(None) is None assert p.__get__(0) is False assert p.__get__(1) is True p2 = pickle.loads(pickle.dumps(p)) assert p2.__get__(None) is None assert p2.__get__(0) is False assert p2.__get__(1) is True def f(x, y): return x, y def test_flip(): flip = pickle.loads(pickle.dumps(toolz.functoolz.flip)) assert flip is toolz.functoolz.flip g1 = flip(f) g2 = pickle.loads(pickle.dumps(g1)) assert g1(1, 2) == g2(1, 2) == f(2, 1) g1 = flip(f)(1) g2 = pickle.loads(pickle.dumps(g1)) assert g1(2) == g2(2) == f(2, 1) def test_curried_exceptions(): # This tests a global curried object that isn't defined in toolz.functoolz merge = pickle.loads(pickle.dumps(toolz.curried.merge)) assert merge is toolz.curried.merge @toolz.curry class GlobalCurried(object): def __init__(self, x, y): self.x = x self.y = y @toolz.curry def f1(self, a, b): return self.x + self.y + a + b def g1(self): pass def __reduce__(self): """Allow us to serialize instances of GlobalCurried""" return GlobalCurried, (self.x, self.y) @toolz.curry class NestedCurried(object): def __init__(self, x, y): self.x = x self.y = y @toolz.curry def f2(self, a, b): return self.x + self.y + a + b def g2(self): pass def __reduce__(self): """Allow us to serialize instances of NestedCurried""" return GlobalCurried.NestedCurried, (self.x, self.y) class Nested(object): def __init__(self, x, y): self.x = x self.y = y @toolz.curry def f3(self, a, b): return self.x + self.y + a + b def g3(self): pass def test_curried_qualname(): def preserves_identity(obj): return pickle.loads(pickle.dumps(obj)) is obj assert preserves_identity(GlobalCurried) assert preserves_identity(GlobalCurried.func.f1) assert preserves_identity(GlobalCurried.func.NestedCurried) assert preserves_identity(GlobalCurried.func.NestedCurried.func.f2) assert preserves_identity(GlobalCurried.func.Nested.f3) global_curried1 = GlobalCurried(1) global_curried2 = pickle.loads(pickle.dumps(global_curried1)) assert global_curried1 is not global_curried2 assert global_curried1(2).f1(3, 4) == global_curried2(2).f1(3, 4) == 10 global_curried3 = global_curried1(2) global_curried4 = pickle.loads(pickle.dumps(global_curried3)) assert global_curried3 is not global_curried4 assert global_curried3.f1(3, 4) == global_curried4.f1(3, 4) == 10 func1 = global_curried1(2).f1(3) func2 = pickle.loads(pickle.dumps(func1)) assert func1 is not func2 assert func1(4) == func2(4) == 10 nested_curried1 = GlobalCurried.func.NestedCurried(1) nested_curried2 = pickle.loads(pickle.dumps(nested_curried1)) assert nested_curried1 is not nested_curried2 assert nested_curried1(2).f2(3, 4) == nested_curried2(2).f2(3, 4) == 10 # If we add `curry.__getattr__` forwarding, the following tests will pass # if not PY34: # 
assert preserves_identity(GlobalCurried.func.g1) # assert preserves_identity(GlobalCurried.func.NestedCurried.func.g2) # assert preserves_identity(GlobalCurried.func.Nested) # assert preserves_identity(GlobalCurried.func.Nested.g3) # # # Rely on curry.__getattr__ # assert preserves_identity(GlobalCurried.f1) # assert preserves_identity(GlobalCurried.NestedCurried) # assert preserves_identity(GlobalCurried.NestedCurried.f2) # assert preserves_identity(GlobalCurried.Nested.f3) # if not PY34: # assert preserves_identity(GlobalCurried.g1) # assert preserves_identity(GlobalCurried.NestedCurried.g2) # assert preserves_identity(GlobalCurried.Nested) # assert preserves_identity(GlobalCurried.Nested.g3) # # nested_curried3 = nested_curried1(2) # nested_curried4 = pickle.loads(pickle.dumps(nested_curried3)) # assert nested_curried3 is not nested_curried4 # assert nested_curried3.f2(3, 4) == nested_curried4.f2(3, 4) == 10 # # func1 = nested_curried1(2).f2(3) # func2 = pickle.loads(pickle.dumps(func1)) # assert func1 is not func2 # assert func1(4) == func2(4) == 10 # # if not PY34: # nested3 = GlobalCurried.func.Nested(1, 2) # nested4 = pickle.loads(pickle.dumps(nested3)) # assert nested3 is not nested4 # assert nested3.f3(3, 4) == nested4.f3(3, 4) == 10 # # func1 = nested3.f3(3) # func2 = pickle.loads(pickle.dumps(func1)) # assert func1 is not func2 # assert func1(4) == func2(4) == 10 def test_curried_bad_qualname(): @toolz.curry class Bad(object): __qualname__ = 'toolz.functoolz.not.a.valid.path' assert raises(pickle.PicklingError, lambda: pickle.dumps(Bad)) toolz-0.11.2/toolz/tests/test_signatures.py000066400000000000000000000054711414140660100210540ustar00rootroot00000000000000import functools import toolz._signatures as _sigs from toolz._signatures import builtins, _is_valid_args, _is_partial_args def test_is_valid(check_valid=_is_valid_args, incomplete=False): orig_check_valid = check_valid check_valid = lambda func, *args, **kwargs: orig_check_valid(func, args, kwargs) assert check_valid(lambda x: None) is None f = builtins.abs assert check_valid(f) is incomplete assert check_valid(f, 1) assert check_valid(f, x=1) is False assert check_valid(f, 1, 2) is False f = builtins.complex assert check_valid(f) assert check_valid(f, 1) assert check_valid(f, real=1) assert check_valid(f, 1, 2) assert check_valid(f, 1, imag=2) assert check_valid(f, 1, real=2) is False assert check_valid(f, 1, 2, 3) is False assert check_valid(f, 1, 2, imag=3) is False f = builtins.int assert check_valid(f) assert check_valid(f, 1) assert check_valid(f, x=1) assert check_valid(f, 1, 2) assert check_valid(f, 1, base=2) assert check_valid(f, x=1, base=2) assert check_valid(f, base=2) is incomplete assert check_valid(f, 1, 2, 3) is False f = builtins.map assert check_valid(f) is incomplete assert check_valid(f, 1) is incomplete assert check_valid(f, 1, 2) assert check_valid(f, 1, 2, 3) assert check_valid(f, 1, 2, 3, 4) f = builtins.min assert check_valid(f) is incomplete assert check_valid(f, 1) assert check_valid(f, iterable=1) is False assert check_valid(f, 1, 2) assert check_valid(f, 1, 2, 3) assert check_valid(f, key=None) is incomplete assert check_valid(f, 1, key=None) assert check_valid(f, 1, 2, key=None) assert check_valid(f, 1, 2, 3, key=None) assert check_valid(f, key=None, default=None) is incomplete assert check_valid(f, 1, key=None, default=None) assert check_valid(f, 1, 2, key=None, default=None) is False assert check_valid(f, 1, 2, 3, key=None, default=None) is False f = builtins.range assert check_valid(f) is 
incomplete assert check_valid(f, 1) assert check_valid(f, 1, 2) assert check_valid(f, 1, 2, 3) assert check_valid(f, 1, 2, step=3) is False assert check_valid(f, 1, 2, 3, 4) is False f = functools.partial assert orig_check_valid(f, (), {}) is incomplete assert orig_check_valid(f, (), {'func': 1}) is incomplete assert orig_check_valid(f, (1,), {}) assert orig_check_valid(f, (1,), {'func': 1}) assert orig_check_valid(f, (1, 2), {}) def test_is_partial(): test_is_valid(check_valid=_is_partial_args, incomplete=True) def test_for_coverage(): # :) assert _sigs._is_arity(1, 1) is None assert _sigs._is_arity(1, all) assert _sigs._has_varargs(None) is None assert _sigs._has_keywords(None) is None assert _sigs._num_required_args(None) is None toolz-0.11.2/toolz/tests/test_tlz.py000066400000000000000000000030711414140660100174730ustar00rootroot00000000000000import toolz def test_tlz(): import tlz tlz.curry tlz.functoolz.curry assert tlz.__package__ == 'tlz' assert tlz.__name__ == 'tlz' import tlz.curried assert tlz.curried.__package__ == 'tlz.curried' assert tlz.curried.__name__ == 'tlz.curried' tlz.curried.curry import tlz.curried.operator assert tlz.curried.operator.__package__ in (None, 'tlz.curried') assert tlz.curried.operator.__name__ == 'tlz.curried.operator' assert tlz.functoolz.__name__ == 'tlz.functoolz' m1 = tlz.functoolz import tlz.functoolz as m2 assert m1 is m2 import tlz.sandbox try: import tlzthisisabadname.curried 1/0 except ImportError: pass try: import tlz.curry 1/0 except ImportError: pass try: import tlz.badsubmodulename 1/0 except ImportError: pass assert toolz.__package__ == 'toolz' assert toolz.curried.__package__ == 'toolz.curried' assert toolz.functoolz.__name__ == 'toolz.functoolz' try: import cytoolz assert cytoolz.__package__ == 'cytoolz' assert cytoolz.curried.__package__ == 'cytoolz.curried' assert cytoolz.functoolz.__name__ == 'cytoolz.functoolz' except ImportError: pass if hasattr(tlz, '__file__'): assert tlz.__file__ == toolz.__file__ if hasattr(tlz.functoolz, '__file__'): assert tlz.functoolz.__file__ == toolz.functoolz.__file__ assert tlz.pipe is toolz.pipe assert 'tlz' in tlz.__doc__ assert tlz.curried.__doc__ is not None toolz-0.11.2/toolz/tests/test_utils.py000066400000000000000000000002341414140660100200200ustar00rootroot00000000000000from toolz.utils import raises def test_raises(): assert raises(ZeroDivisionError, lambda: 1 / 0) assert not raises(ZeroDivisionError, lambda: 1) toolz-0.11.2/toolz/utils.py000066400000000000000000000002131414140660100156140ustar00rootroot00000000000000def raises(err, lamda): try: lamda() return False except err: return True no_default = '__no__default__' toolz-0.11.2/tox.ini000066400000000000000000000002401414140660100142460ustar00rootroot00000000000000[tox] envlist = py35 py36 py37 py38 py39 pypy3 skip_missing_interpreters = true [testenv] deps = pytest commands = py.test {posargs} toolz-0.11.2/versioneer.py000066400000000000000000002057361414140660100155070ustar00rootroot00000000000000 # Version: 0.18 """The Versioneer - like a rocketeer, but for versions. The Versioneer ============== * like a rocketeer, but for versions! 
* https://github.com/warner/python-versioneer * Brian Warner * License: Public Domain * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy * [![Latest Version] (https://pypip.in/version/versioneer/badge.svg?style=flat) ](https://pypi.python.org/pypi/versioneer/) * [![Build Status] (https://travis-ci.org/warner/python-versioneer.png?branch=master) ](https://travis-ci.org/warner/python-versioneer) This is a tool for managing a recorded version number in distutils-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control system, and maybe making new tarballs. ## Quick Install * `pip install versioneer` to somewhere to your $PATH * add a `[versioneer]` section to your setup.cfg (see below) * run `versioneer install` in your source tree, commit the results ## Version Identifiers Source trees come from a variety of places: * a version-control system checkout (mostly used by developers) * a nightly tarball, produced by build automation * a snapshot tarball, produced by a web-based VCS browser, like github's "tarball from tag" feature * a release tarball, produced by "setup.py sdist", distributed through PyPI Within each source tree, the version identifier (either a string or a number, this tool is format-agnostic) can come from a variety of places: * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows about recent "tags" and an absolute revision-id * the name of the directory into which the tarball was unpacked * an expanded VCS keyword ($Id$, etc) * a `_version.py` created by some earlier build step For released software, the version identifier is closely related to a VCS tag. Some projects use tag names that include more than just the version string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool needs to strip the tag prefix to extract the version identifier. For unreleased software (between tags), the version identifier should provide enough information to help developers recreate the same tree, while also giving them an idea of roughly how old the tree is (after version 1.2, before version 1.3). Many VCS systems can report a description that captures this, for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has uncommitted changes. The version identifier is used for multiple purposes: * to allow the module to self-identify its version: `myproject.__version__` * to choose a name and prefix for a 'setup.py sdist' tarball ## Theory of Operation Versioneer works by adding a special `_version.py` file into your source tree, where your `__init__.py` can import it. This `_version.py` knows how to dynamically ask the VCS tool for version information at import time. `_version.py` also contains `$Revision$` markers, and the installation process marks `_version.py` to have this marker rewritten with a tag name during the `git archive` command. As a result, generated tarballs will contain enough information to get the proper version. To allow `setup.py` to compute a version too, a `versioneer.py` is added to the top level of your source tree, next to `setup.py` and the `setup.cfg` that configures it. 
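The `[versioneer]` section of that `setup.cfg` drives everything; a
representative example (the values shown here are illustrative, not taken
from any particular project):

    [versioneer]
    VCS = git
    style = pep440
    versionfile_source = src/mypackage/_version.py
    versionfile_build = mypackage/_version.py
    tag_prefix =
    parentdir_prefix = mypackage-

The `versioneer.py` script is what hooks into the build machinery.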
This overrides several distutils/setuptools commands to compute the version when invoked, and changes `setup.py build` and `setup.py sdist` to replace `_version.py` with a small static file that contains just the generated version data. ## Installation See [INSTALL.md](./INSTALL.md) for detailed installation instructions. ## Version-String Flavors Code which uses Versioneer can learn about its version string at runtime by importing `_version` from your main `__init__.py` file and running the `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can import the top-level `versioneer.py` and run `get_versions()`. Both functions return a dictionary with different flavors of version information: * `['version']`: A condensed version string, rendered using the selected style. This is the most commonly used value for the project's version string. The default "pep440" style yields strings like `0.11`, `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section below for alternative styles. * `['full-revisionid']`: detailed revision identifier. For Git, this is the full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the commit date in ISO 8601 format. This will be None if the date is not available. * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that this is only accurate if run in a VCS checkout, otherwise it is likely to be False or None * `['error']`: if the version string could not be computed, this will be set to a string describing the problem, otherwise it will be None. It may be useful to throw an exception in setup.py if this is set, to avoid e.g. creating tarballs with a version string of "unknown". Some variants are more useful than others. Including `full-revisionid` in a bug report should allow developers to reconstruct the exact code being tested (or indicate the presence of local changes that should be shared with the developers). `version` is suitable for display in an "about" box or a CLI `--version` output: it can be easily compared against release notes and lists of bugs fixed in various releases. The installer adds the following text to your `__init__.py` to place a basic version in `YOURPROJECT.__version__`: from ._version import get_versions __version__ = get_versions()['version'] del get_versions ## Styles The setup.cfg `style=` configuration controls how the VCS information is rendered into a version string. The default style, "pep440", produces a PEP440-compliant string, equal to the un-prefixed tag name for actual releases, and containing an additional "local version" section with more detail for in-between builds. For Git, this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11" tag. For released software (exactly equal to a known tag), the identifier will only contain the stripped tag, e.g. "0.11". Other styles are available. See [details.md](details.md) in the Versioneer source tree for descriptions. ## Debugging Versioneer tries to avoid fatal errors: if something goes wrong, it will tend to return a version of "0+unknown". 
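For example, in a tree where no version can be determined at all, the
returned dictionary degrades to something like:

    {"version": "0+unknown", "full-revisionid": None,
     "dirty": None, "error": "unable to compute version", "date": None}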
To investigate the problem, run `setup.py version`, which will run the
version-lookup code in a verbose mode, and will display the full contents of
`get_versions()` (including the `error` string, which may help identify what
went wrong).

## Known Limitations

Some situations are known to cause problems for Versioneer. This details the
most significant ones. More can be found on Github
[issues page](https://github.com/warner/python-versioneer/issues).

### Subprojects

Versioneer has limited support for source trees in which `setup.py` is not in
the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There
are two common reasons why `setup.py` might not be in the root:

* Source trees which contain multiple subprojects, such as
  [Buildbot](https://github.com/buildbot/buildbot), which contains both
  "master" and "slave" subprojects, each with their own `setup.py`,
  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
  distributions (and upload multiple independently-installable tarballs).
* Source trees whose main purpose is to contain a C library, but which also
  provide bindings to Python (and perhaps other languages) in subdirectories.

Versioneer will look for `.git` in parent directories, and most operations
should get the right version string. However `pip` and `setuptools` have bugs
and implementation details which frequently cause `pip install .` from a
subproject directory to fail to find a correct version string (so it usually
defaults to `0+unknown`).

`pip install --editable .` should work correctly. `setup.py install` might
work too.

Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
some later version.

[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking
this issue. The discussion in
[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the
issue from the Versioneer side in more detail.
[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
pip to let Versioneer work correctly.

Versioneer-0.16 and earlier only looked for a `.git` directory next to the
`setup.cfg`, so subprojects were completely unsupported with those releases.

### Editable installs with setuptools <= 18.5

`setup.py develop` and `pip install --editable .` allow you to install a
project into a virtualenv once, then continue editing the source code (and
test) without re-installing after every change.

"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
convenient way to specify executable scripts that should be installed along
with the python package.

These both work as expected when using modern setuptools. When using
setuptools-18.5 or earlier, however, certain operations will cause
`pkg_resources.DistributionNotFound` errors when running the entrypoint
script, which must be resolved by re-installing the package. This happens
when the install happens with one version, then the egg_info data is
regenerated while a different version is checked out. Many setup.py commands
cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
a different virtualenv), so this can be surprising.

[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes
this one, but upgrading to a newer version of setuptools should probably
resolve it.

### Unicode version strings

While Versioneer works (and is continually tested) with both Python 2 and
Python 3, it is not entirely consistent with bytes-vs-unicode distinctions.
Newer releases probably generate unicode version strings on py2. It's not clear that this is wrong, but it may be surprising for applications when then write these strings to a network connection or include them in bytes-oriented APIs like cryptographic checksums. [Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates this question. ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) * edit `setup.cfg`, if necessary, to include any new configuration settings indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. * re-run `versioneer install` in your source tree, to replace `SRC/_version.py` * commit any changed files ## Future Directions This tool is designed to make it easily extended to other version-control systems: all VCS-specific components are in separate directories like src/git/ . The top-level `versioneer.py` script is assembled from these components by running make-versioneer.py . In the future, make-versioneer.py will take a VCS name as an argument, and will construct a version of `versioneer.py` that is specific to the given VCS. It might also take the configuration arguments that are currently provided manually during installation by editing setup.py . Alternatively, it might go the other direction and include code from all supported VCS systems, reducing the number of intermediate scripts. ## License To make Versioneer easier to embed, all its code is dedicated to the public domain. The `_version.py` that it creates is also in the public domain. Specifically, both are released under the Creative Commons "Public Domain Dedication" license (CC0-1.0), as described in https://creativecommons.org/publicdomain/zero/1.0/ . """ from __future__ import print_function try: import configparser except ImportError: import ConfigParser as configparser import errno import json import os import re import subprocess import sys class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_root(): """Get the project root directory. We require that all commands are run from the project root, i.e. the directory that contains setup.py, setup.cfg, and versioneer.py . """ root = os.path.realpath(os.path.abspath(os.getcwd())) setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): # allow 'python path/to/setup.py COMMAND' root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): err = ("Versioneer was unable to run the project root directory. " "Versioneer requires setup.py to be executed from " "its immediate directory (like 'python setup.py COMMAND'), " "or in a way that lets it use sys.argv[0] to find the root " "(like 'python path/to/setup.py COMMAND').") raise VersioneerBadRootError(err) try: # Certain runtime workflows (setup.py install/develop in a setuptools # tree) execute all dependencies in a single python process, so # "versioneer" may be imported multiple times, and python's shared # module-import table will cache the first one. So we can't use # os.path.dirname(__file__), as that will find whichever # versioneer.py was first imported, even in later projects. 
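        # Instead, compare this file's canonical path against the
        # versioneer.py that sits next to setup.py, and warn when a build
        # is picking up a copy that belongs to a different project.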
me = os.path.realpath(os.path.abspath(__file__)) me_dir = os.path.normcase(os.path.splitext(me)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) if me_dir != vsr_dir: print("Warning: build in %s is using versioneer.py from %s" % (os.path.dirname(me), versioneer_py)) except NameError: pass return root def get_config_from_root(root): """Read the project setup.cfg file to determine Versioneer config.""" # This might raise EnvironmentError (if setup.cfg is missing), or # configparser.NoSectionError (if it lacks a [versioneer] section), or # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.ConfigParser() parser.read(setup_cfg) VCS = parser.get("versioneer", "VCS") # mandatory def get(parser, name): if parser.has_option("versioneer", name): return parser.get("versioneer", name) return None cfg = VersioneerConfig() cfg.VCS = VCS cfg.style = get(parser, "style") or "" cfg.versionfile_source = get(parser, "versionfile_source") cfg.versionfile_build = get(parser, "versionfile_build") cfg.tag_prefix = get(parser, "tag_prefix") if cfg.tag_prefix in ("''", '""'): cfg.tag_prefix = "" cfg.parentdir_prefix = get(parser, "parentdir_prefix") cfg.verbose = get(parser, "verbose") return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" # these dictionaries contain VCS-specific tools LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) return None, p.returncode return stdout, p.returncode LONG_VERSION_PY['git'] = ''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.18 (https://github.com/warner/python-versioneer) """Git implementation of _version.py.""" import errno import os import re import subprocess import sys def get_keywords(): """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. 
# setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_config(): """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "%(STYLE)s" cfg.tag_prefix = "%(TAG_PREFIX)s" cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %%s" %% dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %%s" %% (commands,)) return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) print("stdout was %%s" %% stdout) return None, p.returncode return stdout, p.returncode def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %%s but none started with prefix %%s" %% (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
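    # In a plain checkout the scanned lines still hold the unexpanded
    # Format placeholders; in a 'git archive' tarball they have been
    # rewritten to look roughly like (values illustrative):
    #   git_refnames = " (HEAD -> master, tag: 1.0)"
    #   git_full = "1076c978a8d3cfc70f408fe5974aa6c092c949ac"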
keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%%s', no digits" %% ",".join(refs - tags)) if verbose: print("likely tags: %%s" %% ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %%s" %% r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. 
""" GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %%s not under git control" %% root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%%s*" %% tag_prefix], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%%s'" %% describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%%s' doesn't start with prefix '%%s'" print(fmt %% (full_tag, tag_prefix)) pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" %% (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): """TAG[.post.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 
0.post.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post.dev%%d" %% pieces["distance"] else: # exception #1 rendered = "0.post.dev%%d" %% pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%%s" %% pieces["short"] else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%%s" %% pieces["short"] return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Eexceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%%s'" %% style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions(): """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. 
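    # The strategies below are tried in order: expanded git-archive
    # keywords first, then 'git describe' on the surrounding checkout,
    # then the parent directory name, before giving up with "0+unknown".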
cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for i in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} ''' @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". 
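        # e.g. a refnames value like " (HEAD, release, 1.2)" keeps only
        # the digit-bearing "1.2" after the filter below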
tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? 
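            # For reference (hypothetical values): a well-formed output such
            # as "0.11.2-3-g1a2b3c4" matches the pattern above with groups
            # ("0.11.2", "3", "1a2b3c4"); anything else lands in this error
            # branch.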
pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def do_vcs_install(manifest_in, versionfile_source, ipy): """Git-specific installation logic for Versioneer. For Git, this means creating/changing .gitattributes to mark _version.py for export-subst keyword substitution. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] files = [manifest_in, versionfile_source] if ipy: files.append(ipy) try: me = __file__ if me.endswith(".pyc") or me.endswith(".pyo"): me = os.path.splitext(me)[0] + ".py" versioneer_file = os.path.relpath(me) except NameError: versioneer_file = "versioneer.py" files.append(versioneer_file) present = False try: f = open(".gitattributes", "r") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() except EnvironmentError: pass if not present: f = open(".gitattributes", "a+") f.write("%s export-subst\n" % versionfile_source) f.close() files.append(".gitattributes") run_command(GITS, ["add", "--"] + files) def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") SHORT_VERSION_PY = """ # This file was generated by 'versioneer.py' (0.18) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. 
import json

version_json = '''
%s
'''  # END VERSION_JSON


def get_versions():
    return json.loads(version_json)
"""


def versions_from_file(filename):
    """Try to determine the version from _version.py if present."""
    try:
        with open(filename) as f:
            contents = f.read()
    except EnvironmentError:
        raise NotThisMethod("unable to read _version.py")
    mo = re.search(r"version_json = '''\n(.*)'''  # END VERSION_JSON",
                   contents, re.M | re.S)
    if not mo:
        mo = re.search(r"version_json = '''\r\n(.*)'''  # END VERSION_JSON",
                       contents, re.M | re.S)
    if not mo:
        raise NotThisMethod("no version_json in _version.py")
    return json.loads(mo.group(1))


def write_to_version_file(filename, versions):
    """Write the given version number to the given _version.py file."""
    os.unlink(filename)
    contents = json.dumps(versions, sort_keys=True,
                          indent=1, separators=(",", ": "))
    with open(filename, "w") as f:
        f.write(SHORT_VERSION_PY % contents)

    print("set %s to '%s'" % (filename, versions["version"]))


def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a ."""
    if "+" in pieces.get("closest-tag", ""):
        return "."
    return "+"


def render_pep440(pieces):
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += plus_or_dot(pieces)
            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # exception #1
        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
                                          pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered


def render_pep440_pre(pieces):
    """TAG[.post.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post.devDISTANCE
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += ".post.dev%d" % pieces["distance"]
    else:
        # exception #1
        rendered = "0.post.dev%d" % pieces["distance"]
    return rendered


def render_pep440_post(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "g%s" % pieces["short"]
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += "+g%s" % pieces["short"]
    return rendered


def render_pep440_old(pieces):
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
    return rendered


def render_git_describe(pieces):
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags.
    HEX[-dirty]  (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render_git_describe_long(pieces):
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always --long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render(pieces, style):
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    if not style or style == "default":
        style = "pep440"  # the default

    if style == "pep440":
        rendered = render_pep440(pieces)
    elif style == "pep440-pre":
        rendered = render_pep440_pre(pieces)
    elif style == "pep440-post":
        rendered = render_pep440_post(pieces)
    elif style == "pep440-old":
        rendered = render_pep440_old(pieces)
    elif style == "git-describe":
        rendered = render_git_describe(pieces)
    elif style == "git-describe-long":
        rendered = render_git_describe_long(pieces)
    else:
        raise ValueError("unknown style '%s'" % style)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}


class VersioneerBadRootError(Exception):
    """The project root directory is unknown or missing key files."""


def get_versions(verbose=False):
    """Get the project version from whatever source is available.

    Returns dict with two keys: 'version' and 'full'.
    """
    if "versioneer" in sys.modules:
        # see the discussion in cmdclass.py:get_cmdclass()
        del sys.modules["versioneer"]

    root = get_root()
    cfg = get_config_from_root(root)

    assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
    handlers = HANDLERS.get(cfg.VCS)
    assert handlers, "unrecognized VCS '%s'" % cfg.VCS
    verbose = verbose or cfg.verbose
    assert cfg.versionfile_source is not None, \
        "please set versioneer.versionfile_source"
    assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"

    versionfile_abs = os.path.join(root, cfg.versionfile_source)

    # extract version from first of: _version.py, VCS command (e.g. 'git
    # describe'), parentdir. This is meant to work for developers using a
    # source checkout, for users of a tarball created by 'setup.py sdist',
    # and for users of a tarball/zipball created by 'git archive' or github's
    # download-from-tag feature or the equivalent in other VCSes.
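    # Summary of the fallback chain below (descriptive comment, matching the
    # handlers registered above): expanded git-archive keywords, then an
    # already-rewritten _version.py, then 'git describe' on a checkout, then
    # the parent directory name; the first method that does not raise
    # NotThisMethod wins.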
    get_keywords_f = handlers.get("get_keywords")
    from_keywords_f = handlers.get("keywords")
    if get_keywords_f and from_keywords_f:
        try:
            keywords = get_keywords_f(versionfile_abs)
            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
            if verbose:
                print("got version from expanded keyword %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        ver = versions_from_file(versionfile_abs)
        if verbose:
            print("got version from file %s %s" % (versionfile_abs, ver))
        return ver
    except NotThisMethod:
        pass

    from_vcs_f = handlers.get("pieces_from_vcs")
    if from_vcs_f:
        try:
            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
            ver = render(pieces, cfg.style)
            if verbose:
                print("got version from VCS %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        if cfg.parentdir_prefix:
            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
            if verbose:
                print("got version from parentdir %s" % ver)
            return ver
    except NotThisMethod:
        pass

    if verbose:
        print("unable to compute version")

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None, "error": "unable to compute version",
            "date": None}


def get_version():
    """Get the short version string for this project."""
    return get_versions()["version"]


def get_cmdclass():
    """Get the custom setuptools/distutils subclasses used by Versioneer."""
    if "versioneer" in sys.modules:
        del sys.modules["versioneer"]
        # this fixes the "python setup.py develop" case (also 'install' and
        # 'easy_install .'), in which subdependencies of the main project are
        # built (using setup.py bdist_egg) in the same python process. Assume
        # a main project A and a dependency B, which use different versions
        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
        # sys.modules by the time B's setup.py is executed, causing B to run
        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
        # sandbox that restores sys.modules to its pre-build state, so the
        # parent is protected against the child's "import versioneer". By
        # removing ourselves from sys.modules here, before the child build
        # happens, we protect the child from the parent's versioneer too.
        # Also see https://github.com/warner/python-versioneer/issues/52

    cmds = {}

    # we add "version" to both distutils and setuptools
    from distutils.core import Command

    class cmd_version(Command):
        description = "report generated version string"
        user_options = []
        boolean_options = []

        def initialize_options(self):
            pass

        def finalize_options(self):
            pass

        def run(self):
            vers = get_versions(verbose=True)
            print("Version: %s" % vers["version"])
            print(" full-revisionid: %s" % vers.get("full-revisionid"))
            print(" dirty: %s" % vers.get("dirty"))
            print(" date: %s" % vers.get("date"))
            if vers["error"]:
                print(" error: %s" % vers["error"])
    cmds["version"] = cmd_version

    # we override "build_py" in both distutils and setuptools
    #
    # most invocation pathways end up running build_py:
    #  distutils/build -> build_py
    #  distutils/install -> distutils/build ->..
    #  setuptools/bdist_wheel -> distutils/install ->..
    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
    #  setuptools/install -> bdist_egg ->..
    #  setuptools/develop -> ?
    #  pip install:
    #   copies source tree to a tempdir before running egg_info/etc
    #   if .git isn't copied too, 'git describe' will fail
    #   then does setup.py bdist_wheel, or sometimes setup.py install
    #  setup.py egg_info -> ?
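    # A minimal setup.py wiring for this machinery (sketch only; it mirrors
    # the CONFIG_ERROR text further below):
    #
    #     import versioneer
    #     from setuptools import setup
    #     setup(name="myproject",
    #           version=versioneer.get_version(),
    #           cmdclass=versioneer.get_cmdclass())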
# we override different "build_py" commands for both environments if "setuptools" in sys.modules: from setuptools.command.build_py import build_py as _build_py else: from distutils.command.build_py import build_py as _build_py class cmd_build_py(_build_py): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_py.run(self) # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) cmds["build_py"] = cmd_build_py if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION # "product_version": versioneer.get_version(), # ... class cmd_build_exe(_build_exe): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _build_exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) cmds["build_exe"] = cmd_build_exe del cmds["build_py"] if 'py2exe' in sys.modules: # py2exe enabled? try: from py2exe.distutils_buildexe import py2exe as _py2exe # py3 except ImportError: from py2exe.build_exe import py2exe as _py2exe # py2 class cmd_py2exe(_py2exe): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _py2exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) cmds["py2exe"] = cmd_py2exe # we override different "sdist" commands for both environments if "setuptools" in sys.modules: from setuptools.command.sdist import sdist as _sdist else: from distutils.command.sdist import sdist as _sdist class cmd_sdist(_sdist): def run(self): versions = get_versions() self._versioneer_generated_versions = versions # unless we update this, the command will keep using the old # version self.distribution.metadata.version = versions["version"] return _sdist.run(self) def make_release_tree(self, base_dir, files): root = get_root() cfg = get_config_from_root(root) _sdist.make_release_tree(self, base_dir, files) # now locate _version.py in the new base_dir directory # (remembering that it may be a hardlink) and replace it with an # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, self._versioneer_generated_versions) cmds["sdist"] = cmd_sdist return cmds CONFIG_ERROR = """ setup.cfg is missing the necessary Versioneer configuration. 
You need a section like: [versioneer] VCS = git style = pep440 versionfile_source = src/myproject/_version.py versionfile_build = myproject/_version.py tag_prefix = parentdir_prefix = myproject- You will also need to edit your setup.py to use the results: import versioneer setup(version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), ...) Please read the docstring in ./versioneer.py for configuration instructions, edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. """ SAMPLE_CONFIG = """ # See the docstring in versioneer.py for instructions. Note that you must # re-run 'versioneer.py setup' after changing this section, and commit the # resulting files. [versioneer] #VCS = git #style = pep440 #versionfile_source = #versionfile_build = #tag_prefix = #parentdir_prefix = """ INIT_PY_SNIPPET = """ from ._version import get_versions __version__ = get_versions()['version'] del get_versions """ def do_setup(): """Main VCS-independent setup function for installing Versioneer.""" root = get_root() try: cfg = get_config_from_root(root) except (EnvironmentError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: f.write(SAMPLE_CONFIG) print(CONFIG_ERROR, file=sys.stderr) return 1 print(" creating %s" % cfg.versionfile_source) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: with open(ipy, "r") as f: old = f.read() except EnvironmentError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) with open(ipy, "a") as f: f.write(INIT_PY_SNIPPET) else: print(" %s unmodified" % ipy) else: print(" %s doesn't exist, ok" % ipy) ipy = None # Make sure both the top-level "versioneer.py" and versionfile_source # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so # they'll be copied into source distributions. Pip won't be able to # install the package without this. manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: with open(manifest_in, "r") as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) except EnvironmentError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so # it might give some false negatives. Appending redundant 'include' # lines is safe, though. if "versioneer.py" not in simple_includes: print(" appending 'versioneer.py' to MANIFEST.in") with open(manifest_in, "a") as f: f.write("include versioneer.py\n") else: print(" 'versioneer.py' already in MANIFEST.in") if cfg.versionfile_source not in simple_includes: print(" appending versionfile_source ('%s') to MANIFEST.in" % cfg.versionfile_source) with open(manifest_in, "a") as f: f.write("include %s\n" % cfg.versionfile_source) else: print(" versionfile_source already in MANIFEST.in") # Make VCS-specific changes. For git, this means creating/changing # .gitattributes to mark _version.py for export-subst keyword # substitution. 
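    # For example (illustrative path): with versionfile_source set to
    # "toolz/_version.py", do_vcs_install() below ends up appending the line
    #     toolz/_version.py export-subst
    # to .gitattributes.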
do_vcs_install(manifest_in, cfg.versionfile_source, ipy) return 0 def scan_setup_py(): """Validate the contents of setup.py against Versioneer's expectations.""" found = set() setters = False errors = 0 with open("setup.py", "r") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") if "versioneer.get_cmdclass()" in line: found.add("cmdclass") if "versioneer.get_version()" in line: found.add("get_version") if "versioneer.VCS" in line: setters = True if "versioneer.versionfile_source" in line: setters = True if len(found) != 3: print("") print("Your setup.py appears to be missing some important items") print("(but I might be wrong). Please make sure it has something") print("roughly like the following:") print("") print(" import versioneer") print(" setup( version=versioneer.get_version(),") print(" cmdclass=versioneer.get_cmdclass(), ...)") print("") errors += 1 if setters: print("You should remove lines like 'versioneer.VCS = ' and") print("'versioneer.versionfile_source = ' . This configuration") print("now lives in setup.cfg, and should be removed from setup.py") print("") errors += 1 return errors if __name__ == "__main__": cmd = sys.argv[1] if cmd == "setup": errors = do_setup() errors += scan_setup_py() if errors: sys.exit(1)
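# Worked example of the render styles defined above, assuming a hypothetical
# pieces dict such as
#     {"closest-tag": "0.11.2", "distance": 3, "short": "1a2b3c4",
#      "dirty": True, "long": "...", "error": None, "date": None}
# the styles render as:
#     pep440:             0.11.2+3.g1a2b3c4.dirty
#     pep440-pre:         0.11.2.post.dev3      (the -pre style ignores dirty)
#     pep440-post:        0.11.2.post3.dev0+g1a2b3c4
#     pep440-old:         0.11.2.post3.dev0
#     git-describe:       0.11.2-3-g1a2b3c4-dirty
#     git-describe-long:  0.11.2-3-g1a2b3c4-dirty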