TileDB-Py-0.12.2 source archive (git commit 7f30408a034a20f02cffa0d7de68c538b978da84)

==> TileDB-Py-0.12.2/.dockerignore <==
build
vv
venv
cibuildwheel_env

==> TileDB-Py-0.12.2/.github/disabled-workflows/release.yml <==
name: TileDB Python Release

on:
  push:
    branches:
      - 'release-*'
    tags:
      - '**'

jobs:
  build:
    name: Create Release
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token
        with:
          tag_name: ${{ github.ref }}
          release_name: TileDB-Py ${{ github.ref }}
          draft: false
          prerelease: false

==> TileDB-Py-0.12.2/.github/workflows/ci.yml <==
name: TileDB Python CI

on: [push, pull_request]

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-10.15, windows-latest]
        python-version: ['3.7', '3.8', '3.9', '3.10']
    env:
      MACOSX_DEPLOYMENT_TARGET: 10.14
    steps:
      - name: Checkout TileDB-Py `dev`
        uses: actions/checkout@v2

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      # Print python and pip version information for debugging.
      - name: 'Print python version in bash task'
        run: |
          echo '==== Python information ===='
          which python
          which pip
          python --version
          echo '============================'
        shell: bash

      - name: 'Print env'
        run: printenv
        shell: bash

      - name: 'Install dependencies'
        run: python -m pip install --upgrade -r misc/requirements_ci.txt
        shell: bash

      - name: 'Build TileDB and TileDB-Py extension (Windows)'
        run: |
          python setup.py build_ext --inplace
          python setup.py install
        shell: bash
        if: matrix.os == 'windows-latest'

      - name: 'Build TileDB and TileDB-Py extension (POSIX)'
        run: |
          set -xeo pipefail
          python setup.py build_ext --inplace --werror
          python setup.py install
        shell: bash
        if: matrix.os == 'ubuntu-latest' || matrix.os == 'macos-10.15'

      - name: 'Run tests'
        run: |
          set -xeo pipefail
          pytest -vv --showlocals

          # Test wheel build, install, and run
          python setup.py bdist_wheel
          #whl_file=`pwd`/dist/`ls dist/*.whl`
          mkdir /tmp/wheel_test
          cp dist/*.whl /tmp/wheel_test
          pushd /tmp/wheel_test
          ls
          pip install *.whl
          python -c 'import tiledb ; tiledb.libtiledb.version()'
        shell: bash

      - name: 'Print log files (failed build only)'
        run: |
          set -xeo pipefail
          # Display log files if the build failed
          echo 'Dumping log files for failed build'
          echo '----------------------------------'
          for f in $(find $BUILD_REPOSITORY_LOCALPATH/build -name *.log); do
            echo '------'
            echo $f
            echo '======'
            cat $f
          done;
        shell: bash
        if: ${{ failure() }} # only run this step if the build failed

==> TileDB-Py-0.12.2/.github/workflows/daily-test-build-issue-template.md <==
---
title: Nightly GitHub Actions Build Fail on {{ date | date('ddd, MMMM Do YYYY') }}
assignees: nguyenv, ihnorton
labels: bug
---

See run for more details:
https://github.com/{{ env.GITHUB_REPOSITORY }}/actions/runs/{{ env.GITHUB_RUN_ID }}

==> TileDB-Py-0.12.2/.github/workflows/daily-test-build.yml <==
name: Daily Test Build TileDB-Py Against Core

#on: [push]

on:
  schedule:
    # runs every day at 5:00 UTC (1:00AM EST / Midnight CST)
    - cron: "0 5 * * *"

jobs:
  test-wheels-on-azure:
    runs-on: ubuntu-latest
    steps:
      - name: Get current date
        id: date
        run: echo "::set-output name=date::$(date +'%a-%Y-%m-%d')"
      - name: Get libtiledb short SHA
        run: echo "LIBTILEDB_SHA=$(git ls-remote https://github.com/TileDB-Inc/TileDB HEAD | cut -c1-7)" >> $GITHUB_ENV
      - name: Create Test Branch for Azure Wheel Nightly Build
        uses: peterjgrainger/action-create-branch@v2.0.1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          branch: 'azure-wheel-test-${{ steps.date.outputs.date }}-against-${{ env.LIBTILEDB_SHA }}'

  test:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-10.15, windows-latest]
    permissions:
      issues: write
    env:
      TILEDB_VERSION: dev
      MACOSX_DEPLOYMENT_TARGET: 10.14
    steps:
      - name: Set up Python
        uses: actions/setup-python@v2
      - name: Print Python version
        run: |
          which python
          which pip
          python --version
      - name: Print env
        run: printenv
      - name: Checkout TileDB-Py `dev`
        uses: actions/checkout@v2
      - name: Install dependencies
        run: python -m pip install --upgrade -r misc/requirements_ci.txt
      - name: Build TileDB-Py
        run: |
          python setup.py build_ext --inplace --werror
          python setup.py install
      - name: Test TileDB-Py
        run: pytest -vv

  create_issue_on_fail:
    runs-on: ubuntu-latest
    needs: test
    if: failure() || cancelled()
    steps:
      - name: Checkout TileDB-Py `dev`
        uses: actions/checkout@v2
      - name: Create Issue if Build Fails
        uses: JasonEtco/create-an-issue@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          filename: .github/workflows/daily-test-build-issue-template.md

==> TileDB-Py-0.12.2/.github/workflows/format.yml <==
name: TileDB Python Linting

on: [push, pull_request]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
      - name: Check Python Black Format
        uses: psf/black@stable
        with:
          options: ". --check"
      - name: Check Clang-Format
        uses: DoozyX/clang-format-lint-action@v0.13
        with:
          clangFormatVersion: '10'
          source: 'tiledb'

==> TileDB-Py-0.12.2/.github/workflows/issue-if-azure-fail-template.md <==
---
title: Nightly Azure Wheel Fail on {{ date | date('ddd, MMMM Do YYYY') }}
assignees: nguyenv, ihnorton
labels: bug
---

See run for more details:
https://dev.azure.com/TileDB-Inc/CI/_build/results?buildId=${{ env.AZURE_BUILD_ID }}&view=results

==> TileDB-Py-0.12.2/.github/workflows/issue-if-azure-fail.yml <==
name: Create Issue if Build Fails on Azure

on: [repository_dispatch]

jobs:
  clean-branch:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Get current date
        id: date
        run: echo "::set-output name=date::$(date +'%a-%Y-%m-%d')"
      - name: Get libtiledb short SHA
        run: echo "LIBTILEDB_SHA=$(git ls-remote https://github.com/TileDB-Inc/TileDB HEAD | cut -c1-7)" >> $GITHUB_ENV
      - name: Clean Up Test Branch
        uses: dawidd6/action-delete-branch@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branches: 'azure-wheel-test-${{ steps.date.outputs.date }}-against-${{ env.LIBTILEDB_SHA }}'

  notify-fail:
    if: github.event.action == 'failed'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Create Issue if Build Fails
        uses: JasonEtco/create-an-issue@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AZURE_BUILD_ID: ${{ github.event.client_payload.build_id }}
        with:
          filename: .github/workflows/issue-if-azure-fail-template.md

==> TileDB-Py-0.12.2/.gitignore <==
# pyproject.toml: Forces build isolation and other problems; ecosystem still spotty.
pyproject.toml

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
*.dylib

# Cython extensions
*.cpp

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
doc/source/_build
doc/source/_sidebar.rst.inc

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# setuptools-scm
tiledb/version.py

# IntelliJ
.idea

*.DS_Store

==> TileDB-Py-0.12.2/.readthedocs.yml <==
# Don't build any extra formats
formats: []

# Use RTD config version 2
# https://docs.readthedocs.io/en/stable/config-file/v2.html
version: 2

sphinx:
  configuration: doc/source/conf.py

python:
  version: 3.7
  install:
    # this order is important: we need to get cmake
    - requirements: doc/requirements_doc.txt
    - method: setuptools
      path: .

==> TileDB-Py-0.12.2/CONTRIBUTING.md <==
# Contributing to TileDB-Py

Thanks for your interest in TileDB-Py. The notes below give some pointers for filing issues and bug reports, and for contributing to the code.

## Contribution Checklist

- Reporting a bug? Please include the following information:
  - operating system and version (windows, linux, macos, etc.)
  - the output of `tiledb.version()` and `tiledb.libtiledb.version()`
  - if possible, a minimal working example demonstrating the bug or issue (along with any data needed to re-create it, when feasible)
- Please paste code blocks with triple backquotes (```) so that GitHub will format them nicely. See [GitHub's guide on Markdown](https://guides.github.com/features/mastering-markdown) for more formatting tricks.

## Contributing Code

*By contributing code to TileDB-Py, you are agreeing to release it under the [MIT License](https://github.com/TileDB-Inc/TileDB/tree/dev/LICENSE).*

### Contribution Workflow

- Quick steps to build locally:
  - install prerequisites via pip or conda: `pybind11`, `cython`, `numpy`, `pandas`, `pyarrow`
  - recommended: install TileDB Embedded (libtiledb) separately. NOTE: if the libtiledb path is not specified with `--tiledb`, it will be built automatically by `setup.py`. However, this build is internal to the source tree and somewhat difficult to modify. When working on both projects simultaneously, it is strongly suggested to build libtiledb separately. Changes to libtiledb must be installed (`make install-tiledb`) to `dist` in order to be used with `--tiledb`.
    - from the latest release build: https://github.com/TileDB-Inc/TileDB/releases
      - `tar xf tiledb-<platform>-<version>.tar.gz -C /path/to/extract`
      - use `--tiledb=/path/to/extract` (note: this path should _contain_ the `lib` directory)
    - from [conda-forge](https://anaconda.org/conda-forge/tiledb): `mamba install tiledb`
      - `--tiledb=$CONDA_PREFIX`
    - from source: https://docs.tiledb.com/main/how-to/installation/building-from-source/c-cpp
      - use `--tiledb=/path/to/tiledb/dist`
  - build TileDB-Py:

    ```
    git clone https://github.com/TileDB-Inc/TileDB-Py
    cd TileDB-Py
    python setup.py develop --tiledb=<path/to/tiledb/dist>
    ```

- Make changes locally, then rebuild with `python setup.py develop [--tiledb=<>]`
- Make sure to run `pytest` to verify changes against tests (add new tests where applicable).
- Execute the tests as `pytest tiledb` from the top-level directory, or `pytest` in the `tiledb/` directory.
- Please submit [pull requests](https://help.github.com/en/desktop/contributing-to-projects/creating-a-pull-request) against the default [`dev` branch of TileDB-Py](https://github.com/TileDB-Inc/TileDB-Py/tree/dev).

==> TileDB-Py-0.12.2/HISTORY.md <==
# TileDB-Py 0.12.2 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.12.2 includes TileDB Embedded [TileDB 2.6.2](https://github.com/TileDB-Inc/TileDB/releases/tag/2.6.2)

## API Changes
* Addition of `ArraySchema.validity_filters` [#898](https://github.com/TileDB-Inc/TileDB-Py/pull/898)

# TileDB-Py 0.12.1 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.12.1 includes TileDB Embedded [TileDB 2.6.1](https://github.com/TileDB-Inc/TileDB/releases/tag/2.6.1)

## Bug fixes
* Cast `dim`'s dtype in `Domain` to `str` prior to applying `html.escape` [#883](https://github.com/TileDB-Inc/TileDB-Py/pull/883)
* Support attributes with spaces in `QueryCondition` by casting with attr(); values may be casted with val() [#886](https://github.com/TileDB-Inc/TileDB-Py/pull/886)

# TileDB-Py 0.12.0 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.12.0 includes TileDB Embedded [TileDB 2.6.0](https://github.com/TileDB-Inc/TileDB/releases/tag/2.6.0)

## API Changes
* Allow writing to a dimension-only array (zero attributes) by using assignment to `None`, for example: `A[coords] = None` (given `A: tiledb.Array`) [#854](https://github.com/TileDB-Inc/TileDB-Py/pull/854)
* Remove repeating header names for `attr` when displaying `ArraySchema` in Jupyter Notebooks [#856](https://github.com/TileDB-Inc/TileDB-Py/pull/856)
* `tiledb.VFS.open` returns `FileIO` object; no longer returns `FileHandle` [#802](https://github.com/TileDB-Inc/TileDB-Py/pull/802)
* Addition of `tiledb.copy_fragments_to_existing_array` [#864](https://github.com/TileDB-Inc/TileDB-Py/pull/864)

## Bug fixes
* HTML-escape strings for `Dim` and `Attr`'s `name` and `dtype` [#856](https://github.com/TileDB-Inc/TileDB-Py/pull/856)
* Fix attribute view for multi-indexer [#866](https://github.com/TileDB-Inc/TileDB-Py/pull/866)

## Improvements
* Metadata-related API calls are now 'nogil' [#867](https://github.com/TileDB-Inc/TileDB-Py/pull/867)
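> Editor's note: to make the 0.12.0 dimension-only write entry above concrete, here is a minimal sketch, not taken from the changelog itself. The URI and domain are hypothetical, and it assumes TileDB-Py >= 0.12.0 accepts an empty attribute list at schema creation:

```python
import numpy as np
import tiledb

# Hypothetical 1-D sparse array with zero attributes: only coordinates are stored.
dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 99), tile=10, dtype=np.int64))
schema = tiledb.ArraySchema(domain=dom, attrs=[], sparse=True)
tiledb.Array.create("coords_only", schema)

with tiledb.open("coords_only", "w") as A:
    A[np.array([3, 7, 42])] = None   # write coordinates only (new in 0.12.0)
```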
# TileDB-Py 0.11.5 Release Notes

* Added missing dependency on [`packaging`](https://pypi.org/project/packaging/) in requirements.txt [#852](https://github.com/TileDB-Inc/TileDB-Py/pull/852)

# TileDB-Py 0.11.4 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.11.4 includes TileDB Embedded [TileDB 2.5.3](https://github.com/TileDB-Inc/TileDB/releases/tag/2.5.3)

## API Changes
* Add rich display for TileDB objects in Jupyter notebooks [#824](https://github.com/TileDB-Inc/TileDB-Py/pull/824)
* Support `TILEDB_STRING_ASCII` for array metadata [#828](https://github.com/TileDB-Inc/TileDB-Py/pull/828)

# TileDB-Py 0.11.3 Release Notes

## Improvements
* Support for Python 3.10 [#808](https://github.com/TileDB-Inc/TileDB-Py/pull/808)

## API Changes
* Addition of `tiledb.version()` to return version as a tuple [#801](https://github.com/TileDB-Inc/TileDB-Py/pull/801)
* `Query.get_stats` and `Ctx.get_stats` changed function signature; automatically `print_out` stats and add option to output as `json` [#809](https://github.com/TileDB-Inc/TileDB-Py/pull/809)

## Bug fixes
* `tiledb.delete_fragments` removes unused schemas [#813](https://github.com/TileDB-Inc/TileDB-Py/pull/813)

# TileDB-Py 0.11.2 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.11.2 includes TileDB Embedded [TileDB 2.5.2](https://github.com/TileDB-Inc/TileDB/releases/tag/2.5.2)

## Bug fixes
* Support dict parameter for 'config' argument to VFS constructor [#805](https://github.com/TileDB-Inc/TileDB-Py/pull/805)

# TileDB-Py 0.11.1 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.11.1 includes TileDB Embedded [TileDB 2.5.1](https://github.com/TileDB-Inc/TileDB/releases/tag/2.5.1)

## Bug Fixes
* Correct libtiledb version checking for Fragment Info API getters' MBRs and array schema name [#784](https://github.com/TileDB-Inc/TileDB-Py/pull/784)

# TileDB-Py 0.11.0 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.11.0 includes TileDB Embedded [TileDB 2.5.0](https://github.com/TileDB-Inc/TileDB/releases/tag/2.5.0)

## API Changes
* Addition of MBRs to `FragmentInfo` API [#760](https://github.com/TileDB-Inc/TileDB-Py/pull/760)
* Addition of `array_schema_name` to `FragmentInfo` API [#777](https://github.com/TileDB-Inc/TileDB-Py/pull/777)
* Addition of `tiledb.create_array_from_fragments` to copy fragments within a given timestamp range to a new array [#777](https://github.com/TileDB-Inc/TileDB-Py/pull/777)
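> Editor's note: as a hedged illustration of the fragment info API referenced above (and introduced as `array_fragments()` in 0.8.4 below), a minimal sketch; the URI is hypothetical and the accessor names follow the renames recorded in the 0.8.9 and 0.10.0 entries:

```python
import tiledb

fragments = tiledb.array_fragments("my_array")    # hypothetical URI
print("fragment count:", len(fragments))
for frag in fragments:
    # per-fragment URI and nonempty domain
    print(frag.uri, frag.nonempty_domain)
print("fragments to vacuum:", fragments.to_vacuum)
```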
# TileDB-Py 0.10.5 Release Notes

## API Changes
* Addition of `tiledb.delete_fragments` to remove fragments within a given timestamp range [#774](https://github.com/TileDB-Inc/TileDB-Py/pull/774)

# TileDB-Py 0.10.4 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.10.4 includes TileDB Embedded [TileDB 2.4.3](https://github.com/TileDB-Inc/TileDB/releases/tag/2.4.3)

## Bug fixes
* Error out when applying `QueryCondition` to dense arrays; this feature will be implemented in TileDB Embedded 2.5 [#753](https://github.com/TileDB-Inc/TileDB-Py/pull/753)
* Ensure that indexer, multi-indexer, and .df return the same results when applying `QueryCondition` [#753](https://github.com/TileDB-Inc/TileDB-Py/pull/753)
* Fix error when using .df with PyArrow 6 due to incorrect metadata field in exported schema [#764](https://github.com/TileDB-Inc/TileDB-Py/pull/764)
* Fix [#755](https://github.com/TileDB-Inc/TileDB-Py/issues/755): `from_pandas` to correctly round-trip unnamed Index [#761](https://github.com/TileDB-Inc/TileDB-Py/pull/761)
* Fix .df indexer bug with empty result set [#744](https://github.com/TileDB-Inc/TileDB-Py/pull/744)

## API Changes
* Close the `PyFragmentInfo` object in the `FragmentInfoList` constructor to reflect changes in the `FragmentInfo` API in TileDB Embedded 2.5 [#752](https://github.com/TileDB-Inc/TileDB-Py/pull/752)
* Make `ctx` argument optional for `ArraySchemaEvolution` [#743](https://github.com/TileDB-Inc/TileDB-Py/pull/743)
* Remove `coords_filters` from `ArraySchema` for dense arrays [#762](https://github.com/TileDB-Inc/TileDB-Py/pull/762)

# TileDB-Py 0.10.3 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.10.3 includes TileDB Embedded [TileDB 2.4.2](https://github.com/TileDB-Inc/TileDB/releases/tag/2.4.2)
  - Note that 2.4.1 was skipped due to accidental tagging of the 2.4.1 git tag during CI testing

## API Changes
* Addition of `overwrite` parameter to `Array.create` [#713](https://github.com/TileDB-Inc/TileDB-Py/pull/713)
* Addition of `"ascii"` dtype for `Dim`s [#720](https://github.com/TileDB-Inc/TileDB-Py/pull/720)

## Bug fixes
* Pass `Ctx` to `ArraySchema.load` in `from_pandas` [#709](https://github.com/TileDB-Inc/TileDB-Py/pull/709)
* Give clear error message when attempting to apply `QueryCondition` on dimensions [#722](https://github.com/TileDB-Inc/TileDB-Py/pull/722)
* Do not add string range when querying empty array [#721](https://github.com/TileDB-Inc/TileDB-Py/pull/721)

## Improvements
* String dimension default fix in core [#2436](https://github.com/TileDB-Inc/TileDB/pull/2436) reverts a previous change in which the nonempty domain was passed to the multi-range indexer if unspecified [#712](https://github.com/TileDB-Inc/TileDB-Py/pull/712)

# TileDB-Py 0.10.2 Release Notes

## API Changes
* Deprecate sparse writes to dense arrays [#681](https://github.com/TileDB-Inc/TileDB-Py/pull/681)
* Addition of `Attr.isascii` [#681](https://github.com/TileDB-Inc/TileDB-Py/pull/681)
* Addition of `Ctx.get_stats` and `Query.get_stats` [#698](https://github.com/TileDB-Inc/TileDB-Py/pull/698)

## Improvements
* Added support for `timestamp` argument in `tiledb.from_numpy` [#699](https://github.com/TileDB-Inc/TileDB-Py/pull/699)

# TileDB-Py 0.10.1 Release Notes

## API Changes
* Do not require `domain=(None, None)` for string dimensions [#662](https://github.com/TileDB-Inc/TileDB-Py/pull/662)

## Improvements
* Print a warning about the ContextVar bug when running under ipykernel < 6.0 [#665](https://github.com/TileDB-Inc/TileDB-Py/pull/665). Please see https://github.com/TileDB-Inc/TileDB-Py/issues/667 for more information.
* `tiledb.Dim` representation now displays `var=True` for dimensions with `bytes` datatype, consistent with `tiledb.Attr` [#669](https://github.com/TileDB-Inc/TileDB-Py/pull/669)

## Bug fixes
* Fix concurrent use of `Array.multi_index` and `.df` by using a new instance for each invocation [#672](https://github.com/TileDB-Inc/TileDB-Py/pull/672)
* For attributes, if `var=False` but the bytestring is fixed-width, or if `var=True` but the bytestring is variable length, error out [#663](https://github.com/TileDB-Inc/TileDB-Py/pull/663)

# TileDB-Py 0.10.0 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.10.0 includes TileDB Embedded [TileDB 2.4.0](https://github.com/TileDB-Inc/TileDB/releases/tag/2.4.0) with a number of new features and improvements, including:
  - new platform support: Apple M1
  - support for ArraySchema evolution (adding and removing attributes)
  - support for Azure SAS (shared access signature) tokens

## API Changes
* When using `Array.multi_index`, an empty result is returned if the nonempty domain is empty [#656](https://github.com/TileDB-Inc/TileDB-Py/pull/656)
* Addition of `Array.set_query` to read array using a serialized query [#651](https://github.com/TileDB-Inc/TileDB-Py/pull/651)

## Improvements
* Support numeric column names in `from_pandas` by casting to str dtype [#652](https://github.com/TileDB-Inc/TileDB-Py/pull/652)
* New `tiledb.ArraySchemaEvolution` API to add and drop attributes from an existing array [#657](https://github.com/TileDB-Inc/TileDB-Py/pull/657)

## Bug Fixes
* Correct listing of consolidated fragments to vacuum in the Fragment Info API by deprecating `FragmentInfoList.to_vacuum_uri`, `FragmentInfoList.to_vacuum_num`, `FragmentInfo.to_vacuum_uri`, and `FragmentInfo.to_vacuum_num` and replacing with `FragmentInfoList.to_vacuum` [#650](https://github.com/TileDB-Inc/TileDB-Py/pull/650)
* Correct issue where appending `None` to `FilterList` causes segfault by checking the `filter` argument [#653](https://github.com/TileDB-Inc/TileDB-Py/pull/653)
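> Editor's note: a minimal sketch of the `ArraySchemaEvolution` workflow mentioned in the 0.10.0 notes above; the method names are assumptions drawn from this changelog (the `ctx` argument is optional as of 0.10.4), and the URI and attribute names are hypothetical:

```python
import numpy as np
import tiledb

se = tiledb.ArraySchemaEvolution()                        # ctx optional as of 0.10.4
se.add_attribute(tiledb.Attr("extra", dtype=np.float64))  # add a new attribute
se.drop_attribute("old_attr")                             # drop a hypothetical existing attribute
se.array_evolve("my_array")                               # apply the evolution to an existing array
```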
# TileDB-Py 0.9.5 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.9.5 includes TileDB Embedded [TileDB 2.3.3](https://github.com/TileDB-Inc/TileDB/releases/tag/2.3.3)

## Improvements
* Consolidate `_nonempty_domain_var` into `nonempty_domain` [#632](https://github.com/TileDB-Inc/TileDB-Py/pull/632)
* Support more valid Python syntax for `QueryCondition` statements [#636](https://github.com/TileDB-Inc/TileDB-Py/pull/636)
* Addition of `ascii` dtype to `Attr` allows `QueryCondition` to support var-length strings [#637](https://github.com/TileDB-Inc/TileDB-Py/pull/637)

# TileDB-Py 0.9.4 Release Notes

## Improvements
* Support pickling for arrays in write-mode [#626](https://github.com/TileDB-Inc/TileDB-Py/pull/626)

## Bug Fixes
* Fixed multi-range indexer to default to explicitly pass in the non-empty domain if dimensions are unspecified [#630](https://github.com/TileDB-Inc/TileDB-Py/pull/630)

# TileDB-Py 0.9.3 Release Notes

## Packaging Notes
* Due to a packaging issue released with 0.9.3 (NumPy ABI compatibility with NumPy < 1.20 for Python 3.8), this section is intentionally left blank.

# TileDB-Py 0.9.2 Release Notes

## Packaging Notes
* Fixed release builder ordering issue which led to CRLF line endings in the 0.9.1 source distribution.

## API Changes
* Deprecate `Array.timestamp` and replace with `Array.timestamp_range` [#616](https://github.com/TileDB-Inc/TileDB-Py/pull/616)

## Improvements
* Set `ArraySchema.tile_order=None` for Hilbert-ordered arrays [#609](https://github.com/TileDB-Inc/TileDB-Py/pull/609)
* Use CIBW to build release wheels on Linux [#613](https://github.com/TileDB-Inc/TileDB-Py/pull/613)
* Addition of pickling functionality for `SparseArray` [#618](https://github.com/TileDB-Inc/TileDB-Py/pull/618)

# TileDB-Py 0.9.1 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.9.1 includes TileDB Embedded [TileDB 2.3.1](https://github.com/TileDB-Inc/TileDB/releases/tag/2.3.1)

## Improvements
* Support passing a timestamp range for consolidation and vacuuming [#603](https://github.com/TileDB-Inc/TileDB-Py/pull/603)

## Bug Fixes
* Fragment Info API's to_vacuum_uri() function corrected to iterate through `to_vacuum_num` rather than `fragment_num` [#603](https://github.com/TileDB-Inc/TileDB-Py/pull/603)
* Return "NA" for ArraySchema.tile_order if "Hilbert" [#605](https://github.com/TileDB-Inc/TileDB-Py/pull/605)

# TileDB-Py 0.9.0 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.9.0 includes TileDB Embedded [TileDB 2.3](https://github.com/TileDB-Inc/TileDB/releases/tag/2.3) with a significant number of new features and improvements.

## Packaging Notes
* Windows wheels are now built with TileDB Cloud REST support enabled [#541](https://github.com/TileDB-Inc/TileDB-Py/pull/541)

## Improvements
* Addition of `QueryCondition` API to filter query on attributes [#576](https://github.com/TileDB-Inc/TileDB-Py/pull/576)

## Bug Fixes
* Fixed `from_pandas` append error for sparse arrays: no need to specify 'row_start_idx' [#593](https://github.com/TileDB-Inc/TileDB-Py/pull/593)
* Fixed 'index_dims' kwarg handling for `from_pandas` [#590](https://github.com/TileDB-Inc/TileDB-Py/pull/590)

## API Changes
* `from_dataframe` function has been removed; deprecated in TileDB-Py 0.6 and replaced by `from_pandas`.
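> Editor's note: for context on the `QueryCondition` API introduced in 0.9.0, a minimal usage sketch; the array URI and attribute are hypothetical, and `attr_cond` is the `query()` keyword used in this era per the 0.8.11 notes below:

```python
import tiledb

with tiledb.open("my_sparse_array") as A:            # hypothetical URI
    qc = tiledb.QueryCondition("cost > 5.0")         # filter on attribute 'cost'
    filtered = A.query(attr_cond=qc, attrs=["cost"])[:]
```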
---

# TileDB-Py 0.8.11 Release Notes

## Bug fixes
* Fixed incorrect NumPy ABI target in Linux wheels [#590](https://github.com/TileDB-Inc/TileDB-Py/pull/590)
* QueryCondition API will cast condition values to the datatype of the corresponding attribute [#589](https://github.com/TileDB-Inc/TileDB-Py/pull/589)
* QueryCondition API errors out when there are mismatched attributes to `query`'s `attr_cond` and `attrs` arguments [#589](https://github.com/TileDB-Inc/TileDB-Py/pull/589)
* QueryCondition API can now parse negative numbers [#589](https://github.com/TileDB-Inc/TileDB-Py/pull/589)

# TileDB-Py 0.8.10 Release Notes

## Improvements
* Disabled libtiledb Werror compilation argument for from-source builds via setup.py [#574](https://github.com/TileDB-Inc/TileDB-Py/pull/574)
* Relaxed NumPy version requirements for from-source builds via setup.py [#575](https://github.com/TileDB-Inc/TileDB-Py/pull/575)

## Bug fixes
* Fixed FragmentInfoList where context was not being passed to ArraySchema [#573](https://github.com/TileDB-Inc/TileDB-Py/pull/573)
* Fixed FragmentInfoList where context was not being passed to ArraySchema [#578](https://github.com/TileDB-Inc/TileDB-Py/pull/578)
* Fixed read bug due to large estimated result size [#579](https://github.com/TileDB-Inc/TileDB-Py/pull/579)
* Fixed bug reading nullable attributes due to missing buffer resize [#581](https://github.com/TileDB-Inc/TileDB-Py/pull/581)
* Fixed Python output for `tiledb.stats_dump` [#586](https://github.com/TileDB-Inc/TileDB-Py/pull/586)

# TileDB-Py 0.8.9 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.8.9 includes TileDB Embedded [TileDB 2.2.9](https://github.com/TileDB-Inc/TileDB/releases/tag/2.2.9)

## Improvements
* Support for iterating over incomplete query results [#548](https://github.com/TileDB-Inc/TileDB-Py/pull/548)
  - This feature provides the capability to consume partial query results with a fixed maximum buffer size, rather than the default behavior of resizing buffers and resubmitting to completion.
    Usage example: `examples/incomplete_iteration.py` (along with test in: `test_libtiledb.py:test_incomplete_return`)
* Rename FragmentsInfo to FragmentInfoList [#551](https://github.com/TileDB-Inc/TileDB-Py/pull/551)
* Dataframe creation uses Zstd default compression level (-1) [#552](https://github.com/TileDB-Inc/TileDB-Py/pull/552)
* Rename Fragment Info API's `non_empty_domain` attribute to `nonempty_domain` [#553](https://github.com/TileDB-Inc/TileDB-Py/pull/553)
* Added configuration option `py.alloc_max_bytes` to control maximum initial buffer allocation [#557](https://github.com/TileDB-Inc/TileDB-Py/pull/557)

## Bug fixes
* Fixed incorrect error raised in .df[] indexer when pyarrow is not installed [#554](https://github.com/TileDB-Inc/TileDB-Py/pull/554)
* Fixed `from_pandas(attr_filters=None, dim_filters=None)` (previously used internal defaults) [#564](https://github.com/TileDB-Inc/TileDB-Py/pull/554)
* Fixed `from_pandas` write bug due to incorrect classification of str/bytes columns [#562](https://github.com/TileDB-Inc/TileDB-Py/pull/562)
* Fix segfault due to mismatched validity num and data buffer sizes [#567](https://github.com/TileDB-Inc/TileDB-Py/pull/567)

# TileDB-Py 0.8.8 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.8.8 includes TileDB Embedded [TileDB 2.2.8](https://github.com/TileDB-Inc/TileDB/releases/tag/2.2.8)

# TileDB-Py 0.8.7 Release Notes

## Improvements
* ArraySchema support for `cell_order="hilbert"` [#535](https://github.com/TileDB-Inc/TileDB-Py/pull/535)

## Bug fixes
* Fixed regression in `from_pandas` with string-valued index dimensions [#526](https://github.com/TileDB-Inc/TileDB-Py/pull/526)
* Fixed GC lifetime bug in string buffer conversion [#525](https://github.com/TileDB-Inc/TileDB-Py/pull/526)
* Fixed `FilterList`'s `repr()` method [#528](https://github.com/TileDB-Inc/TileDB-Py/pull/528)

# TileDB-Py 0.8.6 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.8.6 includes TileDB Embedded [TileDB 2.2.7](https://github.com/TileDB-Inc/TileDB/releases/tag/2.2.7)

## Improvements
* Addition of `VFS()` functions `copy_file()` and `copy_dir()` [#507](https://github.com/TileDB-Inc/TileDB-Py/pull/507)
* Add support in `from_pandas` for storing Pandas extension types as variable-length attributes [#515](https://github.com/TileDB-Inc/TileDB-Py/pull/515)
* Add support for sparse writes to dense arrays [#521](https://github.com/TileDB-Inc/TileDB-Py/pull/521)

## Bug fixes
* Multi-length attributes, regardless of fixed or var-length, do not query properly with PyArrow enabled due to lack of Arrow List support.
  When using `.df[]` with PyArrow enabled, we return a clear message to the user to use `query(use_pyarrow=False)` [#513](https://github.com/TileDB-Inc/TileDB-Py/pull/513)

# TileDB-Py 0.8.5 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.8.5 includes TileDB Embedded [TileDB 2.2.6](https://github.com/TileDB-Inc/TileDB/releases/tag/2.2.6)

## Documentation Updates
* Added example reading/writing RGB (multi-component) array [#487](https://github.com/TileDB-Inc/TileDB-Py/pull/487)

## Improvements
* Restore `tiledb.stats_dump` default to `verbose=True` [#491](https://github.com/TileDB-Inc/TileDB-Py/pull/491)
* Remove `non_empty_domain_var()` Fragment Info PyBind11 function and only use `get_non_empty_domain()` for both fixed and var-length domains [#505](https://github.com/TileDB-Inc/TileDB-Py/pull/505)

# TileDB-Py 0.8.4 Release Notes

## Improvements
* Addition of high-level function `array_fragments()` that returns a `FragmentsInfo` object [#488](https://github.com/TileDB-Inc/TileDB-Py/pull/488)
* Added support for `from_pandas`/`df[]` round-trip of Pandas nullable integer and bool types [#480](https://github.com/TileDB-Inc/TileDB-Py/pull/480)
* Fragment info API example usage now provided at `examples/fragment_info.py` [#479](https://github.com/TileDB-Inc/TileDB-Py/pull/479)
* Fragment info API parameters have been rearranged to match the rest of the TileDB Python API such that the `uri` is provided first and `context`, an optional parameter that defaults to `tiledb.default_ctx()`, is provided second [#479](https://github.com/TileDB-Inc/TileDB-Py/pull/479)

## Bug fixes
* Fix bug in `Attr` to ensure that all Unicode strings are automatically set to `var=True` [#495](https://github.com/TileDB-Inc/TileDB-Py/pull/495)
* Fix Array.multi_index slicing bug for sparse array with dimension range including 0 [#482](https://github.com/TileDB-Inc/TileDB-Py/pull/482)

# TileDB-Py 0.8.3 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.8.3 includes TileDB Embedded [TileDB 2.2.4](https://github.com/TileDB-Inc/TileDB/releases/tag/2.2.4)

## Improvements
* Added `nullable` keyword argument to `Attr` constructor [#474](https://github.com/TileDB-Inc/TileDB-Py/pull/474)

## Bug fixes
* Fix bug in Array.multi_index with slice range including 0 (incorrectly used the nonempty domain as endpoint) [#473](https://github.com/TileDB-Inc/TileDB-Py/pull/473)

# TileDB-Py 0.8.2 Release Notes

## Packaging Notes
* This is a version bump to fix numpy compatibility pinning in the wheel build system.

# TileDB-Py 0.8.1 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.8.1 includes TileDB Embedded [TileDB 2.2.3](https://github.com/TileDB-Inc/TileDB/releases/tag/2.2.3)

## Packaging Notes
* TileDB-Py 0.8 does not support Python 2.

# TileDB-Py 0.8.0 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.8.0 includes TileDB Embedded 2.2.2, featuring a number of significant improvements in core storage engine functionality. See release notes for [TileDB 2.2.1](https://github.com/TileDB-Inc/TileDB/releases/tag/2.2.1) and [TileDB 2.2.2](https://github.com/TileDB-Inc/TileDB/releases/tag/2.2.2).

## Packaging Notes
* TileDB-Py 0.8 does not support Python 2.
## Improvements
* Add initial `tiledb.from_parquet` functionality (beta) [[a90d5d9b1b](https://github.com/TileDB-Inc/TileDB-Py/commit/a90d5d9b1b6a39b48090592297fe98a7f33338fb)]
* Preload metadata in .df query path to reduce read latency for remote arrays [[79ab12fcf0](https://github.com/TileDB-Inc/TileDB-Py/commit/79ab12fcf0ede0cbac822392a30ee7640595e93c)]

## Bug fixes
* Update py::dtype usage for compatibility with pybind11 2.6.2 [[9d3d3d3c43](https://github.com/TileDB-Inc/TileDB-Py/commit/9d3d3d3c430fbc058d04773f03ddc63bd47f79e3)]

# TileDB-Py 0.7.7 Release Notes

## Bug fixes
* Cherry-pick commit 9d3d3d3c43 to fix runtime bug in conda packages built against pybind11 2.6.2 [9d3d3d3c430f](https://github.com/TileDB-Inc/TileDB-Py/commit/9d3d3d3c430fbc058d04773f03ddc63bd47f79e3)

# TileDB-Py 0.7.6 Release Notes

## Packaging Notes
* TileDB-Py 0.7.x will be the last version of TileDB-Py supporting Python 2.

## Bug fixes
* Fix read compatibility for empty strings written with 2.1 or 2.2 [#462](https://github.com/TileDB-Inc/TileDB-Py/pull/462)
* Fix #457: make sure to fit automatic tile extent to dim range for date type [#464](https://github.com/TileDB-Inc/TileDB-Py/pull/464)

# TileDB-Py 0.7.5 Release Notes

## Packaging Notes
* TileDB-Py 0.7.x will be the last version of TileDB-Py supporting Python 2.

## TileDB Embedded updates:
* TileDB-Py 0.7.5 includes [TileDB Embedded 2.1.6](https://github.com/TileDB-Inc/TileDB/releases/tag/2.1.6)

## Improvements
* FragmentInfo API by default returns information from all fragments and dimensions [#444](https://github.com/TileDB-Inc/TileDB-Py/pull/444)
* Add integer multi-indexing for NumPy datetime64 dimensions [#447](https://github.com/TileDB-Inc/TileDB-Py/pull/447)
* Add `from_csv/pandas` support for `timestamp` keyword argument to specify write timestamp [#450](https://github.com/TileDB-Inc/TileDB-Py/pull/450)
* Add verbosity option to `stats_dump()` [#452](https://github.com/TileDB-Inc/TileDB-Py/pull/452)
* Add `unique_dim_values()` to return unique dimension values for a given `SparseArray` [#454](https://github.com/TileDB-Inc/TileDB-Py/pull/454)
* Add support to `query()` for returning subsets of specified dimensions [#458](https://github.com/TileDB-Inc/TileDB-Py/pull/458)
* Optimize string array writes [#459](https://github.com/TileDB-Inc/TileDB-Py/pull/459)

## Bug fixes
* Fix `Dim.shape` for dense array with datetime dimension [#448](https://github.com/TileDB-Inc/TileDB-Py/pull/448)

# TileDB-Py 0.7.4 Release Notes

## Improvements
* Support selecting a subset of dimensions in Array.query via the new keyword argument `dims: List[String]`.
  The `coords=True` kwarg is still supported for compatibility, and continues to return all dimensions [#433](https://github.com/TileDB-Inc/TileDB-Py/pull/433)
* Support Dim(filters=FilterList) keyword argument to set filters on a per-Dim basis [#434](https://github.com/TileDB-Inc/TileDB-Py/pull/434)
* Support tiledb.from_csv setting attribute and dimension filters by dictionary of {name: filter} [#434](https://github.com/TileDB-Inc/TileDB-Py/pull/434)
* Add ArraySchema.check wrapping `tiledb_array_schema_check` [#435](https://github.com/TileDB-Inc/TileDB-Py/pull/435)
* Add support for attribute fill values `tiledb.Attr(fill=...)` and `Attr.fill` getter [#437](https://github.com/TileDB-Inc/TileDB-Py/pull/437)

## API Changes
* tiledb.from_csv keyword arg `attrs_filters` renamed to `attr_filters` [#434](https://github.com/TileDB-Inc/TileDB-Py/pull/434)

## Bug fixes
* Fix bug in `multi_index` slicing of dense arrays [#438](https://github.com/TileDB-Inc/TileDB-Py/pull/438)
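> Editor's note: to illustrate the dimension-selection keyword added in 0.7.4, a small sketch; the array, dimension `x`, and attribute `a` are hypothetical:

```python
import tiledb

with tiledb.open("my_sparse_array") as A:          # hypothetical URI
    # return only dimension 'x' and attribute 'a' instead of all columns
    result = A.query(dims=["x"], attrs=["a"])[:]
    # coords=True remains available to request all dimensions
    everything = A.query(coords=True)[:]
```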
# TileDB-Py 0.7.3 Release Notes

## Improvements
* The default result layout for indexing/querying sparse arrays is now TILEDB_UNORDERED [#428](https://github.com/TileDB-Inc/TileDB-Py/pull/428), [#431](https://github.com/TileDB-Inc/TileDB-Py/pull/431)
* Added documentation for all TileDB-Py configuration parameters [#430](https://github.com/TileDB-Inc/TileDB-Py/pull/430)
* Fixed documentation rendering for `Array.query` [#430](https://github.com/TileDB-Inc/TileDB-Py/pull/430)

## Bug fixes
* Fix sparse dimension type selection when array type is not specified to from_pandas [#429](https://github.com/TileDB-Inc/TileDB-Py/pull/429)
* Don't pass allows_duplicates=True to dense array constructor (tiledb.from_csv) [#428](https://github.com/TileDB-Inc/TileDB-Py/pull/428)

# TileDB-Py 0.7.2 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.7.2 includes [TileDB Embedded 2.1.3](https://github.com/TileDB-Inc/TileDB/releases/tag/2.1.3), including a fix for issue [#409](https://github.com/TileDB-Inc/TileDB-Py/issues/409).

## Changes
* The default array type for `from_pandas` and `from_csv` is now dense, if unspecified, except when passing a dataframe with string indexes to `from_pandas` [#424](https://github.com/TileDB-Inc/TileDB-Py/pull/408)

## Improvements
* Automatically determine column to dimension mapping for `tiledb.from_csv` append mode [#408](https://github.com/TileDB-Inc/TileDB-Py/pull/408)

## Bug fixes
* Fixed `tiledb.from_csv/dataframe` error when ingesting single-row/index datasets [#422](https://github.com/TileDB-Inc/TileDB-Py/pull/422)
* Fixed intermittent `csv_sparse_col_to_dims` failure due to duplicate result ordering [#423](https://github.com/TileDB-Inc/TileDB-Py/pull/423)

# TileDB-Py 0.7.1 Release Notes

## Improvements
* Added support for `df[]` indexing via `tiledb.Array.query` [#411](https://github.com/TileDB-Inc/TileDB-Py/pull/411)
* Modified `stats_dump` to return internal stats as string, allowing for output in Jupyter notebooks [#403](https://github.com/TileDB-Inc/TileDB-Py/pull/403)
* Added `__repr__` to `Array` and `Ctx` [#413](https://github.com/TileDB-Inc/TileDB-Py/pull/413)
* `tiledb.open` now supports `timestamp` keyword argument [#419](https://github.com/TileDB-Inc/TileDB-Py/pull/419)
* `tiledb.Domain` now supports passing a list of `Dim`s without unpacking [#419](https://github.com/TileDB-Inc/TileDB-Py/pull/419)

## Bug fixes
* Fixed PyPI wheels load error on newer macOS due to overlinkage against system libraries in build process (curl -> libintl) [#418](https://github.com/TileDB-Inc/TileDB-Py/pull/418)
* Fixed PyPI wheels load error on Windows due to building against TBB [#419](https://github.com/TileDB-Inc/TileDB-Py/pull/419)
* Fixed indexing of attribute named 'coords' [#414](https://github.com/TileDB-Inc/TileDB-Py/pull/414)
* `open_dataframe` now uses the underlying Array's `nonempty_domain` to avoid errors opening unlimited domain arrays [#409](https://github.com/TileDB-Inc/TileDB-Py/pull/409)
# TileDB-Py 0.7.0 Release Notes

## TileDB Embedded updates:
* TileDB-Py 0.7.0 includes [TileDB Embedded 2.1.2](https://github.com/TileDB-Inc/TileDB/releases/tag/2.1.2) featuring a number of significant improvements, with major highlights including:
  - no longer uses Intel TBB for parallelization by default. Along with many benefits to TileDB Embedded, this significantly reduces complications and bugs with Python multiprocessing fork mode.
  - support for coalescing subarray ranges to give major performance boosts.

## Packaging Notes
* TileDB-Py 0.7 packages on PyPI support macOS 10.13+ and manylinux10-compatible Linux distributions only. For now, wheels could be produced supporting older systems but without Google Cloud support; if needed, please contact us to discuss.

## Improvements
* Added `.df[]` indexer to tiledb.Array: directly returns a Pandas dataframe from a query (uses `multi_index` indexing behavior) [#390](https://github.com/TileDB-Inc/TileDB-Py/pull/389)
* Added parallel CSV ingestion example using Python multiprocessing with `tiledb.from_csv` [#397](https://github.com/TileDB-Inc/TileDB-Py/pull/397)
* Added wrapping and support for TileDB checksumming filters: `ChecksumMD5Filter` and `ChecksumSHA256Filter` [#389](https://github.com/TileDB-Inc/TileDB-Py/pull/389)
* Removed TBB install from default setup.py, corresponding to TileDB Embedded changes [#389](https://github.com/TileDB-Inc/TileDB-Py/pull/389)
* Add support for 'capacity' kwarg to `from_csv`/`from_pandas` [#391](https://github.com/TileDB-Inc/TileDB-Py/pull/391)
* Add support for 'tile' kwarg to `from_csv`/`from_pandas` to customize Dim tile extent [#391](https://github.com/TileDB-Inc/TileDB-Py/pull/391)
* Added '--release-symbols' option for building in release optimization with debug symbols [#402](https://github.com/TileDB-Inc/TileDB-Py/pull/402)
* Changed `allows_duplicates` default to `True` for `from_csv/from_pandas` [#394](https://github.com/TileDB-Inc/TileDB-Py/pull/394)

## Bug fixes
* Fixed bug indexing anonymous attributes of sparse arrays using `A[]` (did not affect dense or multi_index) [#404](https://github.com/TileDB-Inc/TileDB-Py/pull/404)
* Fixed rendering of column name in mixed dtype exception [#382](https://github.com/TileDB-Inc/TileDB-Py/pull/382)
* Fixed forwarding of 'ctx' kwarg to from_csv/from_pandas [#383](https://github.com/TileDB-Inc/TileDB-Py/pull/383)
* Fixed type of return values for empty results when indexing a sparse array [#384](https://github.com/TileDB-Inc/TileDB-Py/pull/384)

## Misc Updates
* Added round-trip tests for all filter `repr` objects [#389](https://github.com/TileDB-Inc/TileDB-Py/pull/389)

# TileDB-Py 0.6.6 Release Notes

**Note that we will be removing wheel support for macOS 10.9-10.12 in TileDB-Py 0.7 (planned for release in August 2020).** This change is due to upstream (AWS SDK) minimum version requirements. The minimum supported version for macOS wheels on PyPI will be macOS 10.13.

**Note that we will be removing support for [manylinux1](https://github.com/pypa/manylinux/tree/manylinux1) wheels in TileDB-Py 0.7 (planned for release in August 2020).** manylinux1 is based on CentOS5, which has been unsupported for several years. We now provide wheels built with [manylinux2010](https://www.python.org/dev/peps/pep-0571/), which is based on CentOS6 / glibc 2.12.

## Improvements
* Bump release target to [TileDB 2.0.7](https://github.com/TileDB-Inc/TileDB/releases/tag/2.0.7)

# TileDB-Py 0.6.5 Release Notes

We have added manylinux2010 wheels, corresponding to CentOS6 / glibc 2.12. We are deprecating support for manylinux1 (CentOS5 / glibc 2.0.7), which is not supported by the Google Cloud Storage SDK. We are planning to remove manylinux1 wheel support in the TileDB-Py 0.7 release.
## Improvements
* Enabled Google Cloud Storage support in macOS and linux (manylinux2010) wheels on PyPI ([#364](https://github.com/TileDB-Inc/TileDB-Py/pull/364))

# TileDB-Py 0.6.4 Release Notes

## API notes
* Deprecated `initialize_ctx` in favor of `default_ctx(config: tiledb.Config)` [#351](https://github.com/TileDB-Inc/TileDB-Py/pull/351)

## Improvements
* Bump release target to [TileDB 2.0.6](https://github.com/TileDB-Inc/TileDB/releases/tag/2.0.6)
* Improved error reporting for input data type mismatches [#359](https://github.com/TileDB-Inc/TileDB-Py/pull/359)
* Added `tiledb.VFS.dir_size` [#343](https://github.com/TileDB-Inc/TileDB-Py/pull/343)
* Added read and buffer conversion statistics for python to `tiledb.stats_dump` [#354](https://github.com/TileDB-Inc/TileDB-Py/pull/354)
* Implemented string deduplication to reduce conversion time for string arrays [#357](https://github.com/TileDB-Inc/TileDB-Py/pull/357)

## Bug fixes
* Fixed argument order for `Array.consolidate` with a Config override parameter [#344](https://github.com/TileDB-Inc/TileDB-Py/pull/344)
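> Editor's note: a short sketch of the VFS helpers noted in this changelog, `dir_size` from 0.6.4 here and the `copy_file`/`copy_dir` additions from 0.8.6 above; the paths are hypothetical:

```python
import tiledb

vfs = tiledb.VFS()
print(vfs.dir_size("my_array"))             # total bytes stored under the URI
vfs.copy_file("source.txt", "dest.txt")     # copy a single file (added in 0.8.6)
```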
# TileDB-Py 0.6.3 Release Notes

## Improvements
* Bump release target to [TileDB 2.0.5](https://github.com/TileDB-Inc/TileDB/releases/tag/2.0.5)

## Bug fixes
* Fix unnecessary implicit ordering requirement for multi-attribute assignment. [#328](https://github.com/TileDB-Inc/TileDB-Py/pull/328)

# TileDB-Py 0.6.2 Release Notes

## Bug fixes
- Fix `nonempty_domain` with heterogeneous non-string dimensions ([#320](https://github.com/TileDB-Inc/TileDB-Py/pull/320))

## Improvements
- Add doctest for `tiledb.vacuum` ([#319](https://github.com/TileDB-Inc/TileDB-Py/pull/320))

# TileDB-Py 0.6.1 Release Notes

## Bug fixes
- Fix assignment order for `nonempty_domain` with string dimensions ([#308](https://github.com/TileDB-Inc/TileDB-Py/pull/308)) (test in [#311](https://github.com/TileDB-Inc/TileDB-Py/commit/35e5ff64ccfe7bf8f30a5900bfbe67c46cd1f97d))
- Fix bug in string attribute handling for var-length attributes ([#307](https://github.com/TileDB-Inc/TileDB-Py/issues/307))
- Fix regression reading anonymous attributes from TileDB 1.7 arrays ([#311](https://github.com/TileDB-Inc/TileDB-Py/pull/311))
- Fix incorrect `multi_index` error when string attribute results are empty ([#311](https://github.com/TileDB-Inc/TileDB-Py/pull/311))

# TileDB-Py 0.6.0 Release Notes

## Improvements
* Target TileDB version 2.0
  - Added support for heterogeneous and string-typed dimensions [#304](https://github.com/TileDB-Inc/TileDB-Py/pull/304)
  - Added support for `tiledb_array_vacuum` for cleaning up consolidated fragments
* Added Windows wheels for Python 3.7 and 3.8 on PyPI

# TileDB-Py 0.5.9 Release Notes

* Bump release target to [TileDB 1.7.7](https://github.com/TileDB-Inc/TileDB/releases/tag/1.7.7)

# TileDB-Py 0.5.8 Release Notes

* Rebuild/release due to wheel build error on linux for 0.5.7.

# TileDB-Py 0.5.7 Release Notes

* Bump release target to [TileDB 1.7.6](https://github.com/TileDB-Inc/TileDB/releases/tag/1.7.6)

# TileDB-Py 0.5.6 Release Notes

* Bump release target to [TileDB 1.7.5](https://github.com/TileDB-Inc/TileDB/releases/tag/1.7.5)

# TileDB-Py 0.5.5 Release Notes

* Bump release target to [TileDB 1.7.4](https://github.com/TileDB-Inc/TileDB/releases/tag/1.7.4)

## Improvements
- Return coordinates by default for dense `multi_index` queries [#259](https://github.com/TileDB-Inc/TileDB-Py/pull/259)

# TileDB-Py 0.5.4 Release Notes

* Bump release target to [TileDB 1.7.3](https://github.com/TileDB-Inc/TileDB/releases/tag/1.7.3)

## Improvements
- macOS wheels are now available on PyPI [#258](https://github.com/TileDB-Inc/TileDB-Py/pull/258)
- Delay default ctx initialization, allowing per-process global config options to be controlled by the user [#256](https://github.com/TileDB-Inc/TileDB-Py/pull/256)

# TileDB-Py 0.5.3 Release Notes

PyPI packages: https://pypi.org/project/tiledb/0.5.3/

## Improvements
- Reduce i/o overhead of `tiledb.open` and array constructors. [#239](https://github.com/TileDB-Inc/TileDB-Py/pull/239), [#240](https://github.com/TileDB-Inc/TileDB-Py/pull/240)
- Internal support for retrying incomplete queries in all array indexing modes. [#238](https://github.com/TileDB-Inc/TileDB-Py/pull/238), [#252](https://github.com/TileDB-Inc/TileDB-Py/pull/252)
- Eliminate reference cycles to improve Ctx cleanup. [#249](https://github.com/TileDB-Inc/TileDB-Py/pull/249)
- Support for retrieving compressor level from filter. [#234](https://github.com/TileDB-Inc/TileDB-Py/pull/234)

## Bug fixes
- Fix variable-length indexing error. [#236](https://github.com/TileDB-Inc/TileDB-Py/pull/236)
- Fix race condition initializing `tiledb.cloud` mixin from thread pool.
  [#246](https://github.com/TileDB-Inc/TileDB-Py/pull/246)

# TileDB-Py 0.5.2 Release Notes

## Bug fixes
- Fix bug in multi_index result buffer calculation [#232](https://github.com/TileDB-Inc/TileDB-Py/pull/232)

# TileDB-Py 0.5.1 Release Notes

## Bug fixes
- [Fix current buffer size calculation](https://github.com/TileDB-Inc/TileDB-Py/commit/3af75b5911b2195ceb66a41d582d9ffa9aa227b6)
- [Fix incorrect query_free in multi-range dense path](https://github.com/TileDB-Inc/TileDB-Py/commit/dbec665da3ebd0e0b5a341d22e47b25ede05cd7d)

## Other
- [Support '--tiledb=source' option for setup.py to ensure build from source](https://github.com/TileDB-Inc/TileDB-Py/commit/67e7c5c490caf97c5351352cb720116a1c5e1a0d)

# TileDB-Py 0.5.0 Release Notes

## New features
- add support for multi-range queries [#219](https://github.com/TileDB-Inc/TileDB-Py/pull/219)
- add support for TileDB array metadata [#213](https://github.com/TileDB-Inc/TileDB-Py/pull/213)
- add support for TILEDB_DATETIME_* attributes, domains, and slicing [#211](https://github.com/TileDB-Inc/TileDB-Py/pull/211)
- add support for retrieving list of fragments written by the most recent write to an array [#207](https://github.com/TileDB-Inc/TileDB-Py/pull/207)

## Bug fixes
- fix read error with multi-attribute sparse arrays [#214](https://github.com/TileDB-Inc/TileDB-Py/pull/214)
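> Editor's note: to ground the 0.5.0 additions above, a hedged sketch of multi-range queries and array metadata; the array URI and contents are hypothetical:

```python
import tiledb

with tiledb.open("my_sparse_array") as A:        # hypothetical URI
    # multi-range query: rows 1 and 5 plus the inclusive range 10-20
    data = A.multi_index[[1, 5, slice(10, 20)]]

with tiledb.open("my_sparse_array", "w") as A:
    A.meta["ingested_by"] = "example"            # array metadata key/value pair
```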
# TileDB-Py 0.4.4 Release Notes

* Bump release target to [TileDB 1.6.3](https://github.com/TileDB-Inc/TileDB/releases/tag/1.6.3)

## New features
- add `dim_type` keyword argument to `from_numpy` in order to override inferred Dimension dtype [#194](https://github.com/TileDB-Inc/TileDB-Py/pull/194)
- add `Array.domain_index`: slice over any range within the domain bounds, including negative slices [#202](https://github.com/TileDB-Inc/TileDB-Py/pull/202)

# TileDB-Py 0.4.3 Release Notes

* Bump release target to [TileDB 1.6.0](https://github.com/TileDB-Inc/TileDB/releases/tag/1.6.0)

## New features
- allow `tiledb.open` and `Array.create` to take an optional Ctx to override schema [#162](https://github.com/TileDB-Inc/TileDB-Py/pull/162)
- add `tiledb.array_exists` [#167](https://github.com/TileDB-Inc/TileDB-Py/pull/167)

## Bug fixes
- wrap query_submits into try / finally blocks to correctly propagate KeyboardInterrupt errors while cleaning up resources [#155](https://github.com/TileDB-Inc/TileDB-Py/pull/155)
- fixed OOB access in exception handling path [#159](https://github.com/TileDB-Inc/TileDB-Py/pull/159)
- raise an error when trying to consolidate an open readonly array [#172](https://github.com/TileDB-Inc/TileDB-Py/pull/172)

# TileDB-Py 0.4.2 Release Notes

TileDB-Py 0.4.2 contains several improvements as well as bug-fixes associated with the TileDB 1.5.1 release.

## New features
- support for NumPy complex types ([#142](https://github.com/TileDB-Inc/TileDB-Py/pull/142))

## Bug fixes
- fixed query buffer memory leak ([#151](https://github.com/TileDB-Inc/TileDB-Py/pull/151))
- fixed segfault during consolidation ([TileDB #1213](https://github.com/TileDB-Inc/TileDB/pull/1213))
  - *note: to receive this fix, conda and source builds should be updated to TileDB 1.5.1. TileDB-Py 0.4.2 binaries on PyPI bundle the updated TileDB 1.5.1 library.*
- fixed indexing with array dtype different from the platform default ([#146](https://github.com/TileDB-Inc/TileDB-Py/pull/146))
- fixed `VFS.is_bucket` when VFS is initialized with a Ctx object ([#148](https://github.com/TileDB-Inc/TileDB-Py/pull/148))
- fixed `schema_like` to correctly forward a Ctx keyword arg ([#148](https://github.com/TileDB-Inc/TileDB-Py/pull/148))

# TileDB-Py 0.4.1 Release Notes

## New Features:
* several high-level API additions (tiledb.open, .save, .empty_like, .schema_like), and serialization improvements including pickling support for DenseArray objects (#129)
* manylinux1 wheels for Python 2.7, 3.5, 3.6, and 3.7 are available on PyPI: https://pypi.org/project/tiledb

# TileDB-Py 0.4.0 Release Notes

This release builds TileDB-Py against TileDB 1.5.

## New Features:
* support for variable-length arrays (#120)

## Breaking changes:
* the Ctx argument is now a keyword argument, simplifying API use in the common case (#122)

  For example, `tiledb.DenseArray(ctx, uri, ...)` becomes `tiledb.DenseArray(uri, ...)`, or optionally `tiledb.DenseArray(uri, ..., ctx=ctx)`.

==> TileDB-Py-0.12.2/LICENSE <==
MIT License

Copyright (c) 2017-2018 TileDB Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

==> TileDB-Py-0.12.2/MANIFEST.in <==
global-exclude .gitignore
exclude MANIFEST.in
exclude .dockerignore
exclude .readthedocs.yml
exclude azure-pipelines.yml
exclude misc/*
include misc/requirements_wheel.txt
include README.md
prune doc

==> TileDB-Py-0.12.2/README.md <==
TileDB logo

[![Build Status](https://dev.azure.com/TileDB-Inc/CI/_apis/build/status/TileDB-Inc.TileDB-Py?branchName=dev)](https://dev.azure.com/TileDB-Inc/CI/_build/latest?definitionId=1&branchName=dev)
![](https://raw.githubusercontent.com/TileDB-Inc/TileDB/dev/doc/anaconda.svg?sanitize=true)[![Anaconda download count badge](https://anaconda.org/conda-forge/TileDB-Py/badges/downloads.svg)](https://anaconda.org/conda-forge/TileDB-Py)

# TileDB-Py

*TileDB-Py* is a [Python](https://python.org) interface to the [TileDB Storage Engine](https://github.com/TileDB-Inc/TileDB).
# Quick Links

* [Installation](https://docs.tiledb.com/developer/installation/quick-install)
* [Build Instructions](https://docs.tiledb.com/main/how-to/installation/building-from-source/python)
* [TileDB Documentation](https://docs.tiledb.com/main/)
* [Python API reference](https://tiledb-inc-tiledb-py.readthedocs-hosted.com/en/stable)

# Quick Installation

TileDB-Py is available from either [PyPI](https://pypi.org/project/tiledb/) with ``pip``:

```
pip install tiledb
```

or from [conda-forge](https://anaconda.org/conda-forge/tiledb-py) with [conda](https://conda.io/docs/) or [mamba](https://github.com/mamba-org/mamba#installation):

```
conda install -c conda-forge tiledb-py
```

Dataframes functionality (`tiledb.from_pandas`, `Array.df[]`) requires [Pandas](https://pandas.pydata.org/) 1.0 or higher, and [PyArrow](https://arrow.apache.org/docs/python/) 1.0 or higher.

# Contributing

We welcome contributions; please see [`CONTRIBUTING.md`](CONTRIBUTING.md) for suggestions and development-build instructions. For larger features, please open an issue to discuss goals and approach in order to ensure a smooth PR integration and review process.
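> Editor's note: since the README mentions the dataframe integration without showing it, here is a minimal sketch; the array URI and dataframe contents are hypothetical:

```python
import pandas as pd
import tiledb

df = pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]})
tiledb.from_pandas("df_array", df)   # ingest a dataframe into a new TileDB array

with tiledb.open("df_array") as A:
    print(A.df[:])                   # read it back as a Pandas dataframe
```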
} run TileDB-Py-0.12.2/doc/requirements_doc.txt000066400000000000000000000000541417663620700201560ustar00rootroot00000000000000docutils < 0.18 -r ../requirements_dev.txt TileDB-Py-0.12.2/doc/source/000077500000000000000000000000001417663620700153465ustar00rootroot00000000000000TileDB-Py-0.12.2/doc/source/_static/000077500000000000000000000000001417663620700167745ustar00rootroot00000000000000TileDB-Py-0.12.2/doc/source/_static/TileDB_Logo_BlueArtboard_1@1.5x.png000066400000000000000000000100561417663620700251510ustar00rootroot00000000000000‰PNG  IHDR,‹Bñ– pHYs››t‰œKàIDATxÚíÝÛqÛÖ†áßS»€©@t0S ÌLp/èž3¦+0TAh4`¨‚MU°‰¨ °/°hS6Eñðƒ¹ÞgF3™Ø†€…Ó‡u ôû?‘¤+Þ¼®Æs@Gaz%)ê¾ÔÕ¸äŒÀ'ï$M$Ýt`_î%eœIúÖ•üÄé€O~£€À, °X€À@` œµwÛx’´0ØÎœÓ ã’¶uCqÇ ,£º—%€KWW㙤Ŷ‚0­)Q`{4  À¡Þ]òÁa:0ØÌ¼®Æsß/” Lû’ú›Z¸jõSÏ•¤èT¿ŸfT\’8WÖ‘]YHšIR‘¨<ó2$]ueºPžï.ü>úf°{©37ã)%’î ¶ó(£>ŠŒ®½3—.È]×v(Î%I• 03Iå™…˜‰:Ô1Û•ç²Lç+å:+å#ô÷àB…î烤»8׳¤©¤üÜk`:P¦7+aæYRé~¦EÒΨ_ú°|Ñ“t+é[œkçJ(³rý 铤ÿŹfq®$Îm›´,_k ¾\Zq-鋤yœ+³ .Á¥©ˆ(S=5ý›L‚ €¦Và¿ñt©ÁeH`àpwq®Òºÿ¾—ã\ù>åK`ॉ&¢ÝJ*ã|·¹½,ü*t/UBK;®w …ÖëZºS¾-§,ßé6}Z,lZèˆÛŽPÍ Ä,B ÅК›·†”XØÎuœkB1´ænÓÈ! ÛûçXqþRe¯ý«5c[sIÛ™uäx{O_M{+[Ú¯Éep‚û2sEE¢EÇËôÙè™{sÄ}¾seëV|&°`+u5Î%åt<3÷ ÛI¦™š)¦Ø*‹dÿiñ]SÂPR¢fŽ6…’F›j:bV$6µA®ÃñÀý [ˆ™;/Ð$8{E¢y‘hR$Š$½—Mð&w»ÎÔzæå»(M‹D£"Q_Ò_’ª–~ÝÚõ†,€K{¹–®fáO5Í"mÖøZƹ .÷-l¾·n‘D àR_ªS5ý[žZú·>Õ²¼RÆ™šÚk€W/Ô…k&zhéWd”±òBKD`øøRMZ -·Ì€û=´|5Üä5àshi£3nBéJjFN™ùyý& À'CÙwÄQ¬ÍH-ÙÖ²\X¾¾TzeØìBVsþ®lkÃL uA˜öՌֈV¾š?ýµ…~Ìʹüïy]ç” ŒCKçztk¸ÙDÔ´H-ÎfN`ÐF@‰ÜWìÀ…”Þ–ÿôÚm-§/%•u5.)a¹k´g´½!ÅãÀÒ‘iÐï‚0=ê>ÔÕ8¸àr}¬«ñ€{ú"CÊÀ}eZ¾ä¶uã~î\€™JšÖÕxJÉcE¢…[yÙêYºõ…f”«‡@çCÊ• (™Ž·ø\OMUþm¦•š5®&u5^pF°£‰šZ«€=ü,m¢Ó-€}ÃJ¦fï/:Ý Ö¡ûBžaš¹l] ¦¶ÎÊÀ÷2sÓ2˜XT†A˜Î]Pèud·z+Á…~ØEF`1Õ7 ”{•« L§’þÕéjT¶ .Ÿ‚0-]Ç_`›—¢ÕZC=†7›…¶_&ø#°Ø&¬ ÔTÏ~8“]¾‘Tašpö°…Üp[Þ·DÕ7%À®ae$雺Óü³õ×®¤/A˜æœE¼Á²‹Ï5,–˜§»„•\Ò§3?Œ[×DD‡\¬åš…*Ëþâ\}ÙÍCS­N`°)¬Ü^Èá,›ˆ-xMI`Ù;¬\©©±ª]É×ýO €K+Kׄl`5JϧBsa¥t÷—…g5óãXxVVC ³ã¢ÍÀb=I—ÃÊPMgükÃÍNÜü8ÃJrÁaeé&Ó g«Š¤½U†/,¤\Ź’8×LÍ–5J¯Ö®HLÍàGX‰ÔÌZëƒA˜–¬E„ŸT²™ch u"]Ôö\éÇ*ëÙÖ¦ü,y­v…À`V–æ|’aÕÕxÎg®îNЏk5SœƒÏE²ùD` oñÂ'5}æ’úÑo`ù·ü:Ô~çÅžšêç!§ÎL͈²Cѱ{7_‹äí!ÑÀsnÛm>ŒÔÔÞLßXQyúÓ~E’(Ú S‚0Ò4ÇjÅo–…Øí#&Ùæ/v:°ÔÕ8Ó SaZìÆ½Û‹±o¹aZ}} [Úê€ú )Û·É¥®Æ35Q\gଥà2#‡€Sx4ÚÔoe£„¹ `݉îIÒu5N¬ú‡ÔÕ8¯«q_Ò} ŲæœEp4÷E²¹“-ÀªÌú‹©®Æ‘«1çjÿÝ4êm•ÎÓ‚"h]%é}‘ì~ÏXO¹ZË&–¿êjœ´½ß. 
Ejjr¬„A˜ÒùhϳšÒhß9o,€¿,ÃÅçºçÇÚq×ywà‚],/ÃʰH”íÒD`  Lû²ë@ýXWãѱa%´XùÀ:C@+z’¾Å¹fq¾ÿ‡ð“UÀxÖ ç1qÍC–qiÚs-éKœk¾Op!°~²z1OÞ˜[å&²k"°í ]p)ã\} €µ\sEgÛ •‹ LVû1à ŽæFÒ,η«ñ%°þ±ªE˜v ve)7ÚNÏͰ à8z’>ÅùÛ÷0ðÏÀ*°tå€ÜuVÜ ,Ü8¾[×)÷ÕŽï¬%øÇê…< ÂtÔ¡ãêwl;vs-©Œs Ö &°qÃv­&‹»Ôu¥øÊơʎìdzl—8Æ=­¦OZB`üFsàOX‰í1¹&›È•Õ ¥sçšÉËÎôÀ/LŒö¶>E€]ìšD®©¦t?rÃ’5s;õ Õ§8WY$?jˆèt ø…–·…÷ǼYõ¹H4w ö%}6Þü‹ À{®©Ãª†À»UŸ‹D‹"ÑHÒ{ÙMäxç?š´,Ö>®6cx\JÙ.LšXÊìïÊ÷‚tÍjʃ›8oÎ ð nßöLxi`´9E)‰rIF›K,€x˜¾mFxɪ†¥¤(¿Ë,Ã$ °^sCs­F‡xןÅbÉŒë8×xáønÈõÓšÒh;^šSÞIŒ¶ó\$\?-8 ÀË|!ûË5]m®¤D[{æÐ$ððÀªºóÒñ‹åŠã\;í!°ž¡ö`³GŠÀnvÛÄp“SJµ5‹©«ñ"ÓgÙLAþŸº/(Uœ1Ëû*ú¯´‹À?Vµ,CŠçÊÕ®X6Q»²ÞÀê¹E`üSm'£(qÆ&²«]‘¤œ"mõÃfA`,û ƒ0M(Nœ›8×PÒ­á&+Ÿ<ÜPÎ}ÙÀ¢Iðc¶’j¦¬O„sz‰F²¯ ™P²ëŸ†Û¢Ið”U{{¨ŽV…¤°&¬\¹ëÕ²)èY4­+ëlk±æ€Àr¨A˜f]:¸ L‡’æ4YaåÚWÓzm}/‰-÷k04í„\$Ô°^ª«ñTRe¸É» LG]86·ÿº¯è/A˜Fœq¾öÕŒŽ»naó%ü¢¬# -k±%ú°>Ë·÷)ÓüTM1A˜^aZJúôÓ•„¿ôã\Iߌ_ K̽ò¢¼µT‹E`ü6‘]çÛ¥[G+#5Ëܬùã¡Å»g?Ε¹kâcK¿æY¶ó¸œsyã\¥¤/-é$1Ó-à)7ëíDÒñ¦¯%} ÂôARVWãÖ¾@]•LMçßM–¡ePWc†Ÿ^Þ óJR¤f’²¡Úiúù%ðûÚwÅõŠ\Y[ )K_—µX–· ŽÝ¡°®Æ­ý>÷€ïïùÏû»Ð?°<óC_€îë߀Ñy8¤ JÃú&²ž|Õ­¤Û LÕ4?M-¦òwço¨f ˜]ö›ÐÒm‰ëk²‹¨å—åkžŠä,ú®,kšIZmæ½9ò1|2~éåÑ poNp‚Ú¼’ϪðÀ/úR‡¯8¯Ö®øXX=Ø‚kI<;lB vñ$V(?–JkºF\t`qÕ½œ{`«ûe*éžÐüâYÒmO[Ö>ÌÃ’sþ­CKFÈ~y˜ îh†¯õºøÀâF!ð¶¿gî™'It¾ÅÆëƒN¶G †ýÜoÅ«ÀâŒäW‡B€Ðò¶‡ºG„V:VE²¹EÄ‹ÀâH ×°shùûBïow|À:Ÿ‹D}VŽ £m‚¡7k ¹…sm;Ý7Iïu95”O’þpǬûÒÿ³HX#èHe}ï‚á|›àÕâ‡î!uÏuìtß”jfšýzî_Íjú«PÅ×®~‘0Æ<¨©UÉvùGÞ­%TWã,Ó¹šõ z\7ÀV÷ÍBÒЭã“ë|¦P—š%:FlxyfŒê~Y{¹øa]ó Lg.´0u?°ý½SªYÀ2Ñv«$Ÿ:¨d† Eâr<«™X” Ò¾Ê}ä䇖µ·«5»¯­Á™è$×?‡Yr3 ,VÃKN€Ž»s7'A“ °X€À@` °X,€À@` °€óöNÒHÍ’é§6çt踙¤÷p|ÿõ‰“S0¤UIEND®B`‚TileDB-Py-0.12.2/doc/source/_static/custom.css000066400000000000000000000003741417663620700210240ustar00rootroot00000000000000.wy-side-nav-search { background-color: #fafafa; color: #404040; } .wy-side-nav-search > a { color: #2980B9; } .wy-side-nav-search > div.version { color: rgba(64, 64, 64, 0.3); } .red { color: red; } .green { color: green; }TileDB-Py-0.12.2/doc/source/_static/favicon.ico000066400000000000000000000124661417663620700211260ustar00rootroot00000000000000 h&  ¨Ž(  svs s rNsPs s sts¾súsèrrrtsèsúsºs¾sÄs$r¤s´s´s`sÈsºr¾rÄrºrörÔrÔrprÈrºs¾sÄsÖsÐsÈsºs¾sÄsÖsÐsÈsºr¾rÄrÖrÐrÈrºr¾rÄs"rrrèrärrrrs<rÈrºs¾sÄsLsÿsÿsÿsÿsÿs†sÈsºr¾rÄrrrØrÔrrs rÈrºr¾rÄrÖrÐrÈrºs¾sÄsšs”sÈsºr¾røsäspsrsärúrºsxr¢r¢sNsRr¢r¢svÿÿÏóñž9œ9œùœùœùœù˜œùœùœùñÏóÿÿ( @ r r@r@r@r@r@s>r@r@r@r@r@r@ss|sÿsÿsÿsÿsÿsøsÿsÿsÿsÿsÿsÿsts|rÿrÿrÿrÿrÿrørÿrÿrÿrÿrÿrÿstr|rÿrÿrêrÒrÒrÌrÒrÒrÒrìrÿrÿrts|sÿsÿsˆs,sfsjsjsjsjsjsssÿsÿsts|rÿrÿsˆssŒrÿrÿrÿrÿrÿrÿrÿs srÿrÿsts|sÿsÿsˆrNsÿsÿsÿsÿsÿsÿsÿsÿs ssÿsÿstr|sÿsÿrˆsžsÿsÿsÞs¨s¨s¨s¨s¨srsÿsÿrtr|rÿrÿrˆr¬rÿrÿr rrÿrÿrts|sÿsÿsˆs¬sÿsÿs ssÿsÿsts|sÿsÿsˆs¬sÿsÿs ssÿsÿstr|rÿrÿrˆr¬rÿrÿr rrÿrÿrts|sÿsÿsˆs¬sÿsÿs ssÿsÿsts|rÿrÿsˆr¬rÿrÿr srÿrÿsts|sÿsÿsˆs¬sÿsÿs ssÿsÿstr|sÿsÿrˆs†rârâsösÿsÿsôrârârârârâs rsÿsÿrtr|rÿrÿrˆr˜rÿrÿrÿrÿrÿrÿrÿrÿrÿrÿrÿs rrÿrÿrts|sÿsÿsˆs˜sÿsÿsÿsÿsÿsÿsÿsÿsÿsÿsÿs ssÿsÿsts|rÿrÿsˆrr,r,rºrÿrÿr°r,r,r,r,r,ssrÿrÿstr|rÿrÿrˆr¬rÿrÿr rrÿrÿrts|sÿsÿsˆs¬sÿsÿs ssÿsÿsts|rÿrÿsˆr¬rÿrÿr srÿrÿsts|sÿsÿsˆs¬sÿsÿs ssÿsÿstr|sÿsÿrˆrJrnrnrFrsÿsÿrtr|rÿrÿræsÊsÊsÄsÊsÊsÊrèrÿrÿrts|sÿsÿsÿsÿsÿsøsÿsÿsÿsÿsÿsÿsts|sÿsÿsÿsÿsÿsøsÿsÿsÿsÿsÿsÿstr"rDrDrDrDrDrBrDrDrDrDrDrDr ÿÿÿÿÿÿÿÿÿÿÿÿàþàþàþãÿÿÇãøÇãøÇãðÇãðÿÇãðÿÇãðÿÇãðÿÇãðÿÇãðÿÇãðÿÇã€Çã€Çã€ÇãðÿÇãðÿÇãðÿÇãðÿÇãðÿÇãÿÿÇàþàþàþÿÿÿÿÿÿÿÿÿÿÿÿTileDB-Py-0.12.2/doc/source/_static/tileDB_uppercase_600_112.png000066400000000000000000000117211417663620700236660ustar00rootroot00000000000000‰PNG  IHDRXp®sRGB®ÎébiTXtXML:com.adobe.xmp Screenshot nÌÞ.IDATxííÛH†mãþ¯X˜ ø¿u˜uØ,G`;ÏFàq–0véVþ¿ÀÒp§‹`ï}µâ@£¡4Å’šäS@ ›Íîꪧ[d ©‡ùô#“Ó?ÿüõçË»k63•§kû)/Sr_ {¢Þæ_-ˆ×m¿­µ¯ú/ts¹³y~ß噎™ÛÿjÚ”ª³ZΗëø›sU¼Ý¬li¿²%[]3“Éá(®—Á0"}?ÖõèØõþè+qƒÀ&u}” ¸z!uÂPJ¿¯Ês%_ÞO]29u~ß{¾ë`ulÅ׻Ūo3v¹U!Áj'Æ @ ŒÔ3ßì½J Õ;)pVD$²;ÉWAVÛå®—*˜­ï0šo¡íQB‚u>:C€NB 
×(Ö¥(!(Tø,½îÈ®¥ß ÿÉä›u"}»–Ôš±.'_ ÖA¸h @H‚@./¬ï” øâÿ^êdk¡-Ò\f¬Vw¸Ìx¶/ãGîˆ@€ ÐYcyþAú—K騳‘¤ëx&×üË7ÿßRïï¬x8@è 'V~£ùJòÎxÝ=G§ry™Ìîr½G„ Ø5Ç @ >ßFË—è°N Ó_”dý¦ÇY—ᣠw€·bü\á_Ô=6l#Á.Z"‡ pZÅi‡c´ŽpðXÿ—#e÷ýxÖw d•ëŽòˆpe@€@¿Luñݯ’‹&“G¿‹ó­÷¾‘`%7O8@h•À;]ü}§‰#`¾þ Á`Ý  @è-‰%0Q"›WC`U$ØB€úK _¿ø÷7̳GæOq.…«"Á€ Ðo¯ú^ÑÝ$²$XIÌN@€ øÖ­7b‡8Ì^8l¬aN>QC€À0 L†öI£Î=߃uRæ Öa…|x„ÿ¹úòŽG¤+$0“Oß·øµPý|íØXåÑÚ~U|¬B&õ1·‰–g`=HËök`¯âºÞuÛ¬·i£œùS›$Xm Ä  ‘ÀG}¹d±gà÷¶ÓE9“­©ô­4JN‘ĵê»_¶epÅx"{f<¨£Ÿƒ¢ñˆ0ˆ,f!@GˆJ°Žt«?Ý•ÈQÑð)Â(²Ø½C`µÞ9@ Ôø^WÙBÝÓl`âÜÁº‡!@=#À{°nOhÈBî`݆Ì Ð/ùF8>‘.6êØ…†M äœ@‚5ìEEôè*LŽ[séOÒ±Ô’/ÿîÿ§X5-µõã'`.{‹@hL ï V&2ÓtüSQòB†ó ã—GÚ=¶ÿ‘Ã7î^ªç¬qo:¦N “ƒ¹Ôïq"år[’ï0ä$ËúMZ¬ÊÚ €î'0„+ò'î'|·ÅônUk5—GZJÕ¾áj8Û·1í:A`"/ýŽ·™ô2Ö ÖJ*R¬ÛÛRŠ@¨%Ð÷«6h*!$ äòÊwxT¤©‰}²oVË\úQz--¥ nJ œ‡ÀTÃþ%ý"uy$킌åä;©}ÿ]šK@K$X,@à\&ØÉÉi&í²8'ˆÖ\Š@'@‚5ð@ø8ß¡òk&í“ä ÆI–ïl!€À€ ` xò g àäÊ ˆïøôY^+¸?¤ŽÒ&ò:%ÁJ{Òñ}"à“˜“«qŸ‚Ú‹ãt¼!'ïãr8Œ@È9‰ë°I 5 МÀ[u 9‘5w)¼§ãuÜ 00$X›pÂ…À™d÷õ™Æ>÷°Ž{tn'¸KàɧÙÝÚvjH°Úáˆ@`7W»÷þè¤÷ ºI`å6 VYìB놞`<_‡AH†À³(OH°¢Èb¨d*X‡,CO0‡<÷Äž(=ɵi”{$XQd± T²ª0ðm>ðø ©ð÷Õ9ÉŠ¿E›€À:|}'¸¼ýùcä{´¡  ÐCºs5QXÑŸj.úž`ùd[¬ÖG¾Ú²ÙN Ø8”oì³  øuþ^:“–Ò&2V§\ê“îH!Y„QlB`¨”(åk±g*[ëä'Uú5nñ6ê5¾`õçk߬¹½XxÏò¥ÚE|wM!»MüQ·“H´o¹¢ør’Hd( ÔëÖ¯õcÄý+Z£Ù1Ò¸C êµzg ïÁj@.€@2|çêØäj=˜B;åze@(ÿüõç9 ÖÔh$Eà*À›2À&&!áð?~H°†3áD ¾ð«Eß‚"@ Ó|Nš9,S@ . ¹êâ¬á3úMà½.ÏM$Xýžh¢ƒ @à4æJ®.«¡H°*l!@€@3¾kõr½+ Ö: Ê€ @àp×þäàz·¾Öz¬”!a+LÿFSq€À!¦úâÓ¯J²fU'¬Š[@ /F $ïK0Ät†À;%Y…’¬Òóˆ°3󆣀 $LÀÿÜÝÜ='ÁJx¦p € N˜è.Öòm$Xš7œ… @ q¯ì VⳄ{€ tŠÀÄÞ’`ujÎp€ Ä Œô˜p§Ÿ%܃ @ –ÀEm­~£ÔßI¥$'ß8î÷F=–úS&”g$X‘x± @€@%QÅ.Ã5Ç«öo”|½Vß›Oüí²ÓðؘG„ ÉÑ € nPòu%Ïoý´MË‘ä$X-Å @éP’5“—ó(OI°¢Èb€ Ô |Œr+Š,v!@Hw°RŸ!üƒ @ [jÞßZÜÁj %† @€À?øšV Ð7 T$T™ O¸è{³aÔ6¬¶‰bï²s ʘÉð{*.’õÇ Ôøœ‘·íÛ&нsÈÎ1(cB€¶ ÁÚF†z@€ Ð VCptƒ @Û`m#C= @hH€«!8ºA€ møá62ÔCC%0Uà/‚÷OrÌìbH V‚“‚K€ÀY <Õèy€_lbH”ë'¦¬¯¦ñb$D@ ˜ V=ಾúèZNÜG#Ä I`qdÿ¾u¹6“`v™ŒN;£%D LÈ\ºAàq›ß‚ìvÕlȵ™«~9Ìë«[©ÍZ±‚‘®(ùï+Ð_LCûൽ§Æ­ž|ú‘5î|OG¬z@‹úêVjyÁ´‚±“F¢Ökª“˧!°ÀêÂõÚ.¶<¸#QŒ`m_Kó퇎:òê¨Þtî2¨5ñ•]àœwÁI|„@C‘׊ECŸúØ-êü9'ÁÚ¾\¢`®!­Èð”A!ç²; ²ºÙˆ¸G©ý& »WSEø:*Ê?ýye»KvÅ9—¿“ Ÿ$XÛÉF~gÍ="Žœ›À÷@ÞÉö4Ð~ª¦w› Q&{ãTƒÅ¯~Ð,ý (­QBr%²âìÄÊ×â(ùÊnG¹GÖë1>K i%¥ V¤ …ô6(,¯)Ÿ”_I?J«õ[¨ÜgqÜIßHgÒc$Wg'lQ²ˆ2ŒÝ³x±º ÒÔÇ꘭:µõzŽ–yô©Ù×™«ùZrésiµ¯bˆÌI°¶s-¶jíˆ'ØzìEw!sé…I—@qת5ußPÕšYoç$Åë¨k2’ÃN.5ñßýÇÒhiâ[´OØ?ŽÀô¸îgéíê;%J¾ìáp¾G›S6)H°¶ã^èOˆ§8ñn÷b¿##5Ë÷kJ«3¸Öø“3ûàáëÖŒë"da´Æf]L5ͨ‚À  Œ>ï˜Ïs½Ï÷`Ý3i~Ô‚@ Mûï±…àËlôÁDч ˆ¡Óf¾ðw:‚n8¿Ìx“ûîɺÞ}˜£8˜€×ÔÐNpóƼ¹0ÌÀ¹ pÓà43àó<߃uëRÇg÷´á0!àäêý!zÒvyÂéI,MÂàÂÖ„}Ú$PèîUѦAlÕð]ÂÒG¸ƒUËçVåoÚóE@[®dhhkjè ÆÐ̶^;ØiFÀ盗ͺÒëæìœa)$X‰íÛR‡üé*mâÉ®<ëe¦ Ë!NÌÉxSÝUIÆ£~:r‹3 Ö~“‡ß‘6¾ht|Çjû>I-Ú7ÛÈâ•z]KýÍѹÀ±f2PH‡²¦üzv’õZúVÚ'Y(˜¡Ý™ìÓüu=–RüFbu’i,4Š“+ŸÏj¥+ò·»*§}2.ª¶¥|°Océ+éD:’"hJ TG¯©\Z­){+ Ev)Iduý5T*†÷Ò™Ô±EÉßQ†±Ûi×òþ3‰ÕIæÐ¬ß‹uqßhm$X÷Ñçãs÷r¥mŸKsi&E Є@¡NÖLê5õB:–öUJæ×ïúL¥]Šw!¯¥¥…À©xíÒÏÒk]ì½Ä˜Ë´_çf]î; Ö¾¤îoç­Õ’IséSéxUÖÀÞJµ¼Zi¦m.­ø‘Ê}“…Zw¢ý*Þ”b-å̵ôëj« p¥F˜K¿IùÂPA–RöÍÛ ¬y{ÿ`!Á:Ù^Jµšm´k?“zûx­OW¾—£C ñ7 @€@o`õf* € T`¥2ø@€@o`õf* € Tøk’y·ý(‹Ç8@€’"ðÏ6Ip¬)FIEND®B`‚TileDB-Py-0.12.2/doc/source/_static/tileDB_uppercase_white_600_141.png000066400000000000000000000072571417663620700251010ustar00rootroot00000000000000‰PNG  IHDRXk itEXtSoftwareAdobe ImageReadyqÉe<QIDATxÚìÝOlWžð"à!˜@<+|Fk"åâ¢HA{HŒ%¸N€½"áw¥ pÊ!+a9äÌö[âLà<Œl&‡ÑŒbç°>l¤à°Z™Ul6˜a€°þôÄ0`üçUuuÕç#Uº‰åêî_—»¾ýÞ«÷6<|øp< •ÉM‹ÿÙ§鼤€€ ` `X€€ ` `X€€€€ `X¬Å¦¢`rr2›››«Ü ß·oŸw(Ƈ‹Š|€¡¡¡lbb¢r/¼à— P«ÏÍøR:>>îÍ•™ÐE˜€ `TÛ&%(Vã(š2@zþ^vëÞOÉö·«{SÖ×½±ç:::šÍÌÌT¢nGÍvïÞíÚ:òÕlé9ðjW¶½ëç6‰m‹÷zº ÿl)Òù™…ìúÂý¶<öÒú=«¾/ƒUSkÊýϾˮÌÞM¶¿_lÏ>^ÜŠ066V™AɃƒƒµ–òs!åcnú×®ÅÀ¡a ç• _çÿ|»-µ\I #€½ùê£ZömݘíݱYÀ€&Š÷GåïCËÞÞÍyHxïñ-/ªa–]ºq牟µkÜF=ãVÀ€‹Ð‡éGÿÞ¿sKvàµ-‹·/×®k¬(1l$¶–¨[Ô1kë(`@É¢u&o¡ùÓ£°5üÆ+Z¶V)Z»ÎÏÜηíß> [‡û»+SG±Ú¶Žü~6¼åæ]YGØjÕqiKW»TºkddÄQ@#<êFœ-ô‚œ¦ÔñW¿ûŸ¼UðÓ·z¬g9yò¤#hœ"—¤Ù°aƒWÜo¦oe×dŸ¿ûKÅX‡Ñï~ÌÃÖ¹zÛ2>K!TLtw}ò§b¢«0º ÛAÀ€Š†¬Ÿ¾¥ BVÌ«(`¹è.¼¾ð@!Ö)ï.,ù 
*²X¿²[,¨°è*L¹.kSÅ€÷2§o° â.Ýø‹"$]…e1“;$°gÏž5ýÞÜÜ\699©€À²~ûßwòYÊYŸ2·° Ó§O¯é÷&&&²¡¡!„6ŠÉ(zÖ¶pðõÛ²ë ÷… w ë‚úz¶ú3¼îߺ⸴nO ›w³kÏùY ±ßxÊX(ZÀ Ñ"\­yýº?ßÙ×㊿ùÃÍäA+Æ`• ÖjW÷Æ•×qdzÿ÷Ço[S+Ñâ4=WNÁ€DúCFÌ^Ä üúíû©cÔïÜû½ùÎÉë¸PN,H(–eùôíôkàUaã²±\PYóa XXt•õu§…s­ŽFXMÝŠuë¯åLy!`@Ö:pþyÊêÚªš7w·–Õh;•Wö¤œ\oW·Ã(7\ºqG!Öé½ÞÍ‹©¨óž·3ÕýöWá«eÊv¥¦jèe\‘)`µsŒ…žžž5O ÔSK X@G‰™ï¯^½šMMMå·±­vFü}ûöå·¼"€ f»wïÎ7«d£££ÙÌÌLÛŸÇ©S§’¿?î蚬ÄÑ£G@;L§h‰º|ùr Z­RëÕÚϳöá+‚W„®¸ @À*ÐØØX²øõIº¿8‰¬5`U¥&+Ñj¥ Ú"HµŽ«v¬ÓÛ™3gòGØŠ¿‘èº èÑÍ¡*ZDã~Õ_l¸" }Ú›Õ&EäY1yfkÍ8.ž´žþY§L’Ý„¢†« ‘ÐâïÒU—ùýlåŸã{0ECÌlúÖ3vmáÁ ǤNÛUK«ÌΜ9SÚ·z:3dÝík¹ÇX:¦)uÀêïï/ôuÅsO9&k½êØbH{xmKõÖìÝÒBÒZEëÕvc°ê#ºh ÉÇHã̼^šdÿΗ!Ãý[K{,«Ñ:á—åÄxººÿ 4Iãêhv((«Õ¥îì,¯%Ð;V ¿²œU÷]åMѤ×Jñ~=°]Õ¾ç°J'Oã2XN\hPļiUqìØ±F¼Z¬I®ú:ÁhJÑXvP°J´ÞËÉ©·86â©sðhBËŽÖjRÙ¿sKö±Ö«$>}«§ô *`•(Z°ê|eý"€Ôù‰×Vç–ܘw̼t¤˹|þî/"h¹:Üß]úã Xm8ŽkÉâ¹¢;¹®!+Žû8þë²¢‹×|r$ù xã•ìÜû½¶'ðù»ÿжV@ï^Ä7Üo¾ùÆ7]– YqŒÔq,O+dEkOÄ{ïU]^í­Vç>èÍ»³XŸ¾îMy-ÛÑrÕRé‰Fc©¢Ä|»?”ã9Äı¦šµyZ´ò|ÿý÷ùlýqŒÔiAâYÑÚóuêñÃ1Þªˆ«?ã=Ÿ™™ñGÐ1Ö*Z­öîØ¬ ‚U»º;*`5!tD+VlqòŒ™¨/^¼(lñ„8ÇÇÇØØX¥f,OuüÇ gÏžÍ_[ÌÌ^eÑÍk€9`?ÞgŸõ­Uû_Û’ÏËä*Á4!õŸÿ±;¿­ KåTèÛpt1ÄÖZÒdjj*¿õAKëÄ[ëøh…ñ:´lµ.‰­õE£*a+ZÛ¢î1yhÜ?ÉZU,Ó­T±žñUë-UQÓ¯½\Ùz XÔú@­uÉwkQÚ]q?6ÁËññô±ÇDܯz+ÐJ‚d„­x-ñš._¾œß–ñºâËN¾TkYDAjÛâÉþÍÅ0ªoë¦ü–õ…©][7æá4jÙ)UÀêñ¡Û³º%ZA+NFóóóùIhé¢Ï‚XsŽ¥s0µI¯¥ÇD+ w‚7±µ·ž{®Ökzúx_;#¬Æm,FÝz,-TÍ“/ü‹Ÿ°Ÿ#¿·íñï?•¦þ­:¶jÛÉu°j u5âj®J\ÚÊQÕJê š6»vëxxÑX¡N \­0ù¼cýE¯E€ª‡ÿ:¼Kˆ«oÝû©ôÇmJ°ªº=LcQnhñZ Yt]Ë(; @À°X»Â¹Çr<'@Ùb¦øª]äSY+·áá"eHfB!@b€€ `XX€€€€ `XX€€ ` `XMñÿ jÑ%Mb¥¢IEND®B`‚TileDB-Py-0.12.2/doc/source/_static/tiledb-logo_color_no_margin_@4x.png000066400000000000000000000270571417663620700256600ustar00rootroot00000000000000‰PNG  IHDRÊoz”ø pHYs,J,JwztM IDATxÚíÝÝqWº6ìS>'¿60ˆ;b€8¨9µq.øUæD VC!ƒ Á¼døдhdñ@ÿ]WK?¦h¬g¡Éî»W?kÿù¿Q’"C¹ÝÌ×ÊìÛ`8'«Äám7ó™*@·ü”d”äƒRÅ2ÉZ€»¶;š™@·üE è3A9½&( ×åôš €^”Ðk‚rzMP@¯ Êè5A9½&( ×åôš €^”Ðk‚rzMP@¯ Êè5A9½&( ×åôš €^”Ðk‚rzMP@¯ Êè5A9½&( ×åôš €^”Ðk‚rzMP@¯ Êè5A9½&( ×åôš €^”Ðk‚rzMP@¯ýÔ²×{Óòzßy˲nù5Ói’7¦¨C«‚òíf>6eß¼^*“”m}ýƒátœä73 ÔAëzMP@¯ Êè5A9½&( ×åôš €^”Ðk?)Ðt“2cUØ»õ¢ÈZåìÉ`8'­Z¬HrÞò1øAï½ÒÆ÷ìZ€;ÿÎï“ü®ßTçDëêüH€Þ” _ÝØz¸¡õûM¬$wÂô—”ð\çù¢H’I™Mv¡ù2»à|¥LpÐcðñ¯©ŽÃ$¹Ï.8_ew3k%@ÿ1A9û0Lò®úø:8¿¶âŽæ$¼™õ ßfž/“,íQðG‚ráqpþqRæ&Éuv¡ùZyàèÞT7³îSÝÈŠà\PÀQ<¬pýµ ÍËXiu:Iò¶úxx ä!4¿î[1þâýÀ‘'ù˜d=)SNÊœ) Ôn˜äç$ÿ𔹫ŽÍ‹¾ ^P@]N²kñïI™å¤L¡$ШcóqhÞéZ‚ršà<»^æk94ÊãZëI™ËI™Ó® RP@“ #0‡&Ÿ¿æKÛ¤QW&( ‰æÊò°Êü?UÛ¤qÛ$( É†ÙõI^Úôé<ÉomÌå´Áyv=’¯ºØ:rŒ¶60”Ð&?'YiÇõ˜_·©‡¹ €¶yhÇrmu94ÖÛìz˜ÏÚpœ Êh«·‰Í>¡á>d÷ȸÉ/RP@›d·º\ïrh®avíX{œ Ê肟“,'eΔ}œ6ru¹ €®x“]X^(4ÖÃêòY“^” €.9IòqRæJ) Ñ>LÊ,›ÒŠEP@ý<)s­o94ÚyvòÖÞ2IP@W½Í®ËH) ±NÒ€–I‚rºìMv›Úäšë¡eÒe]/@P@×=¬X–C³ý:)SÖñ?”Ðaù…R@£½«#,”Ð'IþUw/dà‡Ž– Êè›Ârh¼£†å‚rúèã¤ÌX ÑÞMÊ\ã$( ¯®mð ÷ó1ž”ÐW| Ë¡Ùþˆ €>{ËO•ízRft¨/.( ï„åÐŽãôúPÇ© ’7IJe€Æ§ÙÜSP;o'efÊönRæbß_TP_|8DìU¹ï,‚rø£ò›¯v’=·J”À$¹Vh´·û|úCPÿí~åÐxWûjÁ"(€oû0)3Vh¬a’Ë}|!A9|ßÞ7 öêrǨ ¾o˜hÁ v’äêµ_DPîg-X ÑÞMÊŒ^ó~RC€V[µüõ¯Ma¯ß+ç¦è¿-Š,ëúŸOÊœ%¿·$}õá|ìyʪn´Óm’»½Þ³ìVJót—yE¿rA9@‹m7óKU ­ï•Ápº53‡µ(þ|ÁDµsüèc¨jß5œ”™- mXZê²Î›V/UõÞ~¸áuV}Œ’¼1¥ÿ¥¨ŽÑÝ”ôÔ¢È:»•Òeò{p~‘¤ˆ î[.'eʪnpŒcô.ù=à¿~øûGú¸:f¯»øE^د\P@’߃ó«$WUh~™]ð¤ÄÎæJAÍÇêC€¾L2«‚ó‡›\}n«t™å6óà¿,ЬE.EN“¼O²Q•$É[{ÒÀãõnQ¤\'ùk’_’Ü÷°×Ÿ‚rþTÀ"0p¥4øx]W½ôGég`^¼ä Êx’Gù?ÒÏÕªÞLÊ—…qpÄãõî«À¼/^ÔIPÀ³,Š\e¾}êqfÞ ´äx}Ìÿšä¦C>™”ÏËå<[¾Iþ–~¶cNJa9­:f×Uó>¬.”p<‹"Ë$gI>÷pø—“2§Þ´ì˜%ù{ºÝ>IPÀqU«Ë/²ë]Þ''I.½há1{dœî†å'“2gÏù‚rö¢ê]þ¿é×FŸV•ÓÖãu•n‡åÏZU.(`oªðí,ÉmO†|’´y€¯ãt3,?ç“åìÕ¢È:»ª/aù̬Óâãu•n¶Òz€z-ŠÜ¥?aùpR¦0ë´øx-“|êØ°žÕ§\PÀAô,,·©']xo:6¦ÑS?QPÀÁô(,3)Ÿ×x¬ví†å4CÂòÂlÓòcõ:ÉM‡†$( 9ª°¼Hrßáa¾›”95۴ܬCcyòñ((à(EVI.:>ÌÂLÓòãt™î<ýqþÔO”p4U÷ѦžtÁUß,(à¨E®’|îèð†6õ¤®»2Iù´>å‚rêP$ÙtxlÐZÕž]¹™õ¤>å‚rŽîÑæž]taSO:`Ù§Á ʨEÕ¯üŸÚIº¿i)Ý·ìÓ`åÔi–n¶`”Ój‹"«Ž eü”OúÉ”@· †ÓQ’Ñ3þÉÝv3_©ǰ(r7)S$ù­cC{;)sZµ˜¶ºIrÞ‡ Ê ¥ÃéYv›}ýëi’7¯üÚÿ¸I²®~¿¬~]e¨/ͯµ(²œ”ùœämdžv‘¤4ôXonôÊ áÃéC>®~å•Aø3 «ä«EU 
þ¤/«_WV¤ð—ÕϺ“IPNÛ­Ò½Xß$(€†©VŠó%6ü%?éçÆìÕ]VXËífîÑs¾kQd=)s•äC‡†¥ý ´„ jVõg·êlœî¬¤;ÏÃóÛì‚ók-[øŽ«ìV–wiUù8ɵ©¥¥–iÿÍ«õS>IPÞŽ §ezÒ4¿Ž‹·ÁpºU†'»ÙnæcÇätœîm2S—_¶›ùL€žþ<9Ë.¿ÈqÛ¨ÔéMõñó`8½¯BƒevÁ¹•võåðƒáô"»þ†?«F£$ù8N—ƒáTÐeW~NMJa9­2îÀÖOýDA9|GµŠü:É¿¢y›œ'ù÷`8iÇÐ~Õ¦žŸ;2œ±Åûõhî… ^¥ZE¾NòV5ZëC’•ÕåpÝ‘qø™D›\´üõ/ŸóÉ‚rxä«Uä'*ÒzÃT«Ë• ÕåpDU› Þô'Oåð»Áp:ή¹UäÝóa0œ®ÃéH)Ú§CíWÞ˜MZâ²cX>ç“åd0œ^&ù-z‘wÙ›ìZ±\(@+ubUù¤Ô§œÆ¿GOÓþ¶+÷‹"«çüA9½ö¨ÕʯªÑ 'Iþ¥ @+i¿Çq™ö· |ö÷ A9½Umò¸ŒV+}ôa0œ^†ÓS¥h‡ªýÊm†"(§±ªÕä]h»"(€§x’ëÚ_o“,õ-h•.¬*÷s‡&ëÂjòûE!(€ §E’wà"€×{è[nu@;t!(?74ѤÌY’}ý>!( Wªü£JðÈIv+Ëmò ÐpÕæ|÷mGHBÓ”}‡ €Þ §e„ä|ÛÃ&Ÿ…R4Þ²c™FšdRæ*ÝhI¸Y/û!( ªüJð…å·êÀ¬(§1&eŠ$?wd8W/ý‡‚r:OHÎ3 ËšmÙ1ŒL#MP…ä]yâò>¯h#( Ó„ä¼° ¡^ÚV¡aFf’ºu,$O’ëE‘»—þcA9%$ç•„åÍuÛò×n ©SCò$™½æ Êè¤Áp:‹œ×û8Nõ‘hžeÛ0)sj©é½W¦{!ù§E‘õk¾€ €Î©VP öd),hzÂ3MÊœMʬҽÅ$÷yåjòDP@Ç †Ó‹to… õ:É.,·ò 9ÖƒŸ+ŤÌé¤Ì,É¿“¼éà¯^»š<”Ð!ÕªßR%8a9@ƒtdCO+Ê9¸ªù:Ý}Úr“äj_è'oº  0¯³ 4áÞTb…R4ÂmÚ½:ÖÍW¢êY³ ;>ÜËE‘»}|!A9]±ìÁ…õ{7NWÛÍüJ)j·N»ƒr+ÊÙ›*¿¨>ÞödØŸE®÷õÅå´Þ`8½J7û-ÒL¿VaùR)jµJAø/“2ãìn¸\$9ïÙðï³ç§üå´ZµyçÏ*Á‘]†ÓÑv3¿S €Ú¬[þúÏM!?2)s–]›žÓìBñQõk߉ûj¹ò@P@k †ÓQlÞI=Nª÷Þ…RÔf­<Ãog]ñiŸ-WüE]h1›wR§·ƒáôRj³jû&eF¦žå6»J÷NP@+ †ÓYl5®~µzÿûÎÃéåv3¿R €£kûªò³êg6ðçÞ/ŠÃ+‚rZEË•?¸¯.®—IVÛÍüUU¾®þ¸üFíÇÕý¸úÐþ‹Ù`8½þÑÍönä¼Å¯ÿÔ½_‡_(#( 5ªÏ}o¹r›Ý&¦×ÛÍü¨›˜UAü2ÉU5gI.ª¾ß¼8©êráH8ªµ@§ýó!y"( ]ÊžŽ{“]8~Õ¤ËUP¿Ê— -‹êcØÓyz;Nǯ]Ù@¯XQß÷iQäòXÿ3Ay;\¶àg‘ä] k{[Õ—§¹S‚¤ þÖ°×t+ù€Ž §EÚýhõKÜdŽ_7ý…Vþ,»Ðü¢:Ç:ïá[õ*»ö4ïú¬ÍüÌ€o{¬•äå-pìGj_xá:niyï¬øáÇäÃiM:ÝÄ:m0œžfÂöÅM’Y[ÏSª`ÿºjÍr™v.¨x©7ƒá´Ønæ¥#à8×õJsôÉEßöK©öú¸ §×IÊt·¥ÎIv«ÊgŽd€ïúÇ¢ÈU^ˆå4Ù¬£ãºI2êó¦âV—îð0µ_ø¶Û$ÿÛ”<”ÐP^Mþ©êE~×÷9ÞnæwÛÍü"É/âÉ`8-ÍðË¢ÈYÝ­V¾&( ©fÓûíf^˜Ú?Únæ³$ï½x¡@+Üd·Š¼‘çG‚r§Úñ¢cÃz¿ÝÌK³ûmUmþ–]ïö.†Ó±8¨µ@£Ý'y¿(êß°óÏÊh¢Ëì6Cì !ùT}ËÇé^X®W9ÐG÷ÙµØ-Š4þ\XP@‹ü¶›ùªcóŸ$o«žû}ñ)»€|¶(ÒŠ½yå4Jµùá°#Ãù‡üù¶›ùuº×³¼0³@Ü&ùë¢HÑ–€ü €¦):2ŽOÛÍüÊt¾LuƒáŸÞ×­ò&ÉjR¦œ”9kÓ ”ÐU{Šó å6úR¿Úv3¿LrÓ‘áØÔè‹“$ï’ü{Rf5)Û±`@P@“t!\¾Or±ÝÌïLç^\¤;›{¦è™7I>NÊÜMÊÌ&eN›úBå4ÉEÆ0ÛnækS¹Õ ‡Âû ÕN’|Hòÿª¶,£¦½@A90N/ÒþMÜ™Bxµ½„åZ¯àÂê1V€W;U »°üò5_@P@]åôp€µÀÞü:)_¾ª\ëê"(§ïÞ(À«[þúצðhþ‘vö„?Ëo®ò¥}Û¹iý/'eV‹âùs-( .#%àHV‹"Ë¾î¾æI™q¾èãìÂõ“ÏõrRf´(ž·€ €ºXMKï †Óñv3_ªÀ‹Zþúצ×úÖ €Iù{h>Nr‘~ç'I®óÌ'Nô(àèé¶+À> Ûüâ… œÃ½·EÊE‘bQä4Éÿ&ùg’MOJp>)3{Î?”P›ÂÎH ^fR:Ÿ€§ZY-Š\.ŠŒ’ü-ɧ ûä|ú¾H‚rê`E9쌔 ·ç7¦:,Š,EŠ$ÿ_’_’Üwx¸åS?QP@¬œO@EîEfÙݸïj`þæ©-XåÔa¤À+µ}EùÊÒó³$Ÿ;8ÄËjsÓ?%( #%z~>qg i’jЋ$O·V—Ÿ$¹úÑ' Êê3R€Þ~µ¢œFZ¹®Ž¯Û ëí¤ÌøÏ>AP@læ ;#%èíù„å4VÕŽå,ɧ kögÿQP@N”x©I™ÓœOXQNã-ŠÙmôÙ綪\P@Û´þé´EaE9­y¯Î’¼ïÈpfßû‚rÚ¦íAù)¤MEÊt£ Ëù¤üvë;A9m£?9YÕ†ås†2ûÖ_ Ê8ªÁp:RøÝR ^¤íA¹þä´U‘dÓò1\Tûü €c)ðJoZþúå´RÕ[¿hù0N’\|ý—‚rZcRfÜah½Bk-Š,“ü³åÔÐjmo»ò4B›Í’Ü·øõ¿ýºýŠ €6·üõßšBÚ®jÁrÕòaüaU¹  >zÔ<߸å¯m 鈫´{U¹ €Z á =jžaRæ,»øœ AÍ:°ª|üø‚rŽj»™ €—w` KÓH‡”-~í'7”Ôd»™/UàYÆÃÚ4Ò‹"ë´»ïþïßSå´Å¸å¯ÿ¾ ¡KÊ.|O”ÔãV ž®j‘ ?94ϲůýìá7‚rêp¯`#O€gºèÀ–¦‘®YYµøüþdRf”ʨ‡ÕT ,x.A9xoÂ(”ÔÅŠr€'š”9K2ìÀP, «ÚüÞ'‚r”ÍWt` ·‹ÂMRœ×4Ði"( K% ï¶›¹ãàéºÐvÅ Rº¬Í7ÎA9@6Jð4j»²4›tغí”P+ªè»¥<Ù¥ïýÐl‹¢ÕAùy"( úsÒwn<Á¤ÌiºÑveÓò :OP@„„ôÝR žä"ɉïûÀ¡ Ê8ºífnE9}v¿ÝÌÝ,xš®´]¹6•Ð\“2§‚rêr£ôÔR ~lRfœäïýÀœ ʨ‹UåôÕR ždÖ‘qÜ. ç=Ðt?)5Y%yÛÒ×~Ó¡‹wêyïð'&eFIÎ;2œÒŒÒƒcö´íc”P—6‡…gÛÍ|i à`f‹súà¬íÐz€º´9(? 
§¦öoRæ,É»Ž g³(¢í }R´üõß$‚rê×öÇ’Ï­*€—«Z®œwlXWf–ž¿£t¤e’ €º-;0†Ò4ÀóU½gšþäôÅeưNåÔ¬êS~ßòa Ãé¥Ù€§›”9M÷Z®$ÉçE± Þ ÇpÑ¡¬A9ͰìÀfƒátd*àɮґ– _)M-=q™nÜèZ'‚rš¡ 'Ÿ¸0€§™”™%y×Á¡m…¶+ôâ¥mWA9 Ò• Ês-XàÏMÊI>ttx6ñ¤/®Ò‘¶I‹b÷t« €Úm7ó»$Ÿ;2œ_Ãé™Y€ÿV…ä;:¼ûxºŒ~ÇIÞvd8›‡ßÊhŠ.=¦¼ §§¦´9Ãi¡ õš”9K·W\_/ŠÜ™i:~Ò­B«‡ßÊhÌÅe‡Ærayc †Ó2ÉÇêWjP­$_¦#­¾cf¦éøq|Z³wé8^>üFP@#TíW>uhHoâñëZ †ÓÓÁpºÊ—ÍâÞ †Ó™Ê×£v+]É?-ŠÝ†€ÐaWÕ9n—XQ@#•Ï[«˜ëQõ‰_~ãbîƒ6,Ç3)3Kw{’?fOº~,—ù²ø 36òLå4Èv3_æÑ†:ñNX~\ƒátœo‡ä> ËkRætRæ:ɇüöf¥ ÕIDAT ÷fQ|Y• <žËt0$Oòùñå4MWd½ §×z–^ÕZå·üøñ~a9ÀLÊŒ³kgð¶'Cž™u:z,ŸNÊýh5y"(g¿V©V2BÕîí—$TãEοú•ç{7N#,è…ûìöÓÚ}Ïžò‰Q+öh©À¡m7óYv-ß .ïÃi© W,Ь•Zíê©Ç± œ}^´®²»KphÎ;¨™° Ûþ¹(r­ ÐjÏj$(gßün»™¯“\ª5–tÓí¢pžP<ç“åìÛR €cØnæe’O*AÍÞ †Sa @wl’Œ•Zï—E‘Õsþ œ}³¢8¦ËèWNý~ §…2´Þ}’‹E‘;¥€V»yNË•‚röj»™ßÅÊ.à¸çEô+§~…å­Wɪ:†—mØŒóµå´õB´Ì.0?Ë.0¿Hr¢2@ÃÎUFÙæE¬2?”ÏÙ…ãݨ×&»Pm•ž„jÐ17ŽáUon ÊiûE誺ðÌ`8½H2Î.4w! 4á\e/«ÌϪóç*¯÷9ÕãûÕJ~Žg“d]˜öðëJ+h›$wùН=ñ±#(§K¢½Î.«Õ[gÕǸúÕŠs Îs•UvOÂ] Í_D8°7ßùûå7~¿îbObè˜Ûä÷›VÇîªú;7´~@PNW/D×ÙÝÙþÃ#¸ÕEéiõDZJ=[ŸOŠÊ¯NÁûx͹ÊãÐ|”]`>®>ÜÜÿr¡·L²ÔVH’E‘*@#ü¢µx¼»³|ùŸÿ'ù­%~(ÐíôÝýq¾<×—ç¿ãÙ…ãV<õï½ïÚ¨åÐ ÕjóU’«$ùFK¹QÚžßæÑfQÛÍ|i樓 ìOZʳ ÍGÙ…è§IÎôÒ6{{øX%YW7 QåÐB¶ ûѾ,§Ù…èÆßøôQ~¼Bý[›½­óeÿ’»ä÷þ˜ë*Ü€Ö”@Ç|µjÛF˜ðQúLP@¯ Êè5A9½&( ×~jÓ‹ §Ë–×ûr»™¯¼í€\/Í’|P €çû©e¯÷¼åõ>õ–h­Wè5A9½&( ×åôš €^”Ðk‚rzMP@¯ Êè5A9½&( ×åôš €^”Ðk‚rzMP@¯ Êè5A9½&( ×åôš €^”Ðk‚rzMP@¯ Êè5A9½&( ×åôš €^”Ðk‚rzMP@¯ Êè5A9½&( ×åôš €^”Ðk‚rzMP@¯ý”d•äoJq+%¤L²T€çûÿ‹s7Iý&a-IEND®B`‚TileDB-Py-0.12.2/doc/source/conf.py000066400000000000000000000120721417663620700166470ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # -- Imports configuration ------------------------------------------------- import os import sys from os.path import abspath, join, dirname sys.path.insert(0, abspath(join(dirname(__file__)))) # -- ReadTheDocs configuration --------------------------------------------- # Special handling on ReadTheDocs builds. # Some of this code is from https://github.com/robotpy/robotpy-docs/blob/master/conf.py readthedocs = os.environ.get("READTHEDOCS", None) == "True" rtd_version = os.environ.get("READTHEDOCS_VERSION", "latest") rtd_version = rtd_version if rtd_version in ["stable", "latest"] else "stable" # -- Project information ----------------------------------------------------- project = "TileDB-Py" copyright = "2020, TileDB, Inc." author = "TileDB, Inc." # The short X.Y version version = "0.6" # The full version, including alpha/beta/rc tags release = "0.6.5" # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = ["sphinx.ext.autodoc", "sphinx.ext.doctest", "sphinx.ext.intersphinx"] # Mapping for linking between RTD subprojects. if readthedocs: intersphinx_mapping = { "tiledb": ( "https://tiledb-inc-tiledb.readthedocs-hosted.com/en/%s/" % rtd_version, None, ), "tiledb-py": ( "https://tiledb-inc-tiledb.readthedocs-hosted.com/projects/python-api/en/%s/" % rtd_version, None, ), "python": ("https://docs.python.org/", None), } # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = ".rst" # The master toctree document. master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
# # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. pygments_style = "friendly" # -- Options for HTML output ------------------------------------------------- html_static_path = ["_static"] html_logo = "_static/tiledb-logo_color_no_margin_@4x.png" html_favicon = "_static/favicon.ico" if readthedocs: html_theme = "default" else: import sphinx_rtd_theme html_theme = "sphinx_rtd_theme" html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = "TileDB-Pydoc" # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, "TileDB-Py.tex", "TileDB-Py Documentation", "TileDB, Inc.", "manual") ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [(master_doc, "tiledb-py", "TileDB-Py Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( master_doc, "TileDB-Py", "TileDB-Py Documentation", author, "TileDB-Py", "One line description of project.", "Miscellaneous", ) ] # -- Custom Document processing ---------------------------------------------- # Generate the sidebar automatically so that it is identical across all subprojects. # This (and gensidebar.py) from https://github.com/robotpy/robotpy-docs import gensidebar gensidebar.generate_sidebar( {"on_rtd": readthedocs, "rtd_version": rtd_version}, "tiledb-py" ) # -- Custom setup ----------------------------------------------------------- def setup(app): app.add_stylesheet("custom.css") TileDB-Py-0.12.2/doc/source/gensidebar.py000066400000000000000000000047461417663620700200360ustar00rootroot00000000000000# # This file generates the sidebar/toctree for all TileDB projects and should # be copied to each project when it is updated. # # This file is originally from the RobotPy documentation project # https://github.com/robotpy/robotpy-docs, licensed under Apache v2. # import os def write_if_changed(fname, contents): try: with open(fname, "r") as fp: old_contents = fp.read() except: old_contents = "" if old_contents != contents: with open(fname, "w") as fp: fp.write(contents) def generate_sidebar(conf, conf_api): version = conf["rtd_version"] lines = [ "", ".. DO NOT MODIFY! 
THIS PAGE IS AUTOGENERATED!", " To edit the sidebar, modify gensidebar.py and re-build the docs.", "", ] url_base = "https://tiledb-inc-tiledb.readthedocs-hosted.com" lang = "en" def toctree(name): lines.extend( [".. toctree::", " :caption: %s" % name, " :maxdepth: 1", ""] ) def endl(): lines.append("") def write(desc, link): if conf_api == "tiledb": args = desc, link else: args = desc, "%s/%s/%s/%s.html" % (url_base, lang, version, link) lines.append(" %s <%s>" % args) def write_api(project, desc, rst_page): # From non-root project to root project link if project == "tiledb" and conf_api != "tiledb": args = desc, url_base, lang, version, rst_page lines.append(" %s API <%s/%s/%s/%s.html>" % args) # From anything to non-root project link elif project != conf_api: args = desc, url_base, project, lang, version, rst_page lines.append(" %s API <%s/projects/%s/%s/%s/%s.html>" % args) # Local project link else: args = desc, rst_page lines.append(" %s API <%s>" % args) def write_api_url(desc, url): lines.append(" %s API <%s>" % (desc, url)) # # Specify the sidebar contents here # toctree("API Reference") write_api("tiledb", "C", "c-api") write_api("tiledb", "C++", "c++-api") write_api("tiledb-py", "Python", "python-api") write_api_url("R", "https://tiledb-inc.github.io/TileDB-R/reference/index.html") write_api_url("Java", "https://www.javadoc.io/doc/io.tiledb/tiledb-java") write_api_url("Go", "https://godoc.org/github.com/TileDB-Inc/TileDB-Go") endl() write_if_changed("_sidebar.rst.inc", "\n".join(lines)) TileDB-Py-0.12.2/doc/source/index.rst000066400000000000000000000002321417663620700172040ustar00rootroot00000000000000TileDB Python Project ===================== This project encompasses the Python language bindings for the TileDB library. .. include:: _sidebar.rst.inc TileDB-Py-0.12.2/doc/source/python-api.rst000066400000000000000000000066171417663620700202020ustar00rootroot00000000000000TileDB Python API Reference =========================== .. warning:: The Python interface to TileDB is still under development and the API is subject to change. Modules ------- Typical usage of the Python interface to TileDB will use the top-level module ``tiledb``, e.g. .. code-block:: python import tiledb There is also a submodule ``libtiledb`` which contains the necessary bindings to the underlying TileDB native library. Most of the time you will not need to interact with ``tiledb.libtiledb`` unless you need native-library specific information, e.g. the version number: .. code-block:: python import tiledb tiledb.libtiledb.version() # Native TileDB library version number Getting Started --------------- Arrays may be opened with the ``tiledb.open`` function: .. autofunction:: tiledb.open Data import helpers ------------------- .. autofunction:: tiledb.from_numpy .. autofunction:: tiledb.from_csv .. autofunction:: tiledb.from_pandas Context ------- .. autoclass:: tiledb.Ctx :members: .. autofunction:: tiledb.default_ctx Config ------ .. autoclass:: tiledb.Config :members: Array Schema ------------ .. autoclass:: tiledb.ArraySchema :members: .. autofunction:: tiledb.empty_like Attribute --------- .. autoclass:: tiledb.Attr :members: Filters ------- .. autoclass:: tiledb.FilterList :members: .. autoclass:: tiledb.libtiledb.CompressionFilter :members: .. autoclass:: tiledb.GzipFilter :members: .. autoclass:: tiledb.ZstdFilter :members: .. autoclass:: tiledb.LZ4Filter :members: .. autoclass:: tiledb.Bzip2Filter :members: .. autoclass:: tiledb.RleFilter :members: .. autoclass:: tiledb.DoubleDeltaFilter :members: .. 
autoclass:: tiledb.BitShuffleFilter :members: .. autoclass:: tiledb.ByteShuffleFilter :members: .. autoclass:: tiledb.BitWidthReductionFilter :members: .. autoclass:: tiledb.PositiveDeltaFilter :members: Dimension --------- .. autoclass:: tiledb.Dim :members: Domain ------ .. autoclass:: tiledb.Domain :members: Array ----- .. autoclass:: tiledb.libtiledb.Array :members: .. autofunction:: tiledb.consolidate .. autofunction:: tiledb.vacuum Dense Array ----------- .. autoclass:: tiledb.DenseArray :members: .. automethod:: __getitem__(selection) .. automethod:: __setitem__(selection, value) .. automethod:: query .. automethod:: from_numpy(uri, array, ctx=None, **kwargs) Sparse Array ------------ .. autoclass:: tiledb.SparseArray :members: .. automethod:: __getitem__(selection) .. automethod:: __setitem__(selection, value) .. automethod:: query Query Condition --------------- .. autoclass:: tiledb.QueryCondition :members: Object Management ----------------- .. autofunction:: tiledb.array_exists .. autofunction:: tiledb.group_create .. autofunction:: tiledb.object_type .. autofunction:: tiledb.remove .. autofunction:: tiledb.move .. autofunction:: tiledb.ls .. autofunction:: tiledb.walk Fragment Info ------------- .. autoclass:: tiledb.FragmentInfoList :members: .. autoclass:: tiledb.FragmentInfo :members: Exceptions ---------- .. autoexception:: tiledb.TileDBError :members: VFS --- .. autoclass:: tiledb.VFS :members: Version ------- .. autofunction:: tiledb.libtiledb.version Statistics ---------- .. autofunction:: tiledb.stats_enable .. autofunction:: tiledb.stats_disable .. autofunction:: tiledb.stats_reset .. autofunction:: tiledb.stats_dump TileDB-Py-0.12.2/examples/000077500000000000000000000000001417663620700151175ustar00rootroot00000000000000TileDB-Py-0.12.2/examples/config.py000066400000000000000000000052141417663620700167400ustar00rootroot00000000000000# config.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # Please see the TileDB documentation for more information: # https://docs.tiledb.com/main/how-to/configuration # # This program shows how to set/get the TileDB configuration parameters. 
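#
# Note that configuration values can also be passed directly to the Config
# constructor as a dict (a minimal sketch):
#
#   config = tiledb.Config({"sm.tile_cache_size": "10000000"})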
# import tiledb def set_get_config_ctx_vfs(): # Create config object config = tiledb.Config() # Set/get config to/from ctx ctx = tiledb.Ctx(config) config_ctx = ctx.config() # Set/get config to/from VFS vfs = tiledb.VFS(config) config_vfs = vfs.config() def set_get_config(): config = tiledb.Config() # Set value config["vfs.s3.connect_timeout_ms"] = 5000 # Get value tile_cache_size = config["sm.tile_cache_size"] print("Tile cache size: %s" % str(tile_cache_size)) def print_default(): config = tiledb.Config() print("\nDefault settings:") for p in config.items(): print('"%s" : "%s"' % (p[0], p[1])) def iter_config_with_prefix(): config = tiledb.Config() # Print only the S3 settings. print("\nVFS S3 settings:") for p in config.items("vfs.s3."): print('"%s" : "%s"' % (p[0], p[1])) def save_load_config(): # Save to file config = tiledb.Config() config["sm.tile_cache_size"] = 0 config.save("tiledb_config.txt") # Load from file config_load = tiledb.Config.load("tiledb_config.txt") print( "\nTile cache size after loading from file: %s" % str(config_load["sm.tile_cache_size"]) ) set_get_config_ctx_vfs() set_get_config() print_default() iter_config_with_prefix() save_load_config() TileDB-Py-0.12.2/examples/errors.py000066400000000000000000000032051417663620700170050ustar00rootroot00000000000000# errors.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # Please see the TileDB documentation for more information: # https://docs.tiledb.com/main/how-to/catching-errors # # This example shows how to catch errors in TileDB. # import tiledb # Catch an error try: tiledb.group_create("my_group") tiledb.group_create("my_group") except tiledb.TileDBError as e: print("TileDB exception: %s" % e.message) # clean up if tiledb.VFS().is_dir("my_group"): tiledb.remove("my_group") # Setting a different error handler for the context is not yet supported. TileDB-Py-0.12.2/examples/fragment_info.py000066400000000000000000000067441417663620700203220ustar00rootroot00000000000000# fragment_info.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # import numpy as np import sys import tiledb array_name = "fragment_info" def create_array(): # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4] and space tiles 2x2. dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(1, 4), tile=2, dtype=np.int32), tiledb.Dim(name="cols", domain=(1, 4), tile=2, dtype=np.int32), ) # The array will be dense with a single attribute "a" so each (i,j) cell can store an integer. schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] ) # Create the (empty) array on disk. tiledb.Array.create(array_name, schema) def write_array_1(): with tiledb.open(array_name, mode="w") as A: A[1:3, 1:5] = np.array(([1, 2, 3, 4, 5, 6, 7, 8])) def write_array_2(): with tiledb.open(array_name, mode="w") as A: A[2:4, 2:4] = np.array(([101, 102, 103, 104])) def write_array_3(): with tiledb.open(array_name, mode="w") as A: A[3:4, 4:5] = np.array(([202])) # Create and write array only if it does not exist if tiledb.object_type(array_name) != "array": create_array() write_array_1() write_array_2() write_array_3() # tiledb.array_fragments() requires TileDB-Py version > 0.8.5 fragments_info = tiledb.array_fragments(array_name) print("====== FRAGMENTS INFO ======") print("array uri: {}".format(fragments_info.array_uri)) print("number of fragments: {}".format(len(fragments_info))) to_vac = fragments_info.to_vacuum print("number of consolidated fragments to vacuum: {}".format(len(to_vac))) print("uris of consolidated fragments to vacuum: {}".format(to_vac)) print(fragments_info.nonempty_domain) print(fragments_info.sparse) for fragment in fragments_info: print() print("===== FRAGMENT NUMBER {} =====".format(fragment.num)) print("fragment uri: {}".format(fragment.uri)) print("is sparse: {}".format(fragment.sparse)) print("cell num: {}".format(fragment.cell_num)) print("has consolidated metadata: {}".format(fragment.has_consolidated_metadata)) print("nonempty domain: {}".format(fragment.nonempty_domain)) print("timestamp range: {}".format(fragment.timestamp_range)) print( "number of unconsolidated metadata: {}".format( fragment.unconsolidated_metadata_num ) ) print("version: {}".format(fragment.version)) TileDB-Py-0.12.2/examples/fragments_consolidation.py000066400000000000000000000074371417663620700224170ustar00rootroot00000000000000# fragments_consolidation.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # Please see the TileDB documentation for more information: # https://docs.tiledb.com/main/how-to/arrays/writing-arrays/consolidation-and-vacuuming # # When run, this program will create a simple 2D dense array, write some data # with three queries (creating three fragments), optionally consolidate # and read the entire array data back. # import numpy as np import sys import tiledb array_name = "fragments_consolidation" def create_array(): # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4] and space tiles 2x2. dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(1, 4), tile=2, dtype=np.int32), tiledb.Dim(name="cols", domain=(1, 4), tile=2, dtype=np.int32), ) # The array will be dense with a single attribute "a" so each (i,j) cell can store an integer. schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] ) # Create the (empty) array on disk. tiledb.DenseArray.create(array_name, schema) def write_array_1(): with tiledb.DenseArray(array_name, mode="w") as A: A[1:3, 1:5] = np.array(([1, 2, 3, 4, 5, 6, 7, 8])) def write_array_2(): with tiledb.DenseArray(array_name, mode="w") as A: A[2:4, 2:4] = np.array(([101, 102, 103, 104])) def write_array_3(): with tiledb.DenseArray(array_name, mode="w") as A: # Note: sparse (unordered) writes to dense arrays are not yet supported in Python. # Instead we can make two single-cell writes (results in total of 4 fragments). A[1:2, 1:2] = np.array(([201])) A[3:4, 4:5] = np.array(([202])) def read_array(): with tiledb.DenseArray(array_name, mode="r") as A: # Read the entire array. To get coord values as well, we use the .query() syntax. 
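        # query(coords=True) returns a dict-like result with one NumPy array
        # per attribute ("a") plus one per dimension ("rows" and "cols"),
        # each shaped like the requested slice.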
        data = A.query(coords=True)[:, :]
        a_vals = data["a"]
        rows = data["rows"]
        cols = data["cols"]
        for i in range(rows.shape[0]):
            for j in range(cols.shape[1]):
                print(
                    "Cell {} has data {}".format(
                        str((rows[i, j], cols[i, j])), str(a_vals[i, j])
                    )
                )


# Create and write array only if it does not exist
if tiledb.object_type(array_name) != "array":
    create_array()
    write_array_1()
    write_array_2()
    write_array_3()

# Optionally consolidate
if len(sys.argv) > 1 and sys.argv[1] == "consolidate":
    config = tiledb.Config()
    config["sm.consolidation.steps"] = 1
    config["sm.consolidation.step_max_frags"] = 3
    config["sm.consolidation.step_min_frags"] = 1
    tiledb.consolidate(config=config, uri=array_name)

read_array()
TileDB-Py-0.12.2/examples/incomplete_iteration.py000066400000000000000000000062441417663620700217140ustar00rootroot00000000000000# incomplete_iteration.py
#
# LICENSE
#
# The MIT License
#
# Copyright (c) 2020 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# DESCRIPTION
#
# Please see the TileDB documentation for more information:
# https://docs.tiledb.com/main/how-to/arrays/reading-arrays/incomplete-queries
#
# When run, this program will create a 1D sparse array, write some data
# to it, and read slices back by iteration over incomplete queries.
#

import numpy as np
import sys
import tiledb

# Name of the array to create.
array_name = "incomplete_iteration"


def create_array():
    # The array will be 100 cells with dimension "x".
    dom = tiledb.Domain(tiledb.Dim(name="x", domain=(0, 99), tile=100, dtype=np.int64))

    # The array will be sparse with a single string-typed attribute "a".
    schema = tiledb.ArraySchema(
        domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=str)]
    )

    # Create the (empty) array on disk.
    tiledb.SparseArray.create(array_name, schema)


def write_array():
    # Open the array and write to it.
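    # Cells hold variable-length strings, so result sizes vary per cell; this
    # is what makes the fixed-size read buffers below fill up and the query
    # return incomplete results.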
    with tiledb.open(array_name, mode="w") as A:
        extent = A.schema.domain.dim("x").domain
        ncells = extent[1] - extent[0] + 1

        # Data is the Latin alphabet with varying repeat lengths
        data = [chr(i % 26 + 97) * (i % 52) for i in range(ncells)]

        # Coords are the dimension range
        coords = np.arange(extent[0], extent[1] + 1)

        A[coords] = data


def read_array_iterated():
    # in order to force iteration, restrict the buffer sizes
    # this setting gives 5 iterations for the example data
    init_buffer_bytes = 800
    cfg = tiledb.Config(
        {
            "py.init_buffer_bytes": init_buffer_bytes,
            "py.exact_init_buffer_bytes": "true",
        }
    )

    with tiledb.open(array_name, config=cfg) as A:
        # iterate over results as a dataframe
        iterable = A.query(return_incomplete=True).df[:]

        for i, result in enumerate(iterable):
            print(f"--- result {i} is a '{type(result)}' with size {len(result)}")
            print(result)
            print("---")

        print(f"Query completed after {i + 1} iterations")


create_array()
write_array()
read_array_iterated()
TileDB-Py-0.12.2/examples/multi_attribute.py000066400000000000000000000122021417663620700207030ustar00rootroot00000000000000# multi_attribute.py
#
# LICENSE
#
# The MIT License
#
# Copyright (c) 2020 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# DESCRIPTION
#
# Please see the TileDB documentation for more information:
# https://docs.tiledb.com/main/how-to/arrays/reading-arrays/multi-range-subarrays
#
# When run, this program will create a simple 2D dense array with two
# attributes, write some data to it, and read a slice of the data back on
# (i) both attributes, and (ii) subselecting on only one of the attributes.
#

import numpy as np
import sys
import tiledb

# Name of the array to create.
array_name = "multi_attribute"


def create_array():
    # Check if the array already exists.
    if tiledb.object_type(array_name) == "array":
        return

    # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4].
    dom = tiledb.Domain(
        tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int32),
        tiledb.Dim(name="cols", domain=(1, 4), tile=4, dtype=np.int32),
    )

    # Add two attributes "a1" and "a2", so each (i,j) cell can store
    # a character on "a1" and a vector of three floats on "a2".
    schema = tiledb.ArraySchema(
        domain=dom,
        sparse=False,
        attrs=[
            tiledb.Attr(name="a1", dtype=np.uint8),
            tiledb.Attr(
                name="a2",
                dtype=np.dtype([("", np.float32), ("", np.float32), ("", np.float32)]),
            ),
        ],
    )

    # Create the (empty) array on disk.
    tiledb.DenseArray.create(array_name, schema)


def write_array():
    # Open the array and write to it.
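    # Values are passed as a dict keyed by attribute name; "a2" values use a
    # structured NumPy dtype with three float32 fields to match the schema.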
    with tiledb.DenseArray(array_name, mode="w") as A:
        data_a1 = np.array(
            list(
                map(
                    ord,
                    ["a", "b", "c", "d", "e", "f", "g", "h",
                     "i", "j", "k", "l", "m", "n", "o", "p"],
                )
            )
        )
        data_a2 = np.array(
            [
                (1.1, 1.2, 1.3), (2.1, 2.2, 2.3), (3.1, 3.2, 3.3), (4.1, 4.2, 4.3),
                (5.1, 5.2, 5.3), (6.1, 6.2, 6.3), (7.1, 7.2, 7.3), (8.1, 8.2, 8.3),
                (9.1, 9.2, 9.3), (10.1, 10.2, 10.3), (11.1, 11.2, 11.3), (12.1, 12.2, 12.3),
                (13.1, 13.2, 13.3), (14.1, 14.2, 14.3), (15.1, 15.2, 15.3), (16.1, 16.2, 16.3),
            ],
            dtype=[("", np.float32), ("", np.float32), ("", np.float32)],
        )
        A[:, :] = {"a1": data_a1, "a2": data_a2}


def read_array():
    # Open the array and read from it.
    with tiledb.DenseArray(array_name, mode="r") as A:
        # Slice only rows 1, 2 and cols 2, 3, 4.
        data = A[1:3, 2:5]
        print("Reading both attributes a1 and a2:")
        a1, a2 = data["a1"].flat, data["a2"].flat
        for i, v in enumerate(a1):
            print(
                "a1: '%s', a2: (%.1f,%.1f,%.1f)"
                % (chr(v), a2[i][0], a2[i][1], a2[i][2])
            )


def read_array_subselect():
    # Open the array and read from it.
    with tiledb.DenseArray(array_name, mode="r") as A:
        # Slice only rows 1, 2 and cols 2, 3, 4, attribute 'a1' only.
        # We use the '.query()' syntax which allows attribute subselection.
        data = A.query(attrs=["a1"])[1:3, 2:5]
        print("Subselecting on attribute a1:")
        for a in data["a1"].flat:
            print("a1: '%s'" % chr(a))


create_array()
write_array()
read_array()
read_array_subselect()
TileDB-Py-0.12.2/examples/multirange_indexing.py000066400000000000000000000053361417663620700215330ustar00rootroot00000000000000# multirange_indexing.py
#
# LICENSE
#
# The MIT License
#
# Copyright (c) 2020 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# DESCRIPTION
#
# Please see the TileDB documentation for more information:
# https://docs.tiledb.com/main/how-to/arrays/reading-arrays/multi-range-subarrays
#
# When run, this program will create a simple 2D dense array with a single
# attribute, write some data to it, and read back a multi-range slice of the
# data using the multi_index syntax.
#

import numpy as np
import sys
import tiledb

# Name of the array to create.
array_name = "multi_range"


def create_array():
    # Check if the array already exists.
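    # (tiledb.object_type returns "array", "group", or None for a given URI.)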
if tiledb.object_type(array_name) == "array": return dom = tiledb.Domain( tiledb.Dim(name="x", domain=(1, 20), tile=4, dtype=np.int64), tiledb.Dim(name="y", domain=(1, 20), tile=4, dtype=np.int64), ) # Add a single "a" float attribute schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float64)] ) # Create the (empty) array on disk. tiledb.DenseArray.create(array_name, schema) def write_array(): # Open the array and write to it. with tiledb.DenseArray(array_name, mode="w") as A: data_a = np.arange(400).reshape(20, 20) A[:, :] = {"a": data_a} def read_array(): # Open the array and read from it. with tiledb.DenseArray(array_name, mode="r") as A: # Slice only rows: (1,3) inclusive, and 5 # cols: 2, 5, 7 data = A.multi_index[[(1, 3), 5], [2, 5, 7]] print("Reading attribute 'a', [ [1:3, 5], [2,5,7] ]") a = data["a"] print(a) create_array() write_array() read_array() TileDB-Py-0.12.2/examples/object.py000066400000000000000000000071311417663620700167410ustar00rootroot00000000000000# object.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # Please see the TileDB documentation for more information: # https://docs.tiledb.com/main/how-to/object-management # # This program creates a hierarchy as shown below. Specifically, it creates # groups `dense_arrays` and `sparse_arrays` in a group `my_group`, and # then some dense/sparse arrays and key-value store in those groups. # # my_group/ # - dense_arrays/ # - array_A # - array_B # - sparse_arrays/ # - array_C # - array_D # # The program then shows how to list this hierarchy, as well as # move/remove TileDB objects. 
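#
# The object-management calls exercised below are tiledb.group_create,
# tiledb.ls, tiledb.walk, tiledb.move, and tiledb.remove.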
import os import numpy as np import tiledb def create_array(array_name, sparse): if tiledb.object_type(array_name) == "array": return dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int32), tiledb.Dim(name="cols", domain=(1, 4), tile=4, dtype=np.int32), ) schema = tiledb.ArraySchema( domain=dom, sparse=sparse, attrs=[tiledb.Attr(name="a", dtype=np.int32)] ) if sparse: tiledb.SparseArray.create(array_name, schema) else: tiledb.DenseArray.create(array_name, schema) def path(p): return os.path.join(os.getcwd(), p) def create_hierarchy(): # Create groups tiledb.group_create(path("my_group")) tiledb.group_create(path("my_group/dense_arrays")) tiledb.group_create(path("my_group/sparse_arrays")) # Create arrays create_array(path("my_group/dense_arrays/array_A"), False) create_array(path("my_group/dense_arrays/array_B"), False) create_array(path("my_group/sparse_arrays/array_C"), True) create_array(path("my_group/sparse_arrays/array_D"), True) def list_obj(path): # List children print("\nListing hierarchy:") tiledb.ls(path, lambda obj_path, obj_type: print(obj_path, obj_type)) # Walk in a path with a pre- and post-order traversal print("\nPreorder traversal:") tiledb.walk( path, lambda obj_path, obj_type: print(obj_path, obj_type) ) # Default order is preorder print("\nPostorder traversal:") tiledb.walk( path, lambda obj_path, obj_type: print(obj_path, obj_type), order="postorder" ) def move_remove_obj(): tiledb.move(path("my_group"), path("my_group_2")) tiledb.remove(path("my_group_2/dense_arrays")) tiledb.remove(path("my_group_2/sparse_arrays/array_C")) create_hierarchy() list_obj("my_group") move_remove_obj() # Renames 'my_group' to 'my_group_2' list_obj("my_group_2") # clean up tiledb.remove("my_group_2") TileDB-Py-0.12.2/examples/parallel_csv_ingestion.py000066400000000000000000000243371417663620700222300ustar00rootroot00000000000000# parallel_csv_ingestion.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # This example demonstrates ingestion of CSV files in parallel # with tiledb.from_csv and Python multiprocessing. 
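#
# A typical invocation of the from_csv_mp helper defined below looks like
# the following (a sketch; the paths and the attribute-type mapping are
# placeholders):
#
#   from_csv_mp("/path/to/csv_folder", "/path/to/output_array",
#               chunksize=100, list_step_size=5, max_workers=4,
#               index_col=["idx_datetime"], attr_types={"column_int64": np.int64})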
# import tiledb import numpy as np, pandas as pd import os, tempfile, time, glob import multiprocessing from concurrent.futures import ProcessPoolExecutor # helper functions to generate data from tiledb.tests.common import rand_datetime64_array, rand_utf8 # are we running as a test in_test = "PYTEST_CURRENT_TEST" in os.environ def generate_csvs(csv_folder, count=9, min_length=1, max_length=109): def make_dataframe(col_size): data = { "idx_datetime": rand_datetime64_array(col_size, include_extremes=False), "column_int64": np.random.randint(0, 150000, size=col_size, dtype=np.int64), "column_uint32": np.random.randint( 0, 150000, size=col_size, dtype=np.uint32 ), "column_float64": np.random.rand(col_size), "column_utf8": np.array( [rand_utf8(np.random.randint(1, 100)) for _ in range(col_size)] ), } df = pd.DataFrame.from_dict(data) df.set_index("idx_datetime", inplace=True) return df # create list of CSV row-counts to generate # (each file will have nrows from this list) csv_lengths = np.random.randint(min_length, max_length, size=count) for i, target_length in enumerate(csv_lengths): output_path = os.path.join(csv_folder, "gen_csv_{}.csv".format(i)) df = make_dataframe(target_length) df.to_csv(output_path) def log_process_errors(*args, **kwargs): try: tiledb.from_csv(*args, **kwargs) except Exception as exc: # print log to file. randomize just in case err_id = np.random.randint(np.iinfo(np.int64).max - 1) err_filename = f"ingest-err-PID_{os.getpid()}_{err_id}.log" print("err_filename: ", err_filename) err = f""" ------------------------ Caught exception: ------------------------ {exc} ------------------------ with args: ------------------------ {args} ------------------------ with kwargs: ------------------------ {kwargs} ------------------------ this message saved to file: {err_filename} """ print(err) with open(err_filename, "w") as f: f.writelines(err) raise exc def from_csv_mp( csv_path, array_path, list_step_size=5, chunksize=100, max_workers=4, initial_file_count=5, index_col=None, parse_dates=True, attr_types=None, sparse=True, allows_duplicates=True, debug=False, **kwargs, ): """ Multi-process ingestion wrapper around tiledb.from_csv Currently uses ProcessPoolExecutor. """ # Setting start method to 'spawn' is required before TileDB 2.1 to # avoid problems with TBB when spawning via fork. # NOTE: *must be inside __main__* or a function. if multiprocessing.get_start_method(True) != "spawn": multiprocessing.set_start_method("spawn", True) # Get a list of of CSVs from the target path csvs = glob.glob(csv_path + "/*.csv") if len(csvs) < 1: raise ValueError("Cannot ingest empty CSV list!") # first step: create the array. we read the first N csvs to create schema # and as check for inconsistency before starting the full run. tiledb.from_csv( array_path, csvs[:initial_file_count], chunksize=chunksize, # must set chunksize here even though schema_only index_col=index_col, parse_dates=parse_dates, dtype=attr_types, column_types=attr_types, engine="c", debug=debug, allows_duplicates=True, sparse=sparse, mode="schema_only", **kwargs, ) print("Finished array schema creation") # controls number of CSV files passed to each worker process: # depending on the makeup of the files, we may want to read a number of # files consecutively (up to chunksize) in order to write more optimal # fragments. 
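    # For example, list_step_size=5 hands each worker task a batch of up to
    # five CSVs, while chunksize caps the number of rows per write (and thus
    # per fragment).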
if list_step_size > len(csvs): raise ValueError( "Please choose a step size smaller than the number of CSV files" ) tasks = [] csv_chunks = [] # high level ingestion timing start = time.time() # ingest the data in parallel # note: use ThreadPoolExecutor for debugging # use ProcessPoolExecutor in general # with ThreadPoolExecutor(max_workers=max_workers) as executor: with ProcessPoolExecutor(max_workers=max_workers) as executor: for first in range(0, len(csvs), list_step_size): last = min(len(csvs), first + list_step_size) print(" Submitting task for CSV list range: ", (first, last)) task = executor.submit( log_process_errors, *(array_path, csvs[first:last]), **dict( chunksize=chunksize, index_col=index_col, parse_dates=parse_dates, dtype=attr_types, column_types=attr_types, engine="c", debug=debug, allows_duplicates=allows_duplicates, ), **kwargs, mode="append", ) tasks.append(task) print("Task results: ", [t.result() for t in tasks]) print("Ingestion complete. Duration: ", time.time() - start) ############################################################################## # Usage example ############################################################################## def example(): # set up test paths and data csv_path = tempfile.mkdtemp() generate_csvs(csv_path, count=11) print("Finished generating CSVs in path: ", csv_path) array_path = tempfile.mkdtemp() print("Writing output array to: ", array_path) # Create Schema attr_types = { "column_int64": np.int64, "column_uint32": np.uint32, "column_float64": np.float64, "column_utf8": str, } from_csv_mp( csv_path, array_path, chunksize=27, list_step_size=5, max_workers=4, index_col=["idx_datetime"], attr_types=attr_types, ) print("Ingestion complete.") print(" Note: temp paths have undefined lifetime after exit.") # apparently no good way to check for "is interactive" in python if not in_test: input(" Press any key to continue: ") return csv_path, array_path if __name__ == "__main__" and not in_test: example() ############################################################################## # TEST SECTION # uses this example as a test of various input combinations ############################################################################## def df_from_csvs(path, **kwargs): idx_column = kwargs.pop("tiledb_idx_column", None) csv_paths = glob.glob(path + "/*.csv") csv_df_list = [pd.read_csv(p, **kwargs) for p in csv_paths] df = pd.concat(csv_df_list) if idx_column is not None: df.sort_values(idx_column, inplace=True) df.set_index(idx_column, inplace=True) df.index = df.index.astype("datetime64[ns]") return df def test_parallel_csv_ingestion(): csv_path, array_path = example() import pandas._testing as tm attr_types = { "column_int64": np.int64, "column_uint32": np.uint32, "column_float64": np.float64, # Avoid this runtime warning: "DeprecationWarning: `np.str` is a deprecated alias for the builtin `str`." 
"column_utf8": str, } # read dataframe from CSV list, set index, and sort df_direct = df_from_csvs( csv_path, dtype=attr_types, tiledb_idx_column="idx_datetime" ) # validate the array generated in example() df_tiledb = tiledb.open_dataframe(array_path) tm.assert_frame_equal(df_direct, df_tiledb.sort_values("idx_datetime")) # ingest over several parameters for nproc in [1, 5]: # note: already did 4 above for csv_list_step in [5, 11]: for chunksize in [10, 100]: array_tmp = tempfile.mkdtemp() print( "Running ingestion with nproc: '{}', step: '{}', chunksize: '{}'".format( nproc, csv_list_step, chunksize ) ) print("Writing output array to: ", array_tmp) from_csv_mp( csv_path, array_tmp, chunksize=chunksize, list_step_size=csv_list_step, max_workers=nproc, index_col=["idx_datetime"], attr_types=attr_types, ) df_tiledb = tiledb.open_dataframe(array_tmp) tm.assert_frame_equal(df_direct, df_tiledb.sort_values("idx_datetime")) print("Writing output array to: ", array_path) if __name__ == "__main__": test_parallel_csv_ingestion() TileDB-Py-0.12.2/examples/query_condition_string.py000066400000000000000000000045301417663620700222740ustar00rootroot00000000000000# query_condition_string.py # # LICENSE # # The MIT License # # Copyright (c) 2021 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # This example creates an array with one string-typed attribute, # writes sample data to the array, and then prints out a filtered # dataframe using the TileDB QueryCondition feature. 
import tiledb import numpy as np import tempfile import string def create_array(path): dom = tiledb.Domain(tiledb.Dim(name="d", domain=(1, 10), tile=1, dtype=np.uint32)) attrs = [tiledb.Attr(name="ascii_attr", dtype="ascii", var=True)] schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) tiledb.SparseArray.create(path, schema, overwrite=True) # create array of strings from a to a..j attr_data = np.array([string.ascii_lowercase[0:n] for n in range(1, 11)], dtype="O") with tiledb.open(path, "w") as arr: arr[np.arange(1, 11)] = {"ascii_attr": attr_data} def read_array(path, cond): with tiledb.open(path) as arr: qc = tiledb.QueryCondition(cond) print("QueryCondition is: ", qc) res = arr.query(attr_cond=qc).df[:] return res uri = "query_condition_string" create_array(uri) filtered_df1 = read_array(uri, "ascii_attr == 'abcd'") print(" result: ", filtered_df1) filtered_df2 = read_array(uri, "ascii_attr > 'abc'") print(" result: ", filtered_df2) TileDB-Py-0.12.2/examples/quickstart_dense.py000066400000000000000000000052671417663620700210530ustar00rootroot00000000000000# quickstart_dense.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # Please refer to the TileDB and TileDB-Py documentation for more information: # https://docs.tiledb.com/main/how-to # https://tiledb-inc-tiledb.readthedocs-hosted.com/projects/tiledb-py/en/stable/python-api.html # # When run, this program will create a simple 2D dense array, write some data # to it, and read a slice of the data back. # import numpy as np import sys import tiledb # Name of the array to create. array_name = "quickstart_dense" def create_array(): # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int32), tiledb.Dim(name="cols", domain=(1, 4), tile=4, dtype=np.int32), ) # The array will be dense with a single attribute "a" so each (i,j) cell can store an integer. schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] ) # Create the (empty) array on disk. tiledb.DenseArray.create(array_name, schema) def write_array(): # Open the array and write to it. with tiledb.DenseArray(array_name, mode="w") as A: data = np.array(([1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16])) A[:] = data def read_array(): # Open the array and read from it. 
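    # Slicing a dense array returns a dict mapping attribute names to
    # NumPy arrays; here we print the values of attribute "a".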
with tiledb.DenseArray(array_name, mode="r") as A: # Slice only rows 1, 2 and cols 2, 3, 4. data = A[1:3, 2:5] print(data["a"]) if tiledb.object_type(array_name) != "array": create_array() write_array() read_array() TileDB-Py-0.12.2/examples/quickstart_sparse.py000066400000000000000000000056141417663620700212460ustar00rootroot00000000000000# quickstart_sparse.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # Please refer to the TileDB and TileDB-Py documentation for more information: # https://docs.tiledb.com/main/how-to # https://tiledb-inc-tiledb.readthedocs-hosted.com/projects/tiledb-py/en/stable/python-api.html # # When run, this program will create a simple 2D sparse array, write some data # to it, and read a slice of the data back. # import numpy as np import sys import tiledb # Name of the array to create. array_name = "quickstart_sparse" def create_array(): # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int32), tiledb.Dim(name="cols", domain=(1, 4), tile=4, dtype=np.int32), ) # The array will be sparse with a single attribute "a" so each (i,j) cell can store an integer. schema = tiledb.ArraySchema( domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)] ) # Create the (empty) array on disk. tiledb.SparseArray.create(array_name, schema) def write_array(): # Open the array and write to it. with tiledb.SparseArray(array_name, mode="w") as A: # Write some simple data to cells (1, 1), (2, 4) and (2, 3). I, J = [1, 2, 2], [1, 4, 3] data = np.array(([1, 2, 3])) A[I, J] = data def read_array(): # Open the array and read from it. with tiledb.SparseArray(array_name, mode="r") as A: # Slice only rows 1, 2 and cols 2, 3, 4. data = A[1:3, 2:5] a_vals = data["a"] for i, coord in enumerate(zip(data["rows"], data["cols"])): print("Cell (%d, %d) has data %d" % (coord[0], coord[1], a_vals[i])) if tiledb.object_type(array_name) != "array": create_array() write_array() read_array() TileDB-Py-0.12.2/examples/reading_dense_layouts.py000066400000000000000000000102471417663620700220440ustar00rootroot00000000000000# reading_dense_layouts.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # Please see the TileDB documentation for more information: # https://docs.tiledb.com/main/how-to/arrays/reading-arrays/basic-reading # # When run, this program will create a simple 2D dense array, write some data # to it, and read a slice of the data back in the layout of the user's choice # (passed as an argument to the program: "row", "col", or "global"). # import numpy as np import sys import tiledb # Name of the array to create. array_name = "reading_dense_layouts" def create_array(): # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(1, 4), tile=2, dtype=np.int32), tiledb.Dim(name="cols", domain=(1, 4), tile=2, dtype=np.int32), ) # The array will be dense with a single attribute "a" so each (i,j) cell can store an integer. schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] ) # Create the (empty) array on disk. tiledb.DenseArray.create(array_name, schema) def write_array(): # Open the array and write to it. with tiledb.DenseArray(array_name, mode="w") as A: # NOTE: global writes are not currently supported in the Python API. # The following code will produce the same array as the corresponding # C++ example in the docs (which wrote in global order) data = np.array(([1, 2, 5, 6], [3, 4, 7, 8], [9, 10, 13, 14], [11, 12, 15, 16])) A[:] = data def read_array(order): # Open the array and read from it. with tiledb.DenseArray(array_name, mode="r") as A: # Get non-empty domain print("Non-empty domain: {}".format(A.nonempty_domain())) # Slice only rows 1, 2 and cols 2, 3, 4. # NOTE: The `query` syntax is required to get the coordinates for # dense arrays and specify an order other than the default row-major data = A.query(attrs=["a"], order=order, coords=True)[1:3, 2:5] a_vals = data["a"] coords = np.asarray(list(zip(data["rows"], data["cols"]))) if order != "G" and a_vals.flags["F_CONTIGUOUS"]: print("NOTE: The following result array has col-major layout internally") if order != "G": for i in range(coords.shape[0]): for j in range(coords.shape[1]): print( "Cell {} has data {}".format( str(coords[i, j]), str(a_vals[i, j]) ) ) else: # When reading in global order, TileDB always returns a vector (1D array) for i in range(coords.shape[0]): print("Cell {} has data {}".format(str(coords[i]), str(a_vals[i]))) # Check if the array already exists. 
if tiledb.object_type(array_name) != "array": create_array() write_array() layout = "" if len(sys.argv) > 1: layout = sys.argv[1] order = "C" if layout == "col": order = "F" elif layout == "global": order = "G" else: order = "C" read_array(order) TileDB-Py-0.12.2/examples/reading_sparse_layouts.py000066400000000000000000000066541417663620700222520ustar00rootroot00000000000000# reading_sparse_layouts.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # Please see the TileDB documentation for more information: # https://docs.tiledb.com/main/how-to/arrays/reading-arrays/basic-reading # # When run, this program will create a simple 2D sparse array, write some data # to it, and read a slice of the data back in the layout of the user's choice # (passed as an argument to the program: "row", "col", or "global"). # import numpy as np import sys import tiledb # Name of the array to create. array_name = "reading_sparse_layouts" def create_array(): # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(1, 4), tile=2, dtype=np.int32), tiledb.Dim(name="cols", domain=(1, 4), tile=2, dtype=np.int32), ) # The array will be sparse with a single attribute "a" so each (i,j) cell can store an integer. schema = tiledb.ArraySchema( domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)] ) # Create the (empty) array on disk. tiledb.SparseArray.create(array_name, schema) def write_array(): # Open the array and write to it. with tiledb.SparseArray(array_name, mode="w") as A: # To write, the coordinates must be split into two vectors, one per dimension I, J = [1, 1, 2, 1, 2, 2], [1, 2, 2, 4, 3, 4] data = np.array(([1, 2, 3, 4, 5, 6])) A[I, J] = data def read_array(order): # Open the array and read from it. with tiledb.SparseArray(array_name, mode="r") as A: # Get non-empty domain print("Non-empty domain: {}".format(A.nonempty_domain())) # Slice only rows 1, 2 and cols 2, 3, 4. # NOTE: The `query` syntax is required to specify an order # other than the default row-major data = A.query(attrs=["a"], order=order, coords=True)[1:3, 2:5] a_vals = data["a"] for i, coord in enumerate(zip(data["rows"], data["cols"])): print("Cell {} has data {}".format(str(coord), str(a_vals[i]))) # Check if the array already exists. 
if tiledb.object_type(array_name) != "array":
    create_array()
    write_array()

layout = ""
if len(sys.argv) > 1:
    layout = sys.argv[1]

order = "C"
if layout == "col":
    order = "F"
elif layout == "global":
    order = "G"
else:
    order = "C"

read_array(order)
TileDB-Py-0.12.2/examples/string_float_int_dimensions.py000066400000000000000000000051371417663620700232740ustar00rootroot00000000000000# string_float_int_dimensions.py
#
# LICENSE
#
# The MIT License
#
# Copyright (c) 2020 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# DESCRIPTION
#
# Please see the TileDB documentation for more information:
# https://docs.tiledb.com/main/how-to/arrays/creating-arrays/creating-dimensions
#
# When run, this program will create a 3D sparse array with string, int64, and
# float64 dimensions, write some data to it, and read slices of the data back.
#

import tiledb
import numpy as np

path = "sparse_mixed_demo"

dom = tiledb.Domain(
    *[
        tiledb.Dim(name="str_dim", domain=(None, None), dtype=np.bytes_),
        tiledb.Dim(name="int64_dim", domain=(0, 100), tile=10, dtype=np.int64),
        tiledb.Dim(
            name="float64_dim",
            domain=(-100.0, 100.0),
            tile=10,
            dtype=np.float64,
        ),
    ],
)

att = tiledb.Attr(name="a", dtype=np.int64)

schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True, capacity=10000)

tiledb.SparseArray.create(path, schema)

data = [1, 2, 3, 4]
c_str = [b"aa", b"bbb", b"c", b"dddd"]
c_int64 = [0, 10, 20, 30]
c_float64 = [-95.0, -61.5, 1.3, 42.7]

with tiledb.open(path, "w") as A:
    A[c_str, c_int64, c_float64] = data

with tiledb.open(path) as A:
    print("\n\nRead full array:\n")
    print(A[:])
    print("\n\nRead string slice A['c':'dddd']:\n")
    print(A["c":"dddd"])
    print("\n\nRead string slice A['aa':'bbb']:\n")
    print(A["aa":"bbb"])
    print("\n\nRead A.multi_index['aa':'c', 0:10]\n")
    print(A.multi_index["aa":"c", 0:10])
    print("\n\nRead A.multi_index['aa':'bbb', :, -95.0:-61.5]\n")
    print(A.multi_index["aa":"bbb", :, -95.0:-61.5])
TileDB-Py-0.12.2/examples/time_traveling.py000066400000000000000000000046631417663620700205130ustar00rootroot00000000000000# time_traveling.py
#
# LICENSE
#
# The MIT License
#
# Copyright (c) 2020 TileDB, Inc.
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # When run, this program will create a simple sparse array, write some data # to it at specified timestamps, and read the entire array data back. # import numpy as np import tiledb # Name of the array to create. array_name = "time_traveling" def create_array(): dom = tiledb.Domain(tiledb.Dim(domain=(0, 0), tile=1, dtype=np.int64)) att = tiledb.Attr(name="num", dtype=np.int64) schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) tiledb.SparseArray.create(array_name, schema) def write_array(): # Open the array and write to it. for timestamp in range(1, 4): with tiledb.open(array_name, timestamp=timestamp, mode="w") as T: T[0] = timestamp def read_array(): # Open the array and read from it. with tiledb.open(array_name, mode="r") as T: print(T[:]["num"]) with tiledb.open(array_name, mode="r", timestamp=(1, 2)) as T: print(T[:]["num"]) with tiledb.open(array_name, mode="r", timestamp=(2, 3)) as T: print(T[:]["num"]) with tiledb.open(array_name, mode="r", timestamp=1) as T: print(T[:]["num"]) with tiledb.open(array_name, mode="r", timestamp=(1, None)) as T: print(T[:]["num"]) if tiledb.object_type(array_name) != "array": create_array() write_array() read_array() TileDB-Py-0.12.2/examples/using_tiledb_stats.py000066400000000000000000000051301417663620700213560ustar00rootroot00000000000000# using_tiledb_stats.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
#
# DESCRIPTION
#
# Please see the TileDB documentation for more information:
# https://docs.tiledb.com/main/how-to/performance/using-performance-statistics
#
# When run, this program will create a 0.5GB dense array, and enable the
# TileDB statistics for reads from the array.
#

import numpy as np
import tiledb

# Name of array.
array_name = "stats_array"


def create_array(row_tile_extent, col_tile_extent):
    dom = tiledb.Domain(
        tiledb.Dim(
            name="rows", domain=(1, 12000), tile=row_tile_extent, dtype=np.int32
        ),
        tiledb.Dim(
            name="cols", domain=(1, 12000), tile=col_tile_extent, dtype=np.int32
        ),
    )
    schema = tiledb.ArraySchema(
        domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)]
    )
    # Create the (empty) array on disk.
    tiledb.DenseArray.create(array_name, schema)


def write_array():
    # Open the array and write to it.
    with tiledb.DenseArray(array_name, mode="w") as A:
        # Generate int32 data and shape it to match the 12000x12000 domain.
        data = np.arange(12000 * 12000, dtype=np.int32).reshape(12000, 12000)
        A[:] = data


def read_array():
    # Open the array and read from it.
    with tiledb.DenseArray(array_name, mode="r") as A:
        # Read a slice of 3,000 rows.
        # Enable the stats for the read query, and print the report.
        tiledb.stats_enable()
        data1 = A[1:3001, 1:12001]
        tiledb.stats_dump()
        tiledb.stats_disable()


# Create array with each row as a tile.
create_array(1, 12000)
write_array()
read_array()
TileDB-Py-0.12.2/examples/variable_length.py000066400000000000000000000070421417663620700206220ustar00rootroot00000000000000# variable_length.py
#
# LICENSE
#
# The MIT License
#
# Copyright (c) 2020 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# DESCRIPTION
#
# Please see the TileDB documentation for more information:
# https://docs.tiledb.com/main/how-to/arrays/writing-arrays/var-length-attributes
#
# This program shows how to write and read variable-length attributes.
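#
# A variable-length attribute stores a different number of values in each
# cell: below, "a1" holds variable-length unicode strings and "a2" holds
# variable-length int64 vectors, both passed to the write as object-dtype
# NumPy arrays of per-cell values.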
#

#%%
import tiledb
import numpy as np

from tiledb.tests.common import assert_subarrays_equal

array_name = "variable_length_array"

#%%
def create_array():
    dom = tiledb.Domain(
        tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int64),
        tiledb.Dim(name="cols", domain=(1, 4), tile=4, dtype=np.int64),
    )

    attrs = [
        tiledb.Attr(name="a1", var=True, dtype="U"),
        tiledb.Attr(name="a2", var=True, dtype=np.int64),
    ]

    schema = tiledb.ArraySchema(domain=dom, sparse=False, attrs=attrs)

    tiledb.Array.create(array_name, schema)

    return schema


def generate_data():
    # generate test input data
    # (note: use the builtin `object` dtype; the `np.object` alias is deprecated)
    a1_data = np.array(
        [
            "a",
            "bb",
            "ccc",
            "dd",
            "eee",
            "f",
            "g",
            "hhh",
            "i",
            "jjj",
            "kk",
            "l",
            "m",
            "n",
            "oo",
            "p",
        ],
        dtype=object,
    )
    a1_data = a1_data.reshape(4, 4)

    a2_data = np.array(
        list(
            map(
                lambda v: np.repeat(v[0], v[1]).astype(np.int64),
                [
                    (1, 1),
                    (2, 2),
                    (3, 1),
                    (4, 1),
                    (5, 1),
                    (6, 2),
                    (7, 2),
                    (8, 3),
                    (9, 2),
                    (10, 1),
                    (11, 1),
                    (12, 2),
                    (13, 1),
                    (14, 3),
                    (15, 1),
                    (16, 1),
                ],
            )
        ),
        dtype=object,
    )
    a2_data = a2_data.reshape(4, 4)

    data_dict = {"a1": a1_data, "a2": a2_data}

    return data_dict


def write_array(data_dict):
    # open array for writing, and write data
    with tiledb.open(array_name, "w") as array:
        array[:] = data_dict


def test_output_subarrays(test_dict):
    with tiledb.open(array_name) as A:
        rt_dict = A[:]
        assert_subarrays_equal(test_dict["a2"], rt_dict["a2"])


create_array()
data = generate_data()
write_array(data)
test_output_subarrays(data)
TileDB-Py-0.12.2/examples/vfs.py000066400000000000000000000063521417663620700162750ustar00rootroot00000000000000# vfs.py
#
# LICENSE
#
# The MIT License
#
# Copyright (c) 2020 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# DESCRIPTION
#
# Please see the TileDB documentation for more information:
# https://docs.tiledb.com/main/how-to/virtual-filesystem
#
# This program explores the various TileDB VFS tools.
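#
# The VFS layer exposes a single file API across storage backends, so the
# same is_dir/touch/open/read/write calls below also apply to remote URIs
# (e.g. "s3://...") when the corresponding backend is configured; note the
# caveat in write() that appending is not supported on S3.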
# import struct import tiledb import os def path(p): return os.path.join(os.getcwd(), p) def dirs_files(): # Create TileDB VFS vfs = tiledb.VFS() # Create directory if not vfs.is_dir("dir_A"): vfs.create_dir(path("dir_A")) print("Created 'dir_A'") else: print("'dir_A' already exists") # Creating an (empty) file if not vfs.is_file("dir_A/file_A"): vfs.touch(path("dir_A/file_A")) print("Created empty file 'dir_A/file_A'") else: print("'dir_A/file_A' already exists") # Getting the file size print("Size of file 'dir_A/file_A': {}".format(vfs.file_size(path("dir_A/file_A")))) # Moving files (moving directories is similar) print("Moving file 'dir_A/file_A' to 'dir_A/file_B'") vfs.move_file(path("dir_A/file_A"), path("dir_A/file_B")) # Deleting files and directories print("Deleting 'dir_A/file_B' and 'dir_A'") vfs.remove_file(path("dir_A/file_B")) vfs.remove_dir(path("dir_A")) def write(): # Create TileDB VFS vfs = tiledb.VFS() # Create VFS file handle f = vfs.open("tiledb_vfs.bin", "wb") # Write binary data vfs.write(f, struct.pack("f", 153.0)) vfs.write(f, "abcd".encode("utf-8")) vfs.close(f) # Write binary data again - this will overwrite the previous file f = vfs.open("tiledb_vfs.bin", "wb") vfs.write(f, struct.pack("f", 153.1)) vfs.write(f, "abcdef".encode("utf-8")) vfs.close(f) # Append binary data to existing file (this will NOT work on S3) f = vfs.open("tiledb_vfs.bin", "ab") vfs.write(f, "ghijkl".encode("utf-8")) vfs.close(f) def read(): # Create TileDB VFS vfs = tiledb.VFS() # Read binary data f = vfs.open("tiledb_vfs.bin", "rb") f1 = struct.unpack("f", vfs.read(f, 0, 4))[0] s1 = bytes.decode(vfs.read(f, 4, 12), "utf-8") print("Binary read:\n{}\n{}".format(f1, s1)) vfs.close(f) dirs_files() write() read() TileDB-Py-0.12.2/examples/writing_dense_multiple.py000066400000000000000000000053321417663620700222500ustar00rootroot00000000000000# writing_dense_multiple.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # Please see the TileDB documentation for more information: # https://docs.tiledb.com/main/how-to/arrays/writing-arrays/writing-in-dense-subarrays # # When run, this program will create a simple 2D dense array, write some data # to it with two write queries, and read the entire array data back. # import numpy as np import sys import tiledb # Name of the array to create. array_name = "writing_dense_multiple" def create_array(): # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. 
dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(1, 4), tile=2, dtype=np.int32), tiledb.Dim(name="cols", domain=(1, 4), tile=2, dtype=np.int32), ) # The array will be dense with a single attribute "a" so each (i,j) cell can store an integer. schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] ) # Create the (empty) array on disk. tiledb.DenseArray.create(array_name, schema) def write_array(): # Open the array and write to it. with tiledb.DenseArray(array_name, mode="w") as A: # First write data = np.array(([1, 2], [3, 4])) A[1:3, 1:3] = data # Second write data = np.array(([5, 6, 7, 8], [9, 10, 11, 12])) A[2:4, 1:5] = data def read_array(): # Open the array and read from it. with tiledb.DenseArray(array_name, mode="r") as A: # Slice the entire array data = A[:] print(data["a"]) if tiledb.object_type(array_name) != "array": create_array() write_array() read_array() TileDB-Py-0.12.2/examples/writing_dense_padding.py000066400000000000000000000052011417663620700220160ustar00rootroot00000000000000# writing_dense_padding.py # # LICENSE # # The MIT License # # Copyright (c) 2020 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # Please see the TileDB documentation for more information: # https://docs.tiledb.com/main/how-to/arrays/writing-arrays/writing-in-dense-subarrays # # When run, this program will create a simple 2D dense array, write some data # to it in a way that some space is empty, and read the entire array data back. # import numpy as np import sys import tiledb # Name of the array to create. array_name = "writing_dense_padding" def create_array(): # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(1, 4), tile=2, dtype=np.int32), tiledb.Dim(name="cols", domain=(1, 4), tile=2, dtype=np.int32), ) # The array will be dense with a single attribute "a" so each (i,j) cell can store an integer. schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] ) # Create the (empty) array on disk. tiledb.DenseArray.create(array_name, schema) def write_array(): # Open the array and write to it. with tiledb.DenseArray(array_name, mode="w") as A: # Write to [2,3], [1,2] data = np.array(([1, 2], [3, 4])) A[2:4, 1:3] = data def read_array(): # Open the array and read from it. 
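    # Cells that were never written are returned filled with the
    # attribute's default fill value (the minimum value for an int32
    # attribute), which is the "padding" this example demonstrates.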
    with tiledb.DenseArray(array_name, mode="r") as A:
        # Slice the entire array
        data = A[:]
        print(data["a"])


if tiledb.object_type(array_name) != "array":
    create_array()
    write_array()

read_array()
TileDB-Py-0.12.2/examples/writing_dense_rgb.py000066400000000000000000000050431417663620700211660ustar00rootroot00000000000000# writing_dense_rgb.py
#
# LICENSE
#
# The MIT License
#
# Copyright (c) 2021 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# DESCRIPTION
#
# Please see the TileDB documentation for more information:
# https://docs.tiledb.com/main/how-to/arrays/writing-arrays/writing-in-dense-subarrays
#
# When run, this program will create a 2D+1 multi-component (e.g. RGB) dense array,
# write some data to it, and read the entire array data.

import tiledb, numpy as np

img_shape = (100, 224, 224)
img_uri = "writing_dense_rgb"

image_data = np.random.randint(low=0, high=100, size=(*img_shape, 3), dtype=np.int32)


def create_array():
    domain = tiledb.Domain(
        tiledb.Dim(
            name="image_id", domain=(0, img_shape[0] - 1), tile=4, dtype=np.int32
        ),
        tiledb.Dim(
            name="x", domain=(0, img_shape[1] - 1), tile=img_shape[1], dtype=np.int32
        ),
        tiledb.Dim(
            name="y", domain=(0, img_shape[2] - 1), tile=img_shape[2], dtype=np.int32
        ),
    )

    # create multi-component attribute with three int32 components
    attr = tiledb.Attr(dtype=np.dtype("i4, i4, i4"))

    schema = tiledb.ArraySchema(domain=domain, sparse=False, attrs=[attr])

    tiledb.Array.create(img_uri, schema)

    # reinterpret the trailing axis of three int32 values as one structured
    # element per cell
    image_data_rgb = image_data.view(np.dtype("i4, i4, i4"))

    with tiledb.open(img_uri, "w") as A:
        # write the full image stack to the array
        A[:] = image_data_rgb


def read_array():
    with tiledb.open(img_uri) as A:
        print(A[:].shape)


if __name__ == "__main__":
    create_array()
    read_array()
TileDB-Py-0.12.2/examples/writing_sparse_multiple.py000066400000000000000000000055711417663620700224520ustar00rootroot00000000000000# writing_sparse_multiple.py
#
# LICENSE
#
# The MIT License
#
# Copyright (c) 2020 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # DESCRIPTION # # Please see the TileDB documentation for more information: # https://docs.tiledb.com/main/how-to/arrays/writing-arrays/writing-sparse-cells # # When run, this program will create a simple 2D sparse array, write some data # to it twice, and read all the data back. # import numpy as np import sys import tiledb # Name of the array to create. array_name = "writing_sparse_multiple" def create_array(): # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int32), tiledb.Dim(name="cols", domain=(1, 4), tile=4, dtype=np.int32), ) # The array will be sparse with a single attribute "a" so each (i,j) cell can store an integer. schema = tiledb.ArraySchema( domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)] ) # Create the (empty) array on disk. tiledb.SparseArray.create(array_name, schema) def write_array(): # Open the array and write to it. with tiledb.SparseArray(array_name, mode="w") as A: # First write I, J = [1, 2, 2], [1, 4, 3] data = np.array(([1, 2, 3])) A[I, J] = data # Second write I, J = [4, 2], [1, 4] data = np.array(([4, 20])) A[I, J] = data def read_array(): # Open the array and read from it. with tiledb.SparseArray(array_name, mode="r") as A: # Slice entire array data = A[1:5, 1:5] a_vals = data["a"] for i, coord in enumerate(zip(data["rows"], data["cols"])): print("Cell (%d, %d) has data %d" % (coord[0], coord[1], a_vals[i])) if tiledb.object_type(array_name) != "array": create_array() write_array() read_array() TileDB-Py-0.12.2/external/000077500000000000000000000000001417663620700151235ustar00rootroot00000000000000TileDB-Py-0.12.2/external/LICENSE-string_view.txt000066400000000000000000000024721417663620700213110ustar00rootroot00000000000000Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TileDB-Py-0.12.2/external/LICENSE-tsl_robin_map.txt000066400000000000000000000022251417663620700215750ustar00rootroot00000000000000/** * MIT License * * Copyright (c) 2017 Thibaut Goetghebuer-Planchon * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ TileDB-Py-0.12.2/external/string_view.hpp000066400000000000000000001312751417663620700202050ustar00rootroot00000000000000// Copyright 2017-2020 by Martin Moene // // string-view lite, a C++17-like string_view for C++98 and later. // For more information see https://github.com/martinmoene/string-view-lite // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #pragma once #ifndef NONSTD_SV_LITE_H_INCLUDED #define NONSTD_SV_LITE_H_INCLUDED #define string_view_lite_MAJOR 1 #define string_view_lite_MINOR 4 #define string_view_lite_PATCH 0 #define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) #define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x ) #define nssv_STRINGIFY_( x ) #x // string-view lite configuration: #define nssv_STRING_VIEW_DEFAULT 0 #define nssv_STRING_VIEW_NONSTD 1 #define nssv_STRING_VIEW_STD 2 #if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) # define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) #endif #if defined( nssv_CONFIG_SELECT_STD_STRING_VIEW ) || defined( nssv_CONFIG_SELECT_NONSTD_STRING_VIEW ) # error nssv_CONFIG_SELECT_STD_STRING_VIEW and nssv_CONFIG_SELECT_NONSTD_STRING_VIEW are deprecated and removed, please use nssv_CONFIG_SELECT_STRING_VIEW=nssv_STRING_VIEW_... 
#endif #ifndef nssv_CONFIG_STD_SV_OPERATOR # define nssv_CONFIG_STD_SV_OPERATOR 0 #endif #ifndef nssv_CONFIG_USR_SV_OPERATOR # define nssv_CONFIG_USR_SV_OPERATOR 1 #endif #ifdef nssv_CONFIG_CONVERSION_STD_STRING # define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING # define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING #endif #ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS # define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1 #endif #ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS # define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 #endif // Control presence of exception handling (try and auto discover): #ifndef nssv_CONFIG_NO_EXCEPTIONS # if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND) # define nssv_CONFIG_NO_EXCEPTIONS 0 # else # define nssv_CONFIG_NO_EXCEPTIONS 1 # endif #endif // C++ language version detection (C++20 is speculative): // Note: VC14.0/1900 (VS2015) lacks too much from C++14. #ifndef nssv_CPLUSPLUS # if defined(_MSVC_LANG ) && !defined(__clang__) # define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG ) # else # define nssv_CPLUSPLUS __cplusplus # endif #endif #define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L ) #define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L ) #define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L ) #define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L ) #define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L ) #define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202000L ) // use C++17 std::string_view if available and requested: #if nssv_CPP17_OR_GREATER && defined(__has_include ) # if __has_include( ) # define nssv_HAVE_STD_STRING_VIEW 1 # else # define nssv_HAVE_STD_STRING_VIEW 0 # endif #else # define nssv_HAVE_STD_STRING_VIEW 0 #endif #define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) ) #define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW ) #define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH // // Use C++17 std::string_view: // #if nssv_USES_STD_STRING_VIEW #include // Extensions for std::string: #if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS namespace nonstd { template< class CharT, class Traits, class Allocator = std::allocator > std::basic_string to_string( std::basic_string_view v, Allocator const & a = Allocator() ) { return std::basic_string( v.begin(), v.end(), a ); } template< class CharT, class Traits, class Allocator > std::basic_string_view to_string_view( std::basic_string const & s ) { return std::basic_string_view( s.data(), s.size() ); } // Literal operators sv and _sv: #if nssv_CONFIG_STD_SV_OPERATOR using namespace std::literals::string_view_literals; #endif #if nssv_CONFIG_USR_SV_OPERATOR inline namespace literals { inline namespace string_view_literals { constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) { return std::string_view{ str, len }; } constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) { return std::u16string_view{ str, len }; } constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) { return std::u32string_view{ str, len }; } constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) { return std::wstring_view{ str, len }; } }} // 
namespace literals::string_view_literals #endif // nssv_CONFIG_USR_SV_OPERATOR } // namespace nonstd #endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS namespace nonstd { using std::string_view; using std::wstring_view; using std::u16string_view; using std::u32string_view; using std::basic_string_view; // literal "sv" and "_sv", see above using std::operator==; using std::operator!=; using std::operator<; using std::operator<=; using std::operator>; using std::operator>=; using std::operator<<; } // namespace nonstd #else // nssv_HAVE_STD_STRING_VIEW // // Before C++17: use string_view lite: // // Compiler versions: // // MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) // MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) // MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003) // MSVC++ 8.0 _MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) // MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) // MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) // MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) // MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) // MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) // MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) // MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) #if defined(_MSC_VER ) && !defined(__clang__) # define nssv_COMPILER_MSVC_VER (_MSC_VER ) # define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) ) #else # define nssv_COMPILER_MSVC_VER 0 # define nssv_COMPILER_MSVC_VERSION 0 #endif #define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) #if defined(__clang__) # define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) #else # define nssv_COMPILER_CLANG_VERSION 0 #endif #if defined(__GNUC__) && !defined(__clang__) # define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) #else # define nssv_COMPILER_GNUC_VERSION 0 #endif // half-open range [lo..hi): #define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) ) // Presence of language and library features: #ifdef _HAS_CPP0X # define nssv_HAS_CPP0X _HAS_CPP0X #else # define nssv_HAS_CPP0X 0 #endif // Unless defined otherwise below, consider VC14 as C++11 for variant-lite: #if nssv_COMPILER_MSVC_VER >= 1900 # undef nssv_CPP11_OR_GREATER # define nssv_CPP11_OR_GREATER 1 #endif #define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500) #define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600) #define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1700) #define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1800) #define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900) #define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910) #define nssv_CPP14_000 (nssv_CPP14_OR_GREATER) #define nssv_CPP17_000 (nssv_CPP17_OR_GREATER) // Presence of C++11 language features: #define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140 #define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140 #define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140 #define 
nssv_HAVE_NOEXCEPT nssv_CPP11_140 #define nssv_HAVE_NULLPTR nssv_CPP11_100 #define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140 #define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140 #define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140 #define nssv_HAVE_WCHAR16_T nssv_CPP11_100 #define nssv_HAVE_WCHAR32_T nssv_CPP11_100 #if ! ( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) ) # define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140 #else # define nssv_HAVE_STD_DEFINED_LITERALS 0 #endif // Presence of C++14 language features: #define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000 // Presence of C++17 language features: #define nssv_HAVE_NODISCARD nssv_CPP17_000 // Presence of C++ library features: #define nssv_HAVE_STD_HASH nssv_CPP11_120 // C++ feature usage: #if nssv_HAVE_CONSTEXPR_11 # define nssv_constexpr constexpr #else # define nssv_constexpr /*constexpr*/ #endif #if nssv_HAVE_CONSTEXPR_14 # define nssv_constexpr14 constexpr #else # define nssv_constexpr14 /*constexpr*/ #endif #if nssv_HAVE_EXPLICIT_CONVERSION # define nssv_explicit explicit #else # define nssv_explicit /*explicit*/ #endif #if nssv_HAVE_INLINE_NAMESPACE # define nssv_inline_ns inline #else # define nssv_inline_ns /*inline*/ #endif #if nssv_HAVE_NOEXCEPT # define nssv_noexcept noexcept #else # define nssv_noexcept /*noexcept*/ #endif //#if nssv_HAVE_REF_QUALIFIER //# define nssv_ref_qual & //# define nssv_refref_qual && //#else //# define nssv_ref_qual /*&*/ //# define nssv_refref_qual /*&&*/ //#endif #if nssv_HAVE_NULLPTR # define nssv_nullptr nullptr #else # define nssv_nullptr NULL #endif #if nssv_HAVE_NODISCARD # define nssv_nodiscard [[nodiscard]] #else # define nssv_nodiscard /*[[nodiscard]]*/ #endif // Additional includes: #include #include #include #include #include #include // std::char_traits<> #if ! nssv_CONFIG_NO_EXCEPTIONS # include #endif #if nssv_CPP11_OR_GREATER # include #endif // Clang, GNUC, MSVC warning suppression macros: #if defined(__clang__) # pragma clang diagnostic ignored "-Wreserved-user-defined-literal" # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wuser-defined-literals" #elif defined(__GNUC__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wliteral-suffix" #endif // __clang__ #if nssv_COMPILER_MSVC_VERSION >= 140 # define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]] # define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) ) # define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes)) #else # define nssv_SUPPRESS_MSGSL_WARNING(expr) # define nssv_SUPPRESS_MSVC_WARNING(code, descr) # define nssv_DISABLE_MSVC_WARNINGS(codes) #endif #if defined(__clang__) # define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop") #elif defined(__GNUC__) # define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop") #elif nssv_COMPILER_MSVC_VERSION >= 140 # define nssv_RESTORE_WARNINGS() __pragma(warning(pop )) #else # define nssv_RESTORE_WARNINGS() #endif // Suppress the following MSVC (GSL) warnings: // - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not // start with an underscore are reserved // - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions; // use brace initialization, gsl::narrow_cast or gsl::narow // - C26481: gsl::b.1 : don't use pointer arithmetic. 
Use span instead nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) //nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" ) //nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix ) namespace nonstd { namespace sv_lite { #if nssv_CPP11_OR_GREATER namespace detail { #if nssv_CPP14_OR_GREATER template< typename CharT > inline constexpr std::size_t length( CharT * s ) { std::size_t result = 0; while ( *s++ != '\0' ) { ++result; } return result; } #elif defined(__OPTIMIZE__) // nssv_CPP14_OR_GREATER // gcc, clang provide __OPTIMIZE__ // Expect tail call optimization to make length() non-recursive: template< typename CharT > inline constexpr std::size_t length( CharT * s, std::size_t result = 0 ) { return *s == '\0' ? result : length( s + 1, result + 1 ); } #else // nssv_CPP14_OR_GREATER // non-constexpr, non-recursive: template< typename CharT > inline std::size_t length( CharT * s ) { std::size_t result = 0; while ( *s++ != '\0' ) { ++result; } return result; } #endif // nssv_CPP14_OR_GREATER } // namespace detail #endif // nssv_CPP11_OR_GREATER template < class CharT, class Traits = std::char_traits > class basic_string_view; // // basic_string_view: // template < class CharT, class Traits /* = std::char_traits */ > class basic_string_view { public: // Member types: typedef Traits traits_type; typedef CharT value_type; typedef CharT * pointer; typedef CharT const * const_pointer; typedef CharT & reference; typedef CharT const & const_reference; typedef const_pointer iterator; typedef const_pointer const_iterator; typedef std::reverse_iterator< const_iterator > reverse_iterator; typedef std::reverse_iterator< const_iterator > const_reverse_iterator; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; // 24.4.2.1 Construction and assignment: nssv_constexpr basic_string_view() nssv_noexcept : data_( nssv_nullptr ) , size_( 0 ) {} #if nssv_CPP11_OR_GREATER nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default; #else nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept : data_( other.data_) , size_( other.size_) {} #endif nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept : data_( s ) , size_( count ) {} nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept : data_( s ) #if nssv_CPP17_OR_GREATER , size_( Traits::length(s) ) #elif nssv_CPP11_OR_GREATER , size_( detail::length(s) ) #else , size_( Traits::length(s) ) #endif {} // Assignment: #if nssv_CPP11_OR_GREATER nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept = default; #else nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept { data_ = other.data_; size_ = other.size_; return *this; } #endif // 24.4.2.2 Iterator support: nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } nssv_constexpr const_reverse_iterator crbegin() const nssv_noexcept { return rbegin(); } nssv_constexpr const_reverse_iterator crend() const 
nssv_noexcept { return rend(); } // 24.4.2.3 Capacity: nssv_constexpr size_type size() const nssv_noexcept { return size_; } nssv_constexpr size_type length() const nssv_noexcept { return size_; } nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } // since C++20 nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept { return 0 == size_; } // 24.4.2.4 Element access: nssv_constexpr const_reference operator[]( size_type pos ) const { return data_at( pos ); } nssv_constexpr14 const_reference at( size_type pos ) const { #if nssv_CONFIG_NO_EXCEPTIONS assert( pos < size() ); #else if ( pos >= size() ) { throw std::out_of_range("nonstd::string_view::at()"); } #endif return data_at( pos ); } nssv_constexpr const_reference front() const { return data_at( 0 ); } nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } nssv_constexpr const_pointer data() const nssv_noexcept { return data_; } // 24.4.2.5 Modifiers: nssv_constexpr14 void remove_prefix( size_type n ) { assert( n <= size() ); data_ += n; size_ -= n; } nssv_constexpr14 void remove_suffix( size_type n ) { assert( n <= size() ); size_ -= n; } nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept { using std::swap; swap( data_, other.data_ ); swap( size_, other.size_ ); } // 24.4.2.6 String operations: size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const { #if nssv_CONFIG_NO_EXCEPTIONS assert( pos <= size() ); #else if ( pos > size() ) { throw std::out_of_range("nonstd::string_view::copy()"); } #endif const size_type rlen = (std::min)( n, size() - pos ); (void) Traits::copy( dest, data() + pos, rlen ); return rlen; } nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const { #if nssv_CONFIG_NO_EXCEPTIONS assert( pos <= size() ); #else if ( pos > size() ) { throw std::out_of_range("nonstd::string_view::substr()"); } #endif return basic_string_view( data() + pos, (std::min)( n, size() - pos ) ); } // compare(), 6x: nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) { if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) { return result; } return size() == other.size() ? 0 : size() < other.size() ? 
-1 : 1; } nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) { return substr( pos1, n1 ).compare( other ); } nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) { return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) ); } nssv_constexpr int compare( CharT const * s ) const // (4) { return compare( basic_string_view( s ) ); } nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) { return substr( pos1, n1 ).compare( basic_string_view( s ) ); } nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) { return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) ); } // 24.4.2.7 Searching: // starts_with(), 3x, since C++20: nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) { return size() >= v.size() && compare( 0, v.size(), v ) == 0; } nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) { return starts_with( basic_string_view( &c, 1 ) ); } nssv_constexpr bool starts_with( CharT const * s ) const // (3) { return starts_with( basic_string_view( s ) ); } // ends_with(), 3x, since C++20: nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1) { return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0; } nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2) { return ends_with( basic_string_view( &c, 1 ) ); } nssv_constexpr bool ends_with( CharT const * s ) const // (3) { return ends_with( basic_string_view( s ) ); } // find(), 4x: nssv_constexpr14 size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) { return assert( v.size() == 0 || v.data() != nssv_nullptr ) , pos >= size() ? npos : to_pos( std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); } nssv_constexpr14 size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) { return find( basic_string_view( &c, 1 ), pos ); } nssv_constexpr14 size_type find( CharT const * s, size_type pos, size_type n ) const // (3) { return find( basic_string_view( s, n ), pos ); } nssv_constexpr14 size_type find( CharT const * s, size_type pos = 0 ) const // (4) { return find( basic_string_view( s ), pos ); } // rfind(), 4x: nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) { if ( size() < v.size() ) { return npos; } if ( v.empty() ) { return (std::min)( size(), pos ); } const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size(); const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq ); return result != last ? size_type( result - cbegin() ) : npos; } nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2) { return rfind( basic_string_view( &c, 1 ), pos ); } nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3) { return rfind( basic_string_view( s, n ), pos ); } nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4) { return rfind( basic_string_view( s ), pos ); } // find_first_of(), 4x: nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) { return pos >= size() ? 
npos : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); } nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) { return find_first_of( basic_string_view( &c, 1 ), pos ); } nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3) { return find_first_of( basic_string_view( s, n ), pos ); } nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4) { return find_first_of( basic_string_view( s ), pos ); } // find_last_of(), 4x: nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) { return empty() ? npos : pos >= size() ? find_last_of( v, size() - 1 ) : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) ); } nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) { return find_last_of( basic_string_view( &c, 1 ), pos ); } nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3) { return find_last_of( basic_string_view( s, count ), pos ); } nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4) { return find_last_of( basic_string_view( s ), pos ); } // find_first_not_of(), 4x: nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) { return pos >= size() ? npos : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) ); } nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) { return find_first_not_of( basic_string_view( &c, 1 ), pos ); } nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3) { return find_first_not_of( basic_string_view( s, count ), pos ); } nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4) { return find_first_not_of( basic_string_view( s ), pos ); } // find_last_not_of(), 4x: nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) { return empty() ? npos : pos >= size() ? find_last_not_of( v, size() - 1 ) : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) ); } nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) { return find_last_not_of( basic_string_view( &c, 1 ), pos ); } nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3) { return find_last_not_of( basic_string_view( s, count ), pos ); } nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4) { return find_last_not_of( basic_string_view( s ), pos ); } // Constants: #if nssv_CPP17_OR_GREATER static nssv_constexpr size_type npos = size_type(-1); #elif nssv_CPP11_OR_GREATER enum : size_type { npos = size_type(-1) }; #else enum { npos = size_type(-1) }; #endif private: struct not_in_view { const basic_string_view v; nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {} nssv_constexpr bool operator()( CharT c ) const { return npos == v.find_first_of( c ); } }; nssv_constexpr size_type to_pos( const_iterator it ) const { return it == cend() ? 
npos : size_type( it - cbegin() ); } nssv_constexpr size_type to_pos( const_reverse_iterator it ) const { return it == crend() ? npos : size_type( crend() - it - 1 ); } nssv_constexpr const_reference data_at( size_type pos ) const { #if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ) return data_[pos]; #else return assert( pos < size() ), data_[pos]; #endif } private: const_pointer data_; size_type size_; public: #if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS template< class Allocator > basic_string_view( std::basic_string const & s ) nssv_noexcept : data_( s.data() ) , size_( s.size() ) {} #if nssv_HAVE_EXPLICIT_CONVERSION template< class Allocator > explicit operator std::basic_string() const { return to_string( Allocator() ); } #endif // nssv_HAVE_EXPLICIT_CONVERSION #if nssv_CPP11_OR_GREATER template< class Allocator = std::allocator > std::basic_string to_string( Allocator const & a = Allocator() ) const { return std::basic_string( begin(), end(), a ); } #else std::basic_string to_string() const { return std::basic_string( begin(), end() ); } template< class Allocator > std::basic_string to_string( Allocator const & a ) const { return std::basic_string( begin(), end(), a ); } #endif // nssv_CPP11_OR_GREATER #endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS }; // // Non-member functions: // // 24.4.3 Non-member comparison functions: // lexicographically compare two string views (function template): template< class CharT, class Traits > nssv_constexpr bool operator== ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return lhs.compare( rhs ) == 0 ; } template< class CharT, class Traits > nssv_constexpr bool operator!= ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return lhs.compare( rhs ) != 0 ; } template< class CharT, class Traits > nssv_constexpr bool operator< ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return lhs.compare( rhs ) < 0 ; } template< class CharT, class Traits > nssv_constexpr bool operator<= ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return lhs.compare( rhs ) <= 0 ; } template< class CharT, class Traits > nssv_constexpr bool operator> ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return lhs.compare( rhs ) > 0 ; } template< class CharT, class Traits > nssv_constexpr bool operator>= ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return lhs.compare( rhs ) >= 0 ; } // Let S be basic_string_view, and sv be an instance of S. // Implementations shall provide sufficient additional overloads marked // constexpr and noexcept so that an object t with an implicit conversion // to S can be compared according to Table 67. #if ! 
nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) // accomodate for older compilers: // == template< class CharT, class Traits> nssv_constexpr bool operator==( basic_string_view lhs, CharT const * rhs ) nssv_noexcept { return lhs.compare( rhs ) == 0; } template< class CharT, class Traits> nssv_constexpr bool operator==( CharT const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) == 0; } template< class CharT, class Traits> nssv_constexpr bool operator==( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept { return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } template< class CharT, class Traits> nssv_constexpr bool operator==( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } // != template< class CharT, class Traits> nssv_constexpr bool operator!=( basic_string_view lhs, char const * rhs ) nssv_noexcept { return lhs.compare( rhs ) != 0; } template< class CharT, class Traits> nssv_constexpr bool operator!=( char const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) != 0; } template< class CharT, class Traits> nssv_constexpr bool operator!=( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept { return lhs.size() != rhs.size() && lhs.compare( rhs ) != 0; } template< class CharT, class Traits> nssv_constexpr bool operator!=( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return lhs.size() != rhs.size() || rhs.compare( lhs ) != 0; } // < template< class CharT, class Traits> nssv_constexpr bool operator<( basic_string_view lhs, char const * rhs ) nssv_noexcept { return lhs.compare( rhs ) < 0; } template< class CharT, class Traits> nssv_constexpr bool operator<( char const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) > 0; } template< class CharT, class Traits> nssv_constexpr bool operator<( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept { return lhs.compare( rhs ) < 0; } template< class CharT, class Traits> nssv_constexpr bool operator<( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return rhs.compare( lhs ) > 0; } // <= template< class CharT, class Traits> nssv_constexpr bool operator<=( basic_string_view lhs, char const * rhs ) nssv_noexcept { return lhs.compare( rhs ) <= 0; } template< class CharT, class Traits> nssv_constexpr bool operator<=( char const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) >= 0; } template< class CharT, class Traits> nssv_constexpr bool operator<=( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept { return lhs.compare( rhs ) <= 0; } template< class CharT, class Traits> nssv_constexpr bool operator<=( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return rhs.compare( lhs ) >= 0; } // > template< class CharT, class Traits> nssv_constexpr bool operator>( basic_string_view lhs, char const * rhs ) nssv_noexcept { return lhs.compare( rhs ) > 0; } template< class CharT, class Traits> nssv_constexpr bool operator>( char const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) < 0; } template< class CharT, class Traits> nssv_constexpr bool operator>( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept { return lhs.compare( rhs ) > 0; } template< class CharT, class Traits> nssv_constexpr bool operator>( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return rhs.compare( lhs ) < 0; } // >= template< class CharT, class Traits> 
nssv_constexpr bool operator>=( basic_string_view lhs, char const * rhs ) nssv_noexcept { return lhs.compare( rhs ) >= 0; } template< class CharT, class Traits> nssv_constexpr bool operator>=( char const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) <= 0; } template< class CharT, class Traits> nssv_constexpr bool operator>=( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept { return lhs.compare( rhs ) >= 0; } template< class CharT, class Traits> nssv_constexpr bool operator>=( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return rhs.compare( lhs ) <= 0; } #else // newer compilers: #define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view >::type #if nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 140, 150 ) # define nssv_MSVC_ORDER(x) , int=x #else # define nssv_MSVC_ORDER(x) /*, int=x*/ #endif // == template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator==( basic_string_view lhs, nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept { return lhs.compare( rhs ) == 0; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator==( nssv_BASIC_STRING_VIEW_I(CharT, Traits) lhs, basic_string_view rhs ) nssv_noexcept { return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } // != template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator!= ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept { return lhs.size() != rhs.size() || lhs.compare( rhs ) != 0 ; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator!= ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept { return lhs.compare( rhs ) != 0 ; } // < template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator< ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept { return lhs.compare( rhs ) < 0 ; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator< ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept { return lhs.compare( rhs ) < 0 ; } // <= template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator<= ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept { return lhs.compare( rhs ) <= 0 ; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator<= ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept { return lhs.compare( rhs ) <= 0 ; } // > template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator> ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept { return lhs.compare( rhs ) > 0 ; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator> ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept { return lhs.compare( rhs ) > 0 ; } // >= template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator>= ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept { return lhs.compare( rhs ) >= 0 ; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator>= ( 
nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs,
    basic_string_view < CharT, Traits > rhs ) nssv_noexcept
{ return lhs.compare( rhs ) >= 0 ; }

#undef nssv_MSVC_ORDER
#undef nssv_BASIC_STRING_VIEW_I

#endif // compiler-dependent approach to comparisons

// 24.4.4 Inserters and extractors:

namespace detail {

template< class Stream >
void write_padding( Stream & os, std::streamsize n )
{
    for ( std::streamsize i = 0; i < n; ++i )
        os.rdbuf()->sputc( os.fill() );
}

template< class Stream, class View >
Stream & write_to_stream( Stream & os, View const & sv )
{
    typename Stream::sentry sentry( os );

    if ( !os ) return os;

    const std::streamsize length = static_cast<std::streamsize>( sv.length() );

    // Whether, and how, to pad:
    const bool      pad = ( length < os.width() );
    const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right;

    if ( left_pad )
        write_padding( os, os.width() - length );

    // Write span characters:
    os.rdbuf()->sputn( sv.begin(), length );

    if ( pad && !left_pad )
        write_padding( os, os.width() - length );

    // Reset output stream width:
    os.width( 0 );

    return os;
}

} // namespace detail

template< class CharT, class Traits >
std::basic_ostream<CharT, Traits> &
operator<<( std::basic_ostream<CharT, Traits>& os, basic_string_view<CharT, Traits> sv )
{
    return detail::write_to_stream( os, sv );
}

// Several typedefs for common character types are provided:

typedef basic_string_view<char>      string_view;
typedef basic_string_view<wchar_t>   wstring_view;
#if nssv_HAVE_WCHAR16_T
typedef basic_string_view<char16_t>  u16string_view;
typedef basic_string_view<char32_t>  u32string_view;
#endif

}} // namespace nonstd::sv_lite

//
// 24.4.6 Suffix for basic_string_view literals:
//

#if nssv_HAVE_USER_DEFINED_LITERALS

namespace nonstd {
nssv_inline_ns namespace literals {
nssv_inline_ns namespace string_view_literals {

#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS

nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept  // (1)
{ return nonstd::sv_lite::string_view{ str, len }; }

nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept  // (2)
{ return nonstd::sv_lite::u16string_view{ str, len }; }

nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept  // (3)
{ return nonstd::sv_lite::u32string_view{ str, len }; }

nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept  // (4)
{ return nonstd::sv_lite::wstring_view{ str, len }; }

#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS

#if nssv_CONFIG_USR_SV_OPERATOR

nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept  // (1)
{ return nonstd::sv_lite::string_view{ str, len }; }

nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept  // (2)
{ return nonstd::sv_lite::u16string_view{ str, len }; }

nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept  // (3)
{ return nonstd::sv_lite::u32string_view{ str, len }; }

nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept  // (4)
{ return nonstd::sv_lite::wstring_view{ str, len }; }

#endif // nssv_CONFIG_USR_SV_OPERATOR

}}} // namespace nonstd::literals::string_view_literals

#endif

//
// Extensions for std::string:
//

#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS

namespace nonstd {
namespace sv_lite {

// Exclude MSVC 14 (19.00):
// it yields ambiguous to_string():

#if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140

template< class CharT, class Traits, class Allocator = std::allocator<CharT> >
std::basic_string<CharT, Traits, Allocator>
to_string( basic_string_view<CharT, Traits> v, Allocator const & a = Allocator() )
{
    return std::basic_string<CharT, Traits, Allocator>( v.begin(), v.end(), a );
}

#else

template< class CharT, class Traits >
std::basic_string<CharT, Traits>
to_string( basic_string_view<CharT, Traits> v )
{
    return std::basic_string<CharT, Traits>( v.begin(), v.end() );
}

template< class CharT, class Traits, class Allocator >
std::basic_string<CharT, Traits, Allocator>
to_string( basic_string_view<CharT, Traits> v, Allocator const & a )
{
    return std::basic_string<CharT, Traits, Allocator>( v.begin(), v.end(), a );
}

#endif // nssv_CPP11_OR_GREATER

template< class CharT, class Traits, class Allocator >
basic_string_view<CharT, Traits>
to_string_view( std::basic_string<CharT, Traits, Allocator> const & s )
{
    return basic_string_view<CharT, Traits>( s.data(), s.size() );
}

}} // namespace nonstd::sv_lite

#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS

//
// make types and algorithms available in namespace nonstd:
//

namespace nonstd {

using sv_lite::basic_string_view;
using sv_lite::string_view;
using sv_lite::wstring_view;

#if nssv_HAVE_WCHAR16_T
using sv_lite::u16string_view;
#endif
#if nssv_HAVE_WCHAR32_T
using sv_lite::u32string_view;
#endif

// literal "sv"

using sv_lite::operator==;
using sv_lite::operator!=;
using sv_lite::operator<;
using sv_lite::operator<=;
using sv_lite::operator>;
using sv_lite::operator>=;

using sv_lite::operator<<;

#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS
using sv_lite::to_string;
using sv_lite::to_string_view;
#endif

} // namespace nonstd

// 24.4.5 Hash support (C++11):

// Note: The hash value of a string view object is equal to the hash value of
// the corresponding string object.

#if nssv_HAVE_STD_HASH

#include <functional>

namespace std {

template<>
struct hash< nonstd::string_view >
{
public:
    std::size_t operator()( nonstd::string_view v ) const nssv_noexcept
    { return std::hash<std::string>()( std::string( v.data(), v.size() ) ); }
};

template<>
struct hash< nonstd::wstring_view >
{
public:
    std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept
    { return std::hash<std::wstring>()( std::wstring( v.data(), v.size() ) ); }
};

template<>
struct hash< nonstd::u16string_view >
{
public:
    std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept
    { return std::hash<std::u16string>()( std::u16string( v.data(), v.size() ) ); }
};

template<>
struct hash< nonstd::u32string_view >
{
public:
    std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept
    { return std::hash<std::u32string>()( std::u32string( v.data(), v.size() ) ); }
};

} // namespace std

#endif // nssv_HAVE_STD_HASH

nssv_RESTORE_WARNINGS()

#endif // nssv_HAVE_STD_STRING_VIEW

#endif // NONSTD_SV_LITE_H_INCLUDED
TileDB-Py-0.12.2/external/tsl/000077500000000000000000000000001417663620700157255ustar00rootroot00000000000000TileDB-Py-0.12.2/external/tsl/robin_growth_policy.h000066400000000000000000000267641417663620700221660ustar00rootroot00000000000000/**
 * MIT License
 *
 * Copyright (c) 2017 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial
portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef TSL_ROBIN_GROWTH_POLICY_H
#define TSL_ROBIN_GROWTH_POLICY_H

#include <algorithm>
#include <array>
#include <climits>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <ratio>
#include <stdexcept>

#ifdef TSL_DEBUG
#    define tsl_rh_assert(expr) assert(expr)
#else
#    define tsl_rh_assert(expr) (static_cast<void>(0))
#endif

/**
 * If exceptions are enabled, throw the exception passed in parameter, otherwise call std::terminate.
 */
#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (defined (_MSC_VER) && defined (_CPPUNWIND))) && !defined(TSL_NO_EXCEPTIONS)
#    define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
#else
#    define TSL_RH_NO_EXCEPTIONS
#    ifdef NDEBUG
#        define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate()
#    else
#        include <iostream>
#        define TSL_RH_THROW_OR_TERMINATE(ex, msg) do { std::cerr << msg << std::endl; std::terminate(); } while(0)
#    endif
#endif

#if defined(__GNUC__) || defined(__clang__)
#    define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true))
#else
#    define TSL_RH_LIKELY(exp) (exp)
#endif

namespace tsl {
namespace rh {

/**
 * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows
 * the table to use a mask operation instead of a modulo operation to map a hash to a bucket.
 *
 * GrowthFactor must be a power of two >= 2.
 */
template<std::size_t GrowthFactor>
class power_of_two_growth_policy {
public:
    /**
     * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter.
     * This number is a minimum, the policy may update this value with a higher value if needed (but not lower).
     *
     * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and
     * bucket_for_hash must always return 0 in this case.
     */
    explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
        if(min_bucket_count_in_out > max_bucket_count()) {
            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
        }

        if(min_bucket_count_in_out > 0) {
            min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
            m_mask = min_bucket_count_in_out - 1;
        }
        else {
            m_mask = 0;
        }
    }

    /**
     * Return the bucket [0, bucket_count()) to which the hash belongs.
     * If bucket_count() is 0, it must always return 0.
     */
    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
        return hash & m_mask;
    }

    /**
     * Return the number of buckets that should be used on next growth.
     */
    std::size_t next_bucket_count() const {
        if((m_mask + 1) > max_bucket_count() / GrowthFactor) {
            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
        }

        return (m_mask + 1) * GrowthFactor;
    }

    /**
     * Return the maximum number of buckets supported by the policy.
     */
    std::size_t max_bucket_count() const {
        // Largest power of two.
        return (std::numeric_limits<std::size_t>::max() / 2) + 1;
    }

    /**
     * Reset the growth policy as if it was created with a bucket count of 0.
     * After a clear, the policy must always return 0 when bucket_for_hash is called.
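     *
     * For illustration, a minimal usage sketch (editorial example, not part of
     * the upstream header; the bucket count and hash value are assumed):
     *
     * \code
     * std::size_t count = 16; // already a power of two, so kept as-is
     * tsl::rh::power_of_two_growth_policy<2> policy(count);
     * // bucket_for_hash() masks with count - 1:
     * tsl_rh_assert(policy.bucket_for_hash(0x2Au) == (0x2Au & 15u));
     * policy.clear(); // mask becomes 0 again
     * tsl_rh_assert(policy.bucket_for_hash(0x2Au) == 0);
     * \endcode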
     */
    void clear() noexcept {
        m_mask = 0;
    }

private:
    static std::size_t round_up_to_power_of_two(std::size_t value) {
        if(is_power_of_two(value)) {
            return value;
        }

        if(value == 0) {
            return 1;
        }

        --value;
        for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
            value |= value >> i;
        }

        return value + 1;
    }

    static constexpr bool is_power_of_two(std::size_t value) {
        return value != 0 && (value & (value - 1)) == 0;
    }

protected:
    static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2.");

    std::size_t m_mask;
};

/**
 * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
 * to a bucket. Slower but it can be useful if you want a slower growth.
 */
template<class GrowthFactor = std::ratio<3, 2>>
class mod_growth_policy {
public:
    explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
        if(min_bucket_count_in_out > max_bucket_count()) {
            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
        }

        if(min_bucket_count_in_out > 0) {
            m_mod = min_bucket_count_in_out;
        }
        else {
            m_mod = 1;
        }
    }

    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
        return hash % m_mod;
    }

    std::size_t next_bucket_count() const {
        if(m_mod == max_bucket_count()) {
            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
        }

        const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
        if(!std::isnormal(next_bucket_count)) {
            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
        }

        if(next_bucket_count > double(max_bucket_count())) {
            return max_bucket_count();
        }
        else {
            return std::size_t(next_bucket_count);
        }
    }

    std::size_t max_bucket_count() const {
        return MAX_BUCKET_COUNT;
    }

    void clear() noexcept {
        m_mod = 1;
    }

private:
    static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den;
    static const std::size_t MAX_BUCKET_COUNT = std::size_t(double(
            std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR
    ));

    static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");

    std::size_t m_mod;
};

namespace detail {

#if SIZE_MAX >= ULLONG_MAX
#define TSL_RH_NB_PRIMES 51
#elif SIZE_MAX >= ULONG_MAX
#define TSL_RH_NB_PRIMES 40
#else
#define TSL_RH_NB_PRIMES 23
#endif

static constexpr const std::array<std::size_t, TSL_RH_NB_PRIMES> PRIMES = {{
    1u, 5u, 17u, 29u, 37u, 53u, 67u, 79u, 97u, 131u, 193u, 257u, 389u, 521u, 769u, 1031u,
    1543u, 2053u, 3079u, 6151u, 12289u, 24593u, 49157u,
#if SIZE_MAX >= ULONG_MAX
    98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul,
    25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,
    3221225473ul, 4294967291ul,
#endif
#if SIZE_MAX >= ULLONG_MAX
    6442450939ull, 12884901893ull, 25769803751ull, 51539607551ull, 103079215111ull, 206158430209ull,
    412316860441ull, 824633720831ull, 1649267441651ull, 3298534883309ull, 6597069766657ull,
#endif
}};

template<unsigned int IPrime>
static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; }

// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the
// compiler can optimize the modulo code better with a constant known at the compilation.
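// Illustration (editorial sketch; the hash value is assumed, not upstream code):
//
//   std::size_t h = 12345u;
//   std::size_t bucket = MOD_PRIME[2](h); // calls mod<2>, i.e. h % 17u
//
// Each mod<IPrime> instantiation carries a compile-time-constant divisor, which
// the compiler can strength-reduce to multiply/shift sequences instead of an
// actual division instruction.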
static constexpr const std::array<std::size_t(*)(std::size_t), TSL_RH_NB_PRIMES> MOD_PRIME = {{
    &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>,
    &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>,
    &mod<21>, &mod<22>,
#if SIZE_MAX >= ULONG_MAX
    &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, &mod<31>, &mod<32>,
    &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>,
#endif
#if SIZE_MAX >= ULLONG_MAX
    &mod<40>, &mod<41>, &mod<42>, &mod<43>, &mod<44>, &mod<45>, &mod<46>, &mod<47>, &mod<48>, &mod<49>,
    &mod<50>,
#endif
}};

}

/**
 * Grow the hash table by using prime numbers as bucket count. Slower than tsl::rh::power_of_two_growth_policy in
 * general but will probably distribute the values around better in the buckets with a poor hash function.
 *
 * To allow the compiler to optimize the modulo operation, a lookup table is used with constant prime numbers.
 *
 * With a switch the code would look like:
 * \code
 * switch(iprime) { // iprime is the current prime of the hash table
 *     case 0: hash % 5ul;
 *             break;
 *     case 1: hash % 17ul;
 *             break;
 *     case 2: hash % 29ul;
 *             break;
 *     ...
 * }
 * \endcode
 *
 * Due to the constant variable in the modulo the compiler is able to optimize the operation
 * by a series of multiplications, subtractions and shifts.
 *
 * The 'hash % 5' could become something like 'hash - ((hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environment.
 */
class prime_growth_policy {
public:
    explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
        auto it_prime = std::lower_bound(detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out);
        if(it_prime == detail::PRIMES.end()) {
            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
        }

        m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
        if(min_bucket_count_in_out > 0) {
            min_bucket_count_in_out = *it_prime;
        }
        else {
            min_bucket_count_in_out = 0;
        }
    }

    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
        return detail::MOD_PRIME[m_iprime](hash);
    }

    std::size_t next_bucket_count() const {
        if(m_iprime + 1 >= detail::PRIMES.size()) {
            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
        }

        return detail::PRIMES[m_iprime + 1];
    }

    std::size_t max_bucket_count() const {
        return detail::PRIMES.back();
    }

    void clear() noexcept {
        m_iprime = 0;
    }

private:
    unsigned int m_iprime;

    static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
                  "The type of m_iprime is not big enough.");
};

}
}

#endif
TileDB-Py-0.12.2/external/tsl/robin_hash.h000066400000000000000000001471331417663620700202230ustar00rootroot00000000000000/**
 * MIT License
 *
 * Copyright (c) 2017 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef TSL_ROBIN_HASH_H
#define TSL_ROBIN_HASH_H

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "robin_growth_policy.h"

namespace tsl {

namespace detail_robin_hash {

template<typename T>
struct make_void {
    using type = void;
};

template<typename T, typename = void>
struct has_is_transparent: std::false_type {
};

template<typename T>
struct has_is_transparent<T, typename make_void<typename T::is_transparent>::type>: std::true_type {
};

template<typename U>
struct is_power_of_two_policy: std::false_type {
};

template<std::size_t GrowthFactor>
struct is_power_of_two_policy<tsl::rh::power_of_two_growth_policy<GrowthFactor>>: std::true_type {
};

// Only available in C++17, we need to be compatible with C++11
template<class T>
const T& clamp( const T& v, const T& lo, const T& hi) {
    return std::min(hi, std::max(lo, v));
}

template<class T, class U>
static T numeric_cast(U value, const char* error_message = "numeric_cast() failed.") {
    T ret = static_cast<T>(value);
    if(static_cast<U>(ret) != value) {
        TSL_RH_THROW_OR_TERMINATE(std::runtime_error, error_message);
    }

    const bool is_same_signedness = (std::is_unsigned<T>::value && std::is_unsigned<U>::value) ||
                                    (std::is_signed<T>::value && std::is_signed<U>::value);
    if(!is_same_signedness && (ret < T{}) != (value < U{})) {
        TSL_RH_THROW_OR_TERMINATE(std::runtime_error, error_message);
    }

    return ret;
}

using truncated_hash_type = std::uint_least32_t;

/**
 * Helper class that stores a truncated hash if StoreHash is true and nothing otherwise.
 */
template<bool StoreHash>
class bucket_entry_hash {
public:
    bool bucket_hash_equal(std::size_t /*hash*/) const noexcept {
        return true;
    }

    truncated_hash_type truncated_hash() const noexcept {
        return 0;
    }

protected:
    void set_hash(truncated_hash_type /*hash*/) noexcept {
    }
};

template<>
class bucket_entry_hash<true> {
public:
    bool bucket_hash_equal(std::size_t hash) const noexcept {
        return m_hash == truncated_hash_type(hash);
    }

    truncated_hash_type truncated_hash() const noexcept {
        return m_hash;
    }

protected:
    void set_hash(truncated_hash_type hash) noexcept {
        m_hash = truncated_hash_type(hash);
    }

private:
    truncated_hash_type m_hash;
};

/**
 * Each bucket entry has:
 * - A value of type `ValueType`.
 * - An integer to store how far the value of the bucket, if any, is from its ideal bucket
 *   (ex: if the current bucket 5 has the value 'foo' and `hash('foo') % nb_buckets` == 3,
 *   `dist_from_ideal_bucket()` will return 2 as the current value of the bucket is two
 *   buckets away from its ideal bucket)
 *   If there is no value in the bucket (i.e. `empty()` is true) `dist_from_ideal_bucket()` will be < 0.
 * - A marker which tells us if the bucket is the last bucket of the bucket array (useful for the
 *   iterator of the hash table).
 * - If `StoreHash` is true, 32 bits of the hash of the value, if any, are also stored in the bucket.
 *   If the size of the hash is more than 32 bits, it is truncated. We don't store the full hash
 *   as storing the hash is a potential opportunity to use the unused space due to the alignment
 *   of the bucket_entry structure. We can thus potentially store the hash without any extra space
 *   (which would not be possible with 64 bits of the hash).
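 *
 * A sketch of the lookup loop this distance enables (illustrative only; the
 * `buckets`, `hash` and `nb_buckets` names are assumed, not upstream code):
 *
 * \code
 * std::size_t ibucket = hash % nb_buckets;
 * for(distance_type dist = 0; ; ++dist, ++ibucket) {
 *     // Robin hood invariant: a stored value is never further from its ideal
 *     // bucket than the current probe distance, so the search can stop early.
 *     if(dist > buckets[ibucket].dist_from_ideal_bucket()) {
 *         break; // not found
 *     }
 *     // ... otherwise compare the (truncated) hash and then the key ...
 * }
 * \endcode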
*/ template class bucket_entry: public bucket_entry_hash { using bucket_hash = bucket_entry_hash; public: using value_type = ValueType; using distance_type = std::int_least16_t; bucket_entry() noexcept: bucket_hash(), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), m_last_bucket(false) { tsl_rh_assert(empty()); } bucket_entry(bool last_bucket) noexcept: bucket_hash(), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), m_last_bucket(last_bucket) { tsl_rh_assert(empty()); } bucket_entry(const bucket_entry& other) noexcept(std::is_nothrow_copy_constructible::value): bucket_hash(other), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), m_last_bucket(other.m_last_bucket) { if(!other.empty()) { ::new (static_cast(std::addressof(m_value))) value_type(other.value()); m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; } } /** * Never really used, but still necessary as we must call resize on an empty `std::vector`. * and we need to support move-only types. See robin_hash constructor for details. */ bucket_entry(bucket_entry&& other) noexcept(std::is_nothrow_move_constructible::value): bucket_hash(std::move(other)), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), m_last_bucket(other.m_last_bucket) { if(!other.empty()) { ::new (static_cast(std::addressof(m_value))) value_type(std::move(other.value())); m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; } } bucket_entry& operator=(const bucket_entry& other) noexcept(std::is_nothrow_copy_constructible::value) { if(this != &other) { clear(); bucket_hash::operator=(other); if(!other.empty()) { ::new (static_cast(std::addressof(m_value))) value_type(other.value()); } m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; m_last_bucket = other.m_last_bucket; } return *this; } bucket_entry& operator=(bucket_entry&& ) = delete; ~bucket_entry() noexcept { clear(); } void clear() noexcept { if(!empty()) { destroy_value(); m_dist_from_ideal_bucket = EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET; } } bool empty() const noexcept { return m_dist_from_ideal_bucket == EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET; } value_type& value() noexcept { tsl_rh_assert(!empty()); return *reinterpret_cast(std::addressof(m_value)); } const value_type& value() const noexcept { tsl_rh_assert(!empty()); return *reinterpret_cast(std::addressof(m_value)); } distance_type dist_from_ideal_bucket() const noexcept { return m_dist_from_ideal_bucket; } bool last_bucket() const noexcept { return m_last_bucket; } void set_as_last_bucket() noexcept { m_last_bucket = true; } template void set_value_of_empty_bucket(distance_type dist_from_ideal_bucket, truncated_hash_type hash, Args&&... 
value_type_args) { tsl_rh_assert(dist_from_ideal_bucket >= 0); tsl_rh_assert(empty()); ::new (static_cast(std::addressof(m_value))) value_type(std::forward(value_type_args)...); this->set_hash(hash); m_dist_from_ideal_bucket = dist_from_ideal_bucket; tsl_rh_assert(!empty()); } void swap_with_value_in_bucket(distance_type& dist_from_ideal_bucket, truncated_hash_type& hash, value_type& value) { tsl_rh_assert(!empty()); using std::swap; swap(value, this->value()); swap(dist_from_ideal_bucket, m_dist_from_ideal_bucket); // Avoid warning of unused variable if StoreHash is false (void) hash; if(StoreHash) { const truncated_hash_type tmp_hash = this->truncated_hash(); this->set_hash(hash); hash = tmp_hash; } } static truncated_hash_type truncate_hash(std::size_t hash) noexcept { return truncated_hash_type(hash); } private: void destroy_value() noexcept { tsl_rh_assert(!empty()); value().~value_type(); } public: static const distance_type DIST_FROM_IDEAL_BUCKET_LIMIT = 4096; static_assert(DIST_FROM_IDEAL_BUCKET_LIMIT <= std::numeric_limits::max() - 1, "DIST_FROM_IDEAL_BUCKET_LIMIT must be <= std::numeric_limits::max() - 1."); private: using storage = typename std::aligned_storage::type; static const distance_type EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET = -1; distance_type m_dist_from_ideal_bucket; bool m_last_bucket; storage m_value; }; /** * Internal common class used by `robin_map` and `robin_set`. * * ValueType is what will be stored by `robin_hash` (usually `std::pair` for map and `Key` for set). * * `KeySelect` should be a `FunctionObject` which takes a `ValueType` in parameter and returns a * reference to the key. * * `ValueSelect` should be a `FunctionObject` which takes a `ValueType` in parameter and returns a * reference to the value. `ValueSelect` should be void if there is no value (in a set for example). * * The strong exception guarantee only holds if the expression * `std::is_nothrow_swappable::value && std::is_nothrow_move_constructible::value` is true. * * Behaviour is undefined if the destructor of `ValueType` throws. */ template class robin_hash: private Hash, private KeyEqual, private GrowthPolicy { private: template using has_mapped_type = typename std::integral_constant::value>; static_assert(noexcept(std::declval().bucket_for_hash(std::size_t(0))), "GrowthPolicy::bucket_for_hash must be noexcept."); static_assert(noexcept(std::declval().clear()), "GrowthPolicy::clear must be noexcept."); public: template class robin_iterator; using key_type = typename KeySelect::key_type; using value_type = ValueType; using size_type = std::size_t; using difference_type = std::ptrdiff_t; using hasher = Hash; using key_equal = KeyEqual; using allocator_type = Allocator; using reference = value_type&; using const_reference = const value_type&; using pointer = value_type*; using const_pointer = const value_type*; using iterator = robin_iterator; using const_iterator = robin_iterator; private: /** * Either store the hash because we are asked by the `StoreHash` template parameter * or store the hash because it doesn't cost us anything in size and can be used to speed up rehash. */ static constexpr bool STORE_HASH = StoreHash || ( (sizeof(tsl::detail_robin_hash::bucket_entry) == sizeof(tsl::detail_robin_hash::bucket_entry)) && (sizeof(std::size_t) == sizeof(truncated_hash_type) || is_power_of_two_policy::value) && // Don't store the hash for primitive types with default hash. (!std::is_arithmetic::value || !std::is_same>::value) ); /** * Only use the stored hash on lookup if we are explicitly asked. 
We are not sure how slow * the KeyEqual operation is. An extra comparison may slow things down with a fast KeyEqual. */ static constexpr bool USE_STORED_HASH_ON_LOOKUP = StoreHash; /** * We can only use the hash on rehash if the size of the hash type is the same as the stored one or * if we use a power of two modulo. In the case of the power of two modulo, we just mask * the least significant bytes, we just have to check that the truncated_hash_type didn't truncated * more bytes. */ static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) { (void) bucket_count; if(STORE_HASH && sizeof(std::size_t) == sizeof(truncated_hash_type)) { return true; } else if(STORE_HASH && is_power_of_two_policy::value) { tsl_rh_assert(bucket_count > 0); return (bucket_count - 1) <= std::numeric_limits::max(); } else { return false; } } using bucket_entry = tsl::detail_robin_hash::bucket_entry; using distance_type = typename bucket_entry::distance_type; using buckets_allocator = typename std::allocator_traits::template rebind_alloc; using buckets_container_type = std::vector; public: /** * The 'operator*()' and 'operator->()' methods return a const reference and const pointer respectively to the * stored value type. * * In case of a map, to get a mutable reference to the value associated to a key (the '.second' in the * stored pair), you have to call 'value()'. * * The main reason for this is that if we returned a `std::pair&` instead * of a `const std::pair&`, the user may modify the key which will put the map in a undefined state. */ template class robin_iterator { friend class robin_hash; private: using bucket_entry_ptr = typename std::conditional::type; robin_iterator(bucket_entry_ptr bucket) noexcept: m_bucket(bucket) { } public: using iterator_category = std::forward_iterator_tag; using value_type = const typename robin_hash::value_type; using difference_type = std::ptrdiff_t; using reference = value_type&; using pointer = value_type*; robin_iterator() noexcept { } // Copy constructor from iterator to const_iterator. 
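        // (For instance -- illustrative, assuming a robin_map/robin_set instance `m` --
        // this overload is what allows `const_iterator it = m.begin();` to compile.)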
template::type* = nullptr> robin_iterator(const robin_iterator& other) noexcept: m_bucket(other.m_bucket) { } robin_iterator(const robin_iterator& other) = default; robin_iterator(robin_iterator&& other) = default; robin_iterator& operator=(const robin_iterator& other) = default; robin_iterator& operator=(robin_iterator&& other) = default; const typename robin_hash::key_type& key() const { return KeySelect()(m_bucket->value()); } template::value && IsConst>::type* = nullptr> const typename U::value_type& value() const { return U()(m_bucket->value()); } template::value && !IsConst>::type* = nullptr> typename U::value_type& value() const { return U()(m_bucket->value()); } reference operator*() const { return m_bucket->value(); } pointer operator->() const { return std::addressof(m_bucket->value()); } robin_iterator& operator++() { while(true) { if(m_bucket->last_bucket()) { ++m_bucket; return *this; } ++m_bucket; if(!m_bucket->empty()) { return *this; } } } robin_iterator operator++(int) { robin_iterator tmp(*this); ++*this; return tmp; } friend bool operator==(const robin_iterator& lhs, const robin_iterator& rhs) { return lhs.m_bucket == rhs.m_bucket; } friend bool operator!=(const robin_iterator& lhs, const robin_iterator& rhs) { return !(lhs == rhs); } private: bucket_entry_ptr m_bucket; }; public: #if defined(__cplusplus) && __cplusplus >= 201402L robin_hash(size_type bucket_count, const Hash& hash, const KeyEqual& equal, const Allocator& alloc, float min_load_factor = DEFAULT_MIN_LOAD_FACTOR, float max_load_factor = DEFAULT_MAX_LOAD_FACTOR): Hash(hash), KeyEqual(equal), GrowthPolicy(bucket_count), m_buckets_data( [&]() { if(bucket_count > max_bucket_count()) { TSL_RH_THROW_OR_TERMINATE(std::length_error, "The map exceeds its maximum bucket count."); } return bucket_count; }(), alloc ), m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()), m_bucket_count(bucket_count), m_nb_elements(0), m_grow_on_next_insert(false), m_try_shrink_on_next_insert(false) { if(m_bucket_count > 0) { tsl_rh_assert(!m_buckets_data.empty()); m_buckets_data.back().set_as_last_bucket(); } this->min_load_factor(min_load_factor); this->max_load_factor(max_load_factor); } #else /** * C++11 doesn't support the creation of a std::vector with a custom allocator and 'count' default-inserted elements. * The needed contructor `explicit vector(size_type count, const Allocator& alloc = Allocator());` is only * available in C++14 and later. We thus must resize after using the `vector(const Allocator& alloc)` constructor. * * We can't use `vector(size_type count, const T& value, const Allocator& alloc)` as it requires the * value T to be copyable. 
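     *
     * A condensed sketch of the workaround used below (illustrative only;
     * `alloc` and `count` are assumed):
     *
     * \code
     * std::vector<bucket_entry, buckets_allocator> data(alloc); // C++11-safe
     * data.resize(count); // default-inserts `count` entries, no copies required
     * // instead of the C++14-only vector(count, alloc) constructor.
     * \endcode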
*/ robin_hash(size_type bucket_count, const Hash& hash, const KeyEqual& equal, const Allocator& alloc, float min_load_factor = DEFAULT_MIN_LOAD_FACTOR, float max_load_factor = DEFAULT_MAX_LOAD_FACTOR): Hash(hash), KeyEqual(equal), GrowthPolicy(bucket_count), m_buckets_data(alloc), m_buckets(static_empty_bucket_ptr()), m_bucket_count(bucket_count), m_nb_elements(0), m_grow_on_next_insert(false), m_try_shrink_on_next_insert(false) { if(bucket_count > max_bucket_count()) { TSL_RH_THROW_OR_TERMINATE(std::length_error, "The map exceeds its maximum bucket count."); } if(m_bucket_count > 0) { m_buckets_data.resize(m_bucket_count); m_buckets = m_buckets_data.data(); tsl_rh_assert(!m_buckets_data.empty()); m_buckets_data.back().set_as_last_bucket(); } this->min_load_factor(min_load_factor); this->max_load_factor(max_load_factor); } #endif robin_hash(const robin_hash& other): Hash(other), KeyEqual(other), GrowthPolicy(other), m_buckets_data(other.m_buckets_data), m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()), m_bucket_count(other.m_bucket_count), m_nb_elements(other.m_nb_elements), m_load_threshold(other.m_load_threshold), m_min_load_factor(other.m_min_load_factor), m_max_load_factor(other.m_max_load_factor), m_grow_on_next_insert(other.m_grow_on_next_insert), m_try_shrink_on_next_insert(other.m_try_shrink_on_next_insert) { } robin_hash(robin_hash&& other) noexcept(std::is_nothrow_move_constructible::value && std::is_nothrow_move_constructible::value && std::is_nothrow_move_constructible::value && std::is_nothrow_move_constructible::value) : Hash(std::move(static_cast(other))), KeyEqual(std::move(static_cast(other))), GrowthPolicy(std::move(static_cast(other))), m_buckets_data(std::move(other.m_buckets_data)), m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()), m_bucket_count(other.m_bucket_count), m_nb_elements(other.m_nb_elements), m_load_threshold(other.m_load_threshold), m_min_load_factor(other.m_min_load_factor), m_max_load_factor(other.m_max_load_factor), m_grow_on_next_insert(other.m_grow_on_next_insert), m_try_shrink_on_next_insert(other.m_try_shrink_on_next_insert) { other.clear_and_shrink(); } robin_hash& operator=(const robin_hash& other) { if(&other != this) { Hash::operator=(other); KeyEqual::operator=(other); GrowthPolicy::operator=(other); m_buckets_data = other.m_buckets_data; m_buckets = m_buckets_data.empty()?static_empty_bucket_ptr(): m_buckets_data.data(); m_bucket_count = other.m_bucket_count; m_nb_elements = other.m_nb_elements; m_load_threshold = other.m_load_threshold; m_min_load_factor = other.m_min_load_factor; m_max_load_factor = other.m_max_load_factor; m_grow_on_next_insert = other.m_grow_on_next_insert; m_try_shrink_on_next_insert = other.m_try_shrink_on_next_insert; } return *this; } robin_hash& operator=(robin_hash&& other) { other.swap(*this); other.clear(); return *this; } allocator_type get_allocator() const { return m_buckets_data.get_allocator(); } /* * Iterators */ iterator begin() noexcept { std::size_t i = 0; while(i < m_bucket_count && m_buckets[i].empty()) { i++; } return iterator(m_buckets + i); } const_iterator begin() const noexcept { return cbegin(); } const_iterator cbegin() const noexcept { std::size_t i = 0; while(i < m_bucket_count && m_buckets[i].empty()) { i++; } return const_iterator(m_buckets + i); } iterator end() noexcept { return iterator(m_buckets + m_bucket_count); } const_iterator end() const noexcept { return cend(); } const_iterator cend() const noexcept { return 
const_iterator(m_buckets + m_bucket_count); } /* * Capacity */ bool empty() const noexcept { return m_nb_elements == 0; } size_type size() const noexcept { return m_nb_elements; } size_type max_size() const noexcept { return m_buckets_data.max_size(); } /* * Modifiers */ void clear() noexcept { if(m_min_load_factor > 0.0f) { clear_and_shrink(); } else { for(auto& bucket: m_buckets_data) { bucket.clear(); } m_nb_elements = 0; m_grow_on_next_insert = false; } } template std::pair insert(P&& value) { return insert_impl(KeySelect()(value), std::forward
<P>
(value)); } template iterator insert_hint(const_iterator hint, P&& value) { if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) { return mutable_iterator(hint); } return insert(std::forward
<P>
(value)).first; } template void insert(InputIt first, InputIt last) { if(std::is_base_of::iterator_category>::value) { const auto nb_elements_insert = std::distance(first, last); const size_type nb_free_buckets = m_load_threshold - size(); tsl_rh_assert(m_load_threshold >= size()); if(nb_elements_insert > 0 && nb_free_buckets < size_type(nb_elements_insert)) { reserve(size() + size_type(nb_elements_insert)); } } for(; first != last; ++first) { insert(*first); } } template std::pair insert_or_assign(K&& key, M&& obj) { auto it = try_emplace(std::forward(key), std::forward(obj)); if(!it.second) { it.first.value() = std::forward(obj); } return it; } template iterator insert_or_assign(const_iterator hint, K&& key, M&& obj) { if(hint != cend() && compare_keys(KeySelect()(*hint), key)) { auto it = mutable_iterator(hint); it.value() = std::forward(obj); return it; } return insert_or_assign(std::forward(key), std::forward(obj)).first; } template std::pair emplace(Args&&... args) { return insert(value_type(std::forward(args)...)); } template iterator emplace_hint(const_iterator hint, Args&&... args) { return insert_hint(hint, value_type(std::forward(args)...)); } template std::pair try_emplace(K&& key, Args&&... args) { return insert_impl(key, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), std::forward_as_tuple(std::forward(args)...)); } template iterator try_emplace_hint(const_iterator hint, K&& key, Args&&... args) { if(hint != cend() && compare_keys(KeySelect()(*hint), key)) { return mutable_iterator(hint); } return try_emplace(std::forward(key), std::forward(args)...).first; } /** * Here to avoid `template size_type erase(const K& key)` being used when * we use an `iterator` instead of a `const_iterator`. */ iterator erase(iterator pos) { erase_from_bucket(pos); /** * Erase bucket used a backward shift after clearing the bucket. * Check if there is a new value in the bucket, if not get the next non-empty. */ if(pos.m_bucket->empty()) { ++pos; } m_try_shrink_on_next_insert = true; return pos; } iterator erase(const_iterator pos) { return erase(mutable_iterator(pos)); } iterator erase(const_iterator first, const_iterator last) { if(first == last) { return mutable_iterator(first); } auto first_mutable = mutable_iterator(first); auto last_mutable = mutable_iterator(last); for(auto it = first_mutable.m_bucket; it != last_mutable.m_bucket; ++it) { if(!it->empty()) { it->clear(); m_nb_elements--; } } if(last_mutable == end()) { m_try_shrink_on_next_insert = true; return end(); } /* * Backward shift on the values which come after the deleted values. * We try to move the values closer to their ideal bucket. 
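* Illustrative example (distances from the ideal bucket in parentheses):
*   before erasing the range [B, C]: [A(0), B(0), C(1), D(2), E(1), _]
*   after the backward shift:        [A(0), D(0), _,    E(0), _,    _]
* D and E each end up at (or closer to) their ideal bucket; the shift stops
* at the first empty bucket or at an entry already at distance 0.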
*/ std::size_t icloser_bucket = static_cast(first_mutable.m_bucket - m_buckets); std::size_t ito_move_closer_value = static_cast(last_mutable.m_bucket - m_buckets); tsl_rh_assert(ito_move_closer_value > icloser_bucket); const std::size_t ireturn_bucket = ito_move_closer_value - std::min(ito_move_closer_value - icloser_bucket, std::size_t(m_buckets[ito_move_closer_value].dist_from_ideal_bucket())); while(ito_move_closer_value < m_bucket_count && m_buckets[ito_move_closer_value].dist_from_ideal_bucket() > 0) { icloser_bucket = ito_move_closer_value - std::min(ito_move_closer_value - icloser_bucket, std::size_t(m_buckets[ito_move_closer_value].dist_from_ideal_bucket())); tsl_rh_assert(m_buckets[icloser_bucket].empty()); const distance_type new_distance = distance_type(m_buckets[ito_move_closer_value].dist_from_ideal_bucket() - (ito_move_closer_value - icloser_bucket)); m_buckets[icloser_bucket].set_value_of_empty_bucket(new_distance, m_buckets[ito_move_closer_value].truncated_hash(), std::move(m_buckets[ito_move_closer_value].value())); m_buckets[ito_move_closer_value].clear(); ++icloser_bucket; ++ito_move_closer_value; } m_try_shrink_on_next_insert = true; return iterator(m_buckets + ireturn_bucket); } template size_type erase(const K& key) { return erase(key, hash_key(key)); } template size_type erase(const K& key, std::size_t hash) { auto it = find(key, hash); if(it != end()) { erase_from_bucket(it); m_try_shrink_on_next_insert = true; return 1; } else { return 0; } } void swap(robin_hash& other) { using std::swap; swap(static_cast(*this), static_cast(other)); swap(static_cast(*this), static_cast(other)); swap(static_cast(*this), static_cast(other)); swap(m_buckets_data, other.m_buckets_data); swap(m_buckets, other.m_buckets); swap(m_bucket_count, other.m_bucket_count); swap(m_nb_elements, other.m_nb_elements); swap(m_load_threshold, other.m_load_threshold); swap(m_min_load_factor, other.m_min_load_factor); swap(m_max_load_factor, other.m_max_load_factor); swap(m_grow_on_next_insert, other.m_grow_on_next_insert); swap(m_try_shrink_on_next_insert, other.m_try_shrink_on_next_insert); } /* * Lookup */ template::value>::type* = nullptr> typename U::value_type& at(const K& key) { return at(key, hash_key(key)); } template::value>::type* = nullptr> typename U::value_type& at(const K& key, std::size_t hash) { return const_cast(static_cast(this)->at(key, hash)); } template::value>::type* = nullptr> const typename U::value_type& at(const K& key) const { return at(key, hash_key(key)); } template::value>::type* = nullptr> const typename U::value_type& at(const K& key, std::size_t hash) const { auto it = find(key, hash); if(it != cend()) { return it.value(); } else { TSL_RH_THROW_OR_TERMINATE(std::out_of_range, "Couldn't find key."); } } template::value>::type* = nullptr> typename U::value_type& operator[](K&& key) { return try_emplace(std::forward(key)).first.value(); } template size_type count(const K& key) const { return count(key, hash_key(key)); } template size_type count(const K& key, std::size_t hash) const { if(find(key, hash) != cend()) { return 1; } else { return 0; } } template iterator find(const K& key) { return find_impl(key, hash_key(key)); } template iterator find(const K& key, std::size_t hash) { return find_impl(key, hash); } template const_iterator find(const K& key) const { return find_impl(key, hash_key(key)); } template const_iterator find(const K& key, std::size_t hash) const { return find_impl(key, hash); } template bool contains(const K& key) const { return contains(key, 
hash_key(key)); } template bool contains(const K& key, std::size_t hash) const { return count(key, hash) != 0; } template std::pair equal_range(const K& key) { return equal_range(key, hash_key(key)); } template std::pair equal_range(const K& key, std::size_t hash) { iterator it = find(key, hash); return std::make_pair(it, (it == end())?it:std::next(it)); } template std::pair equal_range(const K& key) const { return equal_range(key, hash_key(key)); } template std::pair equal_range(const K& key, std::size_t hash) const { const_iterator it = find(key, hash); return std::make_pair(it, (it == cend())?it:std::next(it)); } /* * Bucket interface */ size_type bucket_count() const { return m_bucket_count; } size_type max_bucket_count() const { return std::min(GrowthPolicy::max_bucket_count(), m_buckets_data.max_size()); } /* * Hash policy */ float load_factor() const { if(bucket_count() == 0) { return 0; } return float(m_nb_elements)/float(bucket_count()); } float min_load_factor() const { return m_min_load_factor; } float max_load_factor() const { return m_max_load_factor; } void min_load_factor(float ml) { m_min_load_factor = clamp(ml, float(MINIMUM_MIN_LOAD_FACTOR), float(MAXIMUM_MIN_LOAD_FACTOR)); } void max_load_factor(float ml) { m_max_load_factor = clamp(ml, float(MINIMUM_MAX_LOAD_FACTOR), float(MAXIMUM_MAX_LOAD_FACTOR)); m_load_threshold = size_type(float(bucket_count())*m_max_load_factor); } void rehash(size_type count) { count = std::max(count, size_type(std::ceil(float(size())/max_load_factor()))); rehash_impl(count); } void reserve(size_type count) { rehash(size_type(std::ceil(float(count)/max_load_factor()))); } /* * Observers */ hasher hash_function() const { return static_cast(*this); } key_equal key_eq() const { return static_cast(*this); } /* * Other */ iterator mutable_iterator(const_iterator pos) { return iterator(const_cast(pos.m_bucket)); } private: template std::size_t hash_key(const K& key) const { return Hash::operator()(key); } template bool compare_keys(const K1& key1, const K2& key2) const { return KeyEqual::operator()(key1, key2); } std::size_t bucket_for_hash(std::size_t hash) const { const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); tsl_rh_assert(bucket < m_bucket_count || (bucket == 0 && m_bucket_count == 0)); return bucket; } template::value>::type* = nullptr> std::size_t next_bucket(std::size_t index) const noexcept { tsl_rh_assert(index < bucket_count()); return (index + 1) & this->m_mask; } template::value>::type* = nullptr> std::size_t next_bucket(std::size_t index) const noexcept { tsl_rh_assert(index < bucket_count()); index++; return (index != bucket_count())?index:0; } template iterator find_impl(const K& key, std::size_t hash) { return mutable_iterator(static_cast(this)->find(key, hash)); } template const_iterator find_impl(const K& key, std::size_t hash) const { std::size_t ibucket = bucket_for_hash(hash); distance_type dist_from_ideal_bucket = 0; while(dist_from_ideal_bucket <= m_buckets[ibucket].dist_from_ideal_bucket()) { if(TSL_RH_LIKELY((!USE_STORED_HASH_ON_LOOKUP || m_buckets[ibucket].bucket_hash_equal(hash)) && compare_keys(KeySelect()(m_buckets[ibucket].value()), key))) { return const_iterator(m_buckets + ibucket); } ibucket = next_bucket(ibucket); dist_from_ideal_bucket++; } return cend(); } void erase_from_bucket(iterator pos) { pos.m_bucket->clear(); m_nb_elements--; /** * Backward shift, swap the empty bucket, previous_ibucket, with the values on its right, ibucket, * until we cross another empty bucket or if the other bucket has a 
distance_from_ideal_bucket == 0. * * We try to move the values closer to their ideal bucket. */ std::size_t previous_ibucket = static_cast(pos.m_bucket - m_buckets); std::size_t ibucket = next_bucket(previous_ibucket); while(m_buckets[ibucket].dist_from_ideal_bucket() > 0) { tsl_rh_assert(m_buckets[previous_ibucket].empty()); const distance_type new_distance = distance_type(m_buckets[ibucket].dist_from_ideal_bucket() - 1); m_buckets[previous_ibucket].set_value_of_empty_bucket(new_distance, m_buckets[ibucket].truncated_hash(), std::move(m_buckets[ibucket].value())); m_buckets[ibucket].clear(); previous_ibucket = ibucket; ibucket = next_bucket(ibucket); } } template std::pair insert_impl(const K& key, Args&&... value_type_args) { const std::size_t hash = hash_key(key); std::size_t ibucket = bucket_for_hash(hash); distance_type dist_from_ideal_bucket = 0; while(dist_from_ideal_bucket <= m_buckets[ibucket].dist_from_ideal_bucket()) { if((!USE_STORED_HASH_ON_LOOKUP || m_buckets[ibucket].bucket_hash_equal(hash)) && compare_keys(KeySelect()(m_buckets[ibucket].value()), key)) { return std::make_pair(iterator(m_buckets + ibucket), false); } ibucket = next_bucket(ibucket); dist_from_ideal_bucket++; } if(rehash_on_extreme_load()) { ibucket = bucket_for_hash(hash); dist_from_ideal_bucket = 0; while(dist_from_ideal_bucket <= m_buckets[ibucket].dist_from_ideal_bucket()) { ibucket = next_bucket(ibucket); dist_from_ideal_bucket++; } } if(m_buckets[ibucket].empty()) { m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, bucket_entry::truncate_hash(hash), std::forward(value_type_args)...); } else { insert_value(ibucket, dist_from_ideal_bucket, bucket_entry::truncate_hash(hash), std::forward(value_type_args)...); } m_nb_elements++; /* * The value will be inserted in ibucket in any case, either because it was * empty or by stealing the bucket (robin hood). */ return std::make_pair(iterator(m_buckets + ibucket), true); } template void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket, truncated_hash_type hash, Args&&... value_type_args) { value_type value(std::forward(value_type_args)...); insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value); } void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket, truncated_hash_type hash, value_type&& value) { insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value); } /* * We don't use `value_type&& value` as last argument due to a bug in MSVC when `value_type` is a pointer, * The compiler is not able to see the difference between `std::string*` and `std::string*&&` resulting in * a compilation error. * * The `value` will be in a moved state at the end of the function. */ void insert_value_impl(std::size_t ibucket, distance_type dist_from_ideal_bucket, truncated_hash_type hash, value_type& value) { m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value); ibucket = next_bucket(ibucket); dist_from_ideal_bucket++; while(!m_buckets[ibucket].empty()) { if(dist_from_ideal_bucket > m_buckets[ibucket].dist_from_ideal_bucket()) { if(dist_from_ideal_bucket >= bucket_entry::DIST_FROM_IDEAL_BUCKET_LIMIT) { /** * The number of probes is really high, rehash the map on the next insert. * Difficult to do now as rehash may throw an exception. 
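* (Deferring the grow via m_grow_on_next_insert is safe here:
* rehash_on_extreme_load() checks the flag at the start of the next
* insert, before any bucket has been modified.)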
*/ m_grow_on_next_insert = true; } m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value); } ibucket = next_bucket(ibucket); dist_from_ideal_bucket++; } m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, hash, std::move(value)); } void rehash_impl(size_type count) { robin_hash new_table(count, static_cast(*this), static_cast(*this), get_allocator(), m_min_load_factor, m_max_load_factor); const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_table.bucket_count()); for(auto& bucket: m_buckets_data) { if(bucket.empty()) { continue; } const std::size_t hash = use_stored_hash?bucket.truncated_hash(): new_table.hash_key(KeySelect()(bucket.value())); new_table.insert_value_on_rehash(new_table.bucket_for_hash(hash), 0, bucket_entry::truncate_hash(hash), std::move(bucket.value())); } new_table.m_nb_elements = m_nb_elements; new_table.swap(*this); } void clear_and_shrink() noexcept { GrowthPolicy::clear(); m_buckets_data.clear(); m_buckets = static_empty_bucket_ptr(); m_bucket_count = 0; m_nb_elements = 0; m_load_threshold = 0; m_grow_on_next_insert = false; m_try_shrink_on_next_insert = false; } void insert_value_on_rehash(std::size_t ibucket, distance_type dist_from_ideal_bucket, truncated_hash_type hash, value_type&& value) { while(true) { if(dist_from_ideal_bucket > m_buckets[ibucket].dist_from_ideal_bucket()) { if(m_buckets[ibucket].empty()) { m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, hash, std::move(value)); return; } else { m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value); } } dist_from_ideal_bucket++; ibucket = next_bucket(ibucket); } } /** * Grow the table if m_grow_on_next_insert is true or we reached the max_load_factor. * Shrink the table if m_try_shrink_on_next_insert is true (an erase occurred) and * we're below the min_load_factor. * * Return true if the table has been rehashed. */ bool rehash_on_extreme_load() { if(m_grow_on_next_insert || size() >= m_load_threshold) { rehash_impl(GrowthPolicy::next_bucket_count()); m_grow_on_next_insert = false; return true; } if(m_try_shrink_on_next_insert) { m_try_shrink_on_next_insert = false; if(m_min_load_factor != 0.0f && load_factor() < m_min_load_factor) { reserve(size() + 1); return true; } } return false; } public: static const size_type DEFAULT_INIT_BUCKETS_SIZE = 0; static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.5f; static constexpr float MINIMUM_MAX_LOAD_FACTOR = 0.2f; static constexpr float MAXIMUM_MAX_LOAD_FACTOR = 0.95f; static constexpr float DEFAULT_MIN_LOAD_FACTOR = 0.0f; static constexpr float MINIMUM_MIN_LOAD_FACTOR = 0.0f; static constexpr float MAXIMUM_MIN_LOAD_FACTOR = 0.15f; static_assert(MINIMUM_MAX_LOAD_FACTOR < MAXIMUM_MAX_LOAD_FACTOR, "MINIMUM_MAX_LOAD_FACTOR should be < MAXIMUM_MAX_LOAD_FACTOR"); static_assert(MINIMUM_MIN_LOAD_FACTOR < MAXIMUM_MIN_LOAD_FACTOR, "MINIMUM_MIN_LOAD_FACTOR should be < MAXIMUM_MIN_LOAD_FACTOR"); static_assert(MAXIMUM_MIN_LOAD_FACTOR < MINIMUM_MAX_LOAD_FACTOR, "MAXIMUM_MIN_LOAD_FACTOR should be < MINIMUM_MAX_LOAD_FACTOR"); private: /** * Return an always valid pointer to an static empty bucket_entry with last_bucket() == true. */ bucket_entry* static_empty_bucket_ptr() noexcept { static bucket_entry empty_bucket(true); return &empty_bucket; } private: buckets_container_type m_buckets_data; /** * Points to m_buckets_data.data() if !m_buckets_data.empty() otherwise points to static_empty_bucket_ptr. 
* This variable is useful to avoid the cost of checking if m_buckets_data is empty when trying * to find an element. * * TODO Remove m_buckets_data and only use a pointer instead of a pointer+vector to save some space in the robin_hash object. * Manage the Allocator manually. */ bucket_entry* m_buckets; /** * Used a lot in find, avoid the call to m_buckets_data.size() which is a bit slower. */ size_type m_bucket_count; size_type m_nb_elements; size_type m_load_threshold; float m_min_load_factor; float m_max_load_factor; bool m_grow_on_next_insert; /** * We can't shrink down the map on erase operations as the erase methods need to return the next iterator. * Shrinking the map would invalidate all the iterators and we could not return the next iterator in a meaningful way, * On erase, we thus just indicate on erase that we should try to shrink the hash table on the next insert * if we go below the min_load_factor. */ bool m_try_shrink_on_next_insert; }; } } #endif TileDB-Py-0.12.2/external/tsl/robin_map.h000066400000000000000000000653401417663620700200540ustar00rootroot00000000000000/** * MIT License * * Copyright (c) 2017 Thibaut Goetghebuer-Planchon * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef TSL_ROBIN_MAP_H #define TSL_ROBIN_MAP_H #include #include #include #include #include #include #include "robin_hash.h" namespace tsl { /** * Implementation of a hash map using open-addressing and the robin hood hashing algorithm with backward shift deletion. * * For operations modifying the hash map (insert, erase, rehash, ...), the strong exception guarantee * is only guaranteed when the expression `std::is_nothrow_swappable>::value && * std::is_nothrow_move_constructible>::value` is true, otherwise if an exception * is thrown during the swap or the move, the hash map may end up in a undefined state. Per the standard * a `Key` or `T` with a noexcept copy constructor and no move constructor also satisfies the * `std::is_nothrow_move_constructible>::value` criterion (and will thus guarantee the * strong exception for the map). * * When `StoreHash` is true, 32 bits of the hash are stored alongside the values. It can improve * the performance during lookups if the `KeyEqual` function takes time (if it engenders a cache-miss for example) * as we then compare the stored hashes before comparing the keys. When `tsl::rh::power_of_two_growth_policy` is used * as `GrowthPolicy`, it may also speed-up the rehash process as we can avoid to recalculate the hash. 
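* A minimal illustrative instantiation with `StoreHash` enabled, assuming
* the default template parameters of this class:
*   tsl::robin_map<std::string, int,
*                  std::hash<std::string>, std::equal_to<std::string>,
*                  std::allocator<std::pair<std::string, int>>,
*                  true> map; // StoreHash = true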
* When it is detected that storing the hash will not incur any memory penalty due to alignment (i.e. * `sizeof(tsl::detail_robin_hash::bucket_entry) == * sizeof(tsl::detail_robin_hash::bucket_entry)`) and `tsl::rh::power_of_two_growth_policy` is * used, the hash will be stored even if `StoreHash` is false so that we can speed-up the rehash (but it will * not be used on lookups unless `StoreHash` is true). * * `GrowthPolicy` defines how the map grows and consequently how a hash value is mapped to a bucket. * By default the map uses `tsl::rh::power_of_two_growth_policy`. This policy keeps the number of buckets * to a power of two and uses a mask to map the hash to a bucket instead of the slow modulo. * Other growth policies are available and you may define your own growth policy, * check `tsl::rh::power_of_two_growth_policy` for the interface. * * `std::pair` must be swappable. * * `Key` and `T` must be copy and/or move constructible. * * If the destructor of `Key` or `T` throws an exception, the behaviour of the class is undefined. * * Iterators invalidation: * - clear, operator=, reserve, rehash: always invalidate the iterators. * - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators. * - erase: always invalidate the iterators. */ template, class KeyEqual = std::equal_to, class Allocator = std::allocator>, bool StoreHash = false, class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>> class robin_map { private: template using has_is_transparent = tsl::detail_robin_hash::has_is_transparent; class KeySelect { public: using key_type = Key; const key_type& operator()(const std::pair& key_value) const noexcept { return key_value.first; } key_type& operator()(std::pair& key_value) noexcept { return key_value.first; } }; class ValueSelect { public: using value_type = T; const value_type& operator()(const std::pair& key_value) const noexcept { return key_value.second; } value_type& operator()(std::pair& key_value) noexcept { return key_value.second; } }; using ht = detail_robin_hash::robin_hash, KeySelect, ValueSelect, Hash, KeyEqual, Allocator, StoreHash, GrowthPolicy>; public: using key_type = typename ht::key_type; using mapped_type = T; using value_type = typename ht::value_type; using size_type = typename ht::size_type; using difference_type = typename ht::difference_type; using hasher = typename ht::hasher; using key_equal = typename ht::key_equal; using allocator_type = typename ht::allocator_type; using reference = typename ht::reference; using const_reference = typename ht::const_reference; using pointer = typename ht::pointer; using const_pointer = typename ht::const_pointer; using iterator = typename ht::iterator; using const_iterator = typename ht::const_iterator; public: /* * Constructors */ robin_map(): robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE) { } explicit robin_map(size_type bucket_count, const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), const Allocator& alloc = Allocator()): m_ht(bucket_count, hash, equal, alloc) { } robin_map(size_type bucket_count, const Allocator& alloc): robin_map(bucket_count, Hash(), KeyEqual(), alloc) { } robin_map(size_type bucket_count, const Hash& hash, const Allocator& alloc): robin_map(bucket_count, hash, KeyEqual(), alloc) { } explicit robin_map(const Allocator& alloc): robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { } template robin_map(InputIt first, InputIt last, size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), 
const Allocator& alloc = Allocator()): robin_map(bucket_count, hash, equal, alloc) { insert(first, last); } template robin_map(InputIt first, InputIt last, size_type bucket_count, const Allocator& alloc): robin_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) { } template robin_map(InputIt first, InputIt last, size_type bucket_count, const Hash& hash, const Allocator& alloc): robin_map(first, last, bucket_count, hash, KeyEqual(), alloc) { } robin_map(std::initializer_list init, size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), const Allocator& alloc = Allocator()): robin_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) { } robin_map(std::initializer_list init, size_type bucket_count, const Allocator& alloc): robin_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) { } robin_map(std::initializer_list init, size_type bucket_count, const Hash& hash, const Allocator& alloc): robin_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) { } robin_map& operator=(std::initializer_list ilist) { m_ht.clear(); m_ht.reserve(ilist.size()); m_ht.insert(ilist.begin(), ilist.end()); return *this; } allocator_type get_allocator() const { return m_ht.get_allocator(); } /* * Iterators */ iterator begin() noexcept { return m_ht.begin(); } const_iterator begin() const noexcept { return m_ht.begin(); } const_iterator cbegin() const noexcept { return m_ht.cbegin(); } iterator end() noexcept { return m_ht.end(); } const_iterator end() const noexcept { return m_ht.end(); } const_iterator cend() const noexcept { return m_ht.cend(); } /* * Capacity */ bool empty() const noexcept { return m_ht.empty(); } size_type size() const noexcept { return m_ht.size(); } size_type max_size() const noexcept { return m_ht.max_size(); } /* * Modifiers */ void clear() noexcept { m_ht.clear(); } std::pair insert(const value_type& value) { return m_ht.insert(value); } template::value>::type* = nullptr> std::pair insert(P&& value) { return m_ht.emplace(std::forward
<P>
(value)); } std::pair insert(value_type&& value) { return m_ht.insert(std::move(value)); } iterator insert(const_iterator hint, const value_type& value) { return m_ht.insert_hint(hint, value); } template::value>::type* = nullptr> iterator insert(const_iterator hint, P&& value) { return m_ht.emplace_hint(hint, std::forward
<P>
(value)); } iterator insert(const_iterator hint, value_type&& value) { return m_ht.insert_hint(hint, std::move(value)); } template void insert(InputIt first, InputIt last) { m_ht.insert(first, last); } void insert(std::initializer_list ilist) { m_ht.insert(ilist.begin(), ilist.end()); } template std::pair insert_or_assign(const key_type& k, M&& obj) { return m_ht.insert_or_assign(k, std::forward(obj)); } template std::pair insert_or_assign(key_type&& k, M&& obj) { return m_ht.insert_or_assign(std::move(k), std::forward(obj)); } template iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { return m_ht.insert_or_assign(hint, k, std::forward(obj)); } template iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); } /** * Due to the way elements are stored, emplace will need to move or copy the key-value once. * The method is equivalent to insert(value_type(std::forward(args)...)); * * Mainly here for compatibility with the std::unordered_map interface. */ template std::pair emplace(Args&&... args) { return m_ht.emplace(std::forward(args)...); } /** * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. * The method is equivalent to insert(hint, value_type(std::forward(args)...)); * * Mainly here for compatibility with the std::unordered_map interface. */ template iterator emplace_hint(const_iterator hint, Args&&... args) { return m_ht.emplace_hint(hint, std::forward(args)...); } template std::pair try_emplace(const key_type& k, Args&&... args) { return m_ht.try_emplace(k, std::forward(args)...); } template std::pair try_emplace(key_type&& k, Args&&... args) { return m_ht.try_emplace(std::move(k), std::forward(args)...); } template iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { return m_ht.try_emplace_hint(hint, k, std::forward(args)...); } template iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { return m_ht.try_emplace_hint(hint, std::move(k), std::forward(args)...); } iterator erase(iterator pos) { return m_ht.erase(pos); } iterator erase(const_iterator pos) { return m_ht.erase(pos); } iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } size_type erase(const key_type& key) { return m_ht.erase(key); } /** * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup to the value if you already have the hash. */ size_type erase(const key_type& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); } /** * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. * If so, K must be hashable and comparable to Key. */ template::value>::type* = nullptr> size_type erase(const K& key) { return m_ht.erase(key); } /** * @copydoc erase(const K& key) * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup to the value if you already have the hash. 
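* For example (illustrative; `map` and `key` are placeholders):
*   const std::size_t h = map.hash_function()(key);
*   if(map.contains(key, h)) { map.erase(key, h); } // hash computed once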
*/ template::value>::type* = nullptr> size_type erase(const K& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); } void swap(robin_map& other) { other.m_ht.swap(m_ht); } /* * Lookup */ T& at(const Key& key) { return m_ht.at(key); } /** * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } const T& at(const Key& key) const { return m_ht.at(key); } /** * @copydoc at(const Key& key, std::size_t precalculated_hash) */ const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } /** * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. * If so, K must be hashable and comparable to Key. */ template::value>::type* = nullptr> T& at(const K& key) { return m_ht.at(key); } /** * @copydoc at(const K& key) * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ template::value>::type* = nullptr> T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } /** * @copydoc at(const K& key) */ template::value>::type* = nullptr> const T& at(const K& key) const { return m_ht.at(key); } /** * @copydoc at(const K& key, std::size_t precalculated_hash) */ template::value>::type* = nullptr> const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } T& operator[](const Key& key) { return m_ht[key]; } T& operator[](Key&& key) { return m_ht[std::move(key)]; } size_type count(const Key& key) const { return m_ht.count(key); } /** * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } /** * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. * If so, K must be hashable and comparable to Key. */ template::value>::type* = nullptr> size_type count(const K& key) const { return m_ht.count(key); } /** * @copydoc count(const K& key) const * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ template::value>::type* = nullptr> size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } iterator find(const Key& key) { return m_ht.find(key); } /** * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. 
*/ iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } const_iterator find(const Key& key) const { return m_ht.find(key); } /** * @copydoc find(const Key& key, std::size_t precalculated_hash) */ const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } /** * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. * If so, K must be hashable and comparable to Key. */ template::value>::type* = nullptr> iterator find(const K& key) { return m_ht.find(key); } /** * @copydoc find(const K& key) * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ template::value>::type* = nullptr> iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } /** * @copydoc find(const K& key) */ template::value>::type* = nullptr> const_iterator find(const K& key) const { return m_ht.find(key); } /** * @copydoc find(const K& key) * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ template::value>::type* = nullptr> const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } bool contains(const Key& key) const { return m_ht.contains(key); } /** * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ bool contains(const Key& key, std::size_t precalculated_hash) const { return m_ht.contains(key, precalculated_hash); } /** * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. * If so, K must be hashable and comparable to Key. */ template::value>::type* = nullptr> bool contains(const K& key) const { return m_ht.contains(key); } /** * @copydoc contains(const K& key) const * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ template::value>::type* = nullptr> bool contains(const K& key, std::size_t precalculated_hash) const { return m_ht.contains(key, precalculated_hash); } std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } /** * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ std::pair equal_range(const Key& key, std::size_t precalculated_hash) { return m_ht.equal_range(key, precalculated_hash); } std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } /** * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) */ std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { return m_ht.equal_range(key, precalculated_hash); } /** * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. * If so, K must be hashable and comparable to Key. 
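* For example (illustrative), with `KeyEqual = std::equal_to<>` and a
* user-provided transparent hasher `sv_hash` (a functor with an
* `is_transparent` typedef that also accepts `std::string_view`):
*   tsl::robin_map<std::string, int, sv_hash, std::equal_to<>> map;
*   auto range = map.equal_range(std::string_view("key")); // no temporary
*                                                          // std::string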
*/ template::value>::type* = nullptr> std::pair equal_range(const K& key) { return m_ht.equal_range(key); } /** * @copydoc equal_range(const K& key) * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ template::value>::type* = nullptr> std::pair equal_range(const K& key, std::size_t precalculated_hash) { return m_ht.equal_range(key, precalculated_hash); } /** * @copydoc equal_range(const K& key) */ template::value>::type* = nullptr> std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } /** * @copydoc equal_range(const K& key, std::size_t precalculated_hash) */ template::value>::type* = nullptr> std::pair equal_range(const K& key, std::size_t precalculated_hash) const { return m_ht.equal_range(key, precalculated_hash); } /* * Bucket interface */ size_type bucket_count() const { return m_ht.bucket_count(); } size_type max_bucket_count() const { return m_ht.max_bucket_count(); } /* * Hash policy */ float load_factor() const { return m_ht.load_factor(); } float min_load_factor() const { return m_ht.min_load_factor(); } float max_load_factor() const { return m_ht.max_load_factor(); } /** * Set the `min_load_factor` to `ml`. When the `load_factor` of the map goes * below `min_load_factor` after some erase operations, the map will be * shrunk when an insertion occurs. The erase method itself never shrinks * the map. * * The default value of `min_load_factor` is 0.0f, the map never shrinks by default. */ void min_load_factor(float ml) { m_ht.min_load_factor(ml); } void max_load_factor(float ml) { m_ht.max_load_factor(ml); } void rehash(size_type count) { m_ht.rehash(count); } void reserve(size_type count) { m_ht.reserve(count); } /* * Observers */ hasher hash_function() const { return m_ht.hash_function(); } key_equal key_eq() const { return m_ht.key_eq(); } /* * Other */ /** * Convert a const_iterator to an iterator. */ iterator mutable_iterator(const_iterator pos) { return m_ht.mutable_iterator(pos); } friend bool operator==(const robin_map& lhs, const robin_map& rhs) { if(lhs.size() != rhs.size()) { return false; } for(const auto& element_lhs: lhs) { const auto it_element_rhs = rhs.find(element_lhs.first); if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) { return false; } } return true; } friend bool operator!=(const robin_map& lhs, const robin_map& rhs) { return !operator==(lhs, rhs); } friend void swap(robin_map& lhs, robin_map& rhs) { lhs.swap(rhs); } private: ht m_ht; }; /** * Same as `tsl::robin_map`. 
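* In the upstream library, this alias only swaps the default `GrowthPolicy`
* for `tsl::rh::prime_growth_policy`, which keeps the bucket count prime
* instead of a power of two.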
*/ template, class KeyEqual = std::equal_to, class Allocator = std::allocator>, bool StoreHash = false> using robin_pg_map = robin_map; } // end namespace tsl #endif TileDB-Py-0.12.2/external/tsl/robin_set.h000066400000000000000000000535551417663620700200770ustar00rootroot00000000000000/** * MIT License * * Copyright (c) 2017 Thibaut Goetghebuer-Planchon * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef TSL_ROBIN_SET_H #define TSL_ROBIN_SET_H #include #include #include #include #include #include #include "robin_hash.h" namespace tsl { /** * Implementation of a hash set using open-addressing and the robin hood hashing algorithm with backward shift deletion. * * For operations modifying the hash set (insert, erase, rehash, ...), the strong exception guarantee * is only guaranteed when the expression `std::is_nothrow_swappable::value && * std::is_nothrow_move_constructible::value` is true, otherwise if an exception * is thrown during the swap or the move, the hash set may end up in a undefined state. Per the standard * a `Key` with a noexcept copy constructor and no move constructor also satisfies the * `std::is_nothrow_move_constructible::value` criterion (and will thus guarantee the * strong exception for the set). * * When `StoreHash` is true, 32 bits of the hash are stored alongside the values. It can improve * the performance during lookups if the `KeyEqual` function takes time (or engenders a cache-miss for example) * as we then compare the stored hashes before comparing the keys. When `tsl::rh::power_of_two_growth_policy` is used * as `GrowthPolicy`, it may also speed-up the rehash process as we can avoid to recalculate the hash. * When it is detected that storing the hash will not incur any memory penalty due to alignment (i.e. * `sizeof(tsl::detail_robin_hash::bucket_entry) == * sizeof(tsl::detail_robin_hash::bucket_entry)`) and `tsl::rh::power_of_two_growth_policy` is * used, the hash will be stored even if `StoreHash` is false so that we can speed-up the rehash (but it will * not be used on lookups unless `StoreHash` is true). * * `GrowthPolicy` defines how the set grows and consequently how a hash value is mapped to a bucket. * By default the set uses `tsl::rh::power_of_two_growth_policy`. This policy keeps the number of buckets * to a power of two and uses a mask to set the hash to a bucket instead of the slow modulo. * Other growth policies are available and you may define your own growth policy, * check `tsl::rh::power_of_two_growth_policy` for the interface. 
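* A minimal illustrative usage sketch:
*   tsl::robin_set<std::string> set{"a", "b"};
*   set.insert("c");
*   if(set.contains("a")) { set.erase("a"); }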
* * `Key` must be swappable. * * `Key` must be copy and/or move constructible. * * If the destructor of `Key` throws an exception, the behaviour of the class is undefined. * * Iterators invalidation: * - clear, operator=, reserve, rehash: always invalidate the iterators. * - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators. * - erase: always invalidate the iterators. */ template, class KeyEqual = std::equal_to, class Allocator = std::allocator, bool StoreHash = false, class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>> class robin_set { private: template using has_is_transparent = tsl::detail_robin_hash::has_is_transparent; class KeySelect { public: using key_type = Key; const key_type& operator()(const Key& key) const noexcept { return key; } key_type& operator()(Key& key) noexcept { return key; } }; using ht = detail_robin_hash::robin_hash; public: using key_type = typename ht::key_type; using value_type = typename ht::value_type; using size_type = typename ht::size_type; using difference_type = typename ht::difference_type; using hasher = typename ht::hasher; using key_equal = typename ht::key_equal; using allocator_type = typename ht::allocator_type; using reference = typename ht::reference; using const_reference = typename ht::const_reference; using pointer = typename ht::pointer; using const_pointer = typename ht::const_pointer; using iterator = typename ht::iterator; using const_iterator = typename ht::const_iterator; /* * Constructors */ robin_set(): robin_set(ht::DEFAULT_INIT_BUCKETS_SIZE) { } explicit robin_set(size_type bucket_count, const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), const Allocator& alloc = Allocator()): m_ht(bucket_count, hash, equal, alloc) { } robin_set(size_type bucket_count, const Allocator& alloc): robin_set(bucket_count, Hash(), KeyEqual(), alloc) { } robin_set(size_type bucket_count, const Hash& hash, const Allocator& alloc): robin_set(bucket_count, hash, KeyEqual(), alloc) { } explicit robin_set(const Allocator& alloc): robin_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { } template robin_set(InputIt first, InputIt last, size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), const Allocator& alloc = Allocator()): robin_set(bucket_count, hash, equal, alloc) { insert(first, last); } template robin_set(InputIt first, InputIt last, size_type bucket_count, const Allocator& alloc): robin_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) { } template robin_set(InputIt first, InputIt last, size_type bucket_count, const Hash& hash, const Allocator& alloc): robin_set(first, last, bucket_count, hash, KeyEqual(), alloc) { } robin_set(std::initializer_list init, size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), const Allocator& alloc = Allocator()): robin_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) { } robin_set(std::initializer_list init, size_type bucket_count, const Allocator& alloc): robin_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) { } robin_set(std::initializer_list init, size_type bucket_count, const Hash& hash, const Allocator& alloc): robin_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) { } robin_set& operator=(std::initializer_list ilist) { m_ht.clear(); m_ht.reserve(ilist.size()); m_ht.insert(ilist.begin(), ilist.end()); return *this; } allocator_type get_allocator() const { 
return m_ht.get_allocator(); } /* * Iterators */ iterator begin() noexcept { return m_ht.begin(); } const_iterator begin() const noexcept { return m_ht.begin(); } const_iterator cbegin() const noexcept { return m_ht.cbegin(); } iterator end() noexcept { return m_ht.end(); } const_iterator end() const noexcept { return m_ht.end(); } const_iterator cend() const noexcept { return m_ht.cend(); } /* * Capacity */ bool empty() const noexcept { return m_ht.empty(); } size_type size() const noexcept { return m_ht.size(); } size_type max_size() const noexcept { return m_ht.max_size(); } /* * Modifiers */ void clear() noexcept { m_ht.clear(); } std::pair insert(const value_type& value) { return m_ht.insert(value); } std::pair insert(value_type&& value) { return m_ht.insert(std::move(value)); } iterator insert(const_iterator hint, const value_type& value) { return m_ht.insert_hint(hint, value); } iterator insert(const_iterator hint, value_type&& value) { return m_ht.insert_hint(hint, std::move(value)); } template void insert(InputIt first, InputIt last) { m_ht.insert(first, last); } void insert(std::initializer_list ilist) { m_ht.insert(ilist.begin(), ilist.end()); } /** * Due to the way elements are stored, emplace will need to move or copy the key-value once. * The method is equivalent to insert(value_type(std::forward(args)...)); * * Mainly here for compatibility with the std::unordered_map interface. */ template std::pair emplace(Args&&... args) { return m_ht.emplace(std::forward(args)...); } /** * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. * The method is equivalent to insert(hint, value_type(std::forward(args)...)); * * Mainly here for compatibility with the std::unordered_map interface. */ template iterator emplace_hint(const_iterator hint, Args&&... args) { return m_ht.emplace_hint(hint, std::forward(args)...); } iterator erase(iterator pos) { return m_ht.erase(pos); } iterator erase(const_iterator pos) { return m_ht.erase(pos); } iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } size_type erase(const key_type& key) { return m_ht.erase(key); } /** * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup to the value if you already have the hash. */ size_type erase(const key_type& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); } /** * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. * If so, K must be hashable and comparable to Key. */ template::value>::type* = nullptr> size_type erase(const K& key) { return m_ht.erase(key); } /** * @copydoc erase(const K& key) * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup to the value if you already have the hash. */ template::value>::type* = nullptr> size_type erase(const K& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); } void swap(robin_set& other) { other.m_ht.swap(m_ht); } /* * Lookup */ size_type count(const Key& key) const { return m_ht.count(key); } /** * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. 
*/ size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } /** * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. * If so, K must be hashable and comparable to Key. */ template::value>::type* = nullptr> size_type count(const K& key) const { return m_ht.count(key); } /** * @copydoc count(const K& key) const * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ template::value>::type* = nullptr> size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } iterator find(const Key& key) { return m_ht.find(key); } /** * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } const_iterator find(const Key& key) const { return m_ht.find(key); } /** * @copydoc find(const Key& key, std::size_t precalculated_hash) */ const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } /** * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. * If so, K must be hashable and comparable to Key. */ template::value>::type* = nullptr> iterator find(const K& key) { return m_ht.find(key); } /** * @copydoc find(const K& key) * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ template::value>::type* = nullptr> iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } /** * @copydoc find(const K& key) */ template::value>::type* = nullptr> const_iterator find(const K& key) const { return m_ht.find(key); } /** * @copydoc find(const K& key) * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ template::value>::type* = nullptr> const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } bool contains(const Key& key) const { return m_ht.contains(key); } /** * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ bool contains(const Key& key, std::size_t precalculated_hash) const { return m_ht.contains(key, precalculated_hash); } /** * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. * If so, K must be hashable and comparable to Key. */ template::value>::type* = nullptr> bool contains(const K& key) const { return m_ht.contains(key); } /** * @copydoc contains(const K& key) const * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. 
*/ template::value>::type* = nullptr> bool contains(const K& key, std::size_t precalculated_hash) const { return m_ht.contains(key, precalculated_hash); } std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } /** * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ std::pair equal_range(const Key& key, std::size_t precalculated_hash) { return m_ht.equal_range(key, precalculated_hash); } std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } /** * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) */ std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { return m_ht.equal_range(key, precalculated_hash); } /** * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. * If so, K must be hashable and comparable to Key. */ template::value>::type* = nullptr> std::pair equal_range(const K& key) { return m_ht.equal_range(key); } /** * @copydoc equal_range(const K& key) * * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. */ template::value>::type* = nullptr> std::pair equal_range(const K& key, std::size_t precalculated_hash) { return m_ht.equal_range(key, precalculated_hash); } /** * @copydoc equal_range(const K& key) */ template::value>::type* = nullptr> std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } /** * @copydoc equal_range(const K& key, std::size_t precalculated_hash) */ template::value>::type* = nullptr> std::pair equal_range(const K& key, std::size_t precalculated_hash) const { return m_ht.equal_range(key, precalculated_hash); } /* * Bucket interface */ size_type bucket_count() const { return m_ht.bucket_count(); } size_type max_bucket_count() const { return m_ht.max_bucket_count(); } /* * Hash policy */ float load_factor() const { return m_ht.load_factor(); } float min_load_factor() const { return m_ht.min_load_factor(); } float max_load_factor() const { return m_ht.max_load_factor(); } /** * Set the `min_load_factor` to `ml`. When the `load_factor` of the set goes * below `min_load_factor` after some erase operations, the set will be * shrunk when an insertion occurs. The erase method itself never shrinks * the set. * * The default value of `min_load_factor` is 0.0f, the set never shrinks by default. */ void min_load_factor(float ml) { m_ht.min_load_factor(ml); } void max_load_factor(float ml) { m_ht.max_load_factor(ml); } void rehash(size_type count) { m_ht.rehash(count); } void reserve(size_type count) { m_ht.reserve(count); } /* * Observers */ hasher hash_function() const { return m_ht.hash_function(); } key_equal key_eq() const { return m_ht.key_eq(); } /* * Other */ /** * Convert a const_iterator to an iterator. 
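* (Caveat, not from the original documentation: modifying the key through
* the returned iterator in a way that changes its hash or equality breaks
* the set's invariants.)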
*/ iterator mutable_iterator(const_iterator pos) { return m_ht.mutable_iterator(pos); } friend bool operator==(const robin_set& lhs, const robin_set& rhs) { if(lhs.size() != rhs.size()) { return false; } for(const auto& element_lhs: lhs) { const auto it_element_rhs = rhs.find(element_lhs); if(it_element_rhs == rhs.cend()) { return false; } } return true; } friend bool operator!=(const robin_set& lhs, const robin_set& rhs) { return !operator==(lhs, rhs); } friend void swap(robin_set& lhs, robin_set& rhs) { lhs.swap(rhs); } private: ht m_ht; }; /** * Same as `tsl::robin_set`. */ template, class KeyEqual = std::equal_to, class Allocator = std::allocator, bool StoreHash = false> using robin_pg_set = robin_set; } // end namespace tsl #endif TileDB-Py-0.12.2/misc/000077500000000000000000000000001417663620700142345ustar00rootroot00000000000000TileDB-Py-0.12.2/misc/azure-ci.yml000066400000000000000000000057071417663620700165070ustar00rootroot00000000000000stages: - stage: CI condition: not(or(startsWith(variables['Build.SourceBranch'], 'refs/tags'), startsWith(variables['Build.SourceBranchName'], 'release-'))) jobs: - job: pool: vmImage: $(imageName) strategy: matrix: mac: imageName: 'macOS-10.15' python.version: '3.10' MACOSX_DEPLOYMENT_TARGET: 10.14 windows: imageName: 'windows-latest' python.version: '3.10' linux_py3: imageName: 'ubuntu-latest' python.version: '3.10' maxParallel: 4 steps: - task: UsePythonVersion@0 inputs: versionSpec: '$(python.version)' architecture: 'x64' # Print python and pip version information for debugging. # Azure pipelines windows images have been unstable or out of sync, causing # build failures in the pip step below when the 'bash' task uses the wrong # python or has issue that causes un-corrected cygwin-style paths to be # passed to pip. 
    - bash: |
        echo "==== Python information ===="
        which python
        which pip
        python --version
        echo "============================"
      displayName: 'Print python version in bash task'

    - script: |
        printenv
      displayName: 'Print env'

    - script: |
        python -m pip install --upgrade -r misc/requirements_ci.txt
      displayName: 'Install dependencies'

    - script: |
        # vcvarsall is necessary so that numpy uses the correct compiler
        call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
        python setup.py build_ext --inplace
        python setup.py install
      displayName: 'Build TileDB and TileDB-Py extension (Windows)'
      condition: eq(variables['Agent.OS'], 'Windows_NT')

    - bash: |
        unset SYSTEM
        set -xeo pipefail
        python setup.py build_ext --inplace --werror
        python setup.py install
      displayName: 'Build TileDB and TileDB-Py extension (POSIX)'
      condition: ne(variables['Agent.OS'], 'Windows_NT')

    - bash: |
        set -xeo pipefail
        pytest -vv --showlocals

        # Test wheel build, install, and run
        python setup.py bdist_wheel
        #whl_file=`pwd`/dist/`ls dist/*.whl`
        mkdir /tmp/wheel_test
        cp dist/*.whl /tmp/wheel_test
        pushd /tmp/wheel_test
        ls
        pip install *.whl
        python -c 'import tiledb ; tiledb.libtiledb.version()'
      displayName: 'Run tests'

    - bash: |
        set -xeo pipefail
        # Display log files if the build failed
        echo 'Dumping log files for failed build'
        echo '----------------------------------'
        for f in $(find $BUILD_REPOSITORY_LOCALPATH/build -name '*.log');
          do echo '------'
          echo $f
          echo '======'
          cat $f
        done;
      condition: failed() # only run this step if the build step failed
      displayName: "Print log files (failed build only)"
TileDB-Py-0.12.2/misc/azure-libtiledb-darwin.yml000066400000000000000000000074361417663620700213270ustar00rootroot00000000000000steps:
- task: Cache@2
  inputs:
    key: 'libtiledb v0 | "$(Agent.OS)" | "$(imageName)" | "$(LIBTILEDB_SHA)" | setup.py | **/azure-*.yml, !tiledb_src/**, !tiledb_build/**'
    path: $(Pipeline.Workspace)/.libtiledb_dist/$(LIBTILEDB_SHA)
    cacheHitVar: LIBTILEDB_CACHE_RESTORED

- bash: |
    find $PIPELINE_WORKSPACE/.libtiledb_dist/${LIBTILEDB_SHA}
  condition: eq(variables.LIBTILEDB_CACHE_RESTORED, 'true')
  displayName: "Print files restored from cache"

- bash: |
    set -xeo pipefail
    unset SYSTEM

    git clone ${LIBTILEDB_REPO} $TILEDB_SRC
    git -C $(TILEDB_SRC) checkout $(LIBTILEDB_SHA)
    mkdir -p $TILEDB_BUILD
    cd $TILEDB_BUILD

    $TILEDB_SRC/bootstrap --force-build-all-deps --disable-werror --enable=s3,gcs,azure,serialization --enable-static-tiledb --disable-avx2 --disable-tests --prefix=$TILEDB_INSTALL
    cmake --build $TILEDB_BUILD --config Release -j3
    cmake --build $TILEDB_BUILD --target install-tiledb --config Release

    if [[ "$AGENT_OS" == "Darwin" ]]; then
      cp $TILEDB_BUILD/externals/install/lib/libz.1.dylib $TILEDB_INSTALL/lib || true
    fi
    if [[ "$AGENT_OS" == "Darwin" ]]; then
      otool -L ${TILEDB_INSTALL}/lib/libtiledb.dylib;
    fi
  condition: and(ne(variables['Agent.OS'], 'Windows_NT'), ne(variables.LIBTILEDB_CACHE_RESTORED, 'true'))
  displayName: "Build libtiledb (POSIX)"

- script: |
    call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
    echo ON
    git clone $(LIBTILEDB_REPO) $(TILEDB_SRC)
    git -C $(TILEDB_SRC) checkout $(LIBTILEDB_SHA)
    mkdir $(TILEDB_INSTALL)
    cd $(TILEDB_BUILD)
    :: use cmake directly because the powershell arg/quoting rules are bonkers
    cmake -A X64 -DCMAKE_BUILD_TYPE=Release -DTILEDB_WERROR=ON -DTILEDB_S3=ON -DTILEDB_SERIALIZATION=ON -DTILEDB_TOOLS=OFF -DTILEDB_SUPERBUILD=ON -DTILEDB_FORCE_ALL_DEPS=ON -DTILEDB_CPP_API=ON -DTILEDB_TESTS=OFF -DTILEDB_HDFS=OFF -DTILEDB_LOG_OUTPUT_ON_FAILURE=ON -DTILEDB_STATIC=ON -DTILEDB_VERBOSE=ON -DMSVC_MP_FLAG="/MP3" -DCMAKE_INSTALL_PREFIX=$(TILEDB_INSTALL) $(TILEDB_SRC) .
    cmake --build . --config Release -j3
    cmake --build . --target install-tiledb --config Release
  condition: and(eq(variables['Agent.OS'], 'Windows_NT'), ne(variables.LIBTILEDB_CACHE_RESTORED, 'true'))
  displayName: "Build libtiledb (Windows)"

- bash: |
    set -xe
    if [[ "$AGENT_OS" == "Windows_NT" ]]; then
      7z a ${TILEDB_INSTALL}/libtiledb-${LIBTILEDB_VERSION}-${LIBTILEDB_SHA}. ${TILEDB_INSTALL}/*
    elif [[ "$AGENT_OS" == "Darwin" ]]; then
      tar -czf ${TILEDB_INSTALL}/libtiledb-${LIBTILEDB_VERSION}-${LIBTILEDB_SHA} -C ${TILEDB_INSTALL} .
    else
      tar -czf ${TILEDB_INSTALL}/libtiledb-${LIBTILEDB_VERSION}-${LIBTILEDB_SHA} -C ${TILEDB_INSTALL} lib64 include
    fi
  displayName: "Archive build"

- task: PublishBuildArtifacts@1
  inputs:
    pathtoPublish: $(TILEDB_INSTALL)/libtiledb-$(LIBTILEDB_VERSION)-$(LIBTILEDB_SHA)
    artifactName: libtiledb-$(Agent.OS)

- bash: |
    set -x
    # Print cmake version
    echo "CMake version:"
    cmake --version
    echo "--- Listing files (BUILD_REPOSITORY_LOCALPATH): ${BUILD_REPOSITORY_LOCALPATH} ---"
    ls $BUILD_REPOSITORY_LOCALPATH || true
    echo "--- Finding files (TILEDB_INSTALL): '${TILEDB_INSTALL}'---"
    find $TILEDB_INSTALL || true
    echo "--- Printing libtiledb git version ---"
    libtiledb_sha_actual=$(git -C $TILEDB_SRC show-ref -s $LIBTILEDB_VERSION)
    if [[ "$libtiledb_sha_actual" != "$(LIBTILEDB_SHA)" ]]; then
      echo "variable LIBTILEDB_SHA ('$LIBTILEDB_SHA') does not match SHA of LIBTILEDB_VERSION checkout ('$libtiledb_sha_actual')";
    fi
    echo "--- Printing libtiledb linkage ---"
    otool -L `find $TILEDB_INSTALL -name '*libtiledb*'`
    echo "----------------------------------"
  displayName: "Print debug info"
  condition: always()
TileDB-Py-0.12.2/misc/azure-print-logs.yml000066400000000000000000000006741417663620700202070ustar00rootroot00000000000000steps:
- bash: |
    set -eo pipefail
    # Display log files if the build failed
    echo "Dumping log files for failed build"
    echo "----------------------------------"
    for f in $(find $BUILD_REPOSITORY_LOCALPATH -name '*.log');
      do echo "------"
      echo $f
      echo "======"
      cat $f
    done;
  condition: failed() # only run this step if the build step failed
  displayName: "Print log files (failed build only)"
TileDB-Py-0.12.2/misc/azure-release.yml000066400000000000000000000345531417663620700175330ustar00rootroot00000000000000stages:
- stage: Release
  variables:
    ${{ if startsWith(variables['Build.SourceBranchName'], 'azure-wheel-test-') }}:
      TILEDBPY_VERSION: dev
      LIBTILEDB_VERSION: dev
      LIBTILEDB_SHA: dev
    ${{ else }}:
      TILEDBPY_VERSION: 0.12.2
      LIBTILEDB_VERSION: 2.6.2
      LIBTILEDB_SHA: bf10e49b29ac51a6c6f0caa1cdac7c6d1154d670
    LIBTILEDB_REPO: https://github.com/TileDB-Inc/TileDB
    TILEDB_SRC: '$(Build.Repository.Localpath)/tiledb_src'
    TILEDB_BUILD: '$(Build.Repository.Localpath)/tiledb_build'
    TILEDB_INSTALL: '$(Pipeline.Workspace)/.libtiledb_dist/$(LIBTILEDB_SHA)'
    MACOSX_DEPLOYMENT_TARGET: 10.14
  condition: or(startsWith(variables['Build.SourceBranch'], 'refs/tags'), startsWith(variables['Build.SourceBranchName'], 'release-'), startsWith(variables['Build.SourceBranchName'], 'azure-wheel-test-'))

  jobs:
  - job: build1_libtiledb
    strategy:
      matrix:
        macOS_libtiledb:
          imageName: 'macOS-10.15'
        windows_libtiledb:
          imageName: 'windows-latest'
    pool:
      vmImage: $(imageName)
    steps:
    - task: UsePythonVersion@0
    - template: azure-libtiledb-darwin.yml

  - job: build1_libtiledb_on_linux
    pool:
      vmImage: 'ubuntu-latest'
    container: quay.io/pypa/manylinux2010_x86_64:2021-11-07-28723f3
    variables:
      CXXFLAGS: "-Wno-unused-parameter -lrt -DKJ_USE_EPOLL=0 -D__BIONIC__=1"
      CFLAGS: "-Wno-unused-parameter -lrt -DKJ_USE_EPOLL=0 -D__BIONIC__=1"
    steps:
    - task: UsePythonVersion@0
    - template: azure-libtiledb-darwin.yml

  - job: build2_python310
    dependsOn: [build1_libtiledb, build1_libtiledb_on_linux]
    condition: succeeded()
    variables:
      cibw_test_requires: "pytest"
      USE_CIBW_VERSION: 2.3.0
    strategy:
      matrix:
        linux_py:
          imageName: 'ubuntu-latest'
          CIBW_BUILD: 'cp310-*'
          CIBW_SKIP: '*_i686 pp* *musl*'
          CIBW_BUILD_VERBOSITY: 3
        macOS_py:
          imageName: 'macOS-10.15'
          CIBW_BUILD: 'cp310-*'
          CIBW_SKIP: 'pp*'
          CIBW_BUILD_VERBOSITY: 3
        windows_py:
          imageName: 'windows-latest'
          CIBW_BUILD: 'cp310-*'
          CIBW_SKIP: '*-win32 pp*'
          CIBW_BUILD_VERBOSITY: 3
    pool:
      vmImage: $(imageName)
    steps:
    - script: git tag -f $(TILEDBPY_VERSION)
    - task: DownloadPipelineArtifact@2
      displayName: 'Download libtiledb artifact'
      inputs:
        artifactName: libtiledb-$(Agent.OS)
        path: $(TILEDB_INSTALL)

    # we have to archive the files because azp breaks newlines in a bare directory restore
    - bash: |
        set -x
        if [[ "$AGENT_OS" == "Windows_NT" ]]; then
          7z x -o${TILEDB_INSTALL}/ ${TILEDB_INSTALL}/libtiledb-${LIBTILEDB_VERSION}-${LIBTILEDB_SHA} -y
        else
          tar xzf ${TILEDB_INSTALL}/libtiledb-${LIBTILEDB_VERSION}-${LIBTILEDB_SHA} -C ${TILEDB_INSTALL};
          find ${TILEDB_INSTALL}
        fi

        # Copy libz (temporary work-around for delocate on macOS)
        if [[ "$AGENT_OS" == "Darwin" ]]; then
          cp ${TILEDB_INSTALL}/lib/libz.1.dylib $BUILD_REPOSITORY_LOCALPATH
          install_name_tool -change libz.1.dylib ${BUILD_REPOSITORY_LOCALPATH}/libz.1.dylib ${TILEDB_INSTALL}/lib/libtiledb.dylib
        fi
      displayName: 'Extract libtiledb files'

    - bash: |
        # Set the CIBW_ENVIRONMENT from bash in order to get forward slashes because somewhere in the
        # cmd/cibw/python chain we end up losing the slashes entirely if we use a job-level variable.
        MPATH=$(python -c 'import os; print(os.environ["TILEDB_INSTALL"].replace("\\","/"))')
        export CIBW_ENVIRONMENT="TILEDB_PATH=${MPATH}"
        # !!! DO NOT PUT OTHER VARIABLES IN THIS SECTION - vars w/out expansions go below !!!
        echo "##vso[task.setvariable variable=CIBW_ENVIRONMENT]$CIBW_ENVIRONMENT"
      displayName: "Set CIBW_ENVIRONMENT"
      condition: and(succeeded(), ne(variables['Agent.OS'], 'Linux'))

    - bash: |
        set -xeo pipefail

        mv ${TILEDB_INSTALL} .libtiledb
        export TILEDB_INSTALL=.libtiledb
        export CIBW_ENVIRONMENT="TILEDB_PATH=${TILEDB_INSTALL} TILEDB_WHEEL_BUILD=1"
        # use the requirements_wheel.txt with numpy pins to ensure ABI compatibility
        export CIBW_BEFORE_TEST="pip install -r misc/requirements_wheel.txt"
        export CIBW_TEST_COMMAND="python -c 'import tiledb'"
        # copy libtiledb into usr/local for auditwheel to find
        export CIBW_BEFORE_BUILD="cp -R .libtiledb/* /usr/local"

        ls -lR "${TILEDB_INSTALL}"

        python -c "import os; print(os.environ.get('CIBW_ENVIRONMENT', None))"
        git rev-parse HEAD
        python3 -m pip install --upgrade pip pytest cython
        python3 setup.py sdist --dist-dir wheelhouse
        pip3 install cibuildwheel==${USE_CIBW_VERSION}
        cibuildwheel --output-dir wheelhouse .
      displayName: "Build and test wheels (Linux)"
      condition: and(succeeded(), eq(variables['Agent.OS'], 'Linux'))
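    # Illustration (not part of the upstream pipeline): for each CPython in
    # CIBW_BUILD, cibuildwheel roughly performs the equivalent of the steps
    # below, with CIBW_ENVIRONMENT applied to the build; the exact commands
    # here are schematic, not what cibuildwheel literally runs:
    #
    #   export TILEDB_PATH=.libtiledb TILEDB_WHEEL_BUILD=1   # CIBW_ENVIRONMENT
    #   cp -R .libtiledb/* /usr/local                        # CIBW_BEFORE_BUILD
    #   pip wheel . -w wheelhouse/                           # build (+ auditwheel repair on Linux)
    #   pip install -r misc/requirements_wheel.txt           # CIBW_BEFORE_TEST
    #   python -c 'import tiledb'                            # CIBW_TEST_COMMAND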
displayName: "Build and test wheels (Linux)" condition: and(succeeded(), eq(variables['Agent.OS'], 'Linux')) - bash: | set -xe pipefail export TILEDB_WHEEL_BUILD=1 # use the requirements_wheel.txt with numpy pins to ensure ABI compatibility export CIBW_BEFORE_TEST="pip install -r misc/requirements_wheel.txt" export CIBW_TEST_COMMAND="python -c 'import tiledb'" echo "${TILEDB_INSTALL}" python -c "import os; print(os.environ.get('CIBW_ENVIRONMENT', None))" git rev-parse HEAD python3 -m pip install --upgrade pip pytest cython python3 setup.py sdist --dist-dir wheelhouse pip3 install cibuildwheel==${USE_CIBW_VERSION} cibuildwheel --output-dir wheelhouse . displayName: "Build and test wheels (macOS)" condition: and(succeeded(), eq(variables['Agent.OS'], 'Darwin')) - script: | echo ON set "TILEDB_WHEEL_BUILD=1" :: # Have not managed to get this working AZP quoting breaks the command :: set CIBW_TEST_COMMAND=python -c \"import tiledb\"" echo "cibw env: " echo "%CIBW_ENVIRONMENT%" echo "tiledb_install: " echo "%TILEDB_INSTALL%" python -c "import os; print(os.environ['CIBW_ENVIRONMENT'])" python -c "import platform; print('py compiler: ', platform.python_compiler())" :: this runs under cmd on windows, which we need to use vcvarsall call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 git rev-parse HEAD python -m pip install --upgrade pip pytest cython python setup.py sdist --dist-dir wheelhouse pip install cibuildwheel==%USE_CIBW_VERSION% cibuildwheel --output-dir wheelhouse . :: delete the sdist because we *do not* want files with CRLF endings :: (if windows builds finish last then the windows sdist will be :: overwrite any others in the artifact publishing step) del /q wheelhouse\*.tar.gz displayName: "Build and test wheels (Windows)" condition: and(succeeded(), eq(variables['Agent.OS'], 'Windows_NT')) - template: azure-print-logs.yml - task: PublishBuildArtifacts@1 inputs: {pathtoPublish: 'wheelhouse'} - bash: | set -x echo "TILEDB_SRC -----------------------------------------" find $TILEDB_SRC || true echo "TILEDB_BUILD -----------------------------------------" find $TILEDB_BUILD || true echo "TILEDB_INSTALL -----------------------------------------" find $TILEDB_INSTALL || true displayName: "List all the files" condition: always() - job: build2_python dependsOn: [build1_libtiledb, build1_libtiledb_on_linux] condition: succeeded() variables: cibw_test_requires: "pytest" USE_CIBW_VERSION: 1.6.4 strategy: matrix: linux_py: imageName: 'ubuntu-latest' CIBW_SKIP: 'cp27-* cp35-* *_i686 pp*' CIBW_BUILD_VERBOSITY: 3 macOS_py: imageName: 'macOS-1015' CIBW_SKIP: 'cp27-* cp35-* pp*' CIBW_BUILD_VERBOSITY: 3 windows_py: imageName: 'windows-latest' CIBW_SKIP: 'cp27-* cp35-* cp36-* *-win32 pp*' CIBW_BUILD_VERBOSITY: 3 pool: vmImage: $(imageName) steps: - script: git tag -f $(TILEDBPY_VERSION) - task: DownloadPipelineArtifact@2 displayName: 'Download libtiledb artifact' inputs: artifactName: libtiledb-$(Agent.OS) path: $(TILEDB_INSTALL) # we have to archive the files because azp breaks newlines in a bare directory restore - bash: | set -x if [[ "$AGENT_OS" == "Windows_NT" ]]; then 7z x -o${TILEDB_INSTALL}/ ${TILEDB_INSTALL}/libtiledb-${LIBTILEDB_VERSION}-${LIBTILEDB_SHA} -y else tar xzf ${TILEDB_INSTALL}/libtiledb-${LIBTILEDB_VERSION}-${LIBTILEDB_SHA} -C ${TILEDB_INSTALL}; find ${TILEDB_INSTALL} fi # Copy libz (temporary work-around for delocate on macOS) if [[ "$AGENT_OS" == "Darwin" ]]; then cp ${TILEDB_INSTALL}/lib/libz.1.dylib 
          install_name_tool -change libz.1.dylib ${BUILD_REPOSITORY_LOCALPATH}/libz.1.dylib ${TILEDB_INSTALL}/lib/libtiledb.dylib
        fi
      displayName: 'Extract libtiledb files'

    - bash: |
        # Set the CIBW_ENVIRONMENT from bash in order to get forward slashes because somewhere in the
        # cmd/cibw/python chain we end up losing the slashes entirely if we use a job-level variable.
        MPATH=$(python -c 'import os; print(os.environ["TILEDB_INSTALL"].replace("\\","/"))')
        export CIBW_ENVIRONMENT="TILEDB_PATH=${MPATH}"
        # !!! DO NOT PUT OTHER VARIABLES IN THIS SECTION - vars w/out expansions go below !!!
        echo "##vso[task.setvariable variable=CIBW_ENVIRONMENT]$CIBW_ENVIRONMENT"
      displayName: "Set CIBW_ENVIRONMENT"
      condition: and(succeeded(), ne(variables['Agent.OS'], 'Linux'))

    - bash: |
        set -xeo pipefail

        mv ${TILEDB_INSTALL} .libtiledb
        export TILEDB_INSTALL=.libtiledb
        export CIBW_ENVIRONMENT="TILEDB_PATH=${TILEDB_INSTALL} TILEDB_WHEEL_BUILD=1"
        # use the requirements_wheel.txt with numpy pins to ensure ABI compatibility
        export CIBW_BEFORE_TEST="pip install -r misc/requirements_wheel.txt"
        export CIBW_TEST_COMMAND="python -c 'import tiledb'"
        # copy libtiledb into usr/local for auditwheel to find
        export CIBW_BEFORE_BUILD="cp -R .libtiledb/* /usr/local"

        ls -lR "${TILEDB_INSTALL}"

        python -c "import os; print(os.environ.get('CIBW_ENVIRONMENT', None))"
        git rev-parse HEAD
        python3 -m pip install --upgrade pip pytest cython
        python3 setup.py sdist --dist-dir wheelhouse
        pip3 install cibuildwheel==${USE_CIBW_VERSION}
        cibuildwheel --output-dir wheelhouse .
      displayName: "Build and test wheels (Linux)"
      condition: and(succeeded(), eq(variables['Agent.OS'], 'Linux'))

    - bash: |
        set -xeo pipefail

        export TILEDB_WHEEL_BUILD=1
        # use the requirements_wheel.txt with numpy pins to ensure ABI compatibility
        export CIBW_BEFORE_TEST="pip install -r misc/requirements_wheel.txt"
        export CIBW_TEST_COMMAND="python -c 'import tiledb'"

        echo "${TILEDB_INSTALL}"

        python -c "import os; print(os.environ.get('CIBW_ENVIRONMENT', None))"
        git rev-parse HEAD
        python3 -m pip install --upgrade pip pytest cython
        python3 setup.py sdist --dist-dir wheelhouse
        pip3 install cibuildwheel==${USE_CIBW_VERSION}
        cibuildwheel --output-dir wheelhouse .
      displayName: "Build and test wheels (macOS)"
      condition: and(succeeded(), eq(variables['Agent.OS'], 'Darwin'))

    - script: |
        echo ON
        set "TILEDB_WHEEL_BUILD=1"
        :: # Have not managed to get this working, AZP quoting breaks the command
        :: set CIBW_TEST_COMMAND=python -c "import tiledb"

        echo "cibw env: "
        echo "%CIBW_ENVIRONMENT%"
        echo "tiledb_install: "
        echo "%TILEDB_INSTALL%"
        python -c "import os; print(os.environ['CIBW_ENVIRONMENT'])"
        python -c "import platform; print('py compiler: ', platform.python_compiler())"
        :: this runs under cmd on windows, which we need to use vcvarsall
        call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64

        git rev-parse HEAD
        python -m pip install --upgrade pip pytest cython
        python setup.py sdist --dist-dir wheelhouse
        pip install cibuildwheel==%USE_CIBW_VERSION%
        cibuildwheel --output-dir wheelhouse .
        :: delete the sdist because we *do not* want files with CRLF endings
        :: (if windows builds finish last then the windows sdist will
        :: overwrite any others in the artifact publishing step)
        del /q wheelhouse\*.tar.gz
      displayName: "Build and test wheels (Windows)"
      condition: and(succeeded(), eq(variables['Agent.OS'], 'Windows_NT'))

    - template: azure-print-logs.yml

    - task: PublishBuildArtifacts@1
      inputs: {pathtoPublish: 'wheelhouse'}

    - bash: |
        set -x
        echo "TILEDB_SRC -----------------------------------------"
        find $TILEDB_SRC || true
        echo "TILEDB_BUILD -----------------------------------------"
        find $TILEDB_BUILD || true
        echo "TILEDB_INSTALL -----------------------------------------"
        find $TILEDB_INSTALL || true
      displayName: "List all the files"
      condition: always()

  - job: trigger_fail_build
    dependsOn: [build2_python]
    condition: failed()
    pool: server
    steps:
    - task: InvokeRESTAPI@1
      inputs:
        connectionType: 'connectedServiceName'
        connectedServiceName: 'TileDB-Py-Test'
        method: 'POST'
        body: |
          {
            "event_type": "failed",
            "client_payload": {"build_id": "$(Build.BuildId)"}
          }
        urlSuffix: 'repos/$(Build.Repository.Name)/dispatches'
        waitForCompletion: 'false'

  - job: trigger_success_build
    dependsOn: [build2_python]
    condition: succeeded()
    pool: server
    steps:
    - task: InvokeRESTAPI@1
      condition: succeeded()
      inputs:
        connectionType: 'connectedServiceName'
        connectedServiceName: 'TileDB-Py-Test'
        method: 'POST'
        body: |
          {
            "event_type": "succeeded",
            "client_payload": {"build_id": "$(Build.BuildId)"}
          }
        urlSuffix: 'repos/$(Build.Repository.Name)/dispatches'
        waitForCompletion: 'false'
TileDB-Py-0.12.2/misc/clean000066400000000000000000000001331417663620700152360ustar00rootroot00000000000000clean:
	rm -f tiledb/*.cpp tiledb/*.so tiledb/*.dll tiledb/*.pyc tiledb/native/libtiledb.*
TileDB-Py-0.12.2/misc/pypi_linux/000077500000000000000000000000001417663620700164345ustar00rootroot00000000000000TileDB-Py-0.12.2/misc/pypi_linux/Dockerfile.aarch64.manylinux2014000066400000000000000000000035571417663620700242170ustar00rootroot00000000000000FROM quay.io/pypa/manylinux2014_aarch64

###############################################
# version args
ARG LIBTILEDB_VERSION=2.5.2
ENV LIBTILEDB_VERSION=$LIBTILEDB_VERSION

ARG LIBTILEDB_REPO=https://github.com/TileDB-Inc/TileDB
ENV LIBTILEDB_REPO=$LIBTILEDB_REPO

ARG TILEDBPY_VERSION=0.11.2
ENV TILEDBPY_VERSION=$TILEDBPY_VERSION

ARG CMAKE_VERSION=3.21
ENV CMAKE_VERSION=$CMAKE_VERSION

###############################################
# python settings
# NOTE: MUST USE the 'mu' variant here to be compatible
# with "most" linux distros (see manylinux README)
ENV PYTHON_BASE /opt/python/cp38-cp38/bin/

RUN useradd tiledb
ENV HOME /home/tiledb

# dependencies:
# - cmake (need recent) and auditwheel from pip
RUN $PYTHON_BASE/pip install cmake==${CMAKE_VERSION} auditwheel cibuildwheel
ENV CMAKE $PYTHON_BASE/cmake

###############################################
# build libtiledb (core)
# notes:
#   1) we are using auditwheel from https://github.com/pypa/auditwheel
#      this verifies and tags wheel products with the manylinux1 label,
#      and allows us to build libtiledb once, install it to a normal
#      system path, and then use it to build wheels for all of the python
#      versions.
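# Illustration (not part of the upstream Dockerfile): the "build libtiledb
# once, repair per-Python" flow described above amounts to roughly the
# following inside the manylinux container; the paths here are schematic:
#
#   /opt/python/cp38-cp38/bin/python setup.py bdist_wheel --tiledb=/usr/local
#   auditwheel show dist/tiledb-*.whl          # inspect external shared libs
#   auditwheel repair dist/tiledb-*.whl -w /wheels
#
# auditwheel repair copies libtiledb (and its non-allowed dependencies) into
# the wheel and rewrites rpaths, so the result is retagged manylinux and
# installable on most glibc-based distros.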
# NOTE: NO GCS SUPPORT
RUN cd /home/tiledb/ && \
    git clone ${LIBTILEDB_REPO} -b ${LIBTILEDB_VERSION} --depth=1 && \
    mkdir build && \
    cd build && \
    $CMAKE -DTILEDB_S3=ON -DTILEDB_AZURE=ON \
          -DTILEDB_SERIALIZATION=ON \
          -DTILEDB_CPP_API=ON -DTILEDB_HDFS=ON -DTILEDB_TESTS=OFF \
          -DTILEDB_FORCE_ALL_DEPS:BOOL=ON \
          -DTILEDB_LOG_OUTPUT_ON_FAILURE:BOOL=ON \
          -DSANITIZER=OFF -DTILEDB_WERROR=OFF \
          -DCMAKE_CXX_STANDARD=17 \
          ../TileDB && \
    make -j$(nproc) && \
    make install-tiledb

###############################################
# add source directory. note: run from base of tree
ADD . /home/tiledb/TileDB-Py
TileDB-Py-0.12.2/misc/pypi_linux/Dockerfile2010000066400000000000000000000044641417663620700207360ustar00rootroot00000000000000FROM quay.io/pypa/manylinux2010_x86_64:latest

###############################################
# version args
ARG LIBTILEDB_VERSION=2.2.9
ENV LIBTILEDB_VERSION=$LIBTILEDB_VERSION

ARG LIBTILEDB_SHA=dc3bb54adc9bb0d99cb3f56ede2ab5b14e62ab76
ENV LIBTILEDB_SHA=$LIBTILEDB_SHA

ARG TILEDBPY_VERSION=0.8.10
ENV TILEDBPY_VERSION=$TILEDBPY_VERSION

ARG LIBTILEDB_REPO=https://github.com/TileDB-Inc/TileDB
ENV LIBTILEDB_REPO=$LIBTILEDB_REPO

###############################################
# python settings
# NOTE: MUST USE the 'mu' variant here to be compatible
# with "most" linux distros (see manylinux README)
ENV PYTHON_BASE /opt/python/cp38-cp38/bin/

RUN useradd tiledb
ENV HOME /home/tiledb

# dependencies:
# - cmake (need recent) and auditwheel from pip
RUN $PYTHON_BASE/pip install "cmake<3.22" auditwheel
ENV CMAKE $PYTHON_BASE/cmake

###############################################
# 1) Nothing builds under GCC 4.8 due to default constructor unused-parameter warnings
# 2) adding -lrt as a work-around for now because python2.7 doesn't link it, but it
#    ends up as an unlinked dependency.
# 3) Capnproto (TileDB Serialization) requires -DKJ_USE_EPOLL=0 -D__BIONIC__=1 per
#    https://github.com/capnproto/capnproto/issues/350#issuecomment-270930594
ENV CXXFLAGS -Wno-unused-parameter -lrt -DKJ_USE_EPOLL=0 -D__BIONIC__=1
ENV CFLAGS -Wno-unused-parameter -lrt -DKJ_USE_EPOLL=0 -D__BIONIC__=1

# build libtiledb (core)
# notes:
#   1) we are using auditwheel from https://github.com/pypa/auditwheel
#      this verifies and tags wheel products with the manylinux1 label,
#      and allows us to build libtiledb once, install it to a normal
#      system path, and then use it to build wheels for all of the python
#      versions.
RUN cd /home/tiledb/ && \
    git clone ${LIBTILEDB_REPO} && \
    git -C TileDB checkout $LIBTILEDB_SHA && \
    mkdir build && \
    cd build && \
    $CMAKE -DTILEDB_S3=ON -DTILEDB_AZURE=ON -DTILEDB_GCS=ON \
          -DTILEDB_CPP_API=ON -DTILEDB_HDFS=ON -DTILEDB_TESTS=OFF \
          -DTILEDB_FORCE_ALL_DEPS:BOOL=ON \
          -DTILEDB_LOG_OUTPUT_ON_FAILURE:BOOL=ON \
          -DSANITIZER="OFF;-DCOMPILER_SUPPORTS_AVX2:BOOL=FALSE" \
          ../TileDB && \
    make -j8 && \
    make install-tiledb

ADD misc/pypi_linux/build.sh /usr/bin/build.sh
RUN chmod +x /usr/bin/build.sh

# add source directory as optional TILEDB_PY_REPO
ADD .
/opt/TileDB-Py TileDB-Py-0.12.2/misc/pypi_linux/build.aarch64.sh000066400000000000000000000020011417663620700213070ustar00rootroot00000000000000#!/bin/sh # Usage: # 1) update version information # 2) run from root directory of TileDB-Py checkout # 3) test and upload wheels to PyPI set -xeu export LIBTILEDB_VERSION=2.5.3 export TILEDBPY_VERSION=0.11.5 export CIBW_MANYLINUX_AARCH64_IMAGE=wheel-host-aarch64.manylinux2014-$LIBTILEDB_VERSION export CIBW_SKIP='cp27-* cp35-* cp36-* cp310-* pp-* *_i686 pp* *-musllinux*' export CIBW_PLATFORM='linux' export CIBW_ENVIRONMENT='TILEDB_PATH=/usr/local/' export CIBW_BUILD_VERBOSITY=1 export CIBW_BEFORE_TEST="pip install -r misc/requirements_wheel.txt" export CIBW_TEST_COMMAND="python -c 'import tiledb'" export TILEDB_WHEEL_BUILD=1 docker build --build-arg=LIBTILEDB_VERSION=$LIBTILEDB_VERSION --build-arg TILEDBPY_VERSION=$TILEDBPY_VERSION -t $CIBW_MANYLINUX_AARCH64_IMAGE -f misc/pypi_linux/Dockerfile.aarch64.manylinux2014 . rm -rf /tmp/cibuildwheel_venv python3 -m venv /tmp/cibuildwheel_venv . /tmp/cibuildwheel_venv/bin/activate pip install cibuildwheel cibuildwheel --platform=linux --output-dir=wheelhouse . TileDB-Py-0.12.2/misc/pypi_linux/build.sh000066400000000000000000000057731417663620700201030ustar00rootroot00000000000000#!/bin/sh # USAGE #------ # 0) cd TileDB-Py (NOTE: root directory!) # 1) docker build -f misc/pypi_linux/Dockerfile . -t wheel_builder # 2) docker run -v `pwd`/misc/pypi_linux/wheels:/wheels -ti wheel_builder build.sh # # testing (e.g. using the official python docker images) # - $ docker run -v `pwd`/misc/pypi_linux/wheels:/wheels --rm -ti python bash # -- pip3 install /wheels/*cp37*.whl # -- python3.7 -c "import tiledb; print(tiledb.libtiledb.version()) and assert tiledb.VFS().supports('s3')" set -ex export TILEDB_PY_REPO="/opt/TileDB-Py" # build python36 wheel cd /home/tiledb git clone $TILEDB_PY_REPO TileDB-Py36 git -C TileDB-Py36 checkout $TILEDBPY_VERSION cd /home/tiledb/TileDB-Py36 /opt/python/cp36-cp36m/bin/python3.6 -m pip install -r misc/requirements_wheel.txt /opt/python/cp36-cp36m/bin/python3.6 setup.py build_ext bdist_wheel --tiledb=/usr/local auditwheel repair dist/*.whl /opt/python/cp36-cp36m/bin/python3.6 -m pip install wheelhouse/*.whl cd tiledb/tests #/opt/python/cp36-cp36m/bin/python3.6 -m unittest # build python37 wheel cd /home/tiledb git clone $TILEDB_PY_REPO TileDB-Py37 git -C TileDB-Py37 checkout $TILEDBPY_VERSION cd /home/tiledb/TileDB-Py37 /opt/python/cp36-cp36m/bin/python3.6 -m pip install -r misc/requirements_wheel.txt /opt/python/cp37-cp37m/bin/python3.7 setup.py build_ext bdist_wheel --tiledb=/usr/local auditwheel repair dist/*.whl /opt/python/cp37-cp37m/bin/python3.7 -m pip install wheelhouse/*.whl cd tiledb/tests #/opt/python/cp37-cp37m/bin/python3.7 -m unittest # build python38 wheel cd /home/tiledb git clone $TILEDB_PY_REPO TileDB-Py38 git -C TileDB-Py38 checkout $TILEDBPY_VERSION cd /home/tiledb/TileDB-Py38 /opt/python/cp36-cp36m/bin/python3.6 -m pip install -r misc/requirements_wheel.txt /opt/python/cp38-cp38/bin/python3.8 setup.py build_ext bdist_wheel --tiledb=/usr/local auditwheel repair dist/*.whl /opt/python/cp38-cp38/bin/python3.8 -m pip install wheelhouse/*.whl cd tiledb/tests # build python39 wheel cd /home/tiledb git clone $TILEDB_PY_REPO TileDB-Py39 git -C TileDB-Py39 checkout $TILEDBPY_VERSION cd /home/tiledb/TileDB-Py39 /opt/python/cp36-cp36m/bin/python3.6 -m pip install -r misc/requirements_wheel.txt /opt/python/cp39-cp39/bin/python3.9 setup.py build_ext bdist_wheel 
--tiledb=/usr/local
auditwheel repair dist/*.whl
/opt/python/cp39-cp39/bin/python3.9 -m pip install wheelhouse/*.whl
cd tiledb/tests

# build python310 wheel
cd /home/tiledb
git clone $TILEDB_PY_REPO TileDB-Py310
git -C TileDB-Py310 checkout $TILEDBPY_VERSION

cd /home/tiledb/TileDB-Py310
/opt/python/cp36-cp36m/bin/python3.6 -m pip install -r misc/requirements_wheel.txt
/opt/python/cp310-cp310/bin/python3.10 setup.py build_ext bdist_wheel --tiledb=/usr/local
auditwheel repair dist/*.whl
/opt/python/cp310-cp310/bin/python3.10 -m pip install wheelhouse/*.whl
cd tiledb/tests

# copy build products out
cp /home/tiledb/TileDB-Py36/wheelhouse/* /wheels
cp /home/tiledb/TileDB-Py37/wheelhouse/* /wheels
cp /home/tiledb/TileDB-Py38/wheelhouse/* /wheels
cp /home/tiledb/TileDB-Py39/wheelhouse/* /wheels
cp /home/tiledb/TileDB-Py310/wheelhouse/* /wheels
TileDB-Py-0.12.2/misc/requirements_ci.txt000066400000000000000000000001051417663620700201700ustar00rootroot00000000000000dask
distributed
-r ../requirements_dev.txt
-r requirements_test.txt
TileDB-Py-0.12.2/misc/requirements_test.txt000066400000000000000000000001231417663620700205540ustar00rootroot00000000000000pandas ; python_version > '3.5'
psutil
pyarrow
pytest
hypothesis
hypothesis[numpy]
TileDB-Py-0.12.2/misc/requirements_wheel.txt000066400000000000000000000013551417663620700207100ustar00rootroot00000000000000# numpy pinning for ABI forward-compatibility
numpy==1.16.5 ; python_version < "3.8" and platform_machine !='aarch64'
numpy==1.17.* ; python_version == "3.8" and platform_machine !='aarch64'
numpy==1.19.4 ; python_version == "3.9" and platform_machine !='aarch64'
# NOTE: oldest-supported-numpy (1.19.2) had forward ABI compat problems
numpy==1.20.* ; python_version < "3.10" and platform_machine=='aarch64'
numpy==1.21.* ; python_version >= "3.10"
#-------------------------------
# Note 11/23/2021: the current version of the AWS sdk does not work with cmake 3.22
cmake >= 3.21, < 3.22
cython >= 0.27
pybind11 >= 2.6.2
setuptools >= 18.0
setuptools_scm >= 1.5.4
wheel >= 0.30
contextvars ;python_version<"3.7"
dataclasses ;python_version<"3.7"
TileDB-Py-0.12.2/pyproject.toml000066400000000000000000000004531417663620700162170ustar00rootroot00000000000000[tool.pytest.ini_options]
python_classes = "*Test*"
python_files = "test_*.py"
testpaths = ["tiledb/tests"]
addopts = "--ignore=tiledb/tests/perf --ignore=tiledb/tests/__pycache__"
filterwarnings = [
    "error",
    "default::pytest.PytestWarning",
    "default::DeprecationWarning:distributed",
]
TileDB-Py-0.12.2/requirements.txt000066400000000000000000000004141417663620700165640ustar00rootroot00000000000000numpy>=1.16.5 ; python_version < "3.10" and platform_machine != 'aarch64'
numpy>=1.19.2 ; python_version < "3.10" and platform_machine == 'aarch64'
numpy>=1.21.0 ; python_version >= "3.10"
packaging
contextvars ;python_version<"3.7"
dataclasses ;python_version<"3.7"
TileDB-Py-0.12.2/requirements_dev.txt000066400000000000000000000006621417663620700174270ustar00rootroot00000000000000numpy >= 1.16.5
# ------------------------------------------------
# ** MUST sync with misc/requirements_wheel.txt **
# ------------------------------------------------
# Note 11/23/2021: the current version of the AWS sdk does not work with cmake 3.22
cmake >= 3.21, < 3.22
cython >= 0.27
pybind11 >= 2.6.2
setuptools >= 18.0
setuptools_scm >= 1.5.4
wheel >= 0.30
contextvars ;python_version<"3.7"
dataclasses ;python_version<"3.7"
TileDB-Py-0.12.2/setup.cfg000066400000000000000000000001001417663620700151210ustar00rootroot00000000000000[metadata]
license_files =
    LICENSE
external/LICENSE-*.txt TileDB-Py-0.12.2/setup.py000066400000000000000000000600651417663620700150220ustar00rootroot00000000000000import ctypes import io import multiprocessing import os import platform import shutil import subprocess import sys import zipfile from sysconfig import get_config_var from urllib.error import URLError from urllib.request import urlopen from pkg_resources import resource_filename from setuptools import Extension, find_packages, setup # Target branch TILEDB_VERSION = "2.6.2" # allow overriding w/ environment variable TILEDB_VERSION = os.environ.get("TILEDB_VERSION") or TILEDB_VERSION # Use `setup.py [] --debug` for a debug build of libtiledb TILEDB_DEBUG_BUILD = False # Use `setup.py [] --release-symbols` for a release build with symbols libtiledb TILEDB_SYMBOLS_BUILD = False # Use `setup.py [] --modular` for a modular build of libtiledb_py # Each .pyx file will be built as a separate shared library for faster # compilation. This is disabled by default to avoid distributing multiple # shared libraries. TILEDBPY_MODULAR = False # Allow to override TILEDB_FORCE_ALL_DEPS with environment variable TILEDB_FORCE_ALL_DEPS = "TILEDB_FORCE_ALL_DEPS" in os.environ TILEDB_DISABLE_SERIALIZATION = "TILEDB_DISABLE_SERIALIZATION" in os.environ CMAKE_GENERATOR = os.environ.get("CMAKE_GENERATOR", None) # Directory containing this file CONTAINING_DIR = os.path.abspath(os.path.dirname(__file__)) # Build directory path BUILD_DIR = os.path.join(CONTAINING_DIR, "build") # TileDB package source directory TILEDB_PKG_DIR = os.path.join(CONTAINING_DIR, "tiledb") # Set deployment target for mac # # TO OVERRIDE: # set MACOSX_DEPLOYMENT_TARGET before calling setup.py if sys.platform == "darwin": if "MACOSX_DEPLOYMENT_TARGET" not in os.environ: os.environ["MACOSX_DEPLOYMENT_TARGET"] = "10.14" # Is this process building a wheel? WHEEL_BUILD = ("bdist_wheel" in sys.argv) or ("TILEDB_WHEEL_BUILD" in os.environ) # Is this being built under conda-forge? CONDA_FORGE_BUILD = os.environ.get("TILEDB_CONDA_BUILD") is not None def is_windows(): return os.name == "nt" def _libtiledb_exists(library_dirs): """ Checks the given list of paths and returns true if any contain the TileDB library. :return: The path to the TileDB library, or None. """ print("libtiledb_exists checking 'library_dirs': {}".format(library_dirs)) names = libtiledb_library_names() if len(library_dirs) > 0: paths = [os.path.join(d, n) for d in library_dirs for n in names] for p in paths: if os.path.exists(p): return p raise RuntimeError( "Could not find given --tiledb library path(s):\n{}".format( "\n".join(paths) ) ) # If no explicit path is given check to see if TileDB is globally installed. import ctypes lib_name = names[0] try: # note: this is a relative path on linux # https://bugs.python.org/issue21042 ctypes.CDLL(lib_name) return lib_name except: pass return None def libtiledb_exists(library_dirs): lib = _libtiledb_exists(library_dirs) print("libtiledb_exists found: '{}'".format(lib)) return lib def libtiledb_library_names(): """ :return: List of TileDB shared library names. """ if os.name == "posix": if sys.platform == "darwin": return ["libtiledb.dylib"] else: return ["libtiledb.so"] elif os.name == "nt": return ["tiledb.dll"] else: raise RuntimeError("Unsupported OS name " + os.name) def download_libtiledb(): """ Downloads the native TileDB source. :return: Path to extracted source directory. 
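
    Example (illustrative): with the default TILEDB_VERSION of "2.6.2", this
    fetches https://github.com/TileDB-Inc/TileDB/archive/2.6.2.zip and
    extracts it under the local ``build/`` directory.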
""" dest_name = "TileDB-{}".format(TILEDB_VERSION) dest = os.path.join(BUILD_DIR, dest_name) build_dir = os.path.join(BUILD_DIR, f"TileDB-{TILEDB_VERSION[:8]}") if not os.path.exists(build_dir): url = "https://github.com/TileDB-Inc/TileDB/archive/{}.zip".format( TILEDB_VERSION ) print("Downloading TileDB package from {}...".format(TILEDB_VERSION)) try: with zipfile.ZipFile(io.BytesIO(urlopen(url).read())) as z: z.extractall(BUILD_DIR) except URLError: # try falling back to wget, maybe SSL is broken subprocess.check_call(["wget", url], shell=True) with zipfile.ZipFile("{}.zip".format(TILEDB_VERSION)) as z: z.extractall(BUILD_DIR) shutil.move(dest, build_dir) return build_dir def build_libtiledb(src_dir): """ Builds and installs the native TileDB library. :param src_dir: Path to libtiledb source directory. :return: Path to the directory where the library was installed. """ libtiledb_build_dir = os.path.join(src_dir, "build") libtiledb_install_dir = os.path.join(src_dir, "dist") if not os.path.exists(libtiledb_build_dir): os.makedirs(libtiledb_build_dir) print("Building libtiledb in directory {}...".format(libtiledb_build_dir)) werror = os.environ.get("TILEDB_WERROR", "OFF") cmake = os.environ.get("CMAKE", "cmake") cmake_cmd = [ cmake, "-DCMAKE_INSTALL_PREFIX={}".format(libtiledb_install_dir), "-DTILEDB_TESTS=OFF", "-DTILEDB_S3=ON", "-DTILEDB_WERROR={}".format(werror), "-DTILEDB_HDFS={}".format("ON" if os.name == "posix" else "OFF"), "-DTILEDB_INSTALL_LIBDIR=lib", "-DTILEDB_CPP_API=ON", "-DTILEDB_LOG_OUTPUT_ON_FAILURE=ON", "-DTILEDB_FORCE_ALL_DEPS:BOOL={}".format( "ON" if TILEDB_FORCE_ALL_DEPS else "OFF" ), "-DTILEDB_SERIALIZATION:BOOL={}".format( "OFF" if TILEDB_DISABLE_SERIALIZATION else "ON" ), ] deployment_target = os.environ.get("MACOSX_DEPLOYMENT_TARGET", None) if deployment_target: cmake_cmd.append(f"-DCMAKE_OSX_DEPLOYMENT_TARGET={deployment_target}") extra_cmake_args = os.environ.get("CMAKE_ARGS", []) if extra_cmake_args: cmake_cmd.extend(extra_cmake_args.split()) if TILEDB_DEBUG_BUILD: build_type = "Debug" elif TILEDB_SYMBOLS_BUILD: build_type = "RelWithDebInfo" else: build_type = "Release" cmake_cmd.append("-DCMAKE_BUILD_TYPE={}".format(build_type)) if os.name == "nt": cmake_cmd.extend(["-A", "x64", "-DMSVC_MP_FLAG=/MP4"]) if CMAKE_GENERATOR: cmake_cmd.extend(["-G", CMAKE_GENERATOR]) # cmake target directory -- important cmake_cmd.append(src_dir) print("CMake configure command: {}".format(cmake_cmd)) have_make = True try: subprocess.check_call(["make", "-v"]) except: have_make = False if have_make and not os.name == "nt": njobs = multiprocessing.cpu_count() or 2 build_cmd = ["make", "-j{:d}".format(njobs)] install_cmd = ["make", "install-tiledb"] else: build_cmd = ["cmake", "--build", ".", "--config", build_type] install_cmd = [ "cmake", "--build", ".", "--config", build_type, "--target", "install-tiledb", ] # Build and install libtiledb # - run cmake # - run build via 'cmake --build' # - run install-tiledb subprocess.check_call(cmake_cmd, cwd=libtiledb_build_dir) subprocess.check_call(build_cmd, cwd=libtiledb_build_dir) subprocess.check_call(install_cmd, cwd=libtiledb_build_dir) return libtiledb_install_dir def find_or_install_libtiledb(setuptools_cmd): """ Find the TileDB library required for building the Cython extension. If not found, download, build and install TileDB, copying the resulting shared libraries into a path where they will be found by package_data or the build process. :param setuptools_cmd: The setuptools command instance. 
""" tiledb_ext = None main_ext = None print("ext_modules: ", setuptools_cmd.distribution.ext_modules) for ext in setuptools_cmd.distribution.ext_modules: if ext.name == "tiledb.libtiledb": tiledb_ext = ext elif ext.name == "tiledb.main": main_ext = ext print("tiledb_ext: ", tiledb_ext) print("main_ext: ", main_ext) print("tiledb_ext.library_dirs: ", tiledb_ext.library_dirs) wheel_build = getattr(tiledb_ext, "tiledb_wheel_build", False) from_source = getattr(tiledb_ext, "tiledb_from_source", False) lib_exists = libtiledb_exists(tiledb_ext.library_dirs) do_install = False # Download, build and locally install TileDB if needed. if from_source or not lib_exists: src_dir = download_libtiledb() prefix_dir = build_libtiledb(src_dir) do_install = True elif lib_exists: prefix_dir = os.path.abspath(os.path.join(os.path.split(lib_exists)[0], "..")) elif hasattr(tiledb_ext, "tiledb_path"): prefix_dir = getattr(tiledb_ext, "tiledb_path") if wheel_build and is_windows() and lib_exists: do_install = True print("prefix_dir: ", main_ext) print("do_install: ", do_install) if do_install: lib_subdir = "bin" if os.name == "nt" else "lib" native_subdir = "" if is_windows() else "native" # Copy libtiledb shared object(s) to the package directory so they can be found # with package_data. dest_dir = os.path.join(TILEDB_PKG_DIR, native_subdir) for libname in libtiledb_library_names(): src = os.path.join(prefix_dir, lib_subdir, libname) if not os.path.exists(dest_dir): os.makedirs(dest_dir) dest = os.path.join(dest_dir, libname) print("Copying file {0} to {1}".format(src, dest)) shutil.copy(src, dest) # Copy dependencies if is_windows(): def do_copy(src, dest): print("Copying file {0} to {1}".format(src, dest)) shutil.copy(src, dest) # lib files for linking src = os.path.join(prefix_dir, "lib", "tiledb.lib") dest = os.path.join(dest_dir, "tiledb.lib") do_copy(src, dest) # tbb has_tbb = False if os.path.isdir(os.path.join(prefix_dir, "bin", "tbb.dll")): has_tbb = True src = os.path.join(prefix_dir, "bin", "tbb.dll") dest = os.path.join(dest_dir, "tbb.dll") do_copy(src, dest) src = os.path.join(prefix_dir, "lib", "tbb.lib") dest = os.path.join(dest_dir, "tbb.lib") do_copy(src, dest) # tiledb_ext.library_dirs += [os.path.join(prefix_dir, "lib")] main_ext.library_dirs += [os.path.join(prefix_dir, "lib")] # Update the extension instances with correct build-time paths. tiledb_ext.library_dirs += [os.path.join(prefix_dir, lib_subdir)] tiledb_ext.include_dirs += [os.path.join(prefix_dir, "include")] main_ext.library_dirs += [os.path.join(prefix_dir, lib_subdir)] main_ext.include_dirs += [os.path.join(prefix_dir, "include")] # Update package_data so the shared object gets installed with the Python module. 
libtiledb_objects = [ os.path.join(native_subdir, libname) for libname in libtiledb_library_names() ] # Make sure the built library is usable for shared_obj in libtiledb_objects: if is_windows(): continue test_path = os.path.join(TILEDB_PKG_DIR, shared_obj) # should only ever be 1, not sure why libtiledb_library_names -> List try: ctypes.CDLL(test_path) except: print("\n-------------------") print("Failed to load shared library: {}".format(test_path)) print("-------------------\n") raise # This needs to be a glob in order to pick up the versioned SO libtiledb_objects = [so + "*" for so in libtiledb_objects] if is_windows(): libtiledb_objects.extend( [os.path.join(native_subdir, libname) for libname in ["tiledb.lib"]] ) if has_tbb: libtiledb_objects.extend( [ os.path.join(native_subdir, libname) for libname in ["tbb.lib", "tbb.dll"] ] ) print("\n-------------------") print("libtiledb_objects: ", libtiledb_objects) print("-------------------\n") setuptools_cmd.distribution.package_data.update({"tiledb": libtiledb_objects}) class LazyCommandClass(dict): """ Lazy command class that defers operations requiring Cython and numpy until they've actually been downloaded and installed by setup_requires. """ def __contains__(self, key): if key in ("build_ext", "bdist_wheel", "bdist_egg"): return True return super().__contains__(key) def __setitem__(self, key, value): if key == "build_ext": raise AssertionError("build_ext overridden!") super().__setitem__(key, value) def __getitem__(self, key): if key == "build_ext": return self.make_build_ext_cmd() elif key == "bdist_wheel": return self.make_bdist_wheel_cmd() elif key == "bdist_egg": return self.make_bdist_egg_cmd() else: return super().__getitem__(key) def make_build_ext_cmd(self): """ :return: A command class implementing 'build_ext'. """ from Cython.Distutils import build_ext as cython_build_ext class build_ext(cython_build_ext): """ Custom build_ext command that lazily adds numpy's include_dir to extensions. """ def build_extensions(self): """ Lazily append numpy's include directory to Extension includes. This is done here rather than at module scope because setup.py may be run before numpy has been installed, in which case importing numpy and calling `numpy.get_include()` will fail. """ numpy_incl = resource_filename("numpy", "core/include") for ext in self.extensions: ext.include_dirs.append(numpy_incl) find_or_install_libtiledb(self) super().build_extensions() return build_ext def make_bdist_wheel_cmd(self): """ :return: A command class implementing 'bdist_wheel'. """ from wheel.bdist_wheel import bdist_wheel class bdist_wheel_cmd(bdist_wheel): def run(self): # This may modify package_data: find_or_install_libtiledb(self) bdist_wheel.run(self) return bdist_wheel_cmd def make_bdist_egg_cmd(self): """ :return: A command class implementing 'bdist_egg'. 
""" from setuptools.command.bdist_egg import bdist_egg class bdist_egg_cmd(bdist_egg): def run(self): # This may modify package_data: find_or_install_libtiledb(self) bdist_egg.run(self) return bdist_egg_cmd class get_pybind_include(object): """Helper class to determine the pybind11 include path The purpose of this class is to postpone importing pybind11 until it is actually installed, so that the ``get_include()`` method can be invoked.""" def __init__(self, user=False): self.user = user def __str__(self): import pybind11 return pybind11.get_include(self.user) def cmake_available(): """ Checks whether CMake command is available and >= version 3.21 Note 11/23/2021: < 3.22 temporarily due to AWS SDK imcompatibility. :return: """ CMAKE_MINIMUM_MAJOR = 3 CMAKE_MINIMUM_MINOR = 21 CMAKE_MAXIMUM_MINOR = 22 try: output = subprocess.check_output(["cmake", "--version"]).split() version = output[2].decode("utf-8").split(".") return ( int(version[0]) >= CMAKE_MINIMUM_MAJOR and int(version[1]) >= CMAKE_MINIMUM_MINOR and int(version[1]) < CMAKE_MAXIMUM_MINOR ) except: return False def parse_requirements(req_file): with open(req_file) as f: return f.read().strip().split("\n") def setup_requires(): if CONDA_FORGE_BUILD: return [] if WHEEL_BUILD: req = parse_requirements("misc/requirements_wheel.txt") else: req = parse_requirements("requirements_dev.txt") req = list(filter(lambda r: not r.startswith("-r"), req)) req_cmake = list(filter(lambda r: "cmake" in r, req))[0] # Add cmake requirement if libtiledb is not found and cmake is not available. if not libtiledb_exists(LIB_DIRS) and not cmake_available(): req.append(req_cmake) return req def install_requires(): if CONDA_FORGE_BUILD: return [] return parse_requirements("requirements.txt") # Allow setting (lib) TileDB directory if it is installed on the system TILEDB_PATH = os.environ.get("TILEDB_PATH", "") print("TILEDB_PATH from env: '{}'".format(TILEDB_PATH)) # Sources & libraries INC_DIRS = [] LIB_DIRS = [] LIBS = ["tiledb"] DEF_MACROS = [] # Pass command line flags to setup.py script # handle --tiledb=[PATH] --lflags=[FLAGS] --cxxflags=[FLAGS] args = sys.argv[:] for arg in args: if arg.find("--tiledb=") == 0: TILEDB_PATH = os.path.expanduser(arg.split("=")[1]) sys.argv.remove(arg) if arg.find("--lflags=") == 0: LFLAGS = arg.split("=")[1].split() sys.argv.remove(arg) if arg.find("--cxxflags=") == 0: CXXFLAGS = arg.split("=")[1].split() sys.argv.remove(arg) if arg.find("--debug") == 0: TILEDB_DEBUG_BUILD = True sys.argv.remove(arg) if arg.find("--release-symbols") == 0: TILEDB_SYMBOLS_BUILD = True sys.argv.remove(arg) if arg.find("--modular") == 0: TILEDBPY_MODULAR = True sys.argv.remove(arg) TILEDBPY_WERROR = False if arg.find("--werror") == 0: TILEDBPY_WERROR = True sys.argv.remove(arg) # Global variables CXXFLAGS = os.environ.get("CXXFLAGS", "").split() if not is_windows(): CXXFLAGS.append("-std=c++17") if TILEDBPY_WERROR: CXXFLAGS.append("-Werror") if not TILEDB_DEBUG_BUILD: CXXFLAGS.append("-Wno-deprecated-declarations") elif TILEDB_DEBUG_BUILD: CXXFLAGS.append("-g") CXXFLAGS.append("-O0") CXXFLAGS.append("-UNDEBUG") # defined by distutils if TILEDB_SYMBOLS_BUILD: CXXFLAGS.append("-g") elif is_windows(): CXXFLAGS.append("/std:c++17") LFLAGS = os.environ.get("LFLAGS", "").split() if TILEDB_PATH != "" and TILEDB_PATH != "source": print("TILEDB_PATH in block before: '{}'".format(TILEDB_PATH)) TILEDB_PATH = os.path.normpath(TILEDB_PATH) print("TILEDB_PATH in block after: '{}'".format(TILEDB_PATH)) LIB_DIRS += [os.path.join(os.path.normpath(TILEDB_PATH), 
"lib")] if sys.platform.startswith("linux"): LIB_DIRS += [ os.path.join(TILEDB_PATH, "lib64"), os.path.join(TILEDB_PATH, "lib", "x86_64-linux-gnu"), ] elif os.name == "nt": LIB_DIRS += [os.path.join(TILEDB_PATH, "bin")] INC_DIRS += [os.path.join(TILEDB_PATH, "include")] if sys.platform == "darwin": LFLAGS += ["-Wl,-rpath,{}".format(p) for p in LIB_DIRS] with open("README.md") as f: README_MD = f.read() # Source files for build MODULAR_SOURCES = ["tiledb/np2buf.pyx", "tiledb/indexing.pyx", "tiledb/libmetadata.pyx"] MODULAR_HEADERS = ["tiledb/libtiledb.pxd", "tiledb/indexing.pxd"] __extensions = [ Extension( "tiledb.libtiledb", include_dirs=INC_DIRS, define_macros=DEF_MACROS, sources=["tiledb/libtiledb.pyx"], depends=MODULAR_HEADERS, library_dirs=LIB_DIRS, libraries=LIBS, extra_link_args=LFLAGS, extra_compile_args=CXXFLAGS.copy().remove("-Werror") if CXXFLAGS.count("-Werror") else CXXFLAGS, language="c++", ), Extension( "tiledb.main", [ "tiledb/main.cc", "tiledb/core.cc", "tiledb/npbuffer.cc", "tiledb/fragment.cc", "tiledb/serialization.cc", "tiledb/schema_evolution.cc", "tiledb/tests/test_serialization.cc", "tiledb/tests/test_metadata.cc", # TODO currently included in core.cc due to dependency. # need to un-comment after refactor. # "tiledb/query_condition.cc", ], include_dirs=INC_DIRS + [get_pybind_include(), get_pybind_include(user=True)], language="c++", library_dirs=LIB_DIRS, libraries=LIBS, extra_link_args=LFLAGS, extra_compile_args=CXXFLAGS + ["-fvisibility=hidden"], ), ] if TILEDBPY_MODULAR: for source in MODULAR_SOURCES: module_name = os.path.splitext(os.path.split(source)[-1])[0] if module_name + ".pxd" in MODULAR_HEADERS: deps = module_name + ".pxd" else: deps = None ext = Extension( "tiledb.{}".format(module_name), include_dirs=INC_DIRS, define_macros=DEF_MACROS, sources=[source], depends=[deps] if deps else [], library_dirs=LIB_DIRS, libraries=LIBS, extra_link_args=LFLAGS, extra_compile_args=CXXFLAGS, language="c++", ) __extensions.append(ext) else: __extensions[0].depends += MODULAR_SOURCES # Helper to set Extension attributes correctly based on python version def ext_attr_update(attr, value): for x in __extensions: setattr(x, attr, value) # Monkey patches to be forwarded to cythonize # some of these will error out if passed directly # to Extension(..) above if WHEEL_BUILD: ext_attr_update("tiledb_wheel_build", True) # - build with `#line` directive annotations # (equivalent to `emit_linenums` command line directive) ext_attr_update("cython_line_directives", 1) # - generate XML debug mapping file (`cython_debug`) if TILEDB_DEBUG_BUILD: ext_attr_update("cython_gdb", True) __extensions[1].depends += "tiledb/debug.cc" # - set rt lib dirs to get correct RPATH on unixy platforms # note that we set rpath for darwin separately above. 
if not is_windows(): ext_attr_update("runtime_library_dirs", LIB_DIRS) if TILEDB_PATH == "source": ext_attr_update("tiledb_from_source", True) elif TILEDB_PATH != "": ext_attr_update("tiledb_path", TILEDB_PATH) # This must always be set so the compile-time conditional has a value ext_attr_update("cython_compile_time_env", {"TILEDBPY_MODULAR": TILEDBPY_MODULAR}) setup( name="tiledb", description="Pythonic interface to the TileDB array storage manager", long_description=README_MD, long_description_content_type="text/markdown", author="TileDB, Inc.", author_email="help@tiledb.io", maintainer="TileDB, Inc.", maintainer_email="help@tiledb.io", url="https://github.com/TileDB-Inc/TileDB-Py", license="MIT", platforms=["any"], use_scm_version={ "version_scheme": "guess-next-dev", "local_scheme": "dirty-tag", "write_to": "tiledb/version.py", }, ext_modules=__extensions, setup_requires=setup_requires(), install_requires=install_requires(), packages=find_packages(), cmdclass=LazyCommandClass(), zip_safe=False, classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: Science/Research", "License :: OSI Approved :: MIT License", "Programming Language :: Python", "Topic :: Software Development :: Libraries :: Python Modules", "Operating System :: Unix", "Operating System :: POSIX :: Linux", "Operating System :: MacOS :: MacOS X", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", ], ) TileDB-Py-0.12.2/tiledb/000077500000000000000000000000001417663620700145445ustar00rootroot00000000000000TileDB-Py-0.12.2/tiledb/__init__.py000066400000000000000000000060511417663620700166570ustar00rootroot00000000000000import ctypes import os import sys # un-comment this section to fix Cython backtrace line-numbers in # IPython/Jupyter. see https://bugs.python.org/issue32797#msg323167 # --- # try: # from importlib.machinery import ExtensionFileLoader # else: # del ExtensionFileLoader.get_source # --- if os.name == "posix": if sys.platform == "darwin": lib_name = "libtiledb.dylib" else: lib_name = "libtiledb.so" else: lib_name = "tiledb" # On Windows and whl builds, we may have a shared library already linked, or # adjacent to, the cython .pyd shared object. In this case, we can import directly # from .libtiledb try: import tiledb from .libtiledb import Ctx except: try: lib_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "native") ctypes.CDLL(os.path.join(lib_dir, lib_name)) except OSError as e: # Otherwise try loading by name only. 
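        # Illustration (not part of the load logic): the overall search order
        # implemented in this try/except chain is
        #   1. import tiledb.libtiledb directly (library already linked or
        #      adjacent to the extension module),
        #   2. load the copy bundled under tiledb/native/,
        #   3. fall back to the system loader by bare name, e.g. on Linux
        #      roughly equivalent to ctypes.CDLL("libtiledb.so").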
        ctypes.CDLL(lib_name)

from .ctx import default_ctx, scope_ctx

from .libtiledb import (
    Array,
    Ctx,
    Config,
    Dim,
    Domain,
    Attr,
    ArraySchema,
    TileDBError,
    VFS,
    FileIO,
    Filter,
    FilterList,
    NoOpFilter,
    GzipFilter,
    ZstdFilter,
    LZ4Filter,
    Bzip2Filter,
    RleFilter,
    DoubleDeltaFilter,
    BitShuffleFilter,
    ByteShuffleFilter,
    BitWidthReductionFilter,
    PositiveDeltaFilter,
    ChecksumMD5Filter,
    ChecksumSHA256Filter,
    consolidate,
    group_create,
    object_type,
    ls,
    walk,
    remove,
    move,
    schema_like,
    stats_enable,
    stats_disable,
    stats_reset,
    stats_dump,
    vacuum,
)

from .array import DenseArray, SparseArray
from .fragment import (
    FragmentInfoList,
    FragmentInfo,
    FragmentsInfo,
    copy_fragments_to_existing_array,
    delete_fragments,
    create_array_from_fragments,
)
from .highlevel import (
    open,
    save,
    from_numpy,
    empty_like,
    array_exists,
    array_fragments,
)

from .query_condition import QueryCondition
from .schema_evolution import ArraySchemaEvolution

# TODO restricted imports
from .dataframe_ import from_csv, from_pandas, open_dataframe
from .multirange_indexing import EmptyRange
from .parquet_ import from_parquet
from .version import version as __version__
from .version_ import VersionHelper

version = VersionHelper()

# Note: we use a modified namespace packaging to allow continuity of existing TileDB-Py imports.
# Therefore, 'tiledb/__init__.py' must *only* exist in this package.
# Furthermore, in sub-packages, the `find_packages` helper will not work at the
# root directory due to lack of 'tiledb/__init__.py'. Sub-package 'setup.py' scripts
# must declare constituents accordingly, such as by running 'find_packages' on a sub-directory
# and applying prefixes accordingly.
#   1) https://packaging.python.org/guides/packaging-namespace-packages/#native-namespace-packages
#   2) https://stackoverflow.com/a/53486554
#
# Note: 'pip -e' in particular will not work without this declaration:
__path__ = __import__("pkgutil").extend_path(__path__, __name__)
TileDB-Py-0.12.2/tiledb/array.py000066400000000000000000000037061417663620700162420ustar00rootroot00000000000000from .libtiledb import DenseArrayImpl, SparseArrayImpl

# Extensible (pure Python) array class definitions inheriting from the
# Cython implementation. The cloudarray mix-in adds optional functionality
# for registering arrays and executing functions on the TileDB Cloud service.

# NOTE: the mixin import must be inside the __new__ initializer because it
#       needs to be deferred. tiledb.cloud is not yet known to the importer
#       when this code is imported.
# TODO: might be possible to work-around/simplify by using
#       import meta-hooks instead.


class DenseArray(DenseArrayImpl):
    """Class representing a dense TileDB array.

    Inherits properties and methods of :py:class:`tiledb.Array` and
    implements `__setitem__` and `__getitem__` for dense array indexing
    and assignment.
    """

    _mixin_init = False

    def __new__(cls, *args, **kwargs):
        if not cls._mixin_init:
            # must set before importing, because import is not thread-safe
            #   https://github.com/TileDB-Inc/TileDB-Py/issues/244
            cls._mixin_init = True
            try:
                from tiledb.cloud import cloudarray

                DenseArray.__bases__ = DenseArray.__bases__ + (cloudarray.CloudArray,)
            except ImportError:
                pass

        return super().__new__(cls, *args, **kwargs)


class SparseArray(SparseArrayImpl):
    """Class representing a sparse TileDB array.

    Inherits properties and methods of :py:class:`tiledb.Array` and
    implements `__setitem__` and `__getitem__` for sparse array indexing
    and assignment.
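
    Example (illustrative; assumes an existing 1-D sparse array at ``uri``)::

        import numpy as np
        import tiledb

        with tiledb.SparseArray(uri, mode="w") as A:
            A[[1, 2]] = np.array([3.0, 4.0])   # coordinates -> values

        with tiledb.SparseArray(uri, mode="r") as A:
            result = A[1:3]                    # dict of coords and attribute values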
""" _mixin_init = False def __new__(cls, *args, **kwargs): if not cls._mixin_init: cls._mixin_init = True try: from tiledb.cloud import cloudarray SparseArray.__bases__ = SparseArray.__bases__ + (cloudarray.CloudArray,) except ImportError: pass return super().__new__(cls, *args, **kwargs) TileDB-Py-0.12.2/tiledb/common.pxi000066400000000000000000000037521417663620700165650ustar00rootroot00000000000000from cpython.bytes cimport (PyBytes_GET_SIZE, PyBytes_AS_STRING, PyBytes_Size, PyBytes_FromString, PyBytes_FromStringAndSize) from cpython.float cimport PyFloat_FromDouble from cpython.long cimport PyLong_FromLong from cpython.ref cimport (Py_INCREF, Py_DECREF, PyTypeObject) from libc.stdio cimport (FILE, stdout) from libc.stdio cimport stdout from libc.stdlib cimport malloc, calloc, free from libc.string cimport memcpy from libc.stdint cimport (uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t, uint64_t, int64_t, uintptr_t) from libc.stddef cimport ptrdiff_t from libc cimport limits from libcpp.vector cimport vector cdef extern from "Python.h": object PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) object PyUnicode_FromString(const char *u) # Numpy imports """ cdef extern from "numpyFlags.h": # Include 'numpyFlags.h' into the generated C code to disable warning. # This must be included before numpy is cimported pass """ import numpy as np cimport numpy as np cdef extern from "numpy/arrayobject.h": # Steals a reference to dtype, need to incref the dtype object PyArray_NewFromDescr(PyTypeObject* subtype, np.dtype descr, int nd, np.npy_intp* dims, np.npy_intp* strides, void* data, int flags, object obj) # Steals a reference to dtype, need to incref the dtype object PyArray_Scalar(void* ptr, np.dtype descr, object itemsize) void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) void* PyDataMem_NEW(size_t nbytes) void* PyDataMem_RENEW(void* data, size_t nbytes) void PyDataMem_FREE(void* data) TileDB-Py-0.12.2/tiledb/core.cc000066400000000000000000001423421417663620700160110ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #include #include "npbuffer.h" #include "util.h" #include #include #include #include #define TILEDB_DEPRECATED #define TILEDB_DEPRECATED_EXPORT #include #include // C #include // C++ #include // C #include // C #include "../external/string_view.hpp" #include "../external/tsl/robin_map.h" #if !defined(NDEBUG) //#include "debug.cc" #endif #include "query_condition.cc" namespace tiledbpy { using namespace std; using namespace tiledb; namespace py = pybind11; using namespace pybind11::literals; using TimerType = std::chrono::duration; struct StatsInfo { std::map counters; }; bool config_has_key(tiledb::Config config, std::string key) { try { config.get(key); } catch (TileDBError &e) { (void)e; return false; } return true; } struct PAPair { int64_t get_array() { if (!exported_) { TPY_ERROR_LOC("Cannot export uninitialized array!"); } return (int64_t)&array_; }; int64_t get_schema() { if (!exported_) { TPY_ERROR_LOC("Cannot export uninitialized schema!"); } return (int64_t)&schema_; } ArrowSchema schema_; ArrowArray array_; bool exported_ = false; }; // global stats counters static std::unique_ptr g_stats; // forward declaration py::dtype tiledb_dtype(tiledb_datatype_t type, uint32_t cell_val_num); struct BufferInfo { BufferInfo(std::string name, size_t data_nbytes, tiledb_datatype_t data_type, uint32_t cell_val_num, size_t offsets_num, size_t validity_num, bool isvar = false, bool isnullable = false) : name(name), 
type(data_type), cell_val_num(cell_val_num), isvar(isvar), isnullable(isnullable) { try { dtype = tiledb_dtype(data_type, cell_val_num); elem_nbytes = tiledb_datatype_size(type); data = py::array(py::dtype("uint8"), data_nbytes); offsets = py::array_t(offsets_num); validity = py::array_t(validity_num); } catch (py::error_already_set &e) { TPY_ERROR_LOC(e.what()) } // TODO use memset here for zero'd buffers in debug mode } string name; tiledb_datatype_t type; py::dtype dtype; size_t elem_nbytes = 1; uint64_t data_vals_read = 0; uint32_t cell_val_num; uint64_t offsets_read = 0; uint64_t validity_vals_read = 0; bool isvar; bool isnullable; py::array data; py::array_t offsets; py::array_t validity; }; py::dtype tiledb_dtype(tiledb_datatype_t type, uint32_t cell_val_num) { if (cell_val_num == 1) { auto np = py::module::import("numpy"); auto datetime64 = np.attr("datetime64"); switch (type) { case TILEDB_INT32: return py::dtype("int32"); case TILEDB_INT64: return py::dtype("int64"); case TILEDB_FLOAT32: return py::dtype("float32"); case TILEDB_FLOAT64: return py::dtype("float64"); case TILEDB_INT8: return py::dtype("int8"); case TILEDB_UINT8: return py::dtype("uint8"); case TILEDB_INT16: return py::dtype("int16"); case TILEDB_UINT16: return py::dtype("uint16"); case TILEDB_UINT32: return py::dtype("uint32"); case TILEDB_UINT64: return py::dtype("uint64"); case TILEDB_STRING_ASCII: return py::dtype("S1"); case TILEDB_STRING_UTF8: return py::dtype("U1"); case TILEDB_STRING_UTF16: case TILEDB_STRING_UTF32: TPY_ERROR_LOC("Unimplemented UTF16 or UTF32 string conversion!"); case TILEDB_STRING_UCS2: case TILEDB_STRING_UCS4: TPY_ERROR_LOC("Unimplemented UCS2 or UCS4 string conversion!"); case TILEDB_CHAR: return py::dtype("S1"); case TILEDB_DATETIME_YEAR: return py::dtype("M8[Y]"); case TILEDB_DATETIME_MONTH: return py::dtype("M8[M]"); case TILEDB_DATETIME_WEEK: return py::dtype("M8[W]"); case TILEDB_DATETIME_DAY: return py::dtype("M8[D]"); case TILEDB_DATETIME_HR: return py::dtype("M8[h]"); case TILEDB_DATETIME_MIN: return py::dtype("M8[m]"); case TILEDB_DATETIME_SEC: return py::dtype("M8[s]"); case TILEDB_DATETIME_MS: return py::dtype("M8[ms]"); case TILEDB_DATETIME_US: return py::dtype("M8[us]"); case TILEDB_DATETIME_NS: return py::dtype("M8[ns]"); case TILEDB_DATETIME_PS: return py::dtype("M8[ps]"); case TILEDB_DATETIME_FS: return py::dtype("M8[fs]"); case TILEDB_DATETIME_AS: return py::dtype("M8[as]"); #if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 3 /* duration types map to timedelta */ case TILEDB_TIME_HR: return py::dtype("m8[h]"); case TILEDB_TIME_MIN: return py::dtype("m8[m]"); case TILEDB_TIME_SEC: return py::dtype("m8[s]"); case TILEDB_TIME_MS: return py::dtype("m8[ms]"); case TILEDB_TIME_US: return py::dtype("m8[us]"); case TILEDB_TIME_NS: return py::dtype("m8[ns]"); case TILEDB_TIME_PS: return py::dtype("m8[ps]"); case TILEDB_TIME_FS: return py::dtype("m8[fs]"); case TILEDB_TIME_AS: return py::dtype("m8[as]"); #endif #if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 7 case TILEDB_BLOB: return py::dtype("bytes"); #endif case TILEDB_ANY: break; } } else if (cell_val_num == 2 && type == TILEDB_FLOAT32) { return py::dtype("complex64"); } else if (cell_val_num == 2 && type == TILEDB_FLOAT64) { return py::dtype("complex128"); } else if (type == TILEDB_CHAR || type == TILEDB_STRING_UTF8 || type == TILEDB_STRING_ASCII) { std::string base_str; switch (type) { case TILEDB_CHAR: case TILEDB_STRING_ASCII: base_str = "|S"; break; case TILEDB_STRING_UTF8: base_str = "|U"; break; 
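    // For example, TILEDB_STRING_ASCII with cell_val_num == 4 maps to numpy
    // dtype "|S4" via the sizing below, while cell_val_num == TILEDB_VAR_NUM
    // keeps the unsized "|S"/"|U" base string.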
default: TPY_ERROR_LOC("internal error: unhandled string type"); } if (cell_val_num < TILEDB_VAR_NUM) { base_str = base_str + std::to_string(cell_val_num); } return py::dtype(base_str); } else if (cell_val_num == TILEDB_VAR_NUM) { return tiledb_dtype(type, 1); } else if (cell_val_num > 1) { py::dtype base_dtype = tiledb_dtype(type, 1); py::tuple rec_elem = py::make_tuple("", base_dtype); py::list rec_list; for (size_t i = 0; i < cell_val_num; i++) rec_list.append(rec_elem); auto np = py::module::import("numpy"); // note: we call the 'dtype' constructor b/c py::dtype does not accept list auto np_dtype = np.attr("dtype"); return np_dtype(rec_list); } TPY_ERROR_LOC("tiledb datatype not understood ('" + tiledb::impl::type_to_str(type) + "', cell_val_num: " + std::to_string(cell_val_num) + ")"); } py::array_t uint8_bool_to_uint8_bitmap(py::array_t validity_array) { // TODO profile, probably replace; avoid inplace reassignment auto np = py::module::import("numpy"); auto packbits = np.attr("packbits"); auto tmp = packbits(validity_array, "bitorder"_a = "little"); return tmp; } uint64_t count_zeros(py::array_t a) { uint64_t count = 0; for (Py_ssize_t idx = 0; idx < a.size(); idx++) count += (a.data()[idx] == 0) ? 1 : 0; return count; } class PyQuery { private: Context ctx_; shared_ptr domain_; shared_ptr array_schema_; shared_ptr array_; shared_ptr query_; std::vector attrs_; std::vector dims_; map buffers_; vector buffers_order_; bool deduplicate_ = true; bool use_arrow_ = false; // initialize the query buffers with exactly `init_buffer_bytes` // rather than the estimated result size. for incomplete testing. bool exact_init_bytes_ = false; uint64_t init_buffer_bytes_ = DEFAULT_INIT_BUFFER_BYTES; uint64_t alloc_max_bytes_ = DEFAULT_ALLOC_MAX_BYTES; py::object pyschema_; public: tiledb_ctx_t *c_ctx_; tiledb_array_t *c_array_; bool preload_metadata_ = false; bool return_incomplete_ = false; size_t retries_ = 0; public: PyQuery() = delete; PyQuery(py::object ctx, py::object array, py::iterable attrs, py::iterable dims, py::object py_layout, py::object use_arrow) { tiledb_ctx_t *c_ctx_ = (py::capsule)ctx.attr("__capsule__")(); if (c_ctx_ == nullptr) TPY_ERROR_LOC("Invalid context pointer!") ctx_ = Context(c_ctx_, false); init_config(); // initialize arrow argument from user, if provided // call after init_config if (!use_arrow.is(py::none())) { use_arrow_ = py::cast(use_arrow); } tiledb_array_t *c_array_ = (py::capsule)array.attr("__capsule__")(); // we never own this pointer, pass own=false array_ = std::shared_ptr(new Array(ctx_, c_array_, false)); array_schema_ = std::shared_ptr(new ArraySchema(array_->schema())); domain_ = std::shared_ptr(new Domain(array_schema_->domain())); pyschema_ = array.attr("schema"); bool issparse = array_->schema().array_type() == TILEDB_SPARSE; // initialize the dims that we are asked to read for (auto d : dims) { dims_.push_back(d.cast()); } // initialize the attrs that we are asked to read for (auto a : attrs) { attrs_.push_back(a.cast()); } py::object pre_buffers = array.attr("_buffers"); if (!pre_buffers.is(py::none())) { py::dict pre_buffers_dict = pre_buffers.cast(); // iterate over (key, value) pairs for (std::pair b : pre_buffers_dict) { py::str name = b.first.cast(); // unpack value tuple of (data, offsets) auto bfrs = b.second.cast>(); auto data_array = bfrs.first.cast(); auto offsets_array = bfrs.second.cast(); import_buffer(name, data_array, offsets_array); } } query_ = std::shared_ptr(new Query(ctx_, *array_, TILEDB_READ)); // [](Query* p){} /* note: no 
deleter*/); tiledb_layout_t layout = (tiledb_layout_t)py_layout.cast(); if (!issparse && layout == TILEDB_UNORDERED) { TPY_ERROR_LOC("TILEDB_UNORDERED read is not supported for dense arrays") } query_->set_layout(layout); #if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 2 if (use_arrow_) { // enable arrow mode in the Query auto tmp_config = ctx_.config(); tmp_config.set("sm.var_offsets.bitsize", "64"); tmp_config.set("sm.var_offsets.mode", "elements"); tmp_config.set("sm.var_offsets.extra_element", "true"); ctx_.handle_error(tiledb_query_set_config( ctx_.ptr().get(), query_->ptr().get(), tmp_config.ptr().get())); } #endif } void init_config() { // get config parameters std::string tmp_str; if (config_has_key(ctx_.config(), "py.init_buffer_bytes")) { tmp_str = ctx_.config().get("py.init_buffer_bytes"); try { init_buffer_bytes_ = std::stoull(tmp_str); } catch (const std::invalid_argument &e) { (void)e; throw std::invalid_argument( "Failed to convert 'py.init_buffer_bytes' to uint64_t ('" + tmp_str + "')"); } } if (config_has_key(ctx_.config(), "py.alloc_max_bytes")) { tmp_str = ctx_.config().get("py.alloc_max_bytes"); try { alloc_max_bytes_ = std::stoull(tmp_str); } catch (const std::invalid_argument &e) { (void)e; throw std::invalid_argument( "Failed to convert 'py.alloc_max_bytes' to uint64_t ('" + tmp_str + "')"); } if (alloc_max_bytes_ < pow(1024, 2)) { throw std::invalid_argument("Invalid parameter: 'py.alloc_max_bytes' " "must be >= 1 MB (1024 ** 2 bytes)"); }; } if (config_has_key(ctx_.config(), "py.deduplicate")) { tmp_str = ctx_.config().get("py.deduplicate"); if (tmp_str == "true") { deduplicate_ = true; } else if (tmp_str == "false") { deduplicate_ = false; } else { throw std::invalid_argument( "Failed to convert configuration 'py.deduplicate' to bool ('" + tmp_str + "')"); } } if (config_has_key(ctx_.config(), "py.exact_init_buffer_bytes")) { tmp_str = ctx_.config().get("py.exact_init_buffer_bytes"); if (tmp_str == "true") { exact_init_bytes_ = true; } else if (tmp_str == "false") { exact_init_bytes_ = false; } else { throw std::invalid_argument("Failed to convert configuration " "'py.exact_init_buffer_bytes' to bool ('" + tmp_str + "')"); } } if (config_has_key(ctx_.config(), "py.use_arrow")) { tmp_str = ctx_.config().get("py.use_arrow"); if (tmp_str == "True") { use_arrow_ = true; } else if (tmp_str == "False") { use_arrow_ = false; } else { throw std::invalid_argument( "Failed to convert configuration 'py.use_arrow' to bool ('" + tmp_str + "')"); } } } void add_dim_range(uint32_t dim_idx, py::tuple r) { if (py::len(r) == 0) return; else if (py::len(r) != 2) TPY_ERROR_LOC("Unexpected range len != 2"); auto r0 = r[0]; auto r1 = r[1]; // no type-check here, because we might allow cast-conversion // if (r0.get_type() != r1.get_type()) // TPY_ERROR_LOC("Mismatched type"); auto dim = domain_->dimension(dim_idx); auto tiledb_type = dim.type(); try { switch (tiledb_type) { case TILEDB_INT32: { using T = int32_t; query_->add_range(dim_idx, r0.cast(), r1.cast()); break; } case TILEDB_INT64: { using T = int64_t; query_->add_range(dim_idx, r0.cast(), r1.cast()); break; } case TILEDB_INT8: { using T = int8_t; query_->add_range(dim_idx, r0.cast(), r1.cast()); break; } case TILEDB_UINT8: { using T = uint8_t; query_->add_range(dim_idx, r0.cast(), r1.cast()); break; } case TILEDB_INT16: { using T = int16_t; query_->add_range(dim_idx, r0.cast(), r1.cast()); break; } case TILEDB_UINT16: { using T = uint16_t; query_->add_range(dim_idx, r0.cast(), r1.cast()); break; } case TILEDB_UINT32: { 
using T = uint32_t; query_->add_range(dim_idx, r0.cast(), r1.cast()); break; } case TILEDB_UINT64: { using T = uint64_t; query_->add_range(dim_idx, r0.cast(), r1.cast()); break; } case TILEDB_FLOAT32: { using T = float; query_->add_range(dim_idx, r0.cast(), r1.cast()); break; } case TILEDB_FLOAT64: { using T = double; query_->add_range(dim_idx, r0.cast(), r1.cast()); break; } case TILEDB_STRING_ASCII: case TILEDB_STRING_UTF8: case TILEDB_CHAR: { if (!py::isinstance(r0) != !py::isinstance(r1)) { TPY_ERROR_LOC( "internal error: ranges must both be strings or (None, None)"); } else if (!py::isinstance(r0) && !py::isinstance(r1) && !py::isinstance(r0) && !py::isinstance(r1) && !py::isinstance(r0) && !py::isinstance(r1)) { TPY_ERROR_LOC( "internal error: expected string type for var-length dim!"); } if (!py::isinstance(r0) && !py::isinstance(r0)) query_->add_range(dim_idx, r0.cast(), r1.cast()); break; } case TILEDB_DATETIME_YEAR: case TILEDB_DATETIME_MONTH: case TILEDB_DATETIME_WEEK: case TILEDB_DATETIME_DAY: case TILEDB_DATETIME_HR: case TILEDB_DATETIME_MIN: case TILEDB_DATETIME_SEC: case TILEDB_DATETIME_MS: case TILEDB_DATETIME_US: case TILEDB_DATETIME_NS: case TILEDB_DATETIME_PS: case TILEDB_DATETIME_FS: case TILEDB_DATETIME_AS: { #if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 3 case TILEDB_TIME_HR: case TILEDB_TIME_MIN: case TILEDB_TIME_SEC: case TILEDB_TIME_MS: case TILEDB_TIME_US: case TILEDB_TIME_NS: case TILEDB_TIME_PS: case TILEDB_TIME_FS: case TILEDB_TIME_AS: #endif py::dtype dtype = tiledb_dtype(tiledb_type, 1); auto dt0 = py::isinstance(r0) ? r0 : r0.attr("astype")(dtype); auto dt1 = py::isinstance(r1) ? r1 : r1.attr("astype")(dtype); // TODO, this is suboptimal, should define pybind converter if (py::isinstance(dt0) && py::isinstance(dt1)) { query_->add_range(dim_idx, py::cast(dt0), py::cast(dt1)); } else { auto darray = py::array(py::make_tuple(dt0, dt1)); query_->add_range(dim_idx, *(int64_t *)darray.data(0), *(int64_t *)darray.data(1)); } break; } default: TPY_ERROR_LOC("Unknown dim type conversion!"); } } catch (py::cast_error &e) { (void)e; std::string msg = "Failed to cast dim range '" + (string)py::repr(r) + "' to dim type " + tiledb::impl::type_to_str(tiledb_type); TPY_ERROR_LOC(msg); } } #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 6 void set_ranges_bulk(py::iterable ranges) { // ranges are specified as one iterable per dimension uint32_t dim_idx = 0; for (auto dim_range : ranges) { // py::print(dim_range); if (py::isinstance(dim_range)) { py::array r_array = dim_range.cast(); add_bulk_range(dim_idx, r_array); } else { py::tuple dim_range_iter = dim_range.cast(); for (auto r : dim_range_iter) { py::tuple r_tuple = r.cast(); add_dim_range(dim_idx, r_tuple); } } dim_idx++; } } void add_bulk_range(uint32_t dim_idx, py::array ranges) { tiledb_ctx_t *c_ctx = ctx_.ptr().get(); tiledb_query_t *c_query = query_.get()->ptr().get(); ctx_.handle_error(tiledb_query_add_point_ranges( c_ctx, c_query, dim_idx, (void *)ranges.data(), ranges.size())); } #endif void set_ranges(py::iterable ranges) { // ranges are specified as one iterable per dimension uint32_t dim_idx = 0; for (auto dim_range : ranges) { py::tuple dim_range_iter = dim_range.cast(); for (auto r : dim_range_iter) { py::tuple r_tuple = r.cast(); add_dim_range(dim_idx, r_tuple); } dim_idx++; } } void set_subarray(py::array subarray) { auto ndim = domain_->ndim(); if (subarray.size() != (2 * ndim)) TPY_ERROR_LOC( "internal error: failed to set subarray (mismatched dimension count"); py::object r0, r1; for 
(unsigned dim_idx = 0; dim_idx < ndim; dim_idx++) { auto r = subarray[py::int_(dim_idx)]; r0 = r[py::int_(0)]; r1 = r[py::int_(1)]; add_dim_range(dim_idx, py::make_tuple(r0, r1)); } } void set_serialized_query(py::buffer serialized_query) { int rc; tiledb_query_t *c_query; tiledb_buffer_t *c_buffer; tiledb_ctx_t *c_ctx = ctx_.ptr().get(); rc = tiledb_buffer_alloc(c_ctx, &c_buffer); if (rc == TILEDB_ERR) TPY_ERROR_LOC("Could not allocate c_buffer."); py::buffer_info buffer_info = serialized_query.request(); rc = tiledb_buffer_set_data(c_ctx, c_buffer, buffer_info.ptr, buffer_info.shape[0]); if (rc == TILEDB_ERR) TPY_ERROR_LOC("Could not set c_buffer."); c_query = query_.get()->ptr().get(); rc = tiledb_deserialize_query(c_ctx, c_buffer, TILEDB_CAPNP, 0, c_query); if (rc == TILEDB_ERR) TPY_ERROR_LOC("Could not deserialize query."); } void set_attr_cond(py::object attr_cond) { if (!attr_cond.is(py::none())) { py::object init_pyqc = attr_cond.attr("init_query_condition"); try { init_pyqc(pyschema_, attrs_); } catch (tiledb::TileDBError &e) { TPY_ERROR_LOC(e.what()); } catch (py::error_already_set &e) { TPY_ERROR_LOC(e.what()); } auto pyqc = (attr_cond.attr("c_obj")).cast(); auto qc = pyqc.ptr().get(); query_->set_condition(*qc); } } bool is_dimension(std::string name) { return domain_->has_dimension(name); } bool is_attribute(std::string name) { return array_schema_->has_attribute(name); } bool is_var(std::string name) { if (is_dimension(name)) { auto dim = domain_->dimension(name); return dim.cell_val_num() == TILEDB_VAR_NUM; } else if (is_attribute(name)) { auto attr = array_schema_->attribute(name); return attr.cell_val_num() == TILEDB_VAR_NUM; } else { TPY_ERROR_LOC("Unknown buffer type for is_var check (expected attribute " "or dimension)") } } bool is_nullable(std::string name) { if (is_dimension(name)) { return false; } auto attr = array_schema_->attribute(name); return attr.nullable(); } std::pair buffer_type(std::string name) { tiledb_datatype_t type; uint32_t cell_val_num; if (is_dimension(name)) { type = domain_->dimension(name).type(); cell_val_num = domain_->dimension(name).cell_val_num(); } else if (is_attribute(name)) { type = array_schema_->attribute(name).type(); cell_val_num = array_schema_->attribute(name).cell_val_num(); } else { TPY_ERROR_LOC("Unknown buffer '" + name + "'"); } return {type, cell_val_num}; } uint32_t buffer_ncells(std::string name) { if (is_dimension(name)) { return domain_->dimension(name).cell_val_num(); } else if (is_attribute(name)) { return array_schema_->attribute(name).cell_val_num(); } TPY_ERROR_LOC("Unknown buffer '" + name + "' for buffer_ncells"); } py::dtype buffer_dtype(std::string name) { try { auto t = buffer_type(name); return tiledb_dtype(t.first, t.second); } catch (TileDBError &e) { (void)e; return py::none(); } } bool is_sparse() { return array_->schema().array_type() == TILEDB_SPARSE; } void import_buffer(std::string name, py::array data, py::array offsets) { tiledb_datatype_t type; uint32_t cell_val_num; std::tie(type, cell_val_num) = buffer_type(name); uint64_t cell_nbytes = tiledb_datatype_size(type); if (cell_val_num != TILEDB_VAR_NUM) cell_nbytes *= cell_val_num; auto dtype = tiledb_dtype(type, cell_val_num); buffers_order_.push_back(name); // set nbytes and noffsets=0 here to avoid allocation; buffers set below auto buffer_info = BufferInfo(name, 0, type, cell_val_num, 0, 0, // TODO is_var(name), is_nullable(name)); buffer_info.data = data; buffer_info.offsets = offsets; buffers_.insert({name, buffer_info}); } void 
alloc_buffer(std::string name) { tiledb_datatype_t type; uint32_t cell_val_num; std::tie(type, cell_val_num) = buffer_type(name); uint64_t cell_nbytes = tiledb_datatype_size(type); if (cell_val_num != TILEDB_VAR_NUM) cell_nbytes *= cell_val_num; auto dtype = tiledb_dtype(type, cell_val_num); uint64_t buf_nbytes = 0; uint64_t offsets_num = 0; uint64_t validity_num = 0; bool var = is_var(name); bool nullable = is_nullable(name); bool dense = array_schema_->array_type() == TILEDB_DENSE; if (retries_ < 1 && dense) { // we must not call after submitting if (nullable && var) { auto sizes = query_->est_result_size_var_nullable(name); offsets_num = sizes[0]; buf_nbytes = sizes[1]; validity_num = sizes[2] / sizeof(uint8_t); } else if (nullable && !var) { auto sizes = query_->est_result_size_nullable(name); buf_nbytes = sizes[0]; validity_num = sizes[1] / sizeof(uint8_t); } else if (!nullable && var) { auto size_pair = query_->est_result_size_var(name); #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR < 2 buf_nbytes = size_pair.first; offsets_num = size_pair.second; #else buf_nbytes = size_pair[0]; offsets_num = size_pair[1]; #endif } else { // !nullable && !var buf_nbytes = query_->est_result_size(name); } } // - for sparse arrays: don't try to allocate more than alloc_max_bytes_ // - for dense arrays: the estimate should be exact, so don't cap if (is_sparse() && buf_nbytes > alloc_max_bytes_) { buf_nbytes = alloc_max_bytes_; } // use max to avoid overflowing to zero in the multiplication, in case the // estimate is too large if (max(validity_num, validity_num * sizeof(uint8_t)) > alloc_max_bytes_) { validity_num = alloc_max_bytes_ / sizeof(uint8_t); } if (max(offsets_num, offsets_num * sizeof(uint64_t)) > alloc_max_bytes_) { offsets_num = alloc_max_bytes_ / sizeof(uint64_t); } // use init_buffer_bytes configuration option if the // estimate is smaller if ((var || is_sparse()) && (buf_nbytes < init_buffer_bytes_ || exact_init_bytes_)) { buf_nbytes = init_buffer_bytes_; offsets_num = init_buffer_bytes_ / sizeof(uint64_t); validity_num = init_buffer_bytes_ / cell_nbytes; } buffers_order_.push_back(name); buffers_.insert( {name, BufferInfo(name, buf_nbytes, type, cell_val_num, offsets_num, validity_num, var, nullable)}); } py::object get_buffers() { py::list result; for (auto &bp : buffers_) { const BufferInfo b = bp.second; result.append(b.data); result.append(b.offsets); } return std::move(result); } void set_buffers() { for (auto bp : buffers_) { auto name = bp.first; const BufferInfo b = bp.second; size_t offsets_read = b.offsets_read; size_t data_vals_read = b.data_vals_read; size_t validity_vals_read = b.validity_vals_read; void *data_ptr = (void *)((char *)b.data.data() + (data_vals_read * b.elem_nbytes)); uint64_t data_nelem = (b.data.size() - (data_vals_read * b.elem_nbytes)) / b.elem_nbytes; query_->set_data_buffer(b.name, data_ptr, data_nelem); if (b.isvar) { size_t offsets_size = b.offsets.size() - offsets_read; uint64_t *offsets_ptr = (uint64_t *)b.offsets.data() + offsets_read; query_->set_offsets_buffer(b.name, (uint64_t *)(offsets_ptr), offsets_size); } if (b.isnullable) { uint64_t validity_size = b.validity.size() - validity_vals_read; uint8_t *validity_ptr = (uint8_t *)b.validity.data() + validity_vals_read; query_->set_validity_buffer(b.name, validity_ptr, validity_size); } } } void update_read_elem_num() { #if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 3 // needs https://github.com/TileDB-Inc/TileDB/pull/2238 auto result_elements = 
query_->result_buffer_elements_nullable(); #else auto result_elements = query_->result_buffer_elements_nullable(); auto result_offsets_tmp = query_->result_buffer_elements(); #endif for (const auto &read_info : result_elements) { auto name = read_info.first; uint64_t offset_elem_num = 0, data_vals_num = 0, validity_elem_num = 0; std::tie(offset_elem_num, data_vals_num, validity_elem_num) = read_info.second; #if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR < 3 // we need to fix-up the offset count b/c incorrect before 2.3 // (https://github.com/TileDB-Inc/TileDB/pull/2238) offset_elem_num = result_offsets_tmp[name].first; #endif BufferInfo &buf = buffers_.at(name); // TODO if we ever support per-attribute read offset bitsize // then need to handle here. Currently this is hard-coded to // 64-bit to match query config. auto offset_ptr = buf.offsets.mutable_data(); if (buf.isvar) { if (offset_elem_num > 0) { // account for 'sm.var_offsets.extra_element' offset_elem_num -= (use_arrow_) ? 1 : 0; } if (buf.offsets_read > 0) { if (offset_ptr[buf.offsets_read] == 0) { auto last_size = (buf.data_vals_read * buf.elem_nbytes); for (uint64_t i = 0; i < offset_elem_num; i++) { offset_ptr[buf.offsets_read + i] += last_size; } } } } buf.data_vals_read += data_vals_num; buf.offsets_read += offset_elem_num; buf.validity_vals_read += validity_elem_num; } } void reset_read_elem_num() { for (auto &bp : buffers_) { auto &buf = bp.second; buf.offsets_read = 0; buf.data_vals_read = 0; buf.validity_vals_read = 0; } } uint64_t get_max_retries() { // should make this a templated getter for any key w/ default std::string tmp_str; size_t max_retries; try { tmp_str = ctx_.config().get("py.max_incomplete_retries"); max_retries = std::stoull(tmp_str); } catch (const std::invalid_argument &e) { (void)e; throw TileDBError( "Failed to convert 'py.max_incomplete_retries' to uint64_t ('" + tmp_str + "')"); } catch (tiledb::TileDBError &e) { (void)e; max_retries = 100; } return max_retries; } void resubmit_read() { #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 6 tiledb_query_status_details_t status_details; tiledb_query_get_status_details(ctx_.ptr().get(), query_.get()->ptr().get(), &status_details); if (status_details.incomplete_reason == TILEDB_REASON_USER_BUFFER_SIZE) { #else if (true) { #endif auto start_incomplete_buffer_update = std::chrono::high_resolution_clock::now(); for (auto &bp : buffers_) { auto &buf = bp.second; // Check if values buffer should be resized if ((buf.data_vals_read == 0) || (int64_t)(buf.data_vals_read * buf.elem_nbytes) > (buf.data.nbytes() + 1) / 2) { size_t new_size = buf.data.size() * 2; buf.data.resize({new_size}, false); } // Check if offset buffer should be resized if ((buf.isvar && buf.offsets_read == 0) || ((int64_t)(buf.offsets_read * sizeof(uint64_t)) > (buf.offsets.nbytes() + 1) / 2)) { size_t new_offsets_size = buf.offsets.size() * 2; buf.offsets.resize({new_offsets_size}, false); } // Check if validity buffer should be resized if ((buf.isnullable && buf.validity_vals_read == 0) || ((int64_t)(buf.validity_vals_read * sizeof(uint8_t)) > (buf.validity.nbytes() + 1) / 2)) { size_t new_validity_size = buf.validity.size() * 2; buf.validity.resize({new_validity_size}, false); } } // note: this block confuses lldb. continues from here unless bp set after // block. 
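    // Worked illustration of the doubling above (hypothetical numbers): with
    // buffers initially sized at 10 MB via 'py.init_buffer_bytes', a result
    // needing ~35 MB grows the data buffer 10 -> 20 -> 40 MB across
    // successive incomplete retries; set_buffers() then points each resubmit
    // past the elements already read.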
set_buffers(); if (g_stats) { auto now = std::chrono::high_resolution_clock::now(); g_stats.get() ->counters["py.read_query_incomplete_buffer_resize_time"] += now - start_incomplete_buffer_update; } } { py::gil_scoped_release release; query_->submit(); } update_read_elem_num(); return; } void resize_output_buffers() { // resize the output buffers to match the final read total // the higher level code uses the size of the buffers to // determine how much to unpack, but we may have over-allocated // account for the extra element at the end of offsets in arrow mode size_t arrow_offset_size = use_arrow_ ? 1 : 0; for (auto &bp : buffers_) { auto name = bp.first; auto &buf = bp.second; Py_ssize_t final_data_nbytes = buf.data_vals_read * buf.elem_nbytes; Py_ssize_t final_offsets_count = buf.offsets_read + arrow_offset_size; Py_ssize_t final_validity_count = buf.validity_vals_read; assert(final_data_nbytes <= buf.data.size()); assert(final_offsets_count <= (Py_ssize_t)(buf.offsets.size() + arrow_offset_size)); buf.data.resize({final_data_nbytes}); buf.offsets.resize({final_offsets_count}); buf.validity.resize({final_validity_count}); if (use_arrow_) { if (retries_ > 0) { // we need to write the final size to the final offset slot // because core doesn't track size between incomplete submits buf.offsets.mutable_data()[buf.offsets_read] = final_data_nbytes; } // reset bytes-read so that set_buffers uses the full buffer size buf.data_vals_read = 0; buf.offsets_read = 0; buf.validity_vals_read = 0; } } if (use_arrow_) { // this is a very light hack: // call set_buffers here to reset the buffers to the *full* // buffer in case there were incomplete queries. without this call, // the active tiledb::Query only knows about the buffer ptr/size // for the *last* submit loop, so we don't get full result set. // ArrowAdapter gets the buffer sizes from tiledb::Query. set_buffers(); } } void allocate_buffers() { // allocate buffers for dims // - we want to return dims first, if any requested for (size_t dim_idx = 0; dim_idx < domain_->ndim(); dim_idx++) { auto dim = domain_->dimension(dim_idx); if ((std::find(dims_.begin(), dims_.end(), dim.name()) == dims_.end()) && // we need to also check if this is an attr for backward-compatibility (std::find(attrs_.begin(), attrs_.end(), dim.name()) == attrs_.end())) { continue; } alloc_buffer(dim.name()); } // allocate buffers for attributes // - schema.attributes() is unordered, but we need to return ordered // results for (size_t attr_idx = 0; attr_idx < array_schema_->attribute_num(); attr_idx++) { auto attr = array_schema_->attribute(attr_idx); if (std::find(attrs_.begin(), attrs_.end(), attr.name()) == attrs_.end()) { continue; } alloc_buffer(attr.name()); } } void submit_read() { if (retries_ > 0 && query_->query_status() == tiledb::Query::Status::INCOMPLETE) { buffers_.clear(); assert(buffers_.size() == 0); buffers_order_.clear(); // reset_read_elem_num(); } else if (buffers_.size() != 0) { // we have externally imported buffers return; } // start time auto start = std::chrono::high_resolution_clock::now(); // Initiate a metadata API request to make libtiledb fetch the // metadata ahead of time. In some queries we know we will always // access metadata, so initiating this call saves time when loading // from remote arrays because metadata i/o is lazy in core. 
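    // The retry ceiling for the incomplete-read loop further below is
    // user-configurable from Python, e.g. (illustrative):
    //   ctx = tiledb.Ctx({"py.max_incomplete_retries": "10"})
    // get_max_retries() falls back to 100 when the key is unset.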
/* // This section is disabled pending final disposition of SC-11720 // This call is not currently safe as of TileDB 2.5, and has caused // reproducible deadlocks with the subesequent calls to // Array::est_result_sizes from the main thread. // std::future metadata_num_preload; if (preload_metadata_) { metadata_num_preload = std::async( std::launch::async, [this]() { return array_->metadata_num(); }); } */ allocate_buffers(); // set the buffers on the Query set_buffers(); size_t max_retries = get_max_retries(); auto start_submit = std::chrono::high_resolution_clock::now(); { py::gil_scoped_release release; query_->submit(); } if (g_stats) { auto now = std::chrono::high_resolution_clock::now(); g_stats.get()->counters["py.core_read_query_initial_submit_time"] += now - start_submit; } // update the BufferInfo read-counts to match the query results read update_read_elem_num(); // fetch the result of the metadata get task // this will block if not yet completed /* // disabled, see comment above if (preload_metadata_) { metadata_num_preload.get(); } */ auto incomplete_start = std::chrono::high_resolution_clock::now(); // TODO: would be nice to have a callback here for custom realloc strategy while (!return_incomplete_ && query_->query_status() == Query::Status::INCOMPLETE) { if (++retries_ > max_retries) TPY_ERROR_LOC( "Exceeded maximum retries ('py.max_incomplete_retries': '" + std::to_string(max_retries) + "')"); resubmit_read(); } if (g_stats && retries_ > 0) { auto now = std::chrono::high_resolution_clock::now(); g_stats.get()->counters["py.core_read_query_incomplete_retry_time"] += now - incomplete_start; } // update TileDB-Py stat counter if (g_stats) { auto now = std::chrono::high_resolution_clock::now(); g_stats.get()->counters["py.core_read_query_total_time"] += now - start; } if (g_stats) { g_stats.get()->counters["py.query_retries_count"] += TimerType(retries_); } resize_output_buffers(); if (return_incomplete_) { // increment in case we submit again retries_++; } } py::array unpack_buffer(std::string name, py::array buf, py::array_t off) { auto start = std::chrono::high_resolution_clock::now(); if (off.size() < 1) TPY_ERROR_LOC(std::string("Unexpected empty offsets array ('") + name + "')"); auto dtype = buffer_dtype(name); bool is_unicode = dtype.is(py::dtype("U")); bool is_str = dtype.is(py::dtype("S")); if (is_unicode || is_str) { dtype = py::dtype("O"); } // Hashmap for string deduplication // fastest so far: typedef tsl::robin_map MapType; MapType map; std::vector object_v; if (is_unicode) { map.reserve(size_t(off.size() / 10) + 1); } auto result_array = py::array(py::dtype("O"), off.size()); auto result_p = (py::object *)result_array.mutable_data(); uint64_t last = 0; uint64_t cur = 0; size_t size = 0; uint64_t create = 0; auto off_data = off.data(); last = off_data[0]; // initial should always be 0 for (auto i = 1; i < off.size() + 1; i++) { if (i == off.size()) cur = buf.nbytes(); else { cur = off_data[i]; } size = cur - last; py::object o; auto data_ptr = (char *)buf.data() + last; if (is_unicode) if (size == 0 || (data_ptr[0] == '\0' && size == 1)) { o = py::str(""); } else { if (!deduplicate_) { o = py::str(data_ptr, size); } else { auto v = nonstd::string_view{data_ptr, size}; auto h = std::hash()(v); auto needle = map.find(h); if (needle == map.end()) { o = py::str(data_ptr, size); map.insert(needle, {h, create}); object_v.push_back(o); create++; } else { auto idx = needle->second; o = object_v[idx]; } } } else if (is_str) if (size == 0 || (data_ptr[0] == '\0' && size 
== 1)) { o = py::bytes(""); } else { o = py::bytes(data_ptr, size); } else { o = py::array(py::dtype("uint8"), size, data_ptr); o.attr("dtype") = dtype; } result_p[i - 1] = o; last = cur; } if (g_stats) { auto now = std::chrono::high_resolution_clock::now(); g_stats.get()->counters["py.buffer_conversion_time"] += now - start; } return result_array; } void submit_write() {} void submit() { if (array_->query_type() == TILEDB_READ) submit_read(); else if (array_->query_type() == TILEDB_WRITE) submit_write(); else TPY_ERROR_LOC("Unknown query type!") } py::dict results() { py::dict results; for (auto &buffer_name : buffers_order_) { auto bp = buffers_.at(buffer_name); results[py::str(buffer_name)] = py::make_tuple(bp.data, bp.offsets); } return results; } std::unique_ptr buffer_to_pa(std::string name) { if (query_->query_status() != tiledb::Query::Status::COMPLETE) TPY_ERROR_LOC("Cannot convert buffers unless Query is complete"); #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR < 2 tiledb::arrow::ArrowAdapter adapter(query_); #else tiledb::arrow::ArrowAdapter adapter(&ctx_, query_.get()); #endif std::unique_ptr pa_pair(new PAPair()); adapter.export_buffer(name.c_str(), &(pa_pair->array_), &(pa_pair->schema_)); pa_pair->exported_ = true; return pa_pair; } py::object buffers_to_pa_table() { using namespace pybind11::literals; auto pa = py::module::import("pyarrow"); auto pa_array_import = pa.attr("Array").attr("_import_from_c"); #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR < 2 tiledb::arrow::ArrowAdapter adapter(query_); #else tiledb::arrow::ArrowAdapter adapter(&ctx_, query_.get()); #endif py::list names; py::list results; for (auto &buffer_name : buffers_order_) { ArrowArray c_pa_array; ArrowSchema c_pa_schema; adapter.export_buffer(buffer_name.c_str(), static_cast(&c_pa_array), static_cast(&c_pa_schema)); if (is_nullable(buffer_name)) { BufferInfo &buffer_info = buffers_.at(buffer_name); // count zeros before converting to bitmap c_pa_array.null_count = count_zeros(buffer_info.validity); // convert to bitmap buffer_info.validity = uint8_bool_to_uint8_bitmap(buffer_info.validity); c_pa_array.buffers[0] = buffer_info.validity.data(); c_pa_array.n_buffers = is_var(buffer_name) ? 
3 : 2; c_pa_schema.flags |= ARROW_FLAG_NULLABLE; } else if (!is_var(buffer_name)) { // reset the number of buffers for non-nullable data c_pa_array.n_buffers = 2; } // work around for SC-11522: metadata field must be set to nullptr c_pa_schema.metadata = nullptr; py::object pa_array = pa_array_import(py::int_((ptrdiff_t)&c_pa_array), py::int_((ptrdiff_t)&c_pa_schema)); results.append(pa_array); names.append(buffer_name); } auto pa_table = pa.attr("Table").attr("from_arrays")(results, "names"_a = names); return pa_table; } py::object is_incomplete() { if (!query_) { throw TileDBPyError("Internal error: PyQuery not initialized!"); } return py::cast(query_->query_status() == tiledb::Query::Status::INCOMPLETE); } py::object estimated_result_sizes() { py::dict results; for (auto const &bp : buffers_) { auto name = bp.first; auto buf = bp.second; size_t est_offsets = 0, est_data_bytes = 0; if (is_var(name)) { query_->est_result_size_var(name); auto est_sizes = query_->est_result_size_var(name); est_offsets = std::get<0>(est_sizes); est_data_bytes = std::get<1>(est_sizes); } else { est_data_bytes = query_->est_result_size(name); } results[py::str(name)] = py::make_tuple(est_offsets, est_data_bytes); } return std::move(results); } py::array _test_array() { py::array_t a; a.resize({10}); a.resize({20}); return std::move(a); } uint64_t _test_init_buffer_bytes() { // test helper to get the configured init_buffer_bytes return init_buffer_bytes_; } uint64_t _test_alloc_max_bytes() { // test helper to get the configured init_buffer_bytes return alloc_max_bytes_; } std::string get_stats() { return query_->stats(); } }; // namespace tiledbpy void init_stats() { g_stats.reset(new StatsInfo()); auto stats_counters = g_stats.get()->counters; stats_counters["py.core_read_query_initial_submit_time"] = TimerType(); stats_counters["py.core_read_query_total_time"] = TimerType(); stats_counters["py.core_read_query_incomplete_retry_time"] = TimerType(); stats_counters["py.buffer_conversion_time"] = TimerType(); stats_counters["py.read_query_incomplete_buffer_resize_time"] = TimerType(); stats_counters["py.query_retries_count"] = TimerType(); } void disable_stats() { g_stats.reset(nullptr); } void increment_stat(std::string key, double value) { auto &stats_counters = g_stats.get()->counters; if (stats_counters.count(key) == 0) stats_counters[key] = TimerType(); auto &timer = stats_counters[key]; auto incr = std::chrono::duration(value); timer += incr; } bool use_stats() { return (bool)g_stats; } py::object get_stats() { if (!g_stats) { TPY_ERROR_LOC("Stats counters are not uninitialized!") } auto &stats_counters = g_stats.get()->counters; py::dict res; for (auto iter = stats_counters.begin(); iter != stats_counters.end(); ++iter) { auto val = std::chrono::duration(iter->second); res[py::str(iter->first)] = py::float_(val.count()); } return std::move(res); } std::string python_internal_stats() { if (!g_stats) { TPY_ERROR_LOC("Stats counters are not uninitialized!") } auto counters = g_stats.get()->counters; std::ostringstream os; // core.cc is only tracking read time right now; don't print if we // have no query submission time auto rq_time = counters["py.core_read_query_initial_submit_time"].count(); if (rq_time == 0) return os.str(); os << std::endl; os << "==== Python Stats ====" << std::endl << std::endl; for (auto &stat : counters) { os << " " << stat.first << " : " << stat.second.count() << std::endl; } return os.str(); } void init_core(py::module &m) { init_query_condition(m); auto pq = py::class_(m, 
"PyQuery") .def(py::init()) .def("buffer_dtype", &PyQuery::buffer_dtype) .def("results", &PyQuery::results) .def("set_ranges", &PyQuery::set_ranges) #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 6 .def("set_ranges_bulk", &PyQuery::set_ranges_bulk) #endif .def("set_subarray", &PyQuery::set_subarray) .def("set_attr_cond", &PyQuery::set_attr_cond) .def("set_serialized_query", &PyQuery::set_serialized_query) .def("submit", &PyQuery::submit) .def("unpack_buffer", &PyQuery::unpack_buffer) .def("estimated_result_sizes", &PyQuery::estimated_result_sizes) .def("get_stats", &PyQuery::get_stats) .def("_allocate_buffers", &PyQuery::allocate_buffers) .def("_get_buffers", &PyQuery::get_buffers) .def("_buffer_to_pa", &PyQuery::buffer_to_pa) .def("_buffers_to_pa_table", &PyQuery::buffers_to_pa_table) .def("_test_array", &PyQuery::_test_array) .def("_test_err", [](py::object self, std::string s) { throw TileDBPyError(s); }) .def_readwrite("_preload_metadata", &PyQuery::preload_metadata_) .def_readwrite("_return_incomplete", &PyQuery::return_incomplete_) // properties .def_property_readonly("is_incomplete", &PyQuery::is_incomplete) .def_property_readonly("_test_init_buffer_bytes", &PyQuery::_test_init_buffer_bytes) .def_property_readonly("_test_alloc_max_bytes", &PyQuery::_test_alloc_max_bytes) .def_readonly("retries", &PyQuery::retries_); m.def("array_to_buffer", &convert_np); m.def("init_stats", &init_stats); m.def("disable_stats", &init_stats); m.def("python_internal_stats", &python_internal_stats); m.def("increment_stat", &increment_stat); m.def("get_stats", &get_stats); m.def("use_stats", &use_stats); py::class_(m, "PAPair") .def(py::init()) .def("get_array", &PAPair::get_array) .def("get_schema", &PAPair::get_schema); /* We need to make sure C++ TileDBError is translated to a correctly-typed py error. Note that using py::exception(..., "TileDBError") creates a new exception in the *readquery* module, so we must import to reference. */ static auto tiledb_py_error = (py::object)py::module::import("tiledb").attr("TileDBError"); py::register_exception_translator([](std::exception_ptr p) { try { if (p) std::rethrow_exception(p); } catch (const TileDBPyError &e) { PyErr_SetString(tiledb_py_error.ptr(), e.what()); } catch (const tiledb::TileDBError &e) { PyErr_SetString(tiledb_py_error.ptr(), e.what()); } catch (py::builtin_exception &e) { // just forward the error throw; //} catch (std::runtime_error &e) { // std::cout << "unexpected runtime_error: " << e.what() << std::endl; } }); }; }; // namespace tiledbpy TileDB-Py-0.12.2/tiledb/ctx.py000066400000000000000000000055031417663620700157170ustar00rootroot00000000000000from contextlib import contextmanager from contextvars import ContextVar import tiledb _ctx_var = ContextVar("ctx") already_warned = False def check_ipykernel_warn_once(): """ This function checks if we have imported ipykernel version < 6 in the current process, and provides a warning that default_ctx/scope_ctx will not work correctly due to a bug in IPython contextvar support.""" global already_warned if not already_warned: try: import sys, warnings if "ipykernel" in sys.modules and tuple( map(int, sys.modules["ipykernel"].__version__.split(".")) ) < (6, 0): warnings.warn( "tiledb.default_ctx and scope_ctx will not function correctly " "due to bug in IPython contextvar support. You must supply a " "Ctx object to each function for custom configuration options. " "Please consider upgrading to ipykernel >= 6!" 
"Please see https://github.com/TileDB-Inc/TileDB-Py/issues/667 " "for more information." ) except: pass finally: already_warned = True @contextmanager def scope_ctx(ctx_or_config=None): """ Context manager for setting the default `tiledb.Ctx` context variable when entering a block of code and restoring it to its previous value when exiting the block. :param ctx_or_config: :py:class:`tiledb.Ctx` or :py:class:`tiledb.Config` object or dictionary with config parameters. :return: Ctx """ check_ipykernel_warn_once() if not isinstance(ctx_or_config, tiledb.Ctx): ctx = tiledb.Ctx(ctx_or_config) else: ctx = ctx_or_config token = _ctx_var.set(ctx) try: yield _ctx_var.get() finally: _ctx_var.reset(token) def default_ctx(config=None): """ Returns, and optionally initializes, the default `tiledb.Ctx` context variable. This Ctx object is used by Python API functions when no `ctx` keyword argument is provided. Most API functions accept an optional `ctx` kwarg, but that is typically only necessary in advanced usage with multiple contexts per program. For initialization, this function must be called before any other tiledb functions. The initialization call accepts a :py:class:`tiledb.Config` object to override the defaults for process-global parameters. :param config: :py:class:`tiledb.Config` object or dictionary with config parameters. :return: Ctx """ check_ipykernel_warn_once() try: ctx = _ctx_var.get() if config is not None: raise tiledb.TileDBError("Global context already initialized!") except LookupError: ctx = tiledb.Ctx(config) _ctx_var.set(ctx) return ctx TileDB-Py-0.12.2/tiledb/dataframe_.py000066400000000000000000000726641417663620700172200ustar00rootroot00000000000000import copy import json import os import warnings from dataclasses import dataclass from typing import Optional, Union import numpy as np import tiledb from tiledb import TileDBError, libtiledb def check_dataframe_deps(): pd_error = """Pandas version >= 1.0 required for dataframe functionality. Please `pip install pandas>=1.0` to proceed.""" pa_error = """PyArrow version >= 1.0 is suggested for dataframe functionality. Please `pip install pyarrow>=1.0`.""" try: import pandas as pd except ImportError: raise Exception(pd_error) from packaging.version import Version if Version(pd.__version__) < Version("1.0"): raise Exception(pd_error) try: import pyarrow as pa if Version(pa.__version__) < Version("1.0"): warnings.warn(pa_error) except ImportError: warnings.warn(pa_error) # Note: 'None' is used to indicate optionality for many of these options # For example, if the `sparse` argument is unspecified we will default # to False (dense) unless the input has string or heterogenous indexes. 
TILEDB_KWARG_DEFAULTS = { "ctx": None, "sparse": None, "index_dims": None, "allows_duplicates": True, "mode": "ingest", "attr_filters": True, "dim_filters": True, "coords_filters": True, "offsets_filters": True, "full_domain": False, "tile": None, "row_start_idx": None, "fillna": None, "column_types": None, "varlen_types": None, "capacity": None, "date_spec": None, "cell_order": "row-major", "tile_order": "row-major", "timestamp": None, "debug": None, } def parse_tiledb_kwargs(kwargs): parsed_args = dict(TILEDB_KWARG_DEFAULTS) for key in TILEDB_KWARG_DEFAULTS.keys(): if key in kwargs: parsed_args[key] = kwargs.pop(key) return parsed_args @dataclass(frozen=True) class ColumnInfo: dtype: np.dtype repr: Optional[str] = None nullable: bool = False var: bool = False @classmethod def from_values(cls, array_like, varlen_types=()): from pandas.api import types as pd_types if pd_types.is_object_dtype(array_like): # Note: this does a full scan of the column... not sure what else to do here # because Pandas allows mixed string column types (and actually has # problems w/ allowing non-string types in object columns) inferred_dtype = pd_types.infer_dtype(array_like) if inferred_dtype == "bytes": return cls.from_dtype(np.bytes_) elif inferred_dtype == "string": # TODO we need to make sure this is actually convertible return cls.from_dtype(np.str_) else: raise NotImplementedError( f"{inferred_dtype} inferred dtype not supported" ) else: if not hasattr(array_like, "dtype"): array_like = np.asanyarray(array_like) return cls.from_dtype(array_like.dtype, varlen_types) @classmethod def from_dtype(cls, dtype, varlen_types=()): from pandas.api import types as pd_types dtype = pd_types.pandas_dtype(dtype) # Note: be careful if you rearrange the order of the following checks # extension types if pd_types.is_extension_array_dtype(dtype): if pd_types.is_bool_dtype(dtype): np_type = np.uint8 else: # XXX Parametrized dtypes such as "foo[int32]") sometimes have a "subtype" # property that holds the "int32". 
If it exists use this, otherwise use # the standard type property np_type = getattr(dtype, "subtype", dtype.type) var = bool(varlen_types and dtype in varlen_types) if var: # currently TileDB-py doesn't support nullable var-length attributes nullable = False else: # currently nullability is a (private) property of ExtensionArray # see https://github.com/pandas-dev/pandas/issues/40574 nullable = bool(dtype.construct_array_type()._can_hold_na) return cls(np.dtype(np_type), repr=dtype.name, nullable=nullable, var=var) # bool type if pd_types.is_bool_dtype(dtype): return cls(np.dtype("uint8"), repr="bool") # complex types if pd_types.is_complex_dtype(dtype): raise NotImplementedError("complex dtype not supported") # remaining numeric types if pd_types.is_numeric_dtype(dtype): if dtype == np.float16 or hasattr(np, "float128") and dtype == np.float128: raise NotImplementedError( "Only single and double precision float dtypes are supported" ) return cls(dtype) # datetime types if pd_types.is_datetime64_any_dtype(dtype): if dtype == "datetime64[ns]": return cls(dtype) else: raise NotImplementedError( "Only 'datetime64[ns]' datetime dtype is supported" ) # string types # don't use pd_types.is_string_dtype() because it includes object types too if dtype.type in (np.bytes_, np.str_): # str and bytes are always stored as var-length return cls(dtype, var=True) raise NotImplementedError(f"{dtype} dtype not supported") def _get_column_infos(df, column_types, varlen_types): column_infos = {} for name, column in df.items(): if column_types and name in column_types: column_infos[name] = ColumnInfo.from_dtype(column_types[name], varlen_types) else: column_infos[name] = ColumnInfo.from_values(column, varlen_types) return column_infos def _get_schema_filters(filters): if filters is True: # default case, unspecified: use libtiledb defaults return None elif filters is None: # empty filter list (schema uses zstd by default if unspecified) return tiledb.FilterList() elif isinstance(filters, (list, tiledb.FilterList)): return tiledb.FilterList(filters) elif isinstance(filters, tiledb.libtiledb.Filter): return tiledb.FilterList([filters]) else: raise ValueError("Unknown FilterList type!") def _get_attr_dim_filters(name, filters): if isinstance(filters, dict): # support passing a dict of filters per-attribute return _get_schema_filters(filters.get(name, True)) else: return _get_schema_filters(filters) def _get_attrs(names, column_infos, attr_filters): attrs = [] attr_reprs = {} for name in names: filters = _get_attr_dim_filters(name, attr_filters) column_info = column_infos[name] attrs.append( tiledb.Attr( name=name, filters=filters, dtype=column_info.dtype, nullable=column_info.nullable, var=column_info.var, ) ) if column_info.repr is not None: attr_reprs[name] = column_info.repr return attrs, attr_reprs def dim_for_column(name, values, dtype, tile, full_domain=False, dim_filters=None): if full_domain: if dtype not in (np.bytes_, np.str_): # Use the full type domain, deferring to the constructor dtype_min, dtype_max = tiledb.libtiledb.dtype_range(dtype) dim_max = dtype_max if dtype.kind == "M": date_unit = np.datetime_data(dtype)[0] dim_min = np.datetime64(dtype_min, date_unit) tile_max = np.iinfo(np.uint64).max - tile if np.uint64(dtype_max - dtype_min) > tile_max: dim_max = np.datetime64(dtype_max - tile, date_unit) else: dim_min = dtype_min if np.issubdtype(dtype, np.integer): tile_max = np.iinfo(np.uint64).max - tile if np.uint64(dtype_max - dtype_min) > tile_max: dim_max = dtype_max - tile else: dim_min, 
dim_max = None, None else: if not isinstance(values, np.ndarray): values = values.values dim_min = np.min(values) dim_max = np.max(values) if np.issubdtype(dtype, np.integer) or dtype.kind == "M": # we can't make a tile larger than the dimension range or lower than 1 tile = max(1, min(tile, np.uint64(dim_max - dim_min))) elif np.issubdtype(dtype, np.floating): # this difference can be inf with np.errstate(over="ignore"): dim_range = dim_max - dim_min if dim_range < tile: tile = np.ceil(dim_range) return tiledb.Dim( name=name, domain=(dim_min, dim_max), # libtiledb only supports TILEDB_ASCII dimensions, so we must use # nb.bytes_ which will force encoding on write dtype=np.bytes_ if dtype == np.str_ else dtype, tile=tile, filters=dim_filters, ) def _sparse_from_dtypes(dtypes, sparse=None): if any(dtype in (np.bytes_, np.str_) for dtype in dtypes): if sparse is False: raise TileDBError("Cannot create dense array with string-typed dimensions") if sparse is None: return True dtype0 = next(iter(dtypes)) if not all(dtype0 == dtype for dtype in dtypes): if sparse is False: raise TileDBError( "Cannot create dense array with heterogeneous dimension data types" ) if sparse is None: return True # Fall back to default dense type if unspecified and not inferred from dimension types return sparse if sparse is not None else False def create_dims(df, index_dims, tile=None, full_domain=False, filters=None): check_dataframe_deps() import pandas as pd per_dim_tile = isinstance(tile, dict) if tile is not None: tile_values = tile.values() if per_dim_tile else (tile,) if not all(isinstance(v, (int, float)) for v in tile_values): raise ValueError( "Invalid tile kwarg: expected int or dict of column names mapped to ints. " f"Got '{tile!r}'" ) index = df.index name_dtype_values = [] dim_metadata = {} for name in index_dims or index.names: if name in index.names: values = index.get_level_values(name) elif name in df.columns: values = df[name] else: raise ValueError(f"Unknown column or index named {name!r}") dtype = ColumnInfo.from_values(values).dtype internal_dtype = dtype if name == "__tiledb_rows" and isinstance(index, pd.RangeIndex): internal_dtype = np.dtype("uint64") if name is None: name = "__tiledb_rows" dim_metadata[name] = dtype name_dtype_values.append((name, internal_dtype, values)) ndim = len(name_dtype_values) default_dim_tile = ( 10000 if ndim == 1 else 1000 if ndim == 2 else 100 if ndim == 3 else 10 ) def get_dim_tile(name): dim_tile = tile.get(name) if per_dim_tile else tile return dim_tile if dim_tile is not None else default_dim_tile dims = [ dim_for_column( name, values, dtype, tile=get_dim_tile(name), full_domain=full_domain, dim_filters=_get_attr_dim_filters(name, filters), ) for name, dtype, values in name_dtype_values ] return dims, dim_metadata def write_array_metadata(array, attr_metadata=None, index_metadata=None): """ :param array: open, writable TileDB array :param metadata: dict :return: """ if attr_metadata: attr_md_dict = {n: str(t) for n, t in attr_metadata.items()} array.meta["__pandas_attribute_repr"] = json.dumps(attr_md_dict) if index_metadata: index_md_dict = {n: str(t) for n, t in index_metadata.items()} array.meta["__pandas_index_dims"] = json.dumps(index_md_dict) def _df_to_np_arrays(df, column_infos, fillna): ret = {} nullmaps = {} for name, column in df.items(): column_info = column_infos[name] if fillna is not None and name in fillna: column = column.fillna(fillna[name]) to_numpy_kwargs = {} if not column_info.var: to_numpy_kwargs.update(dtype=column_info.dtype) if 
column_info.nullable: # use default 0/empty for the dtype to_numpy_kwargs.update(na_value=column_info.dtype.type()) nullmaps[name] = (~column.isna()).to_numpy(dtype=np.uint8) ret[name] = column.to_numpy(**to_numpy_kwargs) return ret, nullmaps def from_pandas(uri, dataframe, **kwargs): """Create TileDB array at given URI from a Pandas dataframe Supports most Pandas series types, including nullable integers and bools. :param uri: URI for new TileDB array :param dataframe: pandas DataFrame :param mode: Creation mode, one of 'ingest' (default), 'schema_only', 'append' :Keyword Arguments: * Any `pandas.read_csv `_ supported keyword argument * **ctx** - A TileDB context * **sparse** - (default True) Create sparse schema * **index_dims** - Set the df index using a list of existing column names * **allows_duplicates** - Generated schema should allow duplicates * **mode** - (default ``ingest``), Ingestion mode: ``ingest``, ``schema_only``, ``append`` * **attr_filters** - FilterList to apply to Attributes: FilterList or Dict[str -> FilterList] for any attribute(s). Unspecified attributes will use default. * **dim_filters** - FilterList to apply to Dimensions: FilterList or Dict[str -> FilterList] for any dimensions(s). Unspecified dimensions will use default. * **coords_filters** - FilterList to apply to all coordinates (Dimensions) * **offsets_filters** - FilterList to apply to all offsets * **full_domain** - Dimensions should be created with full range of the dtype * **tile** - Dimension tiling: accepts either an int that applies the tiling to all dimensions or a dict("dim_name": int) to specifically assign tiling to a given dimension * **row_start_idx** - Start index to start new write (for row-indexed ingestions). * **fillna** - Value to use to to fill holes * **column_types** - Dictionary of {``column_name``: dtype} to apply dtypes to columns * **varlen_types** - A set of {dtypes}; any column wihin the set is converted to a variable length attribute * **capacity** - Schema capacity. * **date_spec** - Dictionary of {``column_name``: format_spec} to apply to date/time columns which are not correctly inferred by pandas 'parse_dates'. Format must be specified using the Python format codes: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior * **cell_order** - (default 'row-major) Schema cell order: 'row-major', 'col-major', or 'hilbert' * **tile_order** - (default 'row-major) Schema tile order: 'row-major' or 'col-major' * **timestamp** - Write TileDB array at specific timestamp. 
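
    **Example:**

    A minimal, illustrative ingestion; the URI and column names are
    hypothetical:

    >>> import pandas as pd
    >>> import tiledb
    >>> df = pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]})
    >>> tiledb.from_pandas("df_array.tdb", df, sparse=True)
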
:raises: :py:exc:`tiledb.TileDBError` :return: None """ check_dataframe_deps() if "tiledb_args" in kwargs: tiledb_args = kwargs.pop("tiledb_args") else: tiledb_args = parse_tiledb_kwargs(kwargs) with tiledb.scope_ctx(tiledb_args.get("ctx")): _from_pandas(uri, dataframe, tiledb_args) def _from_pandas(uri, dataframe, tiledb_args): import pandas as pd mode = tiledb_args.get("mode", "ingest") if mode != "append" and tiledb.array_exists(uri): raise TileDBError("Array URI '{}' already exists!".format(uri)) sparse = tiledb_args["sparse"] index_dims = tiledb_args.get("index_dims") or () row_start_idx = tiledb_args.get("row_start_idx") write = True create_array = True if mode is not None: if mode == "schema_only": write = False elif mode == "append": create_array = False schema = tiledb.ArraySchema.load(uri) if not schema.sparse and row_start_idx is None: raise TileDBError( "Cannot append to dense array without 'row_start_idx'" ) elif mode != "ingest": raise TileDBError("Invalid mode specified ('{}')".format(mode)) # TODO: disentangle the full_domain logic full_domain = tiledb_args.get("full_domain", False) if sparse == False and (not index_dims or "index_col" not in kwargs): full_domain = True if full_domain is None and tiledb_args.get("nrows"): full_domain = False date_spec = tiledb_args.get("date_spec") if date_spec: dataframe = dataframe.assign( **{ name: pd.to_datetime(dataframe[name], format=format) for name, format in date_spec.items() } ) dataframe.columns = dataframe.columns.map(str) column_infos = _get_column_infos( dataframe, tiledb_args.get("column_types"), tiledb_args.get("varlen_types") ) with tiledb.scope_ctx(tiledb_args.get("ctx")): if create_array: _create_array( uri, dataframe, sparse, full_domain, index_dims, column_infos, tiledb_args, ) if write: if tiledb_args.get("debug", True): print(f"`tiledb.from_pandas` writing '{len(dataframe)}' rows") write_dict, nullmaps = _df_to_np_arrays( dataframe, column_infos, tiledb_args.get("fillna") ) _write_array( uri, dataframe, write_dict, nullmaps, create_array, index_dims, row_start_idx, timestamp=tiledb_args.get("timestamp"), ) def _create_array(uri, df, sparse, full_domain, index_dims, column_infos, tiledb_args): dims, dim_metadata = create_dims( df, index_dims, full_domain=full_domain, tile=tiledb_args.get("tile"), filters=tiledb_args.get("dim_filters", True), ) sparse = _sparse_from_dtypes(dim_metadata.values(), sparse) # ignore any column used as a dim/index attr_names = [c for c in df.columns if c not in index_dims] attrs, attr_metadata = _get_attrs( attr_names, column_infos, tiledb_args.get("attr_filters", True) ) # create the ArraySchema schema = tiledb.ArraySchema( sparse=sparse, domain=tiledb.Domain(*dims), attrs=attrs, cell_order=tiledb_args["cell_order"], tile_order=tiledb_args["tile_order"], coords_filters=_get_schema_filters(tiledb_args.get("coords_filters", True)), offsets_filters=_get_schema_filters(tiledb_args.get("offsets_filters", True)), # 0 will use the libtiledb internal default capacity=tiledb_args.get("capacity") or 0, # don't set allows_duplicates=True for dense allows_duplicates=sparse and tiledb_args.get("allows_duplicates", False), ) tiledb.Array.create(uri, schema) # write the metadata so we can reconstruct df with tiledb.open(uri, "w") as A: write_array_metadata(A, attr_metadata, dim_metadata) def _write_array( uri, df, write_dict, nullmaps, create_array, index_dims, row_start_idx=None, timestamp=None, ): with tiledb.open(uri, "w", timestamp=timestamp) as A: if A.schema.sparse: coords = [] for k in 
range(A.schema.ndim): dim_name = A.schema.domain.dim(k).name if ( (not create_array or dim_name in index_dims) and dim_name not in df.index.names and dim_name != "__tiledb_rows" ): # this branch handles the situation where a user did not specify # index_col and is using mode='append'. We would like to try writing # with the columns corresponding to existing dimension name. coords.append(write_dict.pop(dim_name)) else: coords.append(df.index.get_level_values(k)) # TODO ensure correct col/dim ordering libtiledb._setitem_impl_sparse(A, tuple(coords), write_dict, nullmaps) else: if row_start_idx is None: row_start_idx = 0 row_end_idx = row_start_idx + len(df) A._setitem_impl(slice(row_start_idx, row_end_idx), write_dict, nullmaps) def open_dataframe(uri, *, attrs=None, use_arrow=None, idx=slice(None), ctx=None): """Open TileDB array at given URI as a Pandas dataframe If the array was saved using tiledb.from_pandas, then columns will be interpreted as non-primitive pandas or numpy types when available. :param uri: :return: dataframe constructed from given TileDB array URI **Example:** >>> import tiledb >>> df = tiledb.open_dataframe("iris.tldb") >>> tiledb.object_type("iris.tldb") 'array' """ check_dataframe_deps() # TODO support `distributed=True` option? with tiledb.open(uri, ctx=ctx) as A: df = A.query(attrs=attrs, use_arrow=use_arrow, coords=True).df[idx] if attrs and list(df.columns) != list(attrs): df = df[attrs] return df def _iterate_csvs_pandas(csv_list, pandas_args): """Iterate over a list of CSV files. Uses pandas.read_csv with pandas_args and returns a list of dataframe(s) for each iteration, up to the specified 'chunksize' argument in 'pandas_args' """ import pandas as pd assert "chunksize" in pandas_args chunksize = pandas_args["chunksize"] rows_read = 0 result_list = list() file_iter = iter(csv_list) next_file = next(file_iter, None) while next_file is not None: df_iter = pd.read_csv(next_file, **pandas_args) df_iter.chunksize = chunksize - rows_read df = next(df_iter, None) while df is not None: result_list.append(df) rows_read += len(df) df_iter.chunksize = chunksize - rows_read if rows_read == chunksize: yield result_list # start over rows_read = 0 df_iter.chunksize = chunksize result_list = list() df = next(df_iter, None) next_file = next(file_iter, None) if next_file is None and len(result_list) > 0: yield result_list def from_csv(uri, csv_file, **kwargs): """ Create TileDB array at given URI from a CSV file or list of files :param uri: URI for new TileDB array :param csv_file: input CSV file or list of CSV files. Note: multi-file ingestion requires a `chunksize` argument. Files will be read in batches of at least `chunksize` rows before writing to the TileDB array. :Keyword Arguments: * Any `pandas.read_csv `_ supported keyword argument * **ctx** - A TileDB context * **sparse** - (default True) Create sparse schema * **index_dims** - Set the df index using a list of existing column names * **allows_duplicates** - Generated schema should allow duplicates * **mode** - (default ``ingest``), Ingestion mode: ``ingest``, ``schema_only``, ``append`` * **attr_filters** - FilterList to apply to Attributes: FilterList or Dict[str -> FilterList] for any attribute(s). Unspecified attributes will use default. * **dim_filters** - FilterList to apply to Dimensions: FilterList or Dict[str -> FilterList] for any dimensions(s). Unspecified dimensions will use default. 
* **coords_filters** - FilterList to apply to all coordinates (Dimensions) * **offsets_filters** - FilterList to apply to all offsets * **full_domain** - Dimensions should be created with full range of the dtype * **tile** - Dimension tiling: accepts either an int that applies the tiling to all dimensions or a dict("dim_name": int) to specifically assign tiling to a given dimension * **row_start_idx** - Start index to start new write (for row-indexed ingestions). * **fillna** - Value to use to fill holes * **column_types** - Dictionary of {``column_name``: dtype} to apply dtypes to columns * **varlen_types** - A set of {dtypes}; any column within the set is converted to a variable length attribute * **capacity** - Schema capacity. * **date_spec** - Dictionary of {``column_name``: format_spec} to apply to date/time columns which are not correctly inferred by pandas 'parse_dates'. Format must be specified using the Python format codes: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior * **cell_order** - (default 'row-major') Schema cell order: 'row-major', 'col-major', or 'hilbert' * **tile_order** - (default 'row-major') Schema tile order: 'row-major' or 'col-major' * **timestamp** - Write TileDB array at specific timestamp. :return: None **Example:** >>> import tiledb >>> tiledb.from_csv("iris.tldb", "iris.csv") >>> tiledb.object_type("iris.tldb") 'array' """ check_dataframe_deps() import pandas if "tiledb_args" in kwargs: tiledb_args = kwargs.get("tiledb_args") else: tiledb_args = parse_tiledb_kwargs(kwargs) multi_file = False pandas_args = copy.deepcopy(kwargs) ########################################################################## # set up common arguments ########################################################################## if isinstance(csv_file, str) and not os.path.isfile(csv_file): # for non-local files, use TileDB VFS i/o vfs = tiledb.VFS(ctx=tiledb_args.get("ctx")) csv_file = tiledb.FileIO(vfs, csv_file, mode="rb") elif isinstance(csv_file, (list, tuple)): # TODO may be useful to support a filter callback here multi_file = True mode = tiledb_args.get("mode", None) if mode is not None: # For schema_only mode we need to pass a max read count into # pandas.read_csv # Note that 'nrows' is a pandas arg! if mode == "schema_only" and not "nrows" in kwargs: pandas_args["nrows"] = 500 elif mode not in ["ingest", "append"]: raise TileDBError("Invalid mode specified ('{}')".format(mode)) if mode != "append" and tiledb.array_exists(uri): raise TileDBError("Array URI '{}' already exists!".format(uri)) # this is a pandas pass-through argument, do not pop!
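    # An illustrative multi-file call (a sketch; the array URI and file names
    # are hypothetical). Batches of roughly `chunksize` rows are read across
    # the input files, concatenated, and appended to the array in turn:
    #
    #   tiledb.from_csv("batched_array", ["part1.csv", "part2.csv"],
    #                   chunksize=100000, sparse=False)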
chunksize = kwargs.get("chunksize", None) if multi_file and not (chunksize or mode == "schema_only"): raise TileDBError("Multiple input CSV files requires a 'chunksize' argument") if multi_file: input_csv_list = csv_file else: input_csv = csv_file ########################################################################## # handle multi_file and chunked arguments ########################################################################## # we need to use full-domain for multi or chunked reads, because we # won't get a chance to see the full range during schema creation if multi_file or chunksize is not None: if not "nrows" in kwargs: tiledb_args["full_domain"] = True ########################################################################## # read path ########################################################################## if multi_file: array_created = False if mode == "append": array_created = True rows_written = 0 # multi-file or chunked always writes to full domain # TODO: allow specifying dimension range for schema creation tiledb_args["full_domain"] = True for df_list in _iterate_csvs_pandas(input_csv_list, pandas_args): if df_list is None: break df = pandas.concat(df_list) if not "index_col" in tiledb_args and df.index.name is None: df.index.name = "__tiledb_rows" tiledb_args["row_start_idx"] = rows_written from_pandas(uri, df, tiledb_args=tiledb_args, pandas_args=pandas_args) tiledb_args["mode"] = "append" rows_written += len(df) if mode == "schema_only": break elif chunksize is not None: rows_written = 0 # for chunked reads, we need to iterate over chunks df_iter = pandas.read_csv(input_csv, **pandas_args) df = next(df_iter, None) while df is not None: if not "index_col" in tiledb_args and df.index.name is None: df.index.name = "__tiledb_rows" # tell from_pandas what row to start the next write tiledb_args["row_start_idx"] = rows_written from_pandas(uri, df, tiledb_args=tiledb_args, pandas_args=pandas_args) tiledb_args["mode"] = "append" rows_written += len(df) df = next(df_iter, None) else: df = pandas.read_csv(csv_file, **kwargs) if not "index_col" in tiledb_args and df.index.name is None: df.index.name = "__tiledb_rows" kwargs.update(tiledb_args) from_pandas(uri, df, **kwargs) TileDB-Py-0.12.2/tiledb/debug.cc000066400000000000000000000040611417663620700161420ustar00rootroot00000000000000#include #ifndef TILEDBPY_DEBUGCC #define TILEDBPY_DEBUGCC namespace { extern "C" { namespace py = pybind11; using namespace pybind11::literals; // __attribute__((used)) to make the linker keep the symbol __attribute__((used)) static void pyprint(pybind11::object o) { pybind11::print(o); } __attribute__((used)) static void pyprint(pybind11::handle h) { pybind11::print(h); } __attribute__((used)) static std::string pyrepr(py::handle h) { auto locals = py::dict("_v"_a = h); return py::cast(py::eval("repr(_v)", py::globals(), locals)); } __attribute__((used)) static std::string pyrepr(py::object o) { auto locals = py::dict("_v"_a = o); return py::cast(py::eval("repr(_v)", py::globals(), locals)); } __attribute__((used)) static void pycall1(const char *expr, pybind11::object o = py::none()) { // this doesn't work in lldb // py::scoped_interpreter guard{}; /* * NOTE: the catch statements below do not work in lldb, because exceptions * are trapped internally. So, an error in eval currently breaks * use of this function until the process is restarted. 
*/ // usage: given some py::object 'o', exec a string w/ 'local _v'==o, e.g.: // (lldb) p pycall1("_v.shape", o) py::object res = py::none(); try { if (!o.is(py::none())) { auto locals = py::dict("_v"_a = o); res = py::eval(expr, py::globals(), locals); } else { res = py::eval(expr, py::globals()); } if (!res.is(py::none())) { py::print(res); } } catch (py::error_already_set &e) { std::cout << "pycall error_already_set: " << std::endl; } catch (std::runtime_error &e) { std::cout << "pycall runtime_error: " << e.what() << std::endl; } catch (...) { std::cout << "pycall unknown exception" << std::endl; } } __attribute__((used)) static void pycall(const char *expr) { pycall1(expr, py::none()); } __attribute__((used)) static void pyerror() { // print the last py error, if any } } }; // namespace #endif TileDB-Py-0.12.2/tiledb/fragment.cc000066400000000000000000000222111417663620700166540ustar00rootroot00000000000000 #include #include #include #include #include #define TILEDB_DEPRECATED #define TILEDB_DEPRECATED_EXPORT #include "util.h" #include // C++ #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 2 #if !defined(NDEBUG) //#include "debug.cc" #endif namespace tiledbpy { using namespace std; using namespace tiledb; namespace py = pybind11; using namespace pybind11::literals; class PyFragmentInfo { private: Context ctx_; unique_ptr fi_; py::object schema_; uint32_t num_fragments_; py::tuple uri_; py::tuple version_; py::tuple nonempty_domain_; py::tuple cell_num_; py::tuple timestamp_range_; py::tuple sparse_; uint32_t unconsolidated_metadata_num_; py::tuple has_consolidated_metadata_; py::tuple to_vacuum_; py::tuple mbrs_; py::tuple array_schema_name_; public: tiledb_ctx_t *c_ctx_; public: PyFragmentInfo() = delete; PyFragmentInfo(const string &uri, py::object schema, py::bool_ include_mbrs, py::object ctx) { schema_ = schema; tiledb_ctx_t *c_ctx_ = (py::capsule)ctx.attr("__capsule__")(); if (c_ctx_ == nullptr) TPY_ERROR_LOC("Invalid context pointer!"); ctx_ = Context(c_ctx_, false); fi_ = unique_ptr(new FragmentInfo(ctx_, uri)); load(); num_fragments_ = fragment_num(); uri_ = fill_uri(); version_ = fill_version(); nonempty_domain_ = fill_non_empty_domain(); cell_num_ = fill_cell_num(); timestamp_range_ = fill_timestamp_range(); sparse_ = fill_sparse(); unconsolidated_metadata_num_ = unconsolidated_metadata_num(); has_consolidated_metadata_ = fill_has_consolidated_metadata(); to_vacuum_ = fill_to_vacuum_uri(); array_schema_name_ = fill_array_schema_name(); #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 5 if (include_mbrs) mbrs_ = fill_mbr(); #endif close(); } uint32_t get_num_fragments() { return num_fragments_; }; py::tuple get_uri() { return uri_; }; py::tuple get_version() { return version_; }; py::tuple get_nonempty_domain() { return nonempty_domain_; }; py::tuple get_cell_num() { return cell_num_; }; py::tuple get_timestamp_range() { return timestamp_range_; }; py::tuple get_sparse() { return sparse_; }; uint32_t get_unconsolidated_metadata_num() { return unconsolidated_metadata_num_; }; py::tuple get_has_consolidated_metadata() { return has_consolidated_metadata_; }; py::tuple get_to_vacuum() { return to_vacuum_; }; py::tuple get_mbrs() { return mbrs_; }; py::tuple get_array_schema_name() { return array_schema_name_; }; void dump() const { return fi_->dump(stdout); } private: template py::object for_all_fid(T (FragmentInfo::*fn)(uint32_t) const) const { py::list l; uint32_t nfrag = fragment_num(); for (uint32_t i = 0; i < nfrag; ++i) l.append((fi_.get()->*fn)(i)); 
return py::tuple(l); } void load() const { try { fi_->load(); } catch (TileDBError &e) { TPY_ERROR_LOC(e.what()); } } void load(tiledb_encryption_type_t encryption_type, const string &encryption_key) const { try { fi_->load(encryption_type, encryption_key); } catch (TileDBError &e) { TPY_ERROR_LOC(e.what()); } } void close() { fi_.reset(); } py::tuple fill_uri() const { return for_all_fid(&FragmentInfo::fragment_uri); } py::tuple fill_non_empty_domain() const { py::list all_frags; uint32_t nfrag = fragment_num(); for (uint32_t fid = 0; fid < nfrag; ++fid) all_frags.append(fill_non_empty_domain(fid)); return std::move(all_frags); } py::tuple fill_non_empty_domain(uint32_t fid) const { py::list all_dims; int ndim = (schema_.attr("domain").attr("ndim")).cast(); for (int did = 0; did < ndim; ++did) all_dims.append(fill_non_empty_domain(fid, did)); return std::move(all_dims); } template py::tuple fill_non_empty_domain(uint32_t fid, T did) const { py::bool_ isvar = get_dim_isvar(schema_.attr("domain"), did); if (isvar) { pair lims = fi_->non_empty_domain_var(fid, did); return py::make_tuple(lims.first, lims.second); } py::dtype type = get_dim_type(schema_.attr("domain"), did); py::dtype array_type = type.kind() == 'M' ? pybind11::dtype::of() : type; py::array limits = py::array(array_type, 2); py::buffer_info buffer = limits.request(); fi_->get_non_empty_domain(fid, did, buffer.ptr); if (type.kind() == 'M') { auto np = py::module::import("numpy"); auto datetime64 = np.attr("datetime64"); auto datetime_data = np.attr("datetime_data"); uint64_t *dates = static_cast(buffer.ptr); limits = py::make_tuple(datetime64(dates[0], datetime_data(type)), datetime64(dates[1], datetime_data(type))); } return std::move(limits); } py::bool_ get_dim_isvar(py::object dom, uint32_t did) const { // passing templated type "did" to Python function dom.attr("dim") // does not work return (dom.attr("dim")(did).attr("isvar")).cast(); } py::bool_ get_dim_isvar(py::object dom, string did) const { // passing templated type "did" to Python function dom.attr("dim") // does not work return (dom.attr("dim")(did).attr("isvar")).cast(); } py::dtype get_dim_type(py::object dom, uint32_t did) const { // passing templated type "did" to Python function dom.attr("dim") // does not work return (dom.attr("dim")(did).attr("dtype")).cast(); } py::dtype get_dim_type(py::object dom, string did) const { // passing templated type "did" to Python function dom.attr("dim") // does not work return (dom.attr("dim")(did).attr("dtype")).cast(); } py::tuple fill_timestamp_range() const { return for_all_fid(&FragmentInfo::timestamp_range); } uint32_t fragment_num() const { return fi_->fragment_num(); } py::tuple fill_sparse() const { return for_all_fid(&FragmentInfo::sparse); } py::tuple fill_cell_num() const { return for_all_fid(&FragmentInfo::cell_num); } py::tuple fill_version() const { return for_all_fid(&FragmentInfo::version); } py::tuple fill_has_consolidated_metadata() const { return for_all_fid(&FragmentInfo::has_consolidated_metadata); } uint32_t unconsolidated_metadata_num() const { return fi_->unconsolidated_metadata_num(); } uint32_t to_vacuum_num() const { return fi_->to_vacuum_num(); } py::tuple fill_to_vacuum_uri() const { py::list l; uint32_t nfrag = to_vacuum_num(); for (uint32_t i = 0; i < nfrag; ++i) l.append((fi_->to_vacuum_uri(i))); return py::tuple(l); } #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 5 py::tuple fill_mbr() const { py::list all_frags; uint32_t nfrag = fragment_num(); for (uint32_t fid = 0; fid < nfrag; 
++fid) all_frags.append(fill_mbr(fid)); return std::move(all_frags); } py::tuple fill_mbr(uint32_t fid) const { py::list all_mbrs; uint64_t nmbr = fi_->mbr_num(fid); for (uint32_t mid = 0; mid < nmbr; ++mid) all_mbrs.append(fill_mbr(fid, mid)); return std::move(all_mbrs); } py::tuple fill_mbr(uint32_t fid, uint32_t mid) const { py::list all_dims; int ndim = (schema_.attr("domain").attr("ndim")).cast(); for (int did = 0; did < ndim; ++did) all_dims.append(fill_mbr(fid, mid, did)); return std::move(all_dims); } py::tuple fill_mbr(uint32_t fid, uint32_t mid, uint32_t did) const { py::dtype type = get_dim_type(schema_.attr("domain"), did); py::dtype array_type = type.kind() == 'M' ? pybind11::dtype::of() : type; py::array limits = py::array(array_type, 2); py::buffer_info buffer = limits.request(); fi_->get_mbr(fid, mid, did, buffer.ptr); return std::move(limits); } py::tuple fill_array_schema_name() const { return for_all_fid(&FragmentInfo::array_schema_name); } #endif }; void init_fragment(py::module &m) { py::class_(m, "PyFragmentInfo") .def(py::init()) .def("get_num_fragments", &PyFragmentInfo::get_num_fragments) .def("get_uri", &PyFragmentInfo::get_uri) .def("get_version", &PyFragmentInfo::get_version) .def("get_nonempty_domain", &PyFragmentInfo::get_nonempty_domain) .def("get_cell_num", &PyFragmentInfo::get_cell_num) .def("get_timestamp_range", &PyFragmentInfo::get_timestamp_range) .def("get_sparse", &PyFragmentInfo::get_sparse) .def("get_unconsolidated_metadata_num", &PyFragmentInfo::get_unconsolidated_metadata_num) .def("get_has_consolidated_metadata", &PyFragmentInfo::get_has_consolidated_metadata) .def("get_to_vacuum", &PyFragmentInfo::get_to_vacuum) #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 5 .def("get_mbrs", &PyFragmentInfo::get_mbrs) .def("get_array_schema_name", &PyFragmentInfo::get_array_schema_name) #endif .def("dump", &PyFragmentInfo::dump); } }; // namespace tiledbpy #endif TileDB-Py-0.12.2/tiledb/fragment.py000066400000000000000000000540531417663620700167300ustar00rootroot00000000000000import pprint import warnings import numpy as np import os import tiledb from tiledb.main import PyFragmentInfo """ Classes and functions relating to TileDB fragments. """ class FragmentInfoList: """ Class representing an ordered list of FragmentInfo objects. :param array_uri: URI for the TileDB array (any supported TileDB URI) :type array_uri: str :param include_mbrs: (default False) include minimum bounding rectangles in FragmentInfo result :type include_mbrs: bool :param ctx: A TileDB context :type ctx: tiledb.Ctx :ivar uri: URIs of fragments :ivar version: Fragment version of each fragment :ivar nonempty_domain: Non-empty domain of each fragment :ivar cell_num: Number of cells in each fragment :ivar timestamp_range: Timestamp range of when each fragment was written :ivar sparse: For each fragment, True if fragment is sparse, else False :ivar has_consolidated_metadata: For each fragment, True if fragment has consolidated fragment metadata, else False :ivar unconsolidated_metadata_num: Number of unconsolidated metadata fragments in each fragment :ivar to_vacuum: URIs of already consolidated fragments to vacuum :ivar mbrs: (TileDB Embedded 2.5.0+ only) The mimimum bounding rectangle of each fragment; only present when `include_mbrs=True` :ivar array_schema_name: (TileDB Embedded 2.5.0+ only) The array schema's name **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... 
# The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4] and space tiles 2x2 ... dom = tiledb.Domain( ... tiledb.Dim(name="rows", domain=(1, 4), tile=2, dtype=np.int32), ... tiledb.Dim(name="cols", domain=(1, 4), tile=2, dtype=np.int32), ... ) ... # The array will be dense with a single attribute "a" so each (i,j) cell can store an integer. ... schema = tiledb.ArraySchema( ... domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] ... ) ... # Set URI of the array ... uri = tmp + "/array" ... # Create the (empty) array on disk. ... tiledb.Array.create(uri, schema) ... ... # Write three fragments to the array ... with tiledb.DenseArray(uri, mode="w") as A: ... A[1:3, 1:5] = np.array(([1, 2, 3, 4, 5, 6, 7, 8])) ... with tiledb.DenseArray(uri, mode="w") as A: ... A[2:4, 2:4] = np.array(([101, 102, 103, 104])) ... with tiledb.DenseArray(uri, mode="w") as A: ... A[3:4, 4:5] = np.array(([202])) ... ... # tiledb.array_fragments() requires TileDB-Py version > 0.8.5 ... fragments_info = tiledb.array_fragments(uri) ... ... "====== FRAGMENTS INFO ======" ... f"number of fragments: {len(fragments_info)}" ... f"nonempty domains: {fragments_info.nonempty_domain}" ... f"sparse fragments: {fragments_info.sparse}" ... ... for fragment in fragments_info: ... f"===== FRAGMENT NUMBER {fragment.num} =====" ... f"is sparse: {fragment.sparse}" ... f"cell num: {fragment.cell_num}" ... f"has consolidated metadata: {fragment.has_consolidated_metadata}" ... f"nonempty domain: {fragment.nonempty_domain}" '====== FRAGMENTS INFO ======' 'number of fragments: 3' 'nonempty domains: (((1, 2), (1, 4)), ((2, 3), (2, 3)), ((3, 3), (4, 4)))' 'sparse fragments: (False, False, False)' '===== FRAGMENT NUMBER 0 =====' 'is sparse: False' 'cell num: 8' 'has consolidated metadata: False' 'nonempty domain: ((1, 2), (1, 4))' '===== FRAGMENT NUMBER 1 =====' 'is sparse: False' 'cell num: 16' 'has consolidated metadata: False' 'nonempty domain: ((2, 3), (2, 3))' '===== FRAGMENT NUMBER 2 =====' 'is sparse: False' 'cell num: 4' 'has consolidated metadata: False' 'nonempty domain: ((3, 3), (4, 4))' """ def __init__(self, array_uri, include_mbrs=False, ctx=None): if ctx is None: ctx = tiledb.default_ctx() schema = tiledb.ArraySchema.load(array_uri, ctx=ctx) self.array_uri = array_uri fi = PyFragmentInfo(self.array_uri, schema, include_mbrs, ctx) self.__nums = fi.get_num_fragments() self.uri = fi.get_uri() self.version = fi.get_version() self.nonempty_domain = fi.get_nonempty_domain() self.cell_num = fi.get_cell_num() self.timestamp_range = fi.get_timestamp_range() self.sparse = fi.get_sparse() self.unconsolidated_metadata_num = fi.get_unconsolidated_metadata_num() self.has_consolidated_metadata = fi.get_has_consolidated_metadata() self.to_vacuum = fi.get_to_vacuum() if include_mbrs: if tiledb.libtiledb.version() >= (2, 5, 0): self.mbrs = fi.get_mbrs() else: warnings.warn( "MBRs for fragments not available; " "please install libtiledb 2.5.0+", UserWarning, ) if tiledb.libtiledb.version() >= (2, 5, 0): self.array_schema_name = fi.get_array_schema_name() @property def non_empty_domain(self): warnings.warn( "FragmentInfoList.non_empty_domain is deprecated; " "please use FragmentInfoList.nonempty_domain", DeprecationWarning, ) return self.nonempty_domain @property def to_vacuum_num(self): warnings.warn( "FragmentInfoList.to_vacuum_num is deprecated; " "please use len(FragmentInfoList.to_vacuum)", DeprecationWarning, ) return len(self.to_vacuum) @property def to_vacuum_uri(self): warnings.warn( 
"FragmentInfoList.to_vacuum_uri is deprecated; " "please use FragmentInfoList.to_vacuum", DeprecationWarning, ) return self.to_vacuum @property def dense(self): warnings.warn( "FragmentInfoList.dense is deprecated; " "please use FragmentInfoList.sparse", DeprecationWarning, ) return list(~np.array(self.sparse)) def __getattr__(self, name): if name == "mbrs": raise AttributeError( "'FragmentInfoList' object has no attribute 'mbrs'. " "(Hint: retrieving minimum bounding rectangles is disabled " "by default to optimize speed and space. " "Use tiledb.array_fragments(include_mbrs=True) to enable)" ) return self.__getattribute__(name) def __iter__(self): return FragmentsInfoIterator(self) def __getitem__(self, key): if isinstance(key, slice): # Get the start, stop, and step from the slice return [FragmentInfo(self, idx) for idx in range(*key.indices(len(self)))] elif isinstance(key, int): return FragmentInfo(self, key) else: raise TypeError("Invalid argument type.") def __len__(self): return self.__nums def __repr__(self): public_attrs = { key: value for (key, value) in self.__dict__.items() if not key.startswith("_") } return pprint.PrettyPrinter().pformat(public_attrs) def _repr_html_(self) -> str: from io import StringIO output = StringIO() output.write("
<section>\n") output.write(f"<h3>Fragments for {self.array_uri}</h3>\n") for frag in self: output.write("<section>\n") output.write(f"<h4>{frag.uri}</h4>\n") output.write(frag._repr_html_()) output.write("</section>\n") output.write("</section>
\n") return output.getvalue() class FragmentsInfoIterator: """ Iterator class for the FragmentsInfo container. """ def __init__(self, fragments): self._fragments = fragments self._index = 0 def __next__(self): if self._index < len(self._fragments): fi = FragmentInfo(self._fragments, self._index) self._index += 1 return fi raise StopIteration class FragmentInfo: """ Class representing the metadata for a single fragment. See :py:class:`tiledb.FragmentInfoList` for example of usage. :ivar uri: URIs of fragments :ivar version: Fragment version of each fragment :ivar nonempty_domain: Non-empty domain of each fragment :ivar cell_num: Number of cells in each fragment :ivar timestamp_range: Timestamp range of when each fragment was written :ivar sparse: For each fragment, True if fragment is sparse, else False :ivar has_consolidated_metadata: For each fragment, True if fragment has consolidated fragment metadata, else False :ivar unconsolidated_metadata_num: Number of unconsolidated metadata fragments in each fragment :ivar to_vacuum: URIs of already consolidated fragments to vacuum :ivar mbrs: (TileDB Embedded 2.5.0+ only) The mimimum bounding rectangle of each fragment; only present when `include_mbrs=True` :ivar array_schema_name: (TileDB Embedded 2.5.0+ only) The array schema's name """ def __init__(self, fragments: FragmentInfoList, num): self._frags = fragments self.num = num self.uri = fragments.uri[num] self.version = fragments.version[num] self.nonempty_domain = fragments.nonempty_domain[num] self.cell_num = fragments.cell_num[num] self.timestamp_range = fragments.timestamp_range[num] self.sparse = fragments.sparse[num] self.has_consolidated_metadata = fragments.has_consolidated_metadata[num] self.unconsolidated_metadata_num = fragments.unconsolidated_metadata_num if hasattr(fragments, "mbrs"): self.mbrs = fragments.mbrs[num] if hasattr(fragments, "array_schema_name"): self.array_schema_name = fragments.array_schema_name[num] def __repr__(self): return pprint.PrettyPrinter().pformat(self.__dict__) def _repr_html_(self) -> str: from io import StringIO output = StringIO() output.write("
<table>\n") for key in self.__dict__: if not key.startswith("_"): output.write("<tr>\n") output.write(f"<td>{key}</td>\n") output.write(f"<td>{self.__dict__[key]}</td>\n") output.write("</tr>\n") output.write("</table>
\n") return output.getvalue() def __getattr__(self, name): if name == "mbrs": raise AttributeError( "'FragmentInfo' object has no attribute 'mbrs'. " "(Hint: retrieving minimum bounding rectangles is disabled " "by default to optimize speed and space. " "Use tiledb.array_fragments(include_mbrs=True) to enable)" ) return self.__getattribute__(name) @property def non_empty_domain(self): warnings.warn( "FragmentInfo.non_empty_domain is deprecated; " "please use FragmentInfo.nonempty_domain", DeprecationWarning, ) return self.nonempty_domain @property def to_vacuum_num(self): warnings.warn( "FragmentInfo.to_vacuum_num is deprecated; " "please use len(FragmentInfoList.to_vacuum)", DeprecationWarning, ) return len(self._frags.to_vacuum) @property def to_vacuum_uri(self): warnings.warn( "FragmentInfo.to_vacuum_uri is deprecated; " "please use FragmentInfoList.to_vacuum", DeprecationWarning, ) return self._frags.to_vacuum @property def dense(self): warnings.warn( "FragmentInfo.dense is deprecated; please use FragmentInfo.sparse", DeprecationWarning, ) return not self.sparse def FragmentsInfo(array_uri, ctx=None): """ Deprecated in 0.8.8. Renamed to FragmentInfoList to make name more distinguishable from FragmentInfo. """ warnings.warn( "FragmentsInfo is deprecated; please use FragmentInfoList", DeprecationWarning ) if ctx is None: ctx = tiledb.default_ctx() return FragmentInfoList(array_uri, ctx=ctx) def delete_fragments( uri, timestamp_range, config=None, ctx=None, verbose=False, dry_run=False ): """ Delete fragments from an array located at uri that fall within a given timestamp_range. :param str uri: URI for the TileDB array (any supported TileDB URI) :param (int, int) timestamp_range: (default None) If not None, vacuum the array using the given range (inclusive) :param config: Override the context configuration. Defaults to ctx.config() :param ctx: (optional) TileDB Ctx :param verbose: (optional) Print fragments being deleted (default: False) :param dry_run: (optional) Preview fragments to be deleted without running (default: False) """ if not isinstance(timestamp_range, tuple) or len(timestamp_range) != 2: raise TypeError( "'timestamp_range' argument expects tuple(start: int, end: int)" ) if not ctx: ctx = tiledb.default_ctx() if config is None: config = tiledb.Config(ctx.config()) vfs = tiledb.VFS(config=config, ctx=ctx) if verbose or dry_run: print("Deleting fragments:") # TODO currently we cannot mix old and new style schemas, so it is only # relevant to check if we need to delete new style schemas. we will need to # check both in the future.
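    # Illustrative usage (hypothetical URI and timestamps), assuming the
    # package-level re-export `tiledb.delete_fragments`; `dry_run=True`
    # previews the fragments/schemas to delete without removing anything:
    #
    #   tiledb.delete_fragments("my_array", (1, 2), dry_run=True)
    #   tiledb.delete_fragments("my_array", (1, 2))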
deleted_fragment_schema = set() for frag in tiledb.array_fragments(uri): if ( timestamp_range[0] <= frag.timestamp_range[0] and frag.timestamp_range[1] <= timestamp_range[1] ): if verbose or dry_run: print(f"\t{frag.uri}") if not dry_run: vfs.remove_file(f"{frag.uri}.ok") vfs.remove_dir(frag.uri) deleted_fragment_schema.add(frag.array_schema_name) schemas_in_array = set(tiledb.array_fragments(uri).array_schema_name) schemas_on_disk = set( [ os.path.basename(full_path) for full_path in vfs.ls(os.path.join(uri, "__schema")) ] ) schemas_to_remove_on_disk = list(deleted_fragment_schema - schemas_in_array) if schemas_to_remove_on_disk and (verbose or dry_run): print("Deleting schemas:") for schema_name in schemas_to_remove_on_disk: schema = os.path.join(uri, "__schema", schema_name) if verbose or dry_run: print(schema) if not dry_run: vfs.remove_file(schema) def create_array_from_fragments( src_uri, dst_uri, timestamp_range, config=None, ctx=None, verbose=False, dry_run=False, ): """ (POSIX only). Create a new array from an already existing array by selecting fragments that fall within a given timestamp_range. The original array is located at src_uri and the new array is created at dst_uri. :param str src_uri: URI for the source TileDB array (any supported TileDB URI) :param str dst_uri: URI for the newly created TileDB array (any supported TileDB URI) :param (int, int) timestamp_range: (default None) If not None, vacuum the array using the given range (inclusive) :param config: Override the context configuration. Defaults to ctx.config() :param ctx: (optional) TileDB Ctx :param verbose: (optional) Print fragments being copied (default: False) :param dry_run: (optional) Preview fragments to be copied without running (default: False) """ if tiledb.array_exists(dst_uri): raise tiledb.TileDBError(f"Array URI `{dst_uri}` already exists") if not isinstance(timestamp_range, tuple) or len(timestamp_range) != 2: raise TypeError( "'timestamp_range' argument expects tuple(start: int, end: int)" ) if not ctx: ctx = tiledb.default_ctx() if config is None: config = tiledb.Config(ctx.config()) vfs = tiledb.VFS(config=config, ctx=ctx) fragment_info = tiledb.array_fragments(src_uri) if len(fragment_info) < 1: print("Cannot create new array; no fragments to copy") return if verbose or dry_run: print(f"Creating directory for array at {dst_uri}\n") if not dry_run: vfs.create_dir(dst_uri) src_lock = os.path.join(src_uri, "__lock.tdb") dst_lock = os.path.join(dst_uri, "__lock.tdb") if verbose or dry_run: print(f"Copying lock file {dst_uri}\n") if not dry_run: vfs.copy_file(f"{src_lock}", f"{dst_lock}") list_new_style_schema = [ver >= 10 for ver in fragment_info.version] is_mixed_versions = len(set(list_new_style_schema)) > 1 if is_mixed_versions: raise tiledb.TileDBError( "Cannot copy fragments - this array contains a mix of old and " "new style schemas" ) is_new_style_schema = list_new_style_schema[0] for frag in fragment_info: if not ( timestamp_range[0] <= frag.timestamp_range[0] and frag.timestamp_range[1] <= timestamp_range[1] ): continue schema_name = frag.array_schema_name if is_new_style_schema: schema_name = os.path.join("__schema", schema_name) src_schema = os.path.join(src_uri, schema_name) dst_schema = os.path.join(dst_uri, schema_name) if verbose or dry_run: print(f"Copying schema `{src_schema}` to `{dst_schema}`\n") if not dry_run: if is_new_style_schema: new_style_schema_uri = os.path.join(dst_uri, "__schema") if not vfs.is_dir(new_style_schema_uri): vfs.create_dir(new_style_schema_uri) if not
vfs.is_file(dst_schema): vfs.copy_file(src_schema, dst_schema) src_frag = frag.uri dst_frag = os.path.join(dst_uri, os.path.basename(frag.uri)) if verbose or dry_run: print(f"Copying fragment `{src_frag}` to `{dst_frag}`\n") if not dry_run: vfs.copy_file(f"{src_frag}.ok", f"{dst_frag}.ok") vfs.copy_dir(src_frag, dst_frag) def copy_fragments_to_existing_array( src_uri, dst_uri, timestamp_range, config=None, ctx=None, verbose=False, dry_run=False, ): """ (POSIX only). Copy fragments from an array at src_uri to another array at dst_uri by selecting fragments that fall within a given timestamp_range. :param str src_uri: URI for the source TileDB array (any supported TileDB URI) :param str dst_uri: URI for the destination TileDB array (any supported TileDB URI) :param (int, int) timestamp_range: (default None) If not None, vacuum the array using the given range (inclusive) :param config: Override the context configuration. Defaults to ctx.config() :param ctx: (optional) TileDB Ctx :param verbose: (optional) Print fragments being copied (default: False) :param dry_run: (optional) Preview fragments to be copied without running (default: False) """ if not tiledb.array_exists(dst_uri): raise tiledb.TileDBError(f"Array URI `{dst_uri}` does not exist") if not isinstance(timestamp_range, tuple) or len(timestamp_range) != 2: raise TypeError( "'timestamp_range' argument expects tuple(start: int, end: int)" ) if not ctx: ctx = tiledb.default_ctx() if config is None: config = tiledb.Config(ctx.config()) vfs = tiledb.VFS(config=config, ctx=ctx) dst_schema_file = os.path.join(dst_uri, "__array_schema.tdb") src_schema_file = os.path.join(src_uri, "__array_schema.tdb") dst_schema_dir = os.path.join(dst_uri, "__schema") src_schema_dir = os.path.join(src_uri, "__schema") is_old_style = vfs.is_file(dst_schema_file) and vfs.is_file(src_schema_file) is_new_style = vfs.is_dir(dst_schema_dir) and vfs.is_dir(src_schema_dir) if is_old_style and is_new_style: raise tiledb.TileDBError( "Mix of old and new style schemas detected. There can only be " "one schema version present in both the source and destination " "arrays and both must be identical" ) elif is_new_style: if len(vfs.ls(dst_schema_dir)) != 1 or len(vfs.ls(src_schema_dir)) != 1: raise tiledb.TileDBError( "Multiple evolved schemas detected. There can only be one " "schema version present in both the source and destination " "arrays and both must be identical" ) schema_name = os.path.basename(vfs.ls(src_schema_dir)[0]) src_schema = os.path.join(src_uri, "__schema", schema_name) dst_schema = os.path.join(dst_uri, "__schema", schema_name) if tiledb.ArraySchema.load(src_uri) != tiledb.ArraySchema.load(dst_uri): raise tiledb.TileDBError( "The source and destination array must have matching schemas." ) if is_new_style: if verbose or dry_run: print(f"Copying schema `{src_schema}` to `{dst_schema}`\n") if not dry_run: vfs.copy_file(src_schema, dst_schema) fragment_info = tiledb.array_fragments(src_uri) for frag in fragment_info: if not ( timestamp_range[0] <= frag.timestamp_range[0] and frag.timestamp_range[1] <= timestamp_range[1] ): continue src_frag = frag.uri dst_frag = os.path.join(dst_uri, os.path.basename(frag.uri)) if src_frag == dst_frag: if verbose or dry_run: print( f"Fragment {src_frag} not copied. 
Already exists in " "destination array.\n" ) continue if verbose or dry_run: print(f"Copying fragment `{src_frag}` to `{dst_frag}`\n") if not dry_run: vfs.copy_file(f"{src_frag}.ok", f"{dst_frag}.ok") vfs.copy_dir(src_frag, dst_frag) TileDB-Py-0.12.2/tiledb/highlevel.py000066400000000000000000000125221417663620700170670ustar00rootroot00000000000000import tiledb import numpy as np import os from tiledb import fragment def open(uri, mode="r", key=None, attr=None, config=None, timestamp=None, ctx=None): """ Open a TileDB array at the given URI :param uri: any TileDB supported URI :param timestamp: array timestamp to open, int or None. See the TileDB `time traveling `_ documentation for detailed functionality description. :param key: encryption key, str or None :param str mode: (default 'r') Open the array object in read 'r' or write 'w' mode :param attr: attribute name to select from a multi-attribute array, str or None :param config: TileDB config dictionary, dict or None :return: open TileDB {Sparse,Dense}Array object """ return tiledb.Array.load_typed( uri, mode=mode, key=key, timestamp=timestamp, attr=attr, ctx=_get_ctx(ctx, config), ) def save(uri, array, **kwargs): """ Save array-like object at the given URI. :param uri: str or None :param array: array-like object convertible to NumPy :param kwargs: optional keyword args will be forwarded to tiledb.Array constructor :return: """ # TODO: deprecate this in favor of from_numpy? return from_numpy(uri, array, **kwargs) def empty_like(uri, arr, config=None, key=None, tile=None, ctx=None): """ Create and return an empty, writeable DenseArray with schema based on a NumPy-array like object. :param uri: array URI :param arr: NumPy ndarray, or shape tuple :param config: (optional, deprecated) configuration to apply to *new* Ctx :param key: (optional) encryption key, if applicable :param tile: (optional) tiling of generated array :param ctx: (optional) TileDB Ctx :return: """ ctx = _get_ctx(ctx, config) schema = tiledb.schema_like(arr, tile=tile, ctx=ctx) tiledb.DenseArray.create(uri, schema, key=key, ctx=ctx) return tiledb.DenseArray(uri, mode="w", key=key, ctx=ctx) def from_numpy(uri, array, config=None, ctx=None, **kwargs): """ Write a NumPy array into a TileDB DenseArray, returning a readonly DenseArray instance. :param str uri: URI for the TileDB array (any supported TileDB URI) :param numpy.ndarray array: dense numpy array to persist :param config: TileDB config dictionary, dict or None :param tiledb.Ctx ctx: A TileDB Context :param kwargs: additional arguments to pass to the DenseArray constructor :rtype: tiledb.DenseArray :return: An open DenseArray (read mode) with a single anonymous attribute :raises TypeError: cannot convert ``uri`` to unicode string :raises: :py:exc:`tiledb.TileDBError` **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... # Creates array 'array' on disk. ... with tiledb.DenseArray.from_numpy(tmp + "/array", np.array([1.0, 2.0, 3.0])) as A: ... pass """ if not isinstance(array, np.ndarray): raise Exception("from_numpy is only currently supported for numpy.ndarray") return tiledb.DenseArray.from_numpy(uri, array, ctx=_get_ctx(ctx, config), **kwargs) def array_exists(uri, isdense=False, issparse=False): """ Check if arrays exists and is open-able at the given URI Optionally restrict to `isdense` or `issparse` array types. 
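    **Example** (a minimal sketch; creates a throwaway dense array first):

    >>> import tiledb, numpy as np, tempfile
    >>> uri = tempfile.mkdtemp() + "/arr"
    >>> with tiledb.from_numpy(uri, np.zeros(3)):
    ...     pass
    >>> tiledb.array_exists(uri, isdense=True)
    True
    >>> tiledb.array_exists(uri, issparse=True)
    False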
""" try: with tiledb.open(uri) as a: if isdense: return not a.schema.sparse if issparse: return a.schema.sparse return True except tiledb.TileDBError: return False def array_fragments(uri, include_mbrs=False, ctx=None): """ Creates a `FragmentInfoList` object, which is an ordered list of `FragmentInfo` objects, representing all fragments in the array at the given URI. The returned object contain the following attributes: - `uri`: URIs of fragments - `version`: Fragment version of each fragment - `nonempty_domain`: Non-empty domain of each fragment - `cell_num`: Number of cells in each fragment - `timestamp_range`: Timestamp range of when each fragment was written - `sparse`: For each fragment, True if fragment is sparse, else False - `has_consolidated_metadata`: For each fragment, True if fragment has consolidated fragment metadata, else False - `unconsolidated_metadata_num`: Number of unconsolidated metadata fragments in each fragment - `to_vacuum`: URIs of already consolidated fragments to vacuum - `mbrs`: The mimimum bounding rectangle of each fragment; only present when `include_mbrs=True` :param str uri: URI for the TileDB array (any supported TileDB URI) :param bool include_mbrs: Include minimum bouding rectangles in result; this is disabled by default for optimize time and space :param ctx: (optional) TileDB Ctx :return: FragmentInfoList """ return tiledb.FragmentInfoList(uri, include_mbrs, ctx) def _get_ctx(ctx=None, config=None): if ctx: if config: raise ValueError( "Received extra Ctx or Config argument: either one may be provided, but not both" ) elif config: ctx = tiledb.Ctx(tiledb.Config(config)) else: ctx = tiledb.default_ctx() return ctx TileDB-Py-0.12.2/tiledb/indexing.pxd000066400000000000000000000002271417663620700170670ustar00rootroot00000000000000from .libtiledb cimport Array, ArraySchema, Query cdef class DomainIndexer: cdef object array_ref cdef ArraySchema schema cdef Query queryTileDB-Py-0.12.2/tiledb/indexing.pyx000066400000000000000000000316231417663620700171200ustar00rootroot00000000000000IF TILEDBPY_MODULAR: include "common.pxi" from .libtiledb cimport * from libc.stdio cimport printf import numpy as np from .array import DenseArray, SparseArray import weakref def _index_as_tuple(idx): """Forces scalar index objects to a tuple representation""" if isinstance(idx, tuple): return idx return (idx,) # ref # https://github.com/TileDB-Inc/TileDB-Py/issues/102 # https://github.com/TileDB-Inc/TileDB-Py/issues/201 cdef class DomainIndexer(object): @staticmethod def with_schema(ArraySchema schema): cdef DomainIndexer indexer = DomainIndexer.__new__(DomainIndexer) indexer.array = None indexer.schema = schema return indexer def __init__(self, Array array, query = None): self.array_ref = weakref.ref(array) self.schema = array.schema self.query = query @property def array(self): assert self.array_ref() is not None, \ "Internal error: invariant violation (index[] with dead array_ref)" return self.array_ref() def __getitem__(self, object idx): # implements domain-based indexing: slice by domain coordinates, not 0-based python indexing cdef ArraySchema schema = self.array.schema cdef Domain dom = schema.domain cdef ndim = dom.ndim cdef list attr_names = list() idx = _index_as_tuple(idx) if len(idx) < dom.ndim: raise IndexError("number of indices does not match domain rank: " "(got {!r}, expected: {!r})".format(len(idx), ndim)) new_idx = [] for i in range(dom.ndim): dim = dom.dim(i) dim_idx = idx[i] if np.isscalar(dim_idx): start = dim_idx stop = dim_idx 
new_idx.append(slice(start, stop, None)) else: new_idx.append(dim_idx) subarray = list() for i, subidx in enumerate(new_idx): assert isinstance(subidx, slice) subarray.append((subidx.start, subidx.stop)) attr_names = list(schema.attr(i).name for i in range(schema.nattr)) attr_cond = None order = None # TODO make coords optional for array.domain_index. there are no kwargs in slicing[], so # one way to do this would be to overload __call__ and return a new # object with a flag set. not ideal. coords = True if self.query is not None: # if we are called via Query object, then we need to respect Query semantics order = self.query.order attr_names = self.query.attrs if self.query.attrs else attr_names # query.attrs might be None -> all attr_cond = self.query.attr_cond coords = self.query.coords if coords: attr_names = [dom.dim(idx).name for idx in range(self.schema.ndim)] + attr_names if order is None or order == 'C': layout = TILEDB_ROW_MAJOR elif order == 'F': layout = TILEDB_COL_MAJOR elif order == 'G': layout = TILEDB_GLOBAL_ORDER elif order == 'U': layout = TILEDB_UNORDERED else: raise ValueError("order must be 'C' (TILEDB_ROW_MAJOR), 'F' (TILEDB_COL_MAJOR), or 'G' (TILEDB_GLOBAL_ORDER)") if isinstance(self.array, SparseArray): return (self.array)._read_sparse_subarray(subarray, attr_names, attr_cond, layout) elif isinstance(self.array, DenseArray): return (self.array)._read_dense_subarray(subarray, attr_names, attr_cond, layout, coords) else: raise Exception("No handler for Array type: " + str(type(self.array))) cdef class QueryAttr(object): cdef unicode name cdef np.dtype dtype def __init__(self, name, dtype): self.name = name self.dtype = dtype cdef dict execute_multi_index(Array array, tiledb_query_t* query_ptr, tuple attr_names, return_coord): # NOTE: query_ptr *must* only be freed in caller cdef: tiledb_ctx_t* ctx_ptr = array.ctx.ptr tiledb_query_status_t query_status cdef: uint64_t result_bytes = 0 size_t result_elements float result_elements_f, result_rem uint64_t el_count = 0 bint repeat_query = True uint64_t repeat_count = 0 uint64_t buffer_bytes_remaining = 0 uint64_t* buffer_sizes_ptr = NULL cdef: np.dtype coords_dtype unicode coord_name = (tiledb_coords()).decode('UTF-8') cdef: Attr attr Py_ssize_t attr_idx bytes battr_name unicode attr_name np.ndarray attr_array np.dtype attr_dtype QueryAttr qattr cdef list attrs = list() # Coordinate attribute buffers must be set first if return_coord: dims = tuple(array.schema.domain.dim(dim_idx) for dim_idx in \ range(array.schema.ndim)) attrs += [QueryAttr(dim.name, dim.dtype) for dim in dims] # Get the attributes attrs += [QueryAttr(a.name, a.dtype) for a in [array.schema.attr(name) for name in attr_names]] # Create and assign attribute result buffers cdef Py_ssize_t nattr = len(attrs) cdef uint64_t ndim = array.ndim cdef dict result_dict = dict() cdef np.ndarray buffer_sizes = np.zeros(nattr, np.uint64) cdef np.ndarray result_bytes_read = np.zeros(nattr, np.uint64) cdef uint64_t init_buffer_size = 1310720 * 8 # 10 MB int64 if 'py.init_buffer_bytes' in array.ctx.config(): init_buffer_size = int(array.ctx.config()['py.init_buffer_bytes']) # switch from exponential to linear (+4GB) allocation cdef uint64_t linear_alloc_bytes = 4 * (2**30) # 4 GB # There are two different conditions which may cause incomplete queries, # requiring retries and potentially reallocation to complete the read. # 1) user-allocated buffer is not large enough. In this case, we need to # allocate more memory and retry. 
This is accomplished below by resizing # the array in-place (preserving the existing data), then bumping the # query buffer pointer. # 2) internal memory limit is exceeded: the libtiledb parameter # 'sm.memory_budget' governs internal memory allocation. If libtiledb's # internal allocation exceeds this budget, the query may need to be # retried, but we do not necessarily need to bump the user buffer allocation. while repeat_query: for attr_idx in range(nattr): qattr = attrs[attr_idx] attr_name = qattr.name attr_dtype = qattr.dtype # allocate initial array if repeat_count == 0: result_dict[attr_name] = np.zeros(int(init_buffer_size / attr_dtype.itemsize), dtype=attr_dtype) # Get the array here in order to save a lookup attr_array = result_dict[attr_name] if repeat_count > 0: buffer_bytes_remaining = attr_array.nbytes - result_bytes_read[attr_idx] if buffer_sizes[attr_idx] > (.25 * buffer_bytes_remaining): # Check number of bytes read during the *last* pass. # The conditional above handles situation (2) in order to avoid re-allocation # on every repeat, in case we are reading small chunks at a time due to libtiledb # memory budget. # TODO make sure 'refcheck=False' is always safe if attr_array.nbytes < linear_alloc_bytes: attr_array.resize(attr_array.size * 2, refcheck=False) else: new_size = attr_array.size + linear_alloc_bytes / attr_dtype.itemsize attr_array.resize(new_size, refcheck=False) battr_name = attr_name.encode('UTF-8') attr_array_ptr = np.PyArray_DATA(attr_array) # we need to give the pointer to the current starting point after reallocation attr_array_ptr = \ (attr_array_ptr + result_bytes_read[attr_idx]) buffer_sizes[attr_idx] = attr_array.nbytes - result_bytes_read[attr_idx] buffer_sizes_ptr = np.PyArray_DATA(buffer_sizes) rc = tiledb_query_set_buffer(ctx_ptr, query_ptr, battr_name, attr_array_ptr, &(buffer_sizes_ptr[attr_idx])) if rc != TILEDB_OK: # NOTE: query_ptr *must* only be freed in caller _raise_ctx_err(ctx_ptr, rc) with nogil: rc = tiledb_query_submit(ctx_ptr, query_ptr) if rc != TILEDB_OK: # NOTE: query_ptr *must* only be freed in caller _raise_ctx_err(ctx_ptr, rc) # update bytes-read count for attr_idx in range(nattr): result_bytes_read[attr_idx] += buffer_sizes[attr_idx] rc = tiledb_query_get_status(ctx_ptr, query_ptr, &query_status) if rc != TILEDB_OK: # NOTE: query_ptr *must* only be freed in caller _raise_ctx_err(ctx_ptr, rc) if query_status == TILEDB_INCOMPLETE: #printf("%s\n", "got incomplete!") repeat_query = True repeat_count += 1 elif query_status == TILEDB_COMPLETED: repeat_query = False break elif query_status == TILEDB_FAILED: raise TileDBError("Query returned TILEDB_FAILED") elif query_status == TILEDB_INPROGRESS: raise TileDBError("Query returned TILEDB_INPROGRESS") elif query_status == TILEDB_INCOMPLETE: raise TileDBError("Query returned TILEDB_INCOMPLETE") else: raise TileDBError("internal error: unknown query status") # resize arrays to final bytes-read for attr_idx in range(nattr): qattr = attrs[attr_idx] attr_name = qattr.name attr_dtype = qattr.dtype attr_item_size = attr_dtype.itemsize attr_array = result_dict[attr_name] attr_array.resize(int(result_bytes_read[attr_idx] / attr_item_size), refcheck=False) return result_dict cpdef multi_index(Array array, tuple attr_names, tuple ranges, order = None, coords = None): cdef tiledb_layout_t layout = TILEDB_UNORDERED if order is None or order == 'C': layout = TILEDB_ROW_MAJOR elif order == 'F': layout = TILEDB_COL_MAJOR elif order == 'G': layout = TILEDB_GLOBAL_ORDER else: raise ValueError("order must 
be 'C' (TILEDB_ROW_MAJOR), "\ "'F' (TILEDB_COL_MAJOR), "\ "or 'G' (TILEDB_GLOBAL_ORDER)") cdef tiledb_ctx_t* ctx_ptr = array.ctx.ptr cdef tiledb_array_t* array_ptr = array.ptr cdef tiledb_query_t* query_ptr = NULL cdef int rc = TILEDB_OK rc = tiledb_query_alloc(ctx_ptr, array_ptr, TILEDB_READ, &query_ptr) if rc != TILEDB_OK: tiledb_query_free(&query_ptr) _raise_ctx_err(ctx_ptr, rc) rc = tiledb_query_set_layout(ctx_ptr, query_ptr, layout) if rc != TILEDB_OK: tiledb_query_free(&query_ptr) _raise_ctx_err(ctx_ptr, rc) cdef Dim dim = array.schema.domain.dim(0) cdef uint32_t c_dim_idx cdef void* start_ptr = NULL cdef void* end_ptr = NULL cdef tuple cur_range cdef np.ndarray start cdef np.ndarray end # Add ranges to query ##################### # we loop over the range tuple left to right and apply # (unspecified dimensions are excluded) cdef Py_ssize_t dim_idx, range_idx for dim_idx in range(len(ranges)): c_dim_idx = dim_idx dim_ranges = ranges[dim_idx] # skip empty dimensions if len(dim_ranges) == 0: continue for range_idx in range(len(dim_ranges)): if len(dim_ranges[range_idx]) != 2: raise TileDBError("internal error: invalid sub-range: ", dim_ranges[range_idx]) start = np.array(dim_ranges[range_idx][0], dtype=dim.dtype) end = np.array(dim_ranges[range_idx][1], dtype=dim.dtype) start_ptr = np.PyArray_DATA(start) end_ptr = np.PyArray_DATA(end) rc = tiledb_query_add_range(ctx_ptr, query_ptr, dim_idx, start_ptr, end_ptr, NULL) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) try: if coords is None: coords = True result = execute_multi_index(array, query_ptr, attr_names, coords) finally: tiledb_query_free(&query_ptr) return result TileDB-Py-0.12.2/tiledb/libmetadata.pyx000066400000000000000000000402021417663620700175530ustar00rootroot00000000000000IF TILEDBPY_MODULAR: include "common.pxi" from .libtiledb import * from .libtiledb cimport * import weakref from collections.abc import MutableMapping from cython.operator cimport dereference as deref _NP_DATA_PREFIX = "__np_flat_" _NP_SHAPE_PREFIX = "__np_shape_" cdef extern from "Python.h": int PyBUF_READ object PyMemoryView_FromMemory(char*, Py_ssize_t, int) cdef class PackedBuffer: cdef bytes data cdef tiledb_datatype_t tdbtype cdef uint32_t value_num def __init__(self, data, tdbtype, value_num): self.data = data self.tdbtype = tdbtype self.value_num = value_num cdef PackedBuffer pack_metadata_val(value): if isinstance(value, bytes): return PackedBuffer(value, TILEDB_CHAR, len(value)) if isinstance(value, str): value = value.encode('UTF-8') return PackedBuffer(value, TILEDB_STRING_UTF8, len(value)) if not isinstance(value, (list, tuple)): value = (value,) if not value: # special case for empty values return PackedBuffer(b'', TILEDB_INT32, 0) val0 = value[0] if not isinstance(val0, (int, float)): raise TypeError(f"Unsupported item type '{type(val0)}'") cdef: uint32_t value_num = len(value) tiledb_datatype_t tiledb_type = TILEDB_INT64 if isinstance(val0, int) else TILEDB_FLOAT64 bytearray data = bytearray(value_num * tiledb_datatype_size(tiledb_type)) char[:] data_view = data Py_ssize_t pack_idx = 0 double * double_ptr int64_t * int64_ptr if tiledb_type == TILEDB_INT64: int64_ptr = &data_view[0] while pack_idx < value_num: value_item = value[pack_idx] if not isinstance(value_item, int): raise TypeError(f"Mixed-type sequences are not supported: {value}") int64_ptr[pack_idx] = value_item pack_idx += 1 else: double_ptr = &data_view[0] while pack_idx < value_num: value_item = value[pack_idx] if not isinstance(value_item, float): raise TypeError(f"Mixed-type 
sequences are not supported: {value}") double_ptr[pack_idx] = value_item pack_idx += 1 return PackedBuffer(bytes(data), tiledb_type, value_num) cdef object unpack_metadata_val( tiledb_datatype_t value_type, uint32_t value_num, const char* value_ptr ): assert value_num != 0, "internal error: unexpected value_num==0" if value_type == TILEDB_STRING_UTF8: return value_ptr[:value_num].decode('UTF-8') if value_ptr != NULL else '' if value_type == TILEDB_CHAR or value_type == TILEDB_STRING_ASCII: return value_ptr[:value_num] if value_ptr != NULL else b'' if value_ptr == NULL: return () unpacked = [None] * value_num cdef uint64_t itemsize = tiledb_datatype_size(value_type) for i in range(value_num): if value_type == TILEDB_INT64: unpacked[i] = deref( value_ptr) elif value_type == TILEDB_FLOAT64: unpacked[i] = deref( value_ptr) elif value_type == TILEDB_FLOAT32: unpacked[i] = deref( value_ptr) elif value_type == TILEDB_INT32: unpacked[i] = deref( value_ptr) elif value_type == TILEDB_UINT32: unpacked[i] = deref( value_ptr) elif value_type == TILEDB_UINT64: unpacked[i] = deref( value_ptr) elif value_type == TILEDB_INT8: unpacked[i] = deref( value_ptr) elif value_type == TILEDB_UINT8: unpacked[i] = deref( value_ptr) elif value_type == TILEDB_INT16: unpacked[i] = deref( value_ptr) elif value_type == TILEDB_UINT16: unpacked[i] = deref( value_ptr) else: raise NotImplementedError(f"TileDB datatype '{value_type}' not supported") value_ptr += itemsize # we don't differentiate between length-1 sequences and scalars return unpacked[0] if value_num == 1 else tuple(unpacked) cdef np.ndarray unpack_metadata_ndarray( tiledb_datatype_t value_type, uint32_t value_num, const char* value_ptr ): cdef np.dtype dtype = np.dtype(_numpy_dtype(value_type)) if value_ptr == NULL: return np.array((), dtype=dtype) # special case for TILEDB_STRING_UTF8: TileDB assumes size=1 if value_type != TILEDB_STRING_UTF8: value_num *= dtype.itemsize return np.frombuffer(PyMemoryView_FromMemory(value_ptr, value_num, PyBUF_READ), dtype=dtype).copy() cdef object unpack_metadata( bint is_ndarray, tiledb_datatype_t value_type, uint32_t value_num, const char * value_ptr ): if value_ptr == NULL and value_num != 1: raise KeyError if is_ndarray: return unpack_metadata_ndarray(value_type, value_num, value_ptr) else: return unpack_metadata_val(value_type, value_num, value_ptr) cdef put_metadata(Array array, key, value): cdef: PackedBuffer packed_buf tiledb_datatype_t tiledb_type uint32_t value_num cdef const unsigned char[:] data_view cdef const void* data_ptr if isinstance(value, np.ndarray): if value.ndim != 1: raise TypeError(f"Only 1D Numpy arrays can be stored as metadata") tiledb_type, ncells = array_type_ncells(value.dtype) if ncells != 1: raise TypeError(f"Unsupported dtype '{value.dtype}'") value_num = len(value) # special case for TILEDB_STRING_UTF8: TileDB assumes size=1 if tiledb_type == TILEDB_STRING_UTF8: value_num *= value.itemsize data_ptr = np.PyArray_DATA(value) else: packed_buf = pack_metadata_val(value) tiledb_type = packed_buf.tdbtype value_num = packed_buf.value_num data_view = packed_buf.data data_ptr = &data_view[0] if value_num > 0 else NULL key_utf8 = key.encode('UTF-8') cdef const char* key_utf8_ptr = key_utf8 cdef int rc = TILEDB_OK with nogil: rc = tiledb_array_put_metadata( array.ctx.ptr, array.ptr, key_utf8_ptr, tiledb_type, value_num, data_ptr, ) if rc != TILEDB_OK: _raise_ctx_err(array.ctx.ptr, rc) cdef object get_metadata(Array array, key, is_ndarray=False): cdef: tiledb_datatype_t value_type uint32_t value_num = 0 
const char* value_ptr = NULL bytes key_utf8 = key.encode('UTF-8') const char* key_utf8_ptr = key_utf8 cdef int32_t rc = TILEDB_OK with nogil: rc = tiledb_array_get_metadata( array.ctx.ptr, array.ptr, key_utf8_ptr, &value_type, &value_num, &value_ptr, ) if rc != TILEDB_OK: _raise_ctx_err(array.ctx.ptr, rc) return unpack_metadata(is_ndarray, value_type, value_num, value_ptr) def iter_metadata(Array array, keys_only): """ Iterate over array metadata keys or (key, value) tuples :param array: tiledb_array_t :param keys_only: whether to yield just keys or values too """ cdef: tiledb_ctx_t* ctx_ptr = array.ctx.ptr tiledb_array_t* array_ptr = array.ptr uint64_t metadata_num const char* key_ptr = NULL uint32_t key_len tiledb_datatype_t value_type uint32_t value_num const char* value_ptr = NULL cdef int32_t rc = TILEDB_OK with nogil: rc = tiledb_array_get_metadata_num(ctx_ptr, array_ptr, &metadata_num) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) for i in range(metadata_num): with nogil: rc = tiledb_array_get_metadata_from_index( ctx_ptr, array_ptr, i, &key_ptr, &key_len, &value_type, &value_num, &value_ptr, ) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) key = key_ptr[:key_len].decode('UTF-8') if keys_only: yield key else: value = unpack_metadata(key.startswith(_NP_DATA_PREFIX), value_type, value_num, value_ptr) yield key, value cdef class Metadata: def __init__(self, array): self.array_ref = weakref.ref(array) @property def array(self): assert self.array_ref() is not None, \ "Internal error: invariant violation ([] from gc'd Array)" return self.array_ref() def __setitem__(self, key, value): if not isinstance(key, str): raise TypeError(f"Unexpected key type '{type(key)}': expected str") # ensure previous key(s) are deleted (e.g. in case of replacing a # non-numpy value with a numpy value or vice versa) del self[key] if isinstance(value, np.ndarray): flat_value = value.ravel() put_metadata(self.array, _NP_DATA_PREFIX + key, flat_value) if value.shape != flat_value.shape: put_metadata(self.array, _NP_SHAPE_PREFIX + key, value.shape) else: put_metadata(self.array, key, value) def __getitem__(self, key): if not isinstance(key, str): raise TypeError(f"Unexpected key type '{type(key)}': expected str") array = self.array try: return get_metadata(array, key) except KeyError as ex: try: np_array = get_metadata(array, _NP_DATA_PREFIX + key, is_ndarray=True) except KeyError: raise KeyError(key) from None try: shape = get_metadata(array, _NP_SHAPE_PREFIX + key) except KeyError: return np_array else: return np_array.reshape(shape) def __delitem__(self, key): if not isinstance(key, str): raise TypeError(f"Unexpected key type '{type(key)}': expected str") cdef: tiledb_ctx_t* ctx_ptr = (self.array).ctx.ptr tiledb_array_t* array_ptr = (self.array).ptr const char* key_utf8_ptr int32_t rc # key may be stored as is or it may be prefixed (for numpy values) # we don't know this here so delete all potential internal keys for k in key, _NP_DATA_PREFIX + key, _NP_SHAPE_PREFIX + key: key_utf8 = k.encode('UTF-8') key_utf8_ptr = key_utf8 with nogil: rc = tiledb_array_delete_metadata(ctx_ptr, array_ptr, key_utf8_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) def __contains__(self, key): if not isinstance(key, str): raise TypeError(f"Unexpected key type '{type(key)}': expected str") cdef: tiledb_ctx_t* ctx_ptr = (self.array).ctx.ptr tiledb_array_t* array_ptr = (self.array).ptr bytes key_utf8 = key.encode('UTF-8') const char* key_utf8_ptr = key_utf8 tiledb_datatype_t value_type int32_t has_key cdef int32_t rc = 
TILEDB_OK with nogil: rc = tiledb_array_has_metadata_key( ctx_ptr, array_ptr, key_utf8_ptr, &value_type, &has_key, ) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) # if key doesn't exist, check the _NP_DATA_PREFIX prefixed key if not has_key and not key.startswith(_NP_DATA_PREFIX): has_key = self.__contains__(_NP_DATA_PREFIX + key) return bool(has_key) def consolidate(self): """ Consolidate array metadata. Array must be closed. :return: """ # TODO: ensure that the array is not x-locked? cdef Ctx ctx = ( self.array).ctx cdef Config config = ctx.config() cdef: uint32_t rc = 0 tiledb_ctx_t* ctx_ptr = ctx.ptr tiledb_config_t* config_ptr = NULL tiledb_encryption_type_t key_type = TILEDB_NO_ENCRYPTION void* key_ptr = NULL uint32_t key_len = 0 bytes bkey bytes buri = unicode_path(self.array.uri) str key = (self.array).key if config: config_ptr = config.ptr if key is not None: if isinstance(key, str): bkey = key.encode('ascii') else: bkey = bytes(self.array.key) key_type = TILEDB_AES_256_GCM key_ptr = PyBytes_AS_STRING(bkey) #TODO: unsafe cast here ssize_t -> uint64_t key_len = PyBytes_GET_SIZE(bkey) cdef const char* buri_ptr = buri with nogil: rc = tiledb_array_consolidate_metadata_with_key( ctx_ptr, buri_ptr, key_type, key_ptr, key_len, config_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) get = MutableMapping.get update = MutableMapping.update def setdefault(self, key, default=None): raise NotImplementedError("Metadata.setdefault requires read-write access to array") def pop(self, key, default=None): raise NotImplementedError("Metadata.pop requires read-write access to array") def popitem(self): raise NotImplementedError("Metadata.popitem requires read-write access to array") def clear(self): raise NotImplementedError("Metadata.clear requires read-write access to array") def __len__(self): cdef: tiledb_ctx_t* ctx_ptr = (self.array).ctx.ptr tiledb_array_t* array_ptr = (self.array).ptr uint64_t num cdef int32_t rc = TILEDB_OK with nogil: rc = tiledb_array_get_metadata_num(ctx_ptr, array_ptr, &num) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) # subtract the _NP_SHAPE_PREFIX prefixed keys for key in iter_metadata(self.array, keys_only=True): if key.startswith(_NP_SHAPE_PREFIX): num -= 1 return num def __iter__(self): np_data_prefix_len = len(_NP_DATA_PREFIX) for key in iter_metadata(self.array, keys_only=True): if key.startswith(_NP_DATA_PREFIX): yield key[np_data_prefix_len:] elif not key.startswith(_NP_SHAPE_PREFIX): yield key # else: ignore the shape keys def keys(self): """ Return metadata keys as list. :return: List of keys """ # TODO this should be an iterator/view return list(self) def values(self): """ Return metadata values as list. :return: List of values """ # TODO this should be an iterator/view return [v for k, v in self._iteritems()] def items(self): # TODO this should be an iterator/view return tuple(self._iteritems()) def _iteritems(self): np_data_prefix_len = len(_NP_DATA_PREFIX) np_shape_prefix_len = len(_NP_SHAPE_PREFIX) ndarray_items = [] np_shape_map = {} # 1. yield all non-ndarray (key, value) pairs and keep track of # the ndarray data and shape to assemble them later for key, value in iter_metadata(self.array, keys_only=False): if key.startswith(_NP_DATA_PREFIX): ndarray_items.append((key[np_data_prefix_len:], value)) elif key.startswith(_NP_SHAPE_PREFIX): np_shape_map[key[np_shape_prefix_len:]] = value else: yield key, value # 2. 
yield all ndarray (key, value) pairs after reshaping (if necessary) for key, value in ndarray_items: shape = np_shape_map.get(key) if shape is not None: value = value.reshape(shape) yield key, value TileDB-Py-0.12.2/tiledb/libtiledb.pxd000066400000000000000000001076471417663620700172320ustar00rootroot00000000000000from libc.stdio cimport FILE from libc.stdint cimport uint64_t, uint32_t IF TILEDBPY_MODULAR: from .indexing cimport DomainIndexer include "common.pxi" cdef extern from "tiledb/tiledb.h": # Constants enum: TILEDB_OK enum: TILEDB_ERR enum: TILEDB_OOM enum: TILEDB_VAR_NUM unsigned int tiledb_var_num() enum: TILEDB_COORDS const char* tiledb_coords() enum: TILEDB_MAX_PATH unsigned int tiledb_max_path() enum: TILEDB_OFFSET_SIZE unsigned int tiledb_offset_size() # Version void tiledb_version(int* major, int* minor, int* rev) # Stats void tiledb_stats_enable() void tiledb_stats_disable() void tiledb_stats_reset() int32_t tiledb_stats_dump_str(char** out) int32_t tiledb_stats_raw_dump_str(char** out) int32_t tiledb_stats_free_str(char** out) # Enums ctypedef enum tiledb_object_t: TILEDB_INVALID TILEDB_GROUP TILEDB_ARRAY ctypedef enum tiledb_query_type_t: TILEDB_READ TILEDB_WRITE ctypedef enum tiledb_query_status_t: TILEDB_FAILED TILEDB_COMPLETED TILEDB_INPROGRESS TILEDB_INCOMPLETE ctypedef enum tiledb_datatype_t: TILEDB_INT32 TILEDB_INT64 TILEDB_FLOAT32 TILEDB_FLOAT64 TILEDB_CHAR TILEDB_INT8 TILEDB_UINT8 TILEDB_INT16 TILEDB_UINT16 TILEDB_UINT32 TILEDB_UINT64 TILEDB_STRING_ASCII TILEDB_STRING_UTF8 TILEDB_STRING_UTF16 TILEDB_STRING_UTF32 TILEDB_STRING_UCS2 TILEDB_STRING_UCS4 TILEDB_DATETIME_YEAR TILEDB_DATETIME_MONTH TILEDB_DATETIME_WEEK TILEDB_DATETIME_DAY TILEDB_DATETIME_HR TILEDB_DATETIME_MIN TILEDB_DATETIME_SEC TILEDB_DATETIME_MS TILEDB_DATETIME_US TILEDB_DATETIME_NS TILEDB_DATETIME_PS TILEDB_DATETIME_FS TILEDB_DATETIME_AS ctypedef enum tiledb_array_type_t: TILEDB_DENSE TILEDB_SPARSE ctypedef enum tiledb_layout_t: TILEDB_ROW_MAJOR TILEDB_COL_MAJOR TILEDB_GLOBAL_ORDER TILEDB_UNORDERED TILEDB_HILBERT ctypedef enum tiledb_filter_type_t: TILEDB_FILTER_NONE = 0 TILEDB_FILTER_GZIP = 1 TILEDB_FILTER_ZSTD = 2 TILEDB_FILTER_LZ4 = 3 TILEDB_FILTER_RLE = 4 TILEDB_FILTER_BZIP2 = 5 TILEDB_FILTER_DOUBLE_DELTA = 6 TILEDB_FILTER_BIT_WIDTH_REDUCTION = 7 TILEDB_FILTER_BITSHUFFLE = 8 TILEDB_FILTER_BYTESHUFFLE = 9 TILEDB_FILTER_POSITIVE_DELTA = 10 # 11 is encryption, see tiledb_enum.h TILEDB_FILTER_CHECKSUM_MD5 = 12 TILEDB_FILTER_CHECKSUM_SHA256 = 13 ctypedef enum tiledb_filter_option_t: TILEDB_COMPRESSION_LEVEL = 0 TILEDB_BIT_WIDTH_MAX_WINDOW = 1 TILEDB_POSITIVE_DELTA_MAX_WINDOW = 2 ctypedef enum tiledb_encryption_type_t: TILEDB_NO_ENCRYPTION TILEDB_AES_256_GCM ctypedef enum tiledb_walk_order_t: TILEDB_PREORDER TILEDB_POSTORDER ctypedef enum tiledb_filesystem_t: TILEDB_HDFS TILEDB_S3 TILEDB_AZURE TILEDB_GCS ctypedef enum tiledb_vfs_mode_t: TILEDB_VFS_READ TILEDB_VFS_WRITE TILEDB_VFS_APPEND # Types ctypedef struct tiledb_ctx_t: pass ctypedef struct tiledb_config_t: pass ctypedef struct tiledb_config_iter_t: pass ctypedef struct tiledb_error_t: pass ctypedef struct tiledb_array_t: pass ctypedef struct tiledb_attribute_t: pass ctypedef struct tiledb_array_schema_t: pass ctypedef struct tiledb_dimension_t: pass ctypedef struct tiledb_domain_t: pass ctypedef struct tiledb_query_t: pass ctypedef struct tiledb_filter_t: pass ctypedef struct tiledb_filter_list_t: pass ctypedef struct tiledb_vfs_t: pass ctypedef struct tiledb_vfs_fh_t: pass # Config int tiledb_config_alloc( tiledb_config_t** config, 
tiledb_error_t** error) void tiledb_config_free( tiledb_config_t** config) int tiledb_config_set( tiledb_config_t* config, const char* param, const char* value, tiledb_error_t** error) int tiledb_config_get( tiledb_config_t* config, const char* param, const char** value, tiledb_error_t** error) int tiledb_config_load_from_file( tiledb_config_t* config, const char* filename, tiledb_error_t** error) nogil int tiledb_config_unset( tiledb_config_t* config, const char* param, tiledb_error_t** error) int tiledb_config_save_to_file( tiledb_config_t* config, const char* filename, tiledb_error_t** error) nogil # Config Iterator int tiledb_config_iter_alloc( tiledb_config_t* config, const char* prefix, tiledb_config_iter_t** config_iter, tiledb_error_t** error) void tiledb_config_iter_free( tiledb_config_iter_t** config_iter) int tiledb_config_iter_here( tiledb_config_iter_t* config_iter, const char** param, const char** value, tiledb_error_t** error) int tiledb_config_iter_next( tiledb_config_iter_t* config_iter, tiledb_error_t** error) int tiledb_config_iter_done( tiledb_config_iter_t* config_iter, int* done, tiledb_error_t** error) # Context int tiledb_ctx_alloc( tiledb_config_t* config, tiledb_ctx_t** ctx) void tiledb_ctx_free( tiledb_ctx_t** ctx) int tiledb_ctx_get_config( tiledb_ctx_t* ctx, tiledb_config_t** config) int tiledb_ctx_get_last_error( tiledb_ctx_t* ctx, tiledb_error_t** error) int tiledb_ctx_get_stats( tiledb_ctx_t* ctx, char** stats_json); int tiledb_ctx_is_supported_fs( tiledb_ctx_t* ctx, tiledb_filesystem_t fs, int* is_supported) int tiledb_ctx_set_tag( tiledb_ctx_t* ctx, const char* key, const char* value) # Error int tiledb_error_message( tiledb_error_t* err, char** msg) void tiledb_error_free( tiledb_error_t** err) # Group int tiledb_group_create( tiledb_ctx_t* ctx, const char* group) nogil # Filter int tiledb_filter_alloc( tiledb_ctx_t* ctx, tiledb_filter_type_t filter_type, tiledb_filter_t** filter) nogil int tiledb_filter_free( tiledb_filter_t **filter) int tiledb_filter_get_type( tiledb_ctx_t* ctx, tiledb_filter_t* filter, tiledb_filter_type_t* type) int tiledb_filter_set_option( tiledb_ctx_t* ctx, tiledb_filter_t* filter, tiledb_filter_option_t option, const void* value) int tiledb_filter_get_option( tiledb_ctx_t* ctx, tiledb_filter_t* filter, tiledb_filter_option_t option, void* value) # Filter List int tiledb_filter_list_alloc( tiledb_ctx_t* ctx, tiledb_filter_list_t** filter_list) int tiledb_filter_list_free( tiledb_filter_list_t** filter_list) int tiledb_filter_list_add_filter( tiledb_ctx_t* ctx, tiledb_filter_list_t* filter_list, tiledb_filter_t* filter) int tiledb_filter_list_set_max_chunk_size( tiledb_ctx_t* ctx, const tiledb_filter_list_t* filter_list, unsigned int max_chunk_size) int tiledb_filter_list_get_nfilters( tiledb_ctx_t* ctx, const tiledb_filter_list_t* filter_list, unsigned int* num_filters) int tiledb_filter_list_get_filter_from_index( tiledb_ctx_t* ctx, const tiledb_filter_list_t* filter_list, unsigned int index, tiledb_filter_t** filter) int tiledb_filter_list_get_max_chunk_size( tiledb_ctx_t* ctx, const tiledb_filter_list_t* filter_list, unsigned int* max_chunk_size) # Attribute int tiledb_attribute_alloc( tiledb_ctx_t* ctx, const char* name, tiledb_datatype_t atype, tiledb_attribute_t** attr) void tiledb_attribute_free( tiledb_attribute_t** attr) int tiledb_attribute_set_filter_list( tiledb_ctx_t* ctx_ptr, const tiledb_attribute_t* attr, tiledb_filter_list_t* filter_list) int tiledb_attribute_set_fill_value( tiledb_ctx_t *ctx, tiledb_attribute_t 
*attr, const void *value, uint64_t size) int tiledb_attribute_set_cell_val_num( tiledb_ctx_t* ctx, tiledb_attribute_t* attr, unsigned int cell_val_num) int tiledb_attribute_get_name( tiledb_ctx_t* ctx, const tiledb_attribute_t* attr, const char** name) int tiledb_attribute_get_type( tiledb_ctx_t* ctx, const tiledb_attribute_t* attr, tiledb_datatype_t* type) int tiledb_attribute_get_filter_list( tiledb_ctx_t* ctx, const tiledb_attribute_t* attr, tiledb_filter_list_t** filter_list) int tiledb_attribute_get_fill_value( tiledb_ctx_t *ctx, tiledb_attribute_t *attr, const void **value, uint64_t *size) int tiledb_attribute_get_cell_val_num( tiledb_ctx_t* ctx, const tiledb_attribute_t* attr, unsigned int* cell_val_num) int tiledb_attribute_set_nullable( tiledb_ctx_t* ctx, const tiledb_attribute_t* attr, uint8_t nullable) int tiledb_attribute_get_nullable( tiledb_ctx_t* ctx, const tiledb_attribute_t* attr, uint8_t* nullable) int tiledb_attribute_dump( tiledb_ctx_t* ctx, const tiledb_attribute_t* attr, FILE* out) # Datatype uint64_t tiledb_datatype_size( tiledb_datatype_t type); # Domain int tiledb_domain_alloc( tiledb_ctx_t* ctx, tiledb_domain_t** domain) void tiledb_domain_free( tiledb_domain_t** domain) int tiledb_domain_get_type( tiledb_ctx_t* ctx, const tiledb_domain_t* domain, tiledb_datatype_t* dtype) int tiledb_domain_get_ndim( tiledb_ctx_t* ctx, const tiledb_domain_t* domain, unsigned int* ndim) int tiledb_domain_add_dimension( tiledb_ctx_t* ctx, tiledb_domain_t* domain, tiledb_dimension_t* dim) int tiledb_domain_get_dimension_from_index( tiledb_ctx_t* ctx, const tiledb_domain_t* domain, unsigned int index, tiledb_dimension_t** dim) int tiledb_domain_get_dimension_from_name( tiledb_ctx_t* ctx, const tiledb_domain_t* domain, const char* name, tiledb_dimension_t** dim) int tiledb_domain_has_dimension( tiledb_ctx_t * ctx, const tiledb_domain_t* domain, const char* name, int32_t* has_dim) int tiledb_domain_dump( tiledb_ctx_t* ctx, const tiledb_domain_t* domain, FILE* out) # Dimension int tiledb_dimension_alloc( tiledb_ctx_t* ctx, const char* name, tiledb_datatype_t type, const void* dim_domain, const void* tile_extent, tiledb_dimension_t** dim) void tiledb_dimension_free( tiledb_dimension_t** dim) int tiledb_dimension_get_name( tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, const char** name) int tiledb_dimension_get_cell_val_num( tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, uint32_t* cell_val_num) int tiledb_dimension_get_type( tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, tiledb_datatype_t* type) int tiledb_dimension_get_domain( tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, const void** domain) int tiledb_dimension_get_tile_extent( tiledb_ctx_t* ctx, const tiledb_dimension_t* dim, const void** tile_extent) int tiledb_dimension_get_filter_list( tiledb_ctx_t *ctx, tiledb_dimension_t *dim, tiledb_filter_list_t **filter_list) int tiledb_dimension_set_filter_list( tiledb_ctx_t *ctx, tiledb_dimension_t *dim, tiledb_filter_list_t *filter_list) # Array schema int tiledb_array_schema_alloc( tiledb_ctx_t* ctx, tiledb_array_type_t array_type, tiledb_array_schema_t** array_schema) void tiledb_array_schema_free( tiledb_array_schema_t** array_schema) int tiledb_array_schema_add_attribute( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, tiledb_attribute_t* attr) int tiledb_array_schema_has_attribute( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, const char* name, int32_t* has_attr) int tiledb_array_schema_set_domain( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, 
tiledb_domain_t* domain); int tiledb_array_schema_set_capacity( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, uint64_t capacity); int tiledb_array_schema_set_cell_order( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, tiledb_layout_t cell_order); int tiledb_array_schema_set_tile_order( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, tiledb_layout_t tile_order) int tiledb_array_schema_set_offsets_filter_list( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schmea, tiledb_filter_list_t* filter_list) int tiledb_array_schema_set_coords_filter_list( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, tiledb_filter_list_t* filter_list) int tiledb_array_schema_set_validity_filter_list( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, tiledb_filter_list_t* filter_list) int tiledb_array_schema_check( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema) int tiledb_array_schema_load( tiledb_ctx_t* ctx, const char* array_uri, tiledb_array_schema_t** array_schema) nogil int tiledb_array_schema_load_with_key( tiledb_ctx_t* ctx, const char* array_uri, tiledb_encryption_type_t key_type, const void* key_ptr, unsigned int key_len, tiledb_array_schema_t** array_schema) nogil int tiledb_array_schema_get_array_type( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, tiledb_array_type_t* array_type) int tiledb_array_schema_get_capacity( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, uint64_t* capacity) int tiledb_array_schema_get_cell_order( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, tiledb_layout_t* cell_order) int tiledb_array_schema_get_coords_filter_list( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, tiledb_filter_list_t** filter_list) int tiledb_array_schema_get_offsets_filter_list( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, tiledb_filter_list_t** filter_list) int tiledb_array_schema_get_validity_filter_list( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, tiledb_filter_list_t** filter_list) int tiledb_array_schema_get_domain( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, tiledb_domain_t** domain) int tiledb_array_schema_get_tile_order( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, tiledb_layout_t* tile_order) int tiledb_array_schema_get_attribute_num( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, unsigned int* num_attributes) int tiledb_array_schema_get_attribute_from_index( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, unsigned int index, tiledb_attribute_t** attr) int tiledb_array_schema_get_attribute_from_name( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, const char* name, tiledb_attribute_t** attr) int tiledb_array_schema_get_array_name( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, const char** array_name) int tiledb_array_schema_get_allows_dups( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, int* allows_dups); int tiledb_array_schema_set_allows_dups( tiledb_ctx_t* ctx, tiledb_array_schema_t* array_schema, int allows_dups) int tiledb_array_schema_dump( tiledb_ctx_t* ctx, const tiledb_array_schema_t* array_schema, FILE* out) int tiledb_array_get_open_timestamp_start( tiledb_ctx_t* ctx, tiledb_array_t* array, uint64_t* timestamp_start) int tiledb_array_get_open_timestamp_end( tiledb_ctx_t* ctx, tiledb_array_t* array, uint64_t* timestamp_end) int tiledb_array_set_open_timestamp_start( tiledb_ctx_t* ctx, tiledb_array_t* array, uint64_t timestamp_start) int 
tiledb_array_set_open_timestamp_end( tiledb_ctx_t* ctx, tiledb_array_t* array, uint64_t timestamp_end) int tiledb_array_put_metadata( tiledb_ctx_t* ctx, tiledb_array_t* array, const char* key, tiledb_datatype_t value_type, uint32_t value_num, const void* value) nogil int tiledb_array_delete_metadata( tiledb_ctx_t* ctx, tiledb_array_t* array, const char* key) nogil int tiledb_array_has_metadata_key( tiledb_ctx_t* ctx, tiledb_array_t* array, const char* key, tiledb_datatype_t* value_type, int32_t* has_key) nogil int tiledb_array_get_metadata( tiledb_ctx_t* ctx, tiledb_array_t* array, const char* key, tiledb_datatype_t* value_type, uint32_t* value_num, const void** value) nogil int tiledb_array_get_metadata_num( tiledb_ctx_t* ctx, tiledb_array_t* array, uint64_t* num) nogil int tiledb_array_get_metadata_from_index( tiledb_ctx_t* ctx, tiledb_array_t* array, uint64_t index, const char** key, uint32_t* key_len, tiledb_datatype_t* value_type, uint32_t* value_num, const void** value) nogil int tiledb_array_consolidate_metadata( tiledb_ctx_t* ctx, const char* array_uri, tiledb_config_t* config) nogil int tiledb_array_consolidate_metadata_with_key( tiledb_ctx_t* ctx, const char* array_uri, tiledb_encryption_type_t encryption_type, const void* encryption_key, uint32_t key_length, tiledb_config_t* config) nogil # Query int tiledb_query_alloc( tiledb_ctx_t* ctx, tiledb_array_t* array, tiledb_query_type_t query_type, tiledb_query_t** query) int tiledb_query_set_subarray( tiledb_ctx_t* ctx, tiledb_query_t* query, const void* subarray) int tiledb_query_set_buffer( tiledb_ctx_t* ctx, tiledb_query_t* query, const char* attribute, void* buffer, uint64_t* buffer_size) int32_t tiledb_query_set_data_buffer( tiledb_ctx_t* ctx, tiledb_query_t* query, const char* name, void* buffer, uint64_t* buffer_size) int tiledb_query_set_validity_buffer( tiledb_ctx_t* ctx, tiledb_query_t* query, const char* name, uint8_t* buffer, uint64_t* buffer_size) int32_t tiledb_query_set_offsets_buffer( tiledb_ctx_t* ctx, tiledb_query_t* query, const char* name, uint64_t* buffer, uint64_t* buffer_size) int tiledb_query_set_buffer_var( tiledb_ctx_t* ctx, tiledb_query_t* query, const char* attribute, uint64_t* buffer_off, uint64_t* buffer_off_size, void* buffer_val, uint64_t* buffer_val_size) int tiledb_query_set_buffer_nullable( tiledb_ctx_t* ctx, tiledb_query_t* query, const char* name, void* buffer, uint64_t* buffer_size, uint8_t* buffer_validity_bytemap, uint64_t* buffer_validity_bytemap_size) int tiledb_query_set_buffer_var_nullable( tiledb_ctx_t* ctx, tiledb_query_t* query, const char* name, uint64_t* buffer_off, uint64_t* buffer_off_size, void* buffer_val, uint64_t* buffer_val_size, uint8_t* buffer_validity_bytemap, uint64_t* buffer_validity_bytemap_size) int tiledb_query_set_layout( tiledb_ctx_t* ctx, tiledb_query_t* query, tiledb_layout_t layout) void tiledb_query_free( tiledb_query_t** query) int tiledb_query_finalize( tiledb_ctx_t* ctx, tiledb_query_t* query) nogil int tiledb_query_submit( tiledb_ctx_t* ctx, tiledb_query_t* query) nogil int tiledb_query_submit_async( tiledb_ctx_t* ctx, tiledb_query_t* query, void* (*callback)(void*), void* callback_data) int tiledb_query_get_status( tiledb_ctx_t* ctx, tiledb_query_t* query, tiledb_query_status_t* status) int tiledb_query_get_type( tiledb_ctx_t* ctx, tiledb_query_t* query, tiledb_query_type_t* query_type) int tiledb_query_has_results( tiledb_ctx_t* ctx, tiledb_query_t* query, int* has_results) int tiledb_query_get_fragment_num( tiledb_ctx_t* ctx, const tiledb_query_t* query, 
uint32_t* num) int tiledb_query_get_fragment_uri( tiledb_ctx_t* ctx, const tiledb_query_t* query, uint64_t idx, const char** uri) int tiledb_query_get_fragment_timestamp_range( tiledb_ctx_t* ctx, const tiledb_query_t* query, uint64_t idx, uint64_t* t1, uint64_t* t2) int tiledb_query_add_range( tiledb_ctx_t* ctx, tiledb_query_t* query, uint32_t dim_idx, const void * start, const void * end, const void * stride) int tiledb_query_add_range_var( tiledb_ctx_t* ctx, tiledb_query_t* query, uint32_t dim_idx, const void * start, uint64_t start_size, const void * end, uint64_t end_size) int tiledb_query_get_range( tiledb_ctx_t* ctx, const tiledb_query_t* query, uint32_t dim_idx, uint64_t range_idx, const void** start, const void** end, const void** stride) int tiledb_query_get_range_num( tiledb_ctx_t* ctx, const tiledb_query_t* query, uint32_t dim_idx, uint64_t * range_num) int tiledb_query_get_est_result_size( tiledb_ctx_t* ctx, const tiledb_query_t* query, const char* attr_name, uint64_t* size) int tiledb_query_get_est_result_size_var( tiledb_ctx_t* ctx, const tiledb_query_t* query, const char* attr_name, uint64_t* size_off, uint64_t* size_val) int tiledb_query_get_stats( tiledb_ctx_t* ctx, tiledb_query_t* query, char** stats_json); # Array int tiledb_array_alloc( tiledb_ctx_t* ctx, const char* uri, tiledb_array_t** array) int tiledb_array_open( tiledb_ctx_t* ctx, tiledb_array_t* array, tiledb_query_type_t query_type) nogil int tiledb_array_open_with_key( tiledb_ctx_t* ctx, tiledb_array_t* array, tiledb_query_type_t query_type, tiledb_encryption_type_t key_type, const void* key, unsigned int key_len) nogil int tiledb_array_open_at_with_key( tiledb_ctx_t* ctx, tiledb_array_t* array, tiledb_query_type_t query_type, tiledb_encryption_type_t encryption_type, const void * encryption_key, int key_length, uint64_t timestamp) nogil int tiledb_array_reopen( tiledb_ctx_t* ctx, tiledb_array_t* array) nogil int tiledb_array_reopen_at( tiledb_ctx_t* ctx, tiledb_array_t* array, uint64_t timestamp) nogil int tiledb_array_close( tiledb_ctx_t* ctx, tiledb_array_t* array) nogil void tiledb_array_free( tiledb_array_t** array) int tiledb_array_create( tiledb_ctx_t* ctx, const char* uri, const tiledb_array_schema_t* array_schema) nogil int tiledb_array_create_with_key( tiledb_ctx_t* ctx, const char* uri, const tiledb_array_schema_t* array_schema, tiledb_encryption_type_t key_type, const void* key, unsigned int key_len) nogil int tiledb_array_is_open( tiledb_ctx_t* ctx, tiledb_array_t* array, int* is_open) int tiledb_array_consolidate( tiledb_ctx_t* ctx, const char* array_path, tiledb_config_t* config) nogil int tiledb_array_consolidate_with_key( tiledb_ctx_t* ctx, const char* uri, tiledb_encryption_type_t key_type, const void* key_ptr, unsigned int key_len, tiledb_config_t* config) nogil int tiledb_array_get_schema( tiledb_ctx_t* ctx, tiledb_array_t* array, tiledb_array_schema_t** array_schema) nogil int tiledb_array_get_timestamp( tiledb_ctx_t* ctx, tiledb_array_t* array, uint64_t* timestamp) nogil int tiledb_array_get_query_type( tiledb_ctx_t* ctx, tiledb_array_t* array, tiledb_query_type_t* query_type) int tiledb_array_get_non_empty_domain( tiledb_ctx_t* ctx, tiledb_array_t* array, void* domain, int* isempty) nogil int32_t tiledb_array_get_non_empty_domain_from_index( tiledb_ctx_t* ctx, tiledb_array_t* array, uint32_t idx, void* domain, int32_t* is_empty); int32_t tiledb_array_get_non_empty_domain_from_name( tiledb_ctx_t* ctx, tiledb_array_t* array, const char* name, void* domain, int32_t* is_empty); int32_t 
tiledb_array_get_non_empty_domain_var_size_from_index( tiledb_ctx_t* ctx, tiledb_array_t* array, uint32_t idx, uint64_t* start_size, uint64_t* end_size, int32_t* is_empty) int32_t tiledb_array_get_non_empty_domain_var_size_from_name( tiledb_ctx_t* ctx, tiledb_array_t* array, const char* name, uint64_t* start_size, uint64_t* end_size, int32_t* is_empty) int32_t tiledb_array_get_non_empty_domain_var_from_index( tiledb_ctx_t* ctx, tiledb_array_t* array, uint32_t idx, void* start, void* end, int32_t* is_empty); int32_t tiledb_array_get_non_empty_domain_var_from_name( tiledb_ctx_t* ctx, tiledb_array_t* array, const char* name, void* start, void* end, int32_t* is_empty) int tiledb_array_vacuum( tiledb_ctx_t* ctx, const char* array_uri, tiledb_config_t* config) nogil # Resource management int tiledb_object_type( tiledb_ctx_t* ctx, const char* path, tiledb_object_t* otype) nogil int tiledb_object_remove( tiledb_ctx_t* ctx, const char* path) nogil int tiledb_object_move( tiledb_ctx_t* ctx, const char* old_path, const char* new_path) nogil int tiledb_object_walk( tiledb_ctx_t* ctx, const char* path, tiledb_walk_order_t order, int (*callback)(const char*, tiledb_object_t, void*), void* data) int tiledb_object_ls( tiledb_ctx_t* ctx, const char* path, int (*callback)(const char*, tiledb_object_t, void*), void* data) # VFS int tiledb_vfs_alloc( tiledb_ctx_t* ctx, tiledb_config_t* config, tiledb_vfs_t** vfs) void tiledb_vfs_free( tiledb_vfs_t** vfs) int tiledb_vfs_create_bucket( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri) nogil int tiledb_vfs_remove_bucket( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri) nogil int tiledb_vfs_empty_bucket( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri) nogil int tiledb_vfs_is_empty_bucket( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri, int* is_empty) nogil int tiledb_vfs_is_bucket( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri, int* is_bucket) nogil int tiledb_vfs_create_dir( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri) nogil int tiledb_vfs_is_dir( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri, int* is_dir) nogil int tiledb_vfs_remove_dir( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri) nogil int tiledb_vfs_is_file( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri, int* is_file) nogil int tiledb_vfs_remove_file( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri) nogil int tiledb_vfs_file_size( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri, uint64_t* size) nogil int tiledb_vfs_dir_size( tiledb_ctx_t * ctx, tiledb_vfs_t * vfs, const char * uri, uint64_t * size) nogil int tiledb_vfs_ls( tiledb_ctx_t * ctx, tiledb_vfs_t * vfs, const char * path, int (*callback)(const char *, void *), void * data) int tiledb_vfs_move_file( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* old_uri, const char* new_uri) nogil int tiledb_vfs_move_dir( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* old_uri, const char* new_uri) nogil int tiledb_vfs_copy_file( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* old_uri, const char* new_uri) nogil int tiledb_vfs_copy_dir( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* old_uri, const char* new_uri) nogil int tiledb_vfs_open( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri, tiledb_vfs_mode_t mode, tiledb_vfs_fh_t** fh) nogil int tiledb_vfs_close( tiledb_ctx_t* ctx, tiledb_vfs_fh_t* fh) nogil int tiledb_vfs_read( tiledb_ctx_t* ctx, tiledb_vfs_fh_t* fh, uint64_t offset, void* buffer, uint64_t nbytes) nogil int tiledb_vfs_write( tiledb_ctx_t* ctx, 
tiledb_vfs_fh_t* fh, const void* buffer, uint64_t nbytes) nogil int tiledb_vfs_sync( tiledb_ctx_t* ctx, tiledb_vfs_fh_t* fh) nogil void tiledb_vfs_fh_free( tiledb_vfs_fh_t** fh) nogil int tiledb_vfs_fh_is_closed( tiledb_ctx_t* ctx, tiledb_vfs_fh_t* fh, int* is_closed) nogil int tiledb_vfs_touch( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri) nogil int tiledb_vfs_get_config( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, tiledb_config_t** config) # URI int tiledb_uri_to_path( tiledb_ctx_t* ctx, const char* uri, char* path_out, unsigned* path_length) nogil # Free helper functions cpdef unicode ustring(object s) cpdef check_error(Ctx ctx, int rc) cdef _raise_tiledb_error(tiledb_error_t* err_ptr) cdef _raise_ctx_err(tiledb_ctx_t* ctx_ptr, int rc) cdef tiledb_datatype_t _tiledb_dtype_datetime(np.dtype dtype) except? TILEDB_DATETIME_YEAR ############################################################################### # # # TileDB-Py API declaration # # # ############################################################################### cdef class Config(object): cdef tiledb_config_t* ptr @staticmethod cdef from_ptr(tiledb_config_t* ptr) cdef class ConfigKeys(object): cdef ConfigItems config_items cdef class ConfigItems(object): cdef Config config cdef tiledb_config_iter_t* ptr cdef class ConfigValues(object): cdef ConfigItems config_items cdef class Ctx(object): cdef tiledb_ctx_t* ptr cdef class Filter(object): cdef Ctx ctx cdef tiledb_filter_t* ptr cdef class FilterList(object): cdef Ctx ctx cdef tiledb_filter_list_t* ptr @staticmethod cdef FilterList from_ptr(tiledb_filter_list_t* ptr, Ctx ctx=*) cdef Filter _getfilter(FilterList self, int idx) cdef class Attr(object): cdef Ctx ctx cdef tiledb_attribute_t* ptr @staticmethod cdef from_ptr(const tiledb_attribute_t* ptr, Ctx ctx=*) cdef unicode _get_name(Attr self) cdef unsigned int _cell_val_num(Attr self) except? 0 cdef tiledb_datatype_t _get_type(Attr self) except? TILEDB_CHAR cdef class Dim(object): cdef Ctx ctx cdef tiledb_dimension_t* ptr @staticmethod cdef from_ptr(const tiledb_dimension_t* ptr, Ctx ctx=*) cdef tiledb_datatype_t _get_type(Dim self) except? TILEDB_CHAR cdef unsigned int _cell_val_num(Dim self) except? 0 cdef _integer_domain(self) cdef _datetime_domain(self) cdef _shape(self) cdef class Domain(object): cdef Ctx ctx cdef tiledb_domain_t* ptr @staticmethod cdef from_ptr(const tiledb_domain_t* ptr, Ctx ctx=*) cdef tiledb_datatype_t _get_type(Domain self) except? 
TILEDB_CHAR cdef _integer_domain(Domain self) cdef _is_homogeneous(Domain self) cdef _shape(Domain self) cdef class ArraySchema(object): cdef Ctx ctx cdef tiledb_array_schema_t* ptr @staticmethod cdef from_ptr(const tiledb_array_schema_t* schema_ptr, Ctx ctx=*) cdef _cell_order(ArraySchema self, tiledb_layout_t* cell_order_ptr) cdef _tile_order(ArraySchema self, tiledb_layout_t* tile_order_ptr) cdef _attr_name(self, name) cdef _attr_idx(self, int idx) cdef class Array(object): cdef object __weakref__ cdef Ctx ctx cdef tiledb_array_t* ptr cdef unicode uri cdef unicode mode cdef bint _isopen cdef object view_attr # can be None cdef object key # can be None cdef object schema cdef object _buffers cdef DomainIndexer domain_index cdef object multi_index cdef object df cdef Metadata meta cdef object last_fragment_info cdef object pyquery cdef _ndarray_is_varlen(self, np.ndarray array) cdef class SparseArrayImpl(Array): cdef _read_sparse_subarray(self, list subarray, list attr_names, object attr_cond, tiledb_layout_t layout) cdef class DenseArrayImpl(Array): cdef _read_dense_subarray(self, list subarray, list attr_names, object attr_cond, tiledb_layout_t layout, bint include_coords) cdef class FileHandle(object): cdef Ctx ctx cdef VFS vfs cdef unicode uri cdef tiledb_vfs_fh_t* ptr @staticmethod cdef from_ptr(VFS vfs, unicode uri, tiledb_vfs_fh_t* fh_ptr) cpdef closed(self) cdef class VFS(object): cdef Ctx ctx cdef tiledb_vfs_t* ptr cdef class Query(object): cdef Array array cdef object attrs cdef object attr_cond cdef object dims cdef object order cdef object coords cdef object index_col cdef object use_arrow cdef object return_arrow cdef object return_incomplete cdef DomainIndexer domain_index cdef object multi_index cdef object df cdef class ReadQuery(object): cdef object _buffers cdef object _offsets cdef class Metadata(object): cdef object array_ref cdef class TileDBError(Exception): pass IF (not TILEDBPY_MODULAR): include "indexing.pxd" TileDB-Py-0.12.2/tiledb/libtiledb.pyx000066400000000000000000007755511417663620700172640ustar00rootroot00000000000000#!python #cython: embedsignature=True #cython: auto_pickle=False from cpython.version cimport PY_MAJOR_VERSION from cpython.pycapsule cimport PyCapsule_New, PyCapsule_IsValid, PyCapsule_GetPointer include "common.pxi" import io import html import sys import warnings from collections import OrderedDict from .array import DenseArray, SparseArray from .ctx import default_ctx ############################################################################### # Numpy initialization code (critical) # ############################################################################### # https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.import_array np.import_array() ############################################################################### # MODULAR IMPORTS # ############################################################################### IF TILEDBPY_MODULAR: from .indexing import DomainIndexer from .libmetadata import get_metadata, load_metadata, put_metadata from .np2buf import array_type_ncells, dtype_to_tiledb ELSE: include "indexing.pyx" include "np2buf.pyx" include "libmetadata.pyx" ############################################################################### # Utility/setup # ############################################################################### # KB / MB in bytes _KB = 1024 _MB = 1024 * _KB # Maximum number of retries for incomplete query _MAX_QUERY_RETRIES = 3 # The native int type for this platform IntType = 
np.dtype(np.int_) # Integer types supported by Python / System _inttypes = (int, np.integer) # Numpy initialization code (critical) # https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.import_array np.import_array() # Conversion from TileDB dtype to Numpy datetime _tiledb_dtype_to_datetime_convert = { TILEDB_DATETIME_YEAR: np.datetime64('', 'Y'), TILEDB_DATETIME_MONTH: np.datetime64('', 'M'), TILEDB_DATETIME_WEEK: np.datetime64('', 'W'), TILEDB_DATETIME_DAY: np.datetime64('', 'D'), TILEDB_DATETIME_HR: np.datetime64('', 'h'), TILEDB_DATETIME_MIN: np.datetime64('', 'm'), TILEDB_DATETIME_SEC: np.datetime64('', 's'), TILEDB_DATETIME_MS: np.datetime64('', 'ms'), TILEDB_DATETIME_US: np.datetime64('', 'us'), TILEDB_DATETIME_NS: np.datetime64('', 'ns'), TILEDB_DATETIME_PS: np.datetime64('', 'ps'), TILEDB_DATETIME_FS: np.datetime64('', 'fs'), TILEDB_DATETIME_AS: np.datetime64('', 'as') } # Conversion from Numpy datetime to TileDB dtype _datetime_tiledb_dtype_convert = { 'Y': TILEDB_DATETIME_YEAR, 'M': TILEDB_DATETIME_MONTH, 'W': TILEDB_DATETIME_WEEK, 'D': TILEDB_DATETIME_DAY, 'h': TILEDB_DATETIME_HR, 'm': TILEDB_DATETIME_MIN, 's': TILEDB_DATETIME_SEC, 'ms': TILEDB_DATETIME_MS, 'us': TILEDB_DATETIME_US, 'ns': TILEDB_DATETIME_NS, 'ps': TILEDB_DATETIME_PS, 'fs': TILEDB_DATETIME_FS, 'as': TILEDB_DATETIME_AS } # Conversion from TileDB dtype to Numpy typeid _tiledb_dtype_to_numpy_typeid_convert ={ TILEDB_INT32: np.NPY_INT32, TILEDB_UINT32: np.NPY_UINT32, TILEDB_INT64: np.NPY_INT64, TILEDB_UINT64: np.NPY_UINT64, TILEDB_FLOAT32: np.NPY_FLOAT32, TILEDB_FLOAT64: np.NPY_FLOAT64, TILEDB_INT8: np.NPY_INT8, TILEDB_UINT8: np.NPY_UINT8, TILEDB_INT16: np.NPY_INT16, TILEDB_UINT16: np.NPY_UINT16, TILEDB_CHAR: np.NPY_STRING, TILEDB_STRING_UTF8: np.NPY_UNICODE } # Conversion from TileDB dtype to Numpy dtype _tiledb_dtype_to_numpy_dtype_convert = { TILEDB_INT32: np.int32, TILEDB_UINT32: np.uint32, TILEDB_INT64: np.int64, TILEDB_UINT64: np.uint64, TILEDB_FLOAT32: np.float32, TILEDB_FLOAT64: np.float64, TILEDB_INT8: np.int8, TILEDB_UINT8: np.uint8, TILEDB_INT16: np.int16, TILEDB_UINT16: np.uint16, TILEDB_CHAR: np.dtype('S1'), TILEDB_STRING_ASCII: np.bytes_, TILEDB_STRING_UTF8: np.dtype('U1') } def version(): """Return the version of the linked ``libtiledb`` shared library :rtype: tuple :return: Semver version (major, minor, rev) """ cdef: int major = 0 int minor = 0 int rev = 0 tiledb_version(&major, &minor, &rev) return major, minor, rev def offset_size(): """Return the offset size (TILEDB_OFFSET_SIZE)""" return tiledb_offset_size() def regularize_tiling(tile, ndim): if not tile: return None elif np.isscalar(tile): tiling = tuple(int(tile) for _ in range(ndim)) elif (tile is str) or (len(tile) != ndim): raise ValueError("'tile' argument must be iterable " "and match array dimensionality") else: tiling = tuple(tile) return tiling def schema_like(*args, shape=None, dtype=None, ctx=None, **kw): """ Return an ArraySchema corresponding to a NumPy-like object or `shape` and `dtype` kwargs. Users are encouraged to pass 'tile' and 'capacity' keyword arguments as appropriate for a given application. 
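    A minimal usage sketch (the shape, dtype, tile, and capacity values below
    are illustrative, not defaults)::

        import numpy as np
        import tiledb

        schema = tiledb.schema_like(shape=(100, 100), dtype=np.float64,
                                    tile=10, capacity=1000)
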
:param A: NumPy array-like object, or TileDB reference URI, optional :param tuple shape: array shape, optional :param dtype: array dtype, optional :param Ctx ctx: TileDB Ctx :param kwargs: additional keyword arguments to pass through, optional :return: tiledb.ArraySchema """ if not ctx: ctx = default_ctx() def is_ndarray_like(obj): return hasattr(arr, 'shape') and hasattr(arr, 'dtype') and hasattr(arr, 'ndim') # support override of default dimension dtype dim_dtype = kw.pop('dim_dtype', np.uint64) if len(args) == 1: arr = args[0] if is_ndarray_like(arr): tiling = regularize_tiling(kw.pop('tile', None), arr.ndim) schema = schema_like_numpy(arr, tile=tiling, dim_dtype=dim_dtype, ctx=ctx) else: raise ValueError("expected ndarray-like object") elif shape and dtype: if np.issubdtype(np.bytes_, dtype): dtype = np.dtype('S') elif np.issubdtype(dtype, np.unicode_): dtype = np.dtype('U') ndim = len(shape) tiling = regularize_tiling(kw.pop('tile', None), ndim) dims = [] for d in range(ndim): # support smaller tile extents by kw # domain is based on full shape tile_extent = tiling[d] if tiling else shape[d] domain = (0, shape[d] - 1) dims.append(Dim(domain=domain, tile=tile_extent, dtype=dim_dtype, ctx=ctx)) att = Attr(dtype=dtype, ctx=ctx) dom = Domain(*dims, ctx=ctx) schema = ArraySchema(ctx=ctx, domain=dom, attrs=(att,), **kw) elif kw is not None: raise ValueError else: raise ValueError("Must provide either ndarray-like object or 'shape' " "and 'dtype' keyword arguments") return schema def schema_like_numpy(array, ctx=None, **kw): """create array schema from Numpy array-like object internal function. tiledb.schema_like is exported and recommended """ if not ctx: ctx = default_ctx() # create an ArraySchema from the numpy array object tiling = regularize_tiling(kw.pop('tile', None), array.ndim) attr_name = kw.pop('attr_name', '') dim_dtype = kw.pop('dim_dtype', np.uint64) dims = [] for (dim_num,d) in enumerate(range(array.ndim)): # support smaller tile extents by kw # domain is based on full shape tile_extent = tiling[d] if tiling else array.shape[d] domain = (0, array.shape[d] - 1) dims.append(Dim(domain=domain, tile=tile_extent, dtype=dim_dtype, ctx=ctx)) var = False if array.dtype == object: # for object arrays, we use the dtype of the first element # consistency check should be done later, if needed el0 = array.flat[0] if type(el0) is bytes: el_dtype = np.dtype('S') var = True elif type(el0) is str: el_dtype = np.dtype('U') var = True elif type(el0) == np.ndarray: if len(el0.shape) != 1: raise TypeError("Unsupported sub-array type for Attribute: {} " \ "(only string arrays and 1D homogeneous NumPy arrays are supported)". format(type(el0))) el_dtype = el0.dtype else: raise TypeError("Unsupported sub-array type for Attribute: {} " \ "(only strings and homogeneous-typed NumPy arrays are supported)". format(type(el0))) else: el_dtype = array.dtype att = Attr(dtype=el_dtype, name=attr_name, var=var, ctx=ctx) dom = Domain(*dims, ctx=ctx) return ArraySchema(ctx=ctx, domain=dom, attrs=(att,), **kw) # note: this function is cdef, so it must return a python object in order to # properly forward python exceptions raised within the function. 
See: # https://cython.readthedocs.io/en/latest/src/userguide/language_basics.html#error-return-values cdef dict get_query_fragment_info(tiledb_ctx_t* ctx_ptr, tiledb_query_t* query_ptr): cdef int rc = TILEDB_OK cdef uint32_t num_fragments cdef Py_ssize_t fragment_idx cdef const char* fragment_uri_ptr cdef unicode fragment_uri cdef uint64_t fragment_t1, fragment_t2 cdef dict result = dict() rc = tiledb_query_get_fragment_num(ctx_ptr, query_ptr, &num_fragments) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) if (num_fragments < 1): return result for fragment_idx in range(0, num_fragments): rc = tiledb_query_get_fragment_uri(ctx_ptr, query_ptr, fragment_idx, &fragment_uri_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) rc = tiledb_query_get_fragment_timestamp_range( ctx_ptr, query_ptr, fragment_idx, &fragment_t1, &fragment_t2) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) fragment_uri = fragment_uri_ptr.decode('UTF-8') result[fragment_uri] = (fragment_t1, fragment_t2) return result cdef _write_array(tiledb_ctx_t* ctx_ptr, tiledb_array_t* array_ptr, object tiledb_array, list coords_or_subarray, list attributes, list values, dict nullmaps, dict fragment_info, bint issparse): # used for buffer conversion (local import to avoid circularity) import tiledb.main cdef bint isfortran = False cdef Py_ssize_t nattr = len(attributes) cdef Py_ssize_t nattr_alloc = nattr # add 1 to nattr for sparse coordinates if issparse: nattr_alloc += tiledb_array.schema.ndim # Set up buffers cdef np.ndarray buffer_sizes = np.zeros((nattr_alloc,), dtype=np.uint64) cdef np.ndarray buffer_offsets_sizes = np.zeros((nattr_alloc,), dtype=np.uint64) cdef np.ndarray nullmaps_sizes = np.zeros((nattr_alloc,), dtype=np.uint64) output_values = list() output_offsets = list() for i in range(nattr): # if dtype is ASCII, ensure all characters are valid if tiledb_array.schema.attr(i).isascii: try: values[i] = np.asarray(values[i], dtype=np.bytes_) except Exception as exc: raise TileDBError(f'Attr\'s dtype is "ascii" but attr_val contains invalid ASCII characters') attr = tiledb_array.schema.attr(i) if attr.isvar: try: buffer, offsets = tiledb.main.array_to_buffer(values[i], True, False) except Exception as exc: raise type(exc)(f"Failed to convert buffer for attribute: '{attr.name}'") from exc buffer_offsets_sizes[i] = offsets.nbytes else: buffer, offsets = values[i], None buffer_sizes[i] = buffer.nbytes output_values.append(buffer) output_offsets.append(offsets) # Check value layouts if len(values): value = output_values[0] isfortran = value.ndim > 1 and value.flags.f_contiguous if nattr > 1: for i in range(1, nattr): value = values[i] if value.ndim > 1 and value.flags.f_contiguous and not isfortran: raise ValueError("mixed C and Fortran array layouts") #### Allocate and fill query #### cdef tiledb_query_t* query_ptr = NULL cdef int rc = TILEDB_OK rc = tiledb_query_alloc(ctx_ptr, array_ptr, TILEDB_WRITE, &query_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) cdef tiledb_layout_t layout = TILEDB_COL_MAJOR if isfortran else TILEDB_ROW_MAJOR # Set coordinate buffer size and name, and layout for sparse writes if issparse: for dim_idx in range(tiledb_array.schema.ndim): name = tiledb_array.schema.domain.dim(dim_idx).name val = coords_or_subarray[dim_idx] if tiledb_array.schema.domain.dim(dim_idx).isvar: buffer, offsets = tiledb.main.array_to_buffer(val, True, False) buffer_sizes[nattr + dim_idx] = buffer.nbytes buffer_offsets_sizes[nattr + dim_idx] = offsets.nbytes else: buffer, offsets = val, None buffer_sizes[nattr + dim_idx] 
= buffer.nbytes attributes.append(name) output_values.append(buffer) output_offsets.append(offsets) nattr += tiledb_array.schema.ndim layout = TILEDB_UNORDERED # Create nullmaps sizes array if necessary # Set layout rc = tiledb_query_set_layout(ctx_ptr, query_ptr, layout) if rc != TILEDB_OK: tiledb_query_free(&query_ptr) _raise_ctx_err(ctx_ptr, rc) cdef void* buffer_ptr = NULL cdef uint8_t* nulmap_buffer_ptr = NULL cdef uint cdef bytes battr_name cdef uint64_t* offsets_buffer_ptr = NULL cdef uint64_t* buffer_sizes_ptr = np.PyArray_DATA(buffer_sizes) cdef uint64_t* offsets_buffer_sizes_ptr = np.PyArray_DATA(buffer_offsets_sizes) cdef uint64_t* nullmaps_sizes_ptr = np.PyArray_DATA(nullmaps_sizes) # set subarray (ranges) cdef np.ndarray s_start cdef np.ndarray s_end cdef void* s_start_ptr = NULL cdef void* s_end_ptr = NULL cdef Domain dom = None cdef Dim dim = None cdef np.dtype dim_dtype = None if not issparse: dom = tiledb_array.schema.domain for dim_idx,s_range in enumerate(coords_or_subarray): dim = dom.dim(dim_idx) dim_dtype = dim.dtype s_start = np.asarray(s_range[0], dtype=dim_dtype) s_end = np.asarray(s_range[1], dtype=dim_dtype) s_start_ptr = np.PyArray_DATA(s_start) s_end_ptr = np.PyArray_DATA(s_end) if dim.isvar: rc = tiledb_query_add_range_var( ctx_ptr, query_ptr, dim_idx, s_start_ptr, s_start.nbytes, s_end_ptr, s_end.nbytes) else: rc = tiledb_query_add_range( ctx_ptr, query_ptr, dim_idx, s_start_ptr, s_end_ptr, NULL) if rc != TILEDB_OK: tiledb_query_free(&query_ptr) _raise_ctx_err(ctx_ptr, rc) try: for i in range(0, nattr): battr_name = attributes[i].encode('UTF-8') buffer_ptr = np.PyArray_DATA(output_values[i]) rc = tiledb_query_set_data_buffer(ctx_ptr, query_ptr, battr_name, buffer_ptr, &(buffer_sizes_ptr[i])) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) var = output_offsets[i] is not None nullable = attributes[i] in nullmaps if var: offsets_buffer_ptr = np.PyArray_DATA(output_offsets[i]) rc = tiledb_query_set_offsets_buffer(ctx_ptr, query_ptr, battr_name, offsets_buffer_ptr, &(offsets_buffer_sizes_ptr[i])) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) if attributes[i] in nullmaps: # NOTE: validity map is owned *by the caller* nulmap = nullmaps[attributes[i]] nullmaps_sizes[i] = len(nulmap) nulmap_buffer_ptr = np.PyArray_DATA(nulmap) rc = tiledb_query_set_validity_buffer( ctx_ptr, query_ptr, battr_name, nulmap_buffer_ptr, &(nullmaps_sizes_ptr[i]) ) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) with nogil: rc = tiledb_query_submit(ctx_ptr, query_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) rc = tiledb_query_finalize(ctx_ptr, query_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) if fragment_info is not False: assert(type(fragment_info) is dict) fragment_info.clear() fragment_info.update(get_query_fragment_info(ctx_ptr, query_ptr)) finally: tiledb_query_free(&query_ptr) return cdef class TileDBError(Exception): """TileDB Error Exception Captures and raises error return code (``TILEDB_ERR``) messages when calling ``libtiledb`` functions. 
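    A minimal sketch of catching the exception (the URI below is a
    deliberately nonexistent placeholder)::

        import tiledb

        try:
            tiledb.open("/path/to/missing/array")
        except tiledb.TileDBError as exc:
            print(exc.message)
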
The error message that is raised is the last error set for the :py:class:`tiledb.Ctx` A Python :py:class:`MemoryError` is raised on ``TILEDB_OOM`` """ @property def message(self): """The TileDB error message string :rtype: str :return: error message """ return self.args[0] cdef _raise_tiledb_error(tiledb_error_t* err_ptr): cdef const char* err_msg_ptr = NULL ret = tiledb_error_message(err_ptr, &err_msg_ptr) if ret != TILEDB_OK: tiledb_error_free(&err_ptr) if ret == TILEDB_OOM: raise MemoryError() raise TileDBError("error retrieving error message") cdef unicode message_string try: message_string = err_msg_ptr.decode('UTF-8', 'strict') finally: tiledb_error_free(&err_ptr) raise TileDBError(message_string) cdef _raise_ctx_err(tiledb_ctx_t* ctx_ptr, int rc): if rc == TILEDB_OK: return if rc == TILEDB_OOM: raise MemoryError() cdef tiledb_error_t* err_ptr = NULL cdef int ret = tiledb_ctx_get_last_error(ctx_ptr, &err_ptr) if ret != TILEDB_OK: tiledb_error_free(&err_ptr) if ret == TILEDB_OOM: raise MemoryError() raise TileDBError("error retrieving error object from ctx") _raise_tiledb_error(err_ptr) cpdef check_error(Ctx ctx, int rc): _raise_ctx_err(ctx.ptr, rc) def stats_enable(): """Enable TileDB internal statistics.""" tiledb_stats_enable() import tiledb.main tiledb.main.init_stats() def stats_disable(): """Disable TileDB internal statistics.""" tiledb_stats_disable() import tiledb.main tiledb.main.disable_stats() def stats_reset(): """Reset all TileDB internal statistics to 0.""" tiledb_stats_reset() import tiledb.main tiledb.main.init_stats() def stats_dump(version=True, print_out=True, include_python=True, json=False, verbose=True): """Return TileDB internal statistics as a string. :param include_python: Include TileDB-Py statistics :param print_out: Print string to console (default True), or return as string :param version: Include TileDB Embedded and TileDB-Py versions (default: True) :param json: Return stats JSON object (default: False) :param verbose: Print extended internal statistics (default: True) """ cdef char* stats_str_ptr = NULL; if json or not verbose: if tiledb_stats_raw_dump_str(&stats_str_ptr) == TILEDB_ERR: raise TileDBError("Unable to dump stats to stats_str_ptr.") else: if tiledb_stats_dump_str(&stats_str_ptr) == TILEDB_ERR: raise TileDBError("Unable to dump stats to stats_str_ptr.") stats_str_core = stats_str_ptr.decode("UTF-8", "strict").strip() if json or not verbose: import json stats_json_core = json.loads(stats_str_core) if json: return stats_json_core if tiledb_stats_free_str(&stats_str_ptr) == TILEDB_ERR: raise TileDBError("Unable to free stats_str_ptr.") stats_str = "" if version: import tiledb stats_str += f"TileDB Embedded Version: {tiledb.libtiledb.version()}\n" stats_str += f"TileDB-Py Version: {tiledb.version.version}\n" if not verbose: stats_str += "\n==== READ ====\n\n" stats_str += "- Number of read queries: {}\n".format( stats_json_core["READ_NUM"]) stats_str += "- Number of attributes read: {}\n".format( stats_json_core["READ_ATTR_FIXED_NUM"] + stats_json_core["READ_ATTR_VAR_NUM"]) stats_str += "- Time to compute estimated result size: {}\n".format( stats_json_core["READ_COMPUTE_EST_RESULT_SIZE"]) stats_str += "- Read time: {}\n".format(stats_json_core["READ"]) stats_str += "- Total read query time (array open + init state + read): {}\n".format( stats_json_core["READ"] + stats_json_core["READ_INIT_STATE"]) else: stats_str += "\n" stats_str += stats_str_core stats_str += "\n" if include_python: import tiledb.main stats_str += 
tiledb.main.python_internal_stats() if print_out: print(stats_str) else: return stats_str cpdef unicode ustring(object s): """Coerce a python object to a unicode string""" if type(s) is unicode: return s elif PY_MAJOR_VERSION < 3 and isinstance(s, bytes): return ( s).decode('ascii') elif isinstance(s, unicode): return unicode(s) raise TypeError( "ustring() must be a string or a bytes-like object" ", not {0!r}".format(type(s))) cdef bytes unicode_path(object path): """Returns a UTF-8 encoded byte representation of a given URI path string""" return ustring(path).encode('UTF-8') def safe_repr(obj): """repr an object, without raising exception. Return placeholder string on failure""" try: return repr(obj) except: return "" def dtype_range(np.dtype dtype): """Return the range of a Numpy dtype""" if np.issubdtype(dtype, np.integer): info = np.iinfo(dtype) dtype_min, dtype_max = info.min, info.max elif np.issubdtype(dtype, np.floating): info = np.finfo(dtype) dtype_min, dtype_max = info.min, info.max elif dtype.kind == 'M': info = np.iinfo(np.int64) date_unit = np.datetime_data(dtype)[0] # +1 to exclude NaT dtype_min = np.datetime64(info.min + 1, date_unit) dtype_max = np.datetime64(info.max, date_unit) else: raise TypeError("invalid Dim dtype {0!r}".format(dtype)) return (dtype_min, dtype_max) ############################################################################### # # # CLASS DEFINITIONS # # # ############################################################################### cdef class Config(object): """TileDB Config class The Config object stores configuration parameters for both TileDB Embedded and TileDB-Py. For TileDB Embedded parameters, see: https://docs.tiledb.com/main/how-to/configuration#configuration-parameters The following configuration options are supported by TileDB-Py: - `py.init_buffer_bytes`: Initial allocation size in bytes for attribute and dimensions buffers. If result size exceed the pre-allocated buffer(s), then the query will return incomplete and TileDB-Py will allocate larger buffers and resubmit. Specifying a sufficiently large buffer size will often improve performance. Default 10 MB (1024**2 * 10). - `py.use_arrow`: Use `pyarrow` from the Apache Arrow project to convert query results into Pandas dataframe format when requested. Default `True`. - `py.deduplicate`: Attempt to deduplicate Python objects during buffer conversion to Python. Deduplication may reduce memory usage for datasets with many identical strings, at the cost of some performance reduction due to hash calculation/lookup for each object. Unknown parameters will be ignored! 
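    A short usage sketch (the buffer size chosen here is arbitrary)::

        import tiledb

        cfg = tiledb.Config({"py.init_buffer_bytes": str(1024**2 * 100)})
        ctx = tiledb.Ctx(cfg)
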
:param dict params: Set parameter values from dict like object :param str path: Set parameter values from persisted Config parameter file """ def __cinit__(self): self.ptr = NULL def __dealloc__(self): if self.ptr != NULL: tiledb_config_free(&self.ptr) def __init__(self, params=None, path=None): cdef tiledb_config_t* config_ptr = NULL cdef tiledb_error_t* err_ptr = NULL cdef int rc = tiledb_config_alloc(&config_ptr, &err_ptr) if rc == TILEDB_OOM: raise MemoryError() elif rc == TILEDB_ERR: _raise_tiledb_error(err_ptr) assert(config_ptr != NULL) self.ptr = config_ptr if path is not None: self.load(path) if params is not None: self.update(params) @staticmethod cdef from_ptr(tiledb_config_t* ptr): """Constructs a Config class instance from a (non-null) tiledb_config_t pointer""" assert(ptr != NULL) cdef Config config = Config.__new__(Config) config.ptr = ptr return config @staticmethod def load(object uri): """Constructs a Config class instance from config parameters loaded from a local Config file :parameter str uri: a local URI config file path :rtype: tiledb.Config :return: A TileDB Config instance with persisted parameter values :raises TypeError: `uri` cannot be converted to a unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef Config config = Config.__new__(Config) cdef tiledb_config_t* config_ptr = NULL cdef tiledb_error_t* err_ptr = NULL cdef int rc = tiledb_config_alloc(&config_ptr, &err_ptr) if rc == TILEDB_OOM: raise MemoryError() if rc == TILEDB_ERR: _raise_tiledb_error(err_ptr) with nogil: rc = tiledb_config_load_from_file(config_ptr, uri_ptr, &err_ptr) if rc == TILEDB_OOM: tiledb_config_free(&config_ptr) raise MemoryError() if rc == TILEDB_ERR: tiledb_config_free(&config_ptr) _raise_tiledb_error(err_ptr) assert(config_ptr != NULL) config.ptr = config_ptr return config def __setitem__(self, object key, object value): """Sets a config parameter value. :param str key: Name of parameter to set :param str value: Value of parameter to set :raises TypeError: `key` or `value` cannot be encoded into a UTF-8 string :raises: :py:exc:`tiledb.TileDBError` """ key, value = unicode(key), unicode(value) cdef bytes bparam = key.encode('UTF-8') cdef bytes bvalue = value.encode('UTF-8') cdef tiledb_error_t* err_ptr = NULL cdef int rc = tiledb_config_set(self.ptr, bparam, bvalue, &err_ptr) if rc == TILEDB_OOM: raise MemoryError() elif rc == TILEDB_ERR: _raise_tiledb_error(err_ptr) return def get(self, object key, raise_keyerror = True): key = unicode(key) cdef bytes bparam = key.encode('UTF-8') cdef const char* value_ptr = NULL cdef tiledb_error_t* err_ptr = NULL cdef int rc = tiledb_config_get(self.ptr, bparam, &value_ptr, &err_ptr) if rc == TILEDB_OOM: raise MemoryError() elif rc == TILEDB_ERR: _raise_tiledb_error(err_ptr) if value_ptr == NULL: if raise_keyerror: raise KeyError(key) else: return None cdef bytes value = PyBytes_FromString(value_ptr) return value.decode('UTF-8') def __getitem__(self, object key): """Gets a config parameter value. :param str key: Name of parameter to get :return: Config parameter value string :rtype str: :raises TypeError: `key` cannot be encoded into a UTF-8 string :raises KeyError: Config parameter not found :raises: :py:exc:`tiledb.TileDBError` """ return self.get(key, True) def __delitem__(self, object key): """ Removes a configured parameter (resetting it to its default). :param str key: Name of parameter to reset. 
:raises TypeError: `key` cannot be encoded into a UTF-8 string """ key = unicode(key) cdef bytes bkey = ustring(key).encode("UTF-8") cdef tiledb_error_t* err_ptr = NULL cdef int rc = tiledb_config_unset(self.ptr, bkey, &err_ptr) if rc == TILEDB_OOM: raise MemoryError() elif rc == TILEDB_ERR: _raise_tiledb_error(err_ptr) return def __iter__(self): """Returns an iterator over the Config parameters (keys)""" return ConfigKeys(self) def __len__(self): """Returns the number of parameters (keys) held by the Config object""" return sum(1 for _ in self) def __eq__(self, object config): if not isinstance(config, Config): return False keys = set(self.keys()) okeys = set(config.keys()) if keys != okeys: return False for k in keys: val, oval = self[k], config[k] if val != oval: return False return True def __repr__(self): colnames = ["Parameter", "Value"] params = list(self.keys()) values = list(map(repr, self.values())) colsizes = [max(len(colnames[0]), *map(len, (p for p in params))), max(len(colnames[1]), *map(len, (v for v in values)))] format_str = ' | '.join("{{:<{}}}".format(i) for i in colsizes) output = [] output.append(format_str.format(colnames[0], colnames[1])) output.append(format_str.format('-' * colsizes[0], '-' * colsizes[1])) output.extend(format_str.format(p, v) for p, v in zip(params, values)) return "\n".join(output) def _repr_html_(self): output = io.StringIO() output.write("
\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") params = list(self.keys()) values = list(map(repr, self.values())) for p, v in zip(params, values): output.write("\n") output.write(f"\n") output.write(f"\n") output.write("\n") output.write("
ParameterValue
{p}{v}
\n") output.write("
\n") return output.getvalue() def items(self, prefix=u""): """Returns an iterator object over Config parameters, values :param str prefix: return only parameters with a given prefix :rtype: ConfigItems :returns: iterator over Config parameter, value tuples """ return ConfigItems(self, prefix=prefix) def keys(self, prefix=u""): """Returns an iterator object over Config parameters (keys) :param str prefix: return only parameters with a given prefix :rtype: ConfigKeys :returns: iterator over Config parameter string keys """ return ConfigKeys(self, prefix=prefix) def values(self, prefix=u""): """Returns an iterator object over Config values :param str prefix: return only parameters with a given prefix :rtype: ConfigValues :returns: iterator over Config string values """ return ConfigValues(self, prefix=prefix) def dict(self, prefix=u""): """Returns a dict representation of a Config object :param str prefix: return only parameters with a given prefix :rtype: dict :return: Config parameter / values as a a Python dict """ return dict(ConfigItems(self, prefix=prefix)) def clear(self): """Unsets all Config parameters (returns them to their default values)""" for k in self.keys(): del self[k] def get(self, key, *args): """Gets the value of a config parameter, or a default value. :param str key: Config parameter :param args: return `arg` if Config does not contain parameter `key` :return: Parameter value, `arg` or None. """ nargs = len(args) if nargs > 1: raise TypeError("get expected at most 2 arguments, got {}".format(nargs)) try: return self[key] except KeyError: return args[0] if nargs == 1 else None def update(self, object odict): """Update a config object with parameter, values from a dict like object :param odict: dict-like object containing parameter, values to update Config. 
""" for (key, value) in odict.items(): self[key] = value return def from_file(self, path): """Update a Config object with from a persisted config file :param path: A local Config file path """ config = Config.load(path) self.update(config) def save(self, uri): """Persist Config parameter values to a config file :parameter str uri: a local URI config file path :raises TypeError: `uri` cannot be converted to a unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef tiledb_config_t* config_ptr = self.ptr cdef tiledb_error_t* err_ptr = NULL cdef int rc with nogil: rc = tiledb_config_save_to_file(config_ptr, uri_ptr, &err_ptr) if rc == TILEDB_OOM: raise MemoryError() elif rc == TILEDB_ERR: _raise_tiledb_error(err_ptr) return cdef class ConfigKeys(object): """ An iterator object over Config parameter strings (keys) """ def __init__(self, Config config, prefix=u""): self.config_items = ConfigItems(config, prefix=prefix) def __iter__(self): return self def __next__(self): (k, _) = self.config_items.__next__() return k cdef class ConfigValues(object): """ An iterator object over Config parameter value strings """ def __init__(self, Config config, prefix=u""): self.config_items = ConfigItems(config, prefix=prefix) def __iter__(self): return self def __next__(self): (_, v) = self.config_items.__next__() return v cdef class ConfigItems(object): """ An iterator object over Config parameter, values :param config: TileDB Config object :type config: tiledb.Config :param prefix: (default "") Filter paramter names with given prefix :type prefix: str """ def __cinit__(self): self.ptr = NULL def __dealloc__(self): if self.ptr != NULL: tiledb_config_iter_free(&self.ptr) def __init__(self, Config config, prefix=u""): cdef bytes bprefix = prefix.encode("UTF-8") cdef const char* prefix_ptr = PyBytes_AS_STRING(bprefix) cdef tiledb_config_iter_t* config_iter_ptr = NULL cdef tiledb_error_t* err_ptr = NULL cdef rc = tiledb_config_iter_alloc( config.ptr, prefix_ptr, &config_iter_ptr, &err_ptr) if rc == TILEDB_OOM: raise MemoryError() elif rc == TILEDB_ERR: _raise_tiledb_error(err_ptr) assert (config_iter_ptr != NULL) self.config = config self.ptr = config_iter_ptr def __iter__(self): return self def __next__(self): cdef int done = 0 cdef tiledb_error_t* err_ptr = NULL cdef int rc = tiledb_config_iter_done(self.ptr, &done, &err_ptr) if rc == TILEDB_OOM: raise MemoryError() elif rc == TILEDB_ERR: _raise_tiledb_error(err_ptr) if done > 0: raise StopIteration() cdef const char* param_ptr = NULL cdef const char* value_ptr = NULL rc = tiledb_config_iter_here(self.ptr, ¶m_ptr, &value_ptr, &err_ptr) if rc == TILEDB_OOM: raise MemoryError() elif rc == TILEDB_ERR: _raise_tiledb_error(err_ptr) cdef bytes bparam cdef bytes bvalue if param_ptr == NULL: bparam = b'' else: bparam = PyBytes_FromString(param_ptr) if value_ptr == NULL: bvalue = b'' else: bvalue = PyBytes_FromString(value_ptr) rc = tiledb_config_iter_next(self.ptr, &err_ptr) if rc == TILEDB_OOM: raise MemoryError() elif rc == TILEDB_ERR: _raise_tiledb_error(err_ptr) return (bparam.decode('UTF-8'), bvalue.decode('UTF-8')) cdef class Ctx(object): """Class representing a TileDB context. A TileDB context wraps a TileDB storage manager. 
:param config: Initialize Ctx with given config parameters :type config: tiledb.Config or dict """ def __cinit__(self): self.ptr = NULL def __dealloc__(self): if self.ptr != NULL: tiledb_ctx_free(&self.ptr) def __capsule__(self): if self.ptr == NULL: raise TileDBError("internal error: cannot create capsule for uninitialized Ctx!") cdef const char* name = "ctx" cap = PyCapsule_New((self.ptr), name, NULL) return cap def __init__(self, config=None): cdef Config _config = Config() if config is not None: if isinstance(config, Config): _config = config else: _config.update(config) cdef tiledb_ctx_t* ctx_ptr = NULL cdef int rc = tiledb_ctx_alloc(_config.ptr, &ctx_ptr) if rc == TILEDB_OOM: raise MemoryError() elif rc == TILEDB_ERR: # we assume that the ctx pointer is valid if not OOM # the ctx object will be free'd when it goes out of scope # after the exception is raised _raise_ctx_err(ctx_ptr, rc) self.ptr = ctx_ptr self._set_default_tags() def __repr__(self): return "tiledb.Ctx() [see Ctx.config() for configuration]" def config(self): """Returns the Config instance associated with the Ctx.""" cdef tiledb_config_t* config_ptr = NULL check_error(self, tiledb_ctx_get_config(self.ptr, &config_ptr)) return Config.from_ptr(config_ptr) def set_tag(self, key, value): """Sets a (string, string) "tag" on the Ctx (internal).""" cdef tiledb_ctx_t* ctx_ptr = self.ptr bkey = key.encode('UTF-8') bvalue = value.encode('UTF-8') cdef int rc = TILEDB_OK rc = tiledb_ctx_set_tag(ctx_ptr, bkey, bvalue) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) def _set_default_tags(self): """Sets all default tags on the Ctx""" self.set_tag('x-tiledb-api-language', 'python') self.set_tag('x-tiledb-api-language-version', '{}.{}.{}'.format(*sys.version_info)) self.set_tag('x-tiledb-api-sys-platform', sys.platform) def get_stats(self, print_out=True, json=False): """Retrieves the stats from a TileDB context. :param print_out: Print string to console (default True), or return as string :param json: Return stats JSON object (default: False) """ cdef tiledb_ctx_t* ctx_ptr = self.ptr cdef int rc = TILEDB_OK cdef char* stats_bytes rc = tiledb_ctx_get_stats(ctx_ptr, &stats_bytes) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) cdef unicode stats = stats_bytes.decode('UTF-8', 'strict') if json: import json output = json.loads(stats) else: output = stats if print_out: print(output) else: return output def _tiledb_datetime_extent(begin, end): """ Returns the integer extent of a datetime range. :param begin: beginning of datetime range :type begin: numpy.datetime64 :param end: end of datetime range :type end: numpy.datetime64 :return: Extent of range, returned as an integer number of time units :rtype: int """ extent = end - begin + 1 date_unit = np.datetime_data(extent.dtype)[0] one = np.timedelta64(1, date_unit) # Dividing a timedelta by 1 will convert the timedelta to an integer return int(extent / one) cdef bint _tiledb_type_is_datetime(tiledb_datatype_t tiledb_type) except? 
False: """Returns True if the tiledb type is a datetime type""" return tiledb_type in (TILEDB_DATETIME_YEAR, TILEDB_DATETIME_MONTH, TILEDB_DATETIME_WEEK, TILEDB_DATETIME_DAY, TILEDB_DATETIME_HR, TILEDB_DATETIME_MIN, TILEDB_DATETIME_SEC, TILEDB_DATETIME_MS, TILEDB_DATETIME_US, TILEDB_DATETIME_NS, TILEDB_DATETIME_PS, TILEDB_DATETIME_FS, TILEDB_DATETIME_AS) def _tiledb_type_to_datetime(tiledb_datatype_t tiledb_type): """ Return a datetime64 with appropriate unit for the given tiledb_datetype_t enum value """ tdb_type = _tiledb_dtype_to_datetime_convert.get(tiledb_type, None) if tdb_type is None: raise TypeError("tiledb type is not a datetime {0!r}".format(tiledb_type)) return tdb_type cdef tiledb_datatype_t _tiledb_dtype_datetime(np.dtype dtype) except? TILEDB_DATETIME_YEAR: """Return tiledb_datetype_t enum value for a given np.datetime64 dtype""" if dtype.kind != 'M': raise TypeError("data type {0!r} not a datetime".format(dtype)) date_unit = np.datetime_data(dtype)[0] if date_unit == 'generic': raise TypeError("datetime {0!r} does not specify a date unit".format(dtype)) tdb_dt = _datetime_tiledb_dtype_convert.get(date_unit, None) if tdb_dt is None: raise TypeError("np type is not a datetime {0!r}".format(date_unit)) return tdb_dt def _tiledb_cast_tile_extent(tile_extent, dtype): """Given a tile extent value, cast it to np.array of the given numpy dtype.""" # Special handling for datetime domains if dtype.kind == 'M': date_unit = np.datetime_data(dtype)[0] if isinstance(tile_extent, np.timedelta64): extent_value = int(tile_extent / np.timedelta64(1, date_unit)) tile_size_array = np.array(np.int64(extent_value), dtype=np.int64) else: tile_size_array = np.array(tile_extent, dtype=dtype) else: tile_size_array = np.array(tile_extent, dtype=dtype) if tile_size_array.size != 1: raise ValueError("tile extent must be a scalar") return tile_size_array cdef int _numpy_typeid(tiledb_datatype_t tiledb_dtype): """Return a numpy type num (int) given a tiledb_datatype_t enum value.""" np_id_type = _tiledb_dtype_to_numpy_typeid_convert.get(tiledb_dtype, None) if np_id_type: return np_id_type return np.NPY_DATETIME if _tiledb_type_is_datetime(tiledb_dtype) else np.NPY_NOTYPE cdef _numpy_dtype(tiledb_datatype_t tiledb_dtype, cell_size = 1): """Return a numpy type given a tiledb_datatype_t enum value.""" cdef base_dtype cdef uint32_t cell_val_num = cell_size if cell_val_num == 1: if tiledb_dtype in _tiledb_dtype_to_numpy_dtype_convert: return _tiledb_dtype_to_numpy_dtype_convert[tiledb_dtype] elif _tiledb_type_is_datetime(tiledb_dtype): return _tiledb_type_to_datetime(tiledb_dtype) elif cell_val_num == 2 and tiledb_dtype == TILEDB_FLOAT32: return np.complex64 elif cell_val_num == 2 and tiledb_dtype == TILEDB_FLOAT64: return np.complex128 elif tiledb_dtype in (TILEDB_CHAR, TILEDB_STRING_UTF8): if tiledb_dtype == TILEDB_CHAR: dtype_str = '|S' elif tiledb_dtype == TILEDB_STRING_UTF8: dtype_str = '|U' if cell_val_num != TILEDB_VAR_NUM: dtype_str += str(cell_val_num) return np.dtype(dtype_str) elif cell_val_num == TILEDB_VAR_NUM: base_dtype = _numpy_dtype(tiledb_dtype, cell_size=1) return base_dtype elif cell_val_num > 1: # construct anonymous record dtype base_dtype = _numpy_dtype(tiledb_dtype, cell_size=1) rec = np.dtype([('', base_dtype)] * cell_val_num) return rec raise TypeError("tiledb datatype not understood") """ cdef _numpy_scalar(tiledb_datatype_t typ, void* data, uint64_t nbytes): # Return a numpy scalar object from a tiledb_datatype_t enum type value and void pointer to scalar data if typ == 
TILEDB_CHAR: # bytes type, ensure a full copy return PyBytes_FromStringAndSize( data, nbytes) # fixed size numeric type cdef int type_num = _numpy_type_num(typ) return PyArray_Scalar(data, np.PyArray_DescrFromType(type_num), None) """ cdef tiledb_layout_t _tiledb_layout(object order) except TILEDB_UNORDERED: """Return the tiledb_layout_t enum value given a layout string label.""" if order == "row-major" or order == 'C': return TILEDB_ROW_MAJOR elif order == "col-major" or order == 'F': return TILEDB_COL_MAJOR elif order == "global": return TILEDB_GLOBAL_ORDER elif order == "hilbert" or order == 'H': return TILEDB_HILBERT elif order == None or order == "unordered" or order == 'U': return TILEDB_UNORDERED raise ValueError("unknown tiledb layout: {0!r}".format(order)) cdef unicode _tiledb_layout_string(tiledb_layout_t order): """ Return the unicode string label given a tiledb_layout_t enum value """ tiledb_order_to_string ={ TILEDB_ROW_MAJOR: u"row-major", TILEDB_COL_MAJOR: u"col-major", TILEDB_GLOBAL_ORDER: u"global", TILEDB_UNORDERED: u"unordered", TILEDB_HILBERT: u"hilbert" } if order not in tiledb_order_to_string: raise ValueError("unknown tiledb order: {0!r}".format(order)) return tiledb_order_to_string[order] cdef class Filter(object): """Base class for all TileDB filters.""" def __cinit__(self): self.ptr = NULL def __dealloc__(self): if self.ptr != NULL: tiledb_filter_free(&self.ptr) def __init__(self, tiledb_filter_type_t filter_type, Ctx ctx=None): if not ctx: ctx = default_ctx() cdef tiledb_ctx_t* ctx_ptr = ctx.ptr cdef tiledb_filter_t* filter_ptr = NULL cdef int rc = TILEDB_OK rc = tiledb_filter_alloc(ctx_ptr, filter_type, &filter_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) self.ctx = ctx self.ptr = filter_ptr return def __repr__(self): output = io.StringIO() output.write(f"{type(self).__name__}(") if hasattr(self, '_attrs_'): for f in self._attrs_(): a = getattr(self, f) output.write(f"{f}={a}") output.write(")") return output.getvalue() def _repr_html_(self): output = io.StringIO() output.write("
\n") output.write("\n") output.write("\n") output.write("\n") if hasattr(self, '_attrs_'): for f in self._attrs_(): output.write(f"") output.write("\n") output.write("\n") output.write(f"\n") if hasattr(self, '_attrs_'): for f in self._attrs_(): output.write(f"") output.write("\n") output.write("
{f}
{type(self).__name__}{getattr(self, f)}
\n") output.write("
\n") return output.getvalue() def __eq__(self, other): if type(self) != type(other): return False for f in self._attrs_(): left = getattr(self, f) right = getattr(other, f) if left != right: return False return True cdef class CompressionFilter(Filter): """ Base class for filters performing compression. All compression filters support a compression level option, although some (such as RLE) ignore it. **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.GzipFilter(level=10)])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) """ def __init__(self, tiledb_filter_type_t filter_type, level, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(filter_type, ctx) if level is None: return cdef tiledb_ctx_t* ctx_ptr = ctx.ptr cdef int clevel = int(level) cdef int rc = TILEDB_OK rc = tiledb_filter_set_option(ctx_ptr, self.ptr, TILEDB_COMPRESSION_LEVEL, &clevel) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) @property def level(self): """The compression level setting for the filter. Every compressor interprets this value differently (some ignore it, such as RLE). :return: compression level :rtype: int """ cdef int32_t rc = TILEDB_OK cdef tiledb_filter_option_t option = TILEDB_COMPRESSION_LEVEL cdef int32_t level = -1 rc = tiledb_filter_get_option(self.ctx.ptr, self.ptr, option, &level) if rc != TILEDB_OK: _raise_ctx_err(self.ctx.ptr, rc) return level cdef class NoOpFilter(Filter): """A filter that does nothing.""" @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef NoOpFilter filter_obj = NoOpFilter.__new__(NoOpFilter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = filter_ptr return filter_obj def __init__(self, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_NONE, ctx=ctx) def _attrs_(self): return {} cdef class GzipFilter(CompressionFilter): """ Filter that compresses using gzip. :param ctx: TileDB Ctx :type ctx: tiledb.Ctx :param level: (default None) If not None set the compressor level :type level: int **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.GzipFilter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef GzipFilter filter_obj = GzipFilter.__new__(GzipFilter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = filter_ptr return filter_obj def __init__(self, level=None, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_GZIP, level, ctx=ctx) def _attrs_(self): return {'level': self.level} cdef class ZstdFilter(CompressionFilter): """ Filter that compresses using zstd. :param ctx: TileDB Ctx :type ctx: tiledb.Ctx :param level: (default None) If not None set the compressor level :type level: int **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... 
dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.ZstdFilter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef ZstdFilter filter_obj = ZstdFilter.__new__(ZstdFilter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = filter_ptr return filter_obj def __init__(self, level=None, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_ZSTD, level, ctx=ctx) def _attrs_(self): return {'level': self.level} cdef class LZ4Filter(CompressionFilter): """ Filter that compresses using lz4. :param ctx: TileDB Ctx :type ctx: tiledb.Ctx :param level: (default None) If not None set the compressor level :type level: int **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.LZ4Filter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef LZ4Filter filter_obj = LZ4Filter.__new__(LZ4Filter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = filter_ptr return filter_obj def __init__(self, level=None, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_LZ4, level, ctx) def _attrs_(self): return {'level': self.level} cdef class Bzip2Filter(CompressionFilter): """ Filter that compresses using bzip2. :param level: (default None) If not None set the compressor level :type level: int **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.Bzip2Filter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef Bzip2Filter filter_obj = Bzip2Filter.__new__(Bzip2Filter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = filter_ptr return filter_obj def __init__(self, level=None, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_BZIP2, level, ctx=ctx) def _attrs_(self): return {'level': self.level} cdef class RleFilter(CompressionFilter): """ Filter that compresses using run-length encoding (RLE). **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.RleFilter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... 
tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef RleFilter filter_obj = RleFilter.__new__(RleFilter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = <tiledb_filter_t*> filter_ptr return filter_obj def __init__(self, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_RLE, None, ctx=ctx) def _attrs_(self): return {} cdef class DoubleDeltaFilter(CompressionFilter): """ Filter that performs double-delta encoding. **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.DoubleDeltaFilter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef DoubleDeltaFilter filter_obj = DoubleDeltaFilter.__new__(DoubleDeltaFilter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = <tiledb_filter_t*> filter_ptr return filter_obj def __init__(self, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_DOUBLE_DELTA, None, ctx) def _attrs_(self): return {} cdef class BitShuffleFilter(Filter): """ Filter that performs a bit shuffle transformation. **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.BitShuffleFilter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef BitShuffleFilter filter_obj = BitShuffleFilter.__new__(BitShuffleFilter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = <tiledb_filter_t*> filter_ptr return filter_obj def __init__(self, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_BITSHUFFLE, ctx=ctx) def _attrs_(self): return {} cdef class ByteShuffleFilter(Filter): """ Filter that performs a byte shuffle transformation. **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.ByteShuffleFilter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef ByteShuffleFilter filter_obj = ByteShuffleFilter.__new__(ByteShuffleFilter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = <tiledb_filter_t*> filter_ptr return filter_obj def __init__(self, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_BYTESHUFFLE, ctx=ctx) def _attrs_(self): return {} cdef class BitWidthReductionFilter(Filter): """Filter that performs bit-width reduction.
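Roughly speaking, this filter windows the input and re-encodes each window of integer values using the smallest bit width that covers the window's value range; for example, int64 values that all fall within [0, 255] can be stored in one byte each. (Informal summary; see the TileDB documentation for the precise algorithm.)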
:param ctx: A TileDB Context :type ctx: tiledb.Ctx :param window: (default None) max window size for the filter :type window: int **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.BitWidthReductionFilter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef BitWidthReductionFilter filter_obj = BitWidthReductionFilter.__new__(BitWidthReductionFilter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = filter_ptr return filter_obj def __init__(self, window=None, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_BIT_WIDTH_REDUCTION, ctx) if window is None: return cdef tiledb_ctx_t* ctx_ptr = ctx.ptr cdef unsigned int cwindow = window cdef int rc = TILEDB_OK rc = tiledb_filter_set_option(ctx_ptr, self.ptr, TILEDB_BIT_WIDTH_MAX_WINDOW, &cwindow) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) def _attrs_(self): return {'window': self.window} @property def window(self): """ :return: The maximum window size used for the filter :rtype: int """ cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_filter_t* filter_ptr = self.ptr cdef unsigned int cwindow = 0 cdef int rc = TILEDB_OK rc = tiledb_filter_get_option(ctx_ptr, filter_ptr, TILEDB_BIT_WIDTH_MAX_WINDOW, &cwindow) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return int(cwindow) cdef class PositiveDeltaFilter(Filter): """ Filter that performs positive-delta encoding. :param ctx: A TileDB Context :type ctx: tiledb.Ctx :param window: (default None) the max window for the filter :type window: int **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.PositiveDeltaFilter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... 
tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef PositiveDeltaFilter filter_obj = PositiveDeltaFilter.__new__(PositiveDeltaFilter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = filter_ptr return filter_obj def __init__(self, window=None, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_POSITIVE_DELTA, ctx=ctx) if window is None: return cdef tiledb_ctx_t* ctx_ptr = ctx.ptr cdef unsigned int cwindow = window cdef int rc = TILEDB_OK rc = tiledb_filter_set_option(ctx_ptr, self.ptr, TILEDB_POSITIVE_DELTA_MAX_WINDOW, &cwindow) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) def _attrs_(self): return {'window': self.window} @property def window(self): """ :return: The maximum window size used for the filter :rtype: int """ cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_filter_t* filter_ptr = self.ptr cdef unsigned int cwindow = 0 cdef int rc = TILEDB_OK rc = tiledb_filter_get_option(ctx_ptr, filter_ptr, TILEDB_POSITIVE_DELTA_MAX_WINDOW, &cwindow) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return int(cwindow) cdef class ChecksumMD5Filter(Filter): """ MD5 checksum filter. **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.ChecksumMD5Filter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef ChecksumMD5Filter filter_obj = ChecksumMD5Filter.__new__(ChecksumMD5Filter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = filter_ptr return filter_obj def __init__(self, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_CHECKSUM_MD5, ctx=ctx) def _attrs_(self): return {} cdef class ChecksumSHA256Filter(Filter): """ SHA256 checksum filter. **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, ... filters=tiledb.FilterList([tiledb.ChecksumSHA256Filter()])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) """ @staticmethod cdef from_ptr(const tiledb_filter_t* filter_ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(filter_ptr != NULL) cdef ChecksumSHA256Filter filter_obj = ChecksumSHA256Filter.__new__(ChecksumSHA256Filter) filter_obj.ctx = ctx # need to cast away the const filter_obj.ptr = filter_ptr return filter_obj def __init__(self, Ctx ctx=None): if not ctx: ctx = default_ctx() super().__init__(TILEDB_FILTER_CHECKSUM_SHA256, ctx=ctx) def _attrs_(self): return {} cdef Filter _filter_type_ptr_to_filter(Ctx ctx, tiledb_filter_type_t filter_type, tiledb_filter_t* filter_ptr): """ Return a filter instance for the given type. 
""" if filter_type == TILEDB_FILTER_NONE: return NoOpFilter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_GZIP: return GzipFilter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_ZSTD: return ZstdFilter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_LZ4: return LZ4Filter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_RLE: return RleFilter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_BZIP2: return Bzip2Filter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_DOUBLE_DELTA: return DoubleDeltaFilter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_BIT_WIDTH_REDUCTION: return BitWidthReductionFilter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_BITSHUFFLE: return BitShuffleFilter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_BYTESHUFFLE: return ByteShuffleFilter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_POSITIVE_DELTA: return PositiveDeltaFilter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_CHECKSUM_MD5: return ChecksumSHA256Filter.from_ptr(filter_ptr, ctx=ctx) elif filter_type == TILEDB_FILTER_CHECKSUM_SHA256: return ChecksumMD5Filter.from_ptr(filter_ptr, ctx=ctx) else: raise ValueError("unknown filter type tag: {:s}".format(filter_type)) cdef class FilterList(object): """ An ordered list of Filter objects for filtering TileDB data. FilterLists contain zero or more Filters, used for filtering attribute data, the array coordinate data, etc. :param ctx: A TileDB context :type ctx: tiledb.Ctx :param filters: An iterable of Filter objects to add. :param chunksize: (default None) chunk size used by the filter list in bytes :type chunksize: int **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... # Create several filters ... gzip_filter = tiledb.GzipFilter() ... bw_filter = tiledb.BitWidthReductionFilter() ... # Create a filter list that will first perform bit width reduction, then gzip compression. ... filters = tiledb.FilterList([bw_filter, gzip_filter]) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, filters=filters) ... # Create a second attribute filtered only by gzip compression. ... a2 = tiledb.Attr(name="a2", dtype=np.int64, ... filters=tiledb.FilterList([gzip_filter])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1, a2)) ... 
tiledb.DenseArray.create(tmp + "/array", schema) """ def __init__(self, filters=None, chunksize=None, Ctx ctx=None): if not ctx: ctx = default_ctx() if filters is not None: filters = list(filters) for f in filters: if not isinstance(f, Filter): raise ValueError("filters argument must be an iterable of TileDB filter objects") if chunksize is not None: if not isinstance(chunksize, int): raise TypeError("chunksize keyword argument must be an integer or None") if chunksize <= 0: raise ValueError("chunksize argument must be > 0") cdef tiledb_ctx_t* ctx_ptr = ctx.ptr cdef tiledb_filter_list_t* filter_list_ptr = NULL cdef int rc = TILEDB_OK rc = tiledb_filter_list_alloc(ctx_ptr, &filter_list_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) cdef tiledb_filter_t* filter_ptr = NULL cdef Filter filter if filters is not None: try: for f in filters: filter_ptr = (<Filter> f).ptr rc = tiledb_filter_list_add_filter(ctx_ptr, filter_list_ptr, filter_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) except: tiledb_filter_list_free(&filter_list_ptr) raise if chunksize is not None: rc = tiledb_filter_list_set_max_chunk_size(ctx_ptr, filter_list_ptr, chunksize) if rc != TILEDB_OK: tiledb_filter_list_free(&filter_list_ptr) _raise_ctx_err(ctx_ptr, rc) self.ctx = ctx self.ptr = filter_list_ptr def __cinit__(self): self.ptr = NULL def __dealloc__(self): if self.ptr != NULL: tiledb_filter_list_free(&self.ptr) @staticmethod cdef FilterList from_ptr(tiledb_filter_list_t* ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(ptr != NULL) cdef FilterList filter_list = FilterList.__new__(FilterList) filter_list.ctx = ctx # need to cast away the const filter_list.ptr = ptr return filter_list def __repr__(self): filters = ",\n ".join( [repr(self._getfilter(i)) for i in range(self.nfilters)]) return "FilterList([{0!s}])".format(filters) def _repr_html_(self): output = io.StringIO() output.write("
\n") for i in range(self.nfilters): output.write(self._getfilter(i)._repr_html_()) output.write("
\n") return output.getvalue() def __eq__(self, other): if other is None: return False if len(self) != len(other): return False for i,f in enumerate(self): if f != other[i]: return False return True @property def chunksize(self): """The chunk size used by the filter list.""" cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_filter_list_t* filter_list_ptr = self.ptr cdef unsigned int chunksize = 0 cdef int rc = TILEDB_OK rc = tiledb_filter_list_get_max_chunk_size(ctx_ptr, filter_list_ptr, &chunksize) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return chunksize @property def nfilters(self): """ :return: Number of filters in the filter list :rtype: int """ cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_filter_list_t* filter_list_ptr = self.ptr cdef unsigned int nfilters = 0 cdef int rc = TILEDB_OK rc = tiledb_filter_list_get_nfilters(ctx_ptr, filter_list_ptr, &nfilters) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return nfilters cdef Filter _getfilter(FilterList self, int idx): cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_filter_list_t* filter_list_ptr = self.ptr cdef tiledb_filter_t* filter_ptr = NULL cdef int rc = TILEDB_OK rc = tiledb_filter_list_get_filter_from_index(ctx_ptr, filter_list_ptr, idx, &filter_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) cdef tiledb_filter_type_t filter_type = TILEDB_FILTER_NONE rc = tiledb_filter_get_type(ctx_ptr, filter_ptr, &filter_type) if rc != TILEDB_OK: tiledb_filter_free(&filter_ptr) _raise_ctx_err(ctx_ptr, rc) return _filter_type_ptr_to_filter(self.ctx, filter_type, filter_ptr) def __len__(self): """Returns the number of filters in the list.""" return self.nfilters def __getitem__(self, idx): """Gets a copy of the filter in the list at the given index :param idx: index into the :type idx: int or slice :returns: A filter at given index / slice :raises IndexError: invalid index :raises: :py:exc:`tiledb.TileDBError` """ if not isinstance(idx, (int, slice)): raise TypeError("FilterList indices must be integers or slices, not {:s}".format(type(idx).__name__)) nfilters = self.nfilters if isinstance(idx, int): if idx < 0 or idx > (nfilters - 1): raise IndexError("FilterList index out of range") idx = slice(idx, idx + 1) else: if not isinstance(idx.start, int) or not isinstance(idx.stop, int) or not isinstance(idx.step, int): raise IndexError("FilterList slice indices must be integers or None") filters = [] (start, stop, step) = idx.indices(nfilters) for i in range(start, stop, step): filters.append(self._getfilter(i)) if len(filters) == 1: return filters[0] return filters def append(self, Filter filter): """Appends `filter` to the end of filter list :param filter: filter object to add :type filter: Filter :returns: None """ cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_filter_list_t* filter_list_ptr = self.ptr assert(filter_list_ptr != NULL) if not isinstance(filter, Filter): raise ValueError("filter argument must be a TileDB filter objects") cdef tiledb_filter_t* filter_ptr = filter.ptr cdef int rc = TILEDB_OK rc = tiledb_filter_list_add_filter(ctx_ptr, filter_list_ptr, filter_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) cdef class Attr(object): """Class representing a TileDB array attribute. :param tiledb.Ctx ctx: A TileDB Context :param str name: Attribute name, empty if anonymous :param dtype: Attribute value datatypes :type dtype: numpy.dtype object or type or string :param nullable: Attribute is nullable :type bool: :param fill: Fill value for unset cells. 
:param var: Attribute is variable-length (automatic for byte/string types) :type dtype: bool :param filters: List of filters to apply :type filters: FilterList :raises TypeError: invalid dtype :raises: :py:exc:`tiledb.TileDBError` """ cdef unicode _get_name(Attr self): cdef const char* c_name = NULL check_error(self.ctx, tiledb_attribute_get_name(self.ctx.ptr, self.ptr, &c_name)) cdef unicode name = c_name.decode('UTF-8', 'strict') return name cdef unsigned int _cell_val_num(Attr self) except? 0: cdef unsigned int ncells = 0 check_error(self.ctx, tiledb_attribute_get_cell_val_num(self.ctx.ptr, self.ptr, &ncells)) return ncells def __cinit__(self): self.ptr = NULL def __dealloc__(self): if self.ptr != NULL: tiledb_attribute_free(&self.ptr) def __capsule__(self): if self.ptr == NULL: raise TileDBError("internal error: cannot create capsule for uninitialized Attr!") cdef const char* name = "ctx" cap = PyCapsule_New((self.ptr), name, NULL) return cap @staticmethod cdef from_ptr(const tiledb_attribute_t* ptr, Ctx ctx=None): """Constructs an Attr class instance from a (non-null) tiledb_attribute_t pointer """ if not ctx: ctx = default_ctx() assert(ptr != NULL) cdef Attr attr = Attr.__new__(Attr) attr.ctx = ctx # need to cast away the const attr.ptr = ptr return attr def __init__(self, name=u"", dtype=np.float64, fill=None, var=None, nullable=False, filters=None, Ctx ctx=None): if not ctx: ctx = default_ctx() cdef bytes bname = ustring(name).encode('UTF-8') cdef const char* name_ptr = PyBytes_AS_STRING(bname) cdef np.dtype _dtype = None cdef tiledb_datatype_t tiledb_dtype cdef uint32_t ncells if isinstance(dtype, str) and dtype == "ascii": tiledb_dtype = TILEDB_STRING_ASCII ncells = TILEDB_VAR_NUM else: _dtype = np.dtype(dtype) tiledb_dtype, ncells = array_type_ncells(_dtype) # ensure that all unicode strings are var-length if var or _dtype.kind == 'U': var = True ncells = TILEDB_VAR_NUM if _dtype and _dtype.kind == 'S': if var and 0 < _dtype.itemsize: warnings.warn( f"Attr given `var=True` but `dtype` `{_dtype}` is fixed; " "setting `dtype=S0`. Hint: set `var=True` with `dtype=S0`, " f"or `var=False`with `dtype={_dtype}`", DeprecationWarning, ) _dtype = np.dtype("S0") if _dtype.itemsize == 0: if var == False: warnings.warn( f"Attr given `var=False` but `dtype` `S0` is var-length; " "setting `var=True` and `dtype=S0`. 
Hint: set `var=True` with `dtype=S0`, " "or `var=False` with a fixed-width " "string `dtype=S<n>` where n > 1.", DeprecationWarning, ) var = True ncells = TILEDB_VAR_NUM var = var or False # variable-length cell type if ncells == TILEDB_VAR_NUM and not var: raise TypeError("dtype is not compatible with var-length attribute") cdef FilterList filter_list if filters is not None: if not isinstance(filters, FilterList): try: filters = iter(filters) except: raise TypeError("filters argument must be a tiledb.FilterList or iterable of Filters") else: # we want this to raise a specific error if construction fails filters = FilterList(filters) filter_list = filters # alloc attribute object and set cell num / compressor cdef tiledb_attribute_t* attr_ptr = NULL cdef int rc = TILEDB_OK rc = tiledb_attribute_alloc(ctx.ptr, name_ptr, tiledb_dtype, &attr_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx.ptr, rc) rc = tiledb_attribute_set_cell_val_num(ctx.ptr, attr_ptr, ncells) if rc != TILEDB_OK: tiledb_attribute_free(&attr_ptr) _raise_ctx_err(ctx.ptr, rc) if nullable: rc = tiledb_attribute_set_nullable(ctx.ptr, attr_ptr, 1) if rc != TILEDB_OK: tiledb_attribute_free(&attr_ptr) _raise_ctx_err(ctx.ptr, rc) cdef tiledb_filter_list_t* filter_list_ptr = NULL if filters is not None: filter_list_ptr = filter_list.ptr rc = tiledb_attribute_set_filter_list(ctx.ptr, attr_ptr, filter_list_ptr) if rc != TILEDB_OK: tiledb_attribute_free(&attr_ptr) _raise_ctx_err(ctx.ptr, rc) cdef void* fill_ptr cdef uint64_t fill_nbytes if fill is not None: fill_array = np.array(fill, dtype=dtype) fill_nbytes = fill_array.nbytes fill_ptr = np.PyArray_DATA(fill_array) rc = tiledb_attribute_set_fill_value(ctx.ptr, attr_ptr, fill_ptr, fill_nbytes) if rc != TILEDB_OK: tiledb_attribute_free(&attr_ptr) _raise_ctx_err(ctx.ptr, rc) self.ctx = ctx self.ptr = attr_ptr def __eq__(self, other): if not isinstance(other, Attr): return False if (self.name != other.name or self.dtype != other.dtype): return False return True cdef tiledb_datatype_t _get_type(Attr self) except? TILEDB_CHAR: cdef tiledb_datatype_t typ check_error(self.ctx, tiledb_attribute_get_type(self.ctx.ptr, self.ptr, &typ)) return typ def dump(self): """Dumps a string representation of the Attr object to standard output (stdout)""" check_error(self.ctx, tiledb_attribute_dump(self.ctx.ptr, self.ptr, stdout)) print('\n') return @property def dtype(self): """Return numpy dtype object representing the Attr type :rtype: numpy.dtype """ cdef tiledb_datatype_t typ check_error(self.ctx, tiledb_attribute_get_type(self.ctx.ptr, self.ptr, &typ)) cdef uint32_t ncells = 0 check_error(self.ctx, tiledb_attribute_get_cell_val_num(self.ctx.ptr, self.ptr, &ncells)) return np.dtype(_numpy_dtype(typ, ncells)) @property def name(self): """Attribute string name, empty string if the attribute is anonymous :rtype: str :raises: :py:exc:`tiledb.TileDBError` """ internal_name = self._get_name() # handle __attr names from arrays written with libtiledb < 2 if internal_name == "__attr": return u"" return internal_name @property def _internal_name(self): return self._get_name() @property def isanon(self): """True if attribute is an anonymous attribute :rtype: bool """ cdef unicode name = self._get_name() return name == u"" or name.startswith(u"__attr") @property def compressor(self): """String label of the attribute's compressor and compressor level :rtype: tuple(str, int) :raises: :py:exc:`tiledb.TileDBError` """ # do we want to reimplement this on top of new API?
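# A possible reimplementation sketch on top of the filters API (untested,
# label derivation assumed): report the label and level of the first
# compression filter in the attribute's filter list, if any:
#
#     for f in self.filters:
#         if isinstance(f, CompressionFilter):
#             return type(f).__name__.replace("Filter", "").lower(), f.level
#     return None, None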
pass @property def filters(self): """FilterList of the TileDB attribute :rtype: tiledb.FilterList :raises: :py:exc:`tiledb.TileDBError` """ cdef tiledb_filter_list_t* filter_list_ptr = NULL cdef int rc = TILEDB_OK check_error(self.ctx, tiledb_attribute_get_filter_list(self.ctx.ptr, self.ptr, &filter_list_ptr)) return FilterList.from_ptr(filter_list_ptr, self.ctx) @property def fill(self): """Fill value for unset cells of this attribute :rtype: depends on dtype :raises: :py:exc:`tiledb.TileDBERror` """ cdef const uint8_t* value_ptr = NULL cdef uint64_t size check_error(self.ctx, tiledb_attribute_get_fill_value( self.ctx.ptr, self.ptr, &value_ptr, &size)) if value_ptr == NULL: return None if size == 0: raise TileDBError("Unexpected zero-length non-null fill value") cdef np.npy_intp shape[1] shape[0] = 1 cdef tiledb_datatype_t tiledb_type = self._get_type() cdef int typeid = _numpy_typeid(tiledb_type) assert(typeid != np.NPY_NOTYPE) cdef np.ndarray fill_array if np.issubdtype(self.dtype, np.bytes_): return (value_ptr)[:size] elif np.issubdtype(self.dtype, np.unicode_): return (value_ptr)[:size].decode('utf-8') else: fill_array = np.empty(1, dtype=self.dtype) memcpy(np.PyArray_DATA(fill_array), value_ptr, size) if _tiledb_type_is_datetime(tiledb_type): # Coerce to np.int64 fill_array.dtype = np.int64 datetime_dtype = _tiledb_type_to_datetime(tiledb_type).dtype date_unit = np.datetime_data(datetime_dtype)[0] tmp_val = None if fill_array[0] == 0: # undefined should span the whole dimension domain tmp_val = int(self.shape[0]) else: tmp_val = int(fill_array[0]) return np.timedelta64(tmp_val, date_unit) return fill_array @property def isnullable(self): """True if the attribute is nullable :rtype: bool :raises: :py:exc:`tiledb.TileDBError` """ cdef uint8_t nullable = 0 cdef int rc = TILEDB_OK check_error( self.ctx, tiledb_attribute_get_nullable(self.ctx.ptr, self.ptr, &nullable)) return nullable @property def isvar(self): """True if the attribute is variable length :rtype: bool :raises: :py:exc:`tiledb.TileDBError` """ cdef unsigned int ncells = self._cell_val_num() return ncells == TILEDB_VAR_NUM @property def ncells(self): """The number of cells (scalar values) for a given attribute value :rtype: int :raises: :py:exc:`tiledb.TileDBError` """ cdef unsigned int ncells = self._cell_val_num() assert (ncells != 0) return int(ncells) @property def isascii(self): """True if the attribute is TileDB dtype TILEDB_STRING_ASCII :rtype: bool :raises: :py:exc:`tiledb.TileDBError` """ return self._get_type() == TILEDB_STRING_ASCII def __repr__(self): filters_str = "" if self.filters: filters_str = ", filters=FilterList([" for f in self.filters: filters_str += repr(f) + ", " filters_str += "])" attr_dtype = "ascii" if self.isascii else self.dtype # filters_str must be last with no spaces return (f"""Attr(name={repr(self.name)}, dtype='{attr_dtype!s}', """ f"""var={self.isvar!s}, nullable={self.isnullable!s}""" f"""{filters_str})""") def _repr_html_(self): output = io.StringIO() output.write("
\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write(f"\n") output.write(f"\n") output.write(f"\n") output.write(f"\n") output.write("\n") output.write("
NameData TypeIs Var-LenIs Nullable
{self.name}{'ascii' if self.isascii else self.dtype}{self.isvar}{self.isnullable}
\n") output.write("
\n") return output.getvalue() cdef class Dim(object): """Class representing a dimension of a TileDB Array. :param str name: the dimension name, empty if anonymous :param domain: :type domain: tuple(int, int) or tuple(float, float) :param tile: Tile extent :type tile: int or float :param filters: List of filters to apply :type filters: FilterList :dtype: the Dim numpy dtype object, type object, or string \ that can be corerced into a numpy dtype object :raises ValueError: invalid domain or tile extent :raises TypeError: invalid domain, tile extent, or dtype type :raises: :py:exc:`TileDBError` :param tiledb.Ctx ctx: A TileDB Context """ def __cinit__(self): self.ptr = NULL def __dealloc__(self): if self.ptr != NULL: tiledb_dimension_free(&self.ptr) @staticmethod cdef from_ptr(const tiledb_dimension_t* ptr, Ctx ctx=None): if not ctx: ctx = default_ctx() assert(ptr != NULL) cdef Dim dim = Dim.__new__(Dim) dim.ctx = ctx # need to cast away the const dim.ptr = ptr return dim def __init__(self, name=u"__dim_0", domain=None, tile=None, filters=None, dtype=np.uint64, var=None, Ctx ctx=None): if not ctx: ctx = default_ctx() if var is not None: if var and np.dtype(dtype) not in (np.str_, np.bytes_): raise TypeError("'var=True' specified for non-str/bytes dtype") if domain is not None and len(domain) != 2: raise ValueError('invalid domain extent, must be a pair') # argument conversion cdef bytes bname = ustring(name).encode('UTF-8') cdef const char* name_ptr = PyBytes_AS_STRING(bname) cdef tiledb_datatype_t dim_datatype cdef const void* domain_ptr = NULL cdef tiledb_dimension_t* dim_ptr = NULL cdef void* tile_size_ptr = NULL cdef np.dtype domain_dtype if ((isinstance(dtype, str) and dtype == "ascii") or dtype == np.dtype('S')): # Handle var-len domain type # (currently only TILEDB_STRING_ASCII) # The dimension's domain is implicitly formed as # coordinates are written. 
dim_datatype = TILEDB_STRING_ASCII else: if domain is None or len(domain) != 2: raise ValueError('invalid domain extent, must be a pair') if dtype is not None: dtype = np.dtype(dtype) dtype_min, dtype_max = dtype_range(dtype) if domain == (None, None): # this means to use the full extent of the type domain = (dtype_min, dtype_max) elif (domain[0] < dtype_min or domain[0] > dtype_max or domain[1] < dtype_min or domain[1] > dtype_max): raise TypeError( "invalid domain extent, domain cannot be safely cast to dtype {0!r}".format(dtype)) domain_array = np.asarray(domain, dtype=dtype) domain_ptr = np.PyArray_DATA(domain_array) domain_dtype = domain_array.dtype dim_datatype = dtype_to_tiledb(domain_dtype) # check that the domain type is a valid dtype (integer / floating) if (not np.issubdtype(domain_dtype, np.integer) and not np.issubdtype(domain_dtype, np.floating) and not domain_dtype.kind == 'M'): raise TypeError("invalid Dim dtype {0!r}".format(domain_dtype)) # if the tile extent is specified, cast if tile is not None: tile_size_array = _tiledb_cast_tile_extent(tile, domain_dtype) if tile_size_array.size != 1: raise ValueError("tile extent must be a scalar") tile_size_ptr = np.PyArray_DATA(tile_size_array) cdef FilterList filter_list cdef tiledb_filter_list_t* filter_list_ptr = NULL try: check_error(ctx, tiledb_dimension_alloc(ctx.ptr, name_ptr, dim_datatype, domain_ptr, tile_size_ptr, &dim_ptr)) assert dim_ptr != NULL, "internal error: tiledb_dimension_alloc null dim_ptr" if filters is not None: if not isinstance(filters, FilterList): filters = FilterList(filters) filter_list = filters filter_list_ptr = filter_list.ptr check_error(ctx, tiledb_dimension_set_filter_list(ctx.ptr, dim_ptr, filter_list_ptr)) except: raise self.ctx = ctx self.ptr = dim_ptr def __repr__(self): filters_str = "" if self.filters: filters_str = ", filters=FilterList([" for f in self.filters: filters_str += repr(f) + ", " filters_str += "])" # for consistency, print `var=True` for string-like types varlen = "" if not self.dtype in (np.str_, np.bytes_) else ", var=True" return "Dim(name={0!r}, domain={1!s}, tile='{2!s}', dtype='{3!s}'{4}{5})" \ .format(self.name, self.domain, self.tile, self.dtype, varlen, filters_str) def _repr_html_(self) -> str: output = io.StringIO() output.write("
\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") filters_str = "" if self.filters: filters_str = ", filters=FilterList([" for f in self.filters: filters_str += repr(f) + ", " filters_str += "])" output.write("\n") output.write(f"\n") output.write(f"\n") output.write(f"\n") output.write(f"\n") output.write(f"\n") output.write(f"\n") output.write("\n") output.write("
NameDomainTileData TypeIs Var-LenFilters
{self.name}{self.domain}{self.tile}{self.dtype}{self.dtype in (np.str_, np.bytes_) }{filters_str}
\n") output.write("
\n") return output.getvalue() def __len__(self): return self.size def __eq__(self, other): if not isinstance(other, Dim): return False if (self.name != other.name or self.domain != other.domain or self.tile != other.tile or self.dtype != other.dtype): return False return True def __array__(self, dtype=None, **kw): if not self._integer_domain(): raise TypeError("conversion to numpy ndarray only valid for integer dimension domains") lb, ub = self.domain return np.arange(int(lb), int(ub) + 1, dtype=dtype if dtype else self.dtype) cdef tiledb_datatype_t _get_type(Dim self) except? TILEDB_CHAR: cdef tiledb_datatype_t typ check_error(self.ctx, tiledb_dimension_get_type(self.ctx.ptr, self.ptr, &typ)) return typ @property def dtype(self): """Numpy dtype representation of the dimension type. :rtype: numpy.dtype """ return np.dtype(_numpy_dtype(self._get_type())) @property def name(self): """The dimension label string. Anonymous dimensions return a default string representation based on the dimension index. :rtype: str """ cdef const char* name_ptr = NULL check_error(self.ctx, tiledb_dimension_get_name(self.ctx.ptr, self.ptr, &name_ptr)) return name_ptr.decode('UTF-8', 'strict') @property def isvar(self): """True if the dimension is variable length :rtype: bool :raises: :py:exc:`tiledb.TileDBError` """ cdef unsigned int ncells = self._cell_val_num() return ncells == TILEDB_VAR_NUM @property def isanon(self): """True if the dimension is anonymous :rtype: bool """ name = self.name return name == u"" or name.startswith("__dim") @property def filters(self): """FilterList of the TileDB dimension :rtype: tiledb.FilterList :raises: :py:exc:`tiledb.TileDBError` """ cdef tiledb_filter_list_t* filter_list_ptr = NULL cdef int rc = TILEDB_OK check_error(self.ctx, tiledb_dimension_get_filter_list(self.ctx.ptr, self.ptr, &filter_list_ptr)) return FilterList.from_ptr(filter_list_ptr, self.ctx) cdef unsigned int _cell_val_num(Dim self) except? 0: cdef unsigned int ncells = 0 check_error(self.ctx, tiledb_dimension_get_cell_val_num( self.ctx.ptr, self.ptr, &ncells)) return ncells cdef _integer_domain(self): cdef tiledb_datatype_t typ = self._get_type() if typ == TILEDB_FLOAT32 or typ == TILEDB_FLOAT64: return False return True cdef _datetime_domain(self): cdef tiledb_datatype_t typ = self._get_type() return _tiledb_type_is_datetime(typ) cdef _shape(self): domain = self.domain if self._datetime_domain(): return (_tiledb_datetime_extent(domain[0], domain[1]),) else: return ((domain[1].item() - domain[0].item() + 1),) @property def shape(self): """The shape of the dimension given the dimension's domain. **Note**: The shape is only valid for integer and datetime dimension domains. :rtype: tuple(numpy scalar, numpy scalar) :raises TypeError: floating point (inexact) domain """ if not self._integer_domain() and not self._datetime_domain(): raise TypeError("shape only valid for integer and datetime dimension domains") return self._shape() @property def size(self): """The size of the dimension domain (number of cells along dimension). :rtype: int :raises TypeError: floating point (inexact) domain """ if not self._integer_domain(): raise TypeError("size only valid for integer dimension domains") return int(self._shape()[0]) @property def tile(self): """The tile extent of the dimension. 
:rtype: numpy scalar or np.timedelta64

        """
        cdef const void* tile_ptr = NULL
        check_error(self.ctx,
                    tiledb_dimension_get_tile_extent(self.ctx.ptr, self.ptr, &tile_ptr))
        if tile_ptr == NULL:
            return None
        cdef np.npy_intp shape[1]
        shape[0] = <np.npy_intp> 1
        cdef tiledb_datatype_t tiledb_type = self._get_type()
        cdef int typeid = _numpy_typeid(tiledb_type)
        assert(typeid != np.NPY_NOTYPE)
        cdef np.ndarray tile_array =\
            np.PyArray_SimpleNewFromData(1, shape, typeid, <void*>tile_ptr)

        if _tiledb_type_is_datetime(tiledb_type):
            # Coerce to np.int64
            tile_array.dtype = np.int64
            datetime_dtype = _tiledb_type_to_datetime(tiledb_type).dtype
            date_unit = np.datetime_data(datetime_dtype)[0]
            extent = None
            if tile_array[0] == 0:
                # undefined tiles should span the whole dimension domain
                extent = int(self.shape[0])
            else:
                extent = int(tile_array[0])
            return np.timedelta64(extent, date_unit)
        else:
            if tile_array[0] == 0:
                # undefined tiles should span the whole dimension domain
                return self.shape[0]
            return tile_array[0]

    @property
    def domain(self):
        """The dimension (inclusive) domain.

        The dimension's domain is defined by a (lower bound, upper bound) tuple.

        :rtype: tuple(numpy scalar, numpy scalar)

        """
        if self.dtype == np.dtype('S'):
            return None, None
        cdef const void* domain_ptr = NULL
        check_error(self.ctx,
                    tiledb_dimension_get_domain(self.ctx.ptr,
                                                self.ptr,
                                                &domain_ptr))
        cdef np.npy_intp shape[1]
        shape[0] = <np.npy_intp> 2
        cdef tiledb_datatype_t tiledb_type = self._get_type()
        cdef int typeid = _numpy_typeid(tiledb_type)
        assert (typeid != np.NPY_NOTYPE)
        cdef np.ndarray domain_array = \
            np.PyArray_SimpleNewFromData(1, shape, typeid, <void*>domain_ptr)

        if _tiledb_type_is_datetime(tiledb_type):
            domain_array.dtype = _tiledb_type_to_datetime(tiledb_type).dtype

        return domain_array[0], domain_array[1]


def clone_dim_with_name(Dim dim, name):
    return Dim(name=name, domain=dim.domain, tile=dim.tile, dtype=dim.dtype, ctx=dim.ctx)

cdef class Domain(object):
    """Class representing the domain of a TileDB Array.

    :param *dims*: one or more tiledb.Dim objects up to the Domain's ndim
    :raises TypeError: All dimensions must have the same dtype
    :raises: :py:exc:`TileDBError`
    :param tiledb.Ctx ctx: A TileDB Context

    """

    def __cinit__(self):
        self.ptr = NULL

    def __dealloc__(self):
        if self.ptr != NULL:
            tiledb_domain_free(&self.ptr)

    @staticmethod
    cdef from_ptr(const tiledb_domain_t* ptr, Ctx ctx=None):
        """Constructs a Domain class instance from a (non-null) tiledb_domain_t pointer"""
        if not ctx:
            ctx = default_ctx()
        assert(ptr != NULL)
        cdef Domain dom = Domain.__new__(Domain)
        dom.ctx = ctx
        dom.ptr = <tiledb_domain_t*>ptr
        return dom

    cdef tiledb_datatype_t _get_type(Domain self) except?
TILEDB_CHAR: cdef tiledb_datatype_t typ check_error(self.ctx, tiledb_domain_get_type(self.ctx.ptr, self.ptr, &typ)) return typ cdef _integer_domain(Domain self): if not self._is_homogeneous(): return False cdef tiledb_datatype_t typ = self._get_type() if typ == TILEDB_FLOAT32 or typ == TILEDB_FLOAT64: return False return True cdef _is_homogeneous(Domain self): cdef np.dtype dtype0 = self.dim(0).dtype return all(self.dim(i).dtype == dtype0 for i in range(1,self.ndim)) cdef _shape(Domain self): return tuple(self.dim(i).shape[0] for i in range(self.ndim)) def __init__(self, *dims, Ctx ctx=None): if not ctx: ctx = default_ctx() # support passing a list of dims without splatting if len(dims) == 1 and isinstance(dims[0], list): dims = dims[0] cdef Py_ssize_t ndim = len(dims) if ndim == 0: raise TileDBError("Domain must have ndim >= 1") if (ndim > 1): if all(dim.name == '__dim_0' for dim in dims): # rename anonymous dimensions sequentially dims = [clone_dim_with_name(dims[i], name=f'__dim_{i}') for i in range(ndim)] elif any(dim.name.startswith('__dim_0') for dim in dims[1:]): raise TileDBError("Mixed dimension naming: dimensions must be either all anonymous or all named.") cdef tiledb_domain_t* domain_ptr = NULL cdef int rc = tiledb_domain_alloc(ctx.ptr, &domain_ptr) if rc != TILEDB_OK: check_error(ctx, rc) assert(domain_ptr != NULL) cdef Dim dimension for i in range(ndim): if not isinstance(dims[i], Dim): raise TypeError("Cannot create Domain with non-Dim value for 'dims' argument") dimension = dims[i] rc = tiledb_domain_add_dimension( ctx.ptr, domain_ptr, dimension.ptr) if rc != TILEDB_OK: tiledb_domain_free(&domain_ptr) check_error(ctx, rc) self.ctx = ctx self.ptr = domain_ptr def __repr__(self): dims = ",\n ".join( [repr(self.dim(i)) for i in range(self.ndim)]) return "Domain({0!s})".format(dims) def _repr_html_(self) -> str: output = io.StringIO() output.write("
\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") for i in range(self.ndim): dim = self.dim(i) output.write("\n") output.write(f"\n") output.write(f"\n") output.write(f"\n") output.write(f"\n") output.write(f"\n") output.write(f"\n") output.write("\n") output.write("
NameDomainTileData TypeIs Var-lengthFilters
{html.escape(dim.name)}{dim.domain}{dim.tile}{html.escape(str(dim.dtype))}{dim.dtype in (np.str_, np.bytes_) }{dim.filters._repr_html_()}
\n") output.write("
\n") return output.getvalue() def __len__(self): """Returns the number of dimensions of the domain""" return self.ndim def __iter__(self): """Returns a generator object that iterates over the domain's dimension objects""" return (self.dim(i) for i in range(self.ndim)) def __eq__(self, other): """Returns true if Domain is equal to self. :rtype: bool """ if not isinstance(other, Domain): return False cdef bint same_dtype = self._is_homogeneous() if (same_dtype and self.shape != other.shape): return False ndim = self.ndim if (ndim != other.ndim): return False for i in range(ndim): if self.dim(i) != other.dim(i): return False return True @property def ndim(self): """The number of dimensions of the domain. :rtype: int """ cdef unsigned int ndim = 0 check_error(self.ctx, tiledb_domain_get_ndim(self.ctx.ptr, self.ptr, &ndim)) return ndim @property def dtype(self): """The numpy dtype of the domain's dimension type. :rtype: numpy.dtype """ cdef tiledb_datatype_t typ = self._get_type() return np.dtype(_numpy_dtype(typ)) @property def shape(self): """The domain's shape, valid only for integer domains. :rtype: tuple :raises TypeError: floating point (inexact) domain """ if not self._integer_domain(): raise TypeError("shape valid only for integer domains") return self._shape() @property def size(self): """The domain's size (number of cells), valid only for integer domains. :rtype: int :raises TypeError: floating point (inexact) domain """ if not self._integer_domain(): raise TypeError("shape valid only for integer domains") return np.product(self._shape()) @property def homogeneous(self): """Returns True if the domain's dimension types are homogeneous.""" return self._is_homogeneous() def dim(self, dim_id): """Returns a Dim object from the domain given the dimension's index or name. :param dim_d: dimension index (int) or name (str) :raises: :py:exc:`tiledb.TileDBError` """ cdef tiledb_dimension_t* dim_ptr = NULL cdef bytes uname cdef const char* name_ptr = NULL if isinstance(dim_id, (str, unicode)): uname = ustring(dim_id).encode('UTF-8') name_ptr = uname check_error(self.ctx, tiledb_domain_get_dimension_from_name( self.ctx.ptr, self.ptr, name_ptr, &dim_ptr)) elif isinstance(dim_id, int): check_error(self.ctx, tiledb_domain_get_dimension_from_index( self.ctx.ptr, self.ptr, dim_id, &dim_ptr)) else: raise ValueError("Unsupported dim identifier: '{}' (expected int or str)".format( safe_repr(dim_id) )) assert(dim_ptr != NULL) return Dim.from_ptr(dim_ptr, self.ctx) def has_dim(self, name): """ Returns true if the Domain has a Dimension with the given name :param name: name of Dimension :rtype: bool :return: """ cdef: cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_domain_t* dom_ptr = self.ptr int32_t has_dim = 0 int32_t rc = TILEDB_OK bytes bname = name.encode("UTF-8") rc = tiledb_domain_has_dimension( ctx_ptr, dom_ptr, bname, &has_dim ) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return bool(has_dim) def dump(self): """Dumps a string representation of the domain object to standard output (STDOUT)""" check_error(self.ctx, tiledb_domain_dump(self.ctx.ptr, self.ptr, stdout)) print("\n") return def index_as_tuple(idx): """Forces scalar index objects to a tuple representation""" if isinstance(idx, tuple): return idx return (idx,) def replace_ellipsis(ndim: int, idx: tuple): """ Replace indexing ellipsis object with slice objects to match the number of dimensions. 
""" # count number of ellipsis n_ellip = sum(1 for i in idx if i is Ellipsis) if n_ellip > 1: raise IndexError("an index can only have a single ellipsis ('...')") elif n_ellip == 1: n = len(idx) if (n - 1) >= ndim: # does nothing, strip it out idx = tuple(i for i in idx if i is not Ellipsis) else: # locate where the ellipse is, count the number of items to left and right # fill in whole dim slices up to th ndim of the array left = idx.index(Ellipsis) right = n - (left + 1) new_idx = idx[:left] + ((slice(None),) * (ndim - (n - 1))) if right: new_idx += idx[-right:] idx = new_idx idx_ndim = len(idx) if idx_ndim < ndim: idx += (slice(None),) * (ndim - idx_ndim) if len(idx) > ndim: raise IndexError("too many indices for array") return idx def replace_scalars_slice(dom: Domain, idx: tuple): """Replace scalar indices with slice objects""" new_idx, drop_axes = [], [] for i in range(dom.ndim): dim = dom.dim(i) dim_idx = idx[i] if np.isscalar(dim_idx): drop_axes.append(i) if isinstance(dim_idx, _inttypes): start = int(dim_idx) if start < 0: start += int(dim.domain[1]) + 1 stop = start + 1 else: start = dim_idx stop = dim_idx new_idx.append(slice(start, stop, None)) else: new_idx.append(dim_idx) return tuple(new_idx), tuple(drop_axes) def index_domain_subarray(array: Array, dom: Domain, idx: tuple): """ Return a numpy array representation of the tiledb subarray buffer for a given domain and tuple of index slices """ ndim = dom.ndim if len(idx) != ndim: raise IndexError("number of indices does not match domain rank: " "(got {!r}, expected: {!r})".format(len(idx), ndim)) subarray = list() for r in range(ndim): # extract lower and upper bounds for domain dimension extent dim = dom.dim(r) dim_dtype = dim.dtype if np.issubdtype(dim_dtype, np.unicode_) or np.issubdtype(dim_dtype, np.bytes_): ned = array.nonempty_domain() (dim_lb, dim_ub) = ned[r] if ned else (None, None) else: (dim_lb, dim_ub) = dim.domain dim_slice = idx[r] if not isinstance(dim_slice, slice): raise IndexError("invalid index type: {!r}".format(type(dim_slice))) start, stop, step = dim_slice.start, dim_slice.stop, dim_slice.step if np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_): if start is None or stop is None: if start is None: start = dim_lb if stop is None: stop = dim_ub elif not isinstance(start, (bytes,unicode)) or not isinstance(stop, (bytes,unicode)): raise TileDBError(f"Non-string range '({start},{stop})' provided for string dimension '{dim.name}'") subarray.append((start,stop)) continue #if step and step < 0: # raise IndexError("only positive slice steps are supported") # Datetimes will be treated specially is_datetime = (dim_dtype.kind == 'M') # Promote to a common type if start is not None and stop is not None: if type(start) != type(stop): promoted_dtype = np.promote_types(type(start), type(stop)) start = np.array(start, dtype=promoted_dtype, ndmin=1)[0] stop = np.array(stop, dtype=promoted_dtype, ndmin=1)[0] if start is not None: if is_datetime and not isinstance(start, np.datetime64): raise IndexError('cannot index datetime dimension with non-datetime interval') # don't round / promote fp slices if np.issubdtype(dim_dtype, np.integer): if isinstance(start, (np.float32, np.float64)): raise IndexError("cannot index integral domain dimension with floating point slice") elif not isinstance(start, _inttypes): raise IndexError("cannot index integral domain dimension with non-integral slice (dtype: {})".format(type(start))) # apply negative indexing (wrap-around semantics) if not is_datetime and start < 
0: start += int(dim_ub) + 1 if start < dim_lb: # numpy allows start value < the array dimension shape, # clamp to lower bound of dimension domain #start = dim_lb raise IndexError("index out of bounds ") else: start = dim_lb if stop is not None: if is_datetime and not isinstance(stop, np.datetime64): raise IndexError('cannot index datetime dimension with non-datetime interval') # don't round / promote fp slices if np.issubdtype(dim_dtype, np.integer): if isinstance(start, (np.float32, np.float64)): raise IndexError("cannot index integral domain dimension with floating point slice") elif not isinstance(start, _inttypes): raise IndexError("cannot index integral domain dimension with non-integral slice (dtype: {})".format(type(start))) if not is_datetime and stop < 0: stop += dim_ub if stop > dim_ub: # numpy allows stop value > than the array dimension shape, # clamp to upper bound of dimension domain if is_datetime: stop = dim_ub else: stop = int(dim_ub) + 1 else: if np.issubdtype(dim_dtype, np.floating) or is_datetime: stop = dim_ub else: stop = int(dim_ub) + 1 if np.issubdtype(type(stop), np.floating): # inclusive bounds for floating point / datetime ranges start = dim_dtype.type(start) stop = dim_dtype.type(stop) subarray.append((start, stop)) elif is_datetime: # need to ensure that datetime ranges are in the units of dim_dtype # so that add_range and output shapes work correctly start = start.astype(dim_dtype) stop = stop.astype(dim_dtype) subarray.append((start,stop)) elif np.issubdtype(type(stop), np.integer): # normal python indexing semantics subarray.append((start, int(stop) - 1)) else: raise IndexError("domain indexing is defined for integral and floating point values") return subarray cdef class ArraySchema(object): """ Schema class for TileDB dense / sparse array representations :param domain: Domain of schema :type attrs: tuple(tiledb.Attr, ...) 
    :param cell_order: TileDB label for cell layout
    :type cell_order: 'row-major' (default) or 'C', 'col-major' or 'F' or 'hilbert'
    :param tile_order: TileDB label for tile layout
    :type tile_order: 'row-major' (default) or 'C', 'col-major' or 'F'
    :param int capacity: tile cell capacity
    :param coords_filters: (default None) coordinate filter list
    :type coords_filters: tiledb.FilterList
    :param offsets_filters: (default None) offsets filter list
    :type offsets_filters: tiledb.FilterList
    :param validity_filters: (default None) validity filter list
    :type validity_filters: tiledb.FilterList
    :param bool allows_duplicates: True if duplicates are allowed
    :param bool sparse: True if schema is sparse, else False \
        (set by SparseArray and DenseArray derived classes)
    :param tiledb.Ctx ctx: A TileDB Context
    :raises: :py:exc:`tiledb.TileDBError`

    """

    def __init__(self,
                 domain=None,
                 attrs=(),
                 cell_order='row-major',
                 tile_order='row-major',
                 capacity=0,
                 coords_filters=None,
                 offsets_filters=None,
                 validity_filters=None,
                 allows_duplicates=False,
                 sparse=False,
                 Ctx ctx=None):
        if not ctx:
            ctx = default_ctx()
        cdef tiledb_array_type_t array_type =\
            TILEDB_SPARSE if sparse else TILEDB_DENSE
        cdef tiledb_array_schema_t* schema_ptr = NULL
        check_error(ctx,
                    tiledb_array_schema_alloc(ctx.ptr, array_type, &schema_ptr))
        cdef tiledb_layout_t cell_layout = TILEDB_ROW_MAJOR
        cdef tiledb_layout_t tile_layout = TILEDB_ROW_MAJOR
        try:
            cell_layout = _tiledb_layout(cell_order if cell_order else 'row-major')
            tile_layout = _tiledb_layout(tile_order if tile_order else 'row-major')
            check_error(ctx, tiledb_array_schema_set_cell_order(ctx.ptr, schema_ptr, cell_layout))
            check_error(ctx, tiledb_array_schema_set_tile_order(ctx.ptr, schema_ptr, tile_layout))
        except:
            tiledb_array_schema_free(&schema_ptr)
            raise
        cdef uint64_t _capacity = 0
        if capacity > 0:
            try:
                _capacity = <uint64_t> capacity
                check_error(ctx,
                    tiledb_array_schema_set_capacity(ctx.ptr, schema_ptr, _capacity))
            except:
                tiledb_array_schema_free(&schema_ptr)
                raise
        cdef bint ballows_dups = 0
        if allows_duplicates:
            ballows_dups = 1
            tiledb_array_schema_set_allows_dups(ctx.ptr, schema_ptr, ballows_dups)
        cdef FilterList filter_list
        cdef tiledb_filter_list_t* filter_list_ptr = NULL
        try:
            if offsets_filters is not None:
                if not isinstance(offsets_filters, FilterList):
                    offsets_filters = FilterList(offsets_filters)
                filter_list = offsets_filters
                filter_list_ptr = filter_list.ptr
                check_error(ctx,
                    tiledb_array_schema_set_offsets_filter_list(ctx.ptr, schema_ptr, filter_list_ptr))
            if coords_filters is not None:
                if not isinstance(coords_filters, FilterList):
                    coords_filters = FilterList(coords_filters)
                filter_list = coords_filters
                filter_list_ptr = filter_list.ptr
                check_error(ctx,
                    tiledb_array_schema_set_coords_filter_list(ctx.ptr, schema_ptr, filter_list_ptr))
            if validity_filters is not None:
                if not isinstance(validity_filters, FilterList):
                    validity_filters = FilterList(validity_filters)
                filter_list = validity_filters
                filter_list_ptr = filter_list.ptr
                check_error(ctx,
                    tiledb_array_schema_set_validity_filter_list(ctx.ptr, schema_ptr, filter_list_ptr))
        except:
            tiledb_array_schema_free(&schema_ptr)
            raise
        if not isinstance(domain, Domain):
            raise TypeError("'domain' must be an instance of Domain (domain is: '{}')".format(domain))
        cdef tiledb_domain_t* domain_ptr = (<Domain> domain).ptr
        rc = tiledb_array_schema_set_domain(ctx.ptr, schema_ptr, domain_ptr)
        if rc != TILEDB_OK:
            tiledb_array_schema_free(&schema_ptr)
            _raise_ctx_err(ctx.ptr, rc)
        cdef tiledb_attribute_t* attr_ptr = NULL
        cdef Attr attribute
        for attr in attrs:
            if not isinstance(attr, Attr):
                raise TypeError("Cannot create schema with non-Attr value for 'attrs' argument")
            attribute = attr
            attr_ptr = attribute.ptr
            rc = tiledb_array_schema_add_attribute(ctx.ptr, schema_ptr, attr_ptr)
            if rc != TILEDB_OK:
                tiledb_array_schema_free(&schema_ptr)
                _raise_ctx_err(ctx.ptr, rc)
        rc = tiledb_array_schema_check(ctx.ptr, schema_ptr)
        if rc != TILEDB_OK:
            tiledb_array_schema_free(&schema_ptr)
            _raise_ctx_err(ctx.ptr, rc)
        self.ctx = ctx
        self.ptr = schema_ptr

    def __cinit__(self):
        self.ptr = NULL

    def __dealloc__(self):
        if self.ptr != NULL:
            tiledb_array_schema_free(&self.ptr)

    @staticmethod
    cdef from_ptr(const tiledb_array_schema_t* schema_ptr, Ctx ctx=None):
        """
        Constructs an ArraySchema class instance from a
        Ctx and tiledb_array_schema_t pointer
        """
        if not ctx:
            ctx = default_ctx()
        cdef ArraySchema schema = ArraySchema.__new__(ArraySchema)
        schema.ctx = ctx
        # cast away const
        schema.ptr = <tiledb_array_schema_t*> schema_ptr
        return schema

    @staticmethod
    def load(uri, Ctx ctx=None, key=None):
        if not ctx:
            ctx = default_ctx()
        cdef bytes buri = uri.encode('UTF-8')
        cdef tiledb_ctx_t* ctx_ptr = ctx.ptr
        cdef const char* uri_ptr = PyBytes_AS_STRING(buri)
        cdef tiledb_array_schema_t* array_schema_ptr = NULL
        # encryption key
        cdef bytes bkey
        cdef tiledb_encryption_type_t key_type = TILEDB_NO_ENCRYPTION
        cdef void* key_ptr = NULL
        cdef unsigned int key_len = 0
        if key is not None:
            if isinstance(key, str):
                bkey = key.encode('ascii')
            else:
                bkey = bytes(key)
            key_type = TILEDB_AES_256_GCM
            key_ptr = <void *> PyBytes_AS_STRING(bkey)
            #TODO: unsafe cast here ssize_t -> uint64_t
            key_len = <unsigned int> PyBytes_GET_SIZE(bkey)
        cdef int rc = TILEDB_OK
        with nogil:
            rc = tiledb_array_schema_load_with_key(
                ctx_ptr, uri_ptr, key_type, key_ptr, key_len, &array_schema_ptr)
        if rc != TILEDB_OK:
            _raise_ctx_err(ctx_ptr, rc)
        return ArraySchema.from_ptr(array_schema_ptr, ctx=ctx)

    def __eq__(self, other):
        """Instance is equal to another ArraySchema"""
        if not isinstance(other, ArraySchema):
            return False
        nattr = self.nattr
        if nattr != other.nattr:
            return False
        if (self.sparse != other.sparse or
            self.cell_order != other.cell_order or
            self.tile_order != other.tile_order):
            return False
        if (self.capacity != other.capacity):
            return False
        if self.domain != other.domain:
            return False
        if self.coords_filters != other.coords_filters:
            return False
        for i in range(nattr):
            if self.attr(i) != other.attr(i):
                return False
        return True

    def __len__(self):
        """Returns the number of Attributes in the ArraySchema"""
        return self.nattr

    def __iter__(self):
        """Returns a generator object that iterates over the ArraySchema's Attribute objects"""
        return (self.attr(i) for i in range(self.nattr))

    def check(self):
        """Checks the correctness of the array schema

        :rtype: None
        :raises: :py:exc:`tiledb.TileDBError` if invalid
        """
        check_error(self.ctx,
                    tiledb_array_schema_check(self.ctx.ptr, self.ptr))

    @property
    def sparse(self):
        """True if the array is a sparse array representation

        :rtype: bool
        :raises: :py:exc:`tiledb.TileDBError`

        """
        cdef tiledb_array_type_t typ = TILEDB_DENSE
        check_error(self.ctx,
                    tiledb_array_schema_get_array_type(self.ctx.ptr, self.ptr, &typ))
        return typ == TILEDB_SPARSE

    @property
    def allows_duplicates(self):
        """Returns True if the (sparse) array allows duplicates."""

        if not self.sparse:
            raise TileDBError("ArraySchema.allows_duplicates does not apply to dense arrays")

        cdef int ballows_dups
        tiledb_array_schema_get_allows_dups(self.ctx.ptr, self.ptr, &ballows_dups)
        return bool(ballows_dups)

    @property
    def capacity(self):
        """The array capacity

        :rtype: int
        :raises: :py:exc:`tiledb.TileDBError`
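
        **Example** (an illustrative sketch; the capacity passed at schema
        creation is reported back):

        >>> import tiledb, numpy as np
        >>> dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64))
        >>> schema = tiledb.ArraySchema(domain=dom, sparse=True, capacity=100,
        ...     attrs=(tiledb.Attr(name="a1", dtype=np.int64),))
        >>> schema.capacity
        100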
""" cdef uint64_t cap = 0 check_error(self.ctx, tiledb_array_schema_get_capacity(self.ctx.ptr, self.ptr, &cap)) return cap cdef _cell_order(ArraySchema self, tiledb_layout_t* cell_order_ptr): check_error(self.ctx, tiledb_array_schema_get_cell_order(self.ctx.ptr, self.ptr, cell_order_ptr)) @property def cell_order(self): """The cell order layout of the array.""" cdef tiledb_layout_t order = TILEDB_UNORDERED self._cell_order(&order) return _tiledb_layout_string(order) cdef _tile_order(ArraySchema self, tiledb_layout_t* tile_order_ptr): check_error(self.ctx, tiledb_array_schema_get_tile_order(self.ctx.ptr, self.ptr, tile_order_ptr)) @property def tile_order(self): """The tile order layout of the array. :rtype: str :raises: :py:exc:`tiledb.TileDBError` """ cdef tiledb_layout_t order = TILEDB_UNORDERED self._tile_order(&order) layout_string = _tiledb_layout_string(order) if self.cell_order == "hilbert": layout_string = None return layout_string @property def coords_compressor(self): """The compressor label and level for the array's coordinates. :rtype: tuple(str, int) :raises: :py:exc:`tiledb.TileDBError` """ # reimplement on top of filter API? pass @property def offsets_compressor(self): """The compressor label and level for the array's variable-length attribute offsets. :rtype: tuple(str, int) :raises: :py:exc:`tiledb.TileDBError` """ # reimplement on top of filter API? pass @property def offsets_filters(self): """The FilterList for the array's variable-length attribute offsets :rtype: tiledb.FilterList :raises: :py:exc:`tiledb.TileDBError` """ cdef tiledb_filter_list_t* filter_list_ptr = NULL check_error(self.ctx, tiledb_array_schema_get_offsets_filter_list( self.ctx.ptr, self.ptr, &filter_list_ptr)) return FilterList.from_ptr(filter_list_ptr, self.ctx) @property def coords_filters(self): """The FilterList for the array's coordinates :rtype: tiledb.FilterList :raises: :py:exc:`tiledb.TileDBError` """ cdef tiledb_filter_list_t* filter_list_ptr = NULL check_error(self.ctx, tiledb_array_schema_get_coords_filter_list( self.ctx.ptr, self.ptr, &filter_list_ptr)) return FilterList.from_ptr(filter_list_ptr, self.ctx) @property def validity_filters(self): """The FilterList for the array's validity :rtype: tiledb.FilterList :raises: :py:exc:`tiledb.TileDBError` """ cdef tiledb_filter_list_t* validity_list_ptr = NULL check_error(self.ctx, tiledb_array_schema_get_validity_filter_list( self.ctx.ptr, self.ptr, &validity_list_ptr)) return FilterList.from_ptr(validity_list_ptr, self.ctx) @property def domain(self): """The Domain associated with the array. :rtype: tiledb.Domain :raises: :py:exc:`tiledb.TileDBError` """ cdef tiledb_domain_t* dom = NULL check_error(self.ctx, tiledb_array_schema_get_domain(self.ctx.ptr, self.ptr, &dom)) return Domain.from_ptr(dom, self.ctx) @property def nattr(self): """The number of array attributes. :rtype: int :raises: :py:exc:`tiledb.TileDBError` """ cdef unsigned int nattr = 0 check_error(self.ctx, tiledb_array_schema_get_attribute_num(self.ctx.ptr, self.ptr, &nattr)) return nattr @property def ndim(self): """The number of array domain dimensions. 
:rtype: int """ return self.domain.ndim @property def shape(self): """The array's shape :rtype: tuple(numpy scalar, numpy scalar) :raises TypeError: floating point (inexact) domain """ return self.domain.shape def _make_invalid(self): """This is a helper function for testing schema.check: resets schema in order to make the schema invalid.""" cdef tiledb_array_schema_t* schema_ptr = self.ptr tiledb_array_schema_free(&schema_ptr) check_error(self.ctx, tiledb_array_schema_alloc(self.ctx.ptr, TILEDB_DENSE, &self.ptr)) def _needs_var_buffer(self, unicode name): """ Returns true if the given attribute or dimension is var-sized :param name: :rtype: bool """ if self.has_attr(name): return self.attr(name).isvar elif self.domain.has_dim(name): return self.domain.dim(name).isvar else: raise ValueError(f"Requested name '{name}' is not an attribute or dimension") cdef _attr_name(self, name): cdef bytes bname = ustring(name).encode('UTF-8') cdef tiledb_attribute_t* attr_ptr = NULL check_error(self.ctx, tiledb_array_schema_get_attribute_from_name( self.ctx.ptr, self.ptr, bname, &attr_ptr)) return Attr.from_ptr(attr_ptr, self.ctx) cdef _attr_idx(self, int idx): cdef tiledb_attribute_t* attr_ptr = NULL check_error(self.ctx, tiledb_array_schema_get_attribute_from_index( self.ctx.ptr, self.ptr, idx, &attr_ptr)) return Attr.from_ptr(attr_ptr, ctx=self.ctx) def attr(self, object key not None): """Returns an Attr instance given an int index or string label :param key: attribute index (positional or associative) :type key: int or str :rtype: tiledb.Attr :return: The ArraySchema attribute at index or with the given name (label) :raises TypeError: invalid key type """ if isinstance(key, (str, unicode)): return self._attr_name(key) elif isinstance(key, _inttypes): return self._attr_idx(int(key)) raise TypeError("attr indices must be a string name, " "or an integer index, not {0!r}".format(type(key))) def has_attr(self, name): """Returns true if the given name is an Attribute of the ArraySchema :param name: attribute name :rtype: boolean """ cdef: int32_t has_attr = 0 int32_t rc = TILEDB_OK bytes bname = name.encode("UTF-8") rc = tiledb_array_schema_has_attribute( self.ctx.ptr, self.ptr, bname, &has_attr ) if rc != TILEDB_OK: _raise_ctx_err(self.ctx.ptr, rc) return bool(has_attr) def attr_or_dim_dtype(self, unicode name): if self.has_attr(name): dtype = self.attr(name).dtype elif self.domain.has_dim(name): dtype = self.domain.dim(name).dtype else: raise TileDBError(f"Unknown attribute or dimension ('{name}')") if dtype.itemsize == 0: # special handling for flexible numpy dtypes: change itemsize from 0 to 1 dtype = np.dtype((dtype, 1)) return dtype def dump(self): """Dumps a string representation of the array object to standard output (stdout)""" check_error(self.ctx, tiledb_array_schema_dump(self.ctx.ptr, self.ptr, stdout)) print("\n") return def __repr__(self): # TODO support/use __qualname__ output = io.StringIO() output.write("ArraySchema(\n") output.write(" domain=Domain(*[\n") for i in range(self.domain.ndim): output.write(f" {repr(self.domain.dim(i))},\n") output.write(" ]),\n") output.write(" attrs=[\n") for i in range(self.nattr): output.write(f" {repr(self.attr(i))},\n") output.write(" ],\n") output.write( f" cell_order='{self.cell_order}',\n" f" tile_order={repr(self.tile_order)},\n" ) output.write(f" capacity={self.capacity},\n") output.write(f" sparse={self.sparse},\n") if self.sparse: output.write(f" allows_duplicates={self.allows_duplicates},\n") if self.sparse and self.coords_filters is not None: 
output.write(f" coords_filters={self.coords_filters},\n") output.write(")\n") return output.getvalue() def _repr_html_(self): output = io.StringIO() output.write("
\n") output.write("

ArraySchema

\n")\ output.write("
\n") output.write(f"domain\n") output.write(self.domain._repr_html_()) output.write("
\n") output.write("
\n") output.write(f"attrs\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") output.write("\n") for i in range(self.nattr): attr = self.attr(i) output.write("\n") output.write(f"\n") dtype = 'ascii' if attr.isascii else html.escape(str(attr.dtype)) output.write(f"\n") output.write(f"\n") output.write(f"\n") output.write("\n") output.write("
NameData TypeIs Var-LenIs Nullable
{html.escape(attr.name)}{dtype}{attr.isvar}{attr.isnullable}
\n") output.write("
\n") output.write("
\n") output.write(f"cell_order\n") output.write(f"{self.cell_order}\n") output.write("
\n") output.write("
\n") output.write(f"tile_order\n") output.write(f"{self.tile_order}\n") output.write("
\n") output.write("
\n") output.write(f"capacity\n") output.write(f"{self.capacity}\n") output.write("
\n") output.write("
\n") output.write(f"sparse\n") output.write(f"{self.sparse}\n") output.write("
\n") if self.sparse and self.coords_filters is not None: output.write("
\n") output.write(f"coords_filters\n") output.write(f"{self.coords_filters}\n") output.write("
\n") output.write("
\n") return output.getvalue() # Wrapper class to allow returning a Python object so that exceptions work correctly # within preload_array cdef class ArrayPtr(object): cdef tiledb_array_t* ptr cdef ArrayPtr preload_array(uri, mode, key, timestamp, Ctx ctx=None): """Open array URI without constructing specific type of Array object (internal).""" if not ctx: ctx = default_ctx() # ctx cdef tiledb_ctx_t* ctx_ptr = ctx.ptr # uri cdef bytes buri = unicode_path(uri) cdef const char* uri_ptr = PyBytes_AS_STRING(buri) # mode cdef tiledb_query_type_t query_type = TILEDB_READ # key cdef bytes bkey cdef tiledb_encryption_type_t key_type = TILEDB_NO_ENCRYPTION cdef void* key_ptr = NULL cdef unsigned int key_len = 0 # convert python mode string to a query type if mode == 'r': query_type = TILEDB_READ elif mode == 'w': query_type = TILEDB_WRITE else: raise ValueError("TileDB array mode must be 'r' or 'w'") # check the key, and convert the key to bytes if key is not None: if isinstance(key, str): bkey = key.encode('ascii') else: bkey = bytes(key) key_type = TILEDB_AES_256_GCM key_ptr = PyBytes_AS_STRING(bkey) #TODO: unsafe cast here ssize_t -> uint64_t key_len = PyBytes_GET_SIZE(bkey) cdef uint64_t ts_start = 0 cdef uint64_t ts_end = 0 cdef bint set_start = False, set_end = False if timestamp is not None: if isinstance(timestamp, tuple): if len(timestamp) != 2: raise ValueError("'timestamp' argument expects either int or tuple(start: int, end: int)") if timestamp[0] is not None: ts_start = timestamp[0] set_start = True if timestamp[1] is not None: ts_end = timestamp[1] set_end = True elif isinstance(timestamp, int): # handle the existing behavior for unary timestamp # which is equivalent to endpoint of the range ts_end = timestamp set_end = True else: raise TypeError("Unexpected argument type for 'timestamp' keyword argument") # allocate and then open the array cdef tiledb_array_t* array_ptr = NULL cdef int rc = TILEDB_OK rc = tiledb_array_alloc(ctx_ptr, uri_ptr, &array_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) try: if set_start: check_error(ctx, tiledb_array_set_open_timestamp_start(ctx_ptr, array_ptr, ts_start) ) if set_end: check_error(ctx, tiledb_array_set_open_timestamp_end(ctx_ptr, array_ptr, ts_end) ) except: tiledb_array_free(&array_ptr) raise with nogil: rc = tiledb_array_open_with_key( ctx_ptr, array_ptr, query_type, key_type, key_ptr, key_len) if rc != TILEDB_OK: tiledb_array_free(&array_ptr) _raise_ctx_err(ctx_ptr, rc) cdef ArrayPtr retval = ArrayPtr() retval.ptr = array_ptr return retval cdef class Array(object): """Base class for TileDB array objects. Defines common properties/functionality for the different array types. When an Array instance is initialized, the array is opened with the specified mode. :param str uri: URI of array to open :param str mode: (default 'r') Open the array object in read 'r' or write 'w' mode :param str key: (default None) If not None, encryption key to decrypt the array :param tuple timestamp: (default None) If int, open the array at a given TileDB timestamp. If tuple, open at the given start and end TileDB timestamps. :param str attr: (default None) open one attribute of the array; indexing a dense array will return a Numpy ndarray directly rather than a dictionary. 
    :param Ctx ctx: TileDB context
    """

    def __init__(self, uri, mode='r', key=None, timestamp=None,
                 attr=None, Ctx ctx=None):
        if not ctx:
            ctx = default_ctx()
        # ctx
        cdef tiledb_ctx_t* ctx_ptr = ctx.ptr
        # array
        cdef ArrayPtr preload_ptr

        if not self._isopen:
            preload_ptr = preload_array(uri, mode, key, timestamp, ctx)
            self.ptr = preload_ptr.ptr

        assert self.ptr != NULL, "internal error: unexpected null tiledb_array_t pointer in Array.__init__"
        cdef tiledb_array_t* array_ptr = self.ptr

        cdef ArraySchema schema
        cdef tiledb_array_schema_t* array_schema_ptr = NULL
        try:
            rc = TILEDB_OK
            with nogil:
                rc = tiledb_array_get_schema(ctx_ptr, array_ptr, &array_schema_ptr)
            if rc != TILEDB_OK:
                _raise_ctx_err(ctx_ptr, rc)
            schema = ArraySchema.from_ptr(array_schema_ptr, ctx=ctx)
        except:
            tiledb_array_close(ctx_ptr, array_ptr)
            tiledb_array_free(&array_ptr)
            self.ptr = NULL
            raise

        # view on a single attribute
        if attr and not any(attr == schema.attr(i).name for i in range(schema.nattr)):
            tiledb_array_close(ctx_ptr, array_ptr)
            tiledb_array_free(&array_ptr)
            self.ptr = NULL
            raise KeyError("No attribute matching '{}'".format(attr))
        else:
            self.view_attr = unicode(attr) if (attr is not None) else None

        self.ctx = ctx
        self.uri = unicode(uri)
        self.mode = unicode(mode)
        self.schema = schema
        self.key = key
        self.domain_index = DomainIndexer(self)
        self.last_fragment_info = dict()
        self.meta = Metadata(self)

    def __cinit__(self):
        self.ptr = NULL

    def __dealloc__(self):
        if self.ptr != NULL:
            tiledb_array_free(&self.ptr)

    def __capsule__(self):
        if self.ptr == NULL:
            raise TileDBError("internal error: cannot create capsule for uninitialized Array!")
        cdef const char* name = "ctx"
        cap = PyCapsule_New(<void *>(self.ptr), name, NULL)
        return cap

    def __repr__(self):
        if self.isopen:
            return "Array(type={0}, uri={1!r}, mode={2}, ndim={3})"\
                .format("Sparse" if self.schema.sparse else "Dense", self.uri, self.mode, self.schema.ndim)
        else:
            return "Array(uri={0!r}, mode=closed)".format(self.uri)

    def _ctx_(self) -> Ctx:
        """
        Get Ctx object associated with the array (internal).
        This method exists for serialization.

        :return: Ctx object used to open the array.
        :rtype: Ctx

        """
        return self.ctx

    @classmethod
    def create(cls, uri, ArraySchema schema, key=None, overwrite=False, Ctx ctx=None):
        """Creates a TileDB Array at the given URI

        :param str uri: URI at which to create the new empty array.
        :param ArraySchema schema: Schema for the array
        :param str key: (default None) Encryption key to use for array
        :param bool overwrite: (default False) Overwrite the array if it already exists
        :param Ctx ctx: (default None) Optional TileDB Ctx used when creating the array,
            by default uses the ArraySchema's associated context
            (*not* necessarily ``tiledb.default_ctx``).
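
        **Example** (an illustrative sketch, mirroring the other doctests in
        this module):

        >>> import tiledb, numpy as np, tempfile
        >>> with tempfile.TemporaryDirectory() as tmp:
        ...     dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64))
        ...     schema = tiledb.ArraySchema(domain=dom,
        ...         attrs=(tiledb.Attr(name="a1", dtype=np.int64),))
        ...     tiledb.DenseArray.create(tmp + "/array", schema)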
""" if issubclass(cls, DenseArrayImpl) and schema.sparse: raise ValueError("Array.create `schema` argument must be a dense schema for DenseArray and subclasses") if issubclass(cls, SparseArrayImpl) and not schema.sparse: raise ValueError("Array.create `schema` argument must be a sparse schema for SparseArray and subclasses") cdef tiledb_ctx_t* ctx_ptr = schema.ctx.ptr cdef bytes buri = unicode_path(uri) cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef tiledb_array_schema_t* schema_ptr = schema.ptr cdef bytes bkey cdef tiledb_encryption_type_t key_type = TILEDB_NO_ENCRYPTION cdef void* key_ptr = NULL cdef unsigned int key_len = 0 cdef int rc = TILEDB_OK if key is not None: if isinstance(key, str): bkey = key.encode('ascii') else: bkey = bytes(key) key_type = TILEDB_AES_256_GCM key_ptr = PyBytes_AS_STRING(bkey) #TODO: unsafe cast here ssize_t -> uint64_t key_len = PyBytes_GET_SIZE(bkey) if overwrite: if object_type(uri) == "array": if uri.startswith("file://") or "://" not in uri: if VFS().remove_dir(uri) != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) else: raise TypeError("Cannot overwrite non-local array.") else: warnings.warn("Overwrite set, but array does not exist") if ctx is not None: ctx_ptr = ctx.ptr with nogil: rc = tiledb_array_create_with_key(ctx_ptr, uri_ptr, schema_ptr, key_type, key_ptr, key_len) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return @staticmethod def load_typed(uri, mode='r', key=None, timestamp=None, attr=None, Ctx ctx=None): """Return a {Dense,Sparse}Array instance from a pre-opened Array (internal)""" if not ctx: ctx = default_ctx() cdef int32_t rc = TILEDB_OK cdef tiledb_ctx_t* ctx_ptr = ctx.ptr cdef tiledb_array_schema_t* schema_ptr = NULL cdef tiledb_array_type_t array_type cdef Array new_array cdef object new_array_typed # *** preload_array owns array_ptr until it returns *** # and will free array_ptr upon exception cdef ArrayPtr tmp_array = preload_array(uri, mode, key, timestamp, ctx) assert tmp_array.ptr != NULL, "Internal error, array loading return nullptr" cdef tiledb_array_t* array_ptr = tmp_array.ptr # *** now we own array_ptr -- free in the try..except clause *** try: rc = tiledb_array_get_schema(ctx_ptr, array_ptr, &schema_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) rc = tiledb_array_schema_get_array_type(ctx_ptr, schema_ptr, &array_type) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) tiledb_array_schema_free(&schema_ptr) if array_type == TILEDB_DENSE: new_array_typed = DenseArray.__new__(DenseArray) else: new_array_typed = SparseArray.__new__(SparseArray) except: tiledb_array_free(&array_ptr) raise # *** this assignment must happen outside the try block *** # *** because the array destructor will free array_ptr *** # note: must use the immediate form `(x).m()` here # do not assign a temporary Array object if array_type == TILEDB_DENSE: (new_array_typed).ptr = array_ptr (new_array_typed)._isopen = True else: (new_array_typed).ptr = array_ptr (new_array_typed)._isopen = True # *** new_array_typed now owns array_ptr *** new_array_typed.__init__(uri, mode=mode, key=key, timestamp=timestamp, attr=attr, ctx=ctx) return new_array_typed def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close() def close(self): """Closes this array, flushing all buffered data.""" cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_array_t* array_ptr = self.ptr cdef int rc = TILEDB_OK with nogil: rc = tiledb_array_close(ctx_ptr, array_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) self.schema = None return def 
reopen(self, timestamp=None): """ Reopens this array. This is useful when the array is updated after it was opened. To sync-up with the updates, the user must either close the array and open again, or just use ``reopen()`` without closing. ``reopen`` will be generally faster than a close-then-open. """ cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_array_t* array_ptr = self.ptr cdef uint64_t _timestamp = 0 cdef int rc = TILEDB_OK if timestamp is None: with nogil: rc = tiledb_array_reopen(ctx_ptr, array_ptr) else: _timestamp = timestamp with nogil: rc = tiledb_array_reopen_at(ctx_ptr, array_ptr, _timestamp) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return @property def meta(self): """ Return array metadata instance :rtype: tiledb.Metadata """ return self.meta @property def schema(self): """The :py:class:`ArraySchema` for this array.""" schema = self.schema if schema is None: raise TileDBError("Cannot access schema, array is closed") return schema @property def mode(self): """The mode this array was opened with.""" return self.mode @property def iswritable(self): """This array is currently opened as writable.""" return self.mode == 'w' @property def isopen(self): """True if this array is currently open.""" cdef int isopen = 0 cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_array_t* array_ptr = self.ptr cdef int rc = TILEDB_OK rc = tiledb_array_is_open(ctx_ptr, array_ptr, &isopen) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return isopen == 1 @property def ndim(self): """The number of dimensions of this array.""" return self.schema.ndim @property def domain(self): """The :py:class:`Domain` of this array.""" return self.schema.domain @property def dtype(self): """The NumPy dtype of the specified attribute""" if self.view_attr is None and self.schema.nattr > 1: raise NotImplementedError("Multi-attribute does not have single dtype!") return self.schema.attr(0).dtype @property def shape(self): """The shape of this array.""" return self.schema.shape @property def nattr(self): """The number of attributes of this array.""" if self.view_attr: return 1 else: return self.schema.nattr @property def view_attr(self): """The view attribute of this array.""" return self.view_attr @property def timestamp(self): """Deprecated in 0.9.2. 
        Use `timestamp_range`

        Returns the timestamp the array is opened at

        :rtype: int
        :returns: tiledb timestamp at which point the array was opened

        """
        warnings.warn(
            "timestamp is deprecated; please use timestamp_range",
            DeprecationWarning,
        )
        cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr
        cdef tiledb_array_t* array_ptr = self.ptr
        cdef uint64_t timestamp = 0
        cdef int rc = TILEDB_OK
        rc = tiledb_array_get_timestamp(ctx_ptr, array_ptr, &timestamp)
        if rc != TILEDB_OK:
            _raise_ctx_err(ctx_ptr, rc)
        return int(timestamp)

    @property
    def timestamp_range(self):
        """Returns the timestamp range the array is opened at

        :rtype: tuple
        :returns: tiledb timestamp range at which point the array was opened

        """
        cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr
        cdef tiledb_array_t* array_ptr = self.ptr
        cdef uint64_t timestamp_start = 0
        cdef uint64_t timestamp_end = 0
        cdef int rc = TILEDB_OK

        rc = tiledb_array_get_open_timestamp_start(ctx_ptr, array_ptr, &timestamp_start)
        if rc != TILEDB_OK:
            _raise_ctx_err(ctx_ptr, rc)

        rc = tiledb_array_get_open_timestamp_end(ctx_ptr, array_ptr, &timestamp_end)
        if rc != TILEDB_OK:
            _raise_ctx_err(ctx_ptr, rc)

        return (int(timestamp_start), int(timestamp_end))

    @property
    def coords_dtype(self):
        """Returns the numpy record array dtype of the array coordinates

        :rtype: numpy.dtype
        :returns: coord array record dtype

        """
        # deprecated in 0.8.10
        warnings.warn(
            """`coords_dtype` is deprecated because combined coords have been removed from
            libtiledb. Currently it returns a record array of each individual dimension dtype,
            but it will be removed because that is not applicable to split dimensions.""",
            DeprecationWarning,
        )
        # returns the record array dtype of the coordinate array
        return np.dtype([(str(dim.name), dim.dtype) for dim in self.schema.domain])

    @property
    def uri(self):
        """Returns the URI of the array"""
        return self.uri

    def subarray(self, selection, attrs=None, coords=False, order=None):
        raise NotImplementedError()

    def attr(self, key):
        """Returns an :py:class:`Attr` instance given an int index or string label

        :param key: attribute index (positional or associative)
        :type key: int or str
        :rtype: :py:class:`Attr`
        :return: The array attribute at index or with the given name (label)
        :raises TypeError: invalid key type"""
        return self.schema.attr(key)

    def dim(self, dim_id):
        """Returns a :py:class:`Dim` instance given a dim index or name

        :param dim_id: dimension index (positional or associative)
        :type dim_id: int or str
        :rtype: :py:class:`Dim`
        :return: The array dimension at index or with the given name
        :raises TypeError: invalid key type"""
        return self.schema.domain.dim(dim_id)

    def nonempty_domain(self):
        """Return the minimum bounding domain which encompasses nonempty values.

        :rtype: tuple(tuple(numpy scalar, numpy scalar), ...)
        :return: A list of (inclusive) domain extent tuples that contain all nonempty cells

        """
        cdef list results = list()
        cdef Domain dom = self.schema.domain

        cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr
        cdef tiledb_array_t* array_ptr = self.ptr
        cdef int rc = TILEDB_OK
        cdef uint32_t dim_idx

        cdef uint64_t start_size
        cdef uint64_t end_size
        cdef int32_t is_empty
        cdef np.ndarray start_buf
        cdef np.ndarray end_buf
        cdef void* start_buf_ptr
        cdef void* end_buf_ptr
        cdef np.dtype dim_dtype

        for dim_idx in range(dom.ndim):
            dim_dtype = dom.dim(dim_idx).dtype

            if np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_):
                rc = tiledb_array_get_non_empty_domain_var_size_from_index(
                    ctx_ptr, array_ptr, dim_idx, &start_size, &end_size, &is_empty)
                if rc != TILEDB_OK:
                    _raise_ctx_err(ctx_ptr, rc)
                if is_empty:
                    results.append((None, None))
                    continue
                buf_dtype = 'S'
                start_buf = np.empty(end_size, 'S' + str(start_size))
                end_buf = np.empty(end_size, 'S' + str(end_size))
                start_buf_ptr = np.PyArray_DATA(start_buf)
                end_buf_ptr = np.PyArray_DATA(end_buf)
            else:
                # this one is contiguous
                start_buf = np.empty(2, dim_dtype)
                start_buf_ptr = np.PyArray_DATA(start_buf)

            if np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_):
                rc = tiledb_array_get_non_empty_domain_var_from_index(
                        ctx_ptr, array_ptr, dim_idx, start_buf_ptr, end_buf_ptr, &is_empty
                )
                if rc != TILEDB_OK:
                    _raise_ctx_err(ctx_ptr, rc)
                if is_empty:
                    return None
                results.append((start_buf.item(0), end_buf.item(0)))
            else:
                rc = tiledb_array_get_non_empty_domain_from_index(
                        ctx_ptr, array_ptr, dim_idx, start_buf_ptr, &is_empty
                )
                if rc != TILEDB_OK:
                    _raise_ctx_err(ctx_ptr, rc)
                if is_empty:
                    return None
                res_x, res_y = start_buf.item(0), start_buf.item(1)

                if np.issubdtype(dim_dtype, np.datetime64):
                    # Convert to np.datetime64
                    date_unit = np.datetime_data(dim_dtype)[0]
                    res_x = np.datetime64(res_x, date_unit)
                    res_y = np.datetime64(res_y, date_unit)

                results.append((res_x, res_y))

        return tuple(results)

    def consolidate(self, Config config=None, key=None, timestamp=None):
        """
        Consolidates fragments of an array object for increased read performance.

        Overview: https://docs.tiledb.com/main/concepts/internal-mechanics/consolidation

        :param tiledb.Config config: The TileDB Config with consolidation parameters set
        :param key: (default None) encryption key to decrypt an encrypted array
        :type key: str or bytes
        :param timestamp: (default None) If not None, consolidate the array
            using the given tuple(int, int) UNIX seconds range (inclusive)
        :type timestamp: tuple (int, int)
        :raises: :py:exc:`tiledb.TileDBError`

        Rather than passing the timestamp into this function, it may be set with
        the config parameters `"sm.consolidation.timestamp_start"` and
        `"sm.consolidation.timestamp_end"`, which take a time in UNIX seconds.
        If both are set then this function's `timestamp` argument will be used.

        """
        if self.mode == 'r':
            raise TileDBError("cannot consolidate array opened in readonly mode (mode='r')")
        return consolidate(uri=self.uri, key=key, config=config, ctx=self.ctx, timestamp=timestamp)

    def dump(self):
        self.schema.dump()

    cdef _ndarray_is_varlen(self, np.ndarray array):
        return (np.issubdtype(array.dtype, np.bytes_) or
                np.issubdtype(array.dtype, np.unicode_) or
                array.dtype == object)

    @property
    def domain_index(self):
        return self.domain_index

    @property
    def dindex(self):
        return self.domain_index

    @property
    def multi_index(self):
        """Retrieve data cells with multi-range, domain-inclusive indexing. Returns
        the cross-product of the ranges.
        :param list selection: Per dimension, a scalar, ``slice``, or list of scalars
            or ``slice`` objects. Scalars and ``slice`` components should match the
            type of the underlying Dimension.
        :returns: dict of {'attribute': result}. Coords are included by default for
            Sparse arrays only (use `Array.query(coords=<>)` to select).
        :raises IndexError: invalid or unsupported index selection
        :raises: :py:exc:`tiledb.TileDBError`

        ``multi_index[]`` accepts, for each dimension, a scalar, ``slice``, or list
        of scalars or ``slice`` objects. Each item is interpreted as a point
        (scalar) or range (``slice``) used to query the array on the corresponding
        dimension.

        Unlike NumPy array indexing, ``multi_index`` respects TileDB's range
        semantics: slice ranges are *inclusive* of the start- and end-point, and
        negative ranges do not wrap around (because a TileDB dimension may have
        a negative domain).

        See also: https://docs.tiledb.com/main/api-usage/reading-arrays/multi-range-subarrays

        **Example:**

        >>> import tiledb, tempfile, numpy as np
        >>>
        >>> with tempfile.TemporaryDirectory() as tmp:
        ...     A = tiledb.DenseArray.from_numpy(tmp, np.eye(4) * [1,2,3,4])
        ...     A.multi_index[1]
        ...     A.multi_index[1,1]
        ...     # return row 0 and 2
        ...     A.multi_index[[0,2]]
        ...     # return rows 0 and 2 intersecting column 2
        ...     A.multi_index[[0,2], 2]
        ...     # return rows 0:2 intersecting columns 0:2
        ...     A.multi_index[slice(0,2), slice(0,2)]
        OrderedDict([('', array([[0., 2., 0., 0.]]))])
        OrderedDict([('', array([[2.]]))])
        OrderedDict([('', array([[1., 0., 0., 0.],
               [0., 0., 3., 0.]]))])
        OrderedDict([('', array([[0.],
               [3.]]))])
        OrderedDict([('', array([[1., 0., 0.],
               [0., 2., 0.],
               [0., 0., 3.]]))])

        """
        # Delayed to avoid circular import
        from .multirange_indexing import MultiRangeIndexer
        return MultiRangeIndexer(self)

    @property
    def df(self):
        """Retrieve data cells as a Pandas dataframe, with multi-range,
        domain-inclusive indexing using ``multi_index``.

        :param list selection: Per dimension, a scalar, ``slice``, or list of scalars
            or ``slice`` objects. Scalars and ``slice`` components should match the
            type of the underlying Dimension.
        :returns: a Pandas dataframe of query results. Coords are included by
            default for Sparse arrays only (use `Array.query(coords=<>)` to select).
        :raises IndexError: invalid or unsupported index selection
        :raises: :py:exc:`tiledb.TileDBError`

        ``df[]`` accepts, for each dimension, a scalar, ``slice``, or list of scalars
        or ``slice`` objects. Each item is interpreted as a point (scalar) or
        range (``slice``) used to query the array on the corresponding dimension.

        **Example:**

        >>> import tiledb, tempfile, numpy as np, pandas as pd
        >>>
        >>> with tempfile.TemporaryDirectory() as tmp:
        ...     data = {'col1_f': np.arange(0.0,1.0,step=0.1), 'col2_int': np.arange(10)}
        ...     df = pd.DataFrame.from_dict(data)
        ...     tiledb.from_pandas(tmp, df)
        ...     A = tiledb.open(tmp)
        ...     A.df[1]
        ...     A.df[1:5]
           col1_f  col2_int
        1     0.1         1
           col1_f  col2_int
        1     0.1         1
        2     0.2         2
        3     0.3         3
        4     0.4         4
        5     0.5         5

        """
        # Delayed to avoid circular import
        from .multirange_indexing import DataFrameIndexer
        return DataFrameIndexer(self, use_arrow=None)

    @property
    def last_write_info(self):
        return self.last_fragment_info

    @property
    def _buffers(self):
        return self._buffers

    def _set_buffers(self, object buffers):
        """
        Helper function to set external buffers in the form of
            {'attr_name': (data_array, offsets_array)}
        Buffers will be used to satisfy the next index/query request.
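
        A minimal sketch (the attribute name and the use of ``None`` for the
        offsets of a fixed-length attribute are assumptions here, not a
        documented contract)::

            data = np.arange(10, dtype=np.int64)
            A._set_buffers({"a1": (data, None)})  # hypothetical fixed-length attribute "a1"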
""" self._buffers = buffers def set_query(self, serialized_query): from tiledb.main import PyQuery q = PyQuery(self._ctx_(), self, ("",), (), 0, False) q.set_serialized_query(serialized_query) q.submit() cdef object results = OrderedDict() results = q.results() out = OrderedDict() for name in results.keys(): arr = results[name][0] arr.dtype = q.buffer_dtype(name) out[name] = arr return out # pickling support: this is a lightweight pickle for distributed use. # simply treat as wrapper around URI, not actual data. def __getstate__(self): config_dict = self._ctx_().config().dict() return (self.uri, self.mode, self.key, self.view_attr, self.timestamp_range, config_dict) def __setstate__(self, state): cdef: unicode uri, mode object view_attr = None object timestamp_range = None object key = None dict config_dict = {} uri, mode, key, view_attr, timestamp_range, config_dict = state if config_dict is not {}: config_dict = state[5] config = Config(params=config_dict) ctx = Ctx(config) else: ctx = default_ctx() self.__init__(uri, mode=mode, key=key, attr=view_attr, timestamp=timestamp_range, ctx=ctx) cdef class Query(object): """ Proxy object returned by query() to index into original array on a subselection of attribute in a defined layout order See documentation of Array.query """ def __init__(self, array, attrs=None, attr_cond=None, dims=None, coords=False, index_col=True, order=None, use_arrow=None, return_arrow=False, return_incomplete=False): if array.mode != 'r': raise ValueError("array mode must be read-only") if dims is not None and coords == True: raise ValueError("Cannot pass both dims and coords=True to Query") cdef list dims_to_set = list() if dims is False: self.dims = False elif dims != None and dims != True: domain = array.schema.domain for dname in dims: if not domain.has_dim(dname): raise TileDBError(f"Selected dimension does not exist: '{dname}'") self.dims = [unicode(dname) for dname in dims] elif coords == True or dims == True: domain = array.schema.domain self.dims = [domain.dim(i).name for i in range(domain.ndim)] if attrs is not None: for name in attrs: if not array.schema.has_attr(name): raise TileDBError(f"Selected attribute does not exist: '{name}'") self.attrs = attrs self.attr_cond = attr_cond if attr_cond is not None and not array.schema.sparse: raise TileDBError("QueryConditions may only be applied to sparse arrays") if order == None: if array.schema.sparse: self.order = 'U' # unordered else: self.order = 'C' # row-major else: self.order = order # reference to the array we are querying self.array = array self.coords = coords self.index_col = index_col self.return_arrow = return_arrow if return_arrow: if use_arrow is None: use_arrow = True if not use_arrow: raise TileDBError("Cannot initialize return_arrow with use_arrow=False") self.use_arrow = use_arrow self.return_incomplete = return_incomplete self.domain_index = DomainIndexer(array, query=self) def __getitem__(self, object selection): return self.array.subarray(selection, attrs=self.attrs, attr_cond=self.attr_cond, coords=self.coords if self.coords else self.dims, order=self.order) @property def attrs(self): """List of attributes to include in Query.""" return self.attrs @property def attr_cond(self): """QueryCondition used to filter attributes in Query.""" return self.attr_cond @property def dims(self): """List of dimensions to include in Query.""" return self.dims @property def coords(self): """ True if query should include (return) coordinate values. 
:rtype: bool """ return self.coords @property def order(self): """Return underlying Array order.""" return self.order @property def index_col(self): """List of columns to set as index for dataframe queries, or None.""" return self.index_col @property def use_arrow(self): return self.use_arrow @property def return_arrow(self): return self.return_arrow @property def return_incomplete(self): return self.return_incomplete @property def domain_index(self): """Apply Array.domain_index with query parameters.""" return self.domain_index @property def multi_index(self): """Apply Array.multi_index with query parameters.""" # Delayed to avoid circular import from .multirange_indexing import MultiRangeIndexer return MultiRangeIndexer(self.array, query=self) @property def df(self): """Apply Array.multi_index with query parameters and return result as a Pandas dataframe.""" # Delayed to avoid circular import from .multirange_indexing import DataFrameIndexer return DataFrameIndexer(self.array, query=self, use_arrow=self.use_arrow) def get_stats(self, print_out=True, json=False): """Retrieves the stats from a TileDB query. :param print_out: Print string to console (default True), or return as string :param json: Return stats JSON object (default: False) """ pyquery = self.array.pyquery if pyquery is None: return "" stats = self.array.pyquery.get_stats() if json: import json output = json.loads(stats) else: output = stats if print_out: print(output) else: return output cdef class DenseArrayImpl(Array): """Class representing a dense TileDB array. Inherits properties and methods of :py:class:`tiledb.Array`. """ def __init__(self, *args, **kw): super().__init__(*args, **kw) if self.schema.sparse: raise ValueError("Array at {} is not a dense array".format(self.uri)) return @staticmethod def from_numpy(uri, np.ndarray array, Ctx ctx=None, **kw): """Implementation of tiledb.from_numpy for dense arrays. See documentation of tiledb.from_numpy """ if not ctx: ctx = default_ctx() # pop the write timestamp before creating schema timestamp = kw.pop('timestamp', None) schema = schema_like_numpy(array, ctx=ctx, **kw) Array.create(uri, schema) with DenseArray(uri, mode='w', ctx=ctx, timestamp=timestamp) as arr: # probably need better typecheck here if array.dtype == object: arr[:] = array else: arr.write_direct(np.ascontiguousarray(array)) return DenseArray(uri, mode='r', ctx=ctx) def __len__(self): return self.domain.shape[0] def __getitem__(self, object selection): """Retrieve data cells for an item or region of the array. :param tuple selection: An int index, slice or tuple of integer/slice objects, specifying the selected subarray region for each dimension of the DenseArray. :rtype: :py:class:`numpy.ndarray` or :py:class:`collections.OrderedDict` :returns: If the dense array has a single attribute then a Numpy array of corresponding shape/dtype \ is returned for that attribute. If the array has multiple attributes, a \ :py:class:`collections.OrderedDict` is returned with dense Numpy subarrays \ for each attribute. :raises IndexError: invalid or unsupported index selection :raises: :py:exc:`tiledb.TileDBError` **Example:** >>> import tiledb, numpy as np, tempfile >>> with tempfile.TemporaryDirectory() as tmp: ... # Creates array 'array' on disk. ... A = tiledb.DenseArray.from_numpy(tmp + "/array", np.ones((100, 100))) ... # Many aspects of Numpy's fancy indexing are supported: ... A[1:10, ...].shape ... A[1:10, 20:99].shape ... 
A[1, 2].shape (9, 100) (9, 79) () >>> # Subselect on attributes when reading: >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... schema = tiledb.ArraySchema(domain=dom, ... attrs=(tiledb.Attr(name="a1", dtype=np.int64), ... tiledb.Attr(name="a2", dtype=np.int64))) ... tiledb.DenseArray.create(tmp + "/array", schema) ... with tiledb.DenseArray(tmp + "/array", mode='w') as A: ... A[0:10] = {"a1": np.zeros((10)), "a2": np.ones((10))} ... with tiledb.DenseArray(tmp + "/array", mode='r') as A: ... # Access specific attributes individually. ... A[0:5]["a1"] ... A[0:5]["a2"] array([0, 0, 0, 0, 0]) array([1, 1, 1, 1, 1]) """ if self.view_attr: result = self.subarray(selection, attrs=(self.view_attr,)) return result[self.view_attr] else: result = self.subarray(selection) return result def __repr__(self): if self.isopen: return "DenseArray(uri={0!r}, mode={1}, ndim={2})"\ .format(self.uri, self.mode, self.schema.ndim) else: return "DenseArray(uri={0!r}, mode=closed)".format(self.uri) def query(self, attrs=None, attr_cond=None, dims=None, coords=False, order='C', use_arrow=None, return_arrow=False, return_incomplete=False): """ Construct a proxy Query object for easy subarray queries of cells for an item or region of the array across one or more attributes. Optionally subselect over attributes, return dense result coordinate values, and specify a layout a result layout / cell-order. :param attrs: the DenseArray attributes to subselect over. If attrs is None (default) all array attributes will be returned. Array attributes can be defined by name or by positional index. :param attr_cond: the QueryCondition to filter attributes on. :param dims: the DenseArray dimensions to subselect over. If dims is None (default) then no dimensions are returned, unless coords=True. :param coords: if True, return array of coodinate value (default False). :param order: 'C', 'F', 'U', or 'G' (row-major, col-major, unordered, TileDB global order) :param use_arrow: if True, return dataframes via PyArrow if applicable. :param return_arrow: if True, return results as a PyArrow Table if applicable. :param return_incomplete: if True, initialize and return an iterable Query object over the indexed range. Consuming this iterable returns a result set for each TileDB incomplete query. See usage example in 'examples/incomplete_iteration.py'. To retrieve the estimated result sizes for the query ranges, use: `A.query(..., return_incomplete=True)[...].est_result_size()` If False (default False), queries will be internally run to completion by resizing buffers and resubmitting until query is complete. :return: A proxy Query object that can be used for indexing into the DenseArray over the defined attributes, in the given result layout (order). :raises ValueError: array is not opened for reads (mode = 'r') :raises: :py:exc:`tiledb.TileDBError` **Example:** >>> # Subselect on attributes when reading: >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... schema = tiledb.ArraySchema(domain=dom, ... attrs=(tiledb.Attr(name="a1", dtype=np.int64), ... tiledb.Attr(name="a2", dtype=np.int64))) ... tiledb.DenseArray.create(tmp + "/array", schema) ... with tiledb.DenseArray(tmp + "/array", mode='w') as A: ... A[0:10] = {"a1": np.zeros((10)), "a2": np.ones((10))} ... with tiledb.DenseArray(tmp + "/array", mode='r') as A: ... # Access specific attributes individually. ... 
A.query(attrs=("a1",))[0:5] OrderedDict([('a1', array([0, 0, 0, 0, 0]))]) """ if not self.isopen or self.mode != 'r': raise TileDBError("DenseArray is not opened for reading") return Query(self, attrs=attrs, attr_cond=attr_cond, dims=dims, coords=coords, order=order, use_arrow=use_arrow, return_arrow=return_arrow, return_incomplete=return_incomplete) def subarray(self, selection, attrs=None, attr_cond=None, coords=False, order=None): """Retrieve data cells for an item or region of the array. Optionally subselect over attributes, return dense result coordinate values, and specify a layout a result layout / cell-order. :param selection: tuple of scalar and/or slice objects :param coords: if True, return array of coordinate value (default False). :param attrs: the DenseArray attributes to subselect over. If attrs is None (default) all array attributes will be returned. Array attributes can be defined by name or by positional index. :param order: 'C', 'F', 'U', or 'G' (row-major, col-major, unordered, TileDB global order) :returns: If the dense array has a single attribute then a Numpy array of corresponding shape/dtype \ is returned for that attribute. If the array has multiple attributes, a \ :py:class:`collections.OrderedDict` is returned with dense Numpy subarrays for each attribute. :raises IndexError: invalid or unsupported index selection :raises: :py:exc:`tiledb.TileDBError` **Example:** >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... schema = tiledb.ArraySchema(domain=dom, ... attrs=(tiledb.Attr(name="a1", dtype=np.int64), ... tiledb.Attr(name="a2", dtype=np.int64))) ... tiledb.DenseArray.create(tmp + "/array", schema) ... with tiledb.DenseArray(tmp + "/array", mode='w') as A: ... A[0:10] = {"a1": np.zeros((10)), "a2": np.ones((10))} ... with tiledb.DenseArray(tmp + "/array", mode='r') as A: ... # A[0:5], attribute a1, row-major without coordinates ... 
A.subarray((slice(0, 5),), attrs=("a1",), coords=False, order='C') OrderedDict([('a1', array([0, 0, 0, 0, 0]))]) """ if not self.isopen or self.mode != 'r': raise TileDBError("DenseArray is not opened for reading") cdef tiledb_layout_t layout = TILEDB_UNORDERED if order is None or order == 'C': layout = TILEDB_ROW_MAJOR elif order == 'F': layout = TILEDB_COL_MAJOR elif order == 'G': layout = TILEDB_GLOBAL_ORDER elif order == 'U': pass else: raise ValueError("order must be 'C' (TILEDB_ROW_MAJOR), "\ "'F' (TILEDB_COL_MAJOR), "\ "'G' (TILEDB_GLOBAL_ORDER), "\ "or 'U' (TILEDB_UNORDERED)") attr_names = list() if coords == True: attr_names.extend(self.schema.domain.dim(i).name for i in range(self.schema.ndim)) elif coords: attr_names.extend(coords) if attrs is None: attr_names.extend( self.schema.attr(i)._internal_name for i in range(self.schema.nattr) ) else: attr_names.extend(self.schema.attr(a).name for a in attrs) selection = index_as_tuple(selection) idx = replace_ellipsis(self.schema.domain.ndim, selection) idx, drop_axes = replace_scalars_slice(self.schema.domain, idx) subarray = index_domain_subarray(self, self.schema.domain, idx) # Note: we included dims (coords) above to match existing semantics out = self._read_dense_subarray(subarray, attr_names, attr_cond, layout, coords) if any(s.step for s in idx): steps = tuple(slice(None, None, s.step) for s in idx) for (k, v) in out.items(): out[k] = v.__getitem__(steps) if drop_axes: for (k, v) in out.items(): out[k] = v.squeeze(axis=drop_axes) # attribute is anonymous, just return the result if not coords and self.schema.nattr == 1: attr = self.schema.attr(0) if attr.isanon: return out[attr._internal_name] return out cdef _read_dense_subarray(self, list subarray, list attr_names, object attr_cond, tiledb_layout_t layout, bint include_coords): from tiledb.main import PyQuery q = PyQuery(self._ctx_(), self, tuple(attr_names), tuple(), layout, False) self.pyquery = q try: q.set_attr_cond(attr_cond) except TileDBError as e: raise TileDBError(e) q.set_ranges([list([x]) for x in subarray]) q.submit() cdef object results = OrderedDict() results = q.results() out = OrderedDict() cdef tuple output_shape domain_dtype = self.domain.dtype is_datetime = domain_dtype.kind == 'M' # Using the domain check is valid because dense arrays are homogeneous if is_datetime: output_shape = \ tuple(_tiledb_datetime_extent(subarray[r][0], subarray[r][1]) for r in range(self.schema.ndim)) else: output_shape = \ tuple(int(subarray[r][1]) - int(subarray[r][0]) + 1 for r in range(self.schema.ndim)) cdef Py_ssize_t nattr = len(attr_names) cdef int i for i in range(nattr): name = attr_names[i] if not self.schema.domain.has_dim(name) and self.schema.attr(name).isvar: # for var arrays we create an object array dtype = object out[name] = q.unpack_buffer(name, results[name][0], results[name][1]).reshape(output_shape) else: dtype = q.buffer_dtype(name) # sanity check the TileDB buffer size against schema? # add assert to verify np.require doesn't copy? arr = results[name][0] arr.dtype = dtype if len(arr) == 0: # special case: the C API returns 0 len for blank arrays arr = np.zeros(output_shape, dtype=dtype) elif len(arr) != np.prod(output_shape): raise Exception("Mismatched output array shape! 
(arr.shape: {}, output.shape: {})".format(arr.shape, output_shape))

            if layout == TILEDB_ROW_MAJOR:
                arr.shape = output_shape
                arr = np.require(arr, requirements='C')
            elif layout == TILEDB_COL_MAJOR:
                arr.shape = output_shape
                arr = np.require(arr, requirements='F')
            else:
                arr.shape = np.prod(output_shape)

            out[name] = arr
        return out

    def __setitem__(self, object selection, object val):
        """Set / update dense data cells

        :param tuple selection: An int index, slice or tuple of integer/slice objects,
            specifying the selected subarray region for each dimension of the DenseArray.
        :param value: a dictionary of array attribute values; each value must be able to be
            converted to an n-d numpy array.\
            If the array has a single attribute, an n-d numpy array is accepted directly.
        :type value: dict or :py:class:`numpy.ndarray`
        :raises IndexError: invalid or unsupported index selection
        :raises ValueError: value / coordinate length mismatch
        :raises: :py:exc:`tiledb.TileDBError`

        **Example:**

        >>> import tiledb, numpy as np, tempfile
        >>> # Write to single-attribute 2D array
        >>> with tempfile.TemporaryDirectory() as tmp:
        ...     # Create an array initially with all zero values
        ...     with tiledb.DenseArray.from_numpy(tmp + "/array", np.zeros((2, 2))) as A:
        ...         pass
        ...     with tiledb.DenseArray(tmp + "/array", mode='w') as A:
        ...         # Write to the single (anonymous) attribute
        ...         A[:] = np.array(([1,2], [3,4]))
        >>>
        >>> # Write to multi-attribute 2D array
        >>> with tempfile.TemporaryDirectory() as tmp:
        ...     dom = tiledb.Domain(
        ...         tiledb.Dim(domain=(0, 1), tile=2, dtype=np.uint64),
        ...         tiledb.Dim(domain=(0, 1), tile=2, dtype=np.uint64))
        ...     schema = tiledb.ArraySchema(domain=dom,
        ...         attrs=(tiledb.Attr(name="a1", dtype=np.int64),
        ...                tiledb.Attr(name="a2", dtype=np.int64)))
        ...     tiledb.DenseArray.create(tmp + "/array", schema)
        ...     with tiledb.DenseArray(tmp + "/array", mode='w') as A:
        ...         # Write to each attribute
        ...         A[0:2, 0:2] = {"a1": np.array(([-3, -4], [-5, -6])),
        ...
"a2": np.array(([1, 2], [3, 4]))} """ selection_tuple = (selection,) if not isinstance(selection, tuple) else selection if any(isinstance(s, np.ndarray) for s in selection_tuple): warnings.warn( "Sparse writes to dense arrays is deprecated", DeprecationWarning, ) _setitem_impl_sparse(self, selection, val, dict()) return self._setitem_impl(selection, val, dict()) def _setitem_impl(self, object selection, object val, dict nullmaps): """Implementation for setitem with optional support for validity bitmaps.""" if not self.isopen or self.mode != 'w': raise TileDBError("DenseArray is not opened for writing") cdef Domain domain = self.domain cdef tuple idx = replace_ellipsis(domain.ndim, index_as_tuple(selection)) idx,_drop = replace_scalars_slice(domain, idx) cdef object subarray = index_domain_subarray(self, domain, idx) cdef Attr attr cdef list attributes = list() cdef list values = list() if isinstance(val, dict): for attr_idx in range(self.schema.nattr): attr = self.schema.attr(attr_idx) k = attr.name v = val[k] attr = self.schema.attr(k) attributes.append(attr._internal_name) # object arrays are var-len and handled later if type(v) is np.ndarray and v.dtype is not np.dtype('O'): v = np.ascontiguousarray(v, dtype=attr.dtype) values.append(v) elif np.isscalar(val): for i in range(self.schema.nattr): attr = self.schema.attr(i) subarray_shape = tuple(int(subarray[r][1] - subarray[r][0]) + 1 for r in range(len(subarray))) attributes.append(attr._internal_name) A = np.empty(subarray_shape, dtype=attr.dtype) A[:] = val values.append(A) elif self.schema.nattr == 1: attr = self.schema.attr(0) attributes.append(attr._internal_name) # object arrays are var-len and handled later if type(val) is np.ndarray and val.dtype is not np.dtype('O'): val = np.ascontiguousarray(val, dtype=attr.dtype) values.append(val) elif self.view_attr is not None: # Support single-attribute assignment for multi-attr array # This is a hack pending # https://github.com/TileDB-Inc/TileDB/issues/1162 # (note: implicitly relies on the fact that we treat all arrays # as zero initialized as long as query returns TILEDB_OK) # see also: https://github.com/TileDB-Inc/TileDB-Py/issues/128 if self.schema.nattr == 1: attributes.append(self.schema.attr(0).name) values.append(val) else: dtype = self.schema.attr(self.view_attr).dtype with DenseArrayImpl(self.uri, 'r', ctx=Ctx(self.ctx.config())) as readable: current = readable[selection] current[self.view_attr] = \ np.ascontiguousarray(val, dtype=dtype) # `current` is an OrderedDict attributes.extend(current.keys()) values.extend(current.values()) else: raise ValueError("ambiguous attribute assignment, " "more than one array attribute " "(use a dict({'attr': val}) to " "assign multiple attributes)") if nullmaps: for key,val in nullmaps.items(): if not self.schema.has_attr(key): raise TileDBError("Cannot set validity for non-existent attribute.") if not self.schema.attr(key).isnullable: raise ValueError("Cannot set validity map for non-nullable attribute.") if not isinstance(val, np.ndarray): raise TypeError(f"Expected NumPy array for attribute '{key}' " f"validity bitmap, got {type(val)}") if val.dtype != np.uint8: raise TypeError(f"Expected NumPy uint8 array for attribute '{key}' " f"validity bitmap, got {val.dtype}") _write_array(self.ctx.ptr, self.ptr, self, subarray, attributes, values, nullmaps, self.last_fragment_info, False) return def __array__(self, dtype=None, **kw): """Implementation of numpy __array__ protocol (internal). 
:return: Numpy ndarray resulting from indexing the entire array. """ if self.view_attr is None and self.nattr > 1: raise ValueError("cannot call __array__ for TileDB array with more than one attribute") cdef unicode name if self.view_attr: name = self.view_attr else: name = self.schema.attr(0).name array = self.read_direct(name=name) if dtype and array.dtype != dtype: return array.astype(dtype) return array def write_direct(self, np.ndarray array not None): """ Write directly to given array attribute with minimal checks, assumes that the numpy array is the same shape as the array's domain :param np.ndarray array: Numpy contiguous dense array of the same dtype \ and shape and layout of the DenseArray instance :raises ValueError: array is not contiguous :raises: :py:exc:`tiledb.TileDBError` """ if not self.isopen or self.mode != 'w': raise TileDBError("DenseArray is not opened for writing") if self.schema.nattr != 1: raise ValueError("cannot write_direct to a multi-attribute DenseArray") if not array.flags.c_contiguous and not array.flags.f_contiguous: raise ValueError("array is not contiguous") cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_array_t* array_ptr = self.ptr # attr name cdef Attr attr = self.schema.attr(0) cdef bytes battr_name = attr._internal_name.encode('UTF-8') cdef const char* attr_name_ptr = PyBytes_AS_STRING(battr_name) cdef void* buff_ptr = np.PyArray_DATA(array) cdef uint64_t buff_size = array.nbytes cdef tiledb_layout_t layout = TILEDB_ROW_MAJOR if array.ndim == 1: layout = TILEDB_GLOBAL_ORDER elif array.ndim > 1 and array.flags.f_contiguous: layout = TILEDB_COL_MAJOR cdef tiledb_query_t* query_ptr = NULL cdef int rc = TILEDB_OK rc = tiledb_query_alloc(ctx_ptr, array_ptr, TILEDB_WRITE, &query_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) try: rc = tiledb_query_set_layout(ctx_ptr, query_ptr, layout) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) rc = tiledb_query_set_buffer(ctx_ptr, query_ptr, attr_name_ptr, buff_ptr, &buff_size) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) with nogil: rc = tiledb_query_submit(ctx_ptr, query_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) with nogil: rc = tiledb_query_finalize(ctx_ptr, query_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) finally: tiledb_query_free(&query_ptr) return def read_direct(self, unicode name=None): """Read attribute directly with minimal overhead, returns a numpy ndarray over the entire domain :param str attr_name: read directly to an attribute name (default ) :rtype: numpy.ndarray :return: numpy.ndarray of `attr_name` values over the entire array domain :raises: :py:exc:`tiledb.TileDBError` """ if not self.isopen or self.mode != 'r': raise TileDBError("DenseArray is not opened for reading") cdef Ctx ctx = self.ctx cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_array_t* array_ptr = self.ptr cdef Attr attr cdef unicode attr_name if name is None and self.schema.nattr != 1: raise ValueError( "read_direct with no provided attribute is ambiguous for multi-attribute arrays") elif name is None: attr = self.schema.attr(0) attr_name = attr._internal_name else: attr = self.schema.attr(name) attr_name = attr._internal_name order = 'C' cdef tiledb_layout_t cell_layout = TILEDB_ROW_MAJOR if self.schema.cell_order == 'col-major' and self.schema.tile_order == 'col-major': order = 'F' cell_layout = TILEDB_COL_MAJOR cdef ArraySchema schema = self.schema cdef Domain domain = schema.domain idx = tuple(slice(None) for _ in range(domain.ndim)) subarray = index_domain_subarray(self, domain, idx) 
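        # NOTE (editorial): `idx` above selects the full domain -- e.g.
        # (slice(None), slice(None)) for a 2-D array -- so `subarray` spans
        # every cell and the read below is equivalent to A[:] for `attr_name`.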
out = self._read_dense_subarray(subarray, [attr_name,], None, cell_layout, False) return out[attr_name] # point query index a tiledb array (zips) columnar index vectors def index_domain_coords(dom: Domain, idx: tuple, check_ndim: bool): """ Returns a (zipped) coordinate array representation given coordinate indices in numpy's point indexing format """ ndim = len(idx) if check_ndim: if ndim != dom.ndim: raise IndexError("sparse index ndim must match domain ndim: " "{0!r} != {1!r}".format(ndim, dom.ndim)) domain_coords = [] for dim, sel in zip(dom, idx): dim_is_string = (np.issubdtype(dim.dtype, np.str_) or np.issubdtype(dim.dtype, np.bytes_)) if dim_is_string: try: # ensure strings contain only ASCII characters domain_coords.append(np.array(sel, dtype=np.bytes_, ndmin=1)) except Exception as exc: raise TileDBError(f'Dim\' strings may only contain ASCII characters') else: domain_coords.append(np.array(sel, dtype=dim.dtype, ndmin=1)) idx = tuple(domain_coords) # check that all sparse coordinates are the same size and dtype dim0 = dom.dim(0) dim0_type = dim0.dtype len0 = len(idx[0]) for dim_idx in range(ndim): dim_dtype = dom.dim(dim_idx).dtype if len(idx[dim_idx]) != len0: raise IndexError("sparse index dimension length mismatch") if np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_): if not (np.issubdtype(idx[dim_idx].dtype, np.str_) or \ np.issubdtype(idx[dim_idx].dtype, np.bytes_)): raise IndexError("sparse index dimension dtype mismatch") elif idx[dim_idx].dtype != dim_dtype: raise IndexError("sparse index dimension dtype mismatch") return idx def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps): if not self.isopen or self.mode != 'w': raise TileDBError("SparseArray is not opened for writing") set_dims_only = val is None sparse_attributes = list() sparse_values = list() idx = index_as_tuple(selection) sparse_coords = list(index_domain_coords(self.schema.domain, idx, not set_dims_only)) if set_dims_only: _write_array( self.ctx.ptr, self.ptr, self, sparse_coords, sparse_attributes, sparse_values, nullmaps, self.last_fragment_info, True ) return if not isinstance(val, dict): if self.nattr > 1: raise ValueError("Expected dict-like object {name: value} for multi-attribute " "array.") val = dict({self.attr(0).name: val}) # must iterate in Attr order to ensure that value order matches for attr_idx in range(self.schema.nattr): attr = self.attr(attr_idx) name = attr.name attr_val = val[name] try: if attr.isvar: # ensure that the value is array-convertible, for example: pandas.Series attr_val = np.asarray(attr_val) else: if (np.issubdtype(attr.dtype, np.string_) and not (np.issubdtype(attr_val.dtype, np.string_) or attr_val.dtype == np.dtype('O'))): raise ValueError("Cannot write a string value to non-string " "typed attribute '{}'!".format(name)) attr_val = np.ascontiguousarray(attr_val, dtype=attr.dtype) if attr.isnullable and attr.name not in nullmaps: nullmaps[attr.name] = np.array([int(v is None) for v in attr_val], dtype=np.uint8) except Exception as exc: raise ValueError(f"NumPy array conversion check failed for attr '{name}'") from exc # set nullmap if nullable attribute does not have a nullmap already set if attr.isnullable and attr.name not in nullmaps: nullmaps[attr.name] = np.ones(attr_val.shape) # if dtype is ASCII, ensure all characters are valid if attr.isascii: try: np.asarray(attr_val, dtype=np.bytes_) except Exception as exc: raise TileDBError(f'Attr\'s dtype is "ascii" but attr_val contains invalid ASCII characters') ncells = 
sparse_coords[0].shape[0] if attr_val.size != ncells: raise ValueError("value length ({}) does not match " "coordinate length ({})".format(attr_val.size, ncells)) sparse_attributes.append(attr._internal_name) sparse_values.append(attr_val) if (len(sparse_attributes) != len(val.keys())) \ or (len(sparse_values) != len(val.values())): raise TileDBError("Sparse write input data count does not match number of attributes") _write_array( self.ctx.ptr, self.ptr, self, sparse_coords, sparse_attributes, sparse_values, nullmaps, self.last_fragment_info, True ) return cdef class SparseArrayImpl(Array): """Class representing a sparse TileDB array (internal). Inherits properties and methods of :py:class:`tiledb.Array`. """ def __init__(self, *args, **kw): super().__init__(*args, **kw) if not self.schema.sparse: raise ValueError("Array at '{}' is not a sparse array".format(self.uri)) return def __len__(self): raise TypeError("SparseArray length is ambiguous; use shape[0]") def __setitem__(self, selection, val): """Set / update sparse data cells :param tuple selection: N coordinate value arrays (dim0, dim1, ...) where N in the ndim of the SparseArray, The format follows numpy sparse (point) indexing semantics. :param value: a dictionary of nonempty array attribute values, values must able to be converted to 1-d numpy arrays.\ if the number of attributes is one, then a 1-d numpy array is accepted. :type value: dict or :py:class:`numpy.ndarray` :raises IndexError: invalid or unsupported index selection :raises ValueError: value / coordinate length mismatch :raises: :py:exc:`tiledb.TileDBError` **Example:** >>> import tiledb, numpy as np, tempfile >>> # Write to multi-attribute 2D array >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain( ... tiledb.Dim(domain=(0, 1), tile=2, dtype=np.uint64), ... tiledb.Dim(domain=(0, 1), tile=2, dtype=np.uint64)) ... schema = tiledb.ArraySchema(domain=dom, sparse=True, ... attrs=(tiledb.Attr(name="a1", dtype=np.int64), ... tiledb.Attr(name="a2", dtype=np.int64))) ... tiledb.SparseArray.create(tmp + "/array", schema) ... with tiledb.SparseArray(tmp + "/array", mode='w') as A: ... # Write in the corner cells (0,0) and (1,1) only. ... I, J = [0, 1], [0, 1] ... # Write to each attribute ... A[I, J] = {"a1": np.array([1, 2]), ... "a2": np.array([3, 4])} """ _setitem_impl_sparse(self, selection, val, dict()) def __getitem__(self, object selection): """Retrieve nonempty cell data for an item or region of the array :param tuple selection: An int index, slice or tuple of integer/slice objects, specifying the selected subarray region for each dimension of the SparseArray. :rtype: :py:class:`collections.OrderedDict` :returns: An OrderedDict is returned with dimension and attribute names as keys. \ Nonempty attribute values are returned as Numpy 1-d arrays. :raises IndexError: invalid or unsupported index selection :raises: :py:exc:`tiledb.TileDBError` **Example:** >>> import tiledb, numpy as np, tempfile >>> # Write to multi-attribute 2D array >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain( ... tiledb.Dim(name="y", domain=(0, 9), tile=2, dtype=np.uint64), ... tiledb.Dim(name="x", domain=(0, 9), tile=2, dtype=np.uint64)) ... schema = tiledb.ArraySchema(domain=dom, sparse=True, ... attrs=(tiledb.Attr(name="a1", dtype=np.int64), ... tiledb.Attr(name="a2", dtype=np.int64))) ... tiledb.SparseArray.create(tmp + "/array", schema) ... with tiledb.SparseArray(tmp + "/array", mode='w') as A: ... # Write in the twp cells (0,0) and (2,3) only. ... 
I, J = [0, 2], [0, 3] ... # Write to each attribute ... A[I, J] = {"a1": np.array([1, 2]), ... "a2": np.array([3, 4])} ... with tiledb.SparseArray(tmp + "/array", mode='r') as A: ... # Return an OrderedDict with values and coordinates ... A[0:3, 0:10] ... # Return just the "x" coordinates values ... A[0:3, 0:10]["x"] OrderedDict([('a1', array([1, 2])), ('a2', array([3, 4])), ('y', array([0, 2], dtype=uint64)), ('x', array([0, 3], dtype=uint64))]) array([0, 3], dtype=uint64) With a floating-point array domain, index bounds are inclusive, e.g.: >>> # Return nonempty cells within a floating point array domain (fp index bounds are inclusive): >>> # A[5.0:579.9] """ return self.subarray(selection) def query(self, attrs=None, attr_cond=None, dims=None, index_col=True, coords=None, order='U', use_arrow=None, return_arrow=None, return_incomplete=False): """ Construct a proxy Query object for easy subarray queries of cells for an item or region of the array across one or more attributes. Optionally subselect over attributes, return dense result coordinate values, and specify a layout a result layout / cell-order. :param attrs: the SparseArray attributes to subselect over. If attrs is None (default) all array attributes will be returned. Array attributes can be defined by name or by positional index. :param attr_cond: the QueryCondition to filter attributes on. :param dims: the SparseArray dimensions to subselect over. If dims is None (default) then all dimensions are returned, unless coords=False. :param index_col: For dataframe queries, override the saved index information, and only set specified index(es) in the final dataframe, or None. :param coords: (deprecated) if True, return array of coordinate value (default False). :param order: 'C', 'F', or 'G' (row-major, col-major, tiledb global order) :param use_arrow: if True, return dataframes via PyArrow if applicable. :param return_arrow: if True, return results as a PyArrow Table if applicable. :return: A proxy Query object that can be used for indexing into the SparseArray over the defined attributes, in the given result layout (order). **Example:** >>> import tiledb, numpy as np, tempfile >>> # Write to multi-attribute 2D array >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain( ... tiledb.Dim(name="y", domain=(0, 9), tile=2, dtype=np.uint64), ... tiledb.Dim(name="x", domain=(0, 9), tile=2, dtype=np.uint64)) ... schema = tiledb.ArraySchema(domain=dom, sparse=True, ... attrs=(tiledb.Attr(name="a1", dtype=np.int64), ... tiledb.Attr(name="a2", dtype=np.int64))) ... tiledb.SparseArray.create(tmp + "/array", schema) ... with tiledb.SparseArray(tmp + "/array", mode='w') as A: ... # Write in the twp cells (0,0) and (2,3) only. ... I, J = [0, 2], [0, 3] ... # Write to each attribute ... A[I, J] = {"a1": np.array([1, 2]), ... "a2": np.array([3, 4])} ... with tiledb.SparseArray(tmp + "/array", mode='r') as A: ... 
A.query(attrs=("a1",), coords=False, order='G')[0:3, 0:10] OrderedDict([('a1', array([1, 2]))]) """ if not self.isopen: raise TileDBError("SparseArray is not opened") # backwards compatibility _coords = coords if dims is False: _coords = False elif dims is None and coords is None: _coords = True return Query(self, attrs=attrs, attr_cond=attr_cond, dims=dims, coords=_coords, index_col=index_col, order=order, use_arrow=use_arrow, return_arrow=return_arrow, return_incomplete=return_incomplete) def subarray(self, selection, coords=True, attrs=None, attr_cond=None, order=None): """ Retrieve dimension and data cells for an item or region of the array. Optionally subselect over attributes, return sparse result coordinate values, and specify a layout a result layout / cell-order. :param selection: tuple of scalar and/or slice objects :param coords: if True, return array of coordinate value (default True). :param attrs: the SparseArray attributes to subselect over. If attrs is None (default) all array attributes will be returned. Array attributes can be defined by name or by positional index. :param order: 'C', 'F', or 'G' (row-major, col-major, tiledb global order) :returns: An OrderedDict is returned with dimension and attribute names as keys. \ Nonempty attribute values are returned as Numpy 1-d arrays. **Example:** >>> import tiledb, numpy as np, tempfile >>> # Write to multi-attribute 2D array >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain( ... tiledb.Dim(name="y", domain=(0, 9), tile=2, dtype=np.uint64), ... tiledb.Dim(name="x", domain=(0, 9), tile=2, dtype=np.uint64)) ... schema = tiledb.ArraySchema(domain=dom, sparse=True, ... attrs=(tiledb.Attr(name="a1", dtype=np.int64), ... tiledb.Attr(name="a2", dtype=np.int64))) ... tiledb.SparseArray.create(tmp + "/array", schema) ... with tiledb.SparseArray(tmp + "/array", mode='w') as A: ... # Write in the twp cells (0,0) and (2,3) only. ... I, J = [0, 2], [0, 3] ... # Write to each attribute ... A[I, J] = {"a1": np.array([1, 2]), ... "a2": np.array([3, 4])} ... with tiledb.SparseArray(tmp + "/array", mode='r') as A: ... # A[0:3, 0:10], attribute a1, row-major without coordinates ... 
A.subarray((slice(0, 3), slice(0, 10)), attrs=("a1",), coords=False, order='G') OrderedDict([('a1', array([1, 2]))]) """ if not self.isopen or self.mode != 'r': raise TileDBError("SparseArray is not opened for reading") cdef tiledb_layout_t layout = TILEDB_UNORDERED if order is None or order == 'U': layout = TILEDB_UNORDERED elif order == 'C': layout = TILEDB_ROW_MAJOR elif order == 'F': layout = TILEDB_COL_MAJOR elif order == 'G': layout = TILEDB_GLOBAL_ORDER else: raise ValueError("order must be 'C' (TILEDB_ROW_MAJOR), "\ "'F' (TILEDB_COL_MAJOR), "\ "'G' (TILEDB_GLOBAL_ORDER), "\ "or 'U' (TILEDB_UNORDERED)") attr_names = list() if attrs is None: attr_names.extend(self.schema.attr(i)._internal_name for i in range(self.schema.nattr)) else: attr_names.extend(self.schema.attr(a)._internal_name for a in attrs) if coords == True: attr_names.extend(self.schema.domain.dim(i).name for i in range(self.schema.ndim)) elif coords: attr_names.extend(coords) dom = self.schema.domain idx = index_as_tuple(selection) idx = replace_ellipsis(dom.ndim, idx) idx, drop_axes = replace_scalars_slice(dom, idx) subarray = index_domain_subarray(self, dom, idx) return self._read_sparse_subarray(subarray, attr_names, attr_cond, layout) def __repr__(self): if self.isopen: return "SparseArray(uri={0!r}, mode={1}, ndim={2})"\ .format(self.uri, self.mode, self.schema.ndim) else: return "SparseArray(uri={0!r}, mode=closed)".format(self.uri) cdef _read_sparse_subarray(self, list subarray, list attr_names, object attr_cond, tiledb_layout_t layout): cdef object out = OrderedDict() # all results are 1-d vectors cdef np.npy_intp dims[1] cdef Py_ssize_t nattr = len(attr_names) from tiledb.main import PyQuery q = PyQuery(self._ctx_(), self, tuple(attr_names), tuple(), layout, False) self.pyquery = q try: q.set_attr_cond(attr_cond) except TileDBError as e: raise TileDBError(e) q.set_ranges([list([x]) for x in subarray]) q.submit() cdef object results = OrderedDict() results = q.results() # collect a list of dtypes for resulting to construct array dtypes = list() for i in range(nattr): name, final_name = attr_names[i], attr_names[i] if name == '__attr': final_name = '' if self.schema._needs_var_buffer(name): if len(results[name][1]) > 0: # note: len(offsets) > 0 arr = q.unpack_buffer(name, results[name][0], results[name][1]) else: arr = results[name][0] arr.dtype = self.schema.attr_or_dim_dtype(name) out[final_name] = arr else: if self.schema.domain.has_dim(name): el_dtype = self.schema.domain.dim(name).dtype else: el_dtype = self.attr(name).dtype arr = results[name][0] # this is a work-around for NumPy restrictions removed in 1.16 if el_dtype == np.dtype('S0'): out[final_name] = b'' elif el_dtype == np.dtype('U0'): out[final_name] = u'' else: arr.dtype = el_dtype out[final_name] = arr return out def unique_dim_values(self, dim=None): if dim is not None and not isinstance(dim, str): raise ValueError("Given Dimension {} is not a string.".format(dim)) if dim is not None and not self.domain.has_dim(dim): raise ValueError("Array does not contain Dimension '{}'.".format(dim)) query = self.query(attrs=[])[:] if dim: dim_values = tuple(np.unique(query[dim])) else: dim_values = OrderedDict() for dim in query: dim_values[dim] = tuple(np.unique(query[dim])) return dim_values def consolidate(uri, key=None, Config config=None, Ctx ctx=None, timestamp=None): """Consolidates TileDB array fragments for improved read performance :param str uri: URI to the TileDB Array :param str key: (default None) Key to decrypt array if the array is encrypted 
    :param tiledb.Config config: The TileDB Config with consolidation parameters set
    :param tiledb.Ctx ctx: (default None) The TileDB Context
    :param timestamp: (default None) If not None, consolidate the array using the
        given tuple(int, int) UNIX seconds range (inclusive)
    :rtype: str or bytes
    :return: path (URI) to the consolidated TileDB Array
    :raises TypeError: cannot convert path to unicode string
    :raises: :py:exc:`tiledb.TileDBError`

    Rather than passing the timestamp into this function, it may be set with the
    config parameters `"sm.consolidation.timestamp_start"` and
    `"sm.consolidation.timestamp_end"`, which accept times in UNIX seconds. If
    both are set, then this function's `timestamp` argument will be used.

    """
    if not ctx:
        ctx = default_ctx()
    cdef tiledb_ctx_t* ctx_ptr = ctx.ptr

    if timestamp:
        if config is None:
            config = Config()

        if not isinstance(timestamp, tuple) or len(timestamp) != 2:
            raise TypeError("'timestamp' argument expects tuple(start: int, end: int)")

        if timestamp[0] is not None:
            config["sm.consolidation.timestamp_start"] = timestamp[0]
        if timestamp[1] is not None:
            config["sm.consolidation.timestamp_end"] = timestamp[1]

    cdef tiledb_config_t* config_ptr = NULL
    if config is not None:
        config_ptr = config.ptr
    cdef bytes buri = unicode_path(uri)
    cdef const char* uri_ptr = PyBytes_AS_STRING(buri)
    # encryption key
    cdef:
        bytes bkey
        tiledb_encryption_type_t key_type = TILEDB_NO_ENCRYPTION
        void* key_ptr = NULL
        unsigned int key_len = 0
    if key is not None:
        if isinstance(key, str):
            bkey = key.encode('ascii')
        else:
            bkey = bytes(key)
        key_type = TILEDB_AES_256_GCM
        key_ptr = <void*> PyBytes_AS_STRING(bkey)
        #TODO: unsafe cast here ssize_t -> uint64_t
        key_len = <unsigned int> PyBytes_GET_SIZE(bkey)
    cdef int rc = TILEDB_OK
    with nogil:
        rc = tiledb_array_consolidate_with_key(ctx_ptr, uri_ptr, key_type, key_ptr, key_len, config_ptr)
    if rc != TILEDB_OK:
        _raise_ctx_err(ctx_ptr, rc)
    return uri


def group_create(uri, Ctx ctx=None):
    """
    Create a TileDB Group object at the specified path (URI)

    :param str uri: URI of the TileDB Group to be created
    :rtype: str
    :param tiledb.Ctx ctx: The TileDB Context
    :return: The URI of the created TileDB Group
    :raises TypeError: cannot convert path to unicode string
    :raises: :py:exc:`tiledb.TileDBError`

    """
    if not ctx:
        ctx = default_ctx()
    cdef int rc = TILEDB_OK
    cdef tiledb_ctx_t* ctx_ptr = ctx.ptr
    cdef bytes buri = unicode_path(uri)
    cdef const char* uri_ptr = PyBytes_AS_STRING(buri)
    with nogil:
        rc = tiledb_group_create(ctx_ptr, uri_ptr)
    if rc != TILEDB_OK:
        check_error(ctx, rc)
    return uri


def object_type(uri, Ctx ctx=None):
    """Returns the TileDB object type at the specified path (URI)

    :param str uri: path (URI) of the TileDB resource
    :rtype: str
    :param tiledb.Ctx ctx: The TileDB Context
    :return: object type string
    :raises TypeError: cannot convert path to unicode string

    """
    if not ctx:
        ctx = default_ctx()
    cdef int rc = TILEDB_OK
    cdef tiledb_ctx_t* ctx_ptr = ctx.ptr
    cdef bytes buri = unicode_path(uri)
    cdef const char* path_ptr = PyBytes_AS_STRING(buri)
    cdef tiledb_object_t obj = TILEDB_INVALID
    with nogil:
        rc = tiledb_object_type(ctx_ptr, path_ptr, &obj)
    if rc != TILEDB_OK:
        check_error(ctx, rc)
    objtype = None
    if obj == TILEDB_ARRAY:
        objtype = "array"
    # removed in libtiledb 1.7
    #elif obj == TILEDB_KEY_VALUE:
    #    objtype = "kv"
    elif obj == TILEDB_GROUP:
        objtype = "group"
    return objtype


def remove(uri, Ctx ctx=None):
    """Removes (deletes) the TileDB object at the specified path (URI)

    :param str uri: URI of the TileDB resource
    :param tiledb.Ctx ctx: The TileDB Context
    :raises TypeError: uri cannot be converted to a unicode string
    :raises: :py:exc:`tiledb.TileDBError`

    """
    if not ctx:
        ctx = default_ctx()
    cdef int rc = TILEDB_OK
    cdef tiledb_ctx_t* ctx_ptr = ctx.ptr
    cdef bytes buri = unicode_path(uri)
    cdef const char* uri_ptr = PyBytes_AS_STRING(buri)
    with nogil:
        rc = tiledb_object_remove(ctx_ptr, uri_ptr)
    if rc != TILEDB_OK:
        check_error(ctx, rc)
    return


def move(old_uri, new_uri, Ctx ctx=None):
    """Moves a TileDB resource (group, array, key-value).

    :param tiledb.Ctx ctx: The TileDB Context
    :param str old_uri: path (URI) of the TileDB resource to move
    :param str new_uri: path (URI) of the destination
    :raises TypeError: uri cannot be converted to a unicode string
    :raises: :py:exc:`TileDBError`

    """
    if not ctx:
        ctx = default_ctx()
    cdef int rc = TILEDB_OK
    cdef tiledb_ctx_t* ctx_ptr = ctx.ptr
    cdef bytes b_old_path = unicode_path(old_uri)
    cdef bytes b_new_path = unicode_path(new_uri)
    cdef const char* old_path_ptr = PyBytes_AS_STRING(b_old_path)
    cdef const char* new_path_ptr = PyBytes_AS_STRING(b_new_path)
    with nogil:
        rc = tiledb_object_move(ctx_ptr, old_path_ptr, new_path_ptr)
    if rc != TILEDB_OK:
        check_error(ctx, rc)
    return


cdef int vfs_ls_callback(const char* path_ptr, void* py_list):
    cdef list result_list
    cdef unicode path
    try:
        result_list = <list> py_list
        path = path_ptr.decode('UTF-8')
        result_list.append(path)
    except StopIteration:
        return 0
    return 1


cdef int walk_callback(const char* path_ptr, tiledb_object_t obj, void* pyfunc):
    objtype = None
    if obj == TILEDB_GROUP:
        objtype = "group"
    if obj == TILEDB_ARRAY:
        objtype = "array"
    # removed in 1.7
    #elif obj == TILEDB_KEY_VALUE:
    #    objtype = "kv"
    try:
        (<object> pyfunc)(path_ptr.decode('UTF-8'), objtype)
    except StopIteration:
        return 0
    return 1


def ls(path, func, Ctx ctx=None):
    """Lists TileDB resources with a prefix of ``path`` (one level deep) and
    applies a callback to each one.
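
    For example (illustrative): ``tiledb.ls(uri, lambda path, kind: print(path, kind))``
    prints each child URI together with its object type label ("array" or "group").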
    :param str path: URI of TileDB group object
    :param function func: callback to execute on every listed TileDB resource,\
        URI resource path and object type label are passed as arguments to the callback
    :param tiledb.Ctx ctx: TileDB context
    :raises TypeError: cannot convert path to unicode string
    :raises: :py:exc:`tiledb.TileDBError`

    """
    if not ctx:
        ctx = default_ctx()
    cdef bytes bpath = unicode_path(path)
    check_error(ctx,
                tiledb_object_ls(ctx.ptr, bpath, walk_callback, <void*> func))
    return


def walk(path, func, order="preorder", Ctx ctx=None):
    """Recursively visits TileDB resources and applies a callback to resources
    that have a prefix of ``path``

    :param str path: URI of TileDB group object
    :param function func: callback to execute on every listed TileDB resource,\
        URI resource path and object type label are passed as arguments to the callback
    :param tiledb.Ctx ctx: The TileDB context
    :param str order: 'preorder' (default) or 'postorder' tree traversal
    :raises TypeError: cannot convert path to unicode string
    :raises ValueError: unknown order
    :raises: :py:exc:`tiledb.TileDBError`

    """
    if not ctx:
        ctx = default_ctx()
    cdef bytes bpath = unicode_path(path)
    cdef tiledb_walk_order_t walk_order
    if order == "postorder":
        walk_order = TILEDB_POSTORDER
    elif order == "preorder":
        walk_order = TILEDB_PREORDER
    else:
        raise ValueError("unknown walk order {}".format(order))
    check_error(ctx,
                tiledb_object_walk(ctx.ptr, bpath, walk_order, walk_callback, <void*> func))
    return


cdef class FileHandle(object):
    """
    Wraps a TileDB VFS file handle object

    Instances of this class are returned by TileDB VFS methods and are not
    instantiated directly
    """

    def __cinit__(self):
        self.ptr = NULL

    def __dealloc__(self):
        if self.ptr != NULL:
            tiledb_vfs_fh_free(&self.ptr)

    @staticmethod
    cdef from_ptr(VFS vfs, unicode uri, tiledb_vfs_fh_t* fh_ptr):
        """Constructs a FileHandle class instance from a URI and a tiledb_vfs_fh_t pointer"""
        assert(fh_ptr != NULL)
        cdef FileHandle fh = FileHandle.__new__(FileHandle)
        fh.vfs = vfs
        fh.uri = uri
        fh.ptr = fh_ptr
        return fh

    cpdef closed(self):
        """Returns true if the file handle is closed"""
        cdef Ctx ctx = self.vfs.ctx
        cdef int isclosed = 0
        check_error(ctx,
                    tiledb_vfs_fh_is_closed(ctx.ptr, self.ptr, &isclosed))
        return bool(isclosed)


class FileIO(io.RawIOBase):

    def __init__(self, VFS vfs, uri, mode="rb"):
        cdef tiledb_vfs_mode_t vfs_mode
        if mode == "rb":
            vfs_mode = TILEDB_VFS_READ
        elif mode == "wb":
            vfs_mode = TILEDB_VFS_WRITE
        elif mode == "ab":
            vfs_mode = TILEDB_VFS_APPEND
        else:
            raise ValueError("invalid mode {0!r}".format(mode))
        cdef bytes buri = unicode_path(uri)
        cdef tiledb_ctx_t* ctx_ptr = vfs.ctx.ptr
        cdef tiledb_vfs_t* vfs_ptr = vfs.ptr
        cdef const char* uri_ptr = PyBytes_AS_STRING(buri)
        cdef tiledb_vfs_fh_t* fh_ptr = NULL
        cdef int rc = TILEDB_OK
        with nogil:
            rc = tiledb_vfs_open(ctx_ptr, vfs_ptr, uri_ptr, vfs_mode, &fh_ptr)
        if rc != TILEDB_OK:
            _raise_ctx_err(ctx_ptr, rc)
        self.fh = FileHandle.from_ptr(vfs, buri.decode('UTF-8'), fh_ptr)
        self.vfs = vfs
        self._offset = 0
        self._closed = False
        self._readonly = True
        if mode == "rb":
            try:
                self._nbytes = vfs.file_size(uri)
            except Exception:
                raise IOError("URI {0!r} is not a valid file".format(uri))
            self._readonly = True
        elif mode == "wb" or mode == "ab":
            self._readonly = False
            self._nbytes = 0
        else:
            raise ValueError("invalid mode {0!r}".format(mode))
        self._mode = mode
        return

    def __len__(self):
        return self._nbytes

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.flush()
        self.close()
        return

    @property
    def mode(self):
        return self._mode

    @property
    def closed(self):
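        """True if the underlying VFS file handle has been closed."""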
        return self.fh.closed()

    def close(self):
        self.vfs.close(self.fh)

    def flush(self):
        self.vfs.sync(self.fh)

    def seekable(self):
        return True

    def readable(self):
        return self._readonly

    def seek(self, offset, whence=0):
        if not isinstance(offset, (int, long)):
            raise TypeError(f"Offset must be an integer or None (got {safe_repr(offset)})")
        if whence == 0:
            if offset < 0:
                raise ValueError("offset must be >= 0 when whence == SEEK_SET")
            self._offset = offset
        elif whence == 1:
            self._offset += offset
        elif whence == 2:
            self._offset = self._nbytes + offset
        else:
            raise ValueError("whence must be equal to SEEK_SET, SEEK_CUR, or SEEK_END")
        if self._offset < 0:
            self._offset = 0
        elif self._offset > self._nbytes:
            self._offset = self._nbytes
        return self._offset

    def tell(self):
        return self._offset

    def writable(self):
        return not self._readonly

    def read(self, size=-1):
        if not isinstance(size, (int, long)):
            raise TypeError(f"size must be an integer or None (got {safe_repr(size)})")
        if self._mode == "wb":
            raise IOError("Cannot read from write-only FileIO handle")
        if self.closed:
            raise IOError("Cannot read from closed FileIO handle")
        nbytes_remaining = self._nbytes - self._offset
        cdef Py_ssize_t nbytes
        if size < 0:
            nbytes = nbytes_remaining
        elif size > nbytes_remaining:
            nbytes = nbytes_remaining
        else:
            nbytes = size
        if nbytes == 0:
            return b''

        cdef bytes buff = PyBytes_FromStringAndSize(NULL, nbytes)
        self.vfs.readinto(self.fh, buff, self._offset, nbytes)
        self._offset += nbytes
        return buff

    def read1(self, size=-1):
        return self.read(size)

    def readall(self):
        if self._mode == "wb":
            raise IOError("cannot read from a write-only FileIO handle")
        if self.closed:
            raise IOError("cannot read from closed FileIO handle")
        cdef Py_ssize_t nbytes = self._nbytes - self._offset
        if nbytes == 0:
            return PyBytes_FromStringAndSize(NULL, 0)

        cdef bytes buff = PyBytes_FromStringAndSize(NULL, nbytes)
        self.vfs.readinto(self.fh, buff, self._offset, nbytes)
        self._offset += nbytes
        return buff

    def readinto(self, buff):
        if self._mode == "wb":
            raise IOError("cannot read from a write-only FileIO handle")
        if self.closed:
            raise IOError("cannot read from closed FileIO handle")
        # clamp the read length to the bytes remaining after the current offset
        nbytes = len(buff)
        nbytes_remaining = self._nbytes - self._offset
        if nbytes > nbytes_remaining:
            nbytes = nbytes_remaining
        if nbytes == 0:
            return 0
        self.vfs.readinto(self.fh, buff, self._offset, nbytes)
        self._offset += nbytes
        # RawIOBase contract is to return the number of bytes read
        return nbytes

    def write(self, buff):
        if not self.writable():
            raise IOError("cannot write to read-only FileIO handle")
        if isinstance(buff, str):
            buff = buff.encode()
        nbytes = len(buff)
        self.vfs.write(self.fh, buff)
        self._nbytes += nbytes
        self._offset += nbytes
        return nbytes


cdef class VFS(object):
    """TileDB VFS class

    Encapsulates the TileDB VFS module instance with a specific configuration (config).

    :param tiledb.Ctx ctx: The TileDB Context
    :param config: Override `ctx` VFS configurations with updated values in config.
    :type config: tiledb.Config or dict
    """

    def __cinit__(self):
        self.ptr = NULL

    def __dealloc__(self):
        if self.ptr != NULL:
            tiledb_vfs_free(&self.ptr)

    def __init__(self, config=None, Ctx ctx=None):
        if not ctx:
            ctx = default_ctx()
        cdef Config _config = Config(ctx.config())
        if config is not None:
            if isinstance(config, Config):
                _config = config
            else:
                _config.update(config)
        cdef tiledb_vfs_t* vfs_ptr = NULL
        check_error(ctx,
                    tiledb_vfs_alloc(ctx.ptr, _config.ptr, &vfs_ptr))
        self.ctx = ctx
        self.ptr = vfs_ptr

    def create_bucket(self, uri):
        """Create an object store bucket at the given URI

        :param str uri: full URI of bucket resource to be created.
        :rtype: str
        :returns: created bucket URI
        :raises TypeError: cannot convert `uri` to unicode string
        :raises: :py:exc:`tiledb.TileDBError`

        """
        cdef bytes buri = unicode_path(uri)
        cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr
        cdef tiledb_vfs_t* vfs_ptr = self.ptr
        cdef const char* uri_ptr = PyBytes_AS_STRING(buri)
        cdef int rc = TILEDB_OK
        with nogil:
            rc = tiledb_vfs_create_bucket(ctx_ptr, vfs_ptr, uri_ptr)
        if rc != TILEDB_OK:
            _raise_ctx_err(ctx_ptr, rc)
        return uri

    def remove_bucket(self, uri):
        """Remove an object store bucket at the given URI

        :param str uri: URI of bucket resource to be removed.
        :raises TypeError: cannot convert `uri` to unicode string
        :raises: :py:exc:`tiledb.TileDBError`

        .. note::
            Consistency is not enforced for bucket removal, so although this
            function will return immediately on success, the actual removal
            of the bucket may take some (indeterminate) amount of time.

        """
        cdef bytes buri = unicode_path(uri)
        cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr
        cdef tiledb_vfs_t* vfs_ptr = self.ptr
        cdef const char* uri_ptr = PyBytes_AS_STRING(buri)
        cdef int rc = TILEDB_OK
        with nogil:
            rc = tiledb_vfs_remove_bucket(ctx_ptr, vfs_ptr, uri_ptr)
        if rc != TILEDB_OK:
            _raise_ctx_err(ctx_ptr, rc)
        return

    def empty_bucket(self, uri):
        """Empty an object store bucket of all objects at the given URI

        This function blocks until all objects are verified to be removed from the
        given bucket.

        :param str uri: URI of bucket resource to be emptied
        :raises TypeError: cannot convert `uri` to unicode string
        :raises: :py:exc:`tiledb.TileDBError`

        """
        cdef bytes buri = unicode_path(uri)
        cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr
        cdef tiledb_vfs_t* vfs_ptr = self.ptr
        cdef const char* uri_ptr = PyBytes_AS_STRING(buri)
        cdef int rc = TILEDB_OK
        with nogil:
            rc = tiledb_vfs_empty_bucket(ctx_ptr, vfs_ptr, uri_ptr)
        if rc != TILEDB_OK:
            _raise_ctx_err(ctx_ptr, rc)
        return

    def is_empty_bucket(self, uri):
        """Returns true if the object store bucket is empty (contains no objects).

        If the bucket is versioned, this returns the status of the latest bucket version state.
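
        For example (illustrative): ``tiledb.VFS().is_empty_bucket("s3://my-bucket")``
        returns True only once every object in the bucket has been removed.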
:param str uri: URI of bucket resource :rtype: bool :return: True if bucket at given URI is empty, False otherwise :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef int isempty = 0 cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_is_empty_bucket(ctx_ptr, vfs_ptr, uri_ptr, &isempty) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return bool(isempty) def is_bucket(self, uri): """Returns True if the URI resource is a valid object store bucket :param str uri: URI of bucket resource :rtype: bool :return: True if given URI is a valid object store bucket, False otherwise :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef int is_bucket = 0 cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_is_bucket(ctx_ptr, vfs_ptr, uri_ptr, &is_bucket) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return bool(is_bucket) def create_dir(self, uri): """Create a VFS directory at the given URI :param str uri: URI of directory to be created :rtype: str :return: URI of created VFS directory :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_create_dir(ctx_ptr, vfs_ptr, uri_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return uri def is_dir(self, uri): """Returns True if the given URI is a VFS directory object :param str uri: URI of the directory resource :rtype: bool :return: True if `uri` is a VFS directory, False otherwise :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef int is_dir = 0 cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_is_dir(ctx_ptr, vfs_ptr, uri_ptr, &is_dir) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return bool(is_dir) def remove_dir(self, uri): """Removes a VFS directory at the given URI :param str uri: URI of the directory resource to remove :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_remove_dir(ctx_ptr, vfs_ptr, uri_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return def is_file(self, uri): """Returns True if the given URI is a VFS file object :param str uri: URI of the file resource :rtype: bool :return: True if `uri` is a VFS file, False otherwise :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef int is_file = 0 cdef int rc = TILEDB_OK with nogil: rc = 
tiledb_vfs_is_file(ctx_ptr, vfs_ptr, uri_ptr, &is_file) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return bool(is_file) def remove_file(self, uri): """Removes a VFS file at the given URI :param str uri: URI of a VFS file resource :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_remove_file(ctx_ptr, vfs_ptr, uri_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return def ls(self, uri): """Lists contents of directory at the given URI. Raises TileDBError for non-existent directory. :param str uri: URI of a VFS directory resource :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef list result_list = list() cdef int rc = TILEDB_OK check_error(self.ctx, tiledb_vfs_ls(ctx_ptr, vfs_ptr, uri_ptr, vfs_ls_callback, result_list)) return result_list def file_size(self, uri): """Returns the size (in bytes) of a VFS file at the given URI :param str uri: URI of a VFS file resource :rtype: int :return: file size in number of bytes :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef uint64_t nbytes = 0 cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_file_size(ctx_ptr, vfs_ptr, uri_ptr, &nbytes) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return int(nbytes) def dir_size(self, uri): """Returns the size (in bytes) of a VFS directory at the given URI :param str uri: URI of a VFS directory resource :rtype: int :return: dir size in number of bytes :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef uint64_t nbytes = 0 cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_dir_size(ctx_ptr, vfs_ptr, uri_ptr, &nbytes) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return int(nbytes) def move_file(self, old_uri, new_uri): """ Moves a VFS file from old URI to new URI :param str old_uri: Existing VFS file or directory resource URI :param str new_uri: URI to move existing VFS resource to :param bool force: if VFS resource at `new_uri` exists, delete the resource and overwrite :rtype: str :return: new URI of VFS resource :raises TypeError: cannot convert `old_uri`/`new_uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes bold_uri = unicode_path(old_uri) cdef bytes bnew_uri = unicode_path(new_uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* old_uri_ptr = PyBytes_AS_STRING(bold_uri) cdef const char* new_uri_ptr = PyBytes_AS_STRING(bnew_uri) cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_move_file(ctx_ptr, vfs_ptr, old_uri_ptr, new_uri_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return new_uri def move_dir(self, old_uri, new_uri): """ Moves a VFS dir from old URI to new URI :param str old_uri: Existing VFS 
file or directory resource URI
        :param str new_uri: URI to move existing VFS resource to
        :rtype: str
        :return: new URI of VFS resource
        :raises TypeError: cannot convert `old_uri`/`new_uri` to unicode string
        :raises: :py:exc:`tiledb.TileDBError`

        """
        cdef bytes bold_uri = unicode_path(old_uri)
        cdef bytes bnew_uri = unicode_path(new_uri)
        cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr
        cdef tiledb_vfs_t* vfs_ptr = self.ptr
        cdef const char* old_uri_ptr = PyBytes_AS_STRING(bold_uri)
        cdef const char* new_uri_ptr = PyBytes_AS_STRING(bnew_uri)
        cdef int rc = TILEDB_OK
        with nogil:
            rc = tiledb_vfs_move_dir(ctx_ptr, vfs_ptr, old_uri_ptr, new_uri_ptr)
        if rc != TILEDB_OK:
            _raise_ctx_err(ctx_ptr, rc)
        return new_uri

    def copy_file(self, old_uri, new_uri):
        """ Copies a VFS file from old URI to new URI

        :param str old_uri: Existing VFS file or directory resource URI
        :param str new_uri: URI to copy existing VFS resource to
        :rtype: str
        :return: new URI of VFS resource
        :raises TypeError: cannot convert `old_uri`/`new_uri` to unicode string
        :raises: :py:exc:`tiledb.TileDBError`

        """
        cdef bytes bold_uri = unicode_path(old_uri)
        cdef bytes bnew_uri = unicode_path(new_uri)
        cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr
        cdef tiledb_vfs_t* vfs_ptr = self.ptr
        cdef const char* old_uri_ptr = PyBytes_AS_STRING(bold_uri)
        cdef const char* new_uri_ptr = PyBytes_AS_STRING(bnew_uri)
        cdef int rc = TILEDB_OK
        with nogil:
            rc = tiledb_vfs_copy_file(ctx_ptr, vfs_ptr, old_uri_ptr, new_uri_ptr)
        if rc != TILEDB_OK:
            _raise_ctx_err(ctx_ptr, rc)
        return new_uri

    def copy_dir(self, old_uri, new_uri):
        """ Copies a VFS dir from old URI to new URI

        :param str old_uri: Existing VFS file or directory resource URI
        :param str new_uri: URI to copy existing VFS resource to
        :rtype: str
        :return: new URI of VFS resource
        :raises TypeError: cannot convert `old_uri`/`new_uri` to unicode string
        :raises: :py:exc:`tiledb.TileDBError`

        """
        cdef bytes bold_uri = unicode_path(old_uri)
        cdef bytes bnew_uri = unicode_path(new_uri)
        cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr
        cdef tiledb_vfs_t* vfs_ptr = self.ptr
        cdef const char* old_uri_ptr = PyBytes_AS_STRING(bold_uri)
        cdef const char* new_uri_ptr = PyBytes_AS_STRING(bnew_uri)
        cdef int rc = TILEDB_OK
        with nogil:
            rc = tiledb_vfs_copy_dir(ctx_ptr, vfs_ptr, old_uri_ptr, new_uri_ptr)
        if rc != TILEDB_OK:
            _raise_ctx_err(ctx_ptr, rc)
        return new_uri

    def open(self, uri, mode='rb'):
        """Opens a VFS file resource for reading / writing / appending at URI

        If the file did not exist upon opening, a new file is created.

        :param str uri: URI of VFS file resource
        :param str mode: 'rb' for opening the file to read, 'wb' to write, 'ab' to append
        :rtype: FileIO
        :return: TileDB FileIO
        :raises TypeError: cannot convert `uri` to unicode string
        :raises ValueError: invalid mode
        :raises: :py:exc:`tiledb.TileDBError`

        """
        return FileIO(self, uri, mode)

    def close(self, file):
        """Closes a VFS FileHandle object

        :param FileHandle fh: An opened VFS FileHandle
        :rtype: FileHandle
        :return: closed VFS FileHandle
        :raises: :py:exc:`tiledb.TileDBError`

        """
        if isinstance(file, FileIO):
            warnings.warn(
                "`tiledb.VFS().open` now returns a FileIO object. Use "
                "`FileIO.close`.",
                DeprecationWarning,
            )
            return file.close()
Use " "`FileIO.close`.", DeprecationWarning, ) return file.close() cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef FileHandle fh = file cdef tiledb_vfs_fh_t* fh_ptr = fh.ptr cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_close(ctx_ptr, fh_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return fh def readinto(self, FileHandle fh, const unsigned char[:] buffer, offset, nbytes): """Read nbytes from an opened VFS FileHandle at a given offset into a preallocated bytes buffer :param FileHandle fh: An opened VFS FileHandle in 'r' mode :param bytes buffer: A preallocated bytes buffer object :param int offset: offset position in bytes to read from :param int nbytes: number of bytes to read :return: bytes `buffer` :raises ValueError: invalid `offset` or `nbytes` values :raises: :py:exc:`tiledb.TileDBError` """ if offset < 0: raise ValueError("read offset must be >= 0") if nbytes < 0: raise ValueError("read nbytes but be >= 0") if nbytes > len(buffer): raise ValueError("read buffer is smaller than nbytes") cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_fh_t* fh_ptr = fh.ptr cdef uint64_t _offset = offset cdef uint64_t _nbytes = nbytes cdef const unsigned char* buffer_ptr = &buffer[0] cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_read(ctx_ptr, fh_ptr, _offset, buffer_ptr, _nbytes) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) # TileDB will error if the requested bytes are not read exactly return nbytes def read(self, file, offset, nbytes): """Read nbytes from an opened VFS FileHandle at a given offset :param FileHandle fh: An opened VFS FileHandle in 'r' mode :param int offset: offset position in bytes to read from :param int nbytes: number of bytes to read :rtype: :py:func:`bytes` :return: read bytes :raises: :py:exc:`tiledb.TileDBError` """ if isinstance(file, FileIO): warnings.warn( f"`tiledb.VFS().open` now returns a a FileIO object. Use " "`FileIO.read`.", DeprecationWarning, ) return file.read(nbytes) if nbytes == 0: return b'' cdef Py_ssize_t _nbytes = nbytes cdef bytes buffer = PyBytes_FromStringAndSize(NULL, _nbytes) cdef Py_ssize_t res_nbytes = self.readinto( file, buffer, offset, nbytes) return buffer def write(self, file, buff): """Writes buffer to opened VFS FileHandle :param FileHandle fh: An opened VFS FileHandle in 'w' mode :param buff: a Python object that supports the byte buffer protocol :raises TypeError: cannot convert buff to bytes :raises: :py:exc:`tiledb.TileDBError` """ if isinstance(file, FileIO): warnings.warn( f"`tiledb.VFS().open` now returns a a FileIO object. 
Use " "`FileIO.write`.", DeprecationWarning, ) return file.write(buff) cdef bytes buffer = bytes(buff) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef FileHandle fh = file cdef tiledb_vfs_fh_t* fh_ptr = fh.ptr cdef const char* buffer_ptr = PyBytes_AS_STRING(buffer) cdef Py_ssize_t _nbytes = PyBytes_GET_SIZE(buffer) assert(_nbytes >= 0) cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_write(ctx_ptr, fh_ptr, buffer_ptr, _nbytes) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return def sync(self, FileHandle fh): """Sync / flush an opened VFS FileHandle to storage backend :param FileHandle fh: An opened VFS FileHandle in 'w' or 'a' mode :raises: :py:exc:`tiledb.TileDBError` """ cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_fh_t* fh_ptr = fh.ptr cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_sync(ctx_ptr, fh_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return fh def touch(self, uri): """Creates an empty VFS file at the given URI :param str uri: URI of a VFS file resource :rtype: str :return: URI of touched VFS file :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` """ cdef bytes buri = unicode_path(uri) cdef tiledb_ctx_t* ctx_ptr = self.ctx.ptr cdef tiledb_vfs_t* vfs_ptr = self.ptr cdef const char* uri_ptr = PyBytes_AS_STRING(buri) cdef int rc = TILEDB_OK with nogil: rc = tiledb_vfs_touch(ctx_ptr, vfs_ptr, uri_ptr) if rc != TILEDB_OK: _raise_ctx_err(ctx_ptr, rc) return uri def supports(self, scheme): """Returns true if the given URI scheme (storage backend) is supported :param str scheme: scheme component of a VFS resource URI (ex. 'file' / 'hdfs' / 's3') :rtype: bool :return: True if the linked libtiledb version supports the storage backend, False otherwise :raises ValueError: VFS storage backend is not supported """ cdef tiledb_filesystem_t fs cdef int supports = 0 if scheme == "file": return True elif scheme == "s3": check_error(self.ctx, tiledb_ctx_is_supported_fs(self.ctx.ptr, TILEDB_S3, &supports)) return bool(supports) elif scheme == "azure": check_error(self.ctx, tiledb_ctx_is_supported_fs(self.ctx.ptr, TILEDB_AZURE, &supports)) return bool(supports) elif scheme == "gcs": check_error(self.ctx, tiledb_ctx_is_supported_fs(self.ctx.ptr, TILEDB_GCS, &supports)) return bool(supports) elif scheme == "hdfs": check_error(self.ctx, tiledb_ctx_is_supported_fs(self.ctx.ptr, TILEDB_HDFS, &supports)) return bool(supports) else: raise ValueError("unsupported vfs scheme '{0!s}://'".format(scheme)) def config(self): """Returns the Config instance associated with the VFS.""" cdef tiledb_config_t* config_ptr = NULL check_error(self.ctx, tiledb_vfs_get_config(self.ctx.ptr, self.ptr, &config_ptr)) return Config.from_ptr(config_ptr) def vacuum(uri, Config config=None, Ctx ctx=None, timestamp=None): """ Vacuum underlying array fragments after consolidation. :param str uri: URI of array to be vacuumed :param config: Override the context configuration for vacuuming. Defaults to None, inheriting the context parameters. :param (ctx: tiledb.Ctx, optional): Context. Defaults to `tiledb.default_ctx()`. :param (int, int) timestamp: (default None) If not None, vacuum the array using the given range (inclusive) :raises TypeError: cannot convert `uri` to unicode string :raises: :py:exc:`tiledb.TileDBError` This operation of this function is controlled by the `"sm.vacuum.mode"` parameter, which accepts the values ``fragments``, ``fragment_meta``, and ``array_meta``. 
Rather than passing the timestamp into this function, it may also be set with the config parameters `"sm.vacuum.timestamp_start"` and `"sm.vacuum.timestamp_end"`, which take a time in UNIX seconds. If both the config parameters and this function's `timestamp` argument are set, the `timestamp` argument takes precedence. **Example:** >>> import tiledb, numpy as np >>> import tempfile >>> path = tempfile.mkdtemp() >>> with tiledb.from_numpy(path, np.random.rand(4)) as A: ... pass # make sure to close >>> with tiledb.open(path, 'w') as A: ... for i in range(4): ... A[:] = np.ones(4, dtype=np.int64) * i >>> paths = tiledb.VFS().ls(path) >>> # should be 12 (2 base files + 2*5 fragment+ok files) >>> (); len(paths); () # doctest:+ELLIPSIS (...) >>> () ; tiledb.consolidate(path) ; () # doctest:+ELLIPSIS (...) >>> tiledb.vacuum(path) >>> paths = tiledb.VFS().ls(path) >>> # should now be 4 (2 base files + 2 fragment+ok files) >>> (); len(paths); () # doctest:+ELLIPSIS (...) """ cdef tiledb_ctx_t* ctx_ptr = NULL cdef tiledb_config_t* config_ptr = NULL if not ctx: ctx = default_ctx() if timestamp: if config is None: config = Config() if not (isinstance(timestamp, tuple) and len(timestamp) == 2): raise TypeError("'timestamp' argument expects tuple(start: int, end: int)") if timestamp[0] is not None: config["sm.vacuum.timestamp_start"] = timestamp[0] if timestamp[1] is not None: config["sm.vacuum.timestamp_end"] = timestamp[1] ctx_ptr = ctx.ptr config_ptr = config.ptr if config is not None else NULL cdef bytes buri = unicode_path(uri) cdef const char* uri_ptr = PyBytes_AS_STRING(buri) check_error(ctx, tiledb_array_vacuum(ctx_ptr, uri_ptr, config_ptr)) TileDB-Py-0.12.2/tiledb/main.cc000066400000000000000000000011551417663620700160010ustar00rootroot00000000000000#include namespace tiledbpy { namespace py = pybind11; void init_core(py::module &); // void _debug(py::module &); void init_fragment(py::module &); // void init_query_condition(py::module &); void init_schema_evolution(py::module &); void init_serialization(py::module &); void init_test_serialization(py::module &); void init_test_metadata(py::module &); PYBIND11_MODULE(main, m) { init_core(m); //_debug(m); init_fragment(m); //_query_condition(m); init_schema_evolution(m); init_serialization(m); init_test_serialization(m); init_test_metadata(m); } } // namespace tiledbpy TileDB-Py-0.12.2/tiledb/multirange_indexing.py000066400000000000000000000354501417663620700211610ustar00rootroot00000000000000import dataclasses import json import time import weakref from collections import OrderedDict from contextlib import contextmanager from contextvars import ContextVar, copy_context from numbers import Real from dataclasses import dataclass from itertools import zip_longest from typing import ( Any, ContextManager, Dict, Iterator, List, Optional, Sequence, Tuple, Union, cast, ) import numpy as np from tiledb import Array, ArraySchema, TileDBError, libtiledb from tiledb.main import PyQuery, increment_stat, use_stats from tiledb.libtiledb import Metadata, Query from .dataframe_ import check_dataframe_deps current_timer: ContextVar[str] = ContextVar("timer_scope") try: import pyarrow Table = Union[pyarrow.Table] except ImportError: pyarrow = Table = None try: from pandas import DataFrame except ImportError: DataFrame = None # sentinel value to denote selecting an empty range EmptyRange = object() # TODO: expand with more accepted scalar types Scalar = Real Range = Tuple[Scalar, Scalar] @dataclass class EstimatedResultSize: offsets_bytes: int data_bytes: int @contextmanager def timing(key: str) -> Iterator[None]: if not use_stats():
yield else: scoped_name = f"{current_timer.get('py')}.{key}" parent_token = current_timer.set(scoped_name) start = time.time() try: yield finally: increment_stat(current_timer.get(), time.time() - start) current_timer.reset(parent_token) def mr_dense_result_shape( ranges: Sequence[Sequence[Range]], base_shape: Optional[Tuple[int, ...]] = None ) -> Tuple[int, ...]: if base_shape is not None: assert len(ranges) == len(base_shape), "internal error: mismatched shapes" new_shape = [] for i, subranges in enumerate(ranges): if subranges: total_length = sum(abs(stop - start) + 1 for start, stop in subranges) new_shape.append(np.uint64(total_length)) elif base_shape is not None: # empty range covers dimension new_shape.append(base_shape[i]) else: raise ValueError("Missing required base_shape for whole-dimension slices") return tuple(new_shape) def to_scalar(obj: Any) -> Scalar: if np.isscalar(obj): return cast(Scalar, obj) if isinstance(obj, np.ndarray) and obj.ndim == 0: return cast(Scalar, obj[()]) raise ValueError(f"Cannot convert {type(obj)} to scalar") def iter_ranges( sel: Union[Scalar, slice, Range, List[Scalar]], sparse: bool, nonempty_domain: Optional[Range] = None, ) -> Iterator[Range]: if isinstance(sel, slice): if sel.step is not None: raise ValueError("Stepped slice ranges are not supported") rstart = sel.start if rstart is None and nonempty_domain: rstart = nonempty_domain[0] rend = sel.stop if rend is None and nonempty_domain: rend = nonempty_domain[1] if sparse and sel.start is None and sel.stop is None: # don't set nonempty_domain for full-domain slices w/ sparse # because TileDB query is faster without the constraint pass elif rstart is None or rend is None: pass else: yield to_scalar(rstart), to_scalar(rend) elif isinstance(sel, tuple): assert len(sel) == 2 yield to_scalar(sel[0]), to_scalar(sel[1]) elif isinstance(sel, list): for scalar in map(to_scalar, sel): yield scalar, scalar else: scalar = to_scalar(sel) yield scalar, scalar def getitem_ranges(array: Array, idx: Any) -> Sequence[Sequence[Range]]: ranges: List[Sequence[Range]] = [()] * array.schema.domain.ndim ned = array.nonempty_domain() is_sparse = array.schema.sparse for i, dim_sel in enumerate([idx] if not isinstance(idx, tuple) else idx): # don't try to index nonempty_domain if None nonempty_domain = ned[i] if ned else None if isinstance(dim_sel, np.ndarray): ranges[i] = dim_sel continue elif not isinstance(dim_sel, list): dim_sel = [dim_sel] ranges[i] = tuple( rng for sel in dim_sel for rng in iter_ranges(sel, is_sparse, nonempty_domain) ) return tuple(ranges) class MultiRangeIndexer(object): """ Implements multi-range indexing. 
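An added usage sketch (illustrative, not part of the original docstring). Note that `multi_index` ranges are inclusive of both endpoints, unlike NumPy slicing:

>>> import tiledb, numpy as np
>>> import tempfile
>>> path = tempfile.mkdtemp()
>>> with tiledb.from_numpy(path, np.arange(10)) as A:
...     pass
>>> with tiledb.open(path) as A:
...     # a list may mix slices and scalars; selected ranges are concatenated
...     A.multi_index[[slice(0, 2), 5]][""]
array([0, 1, 2, 5])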
""" def __init__(self, array: Array, query: Optional[Query] = None) -> None: if not isinstance(array, Array): raise TypeError("Internal error: MultiRangeIndexer expected tiledb.Array") self.array_ref = weakref.ref(array) self.query = query self.pyquery = None self.use_arrow = None @property def array(self) -> Array: array = self.array_ref() if array is None: raise RuntimeError( "Internal error: invariant violation (indexing call w/ dead array_ref)" ) return array def __getitem__(self, idx: Any) -> Dict[str, np.ndarray]: with timing("getitem_time"): if idx is EmptyRange: return _get_empty_results(self.array.schema, self.query) self.ranges = getitem_ranges(self.array, idx) if self.query and self.query.return_incomplete: return self return self._run_query(self.query) def _run_query( self, query: Optional[Query] = None, preload_metadata: bool = False ) -> Union[Dict[str, np.ndarray], DataFrame, Table]: if self.pyquery is None or not self.pyquery.is_incomplete: self.pyquery = _get_pyquery(self.array, query, self.use_arrow) self.pyquery._preload_metadata = preload_metadata with timing("py.add_ranges"): if libtiledb.version() >= (2, 6) and any( [lambda x: isinstance(x, np.ndarray), self.ranges] ): self.pyquery.set_ranges_bulk(self.ranges) else: self.pyquery.set_ranges(self.ranges) has_attr_cond = self.query is not None and query.attr_cond is not None if has_attr_cond: try: self.pyquery.set_attr_cond(query.attr_cond) except TileDBError as e: raise TileDBError(e) self.pyquery._return_incomplete = ( self.query and self.query.return_incomplete ) self.pyquery.submit() schema = self.array.schema if query is not None and self.use_arrow: # TODO currently there is lack of support for Arrow list types. # This prevents multi-value attributes, asides from strings, from being # queried properly. Until list attributes are supported in core, # error with a clear message to pass use_arrow=False. attrs = map(schema.attr, query.attrs or ()) if any( (attr.isvar or len(attr.dtype) > 1) and (not attr.dtype in (np.unicode_, np.bytes_)) for attr in attrs ): raise TileDBError( "Multi-value attributes are not currently supported when use_arrow=True. " "This includes all variable-length attributes and fixed-length " "attributes with more than one value. Use `query(use_arrow=False)`." ) with timing("buffer_conversion_time"): table = self.pyquery._buffers_to_pa_table() return table if query.return_arrow else table.to_pandas() result_dict = _get_pyquery_results(self.pyquery, schema) if not schema.sparse: result_shape = mr_dense_result_shape(self.ranges, schema.shape) for arr in result_dict.values(): # TODO check/test layout arr.shape = result_shape return result_dict def estimated_result_sizes(self): """ Get the estimated result buffer sizes for a TileDB Query Sizes are returned in bytes as an EstimatedResultSize dataclass with two fields: `offset_bytes` and `data_bytes`, with buffer name as the OrderedDict key. 
See the corresponding TileDB Embedded API documentation for additional details: https://tiledb-inc-tiledb.readthedocs-hosted.com/en/stable/c++-api.html#query :return: OrderedDict of key: str -> EstimatedResultSize """ results = {} if not self.pyquery: raise TileDBError("Query not initialized") tmp = self.pyquery.estimated_result_sizes() for name, val in tmp.items(): results[name] = EstimatedResultSize(val[0], val[1]) return results def __iter__(self): if not self.query.return_incomplete: raise TileDBError( "Cannot iterate unless query is initialized with return_incomplete=True" ) return self def __next__(self): if self.pyquery and not self.pyquery.is_incomplete: raise StopIteration() return self._run_query(self.query) class DataFrameIndexer(MultiRangeIndexer): """ Implements `.df[]` indexing to directly return a dataframe [] operator uses multi_index semantics. """ def __init__( self, array: Array, query: Optional[Query] = None, use_arrow: Optional[bool] = None, ) -> None: super().__init__(array, query) if pyarrow and use_arrow is None: use_arrow = True self.use_arrow = use_arrow def __getitem__(self, idx: Any) -> Union[DataFrame, Table]: with timing("getitem_time"): check_dataframe_deps() array = self.array # we need to use a Query in order to get coords for a dense array query = self.query if self.query else Query(array, coords=True) if idx is EmptyRange: result = _get_empty_results(array.schema, query) else: self.ranges = getitem_ranges(self.array, idx) if self.query and self.query.return_incomplete: return self result = self._run_query(query, preload_metadata=True) if not (pyarrow and isinstance(result, pyarrow.Table)): if not isinstance(result, DataFrame): result = DataFrame.from_dict(result) with timing("pandas_index_update_time"): result = _update_df_from_meta(result, array.meta, query.index_col) return result def _get_pyquery(array: Array, query: Optional[Query], use_arrow: bool) -> PyQuery: schema = array.schema if query is not None: order = query.order else: # set default order: TILEDB_UNORDERED for sparse, TILEDB_ROW_MAJOR for dense order = "U" if schema.sparse else "C" try: layout = "CFGU".index(order) except ValueError: raise ValueError( "order must be 'C' (TILEDB_ROW_MAJOR), 'F' (TILEDB_COL_MAJOR), " "'U' (TILEDB_UNORDERED), or 'G' (TILEDB_GLOBAL_ORDER)" ) return PyQuery( array._ctx_(), array, tuple( [array.view_attr] if array.view_attr is not None else _iter_attr_names(schema, query) ), tuple(_iter_dim_names(schema, query)), layout, use_arrow, ) def _iter_attr_names( schema: ArraySchema, query: Optional[Query] = None ) -> Iterator[str]: if query is not None and query.attrs is not None: return iter(query.attrs) return (schema.attr(i)._internal_name for i in range(schema.nattr)) def _iter_dim_names( schema: ArraySchema, query: Optional[Query] = None ) -> Iterator[str]: if query is not None: if query.dims is not None: return iter(query.dims or ()) if query.coords is False: return iter(()) if not schema.sparse: return iter(()) dom = schema.domain return (dom.dim(i).name for i in range(dom.ndim)) def _get_pyquery_results( pyquery: PyQuery, schema: ArraySchema ) -> Dict[str, np.ndarray]: result_dict = OrderedDict() for name, item in pyquery.results().items(): if len(item[1]) > 0: arr = pyquery.unpack_buffer(name, item[0], item[1]) else: arr = item[0] arr.dtype = schema.attr_or_dim_dtype(name) result_dict[name if name != "__attr" else ""] = arr return result_dict def _get_empty_results( schema: ArraySchema, query: Optional[Query] = None ) -> Dict[str, np.ndarray]: names = [] 
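# gather the dimension and attribute names selected by the query, in schema order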
query_dims = frozenset(_iter_dim_names(schema, query)) query_attrs = frozenset(_iter_attr_names(schema, query)) # return dims first, if any dom = schema.domain for i in range(dom.ndim): dim = dom.dim(i).name # we need to also check if this is an attr for backward-compatibility if dim in query_dims or dim in query_attrs: names.append(dim) for i in range(schema.nattr): attr = schema.attr(i)._internal_name if attr in query_attrs: names.append(attr) result_dict = OrderedDict() for name in names: arr = np.array([], schema.attr_or_dim_dtype(name)) result_dict[name if name != "__attr" else ""] = arr return result_dict
def _update_df_from_meta( df: DataFrame, array_meta: Metadata, index_col: Union[List[str], bool, None] = True ) -> DataFrame: col_dtypes = {} if "__pandas_attribute_repr" in array_meta: attr_dtypes = json.loads(array_meta["__pandas_attribute_repr"]) for name, dtype in attr_dtypes.items(): if name in df: col_dtypes[name] = dtype index_cols = [] if "__pandas_index_dims" in array_meta: index_dtypes = json.loads(array_meta["__pandas_index_dims"]) index_cols.extend(col for col in index_dtypes.keys() if col in df) for name, dtype in index_dtypes.items(): if name in df: col_dtypes[name] = dtype if col_dtypes: df = df.astype(col_dtypes, copy=False) if index_col: if index_col is not True: # if we have a query with index_col set, then override any # index information saved with the array. df.set_index(index_col, inplace=True) elif index_cols: # set the index to the index names that exist as columns df.set_index(index_cols, inplace=True) # rename __tiledb_rows to None if "__tiledb_rows" in index_cols: index_cols[index_cols.index("__tiledb_rows")] = None if len(index_cols) == 1: df.index.rename(index_cols[0], inplace=True) else: df.index.rename(index_cols, inplace=True) return df TileDB-Py-0.12.2/tiledb/np2buf.pyx000066400000000000000000000176411417663620700165110ustar00rootroot00000000000000# Set true to enable modular compilation IF TILEDBPY_MODULAR: include "common.pxi" from .libtiledb cimport * from cpython.version cimport PY_MAJOR_VERSION from collections import deque
cdef _varlen_dtype_itemsize(object item): if (isinstance(item, np.dtype) and np.issubdtype(item, np.bytes_)): return sizeof(char) elif isinstance(item, np.dtype): return item.itemsize elif item == np.bytes_: return sizeof(char) elif item == np.unicode_: # Note this is just a placeholder, we call the CPython API to get the actual size return sizeof(char) raise TypeError("Unknown dtype itemsize for '{}'.".format(item))
cdef _varlen_cell_dtype(object var): cdef np.dtype dtype if isinstance(var, np.ndarray): dtype = var.dtype if np.issubdtype(dtype, np.bytes_): # handles 'S[n]' dtypes for all n return np.bytes_ elif np.issubdtype(dtype, np.unicode_): # handles 'U[n]' dtypes for all n return np.unicode_ else: return dtype elif isinstance(var, bytes): return np.bytes_ elif isinstance(var, unicode): return np.unicode_ try: actual_type = str(type(var)) except: actual_type = "[failed to get type]" raise TypeError(f"Unsupported varlen cell datatype ('{actual_type}')")
def array_to_buffer(object val): cdef arr = val if len(arr) == 0: raise Exception("Empty arrays are not supported.") assert (arr.dtype == np.dtype('O') or np.issubdtype(arr.dtype, np.bytes_) or np.issubdtype(arr.dtype, np.unicode_)), "array_to_buffer: input array must have np.object_, np.bytes_, or np.unicode_ dtype!" firstdtype = _varlen_cell_dtype(arr.flat[0]) # item size cdef uint64_t el_size = _varlen_dtype_itemsize(firstdtype) if el_size == 0: raise TypeError("Zero-size cell elements are not supported.")
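# Two-pass conversion (descriptive note): the first pass below validates
# cell dtypes and records each cell's byte offset; the second pass copies
# every cell's data into one contiguous uint8 buffer.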
# total buffer size cdef uint64_t buffer_size = 0 cdef uint64_t buffer_n_elem = np.prod(arr.shape) cdef np.ndarray buffer_offsets = np.empty(buffer_n_elem, dtype=np.uint64) cdef uint64_t el_buffer_size = 0 cdef uint64_t item_len = 0 # first pass: check types and calculate offsets for (i, item) in enumerate(arr.flat): if firstdtype != _varlen_cell_dtype(item): msg = ("Data types of variable-length sub-arrays must be consistent. " "Type '{}', of 1st sub-array, is inconsistent with type '{}', of item {}." ).format(firstdtype, _varlen_cell_dtype(item), i) raise TypeError(msg) # current offset is last buffer_size buffer_offsets[i] = buffer_size if firstdtype == np.unicode_: # this will cache the materialized (if any) UTF8 object if PY_MAJOR_VERSION >= 3: utf8 = (item).encode('UTF-8') else: utf8 = (item).encode('UTF-8') el_buffer_size = len(utf8) else: if hasattr(item, '__len__'): item_len = len(item) else: item_len = 1 el_buffer_size = el_size * item_len if (el_buffer_size == 0) and ( (firstdtype == np.bytes_) or (firstdtype == np.unicode_)): el_buffer_size = 1 # *running total* buffer size buffer_size += el_buffer_size # return a numpy buffer because that is what the caller uses for non-varlen buffers cdef np.ndarray buffer = np.zeros(shape=buffer_size, dtype=np.uint8) # should be np.empty(shape=buffer_size, dtype=np.uint8) cdef char* buffer_ptr = np.PyArray_DATA(buffer) cdef char* input_ptr = NULL cdef object tmp_utf8 = None # bytes to copy in this block cdef uint64_t nbytes = 0 # loop over sub-items and copy into buffer for (i, subitem) in enumerate(val.flat): if (isinstance(subitem, bytes) or (isinstance(subitem, np.ndarray) and np.issubdtype(subitem.dtype, np.bytes_))): input_ptr = PyBytes_AS_STRING(subitem) elif (isinstance(subitem, str) or (isinstance(subitem, unicode)) or (isinstance(subitem, np.ndarray) and np.issubdtype(subitem.dtype, np.unicode_))): tmp_utf8 = subitem.encode("UTF-8") input_ptr = tmp_utf8 else: input_ptr = np.PyArray_DATA(subitem) if i == buffer_n_elem - 1: nbytes = buffer_size - buffer_offsets[i] else: nbytes = buffer_offsets[i+1] - buffer_offsets[i] memcpy(buffer_ptr, input_ptr, nbytes) buffer_ptr += nbytes # clean up the encoded object *after* storing if tmp_utf8: del tmp_utf8 return buffer, buffer_offsets cdef tiledb_datatype_t c_dtype_to_tiledb(np.dtype dtype) except? TILEDB_CHAR: """Return tiledb_datatype_t enum value for a given numpy dtype object """ if dtype == np.int32: return TILEDB_INT32 elif dtype == np.uint32: return TILEDB_UINT32 elif dtype == np.int64: return TILEDB_INT64 elif dtype == np.uint64: return TILEDB_UINT64 elif dtype == np.float32: return TILEDB_FLOAT32 elif dtype == np.float64: return TILEDB_FLOAT64 elif dtype == np.int8: return TILEDB_INT8 elif dtype == np.uint8: return TILEDB_UINT8 elif dtype == np.int16: return TILEDB_INT16 elif dtype == np.uint16: return TILEDB_UINT16 elif dtype == np.unicode_: return TILEDB_STRING_UTF8 elif dtype == np.bytes_: return TILEDB_CHAR elif dtype == np.complex64: return TILEDB_FLOAT32 elif dtype == np.complex128: return TILEDB_FLOAT64 elif dtype.kind == 'M': return _tiledb_dtype_datetime(dtype) raise TypeError("data type {0!r} not understood".format(dtype)) def dtype_to_tiledb(np.dtype dtype): return c_dtype_to_tiledb(dtype) def array_type_ncells(np.dtype dtype): """ Returns the TILEDB_{TYPE} and ncells corresponding to a given numpy dtype """ cdef np.dtype checked_dtype = np.dtype(dtype) cdef uint32_t ncells # - flexible datatypes of unknown size have an itemsize of 0 (str, bytes, etc.) 
# - unicode and string types are always stored as VAR because we don't want to # store the pad (numpy pads to max length for 'S' and 'U' dtypes) if np.issubdtype(checked_dtype, np.bytes_): tdb_type = TILEDB_CHAR if checked_dtype.itemsize == 0: ncells = TILEDB_VAR_NUM else: ncells = checked_dtype.itemsize elif np.issubdtype(checked_dtype, np.unicode_): np_unicode_size = np.dtype("U1").itemsize # TODO depending on np_unicode_size, tdb_type may be UTF16 or UTF32 tdb_type = TILEDB_STRING_UTF8 if checked_dtype.itemsize == 0: ncells = TILEDB_VAR_NUM else: ncells = checked_dtype.itemsize // np_unicode_size elif np.issubdtype(checked_dtype, np.complexfloating): # handle complex dtypes tdb_type = dtype_to_tiledb(checked_dtype) ncells = 2 elif checked_dtype.kind == 'V': # handles n fixed-size record dtypes if checked_dtype.shape != (): raise TypeError("nested sub-array numpy dtypes are not supported") # check that types are the same # TODO: make sure this is not too slow for large record types deq = deque(checked_dtype.fields.values()) typ0, _ = deq.popleft() nfields = 1 for (typ, _) in deq: nfields += 1 if typ != typ0: raise TypeError('heterogenous record numpy dtypes are not supported') tdb_type = dtype_to_tiledb(typ0) ncells = (len(checked_dtype.fields.values())) else: # scalar cell type tdb_type = c_dtype_to_tiledb(checked_dtype) ncells = 1 return tdb_type, ncells TileDB-Py-0.12.2/tiledb/npbuffer.cc000066400000000000000000000373151417663620700166730ustar00rootroot00000000000000#include #include #include #include #include #include #include #include "util.h" #include #include #include #if !defined(NDEBUG) //#include "debug.cc" #endif #define TILEDB_DEPRECATED #define TILEDB_DEPRECATED_EXPORT #include // C++ // anonymous namespace for helper functions namespace { namespace py = pybind11; bool issubdtype(py::dtype t1, py::dtype t2) { // TODO importing every time is Not Great... auto np = py::module::import("numpy"); auto npsubdtype = np.attr("issubdtype"); return py::cast(npsubdtype(t1, t2)); } template py::dtype get_dtype(T obj) { auto &api = py::detail::npy_api::get(); if (api.PyArray_Check_(obj.ptr())) { return py::cast(obj).dtype(); } return py::reinterpret_steal( api.PyArray_DescrFromScalar_(obj.ptr())); } // check whether dtypes are equivalent from numpy perspective // note: d1::dtype.is(d2) checks *object identity* which is // not what we want. bool dtype_equal(py::dtype d1, py::dtype d2) { auto &api = py::detail::npy_api::get(); return api.PyArray_EquivTypes_(d1.ptr(), d2.ptr()); } }; // namespace namespace tiledbpy { using namespace std; using namespace tiledb; namespace py = pybind11; using namespace pybind11::literals; #if PY_MAJOR_VERSION >= 3 class NumpyConvert { private: bool use_iter_ = false; bool allow_unicode_ = true; size_t data_nbytes_ = 0; size_t input_len_ = 0; py::array input_; // we are using vector as a buffer here because they are grown in some // situations std::vector *data_buf_; std::vector *offset_buf_; void convert_unicode() { // Convert array of strings to UTF-8 buffer+offsets // NOTE: NumPy fixed-length string arrays *do not support* embedded nulls. // There is no string size stored, so string end is demarcated by \0 // and the slot is filled to the next boundary with \0. // For consistency and to avoid complications in other APIs, we are storing // all string arrays as var-length. 
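// (Added note) The output of these converters follows TileDB's var-length
// buffer layout: one contiguous data buffer plus a uint64 offsets buffer,
// where offsets[i] is the byte position at which cell i's data begins.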
// must have fixed-width element assert(input_.itemsize() > 0); // we know exact offset count offset_buf_->resize(input_len_); // we reserve the input length as a minimum size for output data_buf_->resize(input_len_); // size (bytes) of current object data Py_ssize_t sz = 0; // object data (string or bytes) const char *input_p = nullptr; unsigned char *output_p = nullptr; output_p = data_buf_->data(); // avoid one interpreter roundtrip auto npstrencode = py::module::import("numpy").attr("str_").attr("encode"); // return status int rc; // encoded object: this must live outside the if block or else it may be // GC'd // putting outside for loop to avoid repeat unused // construction py::object u_encoded; // loop over array objects and write to output buffer size_t idx = 0; for (auto u : input_) { // don't encode if we already have bytes if (PyUnicode_Check(u.ptr())) { // TODO see if we can do this with PyUnicode_AsUTF8String u_encoded = npstrencode(u); rc = PyBytes_AsStringAndSize(u_encoded.ptr(), const_cast(&input_p), &sz); } else { rc = PyBytes_AsStringAndSize(u.ptr(), const_cast(&input_p), &sz); } if (rc == -1) { throw std::runtime_error( "PyBytes_AsStringAndSize failed to encode string"); } // record the offset (equal to the current bytes written) offset_buf_->data()[idx] = data_nbytes_; if (data_buf_->size() < data_nbytes_ + sz) { data_buf_->resize(data_nbytes_ + sz); // update the output pointer and adjust for previous iteration output_p = data_buf_->data() + data_nbytes_; } memcpy(output_p, input_p, sz); data_nbytes_ += sz; output_p += sz; idx++; } } void convert_bytes() { // Convert array of bytes objects or ASCII strings to buffer+offsets assert(input_.itemsize() > 0); // must have fixed-length array // we know exact offset count offset_buf_->resize(input_len_); // we reserve the input length as a minimum size for output data_buf_->resize(input_len_); // size (bytes) of current object data Py_ssize_t sz = 0; // object data (string or bytes) const char *input_p = nullptr; unsigned char *output_p = nullptr; output_p = data_buf_->data(); int rc; // avoid one interpreter roundtrip // auto npstrencode = // py::module::import("numpy").attr("str_").attr("encode"); // TODO: ideally we would encode directly here without the intermediate // unicode object // TODO add test for different memory orderings // loop over array objects and write to output buffer size_t idx = 0; for (auto obj : input_) { auto o = obj.ptr(); // don't encode if we already have bytes /* if (PyUnicode_Check(u.ptr())) { // TODO see if we can do this with PyUnicode_AsUTF8String u_encoded = npstrencode(u); } */ rc = PyBytes_AsStringAndSize(o, const_cast(&input_p), &sz); if (rc == -1) { throw std::runtime_error( "PyBytes_AsStringAndSize failed to encode string"); } // record the offset (equal to the current bytes written) offset_buf_->data()[idx] = data_nbytes_; if (data_buf_->size() < data_nbytes_ + sz) { data_buf_->resize(data_nbytes_ + sz); // update the output pointer and adjust for previous iteration output_p = data_buf_->data() + data_nbytes_; } memcpy(output_p, input_p, sz); data_nbytes_ += sz; output_p += sz; idx++; } } void convert_object() { // Convert np.dtype("O") array of objects to buffer+offsets auto &api = py::detail::npy_api::get(); offset_buf_->resize(input_len_); auto input_unchecked = input_.unchecked(); // size (bytes) of current object data Py_ssize_t sz = 0; // current data const char *input_p = nullptr; auto input_size = input_.size(); py::dtype first_dtype; // first pass: calculate final buffer length 
and cache UTF-8 representations for (int64_t idx = 0; idx < input_size; idx++) { offset_buf_->data()[idx] = data_nbytes_; PyObject *o = input_unchecked.data(idx)->ptr(); assert(o != nullptr); // NOTE: every branch below *must* initialize first_dtype if (PyUnicode_Check(o)) { if (!allow_unicode_) { // TODO TPY_ERROR_LOC auto errmsg = std::string( "Unexpected unicode object for TILEDB_STRING_ASCII attribute"); throw std::runtime_error(errmsg); } if (idx < 1) first_dtype = py::dtype("unicode"); // this will cache a utf-8 representation owned by the PyObject input_p = PyUnicode_AsUTF8AndSize(o, &sz); if (!input_p) { TPY_ERROR_LOC("Internal error: failed to convert unicode to UTF-8"); } } else if (PyBytes_Check(o)) { // ASCII only auto res = PyBytes_AsStringAndSize(o, const_cast(&input_p), &sz); if (idx < 1) first_dtype = py::dtype("bytes"); if (res == -1) { // TODO TPY_ERROR_LOC throw std::runtime_error( "Internal error: failed to get char* from bytes object"); } } else if (api.PyArray_Check_(o)) { auto a = py::cast(o); // handle (potentially) var-len embedded arrays if (idx < 1) { first_dtype = get_dtype(a); } else if (!dtype_equal(get_dtype(a), first_dtype)) { throw py::type_error( "Mismatched dtype in object array to buffer conversion!"); } sz = a.nbytes(); } else { // TODO write the type in the error here // auto o_h = py::reinterpret_borrow(o); // auto o_t = py::type::of(o); auto errmsg = std::string("Unexpected object type in string conversion"); TPY_ERROR_LOC(errmsg); } data_nbytes_ += sz; } data_buf_->resize(data_nbytes_); // second pass: copy the data to output buffer unsigned char *output_p = data_buf_->data(); // copy data to output buffers for (int64_t idx = 0; idx < input_size; idx++) { PyObject *pyobj_p = input_unchecked.data(idx)->ptr(); assert(pyobj_p != nullptr); if (PyUnicode_Check(pyobj_p)) { input_p = PyUnicode_AsUTF8AndSize(pyobj_p, &sz); assert(input_p != nullptr); } else if (PyBytes_Check(pyobj_p)) { // TODO error check? PyBytes_AsStringAndSize(pyobj_p, const_cast(&input_p), &sz); } else if (api.PyArray_Check_(pyobj_p)) { auto arr = py::cast(pyobj_p); sz = arr.nbytes(); input_p = (const char *)arr.data(); } else { // TODO add object type TPY_ERROR_LOC("Unexpected object type in buffer conversion"); } memcpy(output_p, input_p, sz); // increment the output pointer for the next object output_p += sz; } } void convert_iter() { // Convert array of non-contiguous objects to buffer+offsets // using iterator protocol. // For non-contiguous arrays (such as views) we must iterate rather // than indexing directly. 
auto &npy_api = py::detail::npy_api::get(); offset_buf_->resize(input_.size()); auto iter = input_.attr("flat"); // size (bytes) of current object data Py_ssize_t sz = 0; // current data const char *input_p = nullptr; size_t idx = 0; py::dtype first_dtype; for (auto obj_h : iter) { if (idx < 1) { // record the first dtype for consistency check first_dtype = get_dtype(obj_h); } offset_buf_->data()[idx] = data_nbytes_; PyObject *obj_p = obj_h.ptr(); // we must check each dtype because object arrays are not guaranteed to // be homogenous auto cur_dtype = get_dtype(obj_h); auto err_str = std::string("Mismatched element type in buffer conversion!"); if ((first_dtype.kind() == cur_dtype.kind()) || (first_dtype.kind() == cur_dtype.kind())) { // pass } else if (!dtype_equal(cur_dtype, first_dtype)) { throw py::type_error(err_str); } if (PyUnicode_Check(obj_p)) { if (!allow_unicode_) { // TODO TPY_ERROR_LOC auto errmsg = std::string( "Unexpected unicode object for TILEDB_STRING_ASCII attribute"); throw std::runtime_error(errmsg); } // this will cache a utf-8 representation owned by the PyObject input_p = PyUnicode_AsUTF8AndSize(obj_p, &sz); if (!input_p) { TPY_ERROR_LOC("Internal error: failed to convert unicode to UTF-8"); } } else if (PyBytes_Check(obj_p)) { // ASCII only auto res = PyBytes_AsStringAndSize(obj_p, const_cast(&input_p), &sz); if (res == -1) { // TODO TPY_ERROR_LOC throw std::runtime_error( "Internal error: failed to get char* from bytes object"); } } else if (npy_api.PyArray_Check_(obj_p)) { // handle (potentially) var-len embedded arrays sz = py::cast(obj_p).nbytes(); } else { auto errmsg = std::string("Unexpected object type in string conversion"); TPY_ERROR_LOC(errmsg); } data_nbytes_ += sz; idx++; } data_buf_->resize(data_nbytes_); // second pass: write the data to output buffer unsigned char *output_p = data_buf_->data(); // reset the iterator iter = input_.attr("flat"); // copy data to output buffers for (auto obj_h : iter) { auto obj_p = obj_h.ptr(); if (PyUnicode_Check(obj_p)) { input_p = PyUnicode_AsUTF8AndSize(obj_p, &sz); assert(input_p != nullptr); } else if (PyBytes_Check(obj_p)) { // TODO error check? 
PyBytes_AsStringAndSize(obj_p, const_cast(&input_p), &sz); } else if (npy_api.PyArray_Check_(obj_p)) { // auto pao = (PyArrayObject*)o; // input_p = (const char*)PyArray_DATA(pao); // sz = PyArray_NBYTES(pao); auto o_a = py::cast(obj_h); sz = o_a.nbytes(); input_p = (const char *)o_a.data(); } else { TPY_ERROR_LOC("Unexpected object type in buffer conversion"); } memcpy(output_p, input_p, sz); // increment the output pointer for the next object output_p += sz; } } public: /* Initialize the converter */ NumpyConvert(py::array input) { // require a flat buffer if (input.ndim() != 1) { // try to take a 1D view on the input auto v = input.attr("view")(); // this will throw if the shape cannot be modified zero-copy, // which is what we want try { v.attr("shape") = py::int_(input.size()); } catch (py::error_already_set &e) { if (e.matches(PyExc_AttributeError)) { use_iter_ = true; } else { throw; } } catch (std::exception &e) { std::cout << e.what() << std::endl; } input_ = v; } else { input_ = input; } input_len_ = py::len(input_); data_buf_ = new std::vector(); offset_buf_ = new std::vector(input_len_); } ~NumpyConvert() { if (data_buf_) delete data_buf_; if (offset_buf_) delete offset_buf_; } /* Set allow_unicode_ flag */ bool allow_unicode() { return allow_unicode_; } void allow_unicode(bool allow_unicode) { allow_unicode_ = allow_unicode; } /* Returns a tuple of py::array containing (data:array_t, offsets:array_t) */ py::tuple get() { auto input_dtype = input_.dtype(); if (use_iter_) { // slow, safe path convert_iter(); } else if (issubdtype(input_dtype, py::dtype("unicode"))) { if (allow_unicode_) { convert_unicode(); } else { throw std::runtime_error("Unexpected fixed-length unicode array"); } } else if (issubdtype(input_dtype, py::dtype("bytes"))) { convert_bytes(); } else if (!input_dtype.is(py::dtype("O"))) { // TODO TPY_ERROR_LOC throw std::runtime_error("expected object array"); } else { convert_object(); } auto tmp_data_buf_p = data_buf_; auto data_ref = py::capsule(data_buf_, [](void *v) { delete reinterpret_cast *>(v); }); data_buf_ = nullptr; // disown: capsule owns it auto tmp_offset_buf_p = offset_buf_; auto offset_ref = py::capsule(offset_buf_, [](void *v) { delete reinterpret_cast *>(v); }); offset_buf_ = nullptr; // disown: capsule owns it now auto data_np = py::array_t(tmp_data_buf_p->size(), tmp_data_buf_p->data(), data_ref); auto offset_np = py::array_t( tmp_offset_buf_p->size(), tmp_offset_buf_p->data(), offset_ref); return py::make_tuple(data_np, offset_np); } }; #endif py::tuple convert_np(py::array input, bool allow_unicode, bool use_fallback = false) { #if PY_MAJOR_VERSION >= 3 if (use_fallback) { #endif auto tiledb = py::module::import("tiledb"); auto libtiledb = tiledb.attr("libtiledb"); auto array_to_buffer = libtiledb.attr("array_to_buffer"); return array_to_buffer(input); #if PY_MAJOR_VERSION >= 3 } else { NumpyConvert cvt(input); cvt.allow_unicode(allow_unicode); return cvt.get(); } #endif } }; // namespace tiledbpy TileDB-Py-0.12.2/tiledb/npbuffer.h000066400000000000000000000005121417663620700165220ustar00rootroot00000000000000#include "util.h" #include #include #include namespace tiledbpy { namespace py = pybind11; using namespace pybind11::literals; py::tuple convert_np(py::array input, bool allow_unicode = true, bool use_fallback = false); } // namespace tiledbpyTileDB-Py-0.12.2/tiledb/numpyFlags.h000066400000000000000000000001431417663620700170400ustar00rootroot00000000000000// Turn off cython generated deprecation warnings #define NPY_NO_DEPRECATED_API 
NPY_1_7_API_VERSIONTileDB-Py-0.12.2/tiledb/parquet_.py000066400000000000000000000004121417663620700167330ustar00rootroot00000000000000from os import PathLike from typing import TYPE_CHECKING import tiledb if TYPE_CHECKING: import pandas as pd def from_parquet(uri, parquet_uri) -> "pd.DataFrame": import pandas as pd df = pd.read_parquet(parquet_uri) tiledb.from_pandas(uri, df) TileDB-Py-0.12.2/tiledb/query_condition.cc000066400000000000000000000134341417663620700202730ustar00rootroot00000000000000#include #include #include #define TILEDB_DEPRECATED #define TILEDB_DEPRECATED_EXPORT #include "util.h" #include // C++ #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 2 #if !defined(NDEBUG) //#include "debug.cc" #endif namespace tiledbpy { using namespace std; using namespace tiledb; namespace py = pybind11; using namespace pybind11::literals; class PyQueryCondition { private: Context ctx_; shared_ptr qc_; public: PyQueryCondition() = delete; PyQueryCondition(py::object ctx) { try { set_ctx(ctx); qc_ = shared_ptr(new QueryCondition(ctx_)); } catch (TileDBError &e) { TPY_ERROR_LOC(e.what()); } } void init(const string &attribute_name, const string &condition_value, tiledb_query_condition_op_t op) { try { qc_->init(attribute_name, condition_value, op); } catch (TileDBError &e) { TPY_ERROR_LOC(e.what()); } } template void init(const string &attribute_name, T condition_value, tiledb_query_condition_op_t op) { try { qc_->init(attribute_name, &condition_value, sizeof(condition_value), op); } catch (TileDBError &e) { TPY_ERROR_LOC(e.what()); } } shared_ptr ptr() { return qc_; } py::capsule get_capsule() { return py::capsule(&qc_, "qc", nullptr); } PyQueryCondition combine(PyQueryCondition rhs, tiledb_query_condition_combination_op_t combination_op) const { auto pyqc = PyQueryCondition(nullptr, ctx_.ptr().get()); tiledb_query_condition_t *combined_qc = nullptr; ctx_.handle_error( tiledb_query_condition_alloc(ctx_.ptr().get(), &combined_qc)); ctx_.handle_error(tiledb_query_condition_combine( ctx_.ptr().get(), qc_->ptr().get(), rhs.qc_->ptr().get(), combination_op, &combined_qc)); pyqc.qc_ = std::shared_ptr( new QueryCondition(pyqc.ctx_, combined_qc)); return pyqc; } private: PyQueryCondition(shared_ptr qc, tiledb_ctx_t *c_ctx) : qc_(qc) { ctx_ = Context(c_ctx, false); } void set_ctx(py::object ctx) { tiledb_ctx_t *c_ctx; if ((c_ctx = (py::capsule)ctx.attr("__capsule__")()) == nullptr) TPY_ERROR_LOC("Invalid context pointer!") ctx_ = Context(c_ctx, false); } }; // namespace tiledbpy void init_query_condition(py::module &m) { py::class_(m, "PyQueryCondition") .def(py::init(), py::arg("ctx") = py::none()) /* TODO surely there's a better way to deal with templated PyBind11 * functions? but maybe not? 
* https://github.com/pybind/pybind11/issues/1667 */ .def("init_string", static_cast( &PyQueryCondition::init)) .def("init_uint64", static_cast( &PyQueryCondition::init)) .def("init_int64", static_cast( &PyQueryCondition::init)) .def("init_uint32", static_cast( &PyQueryCondition::init)) .def("init_int32", static_cast( &PyQueryCondition::init)) .def("init_uint16", static_cast( &PyQueryCondition::init)) .def("init_int16", static_cast( &PyQueryCondition::init)) .def("init_uint8", static_cast( &PyQueryCondition::init)) .def("init_int8", static_cast( &PyQueryCondition::init)) .def("init_float32", static_cast( &PyQueryCondition::init)) .def("init_float64", static_cast( &PyQueryCondition::init)) .def("combine", &PyQueryCondition::combine) .def("get_capsule", &PyQueryCondition::get_capsule); py::enum_(m, "tiledb_query_condition_op_t", py::arithmetic()) .value("TILEDB_LT", TILEDB_LT) .value("TILEDB_LE", TILEDB_LE) .value("TILEDB_GT", TILEDB_GT) .value("TILEDB_GE", TILEDB_GE) .value("TILEDB_EQ", TILEDB_EQ) .value("TILEDB_NE", TILEDB_NE) .export_values(); py::enum_( m, "tiledb_query_condition_combination_op_t", py::arithmetic()) .value("TILEDB_AND", TILEDB_AND) .export_values(); } }; // namespace tiledbpy #endif TileDB-Py-0.12.2/tiledb/query_condition.py000066400000000000000000000304731417663620700203400ustar00rootroot00000000000000import ast from dataclasses import dataclass, field import numpy as np from typing import Any, Callable, List, Tuple, Type, Union import tiledb import tiledb.main as qc from tiledb.main import PyQueryCondition """ A high-level wrapper around the Pybind11 query_condition.cc implementation for filtering query results on attribute values. """ QueryConditionNodeElem = Union[ ast.Name, ast.Constant, ast.Call, ast.Num, ast.Str, ast.Bytes ] @dataclass class QueryCondition: """ Class representing a TileDB query condition object for attribute filtering pushdown. Set the query condition with a string representing an expression as defined by the grammar below. A more straightforward usage example is given beneath the grammar. BNF --- A query condition is made up of one or more Boolean expressions. Multiple Boolean expressions are chained together with Boolean operators. query_cond ::= bool_expr | bool_expr bool_op query_cond A Boolean expression contains a comparison operator. The operator works on a TileDB attribute name and value. bool_expr ::= attr compare_op val | val compare_op attr | val compare_op attr compare_op val "and" and "&" are the only Boolean operators supported at the moment. We intend to support "or" and "not" in future releases. bool_op ::= and | & All comparison operators are supported. compare_op ::= < | > | <= | >= | == | != TileDB attribute names are valid Python variable names or strings cast with attr(). attr ::= <name> | attr(<str>) Values are any Python-valid number or string. They may also be cast with val(). val ::= <num> | <str> | val(val) Example ------- with tiledb.open(uri, mode="r") as A: # select cells where the attribute value for foo is greater than 5, # the attribute named 'b a r' equals the string 'asdf', and baz is # at most 1.0.
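# (added note) attr() lets the expression reference an attribute whose
# name is not a valid Python identifier, and val() explicitly marks a
# value as a literal.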
qc = QueryCondition("foo > 5 and 'asdf' == attr('b a r') and baz <= val(1.0)") A.query(attr_cond=qc) """ expression: str ctx: tiledb.Ctx = field(default_factory=tiledb.default_ctx, repr=False) tree: ast.Expression = field(init=False, repr=False) c_obj: PyQueryCondition = field(init=False, repr=False) def __post_init__(self): try: self.tree = ast.parse(self.expression, mode="eval") except: raise tiledb.TileDBError( "Could not parse the given QueryCondition statement: " f"{self.expression}" ) if not self.tree: raise tiledb.TileDBError( "The query condition statement could not be parsed properly. " "(Is this an empty expression?)" ) def init_query_condition(self, schema: tiledb.ArraySchema, query_attrs: List[str]): qctree = QueryConditionTree(self.ctx, schema, query_attrs) self.c_obj = qctree.visit(self.tree.body) if not isinstance(self.c_obj, PyQueryCondition): raise tiledb.TileDBError( "Malformed query condition statement. A query condition must " "be made up of one or more Boolean expressions." ) @dataclass class QueryConditionTree(ast.NodeVisitor): ctx: tiledb.Ctx schema: tiledb.ArraySchema query_attrs: List[str] def visit_Gt(self, node): return qc.TILEDB_GT def visit_GtE(self, node): return qc.TILEDB_GE def visit_Lt(self, node): return qc.TILEDB_LT def visit_LtE(self, node): return qc.TILEDB_LE def visit_Eq(self, node): return qc.TILEDB_EQ def visit_NotEq(self, node): return qc.TILEDB_NE def visit_BitAnd(self, node): return qc.TILEDB_AND def visit_And(self, node): return qc.TILEDB_AND def visit_Compare(self, node: Type[ast.Compare]) -> PyQueryCondition: result = self.aux_visit_Compare( self.visit(node.left), self.visit(node.ops[0]), self.visit(node.comparators[0]), ) for lhs, op, rhs in zip( node.comparators[:-1], node.ops[1:], node.comparators[1:] ): value = self.aux_visit_Compare( self.visit(lhs), self.visit(op), self.visit(rhs) ) result = result.combine(value, qc.TILEDB_AND) return result def aux_visit_Compare( self, lhs: QueryConditionNodeElem, op_node: qc.tiledb_query_condition_op_t, rhs: QueryConditionNodeElem, ) -> PyQueryCondition: att, val, op = self.order_nodes(lhs, rhs, op_node) att = self.get_att_from_node(att) val = self.get_val_from_node(val) dt = self.schema.attr(att).dtype dtype = "string" if dt.kind in "SUa" else dt.name val = self.cast_val_to_dtype(val, dtype) pyqc = PyQueryCondition(self.ctx) try: self.init_pyqc(pyqc, dtype)(att, val, op) except tiledb.TileDBError as e: raise tiledb.TileDBError(e) return pyqc def is_att_node(self, att: QueryConditionNodeElem) -> bool: if isinstance(att, ast.Call): if not isinstance(att.func, ast.Name): raise tiledb.TileDBError(f"Unrecognized expression {att.func}.") if att.func.id != "attr": return False return ( isinstance(att.args[0], ast.Constant) or isinstance(att.args[0], ast.Str) or isinstance(att.args[0], ast.Bytes) ) return isinstance(att, ast.Name) def order_nodes( self, att: QueryConditionNodeElem, val: QueryConditionNodeElem, op: qc.tiledb_query_condition_op_t, ) -> Tuple[ QueryConditionNodeElem, QueryConditionNodeElem, qc.tiledb_query_condition_op_t, ]: if not self.is_att_node(att): REVERSE_OP = { qc.TILEDB_GT: qc.TILEDB_LT, qc.TILEDB_GE: qc.TILEDB_LE, qc.TILEDB_LT: qc.TILEDB_GT, qc.TILEDB_LE: qc.TILEDB_GE, qc.TILEDB_EQ: qc.TILEDB_EQ, qc.TILEDB_NE: qc.TILEDB_NE, } op = REVERSE_OP[op] att, val = val, att return att, val, op def get_att_from_node(self, node: QueryConditionNodeElem) -> Any: if self.is_att_node(node): att_node = node if isinstance(att_node, ast.Call): if not isinstance(att_node.func, ast.Name): raise 
tiledb.TileDBError( f"Unrecognized expression {att_node.func}." ) att_node = att_node.args[0] if isinstance(att_node, ast.Name): att = att_node.id elif isinstance(att_node, ast.Constant): att = att_node.value elif isinstance(att_node, ast.Str) or isinstance(att_node, ast.Bytes): # deprecated in 3.8 att = att_node.s else: raise tiledb.TileDBError( f"Incorrect type for attribute name: {ast.dump(att_node)}" ) else: raise tiledb.TileDBError( f"Incorrect type for attribute name: {ast.dump(node)}" ) if not self.schema.has_attr(att): if self.schema.domain.has_dim(att): raise tiledb.TileDBError( f"`{att}` is a dimension. QueryConditions currently only " "work on attributes." ) raise tiledb.TileDBError(f"Attribute `{att}` not found in schema.") if att not in self.query_attrs: raise tiledb.TileDBError( f"Attribute `{att}` given to filter in query's `attr_cond` " "arg but not found in `attr` arg." ) return att def get_val_from_node(self, node: QueryConditionNodeElem) -> Any: val_node = node if isinstance(node, ast.Call): if not isinstance(node.func, ast.Name): raise tiledb.TileDBError(f"Unrecognized expression {node.func}.") if node.func.id == "val": val_node = node.args[0] else: raise tiledb.TileDBError( f"Incorrect type for cast value: {node.func.id}" ) if isinstance(val_node, ast.Constant): val = val_node.value elif isinstance(val_node, ast.Num): # deprecated in 3.8 val = val_node.n elif isinstance(val_node, ast.Str) or isinstance(val_node, ast.Bytes): # deprecated in 3.8 val = val_node.s else: raise tiledb.TileDBError( f"Incorrect type for comparison value: {ast.dump(val_node)}" ) return val def cast_val_to_dtype( self, val: Union[str, int, float, bytes], dtype: str ) -> Union[str, int, float, bytes]: if dtype != "string": try: # this prevents numeric strings ("1", '123.32') from getting # casted to numeric types if isinstance(val, str): raise tiledb.TileDBError(f"Cannot cast `{val}` to {dtype}.") cast = getattr(np, dtype) val = cast(val) except ValueError: raise tiledb.TileDBError(f"Cannot cast `{val}` to {dtype}.") return val def init_pyqc(self, pyqc: PyQueryCondition, dtype: str) -> Callable: init_fn_name = f"init_{dtype}" if not hasattr(pyqc, init_fn_name): raise tiledb.TileDBError(f"PyQueryCondition.{init_fn_name}() not found.") return getattr(pyqc, init_fn_name) def visit_BinOp(self, node: ast.BinOp) -> PyQueryCondition: try: op = self.visit(node.op) except KeyError: raise tiledb.TileDBError( f"Unsupported binary operator: {ast.dump(node.op)}. Only & is currently supported." ) result = self.visit(node.left) rhs = node.right[1:] if isinstance(node.right, list) else [node.right] for value in rhs: result = result.combine(self.visit(value), op) return result def visit_BoolOp(self, node: ast.BoolOp) -> PyQueryCondition: try: op = self.visit(node.op) except KeyError: raise tiledb.TileDBError( f"Unsupported Boolean operator: {ast.dump(node.op)}. " 'Only "and" is currently supported.' ) result = self.visit(node.values[0]) for value in node.values[1:]: result = result.combine(self.visit(value), op) return result def visit_Call(self, node: ast.Call) -> ast.Call: if not isinstance(node.func, ast.Name): raise tiledb.TileDBError(f"Unrecognized expression {node.func}.") if node.func.id not in ["attr", "val"]: raise tiledb.TileDBError(f"Valid casts are attr() or val().") if len(node.args) != 1: raise tiledb.TileDBError( f"Exactly one argument must be provided to {node.func.id}()." 
) return node def visit_Name(self, node: ast.Name) -> ast.Name: return node def visit_Constant(self, node: ast.Constant) -> ast.Constant: return node def visit_UnaryOp(self, node: ast.UnaryOp, sign: int = 1): if isinstance(node.op, ast.UAdd): sign *= 1 elif isinstance(node.op, ast.USub): sign *= -1 else: raise tiledb.TileDBError(f"Unsupported UnaryOp type. Saw {ast.dump(node)}.") if isinstance(node.operand, ast.UnaryOp): return self.visit_UnaryOp(node.operand, sign) else: if isinstance(node.operand, ast.Constant): node.operand.value *= sign elif isinstance(node.operand, ast.Num): node.operand.n *= sign else: raise tiledb.TileDBError( f"Unexpected node type following UnaryOp. Saw {ast.dump(node)}." ) return node.operand def visit_Num(self, node: ast.Num) -> ast.Num: # deprecated in 3.8 return node def visit_Str(self, node: ast.Str) -> ast.Str: # deprecated in 3.8 return node def visit_Bytes(self, node: ast.Bytes) -> ast.Bytes: # deprecated in 3.8 return node TileDB-Py-0.12.2/tiledb/schema_evolution.cc000066400000000000000000000043101417663620700204150ustar00rootroot00000000000000#include //#include #include #include #include "util.h" namespace tiledbpy { // using namespace tiledb; namespace py = pybind11; typedef struct { tiledb_ctx_t *ctx_; tiledb_array_schema_evolution_t *evol_; } PyArraySchemaEvolution; using ArraySchemaEvolution = PyArraySchemaEvolution; void init_schema_evolution(py::module &m) { py::class_(m, "ArraySchemaEvolution") .def(py::init([](py::object ctx_py) { tiledb_ctx_t *ctx_c = (py::capsule)ctx_py.attr("__capsule__")(); if (ctx_c == nullptr) TPY_ERROR_LOC("Invalid context pointer"); tiledb_array_schema_evolution_t *evol_p; int rc = tiledb_array_schema_evolution_alloc(ctx_c, &evol_p); if (rc != TILEDB_OK) { TPY_ERROR_LOC("Failed to allocate ArraySchemaEvolution"); } return new PyArraySchemaEvolution({ctx_c, evol_p}); })) .def("add_attribute", [](ArraySchemaEvolution &inst, py::object attr_py) { tiledb_attribute_t *attr_c = (py::capsule)attr_py.attr("__capsule__")(); if (attr_c == nullptr) TPY_ERROR_LOC("Invalid Attribute!"); int rc = tiledb_array_schema_evolution_add_attribute( inst.ctx_, inst.evol_, attr_c); if (rc != TILEDB_OK) { TPY_ERROR_LOC("Failed to add attribute to ArraySchemaEvolution"); } }) .def("drop_attribute", [](ArraySchemaEvolution &inst, std::string attr_name) { int rc = tiledb_array_schema_evolution_drop_attribute( inst.ctx_, inst.evol_, attr_name.c_str()); if (rc != TILEDB_OK) { TPY_ERROR_LOC( "Failed to drop attribute from ArraySchemaEvolution"); } }) .def("array_evolve", [](ArraySchemaEvolution &inst, std::string uri) { int rc = tiledb_array_evolve(inst.ctx_, uri.c_str(), inst.evol_); if (rc != TILEDB_OK) { TPY_ERROR_LOC("Failed to drop attribute from ArraySchemaEvolution"); } }); } }; // namespace tiledbpy TileDB-Py-0.12.2/tiledb/schema_evolution.py000066400000000000000000000021741417663620700204660ustar00rootroot00000000000000from typing import Optional import tiledb from .main import ArraySchemaEvolution as ASE class ArraySchemaEvolution: """This class provides the capability to evolve the ArraySchema of a TileDB array in place by adding and removing attributes. """ def __init__(self, ctx: Optional[tiledb.Ctx] = None): ctx = ctx or tiledb.default_ctx() self.ase = ASE(ctx) def add_attribute(self, attr: tiledb.Attr): """Add the given attribute to the schema evolution plan. 
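An added end-to-end sketch (`uri` is assumed to reference an existing array; the attribute name is illustrative):

>>> import numpy as np
>>> se = tiledb.ArraySchemaEvolution()
>>> se.add_attribute(tiledb.Attr("a2", dtype=np.float64))
>>> se.array_evolve(uri)  # applies the pending changes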

        Note: this function does not apply any changes; the changes are
        only applied when `ArraySchemaEvolution.array_evolve` is called."""
        self.ase.add_attribute(attr)

    def drop_attribute(self, attr_name: str):
        """Drop the given attribute (by name) in the schema evolution.

        Note: this function does not apply any changes; the changes are
        only applied when `ArraySchemaEvolution.array_evolve` is called."""
        self.ase.drop_attribute(attr_name)

    def array_evolve(self, uri: str):
        """Apply ArraySchemaEvolution actions to Array at given URI."""
        self.ase.array_evolve(uri)
TileDB-Py-0.12.2/tiledb/serialization.cc000066400000000000000000000044601417663620700177340ustar00rootroot00000000000000
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/pytypes.h>
#include <pybind11/stl.h>
#include <exception>

#define TILEDB_DEPRECATED
#define TILEDB_DEPRECATED_EXPORT

#include "util.h"
#include <tiledb/tiledb> // C++
#include <tiledb/tiledb.h> // C

#if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 2

#if !defined(NDEBUG)
//#include "debug.cc"
#endif

namespace tiledbpy {

using namespace std;
using namespace tiledb;
namespace py = pybind11;
using namespace pybind11::literals;

class PySerialization {

public:
  static void *deserialize_query(py::object ctx, py::object array,
                                 py::buffer buffer,
                                 tiledb_serialization_type_t serialize_type,
                                 int32_t client_side) {
    int rc;

    tiledb_ctx_t *ctx_c;
    tiledb_array_t *arr_c;
    tiledb_query_t *qry_c;
    tiledb_buffer_t *buf_c;

    ctx_c = (py::capsule)ctx.attr("__capsule__")();
    if (ctx_c == nullptr)
      TPY_ERROR_LOC("Invalid context pointer.");

    arr_c = (py::capsule)array.attr("__capsule__")();
    if (arr_c == nullptr)
      TPY_ERROR_LOC("Invalid array pointer.");

    rc = tiledb_query_alloc(ctx_c, arr_c, TILEDB_READ, &qry_c);
    if (rc == TILEDB_ERR)
      TPY_ERROR_LOC("Could not allocate query.");

    rc = tiledb_buffer_alloc(ctx_c, &buf_c);
    if (rc == TILEDB_ERR)
      TPY_ERROR_LOC("Could not allocate buffer.");

    py::buffer_info buf_info = buffer.request();
    rc = tiledb_buffer_set_data(ctx_c, buf_c, buf_info.ptr, buf_info.shape[0]);
    if (rc == TILEDB_ERR)
      TPY_ERROR_LOC("Could not set buffer.");

    rc = tiledb_deserialize_query(ctx_c, buf_c, serialize_type, client_side,
                                  qry_c);
    if (rc == TILEDB_ERR)
      TPY_ERROR_LOC("Could not deserialize query.");

    return qry_c;
  }
};

void init_serialization(py::module &m) {
  py::class_<PySerialization>(m, "serialization")
      .def_static("deserialize_query", &PySerialization::deserialize_query);

  py::enum_<tiledb_serialization_type_t>(m, "tiledb_serialization_type_t",
                                         py::arithmetic())
      .value("TILEDB_CAPNP", TILEDB_CAPNP)
      .value("TILEDB_JSON", TILEDB_JSON)
      .export_values();
}

}; // namespace tiledbpy

#endif
TileDB-Py-0.12.2/tiledb/tests/000077500000000000000000000000001417663620700157065ustar00rootroot00000000000000TileDB-Py-0.12.2/tiledb/tests/__init__.py000066400000000000000000000003201417663620700200120ustar00rootroot00000000000000
"""
Unit tests for TileDB
=====================

This package contains modules which provide a ``suite()``
function which returns a test suite for tiledb functionality
"""

from tiledb.tests.all import suite
TileDB-Py-0.12.2/tiledb/tests/all.py000066400000000000000000000007261417663620700170350ustar00rootroot00000000000000
"""
Run all test cases
"""

import os
import sys
import unittest


def suite():
    test_dir = os.path.dirname(__file__)
    return unittest.TestLoader().discover(start_dir=test_dir, pattern="test_*.py")


def suite_test():
    """
    suite_test()

    Run all the tests in the test suite
    """
    ret = unittest.TextTestRunner(verbosity=2).run(suite())
    sys.exit(not ret.wasSuccessful())


if __name__ == "__main__":
    unittest.TextTestRunner().run(suite())
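# A minimal usage sketch of the ArraySchemaEvolution wrapper defined in
# tiledb/schema_evolution.py above; not part of the package itself. The
# array URI and attribute names here are hypothetical examples. The call
# sequence follows the methods shown in that file: add_attribute and
# drop_attribute only stage changes, and array_evolve applies them.
import os
import tempfile

import numpy as np

import tiledb
from tiledb.schema_evolution import ArraySchemaEvolution

uri = os.path.join(tempfile.mkdtemp(), "evolution_example")  # hypothetical URI
dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 3), tile=4, dtype=np.int64))
schema = tiledb.ArraySchema(domain=dom, attrs=[tiledb.Attr(name="a", dtype=np.float64)])
tiledb.Array.create(uri, schema)

se = ArraySchemaEvolution()
se.add_attribute(tiledb.Attr(name="b", dtype=np.int32))  # staged only
se.array_evolve(uri)  # the new attribute exists only after this call

se2 = ArraySchemaEvolution()
se2.drop_attribute("b")  # staged only
se2.array_evolve(uri)  # "b" is removed here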
TileDB-Py-0.12.2/tiledb/tests/check_csv_dir.py000066400000000000000000000023161417663620700210500ustar00rootroot00000000000000
# This is a helper function to run tests on an external
# directory, for example the contents of the Pandas
# CSV tests:
#   https://github.com/pandas-dev/pandas/tree/master/pandas/tests/io/data/csv
# It takes one argument, the test directory, and checks that all
# .csv files contained within are correctly round-tripped via
# `tiledb.from_csv` and `tiledb.open_dataframe`

import tiledb

import os
import sys
import tempfile

import pandas as pd
import pandas._testing as tm

from glob import glob


def check_csv_roundtrip(input_csv):
    basename = os.path.basename(input_csv)
    tmp = tempfile.mktemp(prefix="csvtest-" + basename)
    os.mkdir(tmp)
    array_uri = os.path.join(tmp, "tiledb_from_csv")
    tiledb.from_csv(array_uri, input_csv)

    df_csv = pd.read_csv(input_csv)
    df_back = tiledb.open_dataframe(array_uri)
    tm.assert_frame_equal(df_csv, df_back)
    return True


def check_csv_dir(path):
    files = glob(os.path.join(path, "*.csv"))
    res = [check_csv_roundtrip(f) for f in files]
    assert len(res) == len(files), "Failed to check all files!"


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("expected one argument: path to CSV directory")
        sys.exit(1)
    check_csv_dir(sys.argv[1])
TileDB-Py-0.12.2/tiledb/tests/common.py000066400000000000000000000237161417663620700175570ustar00rootroot00000000000000
import contextlib
import datetime
import glob
import os
import random
import shutil
import subprocess
import sys
import tempfile
import traceback
import uuid

import numpy as np
import pytest
from numpy.testing import assert_almost_equal, assert_array_equal, assert_equal

import tiledb

SUPPORTED_DATETIME64_DTYPES = tuple(
    np.dtype(f"M8[{res}]") for res in "Y M W D h m s ms us ns".split()
)


def assert_tail_equal(a, *rest, **kwargs):
    """Assert that all arrays in target equal first array"""
    for target in rest:
        assert_array_equal(a, target, **kwargs)


def create_vfs_dir(path):
    """Create a directory at the given scheme-prefixed path,
    first creating the base bucket if needed."""
    import urllib

    split_uri = urllib.parse.urlsplit(path)
    scheme = split_uri.scheme
    bucket = split_uri.netloc
    bucket_uri = scheme + "://" + bucket

    vfs = tiledb.VFS()
    if not vfs.is_bucket(bucket_uri):
        vfs.create_bucket(bucket_uri)
    vfs.create_dir(path)


class DiskTestCase:
    """Helper class to store paths and associated allocation frames. This is
    both a cleanup step and a test of resource management. Some platforms
    will refuse to delete an open file, indicating a potentially leaked resource.
""" @classmethod def setup_method(self): # .lower: because bucket name must be all lowercase prefix = "tiledb-" + self.__name__.lower() if hasattr(pytest, "tiledb_vfs") and pytest.tiledb_vfs == "s3": self.path_scheme = pytest.tiledb_vfs + "://" self.rootdir = self.path_scheme + prefix + str(random.randint(0, 10e10)) create_vfs_dir(self.rootdir) else: self.path_scheme = "" self.rootdir = tempfile.mkdtemp(prefix=prefix) self.vfs = tiledb.VFS() self.pathmap = dict() @classmethod def teardown_method(self): # Remove every directory starting with rootdir # This is both a clean-up step and an implicit test # of proper resource deallocation (see notes below) for dirpath in glob.glob(self.rootdir + "*"): try: shutil.rmtree(dirpath) except OSError as exc: print( "test '{}' error deleting '{}'".format( self.__class__.__name__, dirpath ) ) print("registered paths and originating functions:") for path, frame in self.pathmap.items(): print(" '{}' <- '{}'".format(path, frame)) raise exc def path(self, basename=None, shared=False): if self.path_scheme: basename = basename if basename else str(uuid.uuid4()) out = os.path.join(self.rootdir, basename) self.vfs.create_dir(out) else: if basename is not None: # Note: this must be `is not None` because we need to match empty string out = os.path.abspath(os.path.join(self.rootdir, basename)) else: out = tempfile.mkdtemp(dir=self.rootdir) if os.name == "nt" and shared: subprocess.run( f'cmd //c "net share tiledb-shared={out}"', shell=True, check=True ) # We have had issues in both py and libtiledb in the past # where files were not released (eg: destructor not called) # Often this is invisible on POSIX platforms, but will # cause errors on Windows because two processes cannot access # the same file at once. # In order to debug this issue, we save the caller where # this path was allocated so that we can determine what # test created an unreleased file frame = traceback.extract_stack(limit=2)[-2][2] self.pathmap[out] = frame return out def assertRaises(self, *args): return pytest.raises(*args) def assertRaisesRegex(self, e, m): return pytest.raises(e, match=m) @contextlib.contextmanager def assertEqual(self, *args): if not len(args) == 2: raise Exception("Unexpected input len > 2 to assertEquals") assert args[0] == args[1] @contextlib.contextmanager def assertNotEqual(self, *args): if not len(args) == 2: raise Exception("Unexpected input len > 2 to assertEquals") assert args[0] != args[1] @contextlib.contextmanager def assertTrue(self, a, msg=None): if msg: assert a, msg else: assert a @contextlib.contextmanager def assertFalse(self, a): assert a == False @contextlib.contextmanager def assertIsInstance(self, v, t): assert isinstance(v, t) @contextlib.contextmanager def assertSetEqual(self, s1, s2): assert all(isinstance(x, set) for x in (s1, s2)) assert s1 == s2 @contextlib.contextmanager def assertIsNone(self, a1): assert a1 is None @contextlib.contextmanager def assertTupleEqual(self, a1, a2): assert a1 == a2 @contextlib.contextmanager def assertAlmostEqual(self, a1, a2): assert_almost_equal(a1, a2) # fixture wrapper to use with pytest: mark.parametrize does not # work with DiskTestCase subclasses (unittest.TestCase methods # cannot take arguments) @pytest.fixture(scope="class") def checked_path(): dtc = DiskTestCase() dtc.setup_method() yield dtc dtc.teardown_method() # exclude whitespace: if we generate unquoted newline then pandas will be confused _ws_set = set("\n\t\r") def gen_chr(max, printable=False): while True: # TODO we exclude 0x0 here because the key 
        # API does not embed NULLs
        s = chr(random.randrange(1, max))
        if printable and (not s.isprintable()) or (s in _ws_set):
            continue
        if len(s) > 0:
            break

    return s


def rand_utf8(size=5):
    return "".join([gen_chr(0xD7FF) for _ in range(0, size)])


def rand_ascii(size=5, printable=False):
    return "".join([gen_chr(127, printable) for _ in range(0, size)])


def rand_ascii_bytes(size=5, printable=False):
    return b"".join([gen_chr(127, printable).encode("utf-8") for _ in range(0, size)])


def dtype_max(dtype):
    if not np.issubdtype(dtype, np.generic):
        raise TypeError("expected numpy dtype!")

    if np.issubdtype(dtype, np.floating):
        finfo = np.finfo(dtype)
        return finfo.max

    elif np.issubdtype(dtype, np.integer):
        iinfo = np.iinfo(dtype)
        return int(iinfo.max)

    elif np.issubdtype(dtype, np.datetime64):
        return np.datetime64(datetime.datetime.max)

    raise TypeError("Unknown dtype for dtype_max '{}'".format(str(dtype)))


def dtype_min(dtype):
    if not np.issubdtype(dtype, np.generic):
        raise TypeError("expected numpy dtype!")

    if np.issubdtype(dtype, np.floating):
        finfo = np.finfo(dtype)
        return finfo.min

    elif np.issubdtype(dtype, np.integer):
        iinfo = np.iinfo(dtype)
        return int(iinfo.min)

    elif np.issubdtype(dtype, np.datetime64):
        return np.datetime64(datetime.datetime.min)

    raise TypeError("Unknown dtype for dtype_min '{}'".format(str(dtype)))


def rand_int_sequential(size, dtype=np.uint64):
    dtype_min, dtype_max = tiledb.libtiledb.dtype_range(dtype)
    arr = np.random.randint(dtype_min, high=dtype_max, size=size, dtype=dtype)
    return np.sort(arr)


def rand_datetime64_array(
    size, start=None, stop=None, include_extremes=True, dtype=None
):
    if not dtype:
        dtype = np.dtype("M8[ns]")

    # generate randint inbounds on the range of the dtype
    units = np.datetime_data(dtype)[0]
    intmin, intmax = np.iinfo(np.int64).min, np.iinfo(np.int64).max

    if start is None:
        start = np.datetime64(intmin + 1, units)
    else:
        start = np.datetime64(start)

    if stop is None:
        stop = np.datetime64(intmax, units)
    else:
        stop = np.datetime64(stop)

    arr = np.random.randint(
        start.astype(dtype).astype(np.int64),
        stop.astype(dtype).astype(np.int64),
        size=size,
        dtype=np.int64,
    )
    arr.sort()
    arr = arr.astype(dtype)

    # enable after fix for core issue: ch 7192
    if include_extremes:
        arr[0] = start
        arr[-1] = stop

    return arr


def intspace(start, stop, num=50, dtype=np.int64):
    """
    Return evenly spaced values over range ensuring that stop is
    always the maximum (will not overflow with int dtype as linspace)
    :param start:
    :param stop:
    :param num:
    :param dtype:
    :return:
    """
    rval = np.zeros(num, dtype=dtype)
    step = (stop - start) // num
    nextval = start

    if np.issubdtype(dtype, np.integer) and step < 1:
        raise ValueError(
            "Cannot use non-integral step value '{}' for integer dtype!".format(step)
        )

    for i in range(num):
        rval[i] = nextval
        nextval += step

    rval[-1] = stop
    return rval


import pprint as _pprint

pp = _pprint.PrettyPrinter(indent=4)


def xprint(*x):
    for xp in x:
        pp.pprint(xp)


def assert_unordered_equal(a1, a2, unordered=True):
    """Assert that arrays are equal after sorting if `unordered==True`"""
    if unordered:
        a1 = np.sort(a1)
        a2 = np.sort(a2)
    assert_array_equal(a1, a2)


def assert_subarrays_equal(a, b, ordered=True):
    assert_equal(a.shape, b.shape)

    if not ordered:
        a = np.sort(a)
        b = np.sort(b)

    for a_el, b_el in zip(a.flat, b.flat):
        assert_array_equal(a_el, b_el)


def assert_all_arrays_equal(*arrays):
    # TODO this should raise in the calling location if possible
    assert len(arrays) % 2 == 0, "Expected even number of arrays"

    for a1, a2 in zip(arrays[0::2], arrays[1::2]):
        assert_array_equal(a1, a2)


def
assert_captured(cap, expected): if sys.platform == "win32": return else: import ctypes libc = ctypes.CDLL(None) libc.fflush(None) out, err = cap.readouterr() assert not err assert expected in out TileDB-Py-0.12.2/tiledb/tests/conftest.py000066400000000000000000000041701417663620700201070ustar00rootroot00000000000000import tiledb import ctypes import pytest import sys if sys.platform != "win32": @pytest.fixture(scope="function", autouse=True) def no_output(capfd): yield # flush stdout libc = ctypes.CDLL(None) libc.fflush(None) out, err = capfd.readouterr() if out or err: pytest.fail(f"Output captured: {out + err}") def pytest_addoption(parser): parser.addoption("--vfs", default="file") parser.addoption("--vfs-config", default=None) def pytest_configure(config): # we need to try importing here so that we don't potentially cause # a slowdown in the DenseArray/SparseArray.__new__ path when # running `tiledb.open`. try: import tiledb.cloud except ImportError: pass # default must be set here rather than globally pytest.tiledb_vfs = "file" vfs_config(config) def vfs_config(pytestconfig): vfs_config_override = {} vfs = pytestconfig.getoption("vfs") if vfs == "s3": pytest.tiledb_vfs = "s3" vfs_config_override.update( { "vfs.s3.endpoint_override": "localhost:9999", "vfs.s3.aws_access_key_id": "minio", "vfs.s3.aws_secret_access_key": "miniosecretkey", "vfs.s3.scheme": "https", "vfs.s3.verify_ssl": False, "vfs.s3.use_virtual_addressing": False, } ) vfs_config_arg = pytestconfig.getoption("vfs-config", None) if vfs_config_arg: pass tiledb._orig_ctx = tiledb.Ctx def get_config(config): final_config = {} if isinstance(config, tiledb.Config): final_config = config.dict() elif config: final_config = config final_config.update(vfs_config_override) return final_config class PatchedCtx(tiledb.Ctx): def __init__(self, config=None): super().__init__(get_config(config)) class PatchedConfig(tiledb.Config): def __init__(self, params=None): super().__init__(get_config(params)) tiledb.Ctx = PatchedCtx tiledb.Config = PatchedConfig TileDB-Py-0.12.2/tiledb/tests/datatypes.py000066400000000000000000000027711417663620700202650ustar00rootroot00000000000000"""Minimal Pandas ExtensionDtype and ExtensionArray for representing ragged arrays""" import re import numpy as np from pandas.api.extensions import ( ExtensionArray, ExtensionDtype, register_extension_dtype, ) @register_extension_dtype class RaggedDtype(ExtensionDtype): type = np.ndarray na_value = None def __init__(self, subtype=np.float64): self.subtype = np.dtype(subtype) @property def name(self): return f"Ragged[{self.subtype}]" @classmethod def construct_array_type(cls): return RaggedArray @classmethod def construct_from_string(cls, string): if string.lower() == "ragged": return cls() match = re.match(r"^ragged\[(\w+)\]$", string, re.IGNORECASE) if match: return cls(match.group(1)) raise TypeError(f"Cannot construct a 'RaggedDtype' from '{string}'") class RaggedArray(ExtensionArray): def __init__(self, arrays, dtype): assert isinstance(dtype, RaggedDtype) self._dtype = dtype self._flat_arrays = [np.asarray(array, dtype=dtype.subtype) for array in arrays] @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): return cls(scalars, dtype) def __len__(self): return len(self._flat_arrays) def __getitem__(self, i): return self._flat_arrays[i] @property def dtype(self): return self._dtype def copy(self): return type(self)(self._flat_arrays, self._dtype) 
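# A small, hypothetical exercise (not part of the test suite) of the
# RaggedDtype/RaggedArray pandas extension types defined in
# tiledb/tests/datatypes.py above; the input values are made-up examples.
import numpy as np

from tiledb.tests.datatypes import RaggedArray, RaggedDtype

dtype = RaggedDtype.construct_from_string("Ragged[float32]")
ra = RaggedArray._from_sequence([[1.0, 2.0], [3.0]], dtype=dtype)

assert dtype.name == "Ragged[float32]"
assert len(ra) == 2
# each element is a flat ndarray with the dtype's subtype
assert ra[1].dtype == np.float32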
TileDB-Py-0.12.2/tiledb/tests/fixtures.py000066400000000000000000000021341417663620700201310ustar00rootroot00000000000000import pytest import tiledb import numpy as np from tiledb.tests.common import assert_subarrays_equal, rand_utf8 INTEGER_DTYPES = ["u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8"] @pytest.fixture(scope="module", params=["hilbert", "row-major"]) def sparse_cell_order(request): yield request.param @pytest.fixture(scope="class") def test_incomplete_return_array(tmpdir_factory, request): tmp_path = str(tmpdir_factory.mktemp("array")) ncells = 20 nvals = 10 data = np.array([rand_utf8(nvals - i % 2) for i in range(ncells)], dtype="O") dom = tiledb.Domain(tiledb.Dim(domain=(0, len(data) - 1), tile=len(data))) att = tiledb.Attr(dtype=str, var=True) allows_duplicates = request.param schema = tiledb.ArraySchema( dom, (att,), sparse=True, allows_duplicates=allows_duplicates ) coords = np.arange(ncells) tiledb.SparseArray.create(tmp_path, schema) with tiledb.SparseArray(tmp_path, mode="w") as T: T[coords] = data with tiledb.SparseArray(tmp_path, mode="r") as T: assert_subarrays_equal(data, T[:][""]) return tmp_path TileDB-Py-0.12.2/tiledb/tests/perf/000077500000000000000000000000001417663620700166425ustar00rootroot00000000000000TileDB-Py-0.12.2/tiledb/tests/perf/asv.conf.json000066400000000000000000000152561417663620700212630ustar00rootroot00000000000000{ // The version of the config file format. Do not change, unless // you know what you are doing. "version": 1, // The name of the project being benchmarked "project": "TileDB-Py", // The project's homepage "project_url": "http://github.com/TileDB-Inc/TileDB-Py/", // The URL or local path of the source code repository for the // project being benchmarked "repo": "../../../", // The Python project's subdirectory in your repo. If missing or // the empty string, the project is assumed to be located at the root // of the repository. // "repo_subdir": "", // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. // // "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"], // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], // "build_command": [ // "python setup.py build", // "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}" // ], // List of branches to benchmark. If not provided, defaults to "master" // (for git) or "default" (for mercurial). "branches": ["dev"], // for git // The DVCS being used. If not set, it will be automatically // determined from "repo" by looking at the protocol in the URL // (if remote), or by looking for special directories, such as // ".git" (if local). // "dvcs": "git", // The tool to use to create environments. May be "conda", // "virtualenv" or other value depending on the plugins in use. // If missing or the empty string, the tool will be automatically // determined by looking for tools on the PATH environment // variable. "environment_type": "conda", // timeout in seconds for installing any dependencies in environment // defaults to 10 min //"install_timeout": 600, // the base URL to show a commit for the project. "show_commit_url": "http://github.com/TileDB-Inc/TileDB-Py/commit/", // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. 
// "pythons": ["2.7", "3.6"], // The list of conda channel names to be searched for benchmark // dependency packages in the specified order // "conda_channels": ["conda-forge", "defaults"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty // list or empty string indicates to just test against the default // (latest) version. null indicates that the package is to not be // installed. If the package to be tested is only available from // PyPi, and the 'environment_type' is conda, then you can preface // the package name by 'pip+', and the package will be installed via // pip (with all the conda available packages installed first, // followed by the pip installed packages). // // "matrix": { // "numpy": ["1.6", "1.7"], // "six": ["", null], // test with and without six installed // "pip+emcee": [""], // emcee is only available for install with pip. // }, // Combinations of libraries/python versions can be excluded/included // from the set to test. Each entry is a dictionary containing additional // key-value pairs to include/exclude. // // An exclude entry excludes entries where all values match. The // values are regexps that should match the whole string. // // An include entry adds an environment. Only the packages listed // are installed. The 'python' key is required. The exclude rules // do not apply to includes. // // In addition to package names, the following keys are available: // // - python // Python version, as in the *pythons* variable above. // - environment_type // Environment type, as above. // - sys_platform // Platform, as in sys.platform. Possible values for the common // cases: 'linux2', 'win32', 'cygwin', 'darwin'. // // "exclude": [ // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows // {"environment_type": "conda", "six": null}, // don't run without six on conda // ], // // "include": [ // // additional env for python2.7 // {"python": "2.7", "numpy": "1.8"}, // // additional env if run on windows+conda // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, // ], // The directory (relative to the current directory) that benchmarks are // stored in. If not provided, defaults to "benchmarks" // "benchmark_dir": "benchmarks", // The directory (relative to the current directory) to cache the Python // environments in. If not provided, defaults to "env" "env_dir": ".asv/env", // The directory (relative to the current directory) that raw benchmark // results are stored in. If not provided, defaults to "results". "results_dir": ".asv/results", // The directory (relative to the current directory) that the html tree // should be written to. If not provided, defaults to "html". "html_dir": ".asv/html", // The number of characters to retain in the commit hashes. // "hash_length": 8, // `asv` will cache results of the recent builds in each // environment, making them faster to install next time. This is // the number of builds to keep, per environment. // "build_cache_size": 2, // The commits after which the regression search in `asv publish` // should start looking for regressions. Dictionary whose keys are // regexps matching to benchmark names, and values corresponding to // the commit (exclusive) after which to start looking for // regressions. The default is to start from the first commit // with results. If the commit is `null`, regression detection is // skipped for the matching benchmark. 
// // "regressions_first_commits": { // "some_benchmark": "352cdf", // Consider regressions only after this commit // "another_benchmark": null, // Skip regression detection altogether // }, // The thresholds for relative change in results, after which `asv // publish` starts reporting regressions. Dictionary of the same // form as in ``regressions_first_commits``, with values // indicating the thresholds. If multiple entries match, the // maximum is taken. If no entry matches, the default is 5%. // // "regressions_thresholds": { // "some_benchmark": 0.01, // Threshold of 1% // "another_benchmark": 0.5, // Threshold of 50% // }, } TileDB-Py-0.12.2/tiledb/tests/perf/benchmarks/000077500000000000000000000000001417663620700207575ustar00rootroot00000000000000TileDB-Py-0.12.2/tiledb/tests/perf/benchmarks/__init__.py000066400000000000000000000000011417663620700230570ustar00rootroot00000000000000 TileDB-Py-0.12.2/tiledb/tests/perf/benchmarks/array.py000066400000000000000000000022241417663620700224470ustar00rootroot00000000000000import tiledb import numpy as np import tempfile, shutil class Basic: def setup(self, *shape): self.path = tempfile.mkdtemp() self.array = np.random.rand(4) tiledb.from_numpy(self.path, self.array) def time_open(self): for i in range(5_000): with tiledb.open(self.path) as A: pass class DenseRead: # parameterize over different array shapes # the functions below will be called with permutations # of these tuples params = [ (100, 500), (1000, 100000), ] def setup(self, *shape): self.path = tempfile.mkdtemp() self.array = np.random.rand(*shape) tiledb.from_numpy(self.path, self.array) def time_read(self, *shape): with tiledb.open(self.path) as A: A[:] def teardown(self, *shape): shutil.rmtree(self.path) class DenseWrite: params = [ (100, 500), (1000, 100000), ] paths = [] def setup(self, *shape): self.array = np.random.rand(*shape) def time_write(self, *shape): path = tempfile.mkdtemp() tiledb.from_numpy(path, self.array) TileDB-Py-0.12.2/tiledb/tests/perf/benchmarks/benchmarks.py000066400000000000000000000003311417663620700234430ustar00rootroot00000000000000import tiledb import numpy as np import tempfile, shutil # TODO # [x] dense # - simple rw # [] sparse # [] metadata # [] property access # [] strings (attrs and dims) # [] "interesting" query range distributions? 
TileDB-Py-0.12.2/tiledb/tests/perf/benchmarks/indexing.py000066400000000000000000000023231417663620700231360ustar00rootroot00000000000000import tiledb import numpy as np import tempfile, shutil class MultiIndex: params = [10, 100, 1000, 10_000, 100_000] def setup(self, _): self.uri = tempfile.mkdtemp() self.dmin = -10_000_000 self.dmax = 10_000_000 self.ncoords = 3_000_000 schema = tiledb.ArraySchema( tiledb.Domain([tiledb.Dim(dtype=np.int64, domain=(self.dmin, self.dmax))]), attrs=[ tiledb.Attr(name="", dtype="float64", var=False, nullable=False), ], cell_order="row-major", tile_order="row-major", capacity=1000, sparse=True, ) tiledb.Array.create(self.uri, schema) # use `choice` here because randint doesn't support non-replacement self.coords = np.random.choice( np.arange(self.dmin, self.dmax + 1), size=self.ncoords, replace=False ) with tiledb.open(self.uri, "w") as A: A[self.coords] = np.random.rand(self.ncoords) def time_multiindex_read(self, coords_count): coords = np.random.choice(self.coords, size=coords_count, replace=False) with tiledb.open(self.uri) as A: A.multi_index[list(coords)] TileDB-Py-0.12.2/tiledb/tests/perf/benchmarks/metadata.py000066400000000000000000000014271417663620700231150ustar00rootroot00000000000000import tiledb import numpy as np import tempfile, shutil import time class MetadataTest: def setup(self): self.path = tempfile.mkdtemp() print(self.path) self.array = np.random.rand(4) tiledb.from_numpy(self.path, self.array) class MetadataWrite(MetadataTest): def setup(self): super().setup() def time_write(self): with tiledb.open(self.path, "w") as A: for i in range(1_000_000): A.meta["x"] = "xyz" class MetadataRead(MetadataTest): def setup(self): super().setup() with tiledb.open(self.path, "w") as A: A.meta["x"] = "xyz" def time_read(self): with tiledb.open(self.path) as A: for i in range(1_000_000): A.meta["x"] TileDB-Py-0.12.2/tiledb/tests/strategies.py000066400000000000000000000013521417663620700204330ustar00rootroot00000000000000from hypothesis import given from hypothesis import strategies as st from hypothesis.strategies import composite # Helpers for Hypothesis-Python based property tests # (custom strategies, etc.) 
@composite def bounded_ntuple(draw, *, length=1, min_value=0, max_value=10): """hypothesis composite strategy that returns a `length` tuple of integers within the range (min_value, max_value) """ return draw(st.tuples(*[st.integers(min_value, max_value) for _ in range(length)])) @composite def ranged_slices(draw, min_value=0, max_value=10): bdd = st.one_of(st.none(), st.integers(min_value=min_value, max_value=max_value)) start = draw(bdd) stop = draw(bdd) step = draw(bdd) return slice(start, stop, step) TileDB-Py-0.12.2/tiledb/tests/test_compat.py000066400000000000000000000232451417663620700206100ustar00rootroot00000000000000import base64 import io import tarfile import numpy as np from numpy.testing import assert_array_equal import pytest import tiledb from tiledb.tests.common import DiskTestCase # This test writes to local filesystem, skip # TODO: unskip if we support transparent file ops on a VFS @pytest.mark.skipif( pytest.tiledb_vfs != "file", reason="Do not run compat test against non-file VFS" ) class TestBackwardCompatibility(DiskTestCase): def test_compat_tiledb_py_0_5_anon_attr_dense(self): # array written with the following script: """ import tiledb, numpy as np dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 0), tile=1, dtype=np.uint8)) attrs = (tiledb.Attr(name="_attr_", dtype=np.uint8),) schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False) path = "py0.5.9-test" tiledb.DenseArray.create(path, schema) with tiledb.open(path, "w") as A: A[0] = 1 """ # save and print tgz of array directory: # f = open("/tmp/py0.5.9-testa2.tgz",'rb').read() # s = base64.encodebytes(f) # print(f"{s.decode():>32}") array_tgz = b"""H4sIADjvS2AAA+2YzW4TMRCA7fIX0SJVFdz9AAg8XtubvbR9AF6gEpLjJg4FmgRttwJuReKAuFFe oUcO9A165NJ7jxWPwBOwXq3RZgnNtmkiBPNJ2bEnY89uRjMZrzGgQal2ArFUXNZm0sa8D7GL2tpJ SKIk6XIFTiVxlIg4UY9JEzjnMeeskFoVkpfzAAPJhYh1LLVmXIDgQJhqtPuM7O9lNs1v5flwlGaj 4R/tXu84t3vBPuMPxa79PueEmS3+xvRT+2zghpkZuMz2bGYfZb3tcR9T4g8AuhZ/paOYML6IH+A/ j//N/KPL8b2go+HbteJKiVfQW/5SjCr23mK1nNOK7g3t9jqd86Vtzfr59JCseU+hXoQVTT15++Wa p6DznjbzFYwsoYtLuPi1Y2X8gFzMi1KelpKXCz/TSdbI38/M9d9mWfp7yR9j6v+/ULX6H4GUWP8X Aa1IWtMh/z55AqepfWv2ujtuMKF3uw6m5b+AWv6DiiTH/F8EvhPYKsdPg65hs+Ht/Rmt2mwEXd5s WHKD7rdOT05a71dWnnxh3zdWOx+/vrt/8Oruh9twdtBeXz8+Omo9vPPJdQj58W15Y47PiUzGmN1R 9+V88j5w6fM/RFoIzP9FYIpze7P3OFflCvGHSOL7HwRBEARBEARBEARBEARBkFn4CRFQSoEAKAAA""" path = self.path("tiledb_py_0_6_anon_attr") with tarfile.open(fileobj=io.BytesIO(base64.b64decode(array_tgz))) as tf: tf.extractall(path) with tiledb.open(path) as A: self.assertEqual(A.schema.attr(0).name, "") self.assertEqual(A.schema.attr(0)._internal_name, "__attr") self.assertEqual(A[0], 1) mres = A.multi_index[0] self.assertEqual(mres[""], 1) qres = A.query(coords=True).multi_index[0] self.assertEqual(qres["d"], 0) def test_compat_py_0_5_anon_attr_sparse(self): # This array was written with TileDB-Py 0.5.9: # - using the invocation below, followed by """ tiledb.Array.create("path", tiledb.ArraySchema( domain=tiledb.Domain(*[ tiledb.Dim(name='d', domain=(0, 2), tile=2, dtype='uint64'),]), attrs=[tiledb.Attr(name='', dtype='int64'),], sparse=True,)) with tiledb.open("path", 'w') as A: A[[0,1,2]] = np.array([1.0,2.0,5.0]) """ # - followed by `tar czf array.tgz -C path` # - followed by `base64.encodebytes(open("sp6.tgz", 'rb').read())` test_array = b"""H4sIANDnmV8AA+2Xz2vUQBTHJ6mLlnpYBGkRD0EQBGV3ZpLJdBFk9bBnj3pKJpvESrsbmo2otyoI Pe/JSy9ePXnwruJBPPYv0P4VRRDNhAxm07o/dBN6eJ9lMpmXSd6Eb96bt602qhyMMcfYyHqbZT3O 
xwqDmNyyzfRnWwYmFDOCDFb90hB6MkpEnC7l8TCKk2j413lPt4JgZ8pzJl/KWPo6K6LVdpxBkIgq P4OF9Gck1d+kHPSvBan/TtTfbiW+V5WPf9CfM44MXNWCioD+johj8dwZ9beCgajiO5ilP6V2SX9m cdC/Fs6lTQm+q2yaunopO2pIGrSGPGRnhfl30tbMx1rB9kzrC9d1fbd5//yh++HCEcXvXu7/6qJx 7/J3fffuZmP/497qgTYOVo6Ojz+Px9d6zfU3r15o6O322r0q3xgoIuOf2NjsULJppVHHSiOPh6Hn 9ZnAWFicsk4YspCEOOAd7jFO56kbFq7/KCXEhv2/Dv5bf8cJY/FoEAyTrI70RXJiD5mhPyEWKelv M0Yh/9eBzP+38/PryjZn/pfz19Fk/le2NP/7rvtNFz1D+/Rlb/WrhvQf6Ip0p1KGum1ed3L+Wsmd skl33fQOA+ngYgEXf9ALkyUreX8r77vodKK8P8x7lj/gtXbabOCMsYT8L5Iknvq3Yeb+z6xS/rew bUL+rwMVpRt5K9pUSmjUuiKgTpYQ//0oiv3RlAwwK/7JifrfMjnUf7VQjP+raLJmULYb79s/jY0D hB6kdpUUdHTz4cWspAAAAAAAAAAAAAAA4IzzG7vsp0oAKAAA""" path = self.path("test_tiledb_py_0_5_anon_attr_sparse") with tarfile.open(fileobj=io.BytesIO(base64.b64decode(test_array))) as tf: tf.extractall(path) with tiledb.open(path) as A: assert_array_equal(A[:][""], np.array([1.0, 2.0, 5.0])) def test_tiledb_py_0_6_anon_attr(self): # same creation steps as above for 0.5 tgz_sparse = b"""H4sIAJKNpWAAA+2aPW/TQBjHz2nTFlGJClUoAxIuA0ICpXf2vdhbGWBiYEIgihI7MRT1JVKairKh qgNfgA2kDnwFVga+ABtfgE8AEwsS5/ROTUzBjWpbKv3/JPexLxc/l/59zz3PJc1lUjqUUiWEO7Ty 0GqsPbxgnArmUymk71LmUc6JK8ofGiE724Oor4fyYqvXH/S2/trv5VqSbPzjPuMfyi18nCXRXG61 NpNBVOZjMIH+XEip9fc9xaB/FaT6b/Q6681BNy7Lh/5/SM4n0l8JPf9pWQMaBfq3on4/etXa7qwl m1EZz0Ge/p6X1V9wKaF/FdT1sWrOXxs77dhXLw//OiRtcNKuzvBspH+gjwVz7Yy07TqdhNTuzcw4 OwtT0407qzM3Hi58vzZH7678cN99rl9f2ji40JZ77T0Wzb+JD/rdp8SZnfta2gcFx5LOfyY9xqXn ByoIVeYqDJMu44GOyGHCeRIGKuHCF1HsRRGLaacl8jOHifM/z2M+8r9KOL3+zd56jo8J1n+rPxcC 8b8KjvRnvlSh8rJXcRJ2Euor7gne8XgsJdVPhAoSFXZFogrWX6//aqg/p9C/Ck6vf6Hx3+rPmEL8 r4IC9G+1nvWj55vJ1mC4k9CNBpkqImf+a7VFRn8phI/5XwVpUh+Yc9fYk+b/af9FMp7/27Zd51vc brf3Y7c+e//BFeJ8IJfSG9hoYd9zUl9p/4sZX7ZN1xrdlXrquwYXcAEXx7s4ojbSOWXK2NtknBVy Mmxc/GKsZ2781tifxj4xjj8Zu2Qc79sBgKopYP3v5u0Z5uX/7I/8z6ce9n8rwYaAhj6ukvE4Yttu flz+5TbeE/JIt9vYUSO3Hs8Pwww4wxQw/3O/Msit/wXP1n9Sof6vhNH538i02ak+njyA/4kC9v+L rP/N/q8UmP/VgPofLuDiXLg4AvU/MBSw/hdZ/5v1XxcCDOt/FaD+P98UMP+LrP/t7z8Uxe8/KgH1 PwAAAAAAAAAAAAAAAAAAAAAAAHD2+Q18oX51AFAAAA==""" path = self.path("0_6_anon_sparse") with tarfile.open(fileobj=io.BytesIO(base64.b64decode(tgz_sparse))) as tf: tf.extractall(path) with tiledb.open(path) as A: if A.schema.sparse: assert_array_equal(A[:][""], np.array([1.0, 2.0, 5.0])) ########################################################################################### # This test checks that anonymous attributes internally stored as "__attr" are presented # as "". # The following steps were run under TileDB-Py 0.6 # Normally, we can't actually write an attribute named "__attr" anymore, so # restored a schema written by a patched libtiledb, and rename the attr file. 
# schema_data = b"\x05\x00\x00\x00]\x00\x00\x00\x00\x00\x00\x00q\x00\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x00\x00\x00\x00\x00\x00\x12\x00\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\x01\x05\x00\x00\x00\x01\x01\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00q\x00\x00\x009\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00q\x00\x00\x009\x00\x00\x00x\x01ce\x80\x00\x01u(\x83\x81\x11\x08\x19\x18\x98XA\xc4\x7f `\xc0\x10\x01\xc9\x83p\n\x1b\x88\x84\xb0\x81\x8a\xc1l\x88\x00H\x9c\r\x88\xe3\xe3\x13KJ\x8aP\x94\x01\x00\xa2c\x0bD" # path = self.path("tiledb_py_0_6_anon_attr") # ctx = tiledb.default_ctx() # dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 0), tile=1, dtype=np.uint8)) # attrs = (tiledb.Attr(name="_attr_", dtype=np.uint8, ctx=ctx),) # schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False, ctx=ctx) # tiledb.DenseArray.create(path, schema, ctx=ctx) # with tiledb.open(path, "w") as A: # A[0] = 1 # fragment_name = os.path.split(list(A.last_write_info.keys())[0])[-1] # fragment_path = os.path.join(path, fragment_name) ## fix up the array the override schema # with open(os.path.join(path, "__array_schema.tdb"), "wb") as f: # f.write(schema_data) # shutil.move( # os.path.join(fragment_path, "_attr_.tdb"), # os.path.join(fragment_path, "__attr.tdb"), # ) tgz_dense = b"""H4sIAL6RpWAAA+2YPW/TQBjH71qQKiKkAEIqYvEIS3p3uRd5A4kB0QUxdUHm/AJFzQu4rlrUoa3K EFWMDB2Y+AQs7CAhJD5HPgBfgXNyRq4pdVNyHtDzk5z/3fni55y/L8+TdFaQcwghSghvonKqhkKn HcqJoF1KpOx6hDLCCfKE+6UhtLWZ6dQs5eVgmGbDwV/nba8nSe+M65y8KW/u63REZyUI+kmmXT4G s/vfZZKD/02Q+98bRhudLA5dxTCfh+R8Jv8VV8gjrhZUBvwPdJrqN8FmtJ70tYvnoM5/xmjFf8El A/+b4LI5ntr2a6uXcHH2+uQVo3wA51PxpFWa75ujbfu4NLaDo2Qf4a07hwfXlm4tH6/d/7bnPfvS xj8OX125PXr76eDoa2+EHn64OhqPb6w+Onr8HqOPUeuBy5sF/iDf/1QyymWXK6GYqvS4r3gcR2Gi lc9JSLTvKxVqbRK6r0jsB6Iz3KiJMfP3P2OCwf5vhH/3v75ynLn+Y4wRCvVfE8zB/yB4nuoX/WSQ TX5JxDqrVBE1+59RKSv+S8lh/zdCntSLHbxk9bz5P5/fQifzfzG2g8fhvtE11CqHKKaeN0T7lBDF mCkx4nvmHR5agBAQAkKcHuL3FUvtm+hiRFa/W71rL/jO6k+rTxam+tnq8uJUdxcvGBhwxFzyv86y 9Iw/DmrrfyYq+Z9TTiH/NwEuKa6MAQAAAAAAAAAAAADwf/ALzPk2VwAoAAA=""" path = self.path("0_6_anon_dense") with tarfile.open(fileobj=io.BytesIO(base64.b64decode(tgz_dense))) as tf: tf.extractall(path) with tiledb.open(path) as A: self.assertEqual(A.schema.attr(0).name, "") self.assertEqual(A.schema.attr(0)._internal_name, "__attr") self.assertEqual(A[0], 1) mres = A.multi_index[0] self.assertEqual(mres[""], 1) qres = A.query(coords=True).multi_index[0] self.assertEqual(qres["d"], 0) TileDB-Py-0.12.2/tiledb/tests/test_core.py000066400000000000000000000141241417663620700202510ustar00rootroot00000000000000import copy import random import numpy as np from numpy.testing import assert_array_equal import tiledb from tiledb import TileDBError import tiledb.main as core from tiledb.tests.common import DiskTestCase, rand_ascii class CoreCCTest(DiskTestCase): def test_pyquery_basic(self): ctx = tiledb.Ctx() uri = self.path("test_pyquery_basic") with tiledb.from_numpy(uri, np.random.rand(4)) as A: pass with tiledb.open(uri) as a: with tiledb.scope_ctx({"py.init_buffer_bytes": "abcd"}) as testctx: with self.assertRaises(ValueError): core.PyQuery(testctx, a, ("",), (), 0, False) q = core.PyQuery(ctx, a, ("",), (), 0, False) try: q._test_err("bad foo happened") except Exception as exc: assert isinstance(exc, tiledb.TileDBError) assert exc.message == "bad foo happened" q.set_ranges([[(0, 3)]]) with self.assertRaises(TileDBError): q.set_ranges([[(0, 3.0)]]) q.set_ranges([[(0, np.int32(3))]]) with self.assertRaises(TileDBError): q.set_ranges([[(3, "a")]]) 
with self.assertRaisesRegex( TileDBError, "Failed to cast dim range '\\(1.2344, 5.6789\\)' to dim type UINT64.*$", ): q.set_ranges([[(1.2344, 5.6789)]]) with self.assertRaisesRegex( TileDBError, "Failed to cast dim range '\\('aa', 'bbbb'\\)' to dim type UINT64.*$", ): q.set_ranges([[("aa", "bbbb")]]) with tiledb.open(uri) as a: q2 = core.PyQuery(ctx, a, ("",), (), 0, False) q2.set_ranges([[(0, 3)]]) q2.submit() res = q2.results()[""][0] res.dtype = np.double assert_array_equal(res, a[:]) def test_pyquery_init(self): uri = self.path("test_pyquery_init") intmax = np.iinfo(np.int64).max config_dict = { "sm.tile_cache_size": "100", "py.init_buffer_bytes": str(intmax), "py.alloc_max_bytes": str(intmax), } with tiledb.scope_ctx(config_dict) as ctx: with tiledb.from_numpy(uri, np.random.rand(4)) as A: pass with tiledb.open(uri) as a: q = core.PyQuery(ctx, a, ("",), (), 0, False) self.assertEqual(q._test_init_buffer_bytes, intmax) self.assertEqual(q._test_alloc_max_bytes, intmax) with self.assertRaisesRegex( ValueError, "Invalid parameter: 'py.alloc_max_bytes' must be >= 1 MB ", ), tiledb.scope_ctx({"py.alloc_max_bytes": 10}) as ctx2: q = core.PyQuery(ctx2, a, ("",), (), 0, False) def test_import_buffer(self): uri = self.path("test_import_buffer") def_tile = 1 if tiledb.libtiledb.version() < (2, 2): def_tile = 2 dom = tiledb.Domain( tiledb.Dim(domain=(0, 3), tile=def_tile, dtype=np.int64), tiledb.Dim(domain=(0, 3), tile=def_tile, dtype=np.int64), ) attrs = [ tiledb.Attr(name="", dtype=np.float64), tiledb.Attr(name="foo", dtype=np.int32), tiledb.Attr(name="str", dtype=str), ] schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False) tiledb.DenseArray.create(uri, schema) data_orig = { "": 2.5 * np.identity(4, dtype=np.float64), "foo": 8 * np.identity(4, dtype=np.int32), "str": np.array( [rand_ascii(random.randint(0, 5)) for _ in range(16)], dtype="U0" ).reshape(4, 4), } with tiledb.open(uri, "w") as A: A[:] = data_orig with tiledb.open(uri) as B: assert_array_equal(B[:][""], data_orig[""]), assert_array_equal(B[:]["foo"], data_orig["foo"]) data_mod = { "": 5 * np.identity(4, dtype=np.float64), "foo": 32 * np.identity(4, dtype=np.int32), "str": np.array( [rand_ascii(random.randint(1, 7)) for _ in range(16)], dtype="U0" ).reshape(4, 4), } str_offsets = np.array( [0] + [len(x) for x in data_mod["str"].flatten()[:-1]], dtype=np.uint64 ) str_offsets = np.cumsum(str_offsets) str_raw = np.array( [ord(c) for c in "".join([x for x in data_mod["str"].flatten()])], dtype=np.uint8, ) data_mod_bfr = { "": (data_mod[""].flatten().view(np.uint8), np.array([], dtype=np.uint64)), "foo": ( data_mod["foo"].flatten().view(np.uint8), np.array([], dtype=np.uint64), ), "str": (str_raw.flatten().view(np.uint8), str_offsets), } with tiledb.open(uri) as C: res = C.multi_index[0:3, 0:3] assert_array_equal(res[""], data_orig[""]) assert_array_equal(res["foo"], data_orig["foo"]) assert_array_equal(res["str"], data_orig["str"]) C._set_buffers(copy.deepcopy(data_mod_bfr)) res = C.multi_index[0:3, 0:3] assert_array_equal(res[""], data_mod[""]) assert_array_equal(res["foo"], data_mod["foo"]) assert_array_equal(res["str"], data_mod["str"]) with tiledb.open(uri) as D: D._set_buffers(copy.deepcopy(data_mod_bfr)) res = D[:, :] assert_array_equal(res[""], data_mod[""]) assert_array_equal(res["foo"], data_mod["foo"]) assert_array_equal(res["str"], data_mod["str"]) with tiledb.DenseArray(uri, mode="r") as E, tiledb.scope_ctx() as ctx: # Ensure that query only returns specified attributes q = core.PyQuery(ctx, E, ("foo",), (), 0, 
False) q.set_ranges([[(0, 1)]]) q.submit() r = q.results() self.assertTrue("foo" in r) self.assertTrue("str" not in r) del q TileDB-Py-0.12.2/tiledb/tests/test_dask.py000066400000000000000000000123071417663620700202440ustar00rootroot00000000000000import pytest da = pytest.importorskip("dask.array") import sys import tiledb from tiledb.tests.common import DiskTestCase import numpy as np from numpy.testing import assert_array_equal, assert_approx_equal # override the no_output fixture because it conflicts with these tests # eg: "ResourceWarning: unclosed event loop" @pytest.fixture(scope="function", autouse=True) def no_output(): pass @pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows") class TestDaskSupport(DiskTestCase): def test_dask_from_numpy_1d(self): uri = self.path("np_1attr") A = np.random.randn(50, 50) T = tiledb.from_numpy(uri, A, tile=50) T.close() with tiledb.open(uri) as T: D = da.from_tiledb(T) assert_array_equal(D, A) D2 = da.from_tiledb(uri) assert_array_equal(D2, A) self.assertAlmostEqual( np.mean(A), D2.mean().compute(scheduler="single-threaded") ) def _make_multiattr_2d(self, uri, shape=(0, 100), tile=10): dom = tiledb.Domain( tiledb.Dim("x", (0, 10), dtype=np.uint64, tile=tile), tiledb.Dim("y", (0, 50), dtype=np.uint64, tile=tile), ) schema = tiledb.ArraySchema( attrs=(tiledb.Attr("attr1"), tiledb.Attr("attr2")), domain=dom ) tiledb.DenseArray.create(uri, schema) @pytest.mark.filterwarnings("ignore:There is no current event loop") def test_dask_multiattr_2d(self): uri = self.path("multiattr") self._make_multiattr_2d(uri) with tiledb.DenseArray(uri, "w") as T: ar1 = np.random.randn(*T.schema.shape) ar2 = np.random.randn(*T.schema.shape) T[:] = {"attr1": ar1, "attr2": ar2} with tiledb.DenseArray(uri, mode="r", attr="attr2") as T: # basic round-trip from dask.array D = da.from_tiledb(T, attribute="attr2") assert_array_equal(ar2, np.array(D)) # smoke-test computation # note: re-init from_tiledb each time, or else dask just uses the cached materialization D = da.from_tiledb(uri, attribute="attr2") self.assertAlmostEqual(np.mean(ar2), D.mean().compute(scheduler="threads")) D = da.from_tiledb(uri, attribute="attr2") self.assertAlmostEqual( np.mean(ar2), D.mean().compute(scheduler="single-threaded") ) D = da.from_tiledb(uri, attribute="attr2") self.assertAlmostEqual(np.mean(ar2), D.mean().compute(scheduler="processes")) # test dask.distributed from dask.distributed import Client D = da.from_tiledb(uri, attribute="attr2") with Client() as client: assert_approx_equal(D.mean().compute(), np.mean(ar2)) def test_dask_write(self): uri = self.path("dask_w") D = da.random.random(10, 10) D.to_tiledb(uri) DT = da.from_tiledb(uri) assert_array_equal(D, DT) def test_dask_overlap_blocks(self): uri = self.path("np_overlap_blocks") A = np.ones((2, 50, 50)) T = tiledb.from_numpy(uri, A, tile=(1, 5, 5)) T.close() with tiledb.open(uri) as T: D = da.from_tiledb(T) assert_array_equal(D, A) D2 = da.from_tiledb(uri) assert_array_equal(D2, A) D3 = D2.map_overlap( lambda x: x + 1, depth={0: 0, 1: 1, 2: 1}, dtype=A.dtype, boundary="none" ).compute() assert_array_equal(D2 * 2, D3) def test_labeled_dask_overlap_blocks(self): uri = self.path("np_labeled_overlap_blocks") A = np.ones((2, 50, 50)) dom = tiledb.Domain( tiledb.Dim(name="BANDS", domain=(0, 1), tile=1), tiledb.Dim(name="Y", domain=(0, 49), tile=5, dtype=np.uint64), tiledb.Dim(name="X", domain=(0, 49), tile=5, dtype=np.uint64), ) schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="TDB_VALUES", 
dtype=A.dtype)], ) tiledb.DenseArray.create(uri, schema) with tiledb.open(uri, "w", attr="TDB_VALUES") as T: T[:] = A D2 = da.from_tiledb(uri, attribute="TDB_VALUES") D3 = D2.map_overlap( lambda x: x + 1, depth={0: 0, 1: 1, 2: 1}, dtype=D2.dtype, boundary="none" ).compute() assert_array_equal(D2 + 1, D3) def test_labeled_dask_blocks(self): uri = self.path("np_labeled_map_blocks") A = np.ones((2, 50, 50)) dom = tiledb.Domain( tiledb.Dim(name="BANDS", domain=(0, 1), tile=1), tiledb.Dim(name="Y", domain=(0, 49), tile=5, dtype=np.uint64), tiledb.Dim(name="X", domain=(0, 49), tile=5, dtype=np.uint64), ) schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="TDB_VALUES", dtype=A.dtype)], ) tiledb.DenseArray.create(uri, schema) with tiledb.open(uri, "w", attr="TDB_VALUES") as D1: D1[:] = A D2 = da.from_tiledb(uri, attribute="TDB_VALUES") D3 = D2.map_blocks(lambda x: x + 1, dtype=D2.dtype).compute( scheduler="processes" ) assert_array_equal(D2 + 1, D3) TileDB-Py-0.12.2/tiledb/tests/test_domain_index.py000066400000000000000000000130531417663620700217570ustar00rootroot00000000000000#%% import numpy as np import tiledb from tiledb.tests.common import * class DomainIndexingSparseTest(DiskTestCase): def test_int_domain_indexing(self): path = self.path("int_domain_indexing") dom = tiledb.Domain( tiledb.Dim(name="x", domain=(-10, 10), tile=1, dtype=np.int64) ) schema = tiledb.ArraySchema( domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.float64)] ) tiledb.SparseArray.create(path, schema) X = np.arange(-10, 11, step=1) val = np.random.rand(len(X)) with tiledb.SparseArray(path, mode="w") as A: A[X] = val with tiledb.SparseArray(path) as A: assert_array_equal(A.domain_index[X[0]]["a"], val[0]) assert_array_equal(A.domain_index[X[-1]]["a"], val[-1]) assert_array_equal(A.domain_index[X[0] : X[-1]]["a"], val[:]) # sanity check assert_array_equal(A.domain_index[X[0] : X[-1]]["x"], X[:]) def test_fp_domain_indexing(self): array_path = self.path("test_domain_idx") # test case from https://github.com/TileDB-Inc/TileDB-Py/issues/201 tile = 1 dom = tiledb.Domain( tiledb.Dim(name="x", domain=(-89.75, 89.75), tile=tile, dtype=np.float64), tiledb.Dim(name="y", domain=(-179.75, 179.75), tile=tile, dtype=np.float64), tiledb.Dim(name="z", domain=(157498, 157863), tile=tile, dtype=np.float64), ) schema = tiledb.ArraySchema( domain=dom, sparse=True, attrs=[tiledb.Attr(name="data", dtype=np.float64)] ) tiledb.SparseArray.create(array_path, schema) # fake data X = np.linspace(-89.75, 89.75, 359) Y = np.linspace(-179.75, 179.75, 359) Z = np.linspace(157498, 157857, 359) # data = np.random.rand(*map(lambda x: x[0], (X.shape, Y.shape, Z.shape))) data = np.random.rand(X.shape[0]) with tiledb.SparseArray(array_path, mode="w") as A: A[X, Y, Z] = data with tiledb.SparseArray(array_path) as A: # check direct slicing assert_array_equal(A.domain_index[X[0], Y[0], Z[0]]["data"], data[0]) # check small slice ranges tmp = A.domain_index[ X[0] : np.nextafter(X[0], 0), Y[0] : np.nextafter(Y[0], 0), Z[0] : np.nextafter(Z[0], Z[0] + 1), ] assert_array_equal(tmp["data"], data[0]) # check slicing last element tmp = A.domain_index[X[-1], Y[-1], Z[-1]] assert_array_equal(tmp["data"], data[-1]) # check slice range multiple components tmp = A.domain_index[X[1] : X[2], Y[1] : Y[2], Z[1] : Z[2]] assert_array_equal(tmp["data"], data[1:3]) # check an interior point coords = X[145], Y[145], Z[145] tmp = A.domain_index[coords] assert_array_equal(tmp["x"], X[145]) assert_array_equal(tmp["data"], data[145]) # check 
entire domain tmp = A.domain_index[X[0] : X[-1], Y[0] : Y[-1], Z[0] : Z[-1]] assert_array_equal(tmp["data"], data[:]) # check entire domain # TODO uncomment if vectorized indexing is available # coords = np.array([X,Y,Z]).transpose().flatten() # tmp = A.domain_index[X,Y,Z] # assert_array_equal( # tmp['data'], # data[:] # ) def test_fp_domain_count(self): array_path = self.path("test_domain_count") tile = 1 dom = tiledb.Domain( tiledb.Dim(name="x", domain=(0.0, 2.0), tile=tile, dtype=np.float64), tiledb.Dim(name="y", domain=(0.0, 2.0), tile=tile, dtype=np.float64), ) schema = tiledb.ArraySchema( domain=dom, sparse=True, attrs=[tiledb.Attr(name="data", dtype=np.float64)] ) tiledb.SparseArray.create(array_path, schema) # fake data X = [1.0] Y = [1.0] data = [1.0] with tiledb.SparseArray(array_path, mode="w") as A: A[X, Y] = data with tiledb.SparseArray(array_path) as A: # check direct slicing assert_array_equal(A.domain_index[X[0], Y[0]]["data"], data[0]) # check counting by slice assert_equal(A.domain_index[0:2.0, 0:1.0]["x"].shape[0], 1) assert_equal(A.domain_index[0:2.0, 0:1.0]["y"].shape[0], 1) assert_equal(A.domain_index[0:2.0, np.nextafter(1.0, 2.0)]["x"].shape[0], 0) assert_equal(A.domain_index[0:2.0, np.nextafter(1.0, 2.0)]["y"].shape[0], 0) class DomainIndexingDenseTest(DiskTestCase): def test_int_domain_indexing(self): path = self.path("dense_int_domain_indexing") dom = tiledb.Domain( tiledb.Dim(name="x", domain=(0, 10), tile=1, dtype=np.int64) ) schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float64)] ) tiledb.DenseArray.create(path, schema) X = np.arange(0, 11, step=1) val = np.random.rand(len(X)) with tiledb.DenseArray(path, mode="w") as A: A[:] = val with tiledb.DenseArray(path) as A: assert_array_equal(A.domain_index[X[0]]["a"], val[0]) assert_array_equal(A.domain_index[X[-1]]["a"], val[-1]) assert_array_equal(A.domain_index[X[0] : X[-1]]["a"], val[:]) # sanity check assert_array_equal(A.domain_index[X[0] : X[-1]]["x"], X[:]) TileDB-Py-0.12.2/tiledb/tests/test_examples.py000066400000000000000000000034441417663620700211420ustar00rootroot00000000000000import doctest import glob import os import subprocess import sys import tempfile import pytest # override locally to avoid conflict with capsys used below @pytest.fixture(scope="function", autouse=True) def no_output(): pass class ExamplesTest: """Test runnability of scripts in examples/""" PROJECT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")) @pytest.mark.parametrize( "path", glob.glob(os.path.join(PROJECT_DIR, "examples", "*.py")) ) def test_examples(self, path): # run example script # - in a separate process # - in tmpdir so we don't pollute the source tree # - with exit status checking (should fail tests if example fails) with tempfile.TemporaryDirectory() as tmpdir: try: subprocess.run( [sys.executable, path], cwd=tmpdir, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf8", ) except subprocess.CalledProcessError as ex: pytest.fail(ex.stderr, pytrace=False) @pytest.mark.skipif( sys.platform == "win32", reason="Some doctests are missing a clean-up step on windows", ) @pytest.mark.parametrize( "path", [ os.path.join(PROJECT_DIR, "tiledb", "libtiledb.pyx"), os.path.join(PROJECT_DIR, "tiledb", "fragment.py"), ], ) def test_docs(self, path, capsys): failures, _ = doctest.testfile( path, module_relative=False, verbose=False, optionflags=doctest.NORMALIZE_WHITESPACE, ) if failures: pytest.fail(capsys.readouterr().out) 
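# A condensed, self-contained sketch (not part of the test suite) of the
# Array.domain_index pattern exercised by test_domain_index.py above; the
# path and data are hypothetical. domain_index slices by coordinate *values*
# (inclusive on both ends), not by positions.
import os
import tempfile

import numpy as np

import tiledb

uri = os.path.join(tempfile.mkdtemp(), "domain_index_example")
dom = tiledb.Domain(tiledb.Dim(name="x", domain=(-10, 10), tile=1, dtype=np.int64))
schema = tiledb.ArraySchema(
    domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.float64)]
)
tiledb.SparseArray.create(uri, schema)

with tiledb.SparseArray(uri, mode="w") as A:
    A[np.arange(-10, 11)] = np.random.rand(21)

with tiledb.SparseArray(uri) as A:
    res = A.domain_index[-10:10]  # all 21 coordinates; both endpoints included
    assert len(res["a"]) == 21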
TileDB-Py-0.12.2/tiledb/tests/test_fixes.py000066400000000000000000000067631417663620700204510ustar00rootroot00000000000000import numpy as np import tiledb import concurrent, concurrent.futures from tiledb.tests.common import DiskTestCase from numpy.testing import assert_array_equal class FixesTest(DiskTestCase): def test_ch7727_float32_dim_estimate_incorrect(self): # set max allocation: because windows won't overallocate with tiledb.scope_ctx({"py.alloc_max_bytes": 1024**2 * 100}): uri = self.path() dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 100), dtype=np.float32)) att = tiledb.Attr("", dtype=np.bytes_) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.Array.create(uri, schema) with tiledb.open(uri, mode="w") as T: T[50.4] = b"hello" with tiledb.open(uri, mode="r") as T: assert T[:][""] == b"hello" assert T[50.4][""] == b"hello" def test_ch8292(self): # test fix for ch8292 # We need to ensure that py.alloc_max_bytes is *not* applied to # dense arrays. Dense arrays should have exact estimates based # on the ranges, so there should be no risk of over-estimates. # This test sets py.alloc_max_bytes to 1 less than the expected # result array size, and asserts that the allocated buffers match # the expected result size rather than py.alloc_max_bytes. uri = self.path() max = 1024**2 + 1 with tiledb.from_numpy(uri, np.uint8(range(max))): pass with tiledb.scope_ctx( {"py.init_buffer_bytes": 2 * 1024**2, "py.alloc_max_bytes": 1024**2} ) as ctx3: with tiledb.open(uri) as b: q = tiledb.main.PyQuery(ctx3, b, ("",), (), 0, False) q._return_incomplete = True q.set_ranges([[(0, max)]]) q._allocate_buffers() buffers = q._get_buffers() assert buffers[0].nbytes == max def test_ch10282_concurrent_multi_index(self): """Test concurrent access to a single tiledb.Array using Array.multi_index and Array.df. We pass an array and slice into a function run by a set of futures, along with expected result; then assert that the result from TileDB matches the expectation. """ def slice_array(a: tiledb.Array, indexer, selection, expected): """Helper function to slice a given tiledb.Array with an indexer and assert that the selection matches the expected result.""" res = getattr(a, indexer)[selection][""] if indexer == "df": res = res.values assert_array_equal(res, expected) uri = self.path() data = np.random.rand(100) with tiledb.from_numpy(uri, data): pass futures = [] with tiledb.open(uri) as A: with concurrent.futures.ThreadPoolExecutor(10) as executor: for indexer in ["multi_index", "df"]: # for end_idx in range(1, 100, 5): sel = slice(0, end_idx) expected = data[sel.start : sel.stop + 1] futures.append( executor.submit(slice_array, A, indexer, sel, expected) ) concurrent.futures.wait(futures) # Important: must get each result here or else assertion # failures or exceptions will disappear. 
list(map(lambda x: x.result(), futures)) TileDB-Py-0.12.2/tiledb/tests/test_fragments.py000066400000000000000000000611411417663620700213100ustar00rootroot00000000000000import itertools import numpy as np import pytest import sys import tiledb from tiledb.main import PyFragmentInfo from tiledb.tests.common import DiskTestCase from numpy.testing import assert_array_equal class FragmentInfoTest(DiskTestCase): def setUp(self): super().setUp() if not tiledb.libtiledb.version() >= (2, 2): pytest.skip("Only run FragmentInfo test with TileDB>=2.2") def test_uri_dne(self): with self.assertRaises(tiledb.TileDBError): fragment_info = tiledb.array_fragments("does_not_exist") def test_array_fragments(self): fragments = 3 A = np.zeros(fragments) uri = self.path("test_dense_fragments") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=fragments, dtype=np.int64)) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.DenseArray.create(uri, schema) for fragment_idx in range(fragments): timestamp = fragment_idx + 1 with tiledb.DenseArray(uri, mode="w", timestamp=timestamp) as T: T[fragment_idx : fragment_idx + 1] = fragment_idx fragments_info = tiledb.array_fragments(uri) self.assertEqual(len(fragments_info), 3) self.assertEqual(fragments_info.unconsolidated_metadata_num, 3) self.assertEqual(fragments_info.cell_num, (3, 3, 3)) self.assertEqual( fragments_info.has_consolidated_metadata, (False, False, False) ) self.assertEqual( fragments_info.nonempty_domain, (((0, 0),), ((1, 1),), ((2, 2),)) ) self.assertEqual(fragments_info.sparse, (False, False, False)) self.assertEqual(fragments_info.timestamp_range, ((1, 1), (2, 2), (3, 3))) self.assertEqual(fragments_info.to_vacuum, ()) for idx, frag in enumerate(fragments_info): self.assertEqual(frag.cell_num, 3) self.assertEqual(frag.has_consolidated_metadata, False) self.assertEqual(frag.nonempty_domain, ((idx, idx),)) self.assertEqual(frag.sparse, False) self.assertEqual(frag.timestamp_range, (idx + 1, idx + 1)) if tiledb.libtiledb.version() < (2, 2, 3): assert frag.version == 7 elif tiledb.libtiledb.version() < (2, 3, 0): assert frag.version == 8 else: # make sure the version is within some reasonable bound # but don't pin because that makes testing against dev # more difficult assert frag.version >= 9 assert frag.version < 12 def test_array_fragments_var(self): fragments = 3 uri = self.path("test_array_fragments_var") dom = tiledb.Domain( tiledb.Dim(name="dim", domain=(None, None), tile=None, dtype=np.bytes_) ) schema = tiledb.ArraySchema( domain=dom, sparse=True, attrs=[tiledb.Attr(name="1s", dtype=np.int32, var=True)], ) tiledb.SparseArray.create(uri, schema) for fragment_idx in range(fragments): timestamp = fragment_idx + 1 data = np.array( [ np.array([timestamp] * 1, dtype=np.int32), np.array([timestamp] * 2, dtype=np.int32), np.array([timestamp] * 3, dtype=np.int32), ], dtype="O", ) with tiledb.SparseArray(uri, mode="w", timestamp=timestamp) as T: T[["zero", "one", "two"]] = data fragments_info = tiledb.array_fragments(uri) self.assertEqual( fragments_info.nonempty_domain, ((("one", "zero"),), (("one", "zero"),), (("one", "zero"),)), ) for frag in fragments_info: self.assertEqual(frag.nonempty_domain, (("one", "zero"),)) def test_dense_fragments(self): fragments = 3 A = np.zeros(fragments) uri = self.path("test_dense_fragments") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=fragments, dtype=np.int64)) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) 
tiledb.DenseArray.create(uri, schema) for fragment_idx in range(fragments): timestamp = fragment_idx + 1 with tiledb.DenseArray(uri, mode="w", timestamp=timestamp) as T: T[fragment_idx : fragment_idx + 1] = fragment_idx fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) self.assertEqual(fragment_info.get_num_fragments(), fragment_idx + 1) all_expected_uris = [] for fragment_idx in range(fragments): timestamp = fragment_idx + 1 self.assertEqual( fragment_info.get_timestamp_range()[fragment_idx], (timestamp, timestamp), ) expected_uri = "__{ts}_{ts}".format(uri=uri, ts=timestamp) actual_uri = fragment_info.get_uri()[fragment_idx] all_expected_uris.append(expected_uri) # use .contains because the protocol can vary self.assertTrue(expected_uri in actual_uri) self.assertTrue( actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) ) self.assertFalse(fragment_info.get_sparse()[fragment_idx]) all_actual_uris = fragment_info.get_uri() for actual_uri, expected_uri in zip(all_actual_uris, all_expected_uris): self.assertTrue(expected_uri in actual_uri) self.assertTrue( actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) ) self.assertEqual(fragment_info.get_timestamp_range(), ((1, 1), (2, 2), (3, 3))) self.assertEqual(fragment_info.get_sparse(), (False, False, False)) if tiledb.libtiledb.version() < (2, 2, 3): assert fragment_info.get_version()[0] == 7 elif tiledb.libtiledb.version() < (2, 3, 0): assert fragment_info.get_version()[0] == 8 else: # make sure the version is within some reasonable bound # but don't pin because that makes testing against dev # more difficult assert fragment_info.get_version()[0] >= 9 assert fragment_info.get_version()[0] < 12 def test_sparse_fragments(self): fragments = 3 A = np.zeros(fragments) uri = self.path("test_sparse_fragments") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=fragments, dtype=np.int64)) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) tiledb.SparseArray.create(uri, schema) for fragment_idx in range(fragments): timestamp = fragment_idx + 1 with tiledb.SparseArray(uri, mode="w", timestamp=timestamp) as T: T[fragment_idx] = fragment_idx fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) self.assertEqual(fragment_info.get_num_fragments(), fragment_idx + 1) all_expected_uris = [] for fragment_idx in range(fragments): timestamp = fragment_idx + 1 self.assertEqual( fragment_info.get_timestamp_range()[fragment_idx], (timestamp, timestamp), ) if uri[0] != "/": uri = "/" + uri.replace("\\", "/") expected_uri = "/__{ts}_{ts}".format(uri=uri, ts=timestamp) actual_uri = fragment_info.get_uri()[fragment_idx] all_expected_uris.append(expected_uri) self.assertTrue(expected_uri in actual_uri) self.assertTrue( actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) ) self.assertTrue(fragment_info.get_sparse()[fragment_idx]) all_actual_uris = fragment_info.get_uri() for actual_uri, expected_uri in zip(all_actual_uris, all_expected_uris): self.assertTrue(expected_uri in actual_uri) self.assertTrue( actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) ) self.assertEqual(fragment_info.get_timestamp_range(), ((1, 1), (2, 2), (3, 3))) self.assertEqual(fragment_info.get_sparse(), (True, True, True)) if tiledb.libtiledb.version() < (2, 2, 3): assert fragment_info.get_version()[0] == 7 elif tiledb.libtiledb.version() < (2, 3, 0): assert fragment_info.get_version()[0] == 8 else: # make sure the version is within some 
reasonable bound # but don't pin because that makes testing against dev # more difficult assert fragment_info.get_version()[0] >= 9 assert fragment_info.get_version()[0] < 12 def test_nonempty_domain(self): uri = self.path("test_nonempty_domain") dom = tiledb.Domain( tiledb.Dim(name="x", domain=(1, 4)), tiledb.Dim(name="y", domain=(-2.0, 2.0), dtype=np.float32), ) att = tiledb.Attr() schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) tiledb.SparseArray.create(uri, schema) with tiledb.SparseArray(uri, mode="w") as T: coords = np.array( list(itertools.product(np.arange(1, 5), np.arange(-1, 3))) ) x = coords[:, 0] y = coords[:, 1] T[x, y] = np.array(range(16)) with tiledb.SparseArray(uri, mode="w") as T: x = [1, 3] y = [-1.5, -1.25] T[x, y] = np.array(range(2)) fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) self.assertEqual( fragment_info.get_nonempty_domain(), (((1, 4), (-1.0, 2.0)), ((1, 3), (-1.5, -1.25))), ) def test_nonempty_domain_date(self): uri = self.path("test_nonempty_domain") dom = tiledb.Domain( tiledb.Dim( name="day", domain=(np.datetime64("2010-01-01"), np.datetime64("2020")), dtype="datetime64[D]", ) ) att = tiledb.Attr() schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) tiledb.SparseArray.create(uri, schema) with tiledb.SparseArray(uri, mode="w") as T: dates = np.array( ["2017-04-01", "2019-10-02", "2019-10-03", "2019-12-04"], dtype="datetime64[D]", ) T[dates] = np.array(range(4)) with tiledb.SparseArray(uri, mode="w") as T: dates = np.array( ["2010-01-01", "2013-10-02", "2014-10-03"], dtype="datetime64[D]" ) T[dates] = np.array(range(3)) fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) self.assertEqual( fragment_info.get_nonempty_domain(), ( ((np.datetime64("2017-04-01"), np.datetime64("2019-12-04")),), ((np.datetime64("2010-01-01"), np.datetime64("2014-10-03")),), ), ) def test_nonempty_domain_strings(self): uri = self.path("test_nonempty_domain_strings") dom = tiledb.Domain( tiledb.Dim(name="x", domain=(None, None), dtype=np.bytes_), tiledb.Dim(name="y", domain=(None, None), dtype=np.bytes_), ) att = tiledb.Attr() schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) tiledb.SparseArray.create(uri, schema) with tiledb.SparseArray(uri, mode="w") as T: x_dims = [b"a", b"b", b"c", b"d"] y_dims = [b"e", b"f", b"g", b"h"] T[x_dims, y_dims] = np.array([1, 2, 3, 4]) with tiledb.SparseArray(uri, mode="w") as T: x_dims = [b"a", b"b"] y_dims = [b"e", b"f"] T[x_dims, y_dims] = np.array([1, 2]) fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) self.assertEqual( fragment_info.get_nonempty_domain(), ((("a", "d"), ("e", "h")), (("a", "b"), ("e", "f"))), ) def test_cell_num(self): uri = self.path("test_cell_num") dom = tiledb.Domain(tiledb.Dim(domain=(1, 4))) att = tiledb.Attr() schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) tiledb.SparseArray.create(uri, schema) fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) with tiledb.SparseArray(uri, mode="w") as T: a = np.array([1, 2, 3, 4]) T[a] = a with tiledb.SparseArray(uri, mode="w") as T: b = np.array([1, 2]) T[b] = b fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) self.assertEqual(fragment_info.get_cell_num(), (len(a), len(b))) def test_consolidated_fragment_metadata(self): fragments = 3 A = np.zeros(fragments) uri = self.path("test_consolidated_fragment_metadata") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), dtype=np.int64)) att = 
tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.DenseArray.create(uri, schema) for fragment_idx in range(fragments): with tiledb.DenseArray(uri, mode="w") as T: T[fragment_idx : fragment_idx + 1] = fragment_idx fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) self.assertEqual(fragment_info.get_unconsolidated_metadata_num(), 3) self.assertEqual( fragment_info.get_has_consolidated_metadata(), (False, False, False) ) tiledb.consolidate( uri, config=tiledb.Config(params={"sm.consolidation.mode": "fragment_meta"}) ) fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) self.assertEqual(fragment_info.get_unconsolidated_metadata_num(), 0) self.assertEqual( fragment_info.get_has_consolidated_metadata(), (True, True, True) ) def test_fragments_to_vacuum(self): fragments = 3 A = np.zeros(fragments) uri = self.path("test_fragments_to_vacuum") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), dtype=np.int64)) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.DenseArray.create(uri, schema) for fragment_idx in range(fragments): with tiledb.DenseArray(uri, mode="w") as T: T[fragment_idx : fragment_idx + 1] = fragment_idx fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) expected_vacuum_uri = fragment_info.get_uri()[0] tiledb.consolidate( uri, config=tiledb.Config(params={"sm.vacuum.mode": "fragments"}) ) fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) assert len(fragment_info.get_to_vacuum()) == 3 assert fragment_info.get_to_vacuum()[0] == expected_vacuum_uri tiledb.vacuum(uri) fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) assert len(fragment_info.get_to_vacuum()) == 0 @pytest.mark.skipif( tiledb.libtiledb.version() < (2, 5, 0), reason="MBRs in FragmentInfo only available in libtiledb>=2.5.0", ) def test_get_mbr(self): fragments = 3 uri = self.path("test_get_mbr") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=fragments, dtype=np.int64)) att = tiledb.Attr(dtype=np.uint64) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.Array.create(uri, schema) for fragi in range(fragments): timestamp = fragi + 1 with tiledb.open(uri, mode="w", timestamp=timestamp) as T: T[np.array(range(0, fragi + 1))] = [fragi] * (fragi + 1) expected_mbrs = ((((0, 0),),), (((0, 1),),), (((0, 2),),)) py_fragment_info = PyFragmentInfo(uri, schema, True, tiledb.default_ctx()) assert py_fragment_info.get_mbrs() == expected_mbrs array_fragments = tiledb.array_fragments(uri) with pytest.raises(AttributeError) as excinfo: array_fragments.mbrs assert "retrieving minimum bounding rectangles is disabled" in str( excinfo.value ) with self.assertRaises(AttributeError): array_fragments[0].mbrs assert "retrieving minimum bounding rectangles is disabled" in str( excinfo.value ) array_fragments = tiledb.array_fragments(uri, include_mbrs=True) assert array_fragments.mbrs == expected_mbrs assert array_fragments[0].mbrs == expected_mbrs[0] assert array_fragments[1].mbrs == expected_mbrs[1] assert array_fragments[2].mbrs == expected_mbrs[2] class CreateArrayFromFragmentsTest(DiskTestCase): @pytest.mark.skipif( sys.platform == "win32", reason="VFS.copy() does not run on windows" ) def test_create_array_from_fragments(self): dshape = (1, 3) num_frags = 10 def create_array(target_path, dshape): dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) att = tiledb.Attr(dtype="int64") schema = tiledb.ArraySchema(domain=dom,
attrs=(att,), sparse=True) tiledb.libtiledb.Array.create(target_path, schema) def write_fragments(target_path, dshape, num_frags): for i in range(1, num_frags + 1): with tiledb.open(target_path, "w", timestamp=i) as A: A[[1, 2, 3]] = np.random.rand(dshape[1]) src_path = self.path("test_create_array_from_fragments_src") dst_path = self.path("test_create_array_from_fragments_dst") ts = tuple((t, t) for t in range(1, 11)) create_array(src_path, dshape) write_fragments(src_path, dshape, num_frags) frags = tiledb.FragmentInfoList(src_path) assert len(frags) == 10 assert frags.timestamp_range == ts tiledb.create_array_from_fragments(src_path, dst_path, (3, 6)) frags = tiledb.FragmentInfoList(dst_path) assert len(frags) == 4 assert frags.timestamp_range == ts[2:6] class CopyFragmentsToExistingArrayTest(DiskTestCase): @pytest.mark.skipif( sys.platform == "win32", reason="VFS.copy() does not run on windows" ) def test_copy_fragments_to_existing_array(self): def create_array(target_path, dshape): dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) att = tiledb.Attr(dtype="int64") schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.libtiledb.Array.create(target_path, schema) def write_fragments(target_path, dshape, num_frags, ts_start=1): for i in range(ts_start, ts_start + num_frags): with tiledb.open(target_path, "w", timestamp=i) as A: A[[1, 2, 3]] = np.random.rand(dshape[1]) src_dshape = (1, 3) src_num_frags = 10 src_path = self.path("test_copy_fragments_to_existing_array_src") create_array(src_path, src_dshape) write_fragments(src_path, src_dshape, src_num_frags) dst_dshape = (1, 3) dst_num_frags = 10 dst_path = self.path("test_copy_fragments_to_existing_array_dst") create_array(dst_path, dst_dshape) write_fragments(dst_path, dst_dshape, dst_num_frags, 11) ts = tuple((t, t) for t in range(1, 21)) frags = tiledb.FragmentInfoList(dst_path) assert len(frags) == 10 assert frags.timestamp_range == ts[10:] tiledb.copy_fragments_to_existing_array(src_path, dst_path, (3, 6)) frags = tiledb.FragmentInfoList(dst_path) assert len(frags) == 14 assert frags.timestamp_range == ts[2:6] + ts[10:] @pytest.mark.skipif( sys.platform == "win32", reason="VFS.copy() does not run on windows" ) def test_copy_fragments_to_existing_array_mismatch(self): def create_array(target_path, attr_type): dom = tiledb.Domain(tiledb.Dim(domain=(1, 3), tile=3)) att = tiledb.Attr(dtype=attr_type) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.libtiledb.Array.create(target_path, schema) def write_fragments(target_path): for i in range(10): with tiledb.open(target_path, "w") as A: A[[1, 2, 3]] = np.random.rand(3) src_path = self.path("test_copy_fragments_to_existing_array_evolved_src") create_array(src_path, "int64") write_fragments(src_path) dst_path = self.path("test_copy_fragments_to_existing_array_evolved_dst") create_array(dst_path, "int32") write_fragments(dst_path) with self.assertRaises(tiledb.TileDBError): tiledb.copy_fragments_to_existing_array(src_path, dst_path, (3, 6)) @pytest.mark.skipif( sys.platform == "win32", reason="VFS.copy() does not run on windows" ) def test_copy_fragments_to_existing_array_evolved(self): def create_array(target_path): dom = tiledb.Domain(tiledb.Dim(domain=(1, 3), tile=3)) att = tiledb.Attr(dtype="int64") schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.libtiledb.Array.create(target_path, schema) def write_fragments(target_path): for i in range(10): with tiledb.open(target_path, "w") as A: A[[1, 2, 3]] = 
np.random.rand(3) src_path = self.path("test_copy_fragments_to_existing_array_evolved_src") create_array(src_path) write_fragments(src_path) dst_path = self.path("test_copy_fragments_to_existing_array_evolved_dst") create_array(dst_path) write_fragments(dst_path) ctx = tiledb.default_ctx() se = tiledb.ArraySchemaEvolution(ctx) se.add_attribute(tiledb.Attr("a2", dtype=np.float64)) se.array_evolve(src_path) with self.assertRaises(tiledb.TileDBError): tiledb.copy_fragments_to_existing_array(src_path, dst_path, (3, 6)) class DeleteFragmentsTest(DiskTestCase): def test_delete_fragments(self): dshape = (1, 3) num_writes = 10 def create_array(target_path, dshape): dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) att = tiledb.Attr(dtype="int64") schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.libtiledb.Array.create(target_path, schema) def write_fragments(target_path, dshape, num_writes): for i in range(1, num_writes + 1): with tiledb.open(target_path, "w", timestamp=i) as A: A[[1, 2, 3]] = np.random.rand(dshape[1]) path = self.path("test_delete_fragments") ts = tuple((t, t) for t in range(1, 11)) create_array(path, dshape) write_fragments(path, dshape, num_writes) frags = tiledb.array_fragments(path) assert len(frags) == 10 assert frags.timestamp_range == ts tiledb.delete_fragments(path, (3, 6)) frags = tiledb.array_fragments(path) assert len(frags) == 6 assert frags.timestamp_range == ts[:2] + ts[6:] def test_delete_fragments_with_schema_evolution(self): path = self.path("test_delete_fragments_with_schema_evolution") dshape = (1, 3) dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) att = tiledb.Attr(name="a1", dtype=np.float64) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.libtiledb.Array.create(path, schema) ts1_data = np.random.rand(3) with tiledb.open(path, "w", timestamp=1) as A: A[[1, 2, 3]] = ts1_data ctx = tiledb.default_ctx() se = tiledb.ArraySchemaEvolution(ctx) se.add_attribute(tiledb.Attr("a2", dtype=np.float64)) se.array_evolve(path) ts2_data = np.random.rand(3) with tiledb.open(path, "w", timestamp=2) as A: A[[1, 2, 3]] = {"a1": ts2_data, "a2": ts2_data} with tiledb.open(path, "r") as A: assert A.schema.has_attr("a1") assert A.schema.has_attr("a2") assert_array_equal(A[:]["a1"], ts2_data) assert_array_equal(A[:]["a2"], ts2_data) tiledb.delete_fragments(path, (2, 2)) with tiledb.open(path, "r") as A: assert A.schema.has_attr("a1") assert not A.schema.has_attr("a2") assert_array_equal(A[:]["a1"], ts1_data) TileDB-Py-0.12.2/tiledb/tests/test_hypothesis.py000066400000000000000000000042311417663620700215160ustar00rootroot00000000000000import tiledb import numpy as np import pandas as pd import pandas._testing as tm import hypothesis import hypothesis.strategies as st from hypothesis import given from numpy.testing import assert_array_equal from tiledb.tests.common import DiskTestCase class AttrDataTest(DiskTestCase): @hypothesis.settings(deadline=1000) @given(st.binary()) def test_bytes_numpy(self, data): # TODO this test is slow. 
might be nice to run with in-memory # VFS (if faster) but need to figure out correct setup # uri = "mem://" + str(uri_int) uri = self.path() if data == b"" or data.count(b"\x00") == len(data): # single-cell empty writes are not supported; TileDB PR 1646 array = np.array([data, b"1"], dtype="S0") else: array = np.array([data], dtype="S0") # DEBUG tiledb.stats_enable() tiledb.stats_reset() # END DEBUG with tiledb.from_numpy(uri, array) as A: pass with tiledb.open(uri) as A: assert_array_equal(A.multi_index[:][""], array) hypothesis.note(tiledb.stats_dump(print_out=False)) # DEBUG tiledb.stats_disable() @hypothesis.settings(deadline=1000) @given(st.binary()) def test_bytes_df(self, data): # TODO this test is slow. might be nice to run with in-memory # VFS (if faster) but need to figure out correct setup # uri = "mem://" + str(uri_int) uri_df = self.path() if data == b"" or data.count(b"\x00") == len(data): # single-cell empty writes are not supported; TileDB PR 1646 array = np.array([data, b"1"], dtype="S0") else: array = np.array([data], dtype="S0") series = pd.Series(array) df = pd.DataFrame({"": series}) # DEBUG tiledb.stats_enable() tiledb.stats_reset() # END DEBUG tiledb.from_pandas(uri_df, df, sparse=False) with tiledb.open(uri_df) as A: tm.assert_frame_equal(A.df[:], df) hypothesis.note(tiledb.stats_dump(print_out=False)) # DEBUG tiledb.stats_disable() TileDB-Py-0.12.2/tiledb/tests/test_libtiledb.py000066400000000000000000005031131417663620700212540ustar00rootroot00000000000000import gc import io import itertools import os import pickle import random import re import urllib import subprocess import sys import textwrap import time import unittest import warnings from collections import OrderedDict from contextlib import redirect_stdout import numpy as np import psutil import pytest from numpy.testing import assert_array_equal import tiledb from tiledb.tests.common import ( assert_captured, assert_subarrays_equal, assert_unordered_equal, DiskTestCase, rand_ascii, rand_ascii_bytes, rand_utf8, ) from tiledb.tests.fixtures import ( sparse_cell_order, test_incomplete_return_array, INTEGER_DTYPES, ) # pyright: reportUnusedVariable=warning from tiledb.util import schema_from_dict class VersionTest(DiskTestCase): def test_libtiledb_version(self): v = tiledb.libtiledb.version() self.assertIsInstance(v, tuple) self.assertTrue(len(v) == 3) self.assertTrue(v[0] >= 1, "TileDB major version must be >= 1") def test_tiledbpy_version(self): v = tiledb.version.version self.assertIsInstance(v, str) v = tiledb.version() self.assertIsInstance(v, tuple) self.assertTrue(3 <= len(v) <= 5) class StatsTest(DiskTestCase): def test_stats(self, capfd): tiledb.libtiledb.stats_enable() tiledb.libtiledb.stats_reset() tiledb.libtiledb.stats_disable() tiledb.libtiledb.stats_enable() with tiledb.from_numpy(self.path("test_stats"), np.arange(10)) as T: pass # basic output check for read stats tiledb.libtiledb.stats_reset() with tiledb.open(self.path("test_stats")) as T: tiledb.libtiledb.stats_enable() assert_array_equal(T, np.arange(10)) # test stdout version tiledb.stats_dump() assert_captured(capfd, "TileDB Embedded Version:") # test string version stats_v = tiledb.stats_dump(print_out=False) if tiledb.libtiledb.version() < (2, 3): self.assertTrue("==== READ ====" in stats_v) else: self.assertTrue('"timers": {' in stats_v) self.assertTrue("==== Python Stats ====" in stats_v) if tiledb.libtiledb.version() < (2, 3): stats_quiet = tiledb.stats_dump(print_out=False, verbose=False) self.assertTrue("Time to load array schema" 
not in stats_quiet) # TODO seems to be a regression, no JSON stats_json = tiledb.stats_dump(json=True) self.assertTrue(isinstance(stats_json, dict)) self.assertTrue("CONSOLIDATE_COPY_ARRAY" in stats_json) @pytest.mark.skipif( "pytest.tiledb_vfs == 's3'", reason="Test not yet supported with S3" ) class TestConfig(DiskTestCase): def test_config(self): config = tiledb.Config() config["sm.tile_cache_size"] = 100 assert repr(config) is not None tiledb.Ctx(config) def test_ctx_config(self): ctx = tiledb.Ctx({"sm.tile_cache_size": 100}) config = ctx.config() self.assertEqual(config["sm.tile_cache_size"], "100") def test_vfs_config(self): config = tiledb.Config() config["vfs.min_parallel_size"] = 1 ctx = tiledb.Ctx() self.assertEqual(ctx.config()["vfs.min_parallel_size"], "10485760") vfs = tiledb.VFS(config, ctx=ctx) self.assertEqual(vfs.config()["vfs.min_parallel_size"], "1") def test_config_iter(self): config = tiledb.Config() k, v = [], [] for p in config.items(): k.append(p[0]) v.append(p[1]) self.assertTrue(len(k) > 0) k, v = [], [] for p in config.items("vfs.s3."): k.append(p[0]) v.append(p[1]) self.assertTrue(len(k) > 0) def test_config_bad_param(self): config = tiledb.Config() config["sm.foo"] = "bar" ctx = tiledb.Ctx(config) self.assertEqual(ctx.config()["sm.foo"], "bar") def test_config_unset(self): config = tiledb.Config() config["sm.tile_cache_size"] = 100 del config["sm.tile_cache_size"] # check that config parameter is default self.assertEqual( config["sm.tile_cache_size"], tiledb.Config()["sm.tile_cache_size"] ) def test_config_from_file(self): # skip: because Config.load doesn't support VFS-supported URIs? if pytest.tiledb_vfs == "s3": pytest.skip( "TODO need more plumbing to make pandas use TileDB VFS to read CSV files" ) config_path = self.path("config") with tiledb.FileIO(self.vfs, config_path, "wb") as fh: fh.write("sm.tile_cache_size 100") config = tiledb.Config.load(config_path) self.assertEqual(config["sm.tile_cache_size"], "100") def test_ctx_config_from_file(self): config_path = self.path("config") vfs = tiledb.VFS() with tiledb.FileIO(vfs, config_path, "wb") as fh: fh.write("sm.tile_cache_size 100") ctx = tiledb.Ctx(config=tiledb.Config.load(config_path)) config = ctx.config() self.assertEqual(config["sm.tile_cache_size"], "100") def test_ctx_config_dict(self): ctx = tiledb.Ctx(config={"sm.tile_cache_size": "100"}) config = ctx.config() assert issubclass(type(config), tiledb.libtiledb.Config) self.assertEqual(config["sm.tile_cache_size"], "100") class GroupTestCase(DiskTestCase): def setup_method(self): super().setup_method() self.group1 = self.path("group1") self.group2 = self.path("group1/group2") self.group3 = self.path("group1/group3") self.group4 = self.path("group1/group3/group4") tiledb.group_create(self.group1) tiledb.group_create(self.group2) tiledb.group_create(self.group3) tiledb.group_create(self.group4) def is_group(self, uri): return tiledb.object_type(uri) == "group" class GroupTest(GroupTestCase): def test_is_group(self): self.assertTrue(self.is_group(self.group1)) self.assertTrue(self.is_group(self.group2)) self.assertTrue(self.is_group(self.group3)) self.assertTrue(self.is_group(self.group4)) def test_walk_group(self): if pytest.tiledb_vfs == "s3": pytest.skip("S3 does not have empty directories") groups = [] def append_to_groups(path, obj): groups.append((os.path.normpath(path), obj)) tiledb.walk(self.path(""), append_to_groups, order="preorder") groups.sort() self.assertTrue(groups[0][0].endswith(self.group1) and groups[0][1] == "group")
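# tiledb.walk() invokes its callback with (path, object_type) for every
# TileDB object under the given URI; "preorder" yields parents before
# children, "postorder" the reverse, which is the ordering asserted here and
# below. Minimal sketch (illustrative only; `root_uri` stands for any group
# URI):
#
#   def show(path, obj_type):
#       print(obj_type, path)
#
#   tiledb.walk(root_uri, show, order="preorder")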
self.assertTrue(groups[1][0].endswith(self.group2) and groups[1][1] == "group") self.assertTrue(groups[2][0].endswith(self.group3) and groups[2][1] == "group") self.assertTrue(groups[3][0].endswith(self.group4) and groups[3][1] == "group") groups = [] tiledb.walk(self.path(""), append_to_groups, order="postorder") self.assertTrue(groups[0][0].endswith(self.group2) and groups[0][1] == "group") self.assertTrue(groups[1][0].endswith(self.group4) and groups[1][1] == "group") self.assertTrue(groups[2][0].endswith(self.group3) and groups[2][1] == "group") self.assertTrue(groups[3][0].endswith(self.group1) and groups[3][1] == "group") def test_remove_group(self): tiledb.remove(self.group3) self.assertFalse(self.is_group(self.group3)) self.assertFalse(self.is_group(self.group4)) def test_move_group(self): self.assertTrue(self.is_group(self.group2)) tiledb.move(self.group2, self.group2 + "_moved") self.assertFalse(self.is_group(self.group2)) self.assertTrue(self.is_group(self.group2 + "_moved")) class DimensionTest(unittest.TestCase): def test_minimal_dimension(self): dim = tiledb.Dim(domain=(0, 4), tile=5) self.assertEqual(dim.name, "__dim_0", "automatic dimension name is incorrect") self.assertEqual(dim.shape, (5,)) self.assertEqual(dim.tile, 5) def test_dimension(self): dim = tiledb.Dim(name="d1", domain=(0, 3), tile=2) self.assertEqual(dim.name, "d1") self.assertEqual(dim.shape, (4,)) self.assertEqual(dim.tile, 2) def test_dimension_filter(self): filters = [tiledb.GzipFilter(2)] dim = tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=filters) self.assertEqual(dim.filters, filters) filter_list = tiledb.FilterList(filters) dim = tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=filter_list) self.assertEqual(dim.filters, filter_list) with self.assertRaises(TypeError): tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=1) def test_datetime_dimension(self): # Regular usage dim = tiledb.Dim( name="d1", domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), tile=np.timedelta64(20, "D"), dtype=np.datetime64("", "D"), ) self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "D"))) self.assertEqual(dim.tile, np.timedelta64(20, "D")) self.assertNotEqual(dim.tile, np.timedelta64(21, "D")) self.assertNotEqual(dim.tile, np.timedelta64(20, "W")) # Sanity check unit self.assertTupleEqual( dim.domain, (np.datetime64("2010-01-01"), np.datetime64("2020-01-01")) ) self.assertEqual(dim.shape, (3653,)) # No tile extent specified: this is not an error in 2.2 if tiledb.libtiledb.version() < (2, 2): with self.assertRaises(tiledb.TileDBError): tiledb.Dim( name="d1", domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), dtype=np.datetime64("", "D"), ) # Integer tile extent is ok dim = tiledb.Dim( name="d1", domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), tile=20, dtype=np.datetime64("", "D"), ) self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "D"))) self.assertEqual(dim.tile, np.timedelta64(20, "D")) # Year resolution dim = tiledb.Dim( name="d1", domain=(np.datetime64("2010"), np.datetime64("2020")), tile=5, dtype=np.datetime64("", "Y"), ) self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "Y"))) self.assertEqual(dim.tile, np.timedelta64(5, "Y")) self.assertTupleEqual( dim.domain, (np.datetime64("2010", "Y"), np.datetime64("2020", "Y")) ) # End domain promoted to day resolution dim = tiledb.Dim( name="d1", domain=(np.datetime64("2010-01-01"), np.datetime64("2020")), tile=2, dtype=np.datetime64("", "D"), ) self.assertEqual(dim.tile, np.timedelta64(2, "D")) 
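# Worked example of the datetime arithmetic behind these assertions: for a
# datetime64[D] dimension, the inclusive domain (2010-01-01, 2020-01-01)
# covers 365 * 10 + 2 leap days + 1 = 3653 slots, hence the shape (3653,)
# asserted earlier in this test, and a plain integer tile extent is promoted
# to an np.timedelta64 in the dimension's date unit.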
self.assertTupleEqual( dim.domain, (np.datetime64("2010-01-01", "D"), np.datetime64("2020-01-01", "D")), ) # Domain values can't be integral with self.assertRaises(TypeError): dim = tiledb.Dim( name="d1", domain=(-10, 10), tile=2, dtype=np.datetime64("", "D") ) class DomainTest(DiskTestCase): def test_domain(self, capfd): dims = [ tiledb.Dim("d1", (1, 4), 2, dtype="u8"), tiledb.Dim("d2", (1, 4), 2, dtype="u8"), ] dom = tiledb.Domain(*dims) # check that dumping works dom.dump() assert_captured(capfd, "Name: d1") self.assertEqual(dom.ndim, 2) self.assertEqual(dom.dtype, np.dtype("uint64")) self.assertEqual(dom.shape, (4, 4)) # check that we can iterate over the dimensions dim_names = [dim.name for dim in dom] self.assertEqual(["d1", "d2"], dim_names) # check that we can access dim by name dim_d1 = dom.dim("d1") self.assertEqual(dim_d1, dom.dim(0)) # check that we can construct directly from a List[Dim] dom2 = tiledb.Domain(dims) self.assertEqual(dom, dom2) def test_datetime_domain(self): dim = tiledb.Dim( name="d1", domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), tile=np.timedelta64(20, "D"), dtype=np.datetime64("", "D"), ) dom = tiledb.Domain(dim) self.assertEqual(dom.dtype, np.datetime64("", "D")) def test_domain_mixed_names_error(self): with self.assertRaises(tiledb.TileDBError): tiledb.Domain( tiledb.Dim("d1", (1, 4), 2, dtype="u8"), tiledb.Dim("__dim_0", (1, 4), 2, dtype="u8"), ) def test_ascii_domain(self, capfd): path = self.path("test_ascii_domain") dim = tiledb.Dim(name="d", dtype="ascii") assert dim.dtype == np.bytes_ dom = tiledb.Domain(dim) dom.dump() assert_captured(capfd, "Type: STRING_ASCII") att = tiledb.Attr(name="a", dtype=np.int64) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.SparseArray.create(path, schema) ascii_coords = ["a", "b", "c", "ABC"] unicode_coords = ["±", "×", "÷", "√"] data = [1, 2, 3, 4] with tiledb.open(path, "w") as A: with self.assertRaises(tiledb.TileDBError): A[unicode_coords] = data A[ascii_coords] = data class AttributeTest(DiskTestCase): def test_minimal_attribute(self): attr = tiledb.Attr() self.assertTrue(attr.isanon) self.assertEqual(attr.name, "") self.assertEqual(attr.dtype, np.float_) # self.assertEqual(attr.compressor, (None, -1)) self.assertFalse(attr.isvar) self.assertFalse(attr.isnullable) def test_attribute(self, capfd): attr = tiledb.Attr("foo") attr.dump() assert_captured(capfd, "Name: foo") assert attr.name == "foo" assert attr.dtype == np.float64, "default attribute type is float64" # compressor, level = attr.compressor # self.assertEqual(compressor, None, "default to no compression") # self.assertEqual(level, -1, "default compression level when none is specified") @pytest.mark.parametrize( "dtype, fill", [ (np.dtype(bytes), b"abc"), # (str, "defg"), (np.float32, np.float32(0.4023573667780681)), (np.float64, np.float64(0.0560602549760851)), (np.dtype("M8[ns]"), np.timedelta64(11, "ns")), (np.dtype([("f0", " # compressor, level = attr.compressor # self.assertEqual(compressor, "zstd") # self.assertEqual(level, 10) def test_ncell_attribute(self): dtype = np.dtype([("", np.int32), ("", np.int32), ("", np.int32)]) attr = tiledb.Attr("foo", dtype=dtype) self.assertEqual(attr.dtype, dtype) self.assertEqual(attr.ncells, 3) # dtype subarrays not supported with self.assertRaises(TypeError): tiledb.Attr("foo", dtype=np.dtype((np.int32, 2))) # mixed type record arrays not supported with self.assertRaises(TypeError): tiledb.Attr("foo", dtype=np.dtype([("", np.float32), ("", np.int32)])) def 
test_ncell_bytes_attribute(self): dtype = np.dtype((np.bytes_, 10)) attr = tiledb.Attr("foo", dtype=dtype) self.assertEqual(attr.dtype, dtype) self.assertEqual(attr.ncells, 10) def test_bytes_var_attribute(self): with pytest.warns(DeprecationWarning, match="Attr given `var=True` but"): attr = tiledb.Attr("foo", var=True, dtype="S1") self.assertEqual(attr.dtype, np.dtype("S")) self.assertTrue(attr.isvar) with pytest.warns(DeprecationWarning, match="Attr given `var=False` but"): attr = tiledb.Attr("foo", var=False, dtype="S") self.assertEqual(attr.dtype, np.dtype("S")) self.assertTrue(attr.isvar) attr = tiledb.Attr("foo", var=True, dtype="S") self.assertEqual(attr.dtype, np.dtype("S")) self.assertTrue(attr.isvar) attr = tiledb.Attr("foo", var=False, dtype="S1") self.assertEqual(attr.dtype, np.dtype("S1")) self.assertFalse(attr.isvar) attr = tiledb.Attr("foo", dtype="S1") self.assertEqual(attr.dtype, np.dtype("S1")) self.assertFalse(attr.isvar) attr = tiledb.Attr("foo", dtype="S") self.assertEqual(attr.dtype, np.dtype("S")) self.assertTrue(attr.isvar) def test_nullable_attribute(self): attr = tiledb.Attr("nullable", nullable=True, dtype=np.int32) self.assertEqual(attr.dtype, np.dtype(np.int32)) self.assertTrue(attr.isnullable) def test_datetime_attribute(self): attr = tiledb.Attr("foo", dtype=np.datetime64("", "D")) assert attr.dtype == np.dtype(np.datetime64("", "D")) assert attr.dtype != np.dtype(np.datetime64("", "Y")) assert attr.dtype != np.dtype(np.datetime64) @pytest.mark.parametrize("sparse", [True, False]) def test_ascii_attribute(self, sparse, capfd): path = self.path("test_ascii") dom = tiledb.Domain( tiledb.Dim(name="d", domain=(1, 4), tile=1, dtype=np.uint32) ) attrs = [tiledb.Attr(name="A", dtype="ascii", var=True)] schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=sparse) tiledb.Array.create(path, schema) ascii_data = ["a", "b", "c", "ABC"] unicode_data = ["±", "×", "÷", "√"] with tiledb.open(path, "w") as A: if sparse: with self.assertRaises(tiledb.TileDBError): A[np.arange(1, 5)] = unicode_data A[np.arange(1, 5)] = ascii_data else: with self.assertRaises(tiledb.TileDBError): A[:] = unicode_data A[:] = ascii_data with tiledb.open(path, "r") as A: assert A.schema.nattr == 1 A.schema.dump() assert_captured(capfd, "Type: STRING_ASCII") assert A.schema.attr("A").dtype == np.bytes_ assert A.schema.attr("A").isascii assert_array_equal(A[:]["A"], np.asarray(ascii_data, dtype=np.bytes_)) class ArraySchemaTest(DiskTestCase): def test_schema_basic(self): dom = tiledb.Domain( tiledb.Dim("d1", (1, 4), 2, dtype="u8"), tiledb.Dim("d2", (1, 4), 2, dtype="u8"), ) attr1 = tiledb.Attr("foo", dtype=float) attr2 = tiledb.Attr("foo", dtype=int) # test unique attributes with self.assertRaises(tiledb.TileDBError): tiledb.ArraySchema(domain=dom, attrs=(attr1, attr2)) # test schema.check schema = tiledb.ArraySchema(domain=dom, attrs=(attr1,)) # valid schema does not raise schema.check() with self.assertRaises(tiledb.TileDBError): schema._make_invalid() schema.check() def test_dense_array_schema(self): domain = tiledb.Domain( tiledb.Dim(domain=(1, 8), tile=2), tiledb.Dim(domain=(1, 8), tile=2) ) a1 = tiledb.Attr("val", dtype="f8") schema = tiledb.ArraySchema(domain=domain, attrs=(a1,)) self.assertFalse(schema.sparse) self.assertEqual(schema.cell_order, "row-major") self.assertEqual(schema.tile_order, "row-major") self.assertEqual(schema.domain, domain) self.assertEqual(schema.ndim, 2) self.assertEqual(schema.shape, (8, 8)) self.assertEqual(schema.nattr, 1) 
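# The shape asserted above follows from the inclusive dimension domains:
# each tiledb.Dim(domain=(1, 8)) contributes 8 - 1 + 1 = 8 slots, so the two
# dimensions yield schema.shape == (8, 8) independent of the tile extent.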
self.assertEqual(schema.domain.homogeneous, True) self.assertEqual(schema.attr(0), a1) self.assertTrue(schema.has_attr("val")) self.assertFalse(schema.has_attr("nononoattr")) self.assertEqual(schema, tiledb.ArraySchema(domain=domain, attrs=(a1,))) self.assertNotEqual( schema, tiledb.ArraySchema(domain=domain, attrs=(a1,), sparse=True) ) with self.assertRaises(tiledb.TileDBError): schema.allows_duplicates # test iteration over attributes self.assertEqual(list(schema), [a1]) with self.assertRaisesRegex( tiledb.TileDBError, "Cannot set cell order; Hilbert order is only applicable to sparse arrays", ): tiledb.ArraySchema( domain=domain, attrs=(a1,), sparse=False, cell_order="hilbert" ) def test_dense_array_schema_fp_domain_error(self): dom = tiledb.Domain(tiledb.Dim(domain=(1, 8), tile=2, dtype=np.float64)) att = tiledb.Attr("val", dtype=np.float64) with self.assertRaises(tiledb.TileDBError): tiledb.ArraySchema(domain=dom, attrs=(att,)) def test_sparse_schema(self, capfd): # create dimensions d1 = tiledb.Dim("d1", domain=(1, 1000), tile=10, dtype="uint64") d2 = tiledb.Dim("d2", domain=(101, 10000), tile=100, dtype="uint64") # create domain domain = tiledb.Domain(d1, d2) # create attributes a1 = tiledb.Attr("a1", dtype="int32,int32,int32") a2 = tiledb.Attr( "a2", filters=tiledb.FilterList([tiledb.GzipFilter(-1)]), dtype="float32" ) # create sparse array with schema coords_filters = tiledb.FilterList([tiledb.ZstdFilter(4)]) offsets_filters = tiledb.FilterList([tiledb.LZ4Filter(5)]) validity_filters = tiledb.FilterList([tiledb.GzipFilter(9)]) schema = tiledb.ArraySchema( domain=domain, attrs=(a1, a2), capacity=10, cell_order="col-major", tile_order="row-major", allows_duplicates=True, sparse=True, coords_filters=coords_filters, offsets_filters=offsets_filters, validity_filters=validity_filters, ) schema.dump() assert_captured(capfd, "Array type: sparse") self.assertTrue(schema.sparse) self.assertEqual(schema.capacity, 10) self.assertEqual(schema.cell_order, "col-major") self.assertEqual(schema.tile_order, "row-major") # # self.assertEqual(schema.coords_compressor, ('zstd', 4)) # self.assertEqual(schema.offsets_compressor, ('lz4', 5)) self.assertEqual(len(schema.coords_filters), 1) self.assertEqual(len(schema.offsets_filters), 1) self.assertEqual(len(schema.validity_filters), 1) self.assertEqual(schema.domain, domain) self.assertEqual(schema.ndim, 2) self.assertEqual(schema.shape, (1000, 9900)) self.assertEqual(schema.nattr, 2) self.assertEqual(schema.attr(0), a1) self.assertEqual(schema.attr("a2"), a2) self.assertEqual(schema.allows_duplicates, True) self.assertEqual( schema, tiledb.ArraySchema( domain=domain, attrs=(a1, a2), capacity=10, cell_order="col-major", tile_order="row-major", allows_duplicates=True, sparse=True, coords_filters=coords_filters, offsets_filters=offsets_filters, validity_filters=validity_filters, ), ) # test iteration over attributes self.assertEqual(list(schema), [a1, a2]) with self.assertRaisesRegex( tiledb.TileDBError, "Cannot set tile order; Hilbert order is not applicable to tiles", ): tiledb.ArraySchema( domain=domain, attrs=(a1,), sparse=True, tile_order="hilbert" ) def test_sparse_schema_filter_list(self, capfd): # create dimensions d1 = tiledb.Dim("d1", domain=(1, 1000), tile=10, dtype="uint64") d2 = tiledb.Dim("d2", domain=(101, 10000), tile=100, dtype="uint64") # create domain domain = tiledb.Domain(d1, d2) # create attributes a1 = tiledb.Attr("a1", dtype="int32,int32,int32") filter_list = tiledb.FilterList([tiledb.GzipFilter()]) a2 = tiledb.Attr("a2", 
filters=filter_list, dtype="float32") off_filters_pylist = [tiledb.libtiledb.ZstdFilter(level=10)] off_filters = tiledb.libtiledb.FilterList( filters=off_filters_pylist, chunksize=2048 ) coords_filters_pylist = [tiledb.libtiledb.Bzip2Filter(level=5)] coords_filters = tiledb.libtiledb.FilterList( filters=coords_filters_pylist, chunksize=4096 ) validity_filters_pylist = [tiledb.libtiledb.GzipFilter(level=9)] validity_filters = tiledb.libtiledb.FilterList( filters=validity_filters_pylist, chunksize=1024 ) # create sparse array with schema schema = tiledb.ArraySchema( domain=domain, attrs=(a1, a2), capacity=10, cell_order="col-major", tile_order="row-major", coords_filters=coords_filters, offsets_filters=off_filters, validity_filters=validity_filters, sparse=True, ) self.assertTrue(schema.sparse) schema.dump() assert_captured(capfd, "Array type: sparse") # make sure we can construct ArraySchema with python lists of filters schema2 = tiledb.ArraySchema( domain=domain, attrs=(a1, a2), capacity=10, cell_order="col-major", tile_order="row-major", coords_filters=coords_filters_pylist, offsets_filters=off_filters, validity_filters=validity_filters, sparse=True, ) assert len(schema2.coords_filters) == 1 assert schema2.coords_filters[0] == tiledb.Bzip2Filter(level=5) assert len(schema2.offsets_filters) == 1 assert schema2.offsets_filters[0] == tiledb.ZstdFilter(level=10) assert len(schema2.validity_filters) == 1 assert schema2.validity_filters[0] == tiledb.GzipFilter(level=9) def test_none_filter_list(self): with self.assertRaises(ValueError): tiledb.FilterList([None]) with self.assertRaises(ValueError): fl = tiledb.FilterList() fl.append(None) def test_mixed_string_schema(self): path = self.path("test_mixed_string_schema") dims = [ tiledb.Dim(name="dpos", domain=(-100.0, 100.0), tile=10, dtype=np.float64), tiledb.Dim(name="str_index", tile=None, dtype=np.bytes_), ] dom = tiledb.Domain(*dims) attrs = [tiledb.Attr(name="val", dtype=np.float64)] schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) self.assertTrue(schema.domain.has_dim("str_index")) self.assertFalse(schema.domain.has_dim("nonono_str_index")) self.assertTrue(schema.domain.dim("str_index").isvar) self.assertFalse(schema.domain.dim("dpos").isvar) self.assertEqual(schema.domain.dim("dpos").dtype, np.double) self.assertEqual(schema.domain.dim("str_index").dtype, np.bytes_) self.assertFalse(schema.domain.homogeneous) tiledb.Array.create(path, schema) with tiledb.open(path, "r") as arr: assert_array_equal(arr[:]["str_index"], np.array([], dtype="|S1")) class ArrayTest(DiskTestCase): def create_array_schema(self): domain = tiledb.Domain( tiledb.Dim(domain=(1, 8), tile=2), tiledb.Dim(domain=(1, 8), tile=2) ) a1 = tiledb.Attr("val", dtype="f8") return tiledb.ArraySchema(domain=domain, attrs=(a1,)) def test_array_create(self): config = tiledb.Config() config["sm.consolidation.step_min_frags"] = 0 config["sm.consolidation.steps"] = 1 schema = self.create_array_schema() # persist array schema tiledb.libtiledb.Array.create(self.path("foo"), schema) # these should be no-ops # full signature tiledb.consolidate(self.path("foo"), config=config) # kw signature tiledb.consolidate(uri=self.path("foo")) # load array in readonly mode array = tiledb.libtiledb.Array(self.path("foo"), mode="r") self.assertTrue(array.isopen) self.assertEqual(array.schema, schema) self.assertEqual(array.mode, "r") self.assertEqual(array.uri, self.path("foo")) # test that we cannot consolidate an array in readonly mode with self.assertRaises(tiledb.TileDBError):
array.consolidate() # we have not written anything, so the array is empty self.assertIsNone(array.nonempty_domain()) array.reopen() self.assertTrue(array.isopen) array.close() self.assertEqual(array.isopen, False) with self.assertRaises(tiledb.TileDBError): # cannot get schema from closed array array.schema with self.assertRaises(tiledb.TileDBError): # cannot re-open a closed array array.reopen() def test_array_create_with_ctx(self): schema = self.create_array_schema() with self.assertRaises(TypeError): tiledb.libtiledb.Array.create(self.path("foo"), schema, ctx="foo") # persist array schema tiledb.libtiledb.Array.create(self.path("foo"), schema, ctx=tiledb.Ctx()) @pytest.mark.skipif( not (sys.platform == "win32" and tiledb.libtiledb.version() >= (2, 3, 0)), reason="Shared network drive only on Win32", ) def test_array_create_on_shared_drive(self): schema = self.create_array_schema() uri = self.path(basename="foo", shared=True) tiledb.libtiledb.Array.create(uri, schema) # load array in readonly mode array = tiledb.libtiledb.Array(uri, mode="r") self.assertTrue(array.isopen) self.assertEqual(array.schema, schema) self.assertEqual(array.mode, "r") self.assertEqual(array.uri, uri) # we have not written anything, so the array is empty self.assertIsNone(array.nonempty_domain()) array.reopen() self.assertTrue(array.isopen) array.close() self.assertEqual(array.isopen, False) with self.assertRaises(tiledb.TileDBError): # cannot get schema from closed array array.schema with self.assertRaises(tiledb.TileDBError): # cannot re-open a closed array array.reopen() def test_array_create_encrypted(self): config = tiledb.Config() config["sm.consolidation.step_min_frags"] = 0 config["sm.consolidation.steps"] = 1 schema = self.create_array_schema() # persist array schema tiledb.libtiledb.Array.create( self.path("foo"), schema, key=b"0123456789abcdeF0123456789abcdeF" ) # check that we can open the array successfully for key in ( b"0123456789abcdeF0123456789abcdeF", "0123456789abcdeF0123456789abcdeF", ): with tiledb.libtiledb.Array(self.path("foo"), mode="r", key=key) as array: self.assertTrue(array.isopen) self.assertEqual(array.schema, schema) self.assertEqual(array.mode, "r") with tiledb.open(self.path("foo"), mode="r", key=key) as array: self.assertTrue(array.isopen) self.assertEqual(array.schema, schema) self.assertEqual(array.mode, "r") tiledb.consolidate(uri=self.path("foo"), config=config, key=key) # check that opening the array with the wrong key fails: with self.assertRaises(tiledb.TileDBError): tiledb.libtiledb.Array( self.path("foo"), mode="r", key=b"0123456789abcdeF0123456789abcdeX" ) # check that opening the array with the wrong key length fails: with self.assertRaises(tiledb.TileDBError): tiledb.libtiledb.Array( self.path("foo"), mode="r", key=b"0123456789abcdeF0123456789abcde" ) # check that consolidating the array with the wrong key fails: with self.assertRaises(tiledb.TileDBError): tiledb.consolidate( self.path("foo"), config=config, key=b"0123456789abcdeF0123456789abcde" ) # needs core fix in 2.2.4 @pytest.mark.skipif( (sys.platform == "win32" and tiledb.libtiledb.version() == (2, 2, 3)), reason="Skip array_doesnt_exist test on Win32 / libtiledb 2.2.3", ) def test_array_doesnt_exist(self): with self.assertRaises(tiledb.TileDBError): tiledb.libtiledb.Array(self.path("foo"), mode="r") def test_create_schema_matches(self): dims = (tiledb.Dim(domain=(0, 6), tile=2),) dom = tiledb.Domain(*dims) att = tiledb.Attr(dtype=np.byte) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) uri
= self.path("s1") with self.assertRaises(ValueError): tiledb.DenseArray.create(uri, schema) dense_schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) uri = self.path("d1") with self.assertRaises(ValueError): tiledb.SparseArray.create(uri, dense_schema) class MySparseArray(tiledb.SparseArray): pass with self.assertRaises(ValueError): MySparseArray.create(uri, dense_schema) def test_nonempty_domain_scalar(self): uri = self.path("test_nonempty_domain_scalar") dims = tiledb.Dim(domain=(-10, 10), dtype=np.int64, tile=1) schema = tiledb.ArraySchema( tiledb.Domain(dims), attrs=[tiledb.Attr(dtype=np.int32)], sparse=True ) tiledb.Array.create(uri, schema) with tiledb.open(uri, "w") as A: A[-1] = 10 A[1] = 11 with tiledb.open(uri, "r") as A: ned = A.nonempty_domain() assert_array_equal(ned, ((-1, 1),)) assert isinstance(ned[0][0], int) assert isinstance(ned[0][1], int) def test_create_array_overwrite(self): uri = self.path("test_create_array_overwrite") dims = tiledb.Dim(domain=(0, 10), dtype=np.int64) schema = tiledb.ArraySchema( tiledb.Domain(dims), attrs=[tiledb.Attr(dtype=np.int32)], sparse=True ) with pytest.warns(UserWarning, match="Overwrite set, but array does not exist"): tiledb.Array.create(uri, schema, overwrite=True) with tiledb.open(uri, "w") as A: A[0] = 1 with tiledb.open(uri, "r") as A: assert A.nonempty_domain() == ((0, 0),) # cannot overwrite the array by default with self.assertRaises(tiledb.TileDBError): tiledb.Array.create(uri, schema) tiledb.Array.create(uri, schema, overwrite=True) # make sure the old array has been deleted and replaced with tiledb.open(uri, "r") as A: assert A.nonempty_domain() is None class DenseArrayTest(DiskTestCase): def test_array_1d(self): A = np.arange(1050) dom = tiledb.Domain(tiledb.Dim(domain=(0, 1049), tile=100, dtype=np.int64)) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="r") as T: self.assertEqual(len(A), len(T)) self.assertEqual(A.ndim, T.ndim) self.assertEqual(A.shape, T.shape) self.assertEqual(1, T.nattr) self.assertEqual(A.dtype, T.attr(0).dtype) self.assertEqual(T.dim(T.schema.domain.dim(0).name), T.dim(0)) with self.assertRaises(ValueError): T.dim(1.0) self.assertIsInstance(T.timestamp_range, tuple) self.assertTrue(T.timestamp_range[1] > 0) # check empty array B = T[:] self.assertEqual(A.shape, B.shape) self.assertEqual(A.dtype, B.dtype) self.assertIsNone(T.nonempty_domain()) with tiledb.DenseArray(self.path("foo"), mode="w") as T: # check set array T[:] = A read1_timestamp = -1 with tiledb.DenseArray(self.path("foo"), mode="r") as T: self.assertEqual(((0, 1049),), T.nonempty_domain()) # check timestamp read1_timestamp = T.timestamp_range self.assertTrue(read1_timestamp[1] > 0) # check slicing assert_array_equal(A, np.array(T)) assert_array_equal(A, T[:]) assert_array_equal(A, T[...]) assert_array_equal(A, T[slice(None)]) assert_array_equal(A[:10], T[:10]) assert_array_equal(A[10:20], T[10:20]) assert_array_equal(A[-10:], T[-10:]) # ellipsis assert_array_equal(A[:10, ...], T[:10, ...]) assert_array_equal(A[10:50, ...], T[10:50, ...]) assert_array_equal(A[-50:, ...], T[-50:, ...]) assert_array_equal(A[..., :10], T[..., :10]) assert_array_equal(A[..., 10:20], T[..., 10:20]) assert_array_equal(A[..., -50:], T[..., -50:]) # across tiles assert_array_equal(A[:150], T[:150]) assert_array_equal(A[-250:], T[-250:]) # point index self.assertEqual(A[0], T[0]) self.assertEqual(A[-1], T[-1]) # point index with all
index types self.assertEqual(A[123], T[np.int8(123)]) self.assertEqual(A[123], T[np.uint8(123)]) self.assertEqual(A[123], T[np.int16(123)]) self.assertEqual(A[123], T[np.uint16(123)]) self.assertEqual(A[123], T[np.int64(123)]) self.assertEqual(A[123], T[np.uint64(123)]) self.assertEqual(A[123], T[np.int32(123)]) self.assertEqual(A[123], T[np.uint32(123)]) # mixed-type slicing # https://github.com/TileDB-Inc/TileDB-Py/issues/140 self.assertEqual(A[0:1], T[0 : np.uint16(1)]) self.assertEqual(A[0:1], T[np.int64(0) : 1]) with self.assertRaises(IndexError): # this is a consequence of NumPy promotion rules self.assertEqual(A[0:1], T[np.uint64(0) : 1]) # basic step assert_array_equal(A[:50:2], T[:50:2]) assert_array_equal(A[:2:50], T[:2:50]) assert_array_equal(A[10:-1:50], T[10:-1:50]) # indexing errors with self.assertRaises(IndexError): T[:, :] with self.assertRaises(IndexError): T[:, 50] with self.assertRaises(IndexError): T[50, :] with self.assertRaises(IndexError): T[0, 0] # check single ellipsis with self.assertRaises(IndexError): T[..., 1:5, ...] with tiledb.DenseArray(self.path("foo"), mode="w") as T: # check partial assignment B = np.arange(1e5, 2e5).astype(A.dtype) T[190:310] = B[190:310] read2_timestamp = -1 with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A[:190], T[:190]) assert_array_equal(B[190:310], T[190:310]) assert_array_equal(A[310:], T[310:]) # test timestamps are updated read2_timestamp = T.timestamp_range self.assertTrue(read2_timestamp > read1_timestamp) def test_array_1d_set_scalar(self): A = np.zeros(50) dom = tiledb.Domain(tiledb.Dim(domain=(0, 49), tile=50)) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A, T[:]) with tiledb.DenseArray(self.path("foo"), mode="w") as T: value = -1, 3, 10 A[0], A[1], A[3] = value T[0], T[1], T[3] = value with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A, T[:]) for value in (-1, 3, 10): with tiledb.DenseArray(self.path("foo"), mode="w") as T: A[5:25] = value T[5:25] = value with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A, T[:]) with tiledb.DenseArray(self.path("foo"), mode="w") as T: A[:] = value T[:] = value with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A, T[:]) def test_array_id_point_queries(self): # TODO: handle queries like T[[2, 5, 10]] = ? 
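# For reads, coordinate-list selection is already available through the
# multi_index interface; a minimal sketch (illustrative only, with `A` an
# open array handle):
#
#   A.multi_index[[2, 5, 10]]  # dict mapping attribute names to values
#
# The TODO above concerns the corresponding write (assignment) path.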
pass @pytest.mark.parametrize("dtype", INTEGER_DTYPES) def test_dense_index_dtypes(self, dtype): path = self.path() data = np.arange(0, 3).astype(dtype) with tiledb.from_numpy(path, data) as A: pass with tiledb.open(path) as B: assert_array_equal(B[:], data) def test_array_2d(self): A = np.arange(10000).reshape((1000, 10)) dom = tiledb.Domain( tiledb.Dim(domain=(0, 999), tile=100), tiledb.Dim(domain=(0, 9), tile=2) ) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="r") as T: self.assertEqual(len(A), len(T)) self.assertEqual(A.ndim, T.ndim) self.assertEqual(A.shape, T.shape) self.assertEqual(1, T.nattr) self.assertEqual(A.dtype, T.attr(0).dtype) # check that the non-empty domain is None self.assertIsNone(T.nonempty_domain()) with tiledb.DenseArray(self.path("foo"), mode="w") as T: # Set data T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A, T[:]) # check the non-empty domain spans the whole domain self.assertEqual(((0, 999), (0, 9)), T.nonempty_domain()) # check array-like assert_array_equal(A, np.array(T)) # slicing assert_array_equal(A, T[:]) assert_array_equal(A, T[...]) assert_array_equal(A, T[slice(None)]) # slice first dimension assert_array_equal(A[:10], T[:10]) assert_array_equal(A[:10], T[:10]) assert_array_equal(A[10:20], T[10:20]) assert_array_equal(A[-10:], T[-10:]) assert_array_equal(A[:10, :], T[:10, :]) assert_array_equal(A[10:20, :], T[10:20, :]) assert_array_equal(A[-10:, :], T[-10:, :]) assert_array_equal(A[:10, ...], T[:10, ...]) assert_array_equal(A[10:20, ...], T[10:20, ...]) assert_array_equal(A[-10:, ...], T[-10:, ...]) assert_array_equal(A[:10, :, ...], T[:10, :, ...]) assert_array_equal(A[10:20, :, ...], T[10:20, :, ...]) assert_array_equal(A[-10:, :, ...], T[-10:, :, ...]) # slice second dimension assert_array_equal(A[:, :2], T[:, :2]) assert_array_equal(A[:, 2:4], T[:, 2:4]) assert_array_equal(A[:, -2:], T[:, -2:]) assert_array_equal(A[..., :2], T[..., :2]) assert_array_equal(A[..., 2:4], T[..., 2:4]) assert_array_equal(A[..., -2:], T[..., -2:]) assert_array_equal(A[:, ..., :2], T[:, ..., :2]) assert_array_equal(A[:, ..., 2:4], T[:, ..., 2:4]) assert_array_equal(A[:, ..., -2:], T[:, ..., -2:]) # slice both dimensions assert_array_equal(A[:10, :2], T[:10, :2]) assert_array_equal(A[10:20, 2:4], T[10:20, 2:4]) assert_array_equal(A[-10:, -2:], T[-10:, -2:]) # slice across tile boundaries assert_array_equal(A[:110], T[:110]) assert_array_equal(A[190:310], T[190:310]) assert_array_equal(A[-110:], T[-110:]) assert_array_equal(A[:110, :], T[:110, :]) assert_array_equal(A[190:310, :], T[190:310, :]) assert_array_equal(A[-110:, :], T[-110:, :]) assert_array_equal(A[:, :3], T[:, :3]) assert_array_equal(A[:, 3:7], T[:, 3:7]) assert_array_equal(A[:, -3:], T[:, -3:]) assert_array_equal(A[:110, :3], T[:110, :3]) assert_array_equal(A[190:310, 3:7], T[190:310, 3:7]) assert_array_equal(A[-110:, -3:], T[-110:, -3:]) # single row/col/item assert_array_equal(A[0], T[0]) assert_array_equal(A[-1], T[-1]) assert_array_equal(A[:, 0], T[:, 0]) assert_array_equal(A[:, -1], T[:, -1]) self.assertEqual(A[0, 0], T[0, 0]) self.assertEqual(A[-1, -1], T[-1, -1]) # too many indices with self.assertRaises(IndexError): T[:, :, :] with self.assertRaises(IndexError): T[0, :, :] with self.assertRaises(IndexError): T[:, 0, :] with self.assertRaises(IndexError): T[:, :, 0] with self.assertRaises(IndexError): T[0, 0, 0] # only single ellipsis
allowed with self.assertRaises(IndexError): T[..., ...] with tiledb.DenseArray(self.path("foo"), mode="w") as T: # check partial assignment B = np.arange(10000, 20000).reshape((1000, 10)) T[190:310, 3:7] = B[190:310, 3:7] with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A[:190], T[:190]) assert_array_equal(A[:, :3], T[:, :3]) assert_array_equal(B[190:310, 3:7], T[190:310, 3:7]) assert_array_equal(A[310:], T[310:]) assert_array_equal(A[:, 7:], T[:, 7:]) @pytest.mark.skipif( not (sys.platform == "win32" and tiledb.libtiledb.version() >= (2, 3, 0)), reason="Shared network drive only on Win32", ) def test_array_1d_shared_drive(self): A = np.zeros(50) dom = tiledb.Domain(tiledb.Dim(domain=(0, 49), tile=50)) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(dom, (att,)) uri = self.path("foo", shared=True) tiledb.DenseArray.create(uri, schema) with tiledb.DenseArray(uri, mode="w") as T: T[:] = A with tiledb.DenseArray(uri, mode="r") as T: assert_array_equal(A, T[:]) with tiledb.DenseArray(uri, mode="w") as T: value = -1, 3, 10 A[0], A[1], A[3] = value T[0], T[1], T[3] = value with tiledb.DenseArray(uri, mode="r") as T: assert_array_equal(A, T[:]) for value in (-1, 3, 10): with tiledb.DenseArray(uri, mode="w") as T: A[5:25] = value T[5:25] = value with tiledb.DenseArray(uri, mode="r") as T: assert_array_equal(A, T[:]) with tiledb.DenseArray(uri, mode="w") as T: A[:] = value T[:] = value with tiledb.DenseArray(uri, mode="r") as T: assert_array_equal(A, T[:]) def test_fixed_string(self): a = np.array(["ab", "cd", "ef", "gh", "ij", "kl", "", "op"], dtype="|S2") with tiledb.from_numpy(self.path("fixed_string"), a) as T: with tiledb.open(self.path("fixed_string")) as R: self.assertEqual(T.dtype, R.dtype) self.assertEqual(R.attr(0).ncells, 2) assert_array_equal(T, R) def test_ncell_int(self): a = np.array([(1, 2), (3, 4), (5, 6)], dtype=[("", np.int16), ("", np.int16)]) with tiledb.from_numpy(self.path("ncell_int16"), a) as T: with tiledb.open(self.path("ncell_int16")) as R: self.assertEqual(T.dtype, R.dtype) self.assertEqual(R.attr(0).ncells, 2) assert_array_equal(T, R) assert_array_equal(T, R.multi_index[0:2][""]) def test_open_with_timestamp(self): A = np.zeros(3) dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=3, dtype=np.int64)) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.DenseArray.create(self.path("foo"), schema) # write with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A read1_timestamp = -1 with tiledb.DenseArray(self.path("foo"), mode="r") as T: read1_timestamp = T.timestamp_range self.assertEqual(T[0], 0) self.assertEqual(T[1], 0) self.assertEqual(T[2], 0) # sleep 200ms and write time.sleep(0.2) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[0:1] = 1 read2_timestamp = -1 with tiledb.DenseArray(self.path("foo"), mode="r") as T: read2_timestamp = T.timestamp_range self.assertTrue(read2_timestamp > read1_timestamp) # sleep 200ms and write time.sleep(0.2) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[1:2] = 2 read3_timestamp = -1 with tiledb.DenseArray(self.path("foo"), mode="r") as T: read3_timestamp = T.timestamp_range self.assertTrue(read3_timestamp > read2_timestamp > read1_timestamp) # read at first timestamp with tiledb.DenseArray( self.path("foo"), timestamp=read1_timestamp, mode="r" ) as T: self.assertEqual(T[0], 0) self.assertEqual(T[1], 0) self.assertEqual(T[2], 0) # read at second timestamp with tiledb.DenseArray( self.path("foo"), timestamp=read2_timestamp, 
mode="r" ) as T: self.assertEqual(T[0], 1) self.assertEqual(T[1], 0) self.assertEqual(T[2], 0) # read at third timestamp with tiledb.DenseArray( self.path("foo"), timestamp=read3_timestamp, mode="r" ) as T: self.assertEqual(T[0], 1) self.assertEqual(T[1], 2) self.assertEqual(T[2], 0) def test_open_timestamp_range(self): A = np.zeros(3) path = self.path("open_timestamp_range") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=3, dtype=np.int64)) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.DenseArray.create(path, schema) # write with tiledb.DenseArray(path, timestamp=1, mode="w") as T: T[:] = A * 1 with tiledb.DenseArray(path, timestamp=2, mode="w") as T: T[:] = A * 2 with tiledb.DenseArray(path, timestamp=3, mode="w") as T: T[:] = A * 3 with tiledb.DenseArray(path, timestamp=4, mode="w") as T: T[:] = A * 4 def assert_ts(timestamp, result): with tiledb.DenseArray(path, mode="r", timestamp=timestamp) as T: assert_array_equal(T, result) assert_ts(0, A * np.nan) assert_ts(1, A * 1) assert_ts(2, A * 2) assert_ts(3, A * 3) assert_ts((1, 2), A * 2) assert_ts((0, 3), A * 3) assert_ts((1, 3), A * 3) assert_ts((2, 3), A * 3) assert_ts((2, 4), A * 3) assert_ts((None, 2), A * 2) assert_ts((None, 3), A * 3) assert_ts((2, None), A * 3) assert_ts((3, None), A * 3) assert_ts((3, None), A * 3) def test_open_attr(self): uri = self.path("test_open_attr") schema = tiledb.ArraySchema( domain=tiledb.Domain( tiledb.Dim(name="dim0", dtype=np.uint32, domain=(1, 4)) ), attrs=( tiledb.Attr(name="x", dtype=np.int32), tiledb.Attr(name="y", dtype=np.int32), ), ) tiledb.Array.create(uri, schema) with tiledb.open(uri, mode="w") as A: A[:] = {"x": np.array((1, 2, 3, 4)), "y": np.array((5, 6, 7, 8))} with self.assertRaises(KeyError): tiledb.open(uri, attr="z") with self.assertRaises(KeyError): tiledb.open(uri, attr="dim0") with tiledb.open(uri, attr="x") as A: assert_array_equal(A[:], np.array((1, 2, 3, 4))) assert list(A.multi_index[:].keys()) == ["x"] def test_ncell_attributes(self): dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=10, dtype=int)) attr = tiledb.Attr(dtype=[("", np.int32), ("", np.int32), ("", np.int32)]) schema = tiledb.ArraySchema(domain=dom, attrs=(attr,)) tiledb.DenseArray.create(self.path("foo"), schema) A = np.ones((10,), dtype=[("", np.int32), ("", np.int32), ("", np.int32)]) self.assertEqual(A.dtype, attr.dtype) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A, T[:]) assert_array_equal(A[:5], T[:5]) def test_complex_attributes(self): dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=10, dtype=int)) attr = tiledb.Attr(dtype=np.complex64) schema = tiledb.ArraySchema(domain=dom, attrs=(attr,)) tiledb.DenseArray.create(self.path("foo"), schema) A = np.random.rand(20).astype(np.float32).view(dtype=np.complex64) self.assertEqual(schema, tiledb.schema_like(A, dim_dtype=int)) self.assertEqual(A.dtype, attr.dtype) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A, T[:]) assert_array_equal(A[:5], T[:5]) def test_multiple_attributes(self): dom = tiledb.Domain( tiledb.Dim(domain=(0, 1), tile=1, dtype=np.int64), tiledb.Dim(domain=(0, 3), tile=4, dtype=np.int64), ) attr_int = tiledb.Attr("ints", dtype=int) attr_float = tiledb.Attr("floats", dtype=float) schema = tiledb.ArraySchema(domain=dom, attrs=(attr_int, attr_float)) tiledb.DenseArray.create(self.path("foo"), schema) 
V_ints = np.array([[0, 1, 2, 3], [4, 6, 7, 5]]) V_floats = np.array([[0.0, 1.0, 2.0, 3.0], [4.0, 6.0, 7.0, 5.0]]) V = {"ints": V_ints, "floats": V_floats} with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = V # check setting attribute in different order from Attr definition # https://github.com/TileDB-Inc/TileDB-Py/issues/299 V2 = {"floats": V_floats, "ints": V_ints} with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = V2 with tiledb.DenseArray(self.path("foo"), mode="r") as T: R = T[:] assert_array_equal(V["ints"], R["ints"]) assert_array_equal(V["floats"], R["floats"]) R = T.query(attrs=("ints",))[1:3] assert_array_equal(V["ints"][1:3], R["ints"]) R = T.query(attrs=("floats",), order="F")[:] self.assertTrue(R["floats"].flags.f_contiguous) R = T.query(attrs=("ints",), coords=True)[0, 0:3] self.assertTrue("__dim_0" in R) self.assertTrue("__dim_1" in R) assert_array_equal(R["__dim_0"], np.array([0, 0, 0])) assert_array_equal(R["__dim_1"], np.array([0, 1, 2])) # Global order returns results as a linear buffer R = T.query(attrs=("ints",), order="G")[:] self.assertEqual(R["ints"].shape, (8,)) with self.assertRaises(tiledb.TileDBError): T.query(attrs=("unknown",))[:] with tiledb.DenseArray(self.path("foo"), mode="w") as T: # check error ncells length V["ints"] = V["ints"][1:2].copy() with self.assertRaises(tiledb.TileDBError): T[:] = V # check error attribute does not exist V["foo"] = V["ints"].astype(np.int8) with self.assertRaises(tiledb.TileDBError): T[:] = V def test_array_2d_s1(self): # This array is currently read back with dtype object A = np.array([["A", "B"], ["C", ""]], dtype="S") uri = self.path() dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(0, 1), tile=2, dtype=np.int64), tiledb.Dim(name="cols", domain=(0, 1), tile=2, dtype=np.int64), ) schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype="S")] ) tiledb.DenseArray.create(uri, schema) with tiledb.DenseArray(uri, mode="w") as T: T[...] = A with tiledb.DenseArray(uri) as T: assert_array_equal(A, T) res = T.multi_index[(0, 1), (0, 1)]["a"] assert_array_equal(A, res) def test_nd_roundtrip(self): dim_set = np.int64([3 + x % 2 for x in range(2, 12)]) for i, last in enumerate(range(2, len(dim_set))): dims = dim_set[:last] data = np.random.rand(*dims).astype("int32") with tiledb.from_numpy(self.path(f"nd_roundtrip{i}"), data) as A: assert_array_equal(data, A[:]) def test_array_2d_s3_mixed(self): # This array is currently read back with dtype object A = np.array([["AAA", "B"], ["AB", "C"]], dtype="S3") uri = self.path() dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(0, 1), tile=2, dtype=np.int64), tiledb.Dim(name="cols", domain=(0, 1), tile=2, dtype=np.int64), ) schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype="S3")] ) tiledb.DenseArray.create(uri, schema) with tiledb.DenseArray(uri, mode="w") as T: T[...] = A with tiledb.DenseArray(uri) as T: assert_array_equal(A, T) res = T.multi_index[(0, 1), (0, 1)]["a"] assert_array_equal(A, res) def test_incomplete_dense(self): path = self.path("incomplete_dense") # create 10 MB array data = np.arange(1310720, dtype=np.int64) # if `tile` is not set, it defaults to the full array and we # only read 8 bytes at a time.
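# A 1 MB tile (131072 int64 cells) keeps each read tile-aligned, so the
# 1 MB py.init_buffer_bytes budget configured below can make progress on
# every pass of the incomplete-query retry loop.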
use_tile = 131072 # use_tile = None with tiledb.from_numpy(path, data, tile=use_tile) as A: pass # create context with 1 MB memory budget (2 MB total, 1 MB usable) config = tiledb.Config( {"sm.memory_budget": 2 * 1024**2, "py.init_buffer_bytes": 1024**2} ) self.assertEqual(config["py.init_buffer_bytes"], str(1024**2)) # TODO would be good to check repeat count here. Not currently exposed by retry loop. with tiledb.DenseArray(path, ctx=tiledb.Ctx(config)) as A: res_mr = A.multi_index[slice(0, len(data) - 1)] assert_array_equal(res_mr[""], data) res_idx = A[:] assert_array_equal(res_idx, data) df = A.df[:] assert_array_equal(df[""], data) def test_written_fragment_info(self): uri = self.path("test_written_fragment_info") dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=10, dtype=np.int64)) att = tiledb.Attr(dtype=np.int64) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.DenseArray.create(uri, schema) with tiledb.DenseArray(uri, mode="w") as T: T[:] = np.arange(0, 10, dtype=np.int64) self.assertTrue(T.last_write_info is not None) self.assertTrue(len(T.last_write_info.keys()) == 1) t_w1, t_w2 = list(T.last_write_info.values())[0] self.assertTrue(t_w1 > 0) self.assertTrue(t_w2 > 0) def test_missing_schema_error(self): uri = self.path("test_missing_schema_error") dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=10, dtype=np.int64)) att = tiledb.Attr(dtype=np.int64) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.DenseArray.create(uri, schema) with tiledb.DenseArray(uri, mode="w") as T: T[:] = np.arange(0, 10, dtype=np.int64) if tiledb.libtiledb.version() < (2, 4): tiledb.VFS().remove_file(os.path.join(uri, "__array_schema.tdb")) else: tiledb.VFS().remove_dir(os.path.join(uri, "__schema")) # new ctx is required running against S3 because otherwise the schema # will simply be read from the cache. 
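# tiledb.scope_ctx() installs a fresh default context for the enclosed
# block; the new context has no cached schema, so the open below must hit
# storage and therefore fails.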
with tiledb.scope_ctx(): with self.assertRaises(tiledb.TileDBError): tiledb.DenseArray(uri) @pytest.mark.xfail( tiledb.libtiledb.version() >= (2, 5), reason="Skip sparse_write_to_dense with libtiledb 2.5+", ) def test_sparse_write_to_dense(self): class AssignAndCheck: def __init__(self, outer, *shape): self.outer = outer self.shape = shape def __setitem__(self, s, v): A = np.random.rand(*self.shape) uri = self.outer.path( f"sparse_write_to_dense{random.randint(0,np.uint64(-1))}" ) tiledb.from_numpy(uri, A).close() with tiledb.open(uri, "w") as B: B[s] = v A[s] = v with tiledb.open(uri) as B: assert_array_equal(A, B[:]) D = AssignAndCheck(self, 5, 5) with pytest.warns( DeprecationWarning, match="Sparse writes to dense arrays is deprecated" ): D[np.array([1, 2]), np.array([0, 0])] = np.array([0, 2]) def test_reopen_dense_array(self): uri = self.path("test_reopen_dense_array") dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=10, dtype=np.int64)) att = tiledb.Attr(dtype=np.int64) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.DenseArray.create(uri, schema) data = np.arange(0, 10, dtype=np.int64) with tiledb.DenseArray(uri, mode="w", timestamp=1) as T: T[:] = data with tiledb.DenseArray(uri, mode="w", timestamp=2) as T: T[:] = data * 2 T = tiledb.DenseArray(uri, mode="r", timestamp=1) assert_array_equal(T[:], data) T.reopen() assert_array_equal(T[:], data * 2) T.close() class TestVarlen(DiskTestCase): def test_varlen_write_bytes(self): A = np.array( [ "aa", "bbb", "ccccc", "ddddddddddddddddddddd", "ee", "ffffff", "g", "hhhhhhhhhh", ], dtype=bytes, ) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) att = tiledb.Attr(dtype=np.bytes_) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A[:], T[:]) assert_array_equal(A, T.multi_index[1 : len(A)][""]) def test_varlen_sparse_all_empty_strings(self): # this test addresses a fix for specific need for reads on a # large existing array, see # https://github.com/TileDB-Inc/TileDB-Py/pull/475 # we currently have to write a placeholder at the end to # avoid zero-length cell error # TODO: follow-up with improved testing for empty var-length/strings A = np.array(["", "", "", "", "", "\x00"], dtype=object) dim_len = len(A) uri = self.path("varlen_all_empty_strings") dom = tiledb.Domain(tiledb.Dim(domain=(1, dim_len), tile=dim_len)) att = tiledb.Attr(name="a1", dtype=np.str_, var=True) schema = tiledb.ArraySchema(dom, (att,), sparse=True) tiledb.Array.create(uri, schema) with tiledb.open(uri, mode="w") as T: T[np.arange(1, dim_len + 1)] = {"a1": A} with tiledb.open(uri, mode="r") as T: # check interior range assert_array_equal(A[1:-1], T[2:-1]["a1"]) assert_array_equal(A[1:-1], T.multi_index[2 : dim_len - 1]["a1"]) def test_varlen_write_unicode(self): A = np.array( [ "aa", "bbb", "ccccc", "ddddddddddddddddddddd", "ee", "ffffff", "g", "", "hhhhhhhhhh", ], dtype=np.unicode_, ) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) att = tiledb.Attr(dtype=np.unicode_, var=True) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A[:], T[:]) def test_varlen_write_floats(self): # Generates 8 variable-length float64 subarrays (subarray len 
and content are randomized) A = np.array( [np.random.rand(x) for x in np.random.randint(1, 12, 8)], dtype=object ) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) att = tiledb.Attr(dtype=np.float64, var=True) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: T_ = T[:] # TODO/note: the return is a 0-element array. assert_array_equal(A[0], T[1][()]) assert_array_equal(A[-1], T[-1][()]) self.assertEqual(len(A), len(T_)) # can't use assert_array_equal w/ object array self.assertTrue(all(np.array_equal(x, A[i]) for i, x in enumerate(T_))) def test_varlen_write_floats_2d(self): A = np.array( [np.random.rand(x) for x in np.arange(1, 10)], dtype=object ).reshape(3, 3) # basic write dom = tiledb.Domain( tiledb.Dim(domain=(1, 3), tile=len(A)), tiledb.Dim(domain=(1, 3), tile=len(A)), ) att = tiledb.Attr(dtype=np.float64, var=True) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: T_ = T[:] self.assertEqual(len(A), len(T_)) # can't use assert_array_equal w/ object array self.assertTrue( np.all( [np.array_equal(A.flat[i], T[:].flat[i]) for i in np.arange(0, 9)] ) ) def test_varlen_write_int_subarray(self): A = np.array( list( map( lambda x: np.array(x, dtype=np.uint64), [np.arange(i, 2 * i + 1) for i in np.arange(0, 16)], ) ), dtype="O", ).reshape(4, 4) uri = self.path("test_varlen_write_int_subarray") dom = tiledb.Domain( tiledb.Dim(domain=(0, 3), tile=len(A)), tiledb.Dim(domain=(0, 3), tile=len(A)), ) att = tiledb.Attr(dtype=np.uint64, var=True) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(uri, schema) # NumPy forces single-element object arrays into a contiguous layout # so we alternate the size to get a consistent baseline array. 
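# (For example, np.array([np.array([1])] * 16, dtype="O") would collapse
# into a contiguous 2-D object array rather than a 1-D array of
# variable-length cells.)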
A_onestwos = np.array( list( map( lambda x: np.array(x, dtype=np.uint64), list([(1,) if x % 2 == 0 else (1, 2) for x in range(16)]), ) ), dtype=np.dtype("O"), ).reshape(4, 4) with tiledb.open(uri, "w") as T: T[:] = A_onestwos with tiledb.open(uri, "w") as T: T[1:3, 1:3] = A[1:3, 1:3] A_assigned = A_onestwos.copy() A_assigned[1:3, 1:3] = A[1:3, 1:3] with tiledb.open(uri) as T: assert_subarrays_equal(A_assigned, T[:]) def test_varlen_write_fixedbytes(self): # The actual dtype of this array is 'S21' A = np.array( [ "aa", "bbb", "ccccc", "ddddddddddddddddddddd", "ee", "ffffff", "g", "hhhhhhhhhh", ], dtype=np.dtype("S"), ) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) att = tiledb.Attr(dtype=np.bytes_) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A[:], T[:]) def test_varlen_write_fixedunicode(self): A = np.array( [ "aa", "bbb", "ccccc", "ddddddddddddddddddddd", "ee", "ffffff", "", "g", "hhhhhhhhhh", ], dtype=np.dtype("U"), ) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) att = tiledb.Attr(dtype=np.unicode_) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: assert_array_equal(A[:], T[:]) def test_varlen_write_ints(self): A = np.array( [ np.uint64(np.random.randint(0, pow(10, 6), x)) for x in np.random.randint(1, 12, 8) ], dtype=object, ) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) att = tiledb.Attr(dtype=np.int64, var=True) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: T_ = T[:] self.assertEqual(len(A), len(T)) # can't use assert_array_equal w/ object array self.assertTrue(all(np.array_equal(x, A[i]) for i, x in enumerate(T_))) def test_varlen_wrong_domain(self): A = np.array( [ "aa", "bbb", "ccccc", "ddddddddddddddddddddd", "ee", "ffffff", "g", "hhhhhhhhhh", ] ) dom = tiledb.Domain(tiledb.Dim(domain=(1, 3), tile=3)) att = tiledb.Attr(dtype=np.bytes_) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: with self.assertRaises(tiledb.TileDBError): T[:] = A def test_array_varlen_mismatched(self): # Test that we raise a TypeError when passing a heterogeneous object array. 
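# Cells 0-2 below are bytes objects, but the final cell is a uint64
# ndarray, so the value cannot be packed into a single var-length bytes
# attribute buffer.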
A = np.array([b"aa", b"bbb", b"cccc", np.uint64([1, 3, 4])], dtype=object) dom = tiledb.Domain(tiledb.Dim(domain=(0, 3), tile=4)) att = tiledb.Attr(dtype=np.bytes_, var=True) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: with self.assertRaises(TypeError): T[:] = A def test_array_varlen_2d_s_fixed(self): A = np.array( [["AAAAAAAAAa", "BBB"], ["ACCC", "BBBCBCBCBCCCBBCBCBCCBC"]], dtype="S" ) uri = self.path("varlen_2d_s_fixed") dom = tiledb.Domain( tiledb.Dim(name="rows", domain=(0, 1), tile=2, dtype=np.int64), tiledb.Dim(name="cols", domain=(0, 1), tile=2, dtype=np.int64), ) schema = tiledb.ArraySchema( domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype="S", var=True)] ) tiledb.DenseArray.create(uri, schema) with tiledb.DenseArray(uri, mode="w") as T: T[...] = A with tiledb.DenseArray(uri) as T: assert_array_equal(A, T) class TestSparseArray(DiskTestCase): @pytest.mark.xfail def test_simple_1d_sparse_vector(self): dom = tiledb.Domain(tiledb.Dim(domain=(0, 3), tile=4, dtype=int)) att = tiledb.Attr(dtype=int) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.SparseArray.create(self.path("foo"), schema) values = np.array([3, 4]) with tiledb.SparseArray(self.path("foo"), mode="w") as T: T[[1, 2]] = values with tiledb.SparseArray(self.path("foo"), mode="r") as T: assert_array_equal(T[[1, 2]], values) @pytest.mark.xfail def test_simple_2d_sparse_vector(self): attr = tiledb.Attr(ctx, dtype=float) dom = tiledb.Domain( tiledb.Dim(ctx, domain=(0, 3), tile=4, dtype=int), tiledb.Dim(ctx, domain=(0, 3), tile=4, dtype=int), ) schema = tiledb.ArraySchema(ctx, domain=dom, attrs=(attr,), sparse=True) tiledb.SparseArray.create(self.path("foo"), schema) values = np.array([3, 4], dtype=float) with tiledb.SparseArray(ctx, self.path("foo"), mode="w") as T: T[[1, 2], [1, 2]] = values with tiledb.SparseArray(ctx, self.path("foo"), mode="r") as T: assert_array_equal(T[[1, 2], [1, 2]], values) @pytest.mark.xfail def test_simple3d_sparse_vector(self): dom = tiledb.Domain( ctx, tiledb.Dim(ctx, "x", domain=(0, 3), tile=4, dtype=int), tiledb.Dim(ctx, "y", domain=(0, 3), tile=4, dtype=int), tiledb.Dim(ctx, "z", domain=(0, 3), tile=4, dtype=int), ) attr = tiledb.Attr(ctx, dtype=float) schema = tiledb.ArraySchema(ctx, domain=dom, attrs=(attr,), sparse=True) tiledb.SparseArray.create(self.path("foo"), schema) values = np.array([3, 4], dtype=float) with tiledb.SparseArray(ctx, self.path("foo"), mode="w") as T: T[[1, 2], [1, 2], [1, 2]] = values with tiledb.SparseArray(ctx, self.path("foo"), mode="r") as T: assert_array_equal(T[[1, 2], [1, 2], [1, 2]], values) @pytest.mark.xfail def test_sparse_ordered_fp_domain(self): dom = tiledb.Domain(tiledb.Dim("x", domain=(0.0, 10.0), tile=2.0, dtype=float)) attr = tiledb.Attr(dtype=float) attr = tiledb.Attr(dtype=float) schema = tiledb.ArraySchema(domain=dom, attrs=(attr,), sparse=True) tiledb.SparseArray.create(self.path("foo"), schema) values = np.array([3.3, 2.7]) with tiledb.SparseArray(self.path("foo"), mode="w") as T: T[[2.5, 4.2]] = values with tiledb.SparseArray(self.path("foo"), mode="r") as T: assert_array_equal(T[[2.5, 4.2]], values) @pytest.mark.xfail def test_sparse_unordered_fp_domain(self): dom = tiledb.Domain(tiledb.Dim("x", domain=(0.0, 10.0), tile=2.0, dtype=float)) attr = tiledb.Attr(dtype=float) schema = tiledb.ArraySchema(domain=dom, attrs=(attr,), sparse=True) tiledb.SparseArray.create(self.path("foo"), schema) values = 
np.array([3.3, 2.7]) with tiledb.SparseArray(self.path("foo"), mode="w") as T: T[[4.2, 2.5]] = values with tiledb.SparseArray(self.path("foo"), mode="r") as T: assert_array_equal(T[[2.5, 4.2]], values[::-1]) @pytest.mark.xfail def test_multiple_attributes(self): uri = self.path() dom = tiledb.Domain( tiledb.Dim(domain=(1, 10), tile=10, dtype=int), tiledb.Dim(domain=(1, 10), tile=10, dtype=int), ) attr_int = tiledb.Attr("ints", dtype=int) attr_float = tiledb.Attr("floats", dtype="float") schema = tiledb.ArraySchema( domain=dom, attrs=(attr_int, attr_float), sparse=True ) tiledb.SparseArray.create(uri, schema) I = np.array([1, 1, 1, 2, 3, 3, 3, 4]) J = np.array([1, 2, 4, 3, 1, 6, 7, 5]) V_ints = np.array([0, 1, 2, 3, 4, 6, 7, 5]) V_floats = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 5.0]) V = {"ints": V_ints, "floats": V_floats} with tiledb.SparseArray(uri, mode="w") as T: T[I, J] = V with tiledb.SparseArray(uri, mode="r") as T: R = T[I, J] assert_array_equal(V["ints"], R["ints"]) assert_array_equal(V["floats"], R["floats"]) # check error attribute does not exist # TODO: should this be an attribute error? with tiledb.SparseArray(uri, mode="w") as T: V["foo"] = V["ints"].astype(np.int8) with self.assertRaises(tiledb.TileDBError): T[I, J] = V # check error ncells length V["ints"] = V["ints"][1:2].copy() with self.assertRaises(AttributeError): T[I, J] = V def test_query_real_multi_index(self, sparse_cell_order): uri = self.path("query_real_multi_index") dom = tiledb.Domain( tiledb.Dim("x", domain=(-10.0, 10.0), tile=2.0, dtype=float) ) attr = tiledb.Attr("a", dtype=np.float32) schema = tiledb.ArraySchema( domain=dom, attrs=(attr,), sparse=True, cell_order=sparse_cell_order ) tiledb.SparseArray.create(uri, schema) values = np.array([3.3, 2.7]) with tiledb.SparseArray(uri, mode="w") as T: T[[2.5, 4.2]] = values with tiledb.SparseArray(uri, mode="r") as T: assert_array_equal( T.query(coords=True).multi_index[-10.0 : np.nextafter(4.2, 0)]["a"], np.float32(3.3), ) assert_array_equal( T.query(coords=True).multi_index[-10.0 : np.nextafter(4.2, 0)]["x"], np.float32([2.5]), ) assert_array_equal( T.query(coords=False).multi_index[-10.0:5.0]["a"], np.float32([3.3, 2.7]), ) self.assertTrue( "coords" not in T.query(coords=False).multi_index[-10.0:5.0] ) @pytest.mark.parametrize("dtype", INTEGER_DTYPES) def test_sparse_index_dtypes(self, dtype): path = self.path() data = np.arange(0, 3).astype(dtype) schema = schema_from_dict(attrs={"attr": data}, dims={"d0": data}) tiledb.SparseArray.create(path, schema) with tiledb.open(path, "w") as A: A[data] = data with tiledb.open(path) as B: assert_array_equal(B[:]["attr"], data) assert B[data[0]]["attr"] == data[0] assert B[data[1]]["attr"] == data[1] assert B.multi_index[data[0]]["attr"] == data[0] def test_query_real_exact(self, sparse_cell_order): """ Test and demo of querying at floating point representable boundaries Concise representation of expected behavior: c0,c1,c2 = [3.0100000000000002, 3.0100000000000007, 3.010000000000001] values = [1,2,3] [c0:c0] -> [1] [c1:c1] -> [2] [c2:c2] -> [3] [c0:c1] -> [1,2] [c0:c2] -> [1,2,3] [c0 - nextafter(c0,0) : c0] -> [1] [c0 - nextafter(c0,0) : c0 - nextafter(c0,0)] -> [] [c2:c2+nextafter(c2)] -> [3] [c2+nextafter(c2) : c2+nextafter(c2)] -> [] """ uri = self.path() dom = tiledb.Domain( tiledb.Dim("x", domain=(-10.0, 10.0), tile=2.0, dtype=float) ) attr = tiledb.Attr("", dtype=np.float32) schema = tiledb.ArraySchema( domain=dom, attrs=(attr,), sparse=True, cell_order=sparse_cell_order )
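# --- Illustrative sketch (local names c, c_up, c_down are assumptions for
# the example only): multi_index float ranges are closed on both ends, and
# np.nextafter yields the adjacent representable float -- the same trick
# the assertions below use to build exact one-cell ranges.
c = 3.01
c_up = np.nextafter(c, 4)    # smallest representable float above c
c_down = np.nextafter(c, 0)  # largest representable float below c
assert c_down < c < c_up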
tiledb.SparseArray.create(uri, schema) c0 = np.nextafter(3.01, 4) # smaller c1 = np.nextafter(c0, 4) c2 = np.nextafter(c1, 4) # larger # for debugging use: # np.set_printoptions(precision=16, floatmode='maxprec') # print(c0,c1,c2) values = np.array([1, 2, 3]) with tiledb.SparseArray(uri, mode="w") as T: T[[c0, c1, c2]] = values with tiledb.SparseArray(uri, mode="r") as T: for i, c in enumerate([c0, c1, c2]): assert_array_equal(T.query(coords=True).multi_index[c:c][""], values[i]) # test (coord, coord + nextafter) c0_prev = np.nextafter(c0, 0) c2_next = np.nextafter(c2, 4) assert_array_equal(T.query(coords=True).multi_index[c0:c1][""], [1, 2]) assert_array_equal(T.query(coords=True).multi_index[c0:c2][""], [1, 2, 3]) assert_array_equal(T.query(coords=True).multi_index[c2:c2_next][""], 3) assert_array_equal(T.query(coords=True).multi_index[c0_prev:c0][""], 1) assert_array_equal( T.query(coords=True).multi_index[c0_prev:c0_prev][""], [] ) # test (coord + nextafter, coord + nextafter) assert_array_equal( T.query(coords=True).multi_index[c2_next:c2_next][""], np.array([]) ) # test (coord - nextafter, coord) assert_array_equal( T.query(coords=True).multi_index[c0:c1][""], values[[0, 1]] ) # test (coord - nextafter, coord + nextafter) assert_array_equal( T.query(coords=True).multi_index[c0:c2][""], values[[0, 1, 2]] ) def test_sparse_query_specified_dim_coords(self, sparse_cell_order): uri = self.path("sparse_query_specified_dim_coords") dom = tiledb.Domain( tiledb.Dim("i", domain=(1, 10), tile=1, dtype=int), tiledb.Dim("j", domain=(11, 20), tile=1, dtype=int), ) att = tiledb.Attr("", dtype=int) schema = tiledb.ArraySchema( domain=dom, attrs=(att,), sparse=True, cell_order=sparse_cell_order ) tiledb.SparseArray.create(uri, schema) i = np.array([1, 1, 1, 2, 3, 3, 3, 4]) j = np.array([11, 12, 14, 13, 11, 16, 17, 15]) with tiledb.SparseArray(uri, mode="w") as A: A[i, j] = np.array([0, 1, 2, 3, 4, 6, 7, 5]) # data is returned in Hilbert order, so we need to check sorted with tiledb.SparseArray(uri, mode="r") as A: Ai = A.query(dims=["i"])[:] self.assertTrue("i" in Ai) self.assertFalse("j" in Ai) assert_unordered_equal(Ai["i"], i, sparse_cell_order == "hilbert") Aj = A.query(dims=["j"])[:] self.assertFalse("i" in Aj) self.assertTrue("j" in Aj) assert_unordered_equal(Aj["j"], j, sparse_cell_order == "hilbert") Aij = A.query(dims=["i", "j"])[:] self.assertTrue("i" in Aij) self.assertTrue("j" in Aij) assert_unordered_equal(Aij["i"], i, sparse_cell_order == "hilbert") assert_unordered_equal(Aij["j"], j, sparse_cell_order == "hilbert") def test_dense_query_specified_dim_coords(self): uri = self.path("dense_query_specified_dim_coords") dom = tiledb.Domain( tiledb.Dim("i", domain=(1, 3), tile=1, dtype=int), tiledb.Dim("j", domain=(4, 6), tile=1, dtype=int), ) att = tiledb.Attr("", dtype=int) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=False) tiledb.DenseArray.create(uri, schema) with tiledb.DenseArray(uri, mode="w") as A: A[:, :] = np.arange(9) with tiledb.DenseArray(uri, mode="r") as A: i = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]) j = np.array([[4, 5, 6], [4, 5, 6], [4, 5, 6]]) Ai = A.query(dims=["i"])[:] self.assertTrue("i" in Ai) self.assertFalse("j" in Ai) assert_array_equal(Ai["i"], i) Aj = A.query(dims=["j"])[:] self.assertFalse("i" in Aj) self.assertTrue("j" in Aj) assert_array_equal(Aj["j"], j) Aij = A.query(dims=["i", "j"])[:] self.assertTrue("i" in Aij) self.assertTrue("j" in Aij) assert_array_equal(Aij["i"], i) assert_array_equal(Aij["j"], j) def test_subarray(self, 
sparse_cell_order): dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=int)) att = tiledb.Attr("", dtype=float) schema = tiledb.ArraySchema( domain=dom, attrs=(att,), sparse=True, cell_order=sparse_cell_order ) tiledb.SparseArray.create(self.path("foo"), schema) with tiledb.SparseArray(self.path("foo"), mode="r") as T: self.assertIsNone(T.nonempty_domain()) with tiledb.SparseArray(self.path("foo"), mode="w") as T: T[[50, 60, 100]] = [1.0, 2.0, 3.0] with tiledb.SparseArray(self.path("foo"), mode="r") as T: self.assertEqual(((50, 100),), T.nonempty_domain()) # retrieve just valid coordinates in subarray T[40:60] assert_array_equal(T[40:61]["x"], [50, 60]) # TODO: dropping coords with one anon value returns just an array res = T.query(coords=False)[40:61] assert_array_equal(res[""], [1.0, 2.0]) self.assertEqual(("coords" in res), False) def test_sparse_bytes(self, sparse_cell_order): dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=int)) att = tiledb.Attr("", var=True, dtype=np.bytes_) schema = tiledb.ArraySchema( domain=dom, attrs=(att,), sparse=True, cell_order=sparse_cell_order ) tiledb.SparseArray.create(self.path("foo"), schema) with tiledb.SparseArray(self.path("foo"), mode="r") as T: self.assertIsNone(T.nonempty_domain()) A = np.array( [b"aaa", b"bbbbbbbbbbbbbbbbbbbb", b"ccccccccccccccccccccccccc"], dtype=np.bytes_, ) with tiledb.SparseArray(self.path("foo"), mode="w") as T: T[[50, 60, 100]] = A with tiledb.SparseArray(self.path("foo"), mode="r") as T: self.assertEqual(((50, 100),), T.nonempty_domain()) # retrieve just valid coordinates in subarray T[40:60] assert_array_equal(T[40:61]["x"], [50, 60]) # TODO: dropping coords with one anon value returns just an array res = T.query(coords=False)[40:61] assert_array_equal(res[""], A[0:2]) self.assertEqual(("coords" in res), False) # empty sparse varlen result res = T[1000] assert_array_equal(res[""], np.array("", dtype="S1")) assert_array_equal(res["x"], np.array([], dtype=np.int64)) def test_sparse_unicode(self, sparse_cell_order): dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=int)) att = tiledb.Attr("", var=True, dtype=np.unicode_) schema = tiledb.ArraySchema( domain=dom, attrs=(att,), sparse=True, cell_order=sparse_cell_order ) tiledb.SparseArray.create(self.path("foo"), schema) with tiledb.SparseArray(self.path("foo"), mode="r") as T: self.assertIsNone(T.nonempty_domain()) A = np_array = np.array( [ "1234545lkjalsdfj", "mnopqrs", "ijkl", "gh", "abcdef", "aαbββcγγγdδδδδ", "aαbββc", "", "γγγdδδδδ", ], dtype=object, ) with tiledb.SparseArray(self.path("foo"), mode="w") as T: T[[3, 4, 5, 6, 7, 50, 60, 70, 100]] = A with tiledb.SparseArray(self.path("foo"), mode="r") as T: self.assertEqual(((3, 100),), T.nonempty_domain()) # retrieve just valid coordinates in subarray T[40:60] assert_array_equal(T[40:61]["x"], [50, 60]) # TODO: dropping coords with one anon value returns just an array res = T.query(coords=False)[40:61] assert_array_equal(res[""], A[5:7]) self.assertEqual(("coords" in res), False) # empty sparse varlen result res = T[1000] assert_array_equal(res[""], np.array("", dtype="U1")) assert_array_equal(res["x"], np.array([], dtype=np.int64)) def test_sparse_query(self, sparse_cell_order): uri = self.path("test_sparse_query") dom = tiledb.Domain( tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=np.float64) ) att = tiledb.Attr("", dtype=float) schema = tiledb.ArraySchema( domain=dom, attrs=(att,), sparse=True, cell_order=sparse_cell_order ) 
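# --- Illustrative sketch (helper name is an assumption): with order="U"
# (unordered) TileDB may return sparse cells in any layout, so equality is
# checked on sorted copies, as the assertions below do.
def demo_unordered_compare(result_values, written_values):
    import numpy as np
    from numpy.testing import assert_array_equal

    # order-insensitive equality check for unordered sparse reads
    assert_array_equal(np.sort(result_values), np.sort(written_values))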
tiledb.SparseArray.create(uri, schema) coords = np.random.uniform(low=1, high=10000, size=100) data = np.random.rand(100) with tiledb.SparseArray(uri, mode="w") as T: T[coords] = data # Test that TILEDB_UNORDERED works correctly with tiledb.SparseArray(uri, mode="r") as A: # index past the end here to ensure inclusive result res = A[1:10001][""] assert_array_equal(np.sort(res), np.sort(data)) res = A.multi_index[1:10000][""] assert_array_equal(np.sort(res), np.sort(data)) res = A.query(order="U").multi_index[1:10000][""] assert_array_equal(np.sort(res), np.sort(data)) def test_sparse_fixes(self, sparse_cell_order): uri = self.path("test_sparse_fixes") # indexing a 1 element item in a sparse array # (issue directly reported) # the test here is that the indexing does not raise dims = ( tiledb.Dim("foo", domain=(0, 6), tile=2), tiledb.Dim("bar", domain=(0, 6), tile=1), tiledb.Dim("baz", domain=(0, 100), tile=1), ) dom = tiledb.Domain(*dims) att = tiledb.Attr(name="strattr", dtype="S1") schema = tiledb.ArraySchema( domain=dom, attrs=(att,), sparse=True, cell_order=sparse_cell_order ) tiledb.SparseArray.create(uri, schema) with tiledb.SparseArray(uri) as T: T[:] # - test that assigning incompatible value to fixed-len str raises error # - test that value-conversion error raises exception w/ attr name context c = np.vstack( list((x, y, z) for x in range(7) for y in range(7) for z in range(101)) ) with tiledb.SparseArray(uri, "w") as T: with self.assertRaises(ValueError): T[c[:, 0], c[:, 1], c[:, 2]] = {"strattr": np.random.rand(7, 7, 101)} save_exc = list() try: T[c[:, 0], c[:, 1], c[:, 2]] = {"strattr": np.random.rand(7, 7, 101)} except ValueError as e: save_exc.append(e) exc = save_exc.pop() self.assertEqual( str(exc.__context__), "Cannot write a string value to non-string typed attribute 'strattr'!", ) @tiledb.scope_ctx({"sm.check_coord_dups": False}) def test_sparse_fixes_ch1560(self, sparse_cell_order): uri = self.path("sparse_fixes_ch1560") schema = tiledb.ArraySchema( domain=tiledb.Domain( *[tiledb.Dim(name="id", domain=(1, 5000), tile=25, dtype="int32")] ), attrs=[ tiledb.Attr(name="a1", dtype="datetime64[s]"), tiledb.Attr(name="a2", dtype="|S0"), tiledb.Attr(name="a3", dtype="|S0"), tiledb.Attr(name="a4", dtype="int32"), tiledb.Attr(name="a5", dtype="int8"), tiledb.Attr(name="a6", dtype="int32"), ], cell_order=sparse_cell_order, tile_order="row-major", sparse=True, ) tiledb.SparseArray.create(uri, schema) data = OrderedDict( [ ( "a1", np.array( [ "2017-04-01T04:00:00", "2019-10-01T00:00:00", "2019-10-01T00:00:00", "2019-10-01T00:00:00", ], dtype="datetime64[s]", ), ), ("a2", [b"Bus", b"The RIDE", b"The RIDE", b"The RIDE"]), ("a3", [b"Bus", b"The RIDE", b"The RIDE", b"The RIDE"]), ("a4", np.array([6911721, 138048, 138048, 138048], dtype="int32")), ("a5", np.array([20, 23, 23, 23], dtype="int8")), ("a6", np.array([345586, 6002, 6002, 6002], dtype="int32")), ] ) with tiledb.open(uri, "w") as A: A[[1, 462, 462, 462]] = data with tiledb.open(uri) as A: res = A[:] res.pop("id") for k, v in res.items(): if isinstance(data[k], (np.ndarray, list)): assert_array_equal(res[k], data[k]) else: self.assertEqual(res[k], data[k]) def test_sparse_2d_varlen_int(self, sparse_cell_order): path = self.path("test_sparse_2d_varlen_int") dtype = np.int32 dom = tiledb.Domain( tiledb.Dim(domain=(1, 4), tile=2), tiledb.Dim(domain=(1, 4), tile=2) ) att = tiledb.Attr(dtype=dtype, var=True) schema = tiledb.ArraySchema( dom, (att,), sparse=True, cell_order=sparse_cell_order ) tiledb.SparseArray.create(path, schema) if tiledb.libtiledb.version() >= (2, 3) and
sparse_cell_order == "hilbert": c1 = np.array([2, 1, 3, 4]) c2 = np.array([1, 2, 3, 4]) else: c1 = np.array([1, 2, 3, 4]) c2 = np.array([2, 1, 3, 4]) data = np.array( [ np.array([1, 1], dtype=np.int32), np.array([2], dtype=np.int32), np.array([3, 3, 3], dtype=np.int32), np.array([4], dtype=np.int32), ], dtype="O", ) with tiledb.SparseArray(path, "w") as A: A[c1, c2] = data with tiledb.SparseArray(path) as A: res = A[:] assert_subarrays_equal(res[""], data) assert_unordered_equal(res["__dim_0"], c1) assert_unordered_equal(res["__dim_1"], c2) def test_sparse_mixed_domain_uint_float64(self, sparse_cell_order): path = self.path("mixed_domain_uint_float64") dims = [ tiledb.Dim(name="index", domain=(0, 51), tile=11, dtype=np.uint64), tiledb.Dim(name="dpos", domain=(-100.0, 100.0), tile=10, dtype=np.float64), ] dom = tiledb.Domain(*dims) attrs = [tiledb.Attr(name="val", dtype=np.float64)] schema = tiledb.ArraySchema( domain=dom, attrs=attrs, sparse=True, cell_order=sparse_cell_order ) tiledb.SparseArray.create(path, schema) data = np.random.rand(50, 63) coords1 = np.repeat(np.arange(0, 50), 63) coords2 = np.linspace(-100.0, 100.0, num=3150) with tiledb.open(path, "w") as A: A[coords1, coords2] = data # tiledb returns coordinates in sorted order, so we need to check the output # sorted by the first dim coordinates sidx = np.argsort(coords1, kind="stable") coords2_idx = np.tile(np.arange(0, 63), 50)[sidx] with tiledb.open(path) as A: res = A[:] assert_subarrays_equal( data[coords1[sidx], coords2_idx[sidx]], res["val"], sparse_cell_order != "hilbert", ) a_nonempty = A.nonempty_domain() self.assertEqual(a_nonempty[0], (0, 49)) self.assertEqual(a_nonempty[1], (-100.0, 100.0)) def test_sparse_string_domain(self, sparse_cell_order): path = self.path("sparse_string_domain") dom = tiledb.Domain(tiledb.Dim(name="d", domain=(None, None), dtype=np.bytes_)) att = tiledb.Attr(name="a", dtype=np.int64) schema = tiledb.ArraySchema( domain=dom, attrs=(att,), sparse=True, cell_order=sparse_cell_order, capacity=10000, ) tiledb.SparseArray.create(path, schema) data = [1, 2, 3, 4] coords = [b"aa", b"bbb", b"c", b"dddd"] with tiledb.open(path, "w") as A: A[coords] = data with tiledb.open(path) as A: ned = A.nonempty_domain()[0] res = A[ned[0] : ned[1]] assert_array_equal(res["a"], data) self.assertEqual(set(res["d"]), set(coords)) self.assertEqual(A.nonempty_domain(), ((b"aa", b"dddd"),)) def test_sparse_string_domain2(self, sparse_cell_order): path = self.path("sparse_string_domain2") with self.assertRaises(ValueError): dims = [ tiledb.Dim( name="str", domain=(None, None, None), tile=None, dtype=np.bytes_ ) ] dims = [tiledb.Dim(name="str", domain=(None, None), tile=None, dtype=np.bytes_)] dom = tiledb.Domain(*dims) attrs = [tiledb.Attr(name="val", dtype=np.float64)] schema = tiledb.ArraySchema( domain=dom, attrs=attrs, sparse=True, cell_order=sparse_cell_order ) tiledb.SparseArray.create(path, schema) data = np.random.rand(10) coords = [rand_ascii_bytes(random.randint(5, 50)) for _ in range(10)] with tiledb.open(path, "w") as A: A[coords] = data with tiledb.open(path) as A: ned = A.nonempty_domain()[0] res = A[ned[0] : ned[1]] self.assertTrue(set(res["str"]) == set(coords)) # must check data ordered by coords assert_array_equal(res["val"], data[np.argsort(coords, kind="stable")]) def test_sparse_mixed_domain(self, sparse_cell_order): uri = self.path("sparse_mixed_domain") dims = [ tiledb.Dim(name="p", domain=(-100.0, 100.0), tile=10, dtype=np.float64), tiledb.Dim(name="str", domain=(None, None), tile=None, 
dtype=np.bytes_), ] dom = tiledb.Domain(*dims) attrs = [tiledb.Attr(name="val", dtype=np.float64)] schema = tiledb.ArraySchema( domain=dom, attrs=attrs, sparse=True, cell_order=sparse_cell_order ) tiledb.SparseArray.create(uri, schema) nrows = 5 idx_f64 = np.random.rand(nrows) idx_str = [rand_ascii(5).encode("utf-8") for _ in range(nrows)] data = np.random.rand(nrows) with tiledb.SparseArray(uri, "w") as A: A[idx_f64, idx_str] = {"val": data} # test heterogeneous dim nonempty_domain ned_f64 = (np.array(np.min(idx_f64)), np.array(np.max(idx_f64))) idx_str.sort() ned_str = idx_str[0], idx_str[-1] with tiledb.SparseArray(uri, "r") as A: self.assertEqual(A.nonempty_domain(), (ned_f64, ned_str)) def test_sparse_get_unique_dim_values(self, sparse_cell_order): uri = self.path("get_non_empty_coords") dim1 = tiledb.Dim(name="dim1", domain=(None, None), tile=None, dtype=np.bytes_) dim2 = tiledb.Dim(name="dim2", domain=(0, 1), tile=1, dtype=np.float64) attr = tiledb.Attr(name="attr", dtype=np.float32) dom = tiledb.Domain(dim1, dim2) schema = tiledb.ArraySchema( domain=dom, sparse=True, cell_order=sparse_cell_order, attrs=[attr] ) tiledb.Array.create(uri, schema) with tiledb.open(uri, "w") as A: A["a1", 0] = 1 A["a1", 0.25] = 2 A["a2", 0.5] = 3 A["a3", 0.25] = 4 with tiledb.open(uri, "r") as A: self.assertEqual( A.unique_dim_values(), OrderedDict( [("dim1", (b"a1", b"a2", b"a3")), ("dim2", (0.0, 0.25, 0.5))] ), ) self.assertEqual(A.unique_dim_values("dim1"), (b"a1", b"a2", b"a3")) self.assertEqual(A.unique_dim_values("dim2"), (0, 0.25, 0.5)) with self.assertRaises(ValueError): A.unique_dim_values(0) with self.assertRaises(ValueError): A.unique_dim_values("dim3") def test_sparse_write_for_zero_attrs(self): uri = self.path("test_sparse_write_to_zero_attrs") dim = tiledb.Dim(name="dim", domain=(0, 9), dtype=np.float64) schema = tiledb.ArraySchema(domain=tiledb.Domain(dim), sparse=True) tiledb.Array.create(uri, schema) coords = [1, 2.0, 3.5] with tiledb.open(uri, "w") as A: A[coords] = None with tiledb.open(uri, "r") as A: output = A.query()[:] assert list(output.keys()) == ["dim"] assert_array_equal(output["dim"][:], coords) class TestDenseIndexing(DiskTestCase): def _test_index(self, A, T, idx): expected = A[idx] actual = T[idx] assert_array_equal(expected, actual) good_index_1d = [ # single value 42, -1, # slices slice(0, 1050), slice(50, 150), slice(0, 2000), slice(-150, -50), # TODO: indexing failures # slice(-2000, 2000), # slice(0, 0), # empty result # slice(-1, 0), # empty result # total selections slice(None), Ellipsis, (), (Ellipsis, slice(None)), # slice with step slice(None), slice(None, None), slice(None, None, 1), slice(None, None, 10), slice(None, None, 100), slice(None, None, 1000), slice(None, None, 10000), slice(0, 1050), slice(0, 1050, 1), slice(0, 1050, 10), slice(0, 1050, 100), slice(0, 1050, 1000), slice(0, 1050, 10000), slice(1, 31, 3), slice(1, 31, 30), slice(1, 31, 300), slice(81, 121, 3), slice(81, 121, 30), slice(81, 121, 300), slice(50, 150), slice(50, 150, 1), slice(50, 150, 10), # TODO: negative steps slice(None, None, -1), slice(None, None, -10), slice(None, None, -100), slice(None, None, -1000), slice(None, None, -10000), # slice(1050, -1, -1), # slice(1050, -1, -10), # slice(1050, -1, -100), # slice(1050, -1, -1000), # slice(1050, -1, -10000), # slice(1050, 0, -1), # slice(1050, 0, -10), # slice(1050, 0, -100), # slice(1050, 0, -1000), # slice(1050, 0, -10000), # slice(150, 50, -1), # slice(150, 50, -10), # slice(31, 1, -3), # slice(121, 81, -3), # slice(-1, 0, -1), ] 
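# --- Illustrative sketch (helper name is an assumption): each index above
# is applied to both the NumPy baseline and the TileDB array, i.e. dense
# indexing is expected to mirror NumPy basic-slicing semantics.
def demo_check_index(np_baseline, tiledb_array, idx):
    from numpy.testing import assert_array_equal

    assert_array_equal(np_baseline[idx], tiledb_array[idx])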
bad_index_1d = [2.3, "foo", b"xxx", None, (0, 0), (slice(None), slice(None))] def test_index_1d(self): A = np.arange(1050, dtype=int) dom = tiledb.Domain(tiledb.Dim(domain=(0, 1049), tile=100)) att = tiledb.Attr(dtype=int) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: for idx in self.good_index_1d: self._test_index(A, T, idx) for idx in self.bad_index_1d: with self.assertRaises(IndexError): T[idx] good_index_2d = [ # single row 42, -1, (42, slice(None)), (-1, slice(None)), # single col (slice(None), 4), (slice(None), -1), # row slices slice(None), slice(0, 1000), slice(250, 350), slice(0, 2000), slice(-350, -250), slice(0, 0), # empty result slice(-1, 0), # empty result slice(-2000, 0), slice(-2000, 2000), # 2D slices (slice(None), slice(1, 5)), (slice(250, 350), slice(None)), (slice(250, 350), slice(1, 5)), (slice(250, 350), slice(-5, -1)), (slice(250, 350), slice(-50, 50)), (slice(250, 350, 10), slice(1, 5)), (slice(250, 350), slice(1, 5, 2)), (slice(250, 350, 33), slice(1, 5, 3)), # total selections (slice(None), slice(None)), Ellipsis, (), (Ellipsis, slice(None)), (Ellipsis, slice(None), slice(None)), # TODO: negative steps # slice(None, None, -1), # (slice(None, None, -1), slice(None)), ] bad_index_2d = [ 2.3, "foo", b"xxx", None, (2.3, slice(None)), (0, 0, 0), (slice(None), slice(None), slice(None)), ] def test_index_2d(self): A = np.arange(10000).reshape((1000, 10)) dom = tiledb.Domain( tiledb.Dim(domain=(0, 999), tile=100), tiledb.Dim(domain=(0, 9), tile=2) ) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(self.path("foo"), schema) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A with tiledb.DenseArray(self.path("foo"), mode="r") as T: for idx in self.good_index_2d: self._test_index(A, T, idx) for idx in self.bad_index_2d: with self.assertRaises(IndexError): T[idx] class TestFilterTest(unittest.TestCase): def test_filter(self): gzip_filter = tiledb.libtiledb.GzipFilter(level=10) self.assertIsInstance(gzip_filter, tiledb.libtiledb.Filter) self.assertEqual(gzip_filter.level, 10) bw_filter = tiledb.libtiledb.BitWidthReductionFilter(window=10) self.assertIsInstance(bw_filter, tiledb.libtiledb.Filter) self.assertEqual(bw_filter.window, 10) filter_list = tiledb.libtiledb.FilterList( [gzip_filter, bw_filter], chunksize=1024 ) self.assertEqual(filter_list.chunksize, 1024) self.assertEqual(len(filter_list), 2) self.assertEqual(filter_list[0].level, gzip_filter.level) self.assertEqual(filter_list[1].window, bw_filter.window) # test filter list iteration self.assertEqual(len(list(filter_list)), 2) # test `filters` kwarg accepts python list of filters tiledb.Attr("foo", dtype=np.int64, filters=[gzip_filter]) tiledb.Attr("foo", dtype=np.int64, filters=(gzip_filter,)) attr = tiledb.Attr("foo", dtype=np.int64, filters=filter_list) self.assertEqual(len(attr.filters), 2) self.assertEqual(attr.filters.chunksize, filter_list.chunksize) def test_filter_list(self): # should be constructible without a `filters` keyword arg set filter_list1 = tiledb.FilterList() filter_list1.append(tiledb.GzipFilter()) self.assertEqual(len(filter_list1), 1) filter_list2 = [x for x in filter_list1] attr = tiledb.Attr(filters=filter_list2) self.assertEqual(len(attr.filters), 1) def test_all_filters(self): # test initialization filters = [ tiledb.NoOpFilter(),
tiledb.GzipFilter(), tiledb.ZstdFilter(), tiledb.LZ4Filter(), tiledb.RleFilter(), tiledb.Bzip2Filter(), tiledb.DoubleDeltaFilter(), tiledb.BitWidthReductionFilter(), tiledb.BitShuffleFilter(), tiledb.ByteShuffleFilter(), tiledb.PositiveDeltaFilter(), tiledb.ChecksumSHA256Filter(), tiledb.ChecksumMD5Filter(), ] # make sure that repr works and round-trips correctly for f in filters: # some of these have attributes, so we just check the class name here self.assertTrue(type(f).__name__ in repr(f)) tmp_globals = dict() setup = "from tiledb import *" exec(setup, tmp_globals) filter_repr = repr(f) new_filter = None try: new_filter = eval(filter_repr, tmp_globals) except Exception as exc: warn_str = ( """Exception during FilterTest filter repr eval""" + """, filter repr string was:\n""" + """'''""" + """\n{}\n'''""".format(filter_repr) ) warnings.warn(warn_str) raise self.assertEqual(new_filter, f) class TestDatetimeSlicing(DiskTestCase): def test_dense_datetime_vector(self): uri = self.path("foo_datetime_vector") # Domain is 10 years, day resolution, one tile per 365 days dim = tiledb.Dim( name="d1", domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), tile=np.timedelta64(365, "D"), dtype=np.datetime64("", "D").dtype, ) dom = tiledb.Domain(dim) schema = tiledb.ArraySchema( domain=dom, attrs=(tiledb.Attr("a1", dtype=np.float64),) ) tiledb.Array.create(uri, schema) # Write a few years of data at the beginning using a timedelta object ndays = 365 * 2 a1_vals = np.random.rand(ndays) start = np.datetime64("2010-01-01") # Datetime indexing is inclusive, so a delta of one less end = start + np.timedelta64(ndays - 1, "D") with tiledb.DenseArray(uri, "w") as T: T[start:end] = {"a1": a1_vals} # Read back data with tiledb.DenseArray(uri, "r", attr="a1") as T: assert_array_equal(T[start:end], a1_vals) # Check nonempty domain with tiledb.DenseArray(uri, "r") as T: nonempty = T.nonempty_domain() d1_nonempty = nonempty[0] self.assertEqual(d1_nonempty[0].dtype, np.datetime64("", "D")) self.assertEqual(d1_nonempty[1].dtype, np.datetime64("", "D")) self.assertTupleEqual(d1_nonempty, (start, end)) # Slice a few days from the middle using two datetimes with tiledb.DenseArray(uri, "r", attr="a1") as T: # Slice using datetimes actual = T[np.datetime64("2010-11-01") : np.datetime64("2011-01-31")] # Convert datetime interval to integer offset/length into original array # must be cast to int because float slices are not allowed in NumPy 1.12+ read_offset = int( (np.datetime64("2010-11-01") - start) / np.timedelta64(1, "D") ) read_ndays = int( (np.datetime64("2011-01-31") - np.datetime64("2010-11-01") + 1) / np.timedelta64(1, "D") ) expected = a1_vals[read_offset : read_offset + read_ndays] assert_array_equal(actual, expected) # Slice the first year with tiledb.DenseArray(uri, "r", attr="a1") as T: actual = T[np.datetime64("2010") : np.datetime64("2011")] # Convert datetime interval to integer offset/length into original array read_offset = int( (np.datetime64("2010-01-01") - start) / np.timedelta64(1, "D") ) read_ndays = int( (np.datetime64("2011-01-01") - np.datetime64("2010-01-01") + 1) / np.timedelta64(1, "D") ) expected = a1_vals[read_offset : read_offset + read_ndays] assert_array_equal(actual, expected) # Slice open spans with tiledb.DenseArray(uri, "r", attr="a1") as T: # Convert datetime interval to integer offset/length into original array read_offset = int( (np.datetime64("2010-01-01") - start) / np.timedelta64(1, "D") ) read_ndays = int( (np.datetime64("2011-01-31") - np.datetime64("2010-01-01") 
+ 1) / np.timedelta64(1, "D") ) expected = a1_vals[read_offset : read_offset + read_ndays] # note we only wrote first two years actual = T.multi_index[np.datetime64("2010-01-01") :]["a1"][:read_ndays] assert_array_equal(actual, expected) actual2 = T[np.datetime64("2010-01-01") :][:read_ndays] assert_array_equal(actual2, expected) def test_sparse_datetime_vector(self, sparse_cell_order): uri = self.path("foo_datetime_sparse_vector") # ns resolution, one tile per second, max domain possible dim = tiledb.Dim( name="d1", domain=( np.datetime64(0, "ns"), np.datetime64(int(np.iinfo(np.int64).max) - 1000000000, "ns"), ), tile=np.timedelta64(1, "s"), dtype=np.datetime64("", "ns").dtype, ) self.assertEqual(dim.tile, np.timedelta64("1000000000", "ns")) dom = tiledb.Domain(dim) schema = tiledb.ArraySchema( domain=dom, sparse=True, cell_order=sparse_cell_order, attrs=(tiledb.Attr("a1", dtype=np.float64),), ) tiledb.Array.create(uri, schema) # Write 10k cells every 1000 ns starting at time 0 coords = np.datetime64(0, "ns") + np.arange(0, 10000 * 1000, 1000) a1_vals = np.random.rand(len(coords)) with tiledb.SparseArray(uri, "w") as T: T[coords] = {"a1": a1_vals} # Read all with tiledb.SparseArray(uri, "r") as T: assert_array_equal(T[:]["a1"], a1_vals) # Read back first 10 cells with tiledb.SparseArray(uri, "r") as T: start = np.datetime64(0, "ns") vals = T[start : start + np.timedelta64(10000, "ns")]["a1"] assert_array_equal(vals, a1_vals[0:11]) # Test open ended ranges multi_index vals2 = T.multi_index[start:]["a1"] assert_array_equal(vals2, a1_vals) stop = np.datetime64(int(np.iinfo(np.int64).max) - 1000000000, "ns") vals3 = T.multi_index[:stop]["a1"] assert_array_equal(vals3, a1_vals) def test_datetime_types(self, sparse_cell_order): units = ["h", "m", "s", "ms", "us", "ns", "ps", "fs"] for res in units: uri = self.path("test_datetime_type_" + res) tmax = 1000 tile = np.timedelta64(1, res) dim = tiledb.Dim( name="d1", domain=(None, None), tile=tile, dtype=np.datetime64("", res).dtype, ) dom = tiledb.Domain(dim) schema = tiledb.ArraySchema( domain=dom, sparse=True, cell_order=sparse_cell_order, attrs=(tiledb.Attr("a1", dtype=np.float64),), ) tiledb.Array.create(uri, schema) # Write tmax cells every 10 units starting at time 0 coords = np.datetime64(0, res) + np.arange( 0, tmax, 10 ) # np.arange(0, 10000 * 1000, 1000) a1_vals = np.random.rand(len(coords)) with tiledb.SparseArray(uri, "w") as T: T[coords] = {"a1": a1_vals} # Read all with tiledb.SparseArray(uri, "r") as T: assert_array_equal(T[:]["a1"], a1_vals) # Read back first 10 cells with tiledb.SparseArray(uri, "r") as T: start = np.datetime64(0, res) vals = T[start : start + np.timedelta64(int(tmax / 10), res)]["a1"] assert_array_equal(vals, a1_vals[0:11]) class PickleTest(DiskTestCase): # test that DenseArray and View can be pickled for multiprocess use # note that the current pickling is by URI and attributes (it is # not, and likely should not be, a way to serialize array data) @pytest.mark.parametrize("sparse", [True, False]) def test_pickle_roundtrip(self, sparse): uri = self.path("test_pickle_roundtrip") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=3)) schema = tiledb.ArraySchema(domain=dom, attrs=(tiledb.Attr(""),), sparse=sparse) tiledb.libtiledb.Array.create(uri, schema) with tiledb.open(uri, "w") as T: if sparse: T[[0, 1, 2]] = np.random.randint(10, size=3) else: T[:] = np.random.randint(10, size=3) with tiledb.open(uri, "r") as T: with io.BytesIO() as buf: pickle.dump(T, buf) buf.seek(0) with pickle.load(buf) as T2: 
assert_array_equal(T.df[:], T2.df[:]) with io.BytesIO() as buf, tiledb.open(uri) as V: pickle.dump(V, buf) buf.seek(0) with pickle.load(buf) as V2: # make sure anonymous view pickles and round-trips assert_array_equal(V.df[:], V2.df[:]) @tiledb.scope_ctx({"vfs.s3.region": "kuyper-belt-1", "vfs.max_parallel_ops": "1"}) def test_pickle_with_config(self): uri = self.path("pickle_config") T = tiledb.DenseArray.from_numpy(uri, np.random.rand(3, 3)) with io.BytesIO() as buf: pickle.dump(T, buf) buf.seek(0) T2 = pickle.load(buf) assert_array_equal(T, T2) self.maxDiff = None d1 = tiledb.default_ctx().config().dict() d2 = T2._ctx_().config().dict() self.assertEqual(d1["vfs.s3.region"], d2["vfs.s3.region"]) self.assertEqual(d1["vfs.max_parallel_ops"], d2["vfs.max_parallel_ops"]) T.close() T2.close() @pytest.mark.parametrize("sparse", [True, False]) def test_pickle_with_tuple_timestamps(self, sparse): A = np.random.randint(10, size=3) path = self.path("test_pickle_with_tuple_timestamps") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=3, dtype=np.int64)) att = tiledb.Attr(dtype=A.dtype) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=sparse) tiledb.libtiledb.Array.create(path, schema) for ts in range(1, 5): with tiledb.open(path, timestamp=ts, mode="w") as T: if sparse: T[[0, 1, 2]] = A * ts else: T[:] = A * ts with tiledb.open(path, timestamp=(2, 3), mode="r") as T: with io.BytesIO() as buf: pickle.dump(T, buf) buf.seek(0) with pickle.load(buf) as T2: assert_array_equal(T.df[:], T2.df[:]) assert T2.timestamp_range == (2, 3) with io.BytesIO() as buf, tiledb.open(path, timestamp=(2, 3)) as V: pickle.dump(V, buf) buf.seek(0) with pickle.load(buf) as V2: # make sure anonymous view pickles and round-trips assert_array_equal(V.df[:], V2.df[:]) assert V2.timestamp_range == (2, 3) class ArrayViewTest(DiskTestCase): def test_view_multiattr(self): uri = self.path("foo_multiattr") dom = tiledb.Domain( tiledb.Dim(domain=(0, 2), tile=3), tiledb.Dim(domain=(0, 2), tile=3) ) schema = tiledb.ArraySchema( domain=dom, attrs=(tiledb.Attr(""), tiledb.Attr("named")) ) tiledb.libtiledb.Array.create(uri, schema) anon_ar = np.random.rand(3, 3) named_ar = np.random.rand(3, 3) with tiledb.DenseArray(uri, "w") as T: T[:] = {"": anon_ar, "named": named_ar} with self.assertRaises(KeyError): T = tiledb.DenseArray(uri, "r", attr="foo111") with tiledb.DenseArray(uri, "r", attr="named") as T: assert_array_equal(T, named_ar) # make sure each attr view can pickle and round-trip with io.BytesIO() as buf: pickle.dump(T, buf) buf.seek(0) with pickle.load(buf) as T_rt: assert_array_equal(T, T_rt) with tiledb.DenseArray(uri, "r", attr="") as T: assert_array_equal(T, anon_ar) with io.BytesIO() as buf: pickle.dump(T, buf) buf.seek(0) with pickle.load(buf) as tmp: assert_array_equal(tmp, anon_ar) # set subarray on multi-attribute range_ar = np.arange(0, 9).reshape(3, 3) with tiledb.DenseArray(uri, "w", attr="named") as V_named: V_named[1:3, 1:3] = range_ar[1:3, 1:3] with tiledb.DenseArray(uri, "r", attr="named") as V_named: assert_array_equal(V_named[1:3, 1:3], range_ar[1:3, 1:3]) class RWTest(DiskTestCase): def test_read_write(self, capfd): dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=3)) att = tiledb.Attr(dtype="int64") schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.libtiledb.Array.create(self.path("foo"), schema) np_array = np.array([1, 2, 3], dtype="int64") with tiledb.DenseArray(self.path("foo"), mode="w") as arr: arr.write_direct(np_array) with tiledb.DenseArray(self.path("foo"), mode="r") as arr: 
arr.dump() assert_captured(capfd, "Array type: dense") self.assertEqual(arr.nonempty_domain(), ((0, 2),)) self.assertEqual(arr.ndim, np_array.ndim) assert_array_equal(arr.read_direct(), np_array) class TestNumpyToArray(DiskTestCase): def test_to_array0d(self): # Cannot create 0-dim arrays in TileDB np_array = np.array(1) with self.assertRaises(tiledb.TileDBError): with tiledb.DenseArray.from_numpy(self.path("foo"), np_array) as A: pass def test_to_array1d(self): np_array = np.array([1.0, 2.0, 3.0]) with tiledb.DenseArray.from_numpy(self.path("foo"), np_array) as arr: assert_array_equal(arr[:], np_array) def test_to_array2d(self): np_array = np.ones((100, 100), dtype="i8") with tiledb.DenseArray.from_numpy(self.path("foo"), np_array) as arr: assert_array_equal(arr[:], np_array) def test_to_array3d(self): np_array = np.ones((1, 1, 1), dtype="i1") with tiledb.DenseArray.from_numpy(self.path("foo"), np_array) as arr: assert_array_equal(arr[:], np_array) def test_bytes_to_array1d(self): np_array = np.array( [b"abcdef", b"gh", b"ijkl", b"mnopqrs", b"", b"1234545lkjalsdfj"], dtype=object, ) with tiledb.DenseArray.from_numpy(self.path("foo"), np_array) as arr: assert_array_equal(arr[:], np_array) with tiledb.DenseArray(self.path("foo")) as arr_reload: assert_array_equal(arr_reload[:], np_array) def test_unicode_to_array1d(self): np_array = np.array( [ "1234545lkjalsdfj", "mnopqrs", "ijkl", "gh", "abcdef", "aαbββcγγγdδδδδ", "", '"aαbββc', "", "γγγdδδδδ", ], dtype=object, ) with tiledb.DenseArray.from_numpy(self.path("foo"), np_array) as arr: assert_array_equal(arr[:], np_array) with tiledb.DenseArray(self.path("foo")) as arr_reload: assert_array_equal(arr_reload[:], np_array) def test_array_interface(self): # Tests that __array__ interface works np_array1 = np.arange(1, 10) with tiledb.DenseArray.from_numpy(self.path("arr1"), np_array1) as arr1: assert_array_equal(np.array(arr1), np_array1) # Test that __array__ interface throws an error when number of attributes > 1 dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=3)) foo = tiledb.Attr("foo", dtype="i8") bar = tiledb.Attr("bar", dtype="i8") schema = tiledb.ArraySchema(domain=dom, attrs=(foo, bar)) tiledb.DenseArray.create(self.path("arr2"), schema) with self.assertRaises(ValueError): with tiledb.DenseArray(self.path("arr2"), mode="r") as arr2: np.array(arr2) def test_array_getindex(self): # Tests that __getindex__ interface works np_array = np.arange(1, 10) with tiledb.DenseArray.from_numpy(self.path("foo"), np_array) as arr: assert_array_equal(arr[5:10], np_array[5:10]) def test_to_array1d_attr_name(self): np_array = np.array([1.0, 2.0, 3.0]) with tiledb.DenseArray.from_numpy( self.path("foo"), np_array, attr_name="a" ) as arr: assert_array_equal(arr[:]["a"], np_array) def test_from_numpy_timestamp(self): path = self.path() with tiledb.from_numpy(path, np.array([1, 2, 3]), timestamp=10) as A: pass with tiledb.open(path, timestamp=(0, 9)) as A: assert A.nonempty_domain() == None with tiledb.open(path, timestamp=(10, 10)) as A: assert A.nonempty_domain() == ((0, 2),) class TestVFS(DiskTestCase): def test_supports(self): vfs = tiledb.VFS() self.assertTrue(vfs.supports("file")) self.assertIsInstance(vfs.supports("s3"), bool) self.assertIsInstance(vfs.supports("hdfs"), bool) self.assertIsInstance(vfs.supports("gcs"), bool) self.assertIsInstance(vfs.supports("azure"), bool) with self.assertRaises(ValueError): vfs.supports("invalid") def test_vfs_config(self): opt = {"region": "us-west-x1234"} params = [opt, tiledb.Config(opt)] for param in params: 
vfs = tiledb.VFS(param) assert vfs.config()["region"] == opt["region"] def test_dir(self): vfs = tiledb.VFS() dir = self.path("foo") self.assertFalse(vfs.is_dir(dir)) # create vfs.create_dir(dir) if pytest.tiledb_vfs != "s3": self.assertTrue(vfs.is_dir(dir)) # remove vfs.remove_dir(dir) self.assertFalse(vfs.is_dir(dir)) # create nested path dir = self.path("foo/bar") if pytest.tiledb_vfs != "s3": # this fails locally because "foo" base path does not exist # this will not fail on s3 because there is no concept of directory with self.assertRaises(tiledb.TileDBError): vfs.create_dir(dir) vfs.create_dir(self.path("foo")) vfs.create_dir(self.path("foo/bar")) if pytest.tiledb_vfs != "s3": self.assertTrue(vfs.is_dir(dir)) def test_file(self): vfs = tiledb.VFS() file = self.path("foo") self.assertFalse(vfs.is_file(file)) # create vfs.touch(file) self.assertTrue(vfs.is_file(file)) # remove vfs.remove_file(file) self.assertFalse(vfs.is_file(file)) # check nested path file = self.path("foo/bar") if pytest.tiledb_vfs != "s3": # this fails locally because "foo" base path does not exist # this will not fail on s3 because there is no concept of directory with self.assertRaises(tiledb.TileDBError): vfs.touch(file) def test_move(self): vfs = tiledb.VFS() vfs.create_dir(self.path("foo")) vfs.create_dir(self.path("bar")) vfs.touch(self.path("bar/baz")) self.assertTrue(vfs.is_file(self.path("bar/baz"))) vfs.move_file(self.path("bar/baz"), self.path("foo/baz")) self.assertFalse(vfs.is_file(self.path("bar/baz"))) self.assertTrue(vfs.is_file(self.path("foo/baz"))) # moving to invalid dir should raise an error if pytest.tiledb_vfs != "s3": # this fails locally because "foo" base path does not exist # this will not fail on s3 because there is no concept of directory with self.assertRaises(tiledb.TileDBError): vfs.move_dir(self.path("foo/baz"), self.path("do_not_exist/baz")) @pytest.mark.skipif( sys.platform == "win32", reason="VFS copy commands from core are not supported on Windows", ) def test_copy(self): vfs = tiledb.VFS() vfs.create_dir(self.path("foo")) vfs.create_dir(self.path("bar")) vfs.touch(self.path("foo/baz")) self.assertTrue(vfs.is_file(self.path("foo/baz"))) vfs.copy_file(self.path("foo/baz"), self.path("bar/baz")) self.assertTrue(vfs.is_file(self.path("foo/baz"))) self.assertTrue(vfs.is_file(self.path("bar/baz"))) vfs.copy_dir(self.path("foo"), self.path("baz")) self.assertTrue(vfs.is_file(self.path("baz/baz"))) # copying to invalid dir should raise an error if pytest.tiledb_vfs != "s3": # this fails locally because "foo" base path does not exist # this will not fail on s3 because there is no concept of directory with self.assertRaises(tiledb.TileDBError): vfs.copy_dir(self.path("foo/baz"), self.path("do_not_exist/baz")) def test_write_read(self): vfs = tiledb.VFS() buffer = b"bar" fio = vfs.open(self.path("foo"), "wb") fio.write(buffer) self.assertEqual(vfs.file_size(self.path("foo")), 3) fio = vfs.open(self.path("foo"), "rb") self.assertEqual(fio.read(3), buffer) fio.close() buffer = b"abc" fio = vfs.open(self.path("abc"), "wb") with pytest.warns(DeprecationWarning, match="Use `FileIO.write`"): vfs.write(fio, buffer) with pytest.warns(DeprecationWarning, match="Use `FileIO.close`"): vfs.close(fio) self.assertEqual(vfs.file_size(self.path("abc")), 3) fio = vfs.open(self.path("abc"), "rb") with pytest.warns(DeprecationWarning, match="Use `FileIO.read`"): self.assertEqual(vfs.read(fio, 0, 3), buffer) fio.close() # write / read empty input fio = vfs.open(self.path("baz"), "wb") fio.write(b"") 
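# writing an empty buffer is valid: the file still comes into existence with # length zero, which the file_size assertion below confirms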
fio.close() self.assertEqual(vfs.file_size(self.path("baz")), 0) fio = vfs.open(self.path("baz"), "rb") self.assertEqual(fio.read(0), b"") fio.close() # read from file that does not exist with self.assertRaises(tiledb.TileDBError): vfs.open(self.path("do_not_exist"), "rb") def test_io(self): vfs = tiledb.VFS() buffer = b"0123456789" with tiledb.FileIO(vfs, self.path("foo"), mode="wb") as fio: fio.write(buffer) fio.flush() self.assertEqual(fio.tell(), len(buffer)) with tiledb.FileIO(vfs, self.path("foo"), mode="rb") as fio: with self.assertRaises(IOError): fio.write(b"foo") self.assertEqual(vfs.file_size(self.path("foo")), len(buffer)) fio = tiledb.FileIO(vfs, self.path("foo"), mode="rb") self.assertEqual(fio.read(3), b"012") self.assertEqual(fio.tell(), 3) self.assertEqual(fio.read(3), b"345") self.assertEqual(fio.tell(), 6) self.assertEqual(fio.read(10), b"6789") self.assertEqual(fio.tell(), 10) # seek from beginning fio.seek(0) self.assertEqual(fio.tell(), 0) self.assertEqual(fio.read(), buffer) # seek offset must be non-negative when whence is SEEK_SET with self.assertRaises(ValueError): fio.seek(-1, 0) # seek from current position fio.seek(5) self.assertEqual(fio.tell(), 5) fio.seek(3, 1) self.assertEqual(fio.tell(), 8) fio.seek(-3, 1) self.assertEqual(fio.tell(), 5) # seek from end fio.seek(-4, 2) self.assertEqual(fio.tell(), 6) # Test readall fio.seek(0) self.assertEqual(fio.readall(), buffer) self.assertEqual(fio.tell(), 10) fio.seek(5) self.assertEqual(fio.readall(), buffer[5:]) self.assertEqual(fio.readall(), b"") # Reading from the end should return empty fio.seek(0) fio.read() self.assertEqual(fio.read(), b"") # Test writing and reading lines with TextIOWrapper lines = [rand_utf8(random.randint(0, 50)) + "\n" for _ in range(10)] rand_uri = self.path("test_fio.rand") with tiledb.FileIO(vfs, rand_uri, "wb") as f: txtio = io.TextIOWrapper(f, encoding="utf-8") txtio.writelines(lines) txtio.flush() with tiledb.FileIO(vfs, rand_uri, "rb") as f2: txtio = io.TextIOWrapper(f2, encoding="utf-8") self.assertEqual(txtio.readlines(), lines) def test_ls(self): basepath = self.path("test_vfs_ls") self.vfs.create_dir(basepath) for id in (1, 2, 3): dir = os.path.join(basepath, "dir" + str(id)) self.vfs.create_dir(dir) fname = os.path.join(basepath, "file_" + str(id)) with tiledb.FileIO(self.vfs, fname, "wb") as fio: fio.write(b"") expected = ("file_1", "file_2", "file_3") # empty directories do not "exist" on s3 if pytest.tiledb_vfs != "s3": expected = expected + ("dir1", "dir2", "dir3") self.assertSetEqual( set(expected), set( map( lambda x: os.path.basename(x.split("test_vfs_ls")[1]), self.vfs.ls(basepath), ) ), ) def test_dir_size(self): vfs = tiledb.VFS() path = self.path("test_vfs_dir_size") vfs.create_dir(path) rand_sizes = np.random.choice(100, size=4, replace=False) for size in rand_sizes: file_path = os.path.join(path, "f_" + str(size)) with tiledb.FileIO(vfs, file_path, "wb") as f: data = os.urandom(size) f.write(data) self.assertEqual(vfs.dir_size(path), sum(rand_sizes)) def test_open_with(self): uri = self.path("test_open_with") vfs = tiledb.VFS() buffer = b"0123456789" with vfs.open(uri, mode="wb") as fio: fio.write(buffer) fio.flush() self.assertEqual(fio.tell(), len(buffer)) with vfs.open(uri, mode="rb") as fio: with self.assertRaises(IOError): fio.write(b"foo") self.assertEqual(fio.read(len(buffer)), buffer) class ConsolidationTest(DiskTestCase): def test_array_vacuum(self): dshape = (0, 19) num_writes = 10 def create_array(target_path): dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=3)) att = 
tiledb.Attr(dtype="int64") schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.libtiledb.Array.create(target_path, schema) def write_fragments(target_path): for i in range(num_writes): with tiledb.open(target_path, "w") as A: A[i : dshape[1]] = np.random.rand(dshape[1] - i) # array #1 path = self.path("test_array_vacuum") create_array(path) write_fragments(path) fi = tiledb.FragmentInfoList(path) self.assertEqual(len(fi), num_writes) tiledb.consolidate(path) tiledb.vacuum(path) fi = tiledb.FragmentInfoList(path) self.assertEqual(len(fi), 1) # array #2 path2 = self.path("test_array_vacuum_fragment_meta") create_array(path2) write_fragments(path2) fi = tiledb.FragmentInfoList(path2) self.assertEqual(fi.unconsolidated_metadata_num, num_writes) tiledb.consolidate( path2, config=tiledb.Config({"sm.consolidation.mode": "fragment_meta"}) ) tiledb.vacuum(path2, config=tiledb.Config({"sm.vacuum.mode": "fragment_meta"})) fi = tiledb.FragmentInfoList(path2) self.assertEqual(fi.unconsolidated_metadata_num, 0) # array #3 path3 = self.path("test_array_vacuum2") create_array(path3) write_fragments(path3) fi = tiledb.FragmentInfoList(path3) self.assertEqual(fi.unconsolidated_metadata_num, num_writes) conf = tiledb.Config({"sm.consolidation.mode": "fragment_meta"}) with tiledb.open(path3, "w") as A: A.consolidate(config=conf) fi = tiledb.FragmentInfoList(path3) self.assertEqual(fi.unconsolidated_metadata_num, 0) def test_array_consolidate_with_timestamp(self): dshape = (1, 3) num_writes = 10 def create_array(target_path, dshape): dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) att = tiledb.Attr(dtype="int64") schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.libtiledb.Array.create(target_path, schema) def write_fragments(target_path, dshape, num_writes): for i in range(1, num_writes + 1): with tiledb.open(target_path, "w", timestamp=i) as A: A[[1, 2, 3]] = np.random.rand(dshape[1]) path = self.path("test_array_consolidate_with_timestamp") create_array(path, dshape) write_fragments(path, dshape, num_writes) frags = tiledb.FragmentInfoList(path) assert len(frags) == 10 tiledb.consolidate(path, timestamp=(1, 4)) frags = tiledb.FragmentInfoList(path) assert len(frags) == 7 assert len(frags.to_vacuum) == 4 tiledb.vacuum(path, timestamp=(1, 2)) frags = tiledb.FragmentInfoList(path) assert len(frags.to_vacuum) == 2 tiledb.vacuum(path) frags = tiledb.FragmentInfoList(path) assert len(frags.to_vacuum) == 0 conf = tiledb.Config( {"sm.consolidation.timestamp_start": 5, "sm.consolidation.timestamp_end": 9} ) tiledb.consolidate(path, config=conf) tiledb.vacuum(path) frags = tiledb.FragmentInfoList(path) assert len(frags.timestamp_range) == 3 @pytest.mark.skipif(sys.platform == "win32", reason="Only run MemoryTest on linux") class MemoryTest(DiskTestCase): # sanity check that memory usage doesn't increase more than 2x when reading 40MB 100x # https://github.com/TileDB-Inc/TileDB-Py/issues/150 @staticmethod def use_many_buffers(path): # https://stackoverflow.com/questions/938733/total-memory-used-by-python-process process = psutil.Process(os.getpid()) x = np.ones(10000000, dtype=np.float32) d1 = tiledb.Dim( "test_domain", domain=(0, x.shape[0] - 1), tile=10000, dtype="uint32" ) domain = tiledb.Domain(d1) v = tiledb.Attr("test_value", dtype="float32") schema = tiledb.ArraySchema( domain=domain, attrs=(v,), cell_order="row-major", tile_order="row-major" ) A = tiledb.DenseArray.create(path, schema) with tiledb.DenseArray(path, mode="w") as A: A[:] = {"test_value": x} with 
tiledb.DenseArray(path, mode="r") as data: data[:] initial = process.memory_info().rss print(" initial RSS: {}".format(round(initial / (10**6), 2))) for i in range(100): # read but don't store: this memory should be freed data[:] if i % 10 == 0: print( " read iter {}, RSS (MB): {}".format( i, round(process.memory_info().rss / (10**6), 2) ) ) return initial def test_memory_cleanup(self, capfd): # run function which reads 100x from a 40MB test array # TODO: RSS is too loose to do this end-to-end, so should use instrumentation. print("Starting TileDB-Py memory test:") initial = self.use_many_buffers(self.path("test_memory_cleanup")) process = psutil.Process(os.getpid()) final = process.memory_info().rss print(" final RSS: {}".format(round(final / (10**6), 2))) gc.collect() final_gc = process.memory_info().rss print(" final RSS after forced GC: {}".format(round(final_gc / (10**6), 2))) assert_captured(capfd, "final RSS") self.assertTrue(final < (2 * initial)) class TestHighlevel(DiskTestCase): def test_open(self): uri = self.path("test_open") array = np.random.rand(10) schema = tiledb.schema_like(array) tiledb.Array.create(uri, schema) with tiledb.open(uri, "w") as A: A[:] = array * 10 A[:] = array last_fragment_ts = list(A.last_write_info.items())[0][1][0] ctx = tiledb.Ctx() with tiledb.DenseArray(uri, ctx=ctx) as A: self.assertEqual(A._ctx_(), ctx) # test `open` with timestamp with tiledb.open(uri, timestamp=last_fragment_ts) as A: assert_array_equal(A[:], array) with tiledb.open(uri, ctx=ctx) as A: self.assertEqual(A._ctx_(), ctx) config = tiledb.Config() with tiledb.open(uri, config=config) as A: self.assertEqual(A._ctx_().config(), config) with self.assertRaises(KeyError): # This path must test `tiledb.open` specifically # https://github.com/TileDB-Inc/TileDB-Py/issues/277 tiledb.open(uri, "r", attr="the-missing-attr") def test_ctx_thread_cleanup(self): # This test checks that contexts are destroyed correctly. # It creates new contexts repeatedly, in-process, and # checks that the total number of threads stays stable. config = {"sm.num_reader_threads": 128} ll = list() uri = self.path("test_ctx_thread_cleanup") with tiledb.from_numpy(uri, np.random.rand(100)) as A: pass thisproc = psutil.Process(os.getpid()) for n in range(0, 10): if n > 0: retry = 0 while retry < 3: try: # checking exact thread count is unreliable, so # make sure we are holding < 2x per run. self.assertTrue(len(thisproc.threads()) < 2 * start_threads) break except AssertionError as exc: raise exc except RuntimeError as rterr: retry += 1 if retry > 2: raise rterr warnings.warn( "Thread cleanup test RuntimeError: {} \n on iteration: {}".format( str(rterr), n ) ) with tiledb.DenseArray(uri, ctx=tiledb.Ctx(config)) as A: res = A[:] if n == 0: start_threads = len(thisproc.threads()) # Wrapper to execute specific code in subprocess so that we can ensure the thread count # init is correct. Necessary because multiprocess.get_context is only available in Python 3.4+, # and the multiprocessing method may be set to fork by other tests (e.g. dask).
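# Example (this mirrors ContextTest.test_init_config below): a fresh interpreter is # spawned and must see the config value in its default context: # # assert init_test_wrapper({"sm.io_concurrency_level": 3}) == 3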
def init_test_wrapper(cfg=None): python_exe = sys.executable cmd = "from test_libtiledb import *; init_test_helper({})".format(cfg) test_path = os.path.dirname(os.path.abspath(__file__)) sp_output = subprocess.check_output([python_exe, "-c", cmd], cwd=test_path) return int(sp_output.decode("UTF-8").strip()) def init_test_helper(cfg=None): tiledb.libtiledb.default_ctx(cfg) concurrency_level = tiledb.default_ctx().config()["sm.io_concurrency_level"] print(int(concurrency_level)) class ContextTest(unittest.TestCase): def test_default_ctx(self): ctx = tiledb.default_ctx() self.assertIsInstance(ctx, tiledb.Ctx) assert isinstance(ctx.config(), tiledb.libtiledb.Config) def test_scope_ctx(self): key = "sm.tile_cache_size" ctx0 = tiledb.default_ctx() new_config_dict = {key: 42} new_config = tiledb.Config({key: 78}) new_ctx = tiledb.Ctx({key: 61}) assert tiledb.default_ctx() is ctx0 assert tiledb.default_ctx().config()[key] == "10000000" with tiledb.scope_ctx(new_config_dict) as ctx1: assert tiledb.default_ctx() is ctx1 assert tiledb.default_ctx().config()[key] == "42" with tiledb.scope_ctx(new_config) as ctx2: assert tiledb.default_ctx() is ctx2 assert tiledb.default_ctx().config()[key] == "78" with tiledb.scope_ctx(new_ctx) as ctx3: assert tiledb.default_ctx() is ctx3 is new_ctx assert tiledb.default_ctx().config()[key] == "61" assert tiledb.default_ctx() is ctx2 assert tiledb.default_ctx().config()[key] == "78" assert tiledb.default_ctx() is ctx1 assert tiledb.default_ctx().config()[key] == "42" assert tiledb.default_ctx() is ctx0 assert tiledb.default_ctx().config()[key] == "10000000" @pytest.mark.skipif( "pytest.tiledb_vfs == 's3'", reason="Test not yet supported with S3" ) def test_init_config(self): self.assertEqual( int(tiledb.default_ctx().config()["sm.io_concurrency_level"]), init_test_wrapper(), ) self.assertEqual(3, init_test_wrapper({"sm.io_concurrency_level": 3})) class GetStatsTest(DiskTestCase): def test_ctx(self): tiledb.libtiledb.stats_enable() ctx = tiledb.default_ctx() uri = self.path("test_ctx") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), dtype=np.int64)) att = tiledb.Attr(dtype=np.int64) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.Array.create(uri, schema) with tiledb.open(uri, mode="w", ctx=ctx) as T: T[:] = np.random.randint(10, size=3) stats = ctx.get_stats(print_out=False) assert "Context.StorageManager.write_store" in stats def test_query(self): tiledb.libtiledb.stats_enable() uri = self.path("test_ctx") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), dtype=np.int64)) att = tiledb.Attr(dtype=np.int64) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.Array.create(uri, schema) with tiledb.open(uri, mode="w") as T: T[:] = np.random.randint(10, size=3) with tiledb.open(uri, mode="r") as T: q = T.query() assert "" == q.get_stats() q[:] stats = q.get_stats(print_out=False) assert "Context.StorageManager.Query" in stats class ReprTest(DiskTestCase): def test_attr_repr(self): attr = tiledb.Attr(name="itsanattr", dtype=np.float64) self.assertTrue( re.match( r"Attr\(name=[u]?'itsanattr', dtype='float64', var=False, nullable=False\)", repr(attr), ) ) g = dict() exec("from tiledb import Attr; from numpy import float64", g) self.assertEqual(eval(repr(attr), g), attr) def test_dim_repr(self): dtype_set = [bytes, np.bytes_] opts = { None: None, "var": True, "domain": (None, None), "filters": [tiledb.GzipFilter()], } dim_test_imports = textwrap.dedent( """ from tiledb import Dim, FilterList, GzipFilter from numpy import float64 """ ) for dtype in dtype_set: 
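# each kwarg combination generated below is round-tripped through eval(repr(dim)) # with dim_test_imports in scope; e.g. dim = Dim(name="d1", dtype=bytes, var=True) # must satisfy eval(repr(dim)) == dim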
opt_choices = [ itertools.combinations(opts.keys(), r=n) for n in range(1, len(opts) + 1) ] for opt_set in itertools.chain(*opt_choices): opt_kwarg = {k: opts[k] for k in opt_set if k} g = dict() exec(dim_test_imports, g) dim = tiledb.Dim(name="d1", dtype=dtype, **opt_kwarg) self.assertEqual(eval(repr(dim), g), dim) def test_arrayschema_repr(self, sparse_cell_order): filters = tiledb.FilterList([tiledb.ZstdFilter(-1)]) for sparse in [False, True]: cell_order = sparse_cell_order if sparse else None domain = tiledb.Domain( tiledb.Dim(domain=(1, 8), tile=2), tiledb.Dim(domain=(1, 8), tile=2) ) a1 = tiledb.Attr("val", dtype="f8", filters=filters) orig_schema = tiledb.ArraySchema( domain=domain, attrs=(a1,), sparse=sparse, cell_order=cell_order ) schema_repr = repr(orig_schema) g = dict() setup = "from tiledb import *\n" "import numpy as np\n" exec(setup, g) new_schema = None try: new_schema = eval(schema_repr, g) except Exception as exc: warn_str = ( """Exception during ReprTest schema eval""" + """, schema string was:\n""" + """'''""" + """\n{}\n'''""".format(schema_repr) ) warnings.warn(warn_str) raise self.assertEqual(new_schema, orig_schema) def test_arrayschema_repr_hilbert(self): domain = tiledb.Domain(tiledb.Dim(domain=(1, 8), tile=2)) a = tiledb.Attr("a", dtype="f8") schema = tiledb.ArraySchema( domain=domain, attrs=(a,), cell_order="hilbert", sparse=True ) assert schema.cell_order == "hilbert" assert schema.tile_order == None class NullableIOTest(DiskTestCase): def test_nullable_write(self): uri = self.path("nullable_write_test") schema = tiledb.ArraySchema( domain=tiledb.Domain( *[tiledb.Dim(name="__dim_0", domain=(0, 3), tile=4, dtype="uint64")] ), attrs=[tiledb.Attr(name="", dtype="int64", var=False, nullable=True)], ) tiledb.Array.create(uri, schema) with tiledb.open(uri, "w") as A: A._setitem_impl( slice(0, 4), np.ones(4), {"": np.array([0, 1, 0, 1], dtype=np.uint8)} ) class IncompleteTest(DiskTestCase): @pytest.mark.parametrize("non_overlapping_ranges", [True, False]) def test_incomplete_dense_varlen(self, non_overlapping_ranges): ncells = 10 path = self.path("incomplete_dense_varlen") str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)] data = np.array(str_data, dtype=np.unicode_) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data))) att = tiledb.Attr(dtype=np.unicode_, var=True) schema = tiledb.ArraySchema(dom, (att,)) tiledb.DenseArray.create(path, schema) with tiledb.DenseArray(path, mode="w") as T: T[:] = data with tiledb.DenseArray(path, mode="r") as T: assert_array_equal(data, T[:]) # set the memory to the max length of a cell # these settings force ~100 retries # TODO would be good to check repeat count here; not yet exposed # Also would be useful to have max cell config in libtiledb. 
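# rough sketch (assumed, simplified) of what the bindings do internally when a # read comes back incomplete: # submit the query # while the core reports TILEDB_INCOMPLETE: # reallocate larger result buffers and submit again # the tiny sm.memory_budget* values below force that loop on the order of 100 times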
init_buffer_bytes = 1024**2 config = tiledb.Config( { "sm.memory_budget": ncells, "sm.memory_budget_var": ncells, "py.init_buffer_bytes": init_buffer_bytes, "sm.query.sparse_unordered_with_dups.non_overlapping_ranges": non_overlapping_ranges, } ) self.assertEqual(config["py.init_buffer_bytes"], str(init_buffer_bytes)) with tiledb.DenseArray(path, mode="r", ctx=tiledb.Ctx(config)) as T2: df = T2.query(attrs=[""]).df[:] assert_array_equal(df[""], data) @pytest.mark.parametrize("allows_duplicates", [True, False]) @pytest.mark.parametrize("non_overlapping_ranges", [True, False]) def test_incomplete_sparse_varlen(self, allows_duplicates, non_overlapping_ranges): ncells = 100 path = self.path("incomplete_sparse_varlen") str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)] data = np.array(str_data, dtype=np.unicode_) coords = np.arange(ncells) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(0, len(data) + 100), tile=len(data))) att = tiledb.Attr(dtype=np.unicode_, var=True) schema = tiledb.ArraySchema( dom, (att,), sparse=True, allows_duplicates=allows_duplicates ) tiledb.SparseArray.create(path, schema) with tiledb.SparseArray(path, mode="w") as T: T[coords] = data with tiledb.SparseArray(path, mode="r") as T: assert_array_equal(data, T[:][""]) # set the memory to the max length of a cell # these settings force ~100 retries # TODO would be good to check repeat count here; not yet exposed # Also would be useful to have max cell config in libtiledb. init_buffer_bytes = 1024**2 config = tiledb.Config( { "sm.memory_budget": ncells, "sm.memory_budget_var": ncells, "py.init_buffer_bytes": init_buffer_bytes, } ) self.assertEqual(config["py.init_buffer_bytes"], str(init_buffer_bytes)) with tiledb.SparseArray(path, mode="r", ctx=tiledb.Ctx(config)) as T2: assert_array_equal(data, T2[:][""]) assert_array_equal(data, T2.multi_index[0:ncells][""]) # ensure that empty results are handled correctly assert_array_equal( T2.multi_index[101:105][""], np.array([], dtype=np.dtype("<U1")) ) assert est_results[""].offsets_bytes > 0 assert est_results[""].data_bytes > 0 if return_arrow: assert isinstance(result, pa.Table) df = result.to_pandas() else: if indexer == "df": assert isinstance(result, pd.DataFrame) df = result else: assert isinstance(result, OrderedDict) df = pd.DataFrame(result) to_slice = slice(idx, idx + len(df)) chunk = full_data[to_slice] assert np.all(chunk == df[""].values) assert np.all(df["__dim_0"] == np.arange(idx, idx + len(df))) # update the current read count idx += len(df) assert idx == len(full_data) @pytest.mark.parametrize("cell_order", ["col-major", "row-major", "hilbert"]) @pytest.mark.parametrize("tile_order", ["col-major", "row-major"]) @pytest.mark.parametrize("non_overlapping_ranges", [True, False]) def test_incomplete_global_order( self, cell_order, tile_order, non_overlapping_ranges ): uri = self.path("test_incomplete_global_order") dom = tiledb.Domain(tiledb.Dim(domain=(0, 30), tile=10, dtype=np.int64)) att = tiledb.Attr(dtype=np.int64) schema = tiledb.ArraySchema( domain=dom, attrs=(att,), sparse=True, allows_duplicates=True, cell_order=cell_order, tile_order=tile_order, ) tiledb.Array.create(uri, schema) expected_data = np.random.randint(0, 10, 30) with tiledb.open(uri, mode="w") as T: T[np.arange(30)] = expected_data init_buffer_bytes = 200 cfg = tiledb.Config( { "py.init_buffer_bytes": init_buffer_bytes, "py.exact_init_buffer_bytes": "true", "sm.query.sparse_unordered_with_dups.non_overlapping_ranges": non_overlapping_ranges, } ) with tiledb.open(uri, mode="r", 
ctx=tiledb.Ctx(cfg)) as T: actual_data = T.query(order="G")[:][""] assert_array_equal(actual_data, expected_data) @pytest.mark.parametrize("exact_init_buffer_bytes", ["true", "false"]) @pytest.mark.parametrize("non_overlapping_ranges", [True, False]) def test_offset_can_fit_data_var_size_cannot( self, exact_init_buffer_bytes, non_overlapping_ranges ): """Cover the case where the offsets buffer can fit X cells but the var-sized data of those cells cannot fit its buffer; the reader must then adjust the number of returned results back down. For example: with buffers that can hold 100KB and roughly 20KB of var-len data per cell, at most 5 cells fit in the data buffer even though the offsets buffer could index many more. """ tiledb.stats_enable() uri = self.path("test_incomplete_global_order") dom = tiledb.Domain(tiledb.Dim(domain=(0, 4), tile=1, dtype=np.int64)) att = tiledb.Attr(dtype=np.int64, var=True) schema = tiledb.ArraySchema( domain=dom, attrs=(att,), sparse=True, allows_duplicates=True, ) tiledb.Array.create(uri, schema) with tiledb.open(uri, mode="w") as T: T[np.arange(5)] = np.array( [ np.random.randint(0, 10, 10000, dtype=np.int64), np.random.randint(0, 10, 10000, dtype=np.int64), np.random.randint(0, 10, 10000, dtype=np.int64), np.random.randint(0, 10, 10000, dtype=np.int64), np.random.randint(0, 10, 101, dtype=np.int64), ], dtype="O", ) init_buffer_bytes = 160000 cfg = tiledb.Config( { "py.init_buffer_bytes": init_buffer_bytes, "py.exact_init_buffer_bytes": exact_init_buffer_bytes, "sm.query.sparse_unordered_with_dups.non_overlapping_ranges": non_overlapping_ranges, } ) with tiledb.open(uri, mode="r", ctx=tiledb.Ctx(cfg)) as T: qry = T.query() actual_data = qry[:][""] # assert_array_equal(actual_data, expected_data) # print(tiledb.main.python_internal_stats()) tiledb.stats_disable() class TestTest(DiskTestCase): def test_path(self, pytestconfig): path = self.path("foo") if pytestconfig.getoption("vfs") == "s3": assert path.startswith("s3://") @pytest.mark.skipif( sys.platform == "win32", reason="no_output fixture disabled on Windows" ) @pytest.mark.xfail( True, reason="This test prints, and should fail because of no_output fixture!" 
) def test_no_output(self): print("this test should fail") # if __name__ == '__main__': # # run a single example for in-process debugging # # better to use `pytest --gdb` if available # t = DenseArrayTest() # t.setUp() # t.test_array_1d() TileDB-Py-0.12.2/tiledb/tests/test_metadata.cc000066400000000000000000000017031417663620700210350ustar00rootroot00000000000000 #include <pybind11/numpy.h> #include <pybind11/pybind11.h> #include <pybind11/pytypes.h> #include <pybind11/stl.h> #include <exception> #define TILEDB_DEPRECATED #define TILEDB_DEPRECATED_EXPORT #include "../util.h" #include <tiledb/tiledb> // C++ #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 2 #if !defined(NDEBUG) //#include "debug.cc" #endif namespace tiledbpy { using namespace std; using namespace tiledb; namespace py = pybind11; using namespace pybind11::literals; class PyASCIIMetadataTest { public: static void write_ascii(py::str uri) { Context ctx; Array array(ctx, uri, TILEDB_WRITE); std::string st = "xyz"; array.put_metadata("abc", TILEDB_STRING_ASCII, st.length(), st.c_str()); array.close(); } }; void init_test_metadata(py::module &m) { py::class_<PyASCIIMetadataTest>(m, "test_metadata") .def_static("write_ascii", &PyASCIIMetadataTest::write_ascii); } }; // namespace tiledbpy #endif TileDB-Py-0.12.2/tiledb/tests/test_metadata.py000066400000000000000000000311141417663620700210770ustar00rootroot00000000000000import os import time import warnings import tiledb import numpy as np import pytest from hypothesis import given, settings, strategies as st from hypothesis.extra import numpy as st_np from tiledb.tests.common import DiskTestCase, rand_utf8 from tiledb.main import test_metadata MIN_INT = np.iinfo(np.int64).min MAX_INT = np.iinfo(np.int64).max st_int = st.integers(min_value=MIN_INT, max_value=MAX_INT) st_float = st.floats(allow_nan=False) st_metadata = st.fixed_dictionaries( { "int": st_int, "double": st_float, "bytes": st.binary(), "str": st.text(), "list_int": st.lists(st_int), "tuple_int": st.lists(st_int).map(tuple), "list_float": st.lists(st_float), "tuple_float": st.lists(st_float).map(tuple), } ) st_ndarray = st_np.arrays( dtype=st.one_of( st_np.integer_dtypes(endianness="<"), st_np.unsigned_integer_dtypes(endianness="<"), st_np.floating_dtypes(endianness="<", sizes=(32, 64)), st_np.byte_string_dtypes(max_len=1), st_np.unicode_string_dtypes(endianness="<", max_len=1), st_np.datetime64_dtypes(endianness="<"), ), shape=st_np.array_shapes(min_dims=0, max_dims=3, min_side=0, max_side=10), ) class MetadataTest(DiskTestCase): def assert_equal_md_values(self, written_value, read_value): if isinstance(written_value, np.ndarray): self.assertIsInstance(read_value, np.ndarray) self.assertEqual(read_value.dtype, written_value.dtype) np.testing.assert_array_equal(read_value, written_value) elif not isinstance(written_value, (list, tuple)): self.assertEqual(read_value, written_value) # sequences are not round-tripped perfectly elif len(written_value) == 1: # sequences of length 1 are read as a single scalar element self.assertEqual(read_value, written_value[0]) else: # sequences of length != 1 are read as tuples self.assertEqual(read_value, tuple(written_value)) def assert_metadata_roundtrip(self, tdb_meta, dict_meta): for k, v in dict_meta.items(): # test __contains__ self.assertTrue(k in tdb_meta) # test __getitem__ self.assert_equal_md_values(v, tdb_meta[k]) # test get self.assert_equal_md_values(v, tdb_meta.get(k)) # test __contains__, __getitem__, get for non-key non_key = str(object()) self.assertFalse(non_key in tdb_meta) with self.assertRaises(KeyError): tdb_meta[non_key] self.assertIsNone(tdb_meta.get(non_key)) 
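# like dict.get, Metadata.get takes an optional default that is returned for missing keys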
self.assertEqual(tdb_meta.get(non_key, 42), 42) # test __len__ self.assertEqual(len(tdb_meta), len(dict_meta)) # test __iter__() is consistent with keys() self.assertEqual(list(tdb_meta), tdb_meta.keys()) # test keys() self.assertSetEqual(set(tdb_meta.keys()), set(dict_meta.keys())) # test values() and items() read_values = tdb_meta.values() read_items = tdb_meta.items() self.assertEqual(len(read_values), len(read_items)) for (item_key, item_value), value in zip(read_items, read_values): self.assertTrue(item_key in dict_meta) self.assert_equal_md_values(dict_meta[item_key], item_value) self.assert_equal_md_values(dict_meta[item_key], value) def assert_not_implemented_methods(self, tdb_meta): with self.assertRaises(NotImplementedError): tdb_meta.setdefault("nokey", "hello!") with self.assertRaises(NotImplementedError): tdb_meta.pop("nokey", "hello!") with self.assertRaises(NotImplementedError): tdb_meta.popitem() with self.assertRaises(NotImplementedError): tdb_meta.clear() def test_errors(self): path = self.path("test_md_errors") with tiledb.from_numpy(path, np.ones((5,), np.float64)): pass # can't read from a closed array A = tiledb.open(path) A.close() with self.assertRaises(tiledb.TileDBError): A.meta["x"] with tiledb.Array(path) as A: # can't write to a mode='r' array with self.assertRaises(tiledb.TileDBError): A.meta["invalid_write"] = 1 # missing key raises KeyError with self.assertRaises(KeyError): A.meta["xyz123nokey"] self.assert_not_implemented_methods(A.meta) # test invalid input with tiledb.Array(path, "w") as A: # keys must be strings with self.assertRaises(TypeError): A.meta[123] = 1 # can't write an int > typemax(Int64) with self.assertRaises(OverflowError): A.meta["bigint"] = MAX_INT + 1 # can't write mixed-type list with self.assertRaises(TypeError): A.meta["mixed_list"] = [1, 2.1] # can't write mixed-type tuple with self.assertRaises(TypeError): A.meta["mixed_list"] = (0, 3.1) # can't write objects with self.assertRaises(TypeError): A.meta["object"] = object() self.assert_not_implemented_methods(A.meta) @given(st_metadata) @settings(deadline=None) def test_basic(self, test_vals): path = self.path() with tiledb.from_numpy(path, np.ones((5,), np.float64)): pass with tiledb.Array(path, mode="w") as A: A.meta.update(test_vals) with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) # test a 1 MB blob blob = np.random.rand(int((1024**2) / 8)).tobytes() with tiledb.Array(path, "w") as A: test_vals["bigblob"] = blob A.meta["bigblob"] = blob with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) # test del key with tiledb.Array(path, "w") as A: del test_vals["bigblob"] del A.meta["bigblob"] with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) # test update with tiledb.Array(path, mode="w") as A: test_vals.update(foo="bar", double=3.14) A.meta.update(foo="bar", double=3.14) with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) @given(st_metadata, st_ndarray) @settings(deadline=None) def test_numpy(self, test_vals, ndarray): test_vals["ndarray"] = ndarray path = self.path() with tiledb.from_numpy(path, np.ones((5,), np.float64)): pass with tiledb.Array(path, mode="w") as A: A.meta.update(test_vals) with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) # test resetting a key with a ndarray value to a non-ndarray value with tiledb.Array(path, "w") as A: A.meta["ndarray"] = 42 test_vals["ndarray"] = 42 with tiledb.Array(path) as A: 
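# hypothesis-generated ndarray values must come back with dtype and contents # intact; assert_equal_md_values (defined above) checks both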
self.assert_metadata_roundtrip(A.meta, test_vals) # test resetting a key with a non-ndarray value to a ndarray value with tiledb.Array(path, "w") as A: A.meta["bytes"] = ndarray test_vals["bytes"] = ndarray with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) # test del ndarray key with tiledb.Array(path, "w") as A: del A.meta["ndarray"] del test_vals["ndarray"] with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) # test update with tiledb.Array(path, mode="w") as A: test_vals.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) A.meta.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) @pytest.mark.filterwarnings("ignore::UserWarning") @tiledb.scope_ctx( {"sm.vacuum.mode": "array_meta", "sm.consolidation.mode": "array_meta"} ) def test_consecutive(self): vfs = tiledb.VFS() path = self.path("test_md_consecutive") write_count = 100 with tiledb.from_numpy(path, np.ones((5,), np.float64)): pass randints = np.random.randint(0, MAX_INT - 1, size=write_count, dtype=np.int64) randutf8s = [rand_utf8(i) for i in np.random.randint(1, 30, size=write_count)] # write 100 times, then consolidate for i in range(write_count): with tiledb.Array(path, mode="w") as A: A.meta["randint"] = int(randints[i]) A.meta["randutf8"] = randutf8s[i] time.sleep(0.001) self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 100) with tiledb.Array(path) as A: self.assertEqual(A.meta["randint"], randints[-1]) self.assertEqual(A.meta["randutf8"], randutf8s[-1]) with tiledb.Array(path, mode="w") as aw: aw.meta.consolidate() try: self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 102) except AssertionError: # this test is broken under libtiledb 2.3, see ch 7449 if tiledb.libtiledb.version() >= (2, 3): warnings.warn( "Suppressed assertion error with libtiledb 2.3! see ch 7449" ) else: raise with tiledb.Array(path) as A: self.assertEqual(A.meta["randint"], randints[-1]) self.assertEqual(A.meta["randutf8"], randutf8s[-1]) # use randutf8s as keys, then consolidate for _ in range(2): for i in range(write_count): with tiledb.Array(path, mode="w") as A: A.meta[randutf8s[i] + "{}".format(randints[i])] = int(randints[i]) A.meta[randutf8s[i]] = randutf8s[i] time.sleep(0.001) # test data with tiledb.Array(path) as A: for i in range(write_count): key_int = randutf8s[i] + "{}".format(randints[i]) self.assertEqual(A.meta[key_int], randints[i]) self.assertEqual(A.meta[randutf8s[i]], randutf8s[i]) # test expected number of fragments before consolidating try: self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 302) except AssertionError: # this test is broken under libtiledb 2.3, see ch 7449 if tiledb.libtiledb.version() >= (2, 3): warnings.warn( "Suppressed assertion error with libtiledb 2.3! see ch 7449" ) else: raise with tiledb.Array(path, mode="w") as A: A.meta.consolidate() # test expected number of fragments before vacuuming try: self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 304) except AssertionError: # this test is broken under libtiledb 2.3, see ch 7449 if tiledb.libtiledb.version() >= (2, 3): warnings.warn( "Suppressed assertion error with libtiledb 2.3! 
see ch 7449" ) else: raise tiledb.vacuum(path) # should only have one fragment+'.ok' after vacuuming try: self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 1) except AssertionError: # this test is broken under libtiledb 2.3, see ch 7449 if tiledb.libtiledb.version() >= (2, 3): warnings.warn( "Suppressed assertion error with libtiledb 2.3! see ch 7449" ) else: raise # test data again after consolidation with tiledb.Array(path) as A: for i in range(write_count): key_int = randutf8s[i] + "{}".format(randints[i]) self.assertEqual(A.meta[key_int], randints[i]) self.assertEqual(A.meta[randutf8s[i]], randutf8s[i]) def test_ascii_metadata(self): uri = self.path("test_ascii_metadata") dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=1, dtype=np.int64)) att = tiledb.Attr(dtype=np.int64) schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) tiledb.Array.create(uri, schema) test_metadata.write_ascii(uri) with tiledb.open(uri) as A: assert A.meta["abc"] == b"xyz" TileDB-Py-0.12.2/tiledb/tests/test_multi_index-hp.py000066400000000000000000000116601417663620700222510ustar00rootroot00000000000000# # Property-based tests for Array.multi_index using Hypothesis # import tiledb from tiledb import SparseArray import numpy as np from numpy.testing import assert_array_equal import warnings import pytest from tiledb.tests.common import checked_path from tiledb.tests.strategies import bounded_ntuple, ranged_slices from hypothesis import given, assume from hypothesis import strategies as st def is_boundserror(exc: Exception): assert str(exc) != "" vals = ["out of domain bounds", "Cannot add range to dimension"] return any(x in str(exc) for x in vals) def _direct_query_ranges(array: SparseArray, ranges): with tiledb.scope_ctx() as ctx: q = tiledb.main.PyQuery(ctx, array, ("a",), (), 0, False) q.set_ranges(ranges) q.submit() return {k: v[0].view(array.attr(0).dtype) for k, v in q.results().items()} # Compound strategies to build valid inputs for multi_index subindex_obj = st.one_of(st.integers(), ranged_slices()) index_obj = st.one_of(subindex_obj, st.tuples(st.lists(subindex_obj))) class TestMultiIndexPropertySparse: dmin, dmax = -100, 100 @classmethod @pytest.fixture(scope="class") def sparse_array_1d(cls, checked_path): def write_sparse_contig(uri): data = np.arange(cls.dmin, cls.dmax, dtype=np.int64) with tiledb.open(uri, "w") as A: A[data] = data def create_array(uri): schema = tiledb.ArraySchema( tiledb.Domain( [tiledb.Dim(dtype=np.int64, domain=(cls.dmin, cls.dmax))] ), attrs=[ tiledb.Attr(name="a", dtype="float64", var=False, nullable=False) ], cell_order="row-major", tile_order="row-major", capacity=10000, sparse=True, ) tiledb.Array.create(uri, schema) uri = checked_path.path() create_array(uri) write_sparse_contig(uri) return uri @given(st.lists(bounded_ntuple(length=2, min_value=-100, max_value=100))) def test_multi_index_two_way_query(self, sparse_array_1d, ranges): """This test checks the result of "direct" range queries using PyQuery against the result of `multi_index` on the same ranges.""" uri = sparse_array_1d assert isinstance(uri, str) assume(all(v[0] <= v[1] for v in ranges)) try: with tiledb.open(uri) as A: r1 = A.multi_index[ranges]["a"] r2 = _direct_query_ranges(A, [ranges])["a"] assert_array_equal(r1, r2) except tiledb.TileDBError as exc: if is_boundserror(exc): # out of bounds, this is ok so we tell hypothesis to ignore # TODO these should all be IndexError assume(False) raise @given(index_obj) def test_multi_index_inputs(self, sparse_array_1d, ind): # TODO # currently we 
don't have a comparison target/mockup to check # as there is no direct numpy equivalent for this indexing mode # but we could still assert more details about the result # - coordinates are inbounds # - values are within known attribute range from write # another option for indirect testing # - densify slices and ranges and compare to numpy # numpy vectorized indexing result uri = sparse_array_1d try: with tiledb.open(uri) as A: r1 = A.multi_index[ind] r1_array = r1["a"] r1_coords = r1["__dim_0"] assert isinstance(r1_array, np.ndarray) assert isinstance(r1_coords, np.ndarray) # some results may be empty if len(r1_array): # assertions based on input data assert r1_array.min() >= self.dmin assert r1_array.max() <= self.dmax assert r1_coords.min() >= self.dmin assert r1_coords.max() <= self.dmax except tiledb.TileDBError as exc: # bounds errors are not failures if is_boundserror(exc): assume(False) elif "Failed to cast dim range" in str(exc): # TODO this should be IndexError assume(False) else: raise except ValueError as exc: if "Stepped slice ranges are not supported" in str(exc): # stepped slice errors are ok assume(False) elif "Cannot convert to scalar" in str(exc): assume(False) else: raise except TypeError as exc: if "Unsupported selection" in str(exc): # mostly ok but warn for cross-check warnings.warn(str(exc)) assume(False) else: raise TileDB-Py-0.12.2/tiledb/tests/test_multi_index.py000066400000000000000000000726551417663620700216570ustar00rootroot00000000000000""" TODO - # implement mock of expected behavior in pure numpy w/ test function - implement read function and tests (single [x], multi-attribute [ ]) - implement custom indexer - implement oindex... """ import random import numpy as np from numpy.testing import assert_array_equal import pytest import tiledb from tiledb.multirange_indexing import getitem_ranges, mr_dense_result_shape from tiledb.tests.common import ( DiskTestCase, assert_tail_equal, intspace, SUPPORTED_DATETIME64_DTYPES, rand_datetime64_array, assert_all_arrays_equal, ) import hypothesis.extra.numpy as npst def make_1d_dense(path, attr_name="", attr_dtype=np.int64): a_orig = np.arange(36) dom = tiledb.Domain(tiledb.Dim(domain=(0, 35), tile=35, dtype=np.uint64)) att = tiledb.Attr(name=attr_name, dtype=attr_dtype) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=False) tiledb.DenseArray.create(path, schema) with tiledb.DenseArray(path, "w") as A: A[:] = a_orig def make_2d_dense(path, attr_name="", attr_dtype=np.int64): a_orig = np.arange(1, 37).reshape(9, 4) dom = tiledb.Domain( tiledb.Dim(domain=(0, 8), tile=9, dtype=np.uint64), tiledb.Dim(domain=(0, 3), tile=4, dtype=np.uint64), ) att = tiledb.Attr(name=attr_name, dtype=attr_dtype) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=False) tiledb.DenseArray.create(path, schema) with tiledb.DenseArray(path, "w") as A: A[:] = a_orig class TestMultiRangeAuxiliary(DiskTestCase): def test_shape_funcs(self): range1el = (((1, 1),),) self.assertEqual(mr_dense_result_shape(range1el), (1,)) range1d = tuple([((1, 2), (4, 4))]) self.assertEqual(mr_dense_result_shape(range1d), (3,)) range2d1 = (((3, 6), (7, 7), (10, 12)), ((5, 7),)) self.assertEqual(mr_dense_result_shape(range2d1), (8, 3)) # range2d2 = ([(3, 6), (7, 7), (10, 12)], [(5, 7), (10, 10)]) # def test_3d(self): # range3d1 = (((2, 4),), ((3, 6),), ((1, 4), (5, 9))) # # # self.assertEqual() def test_sel_to_ranges(self): class Obj(object): pass class IBI(object): def __getitem__(self, idx): return idx def make_arr(ndim): arr = Obj() arr.schema = 
Obj() arr.schema.domain = Obj() arr.schema.domain.ndim = ndim arr.schema.sparse = False arr.array = Obj() # place-holder for attribute that is not used in these tests arr.nonempty_domain = lambda: [()] * ndim return arr ibi = IBI() # ndim = 1 arr = make_arr(1) self.assertEqual(getitem_ranges(arr, ibi[[1]]), (((1, 1),),)) self.assertEqual(getitem_ranges(arr, ibi[[1, 2]]), (((1, 1), (2, 2)),)) self.assertEqual(getitem_ranges(arr, ibi[slice(1, 2)]), (((1, 2),),)) self.assertEqual(getitem_ranges(arr, ibi[1:2]), (((1, 2),),)) # ndim = 2 arr2 = make_arr(2) self.assertEqual(getitem_ranges(arr2, ibi[[1]]), (((1, 1),), ())) self.assertEqual(getitem_ranges(arr2, ibi[slice(1, 33)]), (((1, 33),), ())) self.assertEqual( getitem_ranges(arr2, ibi[[1, 2], [[1], slice(1, 3)]]), (((1, 1), (2, 2)), ((1, 1), (1, 3))), ) # ndim = 3 arr3 = make_arr(3) self.assertEqual( getitem_ranges(arr3, ibi[1, 2, 3]), (((1, 1),), ((2, 2),), ((3, 3),)) ) self.assertEqual(getitem_ranges(arr3, ibi[1, 2]), ((((1, 1),), ((2, 2),), ()))) self.assertEqual( getitem_ranges(arr3, ibi[1:2, 3:4]), (((1, 2),), ((3, 4),), ()) ) self.assertEqual( getitem_ranges(arr3, ibi[1:2, 3:4, 5:6]), (((1, 2),), ((3, 4),), ((5, 6),)) ) self.assertEqual( getitem_ranges(arr3, ibi[[1], [2], [5, 6]]), (((1, 1),), ((2, 2),), ((5, 5), (6, 6))), ) self.assertEqual( getitem_ranges(arr3, ibi[1, [slice(3, 6), 8], slice(4, 6)]), (((1, 1),), ((3, 6), (8, 8)), ((4, 6),)), ) self.assertEqual(getitem_ranges(arr3, ibi[(1, 2)]), (((1, 1),), ((2, 2),), ())) self.assertEqual(getitem_ranges(arr3, ibi[[(1, 2)]]), (((1, 2),), (), ())) self.assertEqual( getitem_ranges(arr3, ibi[[(1, 2), 4], [slice(1, 4)]]), (((1, 2), (4, 4)), ((1, 4),), ()), ) class TestMultiRange(DiskTestCase): def test_multirange_behavior(self): uri = self.path("multirange_behavior_sparse") schema = tiledb.ArraySchema( domain=tiledb.Domain( *[ tiledb.Dim( name="idx", domain=(-1.0, 0.7999999999999996), tile=2.0, dtype="float64", ) ] ), attrs=[tiledb.Attr(name="data", dtype="float64", var=False)], cell_order="row-major", tile_order="row-major", capacity=10000, sparse=True, allows_duplicates=True, ) tiledb.SparseArray.create(uri, schema) data = np.random.rand(10) idx = np.arange(-1, 1, 0.2) with tiledb.open(uri, "w") as A: A[idx] = {"data": data} with tiledb.open(uri) as A: res = A.multi_index[:] # always return data self.assertTrue("data" in res) # return coordinates for sparse self.assertTrue("idx" in res) assert_array_equal(res["data"], data) assert_array_equal(res["idx"], idx) uri = self.path("multirange_behavior_dense") with tiledb.from_numpy(uri, data): pass with tiledb.open(uri) as B: res = B.multi_index[0:9] # TODO: this should accept [:] # always return data self.assertTrue("" in res) # don't return coordinates for dense self.assertTrue("idx" not in res) def test_multirange_empty(self): path1 = self.path("test_multirange_empty_1d") make_1d_dense(path1, attr_dtype=np.uint16) with tiledb.open(path1) as A: res = A.multi_index[tiledb.EmptyRange] assert res[""].dtype == np.uint16 assert res[""].shape == (0,) path2 = self.path("test_multirange_empty_2d") make_2d_dense(path2, attr_dtype=np.float32) with tiledb.open(path2) as A: res = A.multi_index[tiledb.EmptyRange] assert res[""].dtype == np.float32 assert res[""].shape == (0,) def test_multirange_1d_1dim_ranges(self): path = self.path("test_multirange_1d_1dim_ranges") attr_name = "a" make_1d_dense(path, attr_name=attr_name) with tiledb.DenseArray(path) as A: ranges = (((0, 0),),) expected = np.array([0], dtype=np.int64) res = 
tiledb.libtiledb.multi_index(A, (attr_name,), ranges) a = res[attr_name] assert_array_equal(a, expected) self.assertEqual(a.dtype, expected.dtype) self.assertEqual(len(res.keys()), 2) ranges2 = (((1, 1), (5, 8)),) expected2 = np.array([1, 5, 6, 7, 8], dtype=np.int64) a2 = tiledb.libtiledb.multi_index(A, (attr_name,), ranges2)[attr_name] assert_array_equal(a2, expected2) self.assertEqual(a2.dtype, expected2.dtype) def test_multirange_2d_1dim_ranges(self): path = self.path("test_multirange_1dim_ranges") attr_name = "a" make_2d_dense(path, attr_name=attr_name) expected = np.array( [ 1, 2, 3, 4, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, ], dtype=np.uint64, ) ranges = (((0, 0), (5, 8)),) with tiledb.DenseArray(path) as A: a = tiledb.libtiledb.multi_index(A, (attr_name,), ranges)[attr_name] assert_array_equal(a, expected) def test_multirange_2d_2dim_ranges(self): path = self.path("test_multirange_2dim_ranges") attr_name = "a" make_2d_dense(path, attr_name=attr_name) expected = np.arange(1, 21) ranges = (((0, 4),), ((0, 3),)) with tiledb.DenseArray(path) as A: a = tiledb.libtiledb.multi_index(A, (attr_name,), ranges)[attr_name] assert_array_equal(a, expected) # test slicing start=end on 1st dim at 0 (bug fix) assert_tail_equal( np.array([[1, 2, 3, 4]]), A.multi_index[:0][attr_name], A.multi_index[0:0][attr_name], ) # test slicing start=end on 2nd dim at 0 (bug fix) assert_tail_equal( np.arange(1, 34, 4).reshape((9, 1)), A.multi_index[:, :0][attr_name], A.multi_index[:, 0:0][attr_name], ) # test slicing start=end on 1st dim at 1 assert_array_equal(np.array([[5, 6, 7, 8]]), A.multi_index[1:1][attr_name]) # test slicing start=end on 2nd dim at 1 assert_array_equal( np.arange(2, 35, 4).reshape((9, 1)), A.multi_index[:, 1:1][attr_name] ) # test slicing start=end on 1st dim at max range assert_array_equal( np.array([[33, 34, 35, 36]]), A.multi_index[8:8][attr_name] ) # test slicing start=end on 2nd dim at max range assert_tail_equal( np.arange(4, 37, 4).reshape((9, 1)), A.multi_index[:, 3:3][attr_name] ) def test_multirange_1d_dense_int64(self): attr_name = "" path = self.path("multi_index_1d") dom = tiledb.Domain( tiledb.Dim(name="coords", domain=(-10, 10), tile=9, dtype=np.int64) ) att = tiledb.Attr(name=attr_name, dtype=np.float32) schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) tiledb.DenseArray.create(path, schema) orig_array = np.random.rand(schema.domain.dim(0).size).astype(np.float32) with tiledb.open(path, "w") as A: A[:] = orig_array with tiledb.open(path) as A: # stepped ranges are not supported with self.assertRaises(ValueError): A.query(coords=True).multi_index[1::2] assert_array_equal(orig_array[[0, -1]], A.multi_index[[-10, 10]][attr_name]) self.assertEqual(orig_array[0], A.multi_index[-10][attr_name]) self.assertEqual( -10, A.query(coords=True).multi_index[-10]["coords"].view("i8") ) assert_array_equal(orig_array[0:], A.multi_index[[(-10, 10)]][attr_name]) assert_array_equal( orig_array[0:], A.multi_index[[slice(-10, 10)]][attr_name] ) assert_array_equal( orig_array[0:10], A.multi_index[-10 : np.int64(-1)][attr_name] ) assert_array_equal(orig_array, A.multi_index[:][attr_name]) ned = A.nonempty_domain()[0] assert_array_equal( A.multi_index[ned[0] : ned[1]][attr_name], A.multi_index[:][attr_name] ) def test_multirange_1d_sparse_double(self): attr_name = "" path = self.path("mr_1d_sparse_double") dom = tiledb.Domain( tiledb.Dim(name="coords", domain=(0, 30), tile=10, dtype=np.float64) ) att = tiledb.Attr(name=attr_name, dtype=np.float64) schema = 
tiledb.ArraySchema(domain=dom, sparse=True, attrs=(att,)) tiledb.SparseArray.create(path, schema) coords = np.linspace(0, 30, num=31) orig_array = np.random.rand(coords.size) with tiledb.open(path, "w") as A: A[coords] = orig_array with tiledb.open(path) as A: assert_array_equal(orig_array[[0]], A.multi_index[[0]][attr_name]) assert_array_equal(orig_array[-1], A.multi_index[30][attr_name]) assert_array_equal(orig_array[-1], A.multi_index[30.0][attr_name]) assert_array_equal( orig_array[coords.size - 3 : coords.size], A.multi_index[ (28.0, 30.0), ][attr_name], ) res = A.multi_index[slice(0, 5)] assert_array_equal(orig_array[0:6], res[attr_name]) assert_array_equal(coords[0:6], res["coords"].astype(np.float64)) # test slice range indexing ned = A.nonempty_domain() res = A.multi_index[: ned[0][1]] assert_array_equal(coords, res["coords"].astype(np.float64)) res = A.multi_index[ned[0][0] : coords[15]] assert_array_equal(coords[:16], res["coords"].astype(np.float64)) def test_multirange_2d_sparse_domain_utypes(self): attr_name = "foo" types = (np.uint8, np.uint16, np.uint32, np.uint64) for dtype in types: min = 0 max = int(np.iinfo(dtype).max) - 1 path = self.path("multi_index_2d_sparse_" + str(dtype.__name__)) dom = tiledb.Domain(tiledb.Dim(domain=(min, max), tile=1, dtype=dtype)) att = tiledb.Attr(name=attr_name, dtype=dtype) schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=(att,)) tiledb.SparseArray.create(path, schema) coords = intspace(min, max, num=100, dtype=dtype) with tiledb.open(path, "w") as A: A[coords] = coords with tiledb.open(path) as A: res = A.multi_index[slice(coords[0], coords[-1])] assert_array_equal(res[attr_name], coords) assert_array_equal(res["__dim_0"].astype(dtype), coords) res = A.multi_index[coords[0]] assert_array_equal(res[attr_name], coords[0]) assert_array_equal(res["__dim_0"].astype(dtype), coords[0]) res = A.multi_index[coords[-1]] assert_array_equal(res[attr_name], coords[-1]) assert_array_equal(res["__dim_0"].astype(dtype), coords[-1]) midpoint = len(coords) // 2 start = midpoint - 20 stop = midpoint + 20 srange = slice(coords[start], coords[stop]) res = A.multi_index[srange] assert_array_equal(res[attr_name], coords[start : stop + 1]) assert_array_equal( res["__dim_0"].astype(dtype), coords[start : stop + 1] ) def test_multirange_2d_sparse_float(self): attr_name = "" path = self.path("mr_2d_sparse_float") dom = tiledb.Domain( tiledb.Dim(domain=(0, 10), tile=1, dtype=np.float32), tiledb.Dim(domain=(0, 10), tile=1, dtype=np.float32), ) att = tiledb.Attr(name=attr_name, dtype=np.float64) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.SparseArray.create(path, schema) orig_array = np.random.rand(11, 11) d1 = np.linspace(0, 10, num=11, dtype=np.float32) d2 = np.linspace(0, 10, num=11, dtype=np.float32) coords_d1, coords_d2 = np.meshgrid(d1, d2, indexing="ij") with tiledb.open(path, "w") as A: A[coords_d1.flatten(), coords_d2.flatten()] = orig_array with tiledb.open(path) as A: res = A.multi_index[[0], :] assert_array_equal(orig_array[[0], :].squeeze(), res[attr_name]) assert_array_equal(coords_d1[0, :], res["__dim_0"]) # === res = A.multi_index[10, :] assert_array_equal(orig_array[[-1], :].squeeze(), res[attr_name]) assert_array_equal(coords_d2[[-1], :].squeeze(), res["__dim_1"]) # === res = A.multi_index[[slice(0, 2), [5]]] assert_array_equal( np.vstack([orig_array[0:3, :], orig_array[5, :]]).flatten(), res[attr_name], ) assert_array_equal( np.vstack((coords_d1[0:3], coords_d1[5])).flatten(), res["__dim_0"] ) # === res = 
A.multi_index[slice(0.0, 2.0), slice(2.0, 5.0)] assert_array_equal(orig_array[0:3, 2:6].flatten(), res[attr_name]) assert_array_equal(coords_d1[0:3, 2:6].flatten(), res["__dim_0"]) assert_array_equal(coords_d2[0:3, 2:6].flatten(), res["__dim_1"]) res = A.multi_index[ slice(np.float32(0.0), np.float32(2.0)), slice(np.float32(2.0), np.float32(5.0)), ] assert_array_equal(orig_array[0:3, 2:6].flatten(), res[attr_name]) assert_array_equal(coords_d1[0:3, 2:6].flatten(), res["__dim_0"]) assert_array_equal(coords_d2[0:3, 2:6].flatten(), res["__dim_1"]) def test_multirange_1d_sparse_query(self): path = self.path("mr_1d_sparse_query") dom = tiledb.Domain( tiledb.Dim(name="coords", domain=(-100, 100), tile=1, dtype=np.float32) ) attrs = [ tiledb.Attr(name="U", dtype=np.float64), tiledb.Attr(name="V", dtype=np.uint32), ] schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) tiledb.SparseArray.create(path, schema) U = np.random.rand(11) V = np.random.randint(0, np.iinfo(np.uint32).max, 11, dtype=np.uint32) coords = np.linspace(-10, 10, num=11, dtype=np.float32) data = {"U": U, "V": V} with tiledb.open(path, "w") as A: A[coords] = data with tiledb.open(path) as A: for k, d in data.items(): Q = A.query(attrs=k) res = Q.multi_index[[-10]] assert_array_equal(d[[0]], res[k]) assert_array_equal(coords[[0]], res["coords"].view("f4")) res = A.multi_index[10] assert_array_equal(d[[-1]].squeeze(), res[k]) assert_array_equal(coords[[-1]], res["coords"].view("f4")) res = A.multi_index[[slice(coords[0], coords[2]), [coords[-1]]]] assert_array_equal(np.hstack([d[0:3], d[-1]]), res[k]) # make sure full slice indexing works on query res = Q.multi_index[:] assert_array_equal(coords, res["coords"]) # TODO: this should be an error # res = A.multi_index[10, :] # assert_array_equal( # d[[-1]].squeeze(), # res[k] # ) with tiledb.open(path) as A: Q = A.query(coords=False, attrs=["U"]) res = Q.multi_index[:] self.assertTrue("U" in res) self.assertTrue("V" not in res) self.assertTrue("coords" not in res) assert_array_equal(res["U"], data["U"]) def test_multirange_1d_dense_vectorized(self): path = self.path("mr_1d_dense_vectorized") dom = tiledb.Domain(tiledb.Dim(domain=(0, 999), tile=1000, dtype=np.uint32)) attrs = tiledb.Attr(name="", dtype=np.float64) schema = tiledb.ArraySchema(domain=dom, attrs=(attrs,), sparse=False) tiledb.DenseArray.create(path, schema) data = np.random.rand(1000) with tiledb.DenseArray(path, "w") as A: A[0] = data[0] A[-1] = data[-1] A[:] = data for _ in range(0, 50): with tiledb.DenseArray(path) as A: idxs = random.sample(range(0, 999), k=100) res = A.multi_index[idxs] assert_array_equal(data[idxs], res[""]) def test_multirange_2d_dense_float(self): attr_name = "" path = self.path("multirange_2d_dense_float") dom = tiledb.Domain( tiledb.Dim(domain=(0, 10), tile=1, dtype=np.int64), tiledb.Dim(domain=(0, 10), tile=1, dtype=np.int64), ) att = tiledb.Attr(name=attr_name, dtype=np.float64) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=False) tiledb.DenseArray.create(path, schema) orig_array = np.random.rand(11, 11) with tiledb.open(path, "w") as A: A[:] = orig_array with tiledb.open(path) as A: assert_array_equal(orig_array[[0], :], A.multi_index[[0], :][attr_name]) assert_array_equal( orig_array[[-1, -1], :], A.multi_index[[10, 10], :][attr_name] ) assert_array_equal( orig_array[0:4, 7:10], A.multi_index[[(0, 3)], slice(7, 9)][attr_name] ) assert_array_equal(orig_array[:, :], A.multi_index[:, :][attr_name]) # TODO this should be an error to match NumPy 1.12 semantics # 
assert_array_equal( # orig_array[0:4,7:10], # A.multi_index[[(np.float64(0),np.float64(3.0))], slice(7,9)][attr_name] # ) @pytest.mark.parametrize("dtype", SUPPORTED_DATETIME64_DTYPES) def test_multirange_1d_sparse_datetime64(self, dtype): path = self.path("multirange_1d_sparse_datetime64") dates = rand_datetime64_array(10, dtype=dtype) dom = tiledb.Domain( tiledb.Dim(domain=(dates.min(), dates.max()), dtype=dtype, tile=1) ) attr_name = "" att = tiledb.Attr(name=attr_name, dtype=dtype) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.SparseArray.create(path, schema) with tiledb.SparseArray(path, mode="w") as T: T[dates] = dates with tiledb.open(path) as A: res = A.multi_index[:] # check full range assert_tail_equal(dates, res[""], res["__dim_0"]) # check range pairs for i in range(len(dates) - 1): start, stop = dates[i : i + 2] assert_tail_equal( dates[i : i + 2], A.multi_index[start:stop][""], A.multi_index[start:stop]["__dim_0"], ) def test_fix_473_sparse_index_bug(self): # test of fix for issue raised in # https://github.com/TileDB-Inc/TileDB-Py/pull/473#issuecomment-784675012 uri = self.path("test_fix_473_sparse_index_bug") dom = tiledb.Domain( tiledb.Dim(name="x", domain=(0, 2**64 - 2), tile=1, dtype=np.uint64) ) schema = tiledb.ArraySchema( domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.uint64)] ) tiledb.SparseArray.create(uri, schema) slice_index = slice(0, 4, None) with tiledb.SparseArray(uri, mode="r") as A: data = A.multi_index[slice_index] assert_array_equal(data["a"], np.array([], dtype=np.uint64)) assert_array_equal(A.multi_index[:], []) with tiledb.open(uri, mode="w") as A: A[[10]] = {"a": [10]} with tiledb.open(uri) as A: assert_tail_equal( A.multi_index[slice_index]["a"], A.multi_index[:], A.multi_index[0:], A.multi_index[1:], A.multi_index[:10], A.multi_index[:11], np.array([], dtype=np.uint64), ) def test_fixed_multi_attr_df(self): uri = self.path("test_fixed_multi_attr_df") dom = tiledb.Domain( tiledb.Dim(name="dim", domain=(0, 0), tile=None, dtype=np.int32) ) schema = tiledb.ArraySchema( domain=dom, sparse=True, attrs=[ tiledb.Attr( name="111", dtype=[("", np.int32), ("", np.int32), ("", np.int32)] ) ], ) tiledb.SparseArray.create(uri, schema) data_111 = np.array( [(1, 1, 1)], dtype=[("", np.int32), ("", np.int32), ("", np.int32)] ) with tiledb.SparseArray(uri, mode="w") as A: A[0] = data_111 with tiledb.SparseArray(uri, mode="r") as A: result = A.query(attrs=["111"])[0] assert_array_equal(result["111"], data_111) with self.assertRaises(tiledb.TileDBError): result = A.query(attrs=["111"]).df[0] result = A.query(attrs=["111"], use_arrow=False) assert_array_equal(result.df[0]["111"], data_111) def test_var_multi_attr_df(self): uri = self.path("test_var_multi_attr_df") dom = tiledb.Domain( tiledb.Dim(name="dim", domain=(0, 2), tile=None, dtype=np.int32) ) schema = tiledb.ArraySchema( domain=dom, sparse=True, attrs=[tiledb.Attr(name="1s", dtype=np.int32, var=True)], ) tiledb.SparseArray.create(uri, schema) data = np.array( [ np.array([1], dtype=np.int32), np.array([1, 1], dtype=np.int32), np.array([1, 1, 1], dtype=np.int32), ], dtype="O", ) with tiledb.SparseArray(uri, mode="w") as A: A[[0, 1, 2]] = data with tiledb.SparseArray(uri, mode="r") as A: result = A.query(attrs=["1s"]) assert_array_equal(result[0]["1s"][0], data[0]) assert_array_equal(result[1]["1s"][0], data[1]) assert_array_equal(result[2]["1s"][0], data[2]) with self.assertRaises(tiledb.TileDBError): result = A.query(attrs=["1s"]).df[0] result = A.query(attrs=["1s"], 
use_arrow=False) assert_array_equal(result.df[0]["1s"][0], data[0]) assert_array_equal(result.df[1]["1s"][0], data[1]) assert_array_equal(result.df[2]["1s"][0], data[2]) def test_multi_index_with_implicit_full_string_range(self): uri = self.path("test_multi_index_with_implicit_full_string_range") dom = tiledb.Domain( tiledb.Dim(name="dint", domain=(0, 4), tile=5, dtype=np.int32), tiledb.Dim(name="dstr", domain=(None, None), tile=None, dtype=np.bytes_), ) schema = tiledb.ArraySchema( domain=dom, sparse=True, attrs=[tiledb.Attr(name="", dtype=np.int32)] ) tiledb.Array.create(uri, schema) with tiledb.open(uri, mode="w") as A: d1 = np.concatenate((np.arange(5), np.arange(5))) d2 = np.asarray( ["a", "b", "ab", "ab", "c", "c", "c", "c", "d", "e"], dtype=np.bytes_ ) A[d1, d2] = np.array(np.random.randint(10, size=10), dtype=np.int32) with tiledb.open(uri, mode="r") as A: assert_array_equal(A[:][""], A.multi_index[:][""]) assert_array_equal(A.multi_index[:][""], A.multi_index[:, :][""]) assert_array_equal(A[1:4][""], A.multi_index[1:3][""]) assert_array_equal(A.multi_index[1:3][""], A.multi_index[1:3, :][""]) assert_array_equal(A[0][""], A.multi_index[0][""]) assert_array_equal(A.multi_index[0][""], A.multi_index[0, :][""]) def test_multi_index_open_timestamp_with_empty_nonempty_domain(self): uri = self.path("test_multi_index_open_timestamp_with_empty_nonempty_domain") dom = tiledb.Domain(tiledb.Dim(domain=(1, 3))) attr = tiledb.Attr(name="", dtype=np.int32) schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=[attr]) tiledb.Array.create(uri, schema) with tiledb.open(uri, mode="w", timestamp=2) as A: d1 = np.array(np.random.randint(1, 11, size=3, dtype=np.int32)) A[np.arange(1, 4)] = d1 with tiledb.open(uri, mode="r", timestamp=1) as A: assert A.nonempty_domain() is None assert_array_equal(A.multi_index[:][""], A[:][""]) def test_multi_index_query_args(self): uri = self.path("test_multi_index_query_args") schema = tiledb.ArraySchema( domain=tiledb.Domain(tiledb.Dim(name="dim", domain=(0, 9), dtype=np.uint8)), sparse=True, attrs=[ tiledb.Attr(name="a", dtype=np.uint8), tiledb.Attr(name="b", dtype=np.uint8), ], ) tiledb.Array.create(uri, schema) a = np.array(np.random.randint(10, size=10), dtype=np.int8) b = np.array(np.random.randint(10, size=10), dtype=np.int8) with tiledb.open(uri, mode="w") as A: A[np.arange(10)] = {"a": a, "b": b} with tiledb.open(uri, mode="r") as A: q = A.query(attr_cond=tiledb.QueryCondition("a >= 5"), attrs=["a"]) assert {"a", "dim"} == q.multi_index[:].keys() == q[:].keys() assert_array_equal(q.multi_index[:]["a"], q[:]["a"]) assert_array_equal(q.multi_index[:]["a"], q.df[:]["a"]) assert all(q[:]["a"] >= 5) def test_multi_index_timing(self): path = self.path("test_multi_index_timing") attr_name = "a" make_1d_dense(path, attr_name=attr_name) tiledb.stats_enable() with tiledb.open(path) as A: assert_array_equal(A.df[:][attr_name], np.arange(36)) internal_stats = tiledb.main.python_internal_stats() assert "py.getitem_time :" in internal_stats assert "py.getitem_time.buffer_conversion_time :" in internal_stats assert "py.getitem_time.pandas_index_update_time :" in internal_stats tiledb.stats_disable() TileDB-Py-0.12.2/tiledb/tests/test_pandas_dataframe.py000066400000000000000000001426231417663620700226010ustar00rootroot00000000000000import pytest pd = pytest.importorskip("pandas") tm = pd._testing import copy from datetime import date import glob import os from pathlib import Path import random import string import tempfile import uuid import numpy as np from 
numpy.testing import assert_array_equal import tiledb from tiledb.dataframe_ import ColumnInfo from tiledb.tests.common import ( DiskTestCase, checked_path, dtype_max, dtype_min, rand_ascii, rand_ascii_bytes, rand_datetime64_array, rand_utf8, ) def make_dataframe_basic1(col_size=10): # ensure no duplicates when using as string dim chars = list() for _ in range(col_size): next = rand_ascii_bytes(2) while next in chars: next = rand_ascii_bytes(2) chars.append(next) data_dict = { "time": rand_datetime64_array(col_size, include_extremes=False), "x": np.array([rand_ascii(4).encode("UTF-8") for _ in range(col_size)]), "chars": np.array(chars), "cccc": np.arange(0, col_size), "q": np.array([rand_utf8(np.random.randint(1, 100)) for _ in range(col_size)]), "t": np.array([rand_utf8(4) for _ in range(col_size)]), "r": np.array( [rand_ascii_bytes(np.random.randint(1, 100)) for _ in range(col_size)] ), "s": np.array([rand_ascii() for _ in range(col_size)]), "u": np.array([rand_ascii_bytes().decode() for _ in range(col_size)]), "v": np.array([rand_ascii_bytes() for _ in range(col_size)]), "vals_int64": np.random.randint( dtype_max(np.int64), size=col_size, dtype=np.int64 ), "vals_float64": np.random.rand(col_size), } # TODO: dump this dataframe to pickle/base64 so that it can be reconstructed if # there are weird failures on CI? df = pd.DataFrame.from_dict(data_dict) return df def make_dataframe_basic2(): # This code is from Pandas feather i/o tests "test_basic" function: # https://github.com/pandas-dev/pandas/blob/master/pandas/tests/io/test_feather.py # (available under BSD 3-clause license # https://github.com/pandas-dev/pandas/blob/master/LICENSE df = pd.DataFrame( { "string": list("abc"), "int": list(range(1, 4)), "uint": np.arange(3, 6).astype("u1"), "float": np.arange(4.0, 7.0, dtype="float64"), # TODO "float_with_null": [1.0, np.nan, 3], "bool": [True, False, True], # TODO "bool_with_null": [True, np.nan, False], # "cat": pd.Categorical(list("abc")), "dt": pd.date_range("20130101", periods=3), # "dttz": pd.date_range("20130101", periods=3, tz="US/Eastern"), # "dt_with_null": [ # pd.Timestamp("20130101"), # pd.NaT, # pd.Timestamp("20130103"), # ], "dtns": pd.date_range("20130101", periods=3, freq="ns"), } ) return df def make_dataframe_basic3(col_size=10, time_range=(None, None)): df_dict = { "time": rand_datetime64_array( col_size, start=time_range[0], stop=time_range[1], include_extremes=False ), "double_range": np.linspace(-1000, 1000, col_size), "int_vals": np.random.randint( dtype_max(np.int64), size=col_size, dtype=np.int64 ), } df = pd.DataFrame(df_dict) return df class TestColumnInfo: def assertColumnInfo(self, info, info_dtype, info_repr=None, info_nullable=False): assert isinstance(info.dtype, np.dtype) assert info.dtype == info_dtype assert info.repr is None or isinstance(info.repr, str) assert info.repr == info_repr assert isinstance(info.nullable, bool) assert info.nullable == info_nullable @pytest.mark.parametrize( "type_specs, info_dtype, info_repr, info_nullable", [ # bool types ([bool, "b1"], np.dtype("uint8"), "bool", False), ([pd.BooleanDtype()], np.dtype("uint8"), "boolean", True), # numeric types ([np.uint8, "u1"], np.dtype("uint8"), None, False), ([np.uint16, "u2"], np.dtype("uint16"), None, False), ([np.uint32, "u4"], np.dtype("uint32"), None, False), ([np.uint64, "u8"], np.dtype("uint64"), None, False), ([np.int8, "i1"], np.dtype("int8"), None, False), ([np.int16, "i2"], np.dtype("int16"), None, False), ([np.int32, "i4"], np.dtype("int32"), None, False), ([np.int64, "i8"], 
np.dtype("int64"), None, False), ([np.float32, "f4"], np.dtype("float32"), None, False), ([np.float64, "f8", float], np.dtype("float64"), None, False), # nullable int types ([pd.UInt8Dtype(), "UInt8"], np.dtype("uint8"), "UInt8", True), ([pd.UInt16Dtype(), "UInt16"], np.dtype("uint16"), "UInt16", True), ([pd.UInt32Dtype(), "UInt32"], np.dtype("uint32"), "UInt32", True), ([pd.UInt64Dtype(), "UInt64"], np.dtype("uint64"), "UInt64", True), ([pd.Int8Dtype(), "Int8"], np.dtype("int8"), "Int8", True), ([pd.Int16Dtype(), "Int16"], np.dtype("int16"), "Int16", True), ([pd.Int32Dtype(), "Int32"], np.dtype("int32"), "Int32", True), ([pd.Int64Dtype(), "Int64"], np.dtype("int64"), "Int64", True), # datetime types (["datetime64[ns]"], np.dtype("= (2, 2, 3): pytest.skip("Only run QueryCondition test with TileDB>=2.2.3") def test_errors(self, input_array_UIDS): with self.assertRaises(tiledb.TileDBError): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("1.324 < 1") A.query(attr_cond=qc, use_arrow=False).df[:] with self.assertRaises(tiledb.TileDBError): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("foo >= bar") A.query(attr_cond=qc, use_arrow=False).df[:] with self.assertRaises(tiledb.TileDBError): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("'foo' == 'bar'") A.query(attr_cond=qc, use_arrow=False).df[:] with self.assertRaises(tiledb.TileDBError): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("U < 10000000000000000000000.0") A.query(attr_cond=qc, attrs=["U"]).df[:] with self.assertRaises(tiledb.TileDBError): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("D") A.query(attr_cond=qc, attrs=["D"]).df[:] with self.assertRaises(tiledb.TileDBError): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("D,") A.query(attr_cond=qc, attrs=["D"]).df[:] with self.assertRaises(tiledb.TileDBError): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("D > ") A.query(attr_cond=qc, attrs=["D"]).df[:] with self.assertRaises(tiledb.TileDBError): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("(D > 0.7) | (D < 3.5)") A.query(attr_cond=qc, attrs=["D"]).df[:] with self.assertRaises(tiledb.TileDBError): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("U >= 3 or 0.7 < D") A.query(attr_cond=qc, attrs=["U", "D"]).df[:] @pytest.mark.xfail( tiledb.libtiledb.version() >= (2, 5), reason="Skip fail_on_dense with libtiledb >2.5", ) def test_fail_on_dense(self): path = self.path("test_fail_on_dense") dom = tiledb.Domain( tiledb.Dim(name="d", domain=(1, 10), tile=1, dtype=np.uint8) ) attrs = [tiledb.Attr(name="a", dtype=np.uint8)] schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False) tiledb.Array.create(path, schema) with tiledb.open(path) as A: with pytest.raises(tiledb.TileDBError) as excinfo: A.query(attr_cond=tiledb.QueryCondition("a < 5")) assert "QueryConditions may only be applied to sparse arrays" in str( excinfo.value ) def test_unsigned(self, input_array_UIDS): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("U < 5") result = A.query(attr_cond=qc, attrs=["U"]).df[:] assert all(result["U"] < 5) def test_signed(self, input_array_UIDS): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("I < 1") result = A.query(attr_cond=qc, attrs=["I"]).df[:] assert all(result["I"] < 1) qc = tiledb.QueryCondition("I < +1") result = A.query(attr_cond=qc, attrs=["I"]).df[:] assert all(result["I"] < +1) qc = 
tiledb.QueryCondition("I < ---1") result = A.query(attr_cond=qc, attrs=["I"]).df[:] assert all(result["I"] < ---1) qc = tiledb.QueryCondition("-5 < I < 5") result = A.query(attr_cond=qc, attrs=["I"]).df[:] assert all(-5 < result["I"]) assert all(result["I"] < 5) def test_floats(self, input_array_UIDS): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("D > 5.0") result = A.query(attr_cond=qc, attrs=["D"]).df[:] assert all(result["D"] > 5.0) qc = tiledb.QueryCondition("(D > 0.7) & (D < 3.5)") result = A.query(attr_cond=qc, attrs=["D"]).df[:] assert all((result["D"] > 0.7) & (result["D"] < 3.5)) qc = tiledb.QueryCondition("0.2 < D < 0.75") result = A.query(attr_cond=qc, attrs=["D", "I"]).df[:] assert all(0.2 < result["D"]) assert all(result["D"] < 0.75) def test_string(self, input_array_UIDS): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("S == 'c'") result = A.query(attr_cond=qc, attrs=["S"], use_arrow=False).df[:] assert len(result["S"]) == 1 assert result["S"][0] == b"c" qc = tiledb.QueryCondition("A == 'a'") result = A.query(attr_cond=qc, attrs=["A"], use_arrow=False).df[:] assert len(result["A"]) == 1 assert result["A"][0] == b"a" def test_combined_types(self, input_array_UIDS): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("(I > 0) & ((-3 < D) & (D < 3.0))") result = A.query(attr_cond=qc, attrs=["I", "D"]).df[:] assert all((result["I"] > 0) & ((-3 < result["D"]) & (result["D"] < 3.0))) qc = tiledb.QueryCondition("U >= 3 and 0.7 < D") result = A.query(attr_cond=qc, attrs=["U", "D"]).df[:] assert all(result["U"] >= 3) assert all(0.7 < result["D"]) qc = tiledb.QueryCondition("(0.2 < D and D < 0.75) and (-5 < I < 5)") result = A.query(attr_cond=qc, attrs=["D", "I"]).df[:] assert all((0.2 < result["D"]) & (result["D"] < 0.75)) assert all((-5 < result["I"]) & (result["I"] < 5)) qc = tiledb.QueryCondition("(-5 < I <= -1) and (0.2 < D < 0.75)") result = A.query(attr_cond=qc, attrs=["D", "I"]).df[:] assert all((0.2 < result["D"]) & (result["D"] < 0.75)) assert all((-5 < result["I"]) & (result["I"] <= -1)) qc = tiledb.QueryCondition("(0.2 < D < 0.75) and (-5 < I < 5)") result = A.query(attr_cond=qc, attrs=["D", "I"]).df[:] assert all((0.2 < result["D"]) & (result["D"] < 0.75)) assert all((-5 < result["I"]) & (result["I"] < 5)) def test_check_attrs(self, input_array_UIDS): with tiledb.open(input_array_UIDS) as A: qc = tiledb.QueryCondition("U < 0.1") result = A.query(attr_cond=qc, attrs=["U"]).df[:] assert all(result["U"] < 0.1) qc = tiledb.QueryCondition("U < 1.0") result = A.query(attr_cond=qc, attrs=["U"]).df[:] assert all(result["U"] < 1.0) with self.assertRaises(tiledb.TileDBError): qc = tiledb.QueryCondition("U < '1'") A.query(attr_cond=qc, attrs=["U"]).df[:] with self.assertRaises(tiledb.TileDBError): qc = tiledb.QueryCondition("U < 'one'") A.query(attr_cond=qc, attrs=["U"]).df[:] with self.assertRaises(tiledb.TileDBError): qc = tiledb.QueryCondition("U < 1") A.query(attr_cond=qc, attrs=["D"]).df[:] def test_error_when_using_dim(self, input_array_UIDS): with tiledb.open(input_array_UIDS) as A: with pytest.raises(tiledb.TileDBError) as excinfo: qc = tiledb.QueryCondition("d < 5") A.query(attr_cond=qc).df[:] assert ( "`d` is a dimension. QueryConditions currently only work on attributes." 
in str(excinfo.value) ) def test_attr_and_val_casting_num(self): path = self.path("test_attr_and_val_casting_num") dom = tiledb.Domain( tiledb.Dim(name="dim", domain=(1, 10), tile=1, dtype=np.uint32) ) attrs = [ tiledb.Attr(name="64-bit integer", dtype=np.int64), tiledb.Attr(name="double", dtype=np.float64), ] schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) tiledb.Array.create(path, schema) I = np.random.randint(-5, 5, 10) D = np.random.rand(10) with tiledb.open(path, "w") as arr: arr[np.arange(1, 11)] = {"64-bit integer": I, "double": D} with tiledb.open(path) as arr: qc = tiledb.QueryCondition("attr('64-bit integer') <= val(0)") result = arr.query(attr_cond=qc).df[:] assert all(result["64-bit integer"] <= 0) qc = tiledb.QueryCondition("attr('64-bit integer') <= 0") result = arr.query(attr_cond=qc).df[:] assert all(result["64-bit integer"] <= 0) qc = tiledb.QueryCondition("double <= 0.5") result = arr.query(attr_cond=qc).df[:] assert all(result["double"] <= 0.5) qc = tiledb.QueryCondition("attr('double') <= 0.5") result = arr.query(attr_cond=qc).df[:] assert all(result["double"] <= 0.5) qc = tiledb.QueryCondition("double <= val(0.5)") result = arr.query(attr_cond=qc).df[:] assert all(result["double"] <= 0.5) qc = tiledb.QueryCondition("attr('double') <= val(0.5)") result = arr.query(attr_cond=qc).df[:] assert all(result["double"] <= 0.5) def test_attr_and_val_casting_str(self): path = self.path("test_attr_and_val_casting_str") dom = tiledb.Domain(tiledb.Dim(name="dim", dtype="ascii")) attrs = [tiledb.Attr(name="attr with spaces", dtype="ascii", var=True)] schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) tiledb.Array.create(path, schema) A = np.array( [ "value with spaces", "nospaces", "value with spaces", "another value", "", ] ) with tiledb.open(path, "w") as arr: arr[["a", "b", "c", "d", "e"]] = {"attr with spaces": A} with tiledb.open(path) as arr: qc = tiledb.QueryCondition( "attr('attr with spaces') == 'value with spaces'" ) result = arr.query(attr_cond=qc, use_arrow=False).df[:] assert list(result["dim"]) == [b"a", b"c"] qc = tiledb.QueryCondition( "attr('attr with spaces') == val('value with spaces')" ) result = arr.query(attr_cond=qc, use_arrow=False).df[:] assert list(result["dim"]) == [b"a", b"c"] TileDB-Py-0.12.2/tiledb/tests/test_schema_evolution.py000066400000000000000000000033351417663620700226670ustar00rootroot00000000000000import pytest import tiledb import numpy as np from numpy.testing import assert_array_equal def test_schema_evolution(tmp_path): ctx = tiledb.default_ctx() se = tiledb.ArraySchemaEvolution(ctx) uri = str(tmp_path) attrs = [ tiledb.Attr(name="a1", dtype=np.float64), tiledb.Attr(name="a2", dtype=np.int32), ] dims = [tiledb.Dim(domain=(0, 3), dtype=np.uint64)] domain = tiledb.Domain(*dims) schema = tiledb.ArraySchema(domain=domain, attrs=attrs, sparse=False) tiledb.Array.create(uri, schema) data1 = { "a1": np.arange(5, 9), "a2": np.random.randint(0, 1e7, size=4).astype(np.int32), } with tiledb.open(uri, "w") as A: A[:] = data1 with tiledb.open(uri) as A: res = A[:] assert_array_equal(res["a1"], data1["a1"]) assert_array_equal(res["a2"], data1["a2"]) assert "a3" not in res.keys() newattr = tiledb.Attr("a3", dtype=np.int8) se.add_attribute(newattr) se.array_evolve(uri) data2 = { "a1": np.arange(5, 9), "a2": np.random.randint(0, 1e7, size=4).astype(np.int32), "a3": np.random.randint(0, 255, size=4).astype(np.int8), } with tiledb.open(uri, "w") as A: A[:] = data2 with tiledb.open(uri) as A: res = A[:] 
assert_array_equal(res["a1"], data2["a1"]) assert_array_equal(res["a2"], data2["a2"]) assert_array_equal(res["a3"], data2["a3"]) se = tiledb.ArraySchemaEvolution(ctx) se.drop_attribute("a1") se.array_evolve(uri) with tiledb.open(uri) as A: res = A[:] assert "a1" not in res.keys() assert_array_equal(res["a2"], data2["a2"]) assert_array_equal(res["a3"], data2["a3"]) TileDB-Py-0.12.2/tiledb/tests/test_serialization.cc000066400000000000000000000046461417663620700221430ustar00rootroot00000000000000 #include #include #include #include #include #define TILEDB_DEPRECATED #define TILEDB_DEPRECATED_EXPORT #include "../util.h" #include // C++ #include // C #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 2 #if !defined(NDEBUG) //#include "debug.cc" #endif namespace tiledbpy { using namespace std; using namespace tiledb; namespace py = pybind11; using namespace pybind11::literals; class PySerializationTest { public: static py::bytes create_serialized_test_query(py::object pyctx, py::object pyarray) { int rc; tiledb_ctx_t *ctx; tiledb_array_t *array; ctx = (py::capsule)pyctx.attr("__capsule__")(); if (ctx == nullptr) TPY_ERROR_LOC("Invalid context pointer."); tiledb_ctx_alloc(NULL, &ctx); array = (py::capsule)pyarray.attr("__capsule__")(); if (array == nullptr) TPY_ERROR_LOC("Invalid array pointer."); tiledb_query_t *query; uint32_t subarray[] = {3, 7}; int64_t data[5]; uint64_t data_size = sizeof(data); tiledb_query_alloc(ctx, array, TILEDB_READ, &query); tiledb_query_set_subarray(ctx, query, subarray); tiledb_query_set_layout(ctx, query, TILEDB_UNORDERED); tiledb_query_set_buffer(ctx, query, "", data, &data_size); tiledb_buffer_list_t *buff_list; tiledb_buffer_t *buff; rc = tiledb_serialize_query(ctx, query, TILEDB_CAPNP, 1, &buff_list); if (rc == TILEDB_ERR) TPY_ERROR_LOC("Could not serialize the query."); rc = tiledb_buffer_list_flatten(ctx, buff_list, &buff); if (rc == TILEDB_ERR) TPY_ERROR_LOC("Could not flatten the buffer list."); void *buff_data; uint64_t buff_num_bytes; rc = tiledb_buffer_get_data(ctx, buff, &buff_data, &buff_num_bytes); if (rc == TILEDB_ERR) TPY_ERROR_LOC("Could not get the data from the buffer."); py::bytes output((char *)buff_data, buff_num_bytes); tiledb_buffer_free(&buff); tiledb_buffer_list_free(&buff_list); tiledb_query_free(&query); return output; } }; void init_test_serialization(py::module &m) { py::class_(m, "test_serialization") .def_static("create_serialized_test_query", &PySerializationTest::create_serialized_test_query); } }; // namespace tiledbpy #endif TileDB-Py-0.12.2/tiledb/tests/test_serialization.py000066400000000000000000000021061417663620700221730ustar00rootroot00000000000000from contextlib import AbstractContextManager import itertools import numpy as np from numpy.testing import assert_array_equal import pytest import tiledb from tiledb.main import tiledb_serialization_type_t as ser_type from tiledb.main import serialization as ser from tiledb.tests.common import DiskTestCase from tiledb.main import test_serialization as ser_test class SerializationTest(DiskTestCase): def test_query_deserialization(self): path = self.path("test_query_deserialization") dom = tiledb.Domain(tiledb.Dim(domain=(1, 10), dtype=np.uint32)) attrs = [tiledb.Attr(dtype=np.int64)] schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) tiledb.Array.create(path, schema) data = np.random.randint(-5, 5, 10) with tiledb.open(path, "w") as A: A[np.arange(1, 11)] = data with tiledb.open(path, "r") as A: ctx = tiledb.default_ctx() ser_qry = 
TileDB-Py-0.12.2/tiledb/tests/test_util.py000066400000000000000000000065611417663620700203040ustar00rootroot00000000000000import numpy as np from numpy.testing import assert_array_equal import tiledb from tiledb.tests.common import DiskTestCase class UtilTest(DiskTestCase): def test_empty_like(self): arr = np.zeros((10, 10), dtype=np.float32) def check_schema(self, s): self.assertEqual(s.attr(0).dtype, np.float32) self.assertEqual(s.shape, (10, 10)) self.assertEqual(s.domain.dim(0).shape, (10,)) self.assertEqual(s.domain.dim(1).shape, (10,)) with self.assertRaises(ValueError): tiledb.schema_like("", None) schema = tiledb.schema_like(arr, tile=1) self.assertIsInstance(schema, tiledb.ArraySchema) check_schema(self, schema) uri = self.path("empty_like") T = tiledb.empty_like(uri, arr) check_schema(self, T.schema) self.assertEqual(T.shape, arr.shape) self.assertEqual(T.dtype, arr.dtype) # test a fake object with .shape, .ndim, .dtype class FakeArray(object): def __init__(self, shape, dtype): self.shape = shape self.ndim = len(shape) self.dtype = dtype fake = FakeArray((3, 3), np.int16) schema2 = tiledb.empty_like(self.path("fake_like"), fake) self.assertIsInstance(schema2, tiledb.Array) self.assertEqual(schema2.shape, fake.shape) self.assertEqual(schema2.dtype, fake.dtype) self.assertEqual(schema2.ndim, fake.ndim) # test passing shape and dtype directly schema3 = tiledb.schema_like(shape=(4, 4), dtype=np.float32) self.assertIsInstance(schema3, tiledb.ArraySchema) self.assertEqual(schema3.attr(0).dtype, np.float32) self.assertEqual(schema3.domain.dim(0).tile, 4) schema3 = tiledb.schema_like(shape=(4, 4), dtype=np.float32, tile=1) self.assertEqual(schema3.domain.dim(0).tile, 1) def test_open(self): uri = self.path("load") with tiledb.from_numpy(uri, np.array(np.arange(3))) as T: with tiledb.open(uri) as T2: self.assertEqual(T.schema, T2.schema) assert_array_equal(T, T2) def test_save(self): uri = self.path("test_save") arr = np.array(np.arange(3)) with tiledb.save(uri, arr) as tmp: with tiledb.open(uri) as T: assert_array_equal(arr, T) def test_array_exists(self): import tempfile with tempfile.NamedTemporaryFile() as tmpfn: self.assertFalse(tiledb.array_exists(tmpfn.name)) uri = self.path("test_array_exists_dense") with tiledb.from_numpy(uri, np.arange(0, 5)) as T: self.assertTrue(tiledb.array_exists(uri)) self.assertTrue(tiledb.array_exists(uri, isdense=True)) self.assertFalse(tiledb.array_exists(uri, issparse=True)) uri = self.path("test_array_exists_sparse") dom = tiledb.Domain(tiledb.Dim(domain=(0, 3), tile=4, dtype=int)) att = tiledb.Attr(dtype=int) schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) tiledb.Array.create(uri, schema) with tiledb.SparseArray(uri, mode="w") as T: T[[0, 1]] = np.array([0, 1]) self.assertTrue(tiledb.array_exists(uri)) self.assertTrue(tiledb.array_exists(uri, issparse=True)) self.assertFalse(tiledb.array_exists(uri, isdense=True))
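# Distilled from test_empty_like above, a minimal sketch of the helper pair:
# schema_like() infers a dense ArraySchema from anything exposing .shape and
# .dtype, while empty_like() additionally creates the array and returns an
# open handle. `uri` is a hypothetical path; the values mirror the test.
def _empty_like_sketch(uri):
    arr = np.zeros((10, 10), dtype=np.float32)
    schema = tiledb.schema_like(arr, tile=1)  # schema only, no array created
    assert schema.attr(0).dtype == np.float32
    with tiledb.empty_like(uri, arr) as T:  # creates and opens a matching array
        assert T.shape == arr.shape and T.dtype == arr.dtype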
+ ":" + \ std::to_string(__LINE__) + ")"); class TileDBPyError : std::runtime_error { public: explicit TileDBPyError(const char *m) : std::runtime_error(m) {} explicit TileDBPyError(std::string m) : std::runtime_error(m.c_str()) {} public: virtual const char *what() const noexcept override { return std::runtime_error::what(); } }; #endif // TILEDB_PY_UTIL_H TileDB-Py-0.12.2/tiledb/util.py000066400000000000000000000023621417663620700160760ustar00rootroot00000000000000import tiledb import numpy as np from typing import Iterable from tiledb.dataframe_ import ColumnInfo def _sparse_schema_from_dict(input_attrs, input_dims): attr_infos = {k: ColumnInfo.from_values(v) for k, v in input_attrs.items()} dim_infos = {k: ColumnInfo.from_values(v) for k, v in input_dims.items()} dims = list() for name, dim_info in dim_infos.items(): dim_dtype = np.bytes_ if dim_info.dtype == np.dtype("U") else dim_info.dtype dtype_min, dtype_max = tiledb.libtiledb.dtype_range(dim_info.dtype) if np.issubdtype(dim_dtype, np.integer): dtype_max = dtype_max - 1 if np.issubdtype(dim_dtype, np.integer) and dtype_min < 0: dtype_min = dtype_min + 1 dims.append( tiledb.Dim( name=name, domain=(dtype_min, dtype_max), dtype=dim_dtype, tile=1 ) ) attrs = list() for name, attr_info in attr_infos.items(): dtype_min, dtype_max = tiledb.libtiledb.dtype_range(attr_info.dtype) attrs.append(tiledb.Attr(name=name, dtype=dim_dtype)) return tiledb.ArraySchema(domain=tiledb.Domain(*dims), attrs=attrs, sparse=True) def schema_from_dict(attrs, dims): return _sparse_schema_from_dict(attrs, dims) TileDB-Py-0.12.2/tiledb/version_.py000066400000000000000000000004021417663620700167360ustar00rootroot00000000000000from .version import version, version_tuple class VersionHelper: def __getattr__(self, name): if name == "version": return version else: raise AttributeError def __call__(self): return version_tuple