pax_global_header00006660000000000000000000000064152025326650014520gustar00rootroot0000000000000052 comment=e0aee790be2b2510e3c9842d5e782545cc0fb6f1 cogent3-scinexus-e0aee79/000077500000000000000000000000001520253266500154225ustar00rootroot00000000000000cogent3-scinexus-e0aee79/.github/000077500000000000000000000000001520253266500167625ustar00rootroot00000000000000cogent3-scinexus-e0aee79/.github/workflows/000077500000000000000000000000001520253266500210175ustar00rootroot00000000000000cogent3-scinexus-e0aee79/.github/workflows/ci.yml000066400000000000000000000047151520253266500221440ustar00rootroot00000000000000name: CI on: push: pull_request: # NOTE: # if changing python versions, also update versions in # - release.yml # - noxfile.py jobs: tests: name: "Python ${{ matrix.python-version }} (${{ matrix.os }})" runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: ["3.11", "3.14"] steps: - uses: "actions/checkout@v6" with: fetch-depth: 0 # Setup env - uses: "actions/setup-python@v6" with: python-version: "${{ matrix.python-version }}" - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: "pyproject.toml" - name: Install MPI (Ubuntu) if: startsWith(matrix.os, 'ubuntu') run: | sudo apt-get update sudo apt-get install -y openmpi-bin libopenmpi-dev - name: "Run nox for ${{ matrix.python-version }}" shell: bash run: | lname="snx-${{matrix.os}}-${{matrix.python-version}}.lcov" xname="snx-${{matrix.os}}-${{matrix.python-version}}.xml" cov="lcov -o$lname xml -o$xname" uv run --group dev nox --force-python python -s testcov -- $cov - name: Coveralls Parallel uses: coverallsapp/github-action@v2 with: parallel: true github-token: ${{ secrets.github_token }} flag-name: run-${{matrix.python-version}}-${{matrix.os}} file: "snx-${{matrix.os}}-${{matrix.python-version}}.lcov" type_check: name: Type Check runs-on: ${{ matrix.os }} strategy: matrix: python-version: ["3.14"] os: 
[ubuntu-latest] steps: - uses: actions/checkout@v6 with: fetch-depth: 0 - uses: "actions/setup-python@v6" with: python-version: "${{ matrix.python-version }}" - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: "pyproject.toml" - name: "Run Type Checking for ${{ matrix.python-version }}" run: | uv run --group dev nox --force-python python -s type_check finish: name: "Finish Coveralls" needs: tests runs-on: ubuntu-latest steps: - name: Coveralls Finished uses: coverallsapp/github-action@v2 with: github-token: ${{ secrets.github_token }} parallel-finished: truecogent3-scinexus-e0aee79/.github/workflows/codeql.yml000066400000000000000000000014421520253266500230120ustar00rootroot00000000000000name: "CodeQL" on: push: branches-ignore: - master pull_request: branches-ignore: - master schedule: - cron: '39 20 * * 6' jobs: analyze: name: Analyze runs-on: ubuntu-latest permissions: actions: read contents: read security-events: write strategy: fail-fast: false matrix: language: [ 'python' ] steps: - name: Checkout repository uses: actions/checkout@v6 - name: Initialize CodeQL uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} - name: Autobuild uses: github/codeql-action/autobuild@v4 - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v4 with: category: "/language:${{matrix.language}}" cogent3-scinexus-e0aee79/.github/workflows/docs.yml000066400000000000000000000034041520253266500224730ustar00rootroot00000000000000name: Build Docs concurrency: group: docs-build-${{ github.ref }} cancel-in-progress: true on: workflow_dispatch: release: types: [published] push: branches: - main paths: - 'docs/**' - '.readthedocs.yaml' - 'zensical.toml' - 'rtd_get_docs.py' permissions: contents: read jobs: build-docs: if: github.repository == 'cogent3/SciNexus' runs-on: ubuntu-latest steps: - name: Checkout Repository uses: actions/checkout@v4 with: fetch-depth: 0 submodules: recursive - uses: 
"actions/setup-python@v5" with: python-version: "3.14" - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: "pyproject.toml" - name: Build Documentation run: | # update executable components uv run --group dev nox -s cogdocs # build actual docs uv run --group dev zensical build --clean working-directory: ${{ github.workspace }} - name: Upload Documentation Artifact uses: actions/upload-artifact@v4 with: name: scinexus-docs-html path: site trigger_rtd: if: github.repository == 'cogent3/SciNexus' runs-on: ubuntu-latest needs: build-docs steps: - name: Trigger Read the Docs build env: RTDS_WEBHOOK_URL: ${{ secrets.RTDS_WEBHOOK_URL }} RTDS_WEBHOOK_TOKEN: ${{ secrets.RTDS_WEBHOOK_TOKEN }} BRANCH: ${{ github.ref_name }} run: | curl -sS --fail-with-body -X POST \ --data-urlencode "token=${RTDS_WEBHOOK_TOKEN}" \ --data-urlencode "branches=${BRANCH}" \ "${RTDS_WEBHOOK_URL}" cogent3-scinexus-e0aee79/.github/workflows/linters.yml000066400000000000000000000014551520253266500232270ustar00rootroot00000000000000name: Lint code using ruff on: push: pull_request: jobs: linters: runs-on: ubuntu-latest if: github.repository != 'cogent3/SciNexus' steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: '3.14' - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: "pyproject.toml" - name: Format code using ruff run: uv run --group dev nox -s fmt - name: Commit changes uses: EndBug/add-and-commit@v10 with: author_name: ${{ github.actor }} author_email: ${{ github.actor }}@users.noreply.github.com message: "STY: pre-commit linting with ruff" add: "." 
cogent3-scinexus-e0aee79/.github/workflows/release.yml000066400000000000000000000066531520253266500231740ustar00rootroot00000000000000name: Release on: [workflow_dispatch] jobs: test: name: "Test on Python ${{ matrix.python-version }} (${{ matrix.os }})" runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: ["3.11", "3.12", "3.13", "3.14"] steps: - uses: "actions/checkout@v6" with: fetch-depth: 0 # Setup env - uses: "actions/setup-python@v6" with: python-version: "${{ matrix.python-version }}" - name: Install MPI (Ubuntu) if: startsWith(matrix.os, 'ubuntu') run: | sudo apt-get update sudo apt-get install -y openmpi-bin libopenmpi-dev - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: "pyproject.toml" - name: "Run nox for ${{ matrix.python-version }}" shell: bash run: | uv run --group dev nox -db uv --force-python python -s test uv run --group dev nox -db uv --force-python python -s test_types uv run --group dev nox -db uv --force-python python -s test_docs - name: "Run MPI tests" if: startsWith(matrix.os, 'ubuntu') shell: bash run: | uv run --group dev nox -db uv --force-python python -s testmpi docbuild: name: "Test the docs" runs-on: ubuntu-latest steps: - uses: "actions/checkout@v6" with: fetch-depth: 0 - uses: "actions/setup-python@v6" with: python-version: "3.14" - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: "pyproject.toml" - name: "test the docs code" run: | uv run --group dev nox -s cogdocs uv run --group dev zensical build build: name: Build wheel and sdist needs: test runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 with: fetch-depth: 0 - uses: actions/setup-python@v6 with: python-version: '3.14' - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: "pyproject.toml" - name: Build sdist and wheel run: uv build - name: Upload sdist and wheel uses: 
actions/upload-artifact@v7 with: name: snx-wheel-sdist path: | ./dist/*.whl ./dist/*.tar.gz release_test: name: Release to Test PyPI needs: [build, docbuild] environment: release_test runs-on: ubuntu-latest permissions: id-token: write steps: - name: Download sdist and wheel uses: actions/download-artifact@v8 with: name: snx-wheel-sdist path: ./dist - name: Publish package distributions to Test PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: repository-url: https://test.pypi.org/legacy/ release: name: Release to PyPI needs: release_test environment: release runs-on: ubuntu-latest permissions: id-token: write steps: - name: Download sdist and wheel uses: actions/download-artifact@v8 with: name: snx-wheel-sdist path: ./dist - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 cogent3-scinexus-e0aee79/.gitignore000066400000000000000000000005111520253266500174070ustar00rootroot00000000000000* !.github/**/*.yml !.readthedocs.yaml !.hgignore !LICENSE !README.md !changelog.md !docs/conftest.py !docs/**/*.md !docs/**/*.svg !docs/*.txt !noxfile.py !pyproject.toml !rtd_get_docs.py !ruff.toml !scripts/*.py !src/**/*.py !tests/data/*.fasta !tests/data/*.gz !tests/data/*.tsv !tests/data/*.log !tests/**/*.py !zensical.toml cogent3-scinexus-e0aee79/.hgignore000066400000000000000000000006271520253266500172320ustar00rootroot00000000000000syntax:glob .svn *.pyc *.pyo *.so *.o *.DS_Store *.tmproj *.rej *.orig *.wpr *.pdf _build/* build *htmlcov* *.idea *.coverage* *egg-info* *.wpu .cache* *taskpaper *.ipynb *.ipynb_checkpoints* *.sublime* *.patch *.pytest_cache *.tox *.nox *.vscode *.code-workspace coverage.xml __pycache__ junit-*.xml dist/* working/* lcov*.info .ruff_cache/* .devcontainer/* venv* .venv* .mypy* CLAUDE.md site/* docs/data/*cogent3-scinexus-e0aee79/.readthedocs.yaml000066400000000000000000000011751520253266500206550ustar00rootroot00000000000000# .readthedocs.yaml # Read the Docs configuration file for MkDocs projects # See 
https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Set the version of Python and other tools you might need build: os: ubuntu-22.04 tools: python: "3.14" commands: # Install the required dependencies - pip install requests # Run the script to download and extract the pre-built docs - python rtd_get_docs.py - echo "Documentation downloaded and extracted" # Disable the default build processes since we're using pre-built docs sphinx: configuration: null python: install: [] cogent3-scinexus-e0aee79/LICENSE000066400000000000000000000027261520253266500164360ustar00rootroot00000000000000BSD 3-Clause License Copyright (c) 2026, cogent3 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cogent3-scinexus-e0aee79/README.md000066400000000000000000000134211520253266500167020ustar00rootroot00000000000000

scinexus

[![Coverage Status](https://coveralls.io/repos/github/cogent3/scinexus/badge.svg?branch=main)](https://coveralls.io/github/cogent3/scinexus?branch=main) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/fd8810efd4f142069bd84144e14350b4)](https://app.codacy.com/gh/cogent3/scinexus/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) [![CI](https://github.com/cogent3/scinexus/actions/workflows/ci.yml/badge.svg)](https://github.com/cogent3/scinexus/actions/workflows/ci.yml) ![PyPI Downloads](https://img.shields.io/pypi/dm/scinexus) *`scinexus` is a framework for rapid development of data processing applications. It enables interoperability between objects through defined data types, allowing development of scientific domain app ecosystems. Just as `attrs` and `dataclasses` use type hints to simplify data type definition, `scinexus` uses them to simplify writing best-practice scientific algorithms.* Many scientific problems require repeating calculations across many files or database records. Such tasks suit data-level parallelism, but writing robust, maintainable code for them is often tedious and quickly becomes complex. As the Unix philosophy articulates, writing algorithms that do one thing well and can be composed together through piping data of known type is a *Very Good Thing*™. **`scinexus` encourages this design pattern and eliminates the boilerplate.** We leverage the Python type annotation system to govern the compatibility (composability) of different applications. This enables in-process composition of your applications with validation of the consistency of the pipeline and the consistency of the data being run through it. 
**`scinexus` is designed for scientific reproducibility.** Scientific computations should record all conditions needed to reproduce an analysis. `scinexus` reduces the effort by intercepting all arguments (including defaults) used in app construction and logging the resulting app state. ## Examples Developers can either inherit from a base class or use the `scinexus.define_app` decorator to make composable apps. The following examples show simple composition.
Loading files so missing data does not cause a crash ```python from scinexus import define_app @define_app(app_type="loader") def read_json(path: str) -> dict: import json with open(path) as f: return json.load(f) @define_app def validate(data: dict, required_field: str) -> dict: if required_field not in data: # this becomes a NotCompleted sentinel object # your run doesn't crash! raise ValueError(f"missing {required_field!r} field") return data app = read_json() + validate(required_field="name") ``` You can apply `app` to a single file path as `app(filepath)`, or operate in parallel (and show a progress bar) on a sequence of file paths as ```python results = list(app.as_completed(["some_file_path.json", "some_other_file_path.json"], parallel=True, show_progress=True)) ```
A contrived numerical example ```python from scinexus import define_app @define_app def normalise(values: list[float]) -> list[float]: lo, hi = min(values), max(values) return [(v - lo) / (hi - lo) for v in values] @define_app def threshold(values: list[float]) -> list[bool]: return [v > 0.5 for v in values] app = normalise() + threshold() app([1.0, 5.0, 3.0, 9.0]) ```
A configurable app ```python from scinexus import define_app @define_app(app_type="loader") def load_csv(path: str) -> list[dict]: import csv with open(path) as f: return list(csv.DictReader(f)) @define_app class summarise: def __init__(self, column: str) -> None: """column contains the values to produce summary stats for""" self.column = column def main(self, rows: list[dict]) -> dict[str, float]: vals = [float(r[self.column]) for r in rows] return {"mean": sum(vals) / len(vals), "min": min(vals), "max": max(vals)} app = load_csv() + summarise(column="price") ```
## Features - Type checking at composition time - Durable computing -- failures recorded as `NotCompleted` records, not exceptions - Data-level parallel execution with pluggable backends (stdlib, loky, MPI, or custom) - Progress bars (`tqdm` or `rich`) - Automated logging and citation tracking - Checkpointing via data stores (directory, SQLite) ## Installation ```bash pip install scinexus ``` ## The `scinexus` origin story The app framework and utility functions in `scinexus` incubated inside [cogent3](https://github.com/cogent3/cogent3) from March 2019, accumulating over seven years of development, testing, and real-world use in computational genomics before being extracted into a standalone package. The design is mature and has underpinned analyses in published studies. The extraction into `scinexus` makes the infrastructure available to any scientific Python project, free of the `cogent3` dependency. See the [changelog](changelog.md) for a detailed list of changes from the cogent3 app infrastructure. We acknowledge here that many members of the `cogent3` community contributed to the code that now lives here, including [@GavinHuttley](https://github.com/GavinHuttley), [@rmcar17](https://github.com/rmcar17), [@Nick-Foto](https://github.com/Nick-Foto), [@KatherineCaley](https://github.com/KatherineCaley), [@fredjaya](https://github.com/fredjaya), and [@khiron](https://github.com/khiron). cogent3-scinexus-e0aee79/changelog.md000066400000000000000000000051451520253266500177000ustar00rootroot00000000000000# Changelog Changes from the original cogent3 app infrastructure. ## New Features - Standalone package extracted from `cogent3.app` — no cogent3 dependency required. - Generic base classes `AppBase[T, R]`, `ComposableApp[T, R]`, and `WriterApp[T, R]` that apps can inherit from directly as an alternative to the `@define_app` decorator. Type checkers can resolve types through class inheritance without a plugin. 
- mypy plugin (`scinexus._mypy_plugin`) for correct type inference of `@define_app` decorated classes. Synthesises the `__call__` return type as `R | NotCompleted`. - `check_data_type` attribute on apps — a settable property to toggle runtime input type checking on or off. Disabling can speed up execution and simplify debugging. - `NotCompletedType` enum (`ERROR`, `FAIL`, `BUG`) for categorising failure types, replacing bare strings. - `set_summary_display()` / `get_summary_display()` — a module-level registry allowing downstream packages (e.g. cogent3) to register custom display functions for data store summary methods (`describe`, `summary_logs`, `summary_not_completed`, `summary_citations`, `validate`). - `citations` and `bib` properties on apps for tracking software citations via the `citeable` library. Citations propagate through composed pipelines. - Pluggable parallel backends -- choose between stdlib multiprocessing, loky, or MPI via `set_parallel_backend()`. - Pluggable progress bars -- use `tqdm` or `rich` via the `Progress` protocol and `set_default_progress()`. - `set_id_from_source()` / `get_id_from_source()` -- register a custom function for extracting storage identifiers from data. - `apply_to()` accepts `logger=False` to disable log file creation. ## Enhancements - App composition (`+`) now makes shallow copies of the right-hand operand. Composed pipelines no longer share mutable state. - Composition-time type compatibility checking via `check_type_compatibility()` — catches type mismatches when apps are composed with `+`, before any data is processed. - Data store summary methods (`describe`, `summary_logs`, etc.) return `list[dict]` or `dict` instead of cogent3 `Table` objects. Custom display can be restored via `set_summary_display()`. - All modules pass mypy strict type checking. - `StrOrBytes` type alias replaced with `str | bytes` throughout. - Inline `assert` statements replaced with explicit `ValueError` / `TypeError` raises. 
- Type-hint-related imports moved under `TYPE_CHECKING` for lighter runtime import overhead. ## Deprecated - `ComposableApp.disconnect()` — discontinued, will be removed in version 2026.9. No longer required since composition uses shallow copies. cogent3-scinexus-e0aee79/docs/000077500000000000000000000000001520253266500163525ustar00rootroot00000000000000cogent3-scinexus-e0aee79/docs/conftest.py000066400000000000000000000003141520253266500205470ustar00rootroot00000000000000from scinexus.typing import register_type_namespace try: from cogent3.app.typing import _get_resolution_namespace register_type_namespace(_get_resolution_namespace) except ImportError: pass cogent3-scinexus-e0aee79/docs/explanation/000077500000000000000000000000001520253266500206745ustar00rootroot00000000000000cogent3-scinexus-e0aee79/docs/explanation/app-lifecycle.md000066400000000000000000000022021520253266500237270ustar00rootroot00000000000000# The app lifecycle !!! abstract "" The different app types, their base classes, and how `define_app` transforms a user defined class or function into a composable app. ## Types of apps ### Loaders These are responsible for loading data and are composable. They inherit from `LoaderApp`. ```python from scinexus import LoaderApp ``` ### Writers These are responsible for writing data and are composable. They inherit from `WriterApp`. ```python from scinexus import WriterApp ``` ### Generic Generic apps do other operations on data and are composable. They inherit from `ComposableApp` ```python from scinexus import ComposableApp ``` ### Non-composable Non-composable apps cannot be combined with other apps into pipelines. ```python from scinexus import NonComposableApp ``` !!! info You can create your app by inheriting from one of the above base classes. Or you can use the `define_app` decorator. Using the decorator is the fastest way to turn something you already have into a composable app. 
Under the hood, the decorator is basically injecting the base classes described above into the inheritance of your own classes. cogent3-scinexus-e0aee79/docs/explanation/customisation-hooks.md000066400000000000000000000101511520253266500252360ustar00rootroot00000000000000# Customisation hooks !!! abstract "" How `scinexus` uses module-level registry functions to let downstream packages customise summary display and identifier extraction without subclassing. ## The pattern `scinexus` uses module-level registry functions to let downstream packages customise behaviour without subclassing or monkey-patching. Each hook follows the same shape: - **`set_*(func)`** registers a callable (or `None` to clear) - **`get_*()`** returns the currently registered callable (or the default) This keeps `scinexus` free of dependencies on downstream packages while still allowing them to integrate deeply. ## `set_summary_display` — transforming summary output Data store summary properties (`.describe`, `.summary_logs`, `.summary_not_completed`, `.summary_citations`, `.validate()`) collect their data as plain Python dicts or lists of dicts. By default these are returned as-is. A downstream package can register a display function that transforms these raw structures into richer objects. The function must accept `(data, *, name)` where `data` is the raw dict or list and `name` is the summary method name (e.g. `"describe"`). 
### How `cogent3` uses this When `cogent3.app` is imported, it registers a function that converts summaries into `cogent3.core.table.Table` objects: ```python { notest } from scinexus.data_store import set_summary_display from cogent3.core.table import Table def _summary_to_table(data, *, name): if isinstance(data, dict): title = data.pop("title", name) rows = [[k, v] for k, v in data.items()] return Table( header=["Condition", "Value"], data=rows, title=title, ) if isinstance(data, list): if not data: return Table(header=[], data=[], title=name) header = list(data[0].keys()) rows = [list(row.values()) for row in data] return Table(header=header, data=rows, title=name) return data set_summary_display(_summary_to_table) ``` After this registration, every call to `dstore.describe` or `dstore.summary_not_completed` returns a `Table` with a rich notebook repr, rather than a plain dict. ## `set_id_from_source` — customising unique ID extraction When `apply_to()` or `as_completed()` processes a data store, each result needs a unique identifier so the writer can store it and skip already-processed inputs on subsequent runs. 
By default, `scinexus` extracts this ID using `get_unique_id`, which strips format suffixes from file names: ``` "gene_001.fasta.gz" → "gene_001" ``` If your data uses a different naming convention — for example, IDs embedded in the file content or in a metadata field — you can register a custom extractor: ```python { notest } from scinexus.data_store import set_id_from_source def my_id_extractor(data): """Extract ID from a metadata dict.""" return data.info.source.split("/")[-1].split("_")[0] set_id_from_source(my_id_extractor) ``` The registered function is consulted by: - `WriterApp.apply_to()` — to derive output record keys - `AppBase.as_completed()` — to identify results - `NotCompleted` — to normalise the `source=` attribute on error records Pass `None` to restore the default: ```python { notest } set_id_from_source(None) # back to get_unique_id ``` Per-call overrides via the `id_from_source` keyword on `apply_to()` and `as_completed()` still take precedence over the registered function. ## The default ID pipeline: `get_data_source` → `get_unique_id` The default extractor, `get_unique_id`, works in two steps: 1. **`get_data_source(data)`** extracts a source string from the input. This is a singledispatch function that handles: - `str` / `Path` → the file name - `dict` → looks for `data["info"]["source"]` or `data["source"]` - `DataMemberABC` → the member's `unique_id` - Any object with a `.source` attribute → recurses on that attribute 2. **`get_unique_id(name)`** strips format suffixes (e.g. `.fasta`, `.gz`) from the source string returned by `get_data_source`. Together they turn inputs like `DataMember(unique_id="gene_001.fasta.gz")` into the key `"gene_001"`. cogent3-scinexus-e0aee79/docs/explanation/data-store-model.md000066400000000000000000000041051520253266500243570ustar00rootroot00000000000000# Data store model !!! 
abstract "" How data stores map inputs to outputs via unique IDs, how checkpointing works to skip already-processed items, the three backends (directory, zip, SQLite) and when to use each, and how citations and logs are stored alongside results. ## Data stores -- collections of data records If you download [raw.zip](../data/raw.zip) and unzip it, you will see it contains 1,035 files ending with a `.fa` filename suffix. (It also contains a tab delimited file and a log file, which we ignore for now.) The directory `raw` is a "data store" and the `.fa` files are "members" of it. In summary, a data store is a collection of members of the same "type". This means we can apply the same application to every member. ### Types of data store | Class Name | Supported Operations | Supported Data Types | Identifying Suffix | |---|---|---|---| | `DataStoreDirectory` | read / write / append | text | None | | `ReadOnlyDataStoreZipped` | read | text | `.zip` | | `DataStoreSqlite` | read, write, append | text or bytes | `.sqlitedb` | !!! note The `ReadOnlyDataStoreZipped` is just a compressed `DataStoreDirectory`. ### The structure of data stores If a directory was not created by `scinexus` as a `DataStoreDirectory` then it has only the structure that existed previously. If a data store was created by `scinexus`, either as a directory or as a `sqlitedb`, then it contains four types of data: completed records, *not* completed records, log files and md5 files. In a `DataStoreDirectory`, these are organised using the file system. The completed members are valid data records (as distinct from not completed) and are at the top level. The remaining types are in subdirectories. ``` demo_dstore ├── logs ├── md5 ├── not_completed └── ... ``` `logs/` stores `scitrack` log files produced by `scinexus` writer apps. `md5/` stores plain text files with the md5 sum of a corresponding data member which are used to check the integrity of the data store. 
The `DataStoreSqlite` stores the same information, just in SQL tables. cogent3-scinexus-e0aee79/docs/explanation/flow.md000066400000000000000000000043651520253266500221750ustar00rootroot00000000000000# Execution flow of a composed app !!! abstract "" How data flows through a composed pipeline, step by step. Consider two apps composed into a pipeline: ```python { notest } from scinexus import define_app @define_app(app_type="loader") def read_json(path: str) -> dict: import json with open(path) as f: return json.load(f) @define_app def validate(data: dict, required_field: str) -> dict: if required_field not in data: raise ValueError(f"missing {required_field!r} field") return data app = read_json() + validate(required_field="name") ``` Composing with `+` creates a new app where `validate` is the outermost app and `read_json` is stored as its `.input` attribute. When you call `app(filepath)`, execution begins at the outermost app and works inward. ## The execution flow when you call `app(filepath)` ```mermaid flowchart TD entry["Executes scinexus __call__(val)"] --> none{val is None?} none -- yes --> nc_none[create and return NotCompleted ERROR, recording current app as origin] none -- no --> nc{val is NotCompleted?} nc -- yes --> nc_return[returns same NotCompleted] nc -- no --> has_input{has an input app?} has_input -- yes --> call_input["call input(val), which enters the top of this chart"] call_input --> input_nc{result is NotCompleted?} input_nc -- yes --> nc_input[return same NotCompleted] input_nc -- no --> type_check has_input -- no --> type_check{val type is valid for self} type_check -- fail --> nc_type[create and return NotCompleted ERROR] type_check -- pass --> main["main(val)"] main -- exception --> nc_main[NotCompleted ERROR] main -- success --> result["return result (which may be NotCompleted FAIL)"] classDef errorNode fill:#fde0c8,stroke:#333 classDef successNode fill:#c8e0fd,stroke:#333 class nc_none,nc_return,nc_input,nc_type,nc_main errorNode class 
result successNode ``` This is the same sequence for every composed app, regardless of pipeline length. Each app in the chain runs the same `__call__` checks, so `NotCompleted` propagation and exception handling are consistent throughout. See [Runtime type checking](type-system.md#runtime-type-checking) for details on how type validation works and how to disable it. cogent3-scinexus-e0aee79/docs/explanation/index.md000066400000000000000000000014711520253266500223300ustar00rootroot00000000000000# Explanation Background and design rationale behind `scinexus`. - [Why composable apps?](why-composable-apps.md) -- the problem `scinexus` solves and how it compares to alternatives - [The app lifecycle](app-lifecycle.md) -- base classes and `define_app` - [Type system](type-system.md) -- how composition-time type checking works - [Execution flow](flow.md) -- step-by-step data flow through a composed pipeline - [NotCompleted design](not-completed-design.md) -- why a sentinel pattern instead of exceptions - [Source tracking](source-tracking.md) -- how `source_proxy` tracks data provenance through pipelines - [Customisation hooks](customisation-hooks.md) -- `set_summary_display` and `set_id_from_source` registry functions - [Data store model](data-store-model.md) -- unique IDs, checkpointing, and backend choices cogent3-scinexus-e0aee79/docs/explanation/not-completed-design.md000066400000000000000000000071351520253266500252450ustar00rootroot00000000000000# `NotCompleted` design !!! abstract "" Why `scinexus` uses a sentinel object instead of exceptions for handling failures in batch processing. ## The problem with exceptions in pipelines When applying an algorithm to hundreds or thousands of data records, some records will inevitably fail — bad data, missing fields, violated preconditions. If failures raise exceptions, you face an unpleasant choice: - **Let it crash.** You lose all progress and must restart from scratch. 
- **Wrap everything in try/except.** Your pipeline logic becomes cluttered with error-handling boilerplate, and you must decide at every step what to catch and what to re-raise. Neither approach scales well. You want failures to be recorded and the pipeline to continue processing the remaining records. ## The sentinel pattern `NotCompleted` is `scinexus`'s answer: a sentinel return value that signals "this record could not be processed" without raising an exception. It carries structured information about the failure: - **`.type`** — `FALSE` (a condition was not met) or `ERROR` (an unexpected exception occurred) - **`.origin`** — which app produced the failure - **`.source`** — which input data failed - **`.message`** — a human-readable explanation Because `NotCompleted` is a regular return value, it flows through the same code paths as successful results. ## Why it subclasses `int` and is falsy `NotCompleted` subclasses `int` with a value of `0`, making it evaluate to `False` in boolean contexts. This means you can check for failure with a simple truthiness test: ```python { notest } result = my_app(data) if not result: print(f"Failed: {result.message}") ``` Subclassing `int` rather than defining `__bool__` alone ensures consistent behaviour with Python's truth-testing protocol across all contexts (including NumPy arrays and other libraries that inspect types). ## Automatic propagation through pipelines When apps are composed with `+`, the resulting pipeline checks each intermediate result. If any step returns a `NotCompleted`, subsequent steps are skipped and the `NotCompleted` is returned as the final result. This means: - A single failure does not corrupt downstream steps. - The failure's `.origin` accurately records where the problem occurred, not where it was finally caught. - No try/except scaffolding is needed in pipeline code. 
```python { linenums="1" notest } import cogent3 as c3 aln = c3.get_dataset("primate-brca1") select_seqs = c3.get_app("take_named_seqs", "Mouse", "Human") min_length = c3.get_app("min_length", 300) app = select_seqs + min_length result = app(aln) print(result) # NotCompleted(type=FAIL, origin=take_named_seqs, source="brca1", message="named # seq(s) {'Mouse'} not in ('FlyingLem', 'TreeShrew', 'Galago', 'HowlerMon', # 'Rhesus', 'Orangutan', 'Gorilla', 'Chimpanzee', 'Human')") ``` ## Recording failures in data stores When a pipeline is run via `apply_to()` on a data store, `NotCompleted` results are automatically written to a separate area (the `not_completed/` subdirectory or SQL table). This gives you a complete audit trail: you can inspect which records failed, which app was responsible, and why — all without interrupting the processing of successful records. See [Handle failures](../howto/handle-failures.md) for usage examples. cogent3-scinexus-e0aee79/docs/explanation/source-tracking.md000066400000000000000000000055631520253266500243270ustar00rootroot00000000000000# Source tracking !!! abstract "" How `source_proxy` preserves the link between input identity and output when data is transformed through a pipeline. ## The problem When you call `apply_to()` or `as_completed()` on a data store, each member is fed through the pipeline independently. The pipeline may transform the data into something completely different — a new object with no reference back to the input that produced it. But the writer at the end of the pipeline needs to know *which input* produced *which output* so it can assign the correct unique ID in the output data store. For example, if a loader reads `"gene_001.fa"` and the pipeline returns a translated protein sequence, the writer needs to store that result under the key `"gene_001"`. Without a mechanism to carry the input identity forward, this link is lost. 
## How `source_proxy` solves it `source_proxy` is a transparent wrapper that carries two extra pieces of state alongside the wrapped object: - **`.source`** — the original input (or its identifier), preserved across transformations - **`.uuid`** — a unique identifier for this proxy instance, used for hashing When `as_completed()` or `apply_to()` processes a data store, each member is wrapped in a `source_proxy` before entering the pipeline. Because `source_proxy` delegates attribute access to the wrapped object via `__getattr__`, downstream apps see the original object and do not need to know about the proxy. ```python { notest } from scinexus.composable import source_proxy proxy = source_proxy(some_data) proxy.source # the original input proxy.uuid # unique identifier for this proxy proxy.any_attr # delegates to some_data.any_attr ``` ## How `propagate_source` preserves the link After each pipeline step, the result needs to be re-associated with the original source. `propagate_source` handles this: 1. If the result already has a `.source` attribute (e.g. it is a `DataMember` or another object that natively tracks its origin), the proxy is **unwrapped** — the result stands on its own. 2. Otherwise, the proxy's wrapped object is **updated** to the new result via `set_obj()`, and the proxy (still carrying the original `.source`) is returned. This means the source identity survives an arbitrary number of pipeline steps, even when intermediate apps return entirely new objects. ## Why this matters for writers `WriterApp.apply_to()` uses the source to derive unique IDs for output records. This enables **append-only semantics**: on a subsequent run against the same data store, records that already exist in the output are skipped. The unique ID comes from the original input's identity (via `get_data_source()`), which is only available because `source_proxy` carried it through the pipeline. 
Without source tracking, the writer would have no way to determine whether a result corresponds to an input that has already been processed. cogent3-scinexus-e0aee79/docs/explanation/type-system.md000066400000000000000000000055341520253266500235300ustar00rootroot00000000000000# Type system !!! abstract "" Why types are checked at composition time rather than call time, how `check_type_compatibility` works, handling of Union types and forward references, the role of `register_type_namespace`, and the relationship to `typeguard` for runtime checking. ## Composability rules There are rules around app composition, starting with app types. Loaders and writers are special cases. If included, a loader must always be first, e.g. ```python { notest } app = a_loader + a_generic ``` If included, a writer must always be last, e.g. ```python { notest } app = a_generic + a_writer ``` Changing the order for either of the above will result in a `TypeError`. The next constraint on app composition concerns the input and output types of the apps involved. Specifically, apps define the type of input they work on and the type of output they produce. For two apps to be composed, the output (or return) type of the app on the left (e.g. `a_loader`) must overlap with the input type of the app on the right (e.g. `a_generic`). If they don't overlap, a `TypeError` is raised. ## Built-in type protocols and aliases SciNexus defines two type-level constructs used across the framework: - `SerialisableType` -- a `Protocol` that any object with a `to_rich_dict()` method satisfies. Database writer apps rely on this to serialise results before storing them in a data store. - `IdentifierType` -- a type alias (`str | Path | DataMemberABC`) representing the accepted ways to identify a member of a data store. Loader apps accept this as input. See the [API reference](../reference/utilities.md#type-system) for details. 
## Runtime type checking In addition to checking type compatibility when apps are composed, scinexus validates input data at runtime before each call to `main()`. This uses `typeguard.check_type` to verify that the data matches the app's declared input type. On a mismatch, a `NotCompleted` is returned with a message naming the received and expected types. !!! important "Why this matters" Without runtime type checking, passing the wrong data type to an app still fails — but the error occurs inside the user's `main()` and can be confusing. For example, a message like `'NoneType' object has no attribute 'blah'` gives little indication that the real problem is a type mismatch from an upstream app. With runtime checking enabled, scinexus catches this before entering `main()` and reports the mismatch clearly. ### Disabling type checking with `check_data_type` Runtime type checking is enabled by default. For mature pipelines where type correctness has been established, you can disable it to remove the small overhead of the `typeguard` check: ```python { notest } app = read_json() + validate(required_field="name") app.check_data_type = False ``` Setting `check_data_type` on the outermost app propagates the setting to all apps in the pipeline. cogent3-scinexus-e0aee79/docs/explanation/why-composable-apps.md000066400000000000000000000043301520253266500251100ustar00rootroot00000000000000# Why composable apps? !!! abstract "" The design philosophy behind `scinexus` and why composing single-purpose apps leads to more robust, reproducible scientific computation. ## Make your algorithms more robust As the robustness of POSIX based operating systems (think Linux, Mac OS, Unix) can attest, writing algorithms that stitch together multiple single purpose applications is a *Very Good Thing*™. This is most elegantly expressed as a part of the Unix design philosophy. ???+ quote Write programs that do one thing and do it well. Write programs to work together. 
— Doug McIlroy **`scinexus` encourages this design pattern.** We leverage the Python type annotation system to govern the compatibility (composability) of different applications. This enables in-process composition of your applications with validation of the consistency of the pipeline and the consistency of the data being run through it. We can expand on this slightly for the problem of scientific computation by considering the critical benchmark of satisfying the conditions for reproducible computation, i.e. the obligation to track all of the properties affecting the execution of your algorithm. Examples of this are the operating system, the language version, the seed used for the random number generator, etc. **`scinexus` does this for you.** For example, we intercept all arguments (including default values) passed to the construction of apps and record them so that the app state is logged. If you, the developer, also leverage the capabilities of the [`scitrack`](https://pypi.org/project/scitrack/) logging package (which `scinexus` has as a dependency), you can capture extra information such as versions of packages that your application depends on. We provide an [example](../howto/log-and-cite.md#leveraging-scitrack-for-reproducibility) of using `scitrack` for these cases. ## Improve the accessibility of your work for end users Apps are ready-made functions that users can run on their data without needing technical expertise. They’re easy for non-programmers to use, and can be linked together into pipelines. This lets users process one or thousands of records at once—without writing loops, conditionals, or other structural code. cogent3-scinexus-e0aee79/docs/howto/000077500000000000000000000000001520253266500175125ustar00rootroot00000000000000cogent3-scinexus-e0aee79/docs/howto/customise-display-and-ids.md000066400000000000000000000146341520253266500250370ustar00rootroot00000000000000# Customise display and IDs !!! 
abstract "" How to use `set_summary_display` to transform data store summary output into richer objects, and `set_id_from_source` to control how unique identifiers are extracted from data. ## Summary display default By default, summary properties like `.describe` return Python primitive types like `dict` and `list`. ```python { linenums="1" notest } from scinexus import open_data_store dstore = open_data_store("data/raw.zip", suffix="fa", mode="r") print(type(dstore.describe), "", dstore.describe, sep="\n") {'completed': 1035, 'not_completed': 0, 'logs': 0} ``` ## Customising summary display You can register a customised display function for your project. For `cogent3`, it converts them into `cogent3` `Table` objects: ```python { linenums="1" notest } from scinexus.data_store import set_summary_display from cogent3.core.table import Table def summary_to_table(data, *, name): if isinstance(data, dict): title = data.pop("title", name) rows = [[k, v] for k, v in data.items()] return Table(header=["Condition", "Value"], data=rows, title=title) if isinstance(data, list): if not data: return Table(header=[], data=[], title=name) header = list(data[0].keys()) rows = [list(row.values()) for row in data] return Table(header=header, data=rows, title=name) return data set_summary_display(summary_to_table) ``` This results in the following: ```python { linenums="1" notest } describe ====================== Condition Value ---------------------- completed 1035 not_completed 0 logs 0 ---------------------- ``` !!! note `cogent3` registers this transformation automatically when you `import cogent3.app`, so you get `Table` output without any setup in cogent3 projects. 
### Unsetting the display function Reset the display function and revert to the default `scinexus` behaviour as follows: ```python { notest } set_summary_display(None) ``` ## Default unique ID extraction Being able to extract unique identifiers for individual data objects is fundamental to the ability of scinexus to track provenance of individual results. Because of its roots in `cogent3`, the `scinexus` default `get_unique_id` function extracts this information from a `.source` attribute. That function strips format suffixes from file names to derive unique keys for data store records. ```python { linenums="1" notest } from scinexus import get_id_from_source func = get_id_from_source() print(func("gene_001.fasta.gz"), func("sample.txt")) # gene_001 sample ``` ## Customising unique ID extraction Register a custom extractor when your naming convention differs: ```python { linenums="1" notest } from scinexus.data_store import set_id_from_source, get_id_from_source def extract_ensembl_id(data): name = str(data) if name.startswith("ENSG"): return name.split(".")[0] return name set_id_from_source(extract_ensembl_id) func = get_id_from_source() # Now the registered function is used as the default print(func("ENSG00000157184.fa"), func("gene_001.fasta.gz")) # ENSG00000157184 gene_001.fasta.gz ``` ## Reset to default ```python { linenums="1" notest } from scinexus.data_store import set_id_from_source set_id_from_source(None) ``` ## Over-riding the default per-call You can also override per-call without affecting the global default: ```python { notest } result = app.apply_to(dstore, id_from_source=extract_ensembl_id) ``` cogent3-scinexus-e0aee79/docs/howto/extend-type-checking.md000066400000000000000000000106731520253266500240620ustar00rootroot00000000000000# Extend type checking !!! abstract "" How to use `register_type_namespace` to make forward references from downstream packages resolvable at composition time, enabling third-party types in app pipelines. 
## The problem When you compose apps with `+`, `scinexus` checks that the output type of the left app is compatible with the input type of the right app. Type hints are often written as forward references — strings like `"Alignment"` or `"PhyloNode"` — to avoid circular imports. At composition time `scinexus` must resolve these strings to actual classes, but it only knows about its own types by default. If your package defines custom types used in app hints, `scinexus` cannot resolve them without help. ## The solution `register_type_namespace` lets a downstream package register a **lazy namespace provider** — a zero-argument callable that returns a `dict[str, type]`. When `scinexus` encounters an unresolved forward reference, it queries each registered provider in order until it finds a match. ```python { notest } from scinexus.typing import register_type_namespace register_type_namespace(my_provider) ``` The provider is called lazily each time a name needs resolving, so the package can defer heavy imports. Providers are responsible for their own caching. Registration is idempotent: re-registering the same callable is a no-op. ???- example "How `cogent3` does it" ```python { linenums="1" notest } from scinexus.typing import register_type_namespace _resolution_ns = None def _get_resolution_namespace(): global _resolution_ns if _resolution_ns is not None: return _resolution_ns from cogent3.core.alignment import Alignment, SequenceCollection # (1)! from cogent3.core.tree import PhyloNode # ... other imports ... _resolution_ns = { "Alignment": Alignment, "SequenceCollection": SequenceCollection, "PhyloNode": PhyloNode, # (3)! # ... other types ... } return _resolution_ns register_type_namespace(_get_resolution_namespace) # (2)! ``` 1. `cogent3` defines many types (`Alignment`, `PhyloNode`, `Table`, etc.) that are used as forward references in app type hints. 2. In `cogent3/app/typing.py`, a resolution namespace is built lazily and registered with `scinexus` 3. 
With this registration, any `scinexus` app that uses `"PhyloNode"` as a type hint will resolve correctly at composition time without the user importing `PhyloNode` explicitly. ## Registering your own package's types Follow the same pattern: define a lazy provider function that imports and caches your types, then register it at module level. ```python { notest } from scinexus.typing import register_type_namespace _ns = None def _get_my_types(): global _ns if _ns is not None: return _ns from my_package.core import MyDataType, MyResultType _ns = { "MyDataType": MyDataType, "MyResultType": MyResultType, } return _ns register_type_namespace(_get_my_types) ``` Place this in a module that is imported early (e.g. your package's `typing.py` or `__init__.py`). Once registered, apps using `"MyDataType"` as a forward reference will resolve correctly when composed with other apps. cogent3-scinexus-e0aee79/docs/howto/handle-failures.md000066400000000000000000000154561520253266500231120ustar00rootroot00000000000000# Handle failures !!! abstract "" How to create `NotCompleted` values, check their truthiness, inspect their attributes, and control propagation with `skip_not_completed=False`. ## `NotCompleted` FALSE type A FALSE type is returned when a condition is not met. For example, below we create an app that selects 2 specific sequences from an alignment. Applying this to a data set where a "Mouse" sequence does not exist produces a FALSE type. 
```python { linenums="1" notest } import cogent3 as c3 aln = c3.get_dataset("primate-brca1") select_seqs = c3.get_app("take_named_seqs", "Mouse", "Human") result = select_seqs(aln) assert result == False print(result) # NotCompleted(type=FAIL, origin=take_named_seqs, source="brca1", message="named # seq(s) {'Mouse'} not in ('FlyingLem', 'TreeShrew', 'Galago', 'HowlerMon', # 'Rhesus', 'Orangutan', 'Gorilla', 'Chimpanzee', 'Human')") ``` ## Inspecting `NotCompleted` attributes The `NotCompleted` instance has attributes identifying what data failed: ```python { notest } result.source ``` Where the failure occurred: ```python { notest } result.origin ``` And the reason for the failure: ```python { notest } result.message ``` The `.type` attribute is the `NotCompletedType` enum value (e.g. `NotCompletedType.FALSE`, `NotCompletedType.ERROR`, or `NotCompletedType.BUG`). ## `NotCompleted` ERROR type An ERROR type is returned if an unexpected condition occurs, such as an exception raised during execution. Here we illustrate this by trying to open a file with an incorrect path. ???- example "Example" ```python { linenums="1" notest } import cogent3 as c3 reader = c3.get_app("load_aligned", moltype="dna") result = reader("primate_brca1.fasta") print(result) # NotCompleted(type=ERROR, origin=load_aligned, source="primate_brca1", # message="Traceback (most recent call last): File # "/Users/gavin/repos/SciNexus/src/scinexus/composable.py", line 545, in __call__ # result = self.main(val, *args, **kwargs) [...] ``` ## Composed functions propagate `NotCompleted` results If you have a composed function with multiple steps and a failure occurs, the resulting `NotCompleted` is returned without any of the subsequent steps being applied. 
For example, we make a composed app from both of the above apps: ???- example "Example" ```python { linenums="1" notest } import cogent3 as c3 reader = c3.get_app("load_aligned", moltype="dna") select_seqs = c3.get_app("take_named_seqs", "Mouse", "Human") app = reader + select_seqs result = app("data/primate_brca1.fasta") print(result) # NotCompleted(type=FAIL, origin=take_named_seqs, source="primate_brca1", # message="named seq(s) {'Mouse'} not in ('FlyingLem', 'TreeShrew', 'Galago', # 'HowlerMon', 'Rhesus', 'Orangutan', 'Gorilla', 'Chimpanzee', 'Human')") ``` The failure originated in `select_seqs` (an instance of `take_named_seqs`), and `reader` ran successfully — but the `NotCompleted` propagated through the rest of the pipeline. ???- example "Example" ```python { linenums="1" notest } import cogent3 as c3 reader = c3.get_app("load_aligned", moltype="dna") select_seqs = c3.get_app("take_named_seqs", "Mouse", "Human") app = reader + select_seqs result = app("primate_brca1.fasta") print(result) # NotCompleted(type=ERROR, origin=load_aligned, source="primate_brca1", [...] ``` Here the failure originated in `reader` (bad path), and `select_seqs` was never called. ## Creating `NotCompleted` in your own apps You can return a `NotCompleted` from your own app to signal that a particular input cannot be processed: ```python { linenums="1" notest } from scinexus import define_app, NotCompleted, NotCompletedType @define_app def require_min_length(val: str, min_length: int = 10) -> str: if len(val) < min_length: return NotCompleted( NotCompletedType.FALSE, "require_min_length", val, message=f"too short: {len(val)} < {min_length}", ) return val ``` ## Receiving `NotCompleted` with `skip_not_completed=False` By default, apps skip `NotCompleted` inputs — they propagate without calling `main()`. If your app needs to see `NotCompleted` values (e.g. 
a writer that records failures), set `skip_not_completed=False`: ```python { linenums="1" notest } from scinexus import define_app, NotCompleted @define_app(skip_not_completed=False) def log_failures(val: str) -> str: if isinstance(val, NotCompleted): print(f"Failure: {val.message}") return val return val ``` cogent3-scinexus-e0aee79/docs/howto/index.md000066400000000000000000000020711520253266500211430ustar00rootroot00000000000000# How-to guides Task-oriented recipes for common `scinexus` operations, each self-contained. - [Write a function app](write-a-function-app.md) -- use `@define_app` on a function - [Write a class app](write-a-class-app.md) -- use `@define_app` on a class with `main()` - [Handle failures](handle-failures.md) -- create and inspect `NotCompleted` values - [Use data stores](use-data-stores.md) -- open, read, and write data stores - [Read and write files](read-and-write-files.md) -- IO with compression, atomic writes, streaming - [Run in parallel](run-in-parallel.md) -- parallel execution with pluggable backends - [Track progress](track-progress.md) -- progress bars with `tqdm` or `rich` - [Log and cite](log-and-cite.md) -- `scitrack` logging and citation tracking - [Customise display and IDs](customise-display-and-ids.md) -- `set_summary_display` and `set_id_from_source` - [Extend type checking](extend-type-checking.md) -- register type namespaces for downstream packages - [Migrate from cogent3](migrate-from-cogent3.md) -- update code from `cogent3.app` to `scinexus` cogent3-scinexus-e0aee79/docs/howto/log-and-cite.md000066400000000000000000000177401520253266500223100ustar00rootroot00000000000000# Log and cite !!! abstract "" How to use `scitrack` logging in apps, control logging in `apply_to`, and access citation records from composed pipelines. ## Leveraging `scitrack` for reproducibility We reproduce here one of the examples from [scitrack](https://github.com/HuttleyLab/scitrack). 
???- example "Using `scitrack` in a `click` app" ```python linenums="1" import click from scitrack import CachingLogger LOGGER = CachingLogger() @click.command() @click.option("-i", "--infile", type=click.Path(exists=True)) @click.option("-t", "--test", is_flag=True, help="Run test.") def main(infile, test): # capture the local variables, at this point just provided arguments LOGGER.log_args() # (1)! LOGGER.log_versions("numpy") # (2)! LOGGER.input_file(infile) # (3)! LOGGER.log_file_path = "some_path.log" # (4)! if __name__ == "__main__": main() ``` 1. :man_raising_hand: A single statement and you have captured all the input arguments and their values, including defaults! 2. This captures the version numbers of the packages our application depends on. 3. This logs the path to `infile` and its md5sum. 4. Until you assign the path where you want the file written, this content has been cached. ## Controlling logging in `apply_to` By default, `apply_to` creates a `CachingLogger` that records the composable function, package versions, output paths, MD5 checksums of every result, and total elapsed time. The log is then written into the output data store. This is the recommended setting for production analyses because it gives you a complete, self-contained record of what ran and what it produced. ```python { notest } result = process.apply_to(dstore) # logger=True by default ``` You can also pass your own `CachingLogger` instance if you want to configure it beforehand or reuse one across multiple calls. ```python { notest } from scitrack import CachingLogger LOGGER = CachingLogger() LOGGER.log_args() result = process.apply_to(dstore, logger=LOGGER) ``` ### Disabling logging Set `logger=False` to skip logging entirely. ```python { notest } result = process.apply_to(dstore, logger=False) ``` This is useful when: - **Your project is small** and a full provenance log is unnecessary. 
- **Logging is handled externally**, for example by a workflow manager or your own `CachingLogger` that wraps several `apply_to` calls. - **You want to avoid the overhead** of computing an MD5 checksum for every result object, which can be noticeable for large or numerous outputs. ## Make it easy for your work to be cited Correctly attributing the authors of algorithms and software is a requirement of good scientific practice. `scinexus` makes this easy by letting app authors declare citations that are automatically tracked through composed pipelines. Use the `cite` parameter of `define_app` (or the base classes) to attach a citation. The `citeable` library provides several classes for this purpose. ???- example "Adding a citation to your app" ```python { linenums="1" notest } from citeable import Software from scinexus import define_app from cogent3.app.typing import AlignedSeqsType from cogent3 import get_app my_cite = Software( author=["Doe, J", "Smith, A"], title="My Sequence Filter", year=2025, url="https://example.com/my-filter", version="0.1.0", ) @define_app(cite=my_cite) # (1)! def strict_filter(val: AlignedSeqsType) -> AlignedSeqsType: """Remove sequences shorter than the alignment.""" return val.omit_bad_seqs() app = strict_filter() loader = get_app("load_aligned", moltype="dna", format_name="fasta") pipeline = loader + strict_filter() print(pipeline.citations) # (2)! print(f"\n{pipeline.bib}") # (3)! # (Software( author=['Doe, J', 'Smith, A'], title='My Sequence Filter', # year=2025, version='0.1.0', url='https://example.com/my-filter', [...] ``` 1. Use the `cite` parameter of `define_app` to attach a citation 2. The `.citations` property returns citations as a tuple. When apps are composed into a pipeline, `.citations` collects unique citations from all apps in the chain. 3. The `.bib` gives the BibTeX string. 
## Extracting citations from a data store When a composed pipeline is run via `apply_to()`, citations are automatically saved in the output data store. ???- example "Citations in data stores" ```python { linenums="1" notest } from citeable import Software from scinexus import define_app, open_data_store from cogent3.app.typing import AlignedSeqsType from cogent3 import get_app my_cite = Software( author=["Doe, J"], title="My Sequence Filter", year=2025, ) @define_app(cite=my_cite) def strict_filter(val: AlignedSeqsType) -> AlignedSeqsType: return val.omit_bad_seqs() in_dstore = open_data_store("data/raw.zip", suffix="fa", limit=5) out_dstore = open_data_store("cited_results", suffix="fa", mode="w") loader = get_app("load_aligned", moltype="dna", format_name="fasta") writer = get_app("write_seqs", data_store=out_dstore, format_name="fasta") process = loader + strict_filter() + writer result = process.apply_to(in_dstore) result.summary_citations # (1)! result.write_bib("my_analysis.bib") # (2)! ``` 1. Because we are using `cogent3`, the property returns a `cogent3` `Table` of all citations stored in the data store. 2. You can export to a BibTeX file. !!! note `ReadOnlyDataStoreZipped` supports reading stored citations but not writing them. cogent3-scinexus-e0aee79/docs/howto/migrate-from-cogent3.md000066400000000000000000000056161520253266500237750ustar00rootroot00000000000000# Migrate from cogent3 !!! abstract "" A guide for users moving from `cogent3.app` to `scinexus`. ## Update imports Replace `cogent3.app` imports with their `scinexus` equivalents: ```python { notest } # before from cogent3.app.composable import define_app, NotCompleted # after from scinexus import define_app, NotCompleted ``` ## Summary methods return plain Python objects Data store summary methods (`describe`, `summary_logs`, `summary_not_completed`, `summary_citations`, `validate`) now return `list[dict]` or `dict` instead of cogent3 `Table` objects. 
To restore `Table`-based display, register a converter with `set_summary_display()`: ```python { notest } from cogent3.core.table import Table from scinexus.data_store import set_summary_display def _summary_to_table(data, *, name): if isinstance(data, dict): rows = [[k, v] for k, v in data.items()] return Table(header=["Condition", "Value"], data=rows, title=name) if isinstance(data, list) and data: header = list(data[0].keys()) rows = [list(row.values()) for row in data] return Table(header=header, data=rows, title=name) return data set_summary_display(_summary_to_table) ``` ## NotCompletedType is now an enum Failure types are categorised using the `NotCompletedType` enum rather than bare strings: ```python { notest } from scinexus.composable import NotCompletedType # before nc.type == "ERROR" # after nc.type == NotCompletedType.ERROR ``` The three values are `ERROR`, `FAIL`, and `BUG`. ## App composition uses shallow copies Composing apps with `+` now creates a shallow copy of the right-hand operand. Composed pipelines no longer share mutable state, so `ComposableApp.disconnect()` is no longer needed and is deprecated. ## New features These capabilities are new in `scinexus` and were not available in `cogent3.app`: - **`check_data_type` property** -- toggle runtime input type checking on or off. See [Runtime type checking](../explanation/type-system.md#runtime-type-checking). - **Better IDE integration through static typing support** -- `AppBase[T, R]`, `ComposableApp[T, R]`, and `WriterApp[T, R]` can be inherited from directly as an alternative to `@define_app`. - **Pluggable parallel backends** -- choose between stdlib multiprocessing, loky, or MPI backends. See [Run in parallel](run-in-parallel.md). - **Pluggable progress bars** -- use `tqdm` or `rich` for progress display. See [Track progress](track-progress.md). - **Custom identifier extraction** -- register a custom function for extracting storage identifiers from data via `set_id_from_source()`. 
See [Customise display and IDs](customise-display-and-ids.md). - **Logging can be disabled** -- pass `logger=False` to `apply_to()` to skip log file creation. ## Full changelog See the [changelog](https://github.com/cogent3/scinexus/blob/main/changelog.md) for a complete list of changes from the cogent3 app infrastructure. cogent3-scinexus-e0aee79/docs/howto/read-and-write-files.md000066400000000000000000000104161520253266500237410ustar00rootroot00000000000000# Read and write files !!! abstract "" How to use `open_()` for reading and writing files with automatic compression detection (gzip, bzip2, lzma, zip), `atomic_write` for safe file writes that clean up on failure, `iter_splitlines` and `iter_line_blocks` for streaming large files, and `is_url`/`open_url` for working with URLs. ## Writing a compressed file `open_()` detects the compression format from the file suffix and handles it automatically. Writing a gzip-compressed text file is identical to writing a plain text file — just use a `.gz` suffix. ```python { linenums="1" notest } from scinexus import open_ with open_("data/sample.txt.gz", "wt") as f: f.write("Hello, compressed world!\n") f.write("Line two of the file.\n") ``` ## Reading a compressed file Reading works the same way — `open_()` detects the `.gz` suffix and decompresses transparently. ```python { linenums="1" notest } from scinexus import open_ with open_("data/sample.txt.gz") as f: print(f.read()) # Hello, compressed world! Line two of the file. ``` Supported compression formats are gzip (`.gz`), bzip2 (`.bz2`), lzma (`.xz`, `.lzma`), and zip (`.zip`). ## Reading a URL `open_()` also handles URLs. Use `is_url()` to check whether a path is a URL before opening it. ???- example "Checking and reading a URL" ```python linenums="1" from scinexus.io_util import is_url from scinexus import open_ url = "https://github.com/user-attachments/files/26728407/raw.zip" print(is_url(url)) # (1)! with open_(url, "rb") as f: # (2)! 
header = f.read(20) print(header) ``` 1. `is_url()` returns `True` for `http`, `https`, and `file` scheme URLs. 2. `open_()` detects the URL and delegates to `open_url()`. Only read mode is supported for URLs. ## Efficiently reading large files Reading an entire large file into memory or iterating line by line with Python's built-in `readline()` can be inefficient. The built-in approach makes a system call for every line, which becomes a bottleneck for files with millions of lines. `scinexus` provides two functions that read data in large chunks and then split into lines, greatly reducing I/O overhead. ### `iter_splitlines` `iter_splitlines(path, chunk_size=1_000_000)` reads a file in chunks (default 1 MB) and yields individual lines. It correctly handles lines that span chunk boundaries. ```python { notest } from scinexus.io_util import iter_splitlines for line in iter_splitlines("large_file.txt"): process(line) ``` ### `iter_line_blocks` `iter_line_blocks(path, num_lines=1000, chunk_size=5_000_000)` builds on `iter_splitlines` — it accumulates lines into lists of `num_lines` and yields each list. This is useful when downstream processing works on batches (e.g. FASTA records where each record spans a fixed number of lines). ```python { notest } from scinexus.io_util import iter_line_blocks for block in iter_line_blocks("large_file.txt", num_lines=1000): process_batch(block) # block is a list of up to 1000 strings ``` Use `iter_splitlines` when you need one line at a time. Use `iter_line_blocks` when your processing naturally operates on batches of lines. cogent3-scinexus-e0aee79/docs/howto/run-in-parallel.md000066400000000000000000000162361520253266500230460ustar00rootroot00000000000000# Run in parallel !!! 
abstract "" How to choose your preferred parallelisation backend, how to parallelise any function using `parallel.map`, `parallel.imap`, and `parallel.as_completed` as standalone utilities, and how to enable parallel execution in app pipelines with `parallel=True` and `par_kw`. ## Data level parallelism `scinexus` supports parallel computation for the common case where the same calculation needs to be applied to many independent data items. A master process splits the work among available CPU cores, each worker processes its share, and results are collected. !!! warning Parallelism is not always faster. You should see a performance gain when the computation time per task significantly exceeds the overhead of distributing work. If individual tasks are very fast, the overhead of inter-process communication can dominate. If individual output files are small, storing results in a single file (e.g. a `.sqlitedb` database) is more efficient than writing many small files. ## Choosing a parallel backend `scinexus` supports three parallel backends. The default uses only the Python standard library and requires no extra installs. | Backend | Install | Best for | |---|---|---| | `"multiprocess"` | included | scripts, CI, environments where you control dependencies | | `"loky"` | `pip install "scinexus[loky]"` | Jupyter notebooks, interactive sessions, long-running pools | | `"mpi"` | `pip install "scinexus[mpi]"` | HPC clusters with multiple nodes | Set the backend once, typically at the top of your script or notebook: ```python { notest } import scinexus scinexus.set_parallel_backend("loky") ``` !!! note The `"loky"` backend uses [loky](https://loky.readthedocs.io/) which provides reusable process pools and robust pickling via `cloudpickle`. This makes it the recommended choice for Jupyter notebooks, where the stdlib `ProcessPoolExecutor` can fail to serialise closures and lambda functions. 
### Getting a specific backend without changing the default If your code requires a particular backend, pass the ``backend`` argument to ``get_parallel_backend``. This returns an instance of the requested backend without changing the global default, so other packages that depend on the current setting are unaffected: ```python { notest } from scinexus import get_parallel_backend backend = get_parallel_backend(backend="loky") ``` ## Parallel computation on a single computer ### Using `app.apply_to()` If you have a composed app **with** a writer, use `apply_to()` with the `parallel` and `par_kw` keyword arguments: ```python { notest } result = app.apply_to(dstore, parallel=True, par_kw=dict(max_workers=4)) ``` ### Using `app.as_completed()` If you have a composed app **without** a writer, use `as_completed()`. This returns a generator, so wrap it with `list()` or iterate over it: ```python { notest } results = list(app.as_completed(dstore, parallel=True, par_kw=dict(max_workers=4))) ``` ### Using `scinexus.parallel` directly For parallelising any function (not just apps), use the functions in `scinexus.parallel`. #### `parallel.as_completed` -- results in completion order Returns results as they finish. The order may differ from the input order. It also tends to balance work better across compute nodes than `imap` or `map`. ```python { notest } from scinexus import parallel result = list(parallel.as_completed(is_prime, PRIMES, max_workers=4)) ``` The first argument is the function to call, the second is the iterable of inputs. Each input element is passed as a single argument to the function. The data is broken into chunks across workers automatically. !!! note If you don't specify `max_workers`, all available CPUs are used. 
#### `parallel.imap` -- preserving input order (generator) Returns results in the same order as the input, yielding one at a time: ```python { notest } from scinexus import parallel for result in parallel.imap(process_item, items, max_workers=4): handle(result) ``` #### `parallel.map` -- preserving input order (list) Same as `imap` but returns a list: ```python { notest } from scinexus import parallel results = parallel.map(process_item, items, max_workers=4) ``` ### Complete example ```python { notest } import math from scinexus import parallel def is_prime(n): if n % 2 == 0: return False sqrt_n = int(math.floor(math.sqrt(n))) for i in range(3, sqrt_n + 1, 2): if n % i == 0: return False return True PRIMES = [ 112272535095293, 112582705942171, 115280095190773, 115797848077099, 117450548693743, 993960000099397, ] if __name__ == "__main__": results = parallel.map(is_prime, PRIMES, max_workers=4) for number, prime in zip(PRIMES, results): print(f"{number} is prime: {prime}") ``` ## Parallel computation on multiple computers (MPI) On systems with multiple nodes (e.g. an HPC cluster), use MPI via the [mpi4py](https://mpi4py.readthedocs.io/) library. You need to install an MPI implementation (e.g. [OpenMPI](https://www.open-mpi.org/)) and the `mpi4py` Python package, either directly: ```bash pip install mpi4py ``` or by installing `scinexus` [with the `mpi` extra](../install.md#optional-extras). Set the backend to MPI: ```python { notest } import scinexus scinexus.set_parallel_backend("mpi") ``` Or pass `use_mpi=True` to any of the parallel functions: ```python { notest } from scinexus import parallel results = parallel.map(is_prime, PRIMES, use_mpi=True, max_workers=PBS_NCPUS) ``` Or with app pipelines: ```python { notest } result = app.apply_to(dstore, parallel=True, par_kw=dict(use_mpi=True, max_workers=4)) ``` To run an MPI script, invoke it via `mpiexec`: ```bash mpiexec -n $PBS_NCPUS python3 -m mpi4py.futures my_script.py ``` !!!
note You can use MPI for parallel execution on a single computer too. This can be useful for testing your code locally before migrating to a larger system. ### MPI script structure MPI scripts must guard the main logic behind `if __name__ == "__main__":`: ```python { notest } import os from scinexus import parallel PBS_NCPUS = int(os.environ["PBS_NCPUS"]) def process(data): ... if __name__ == "__main__": results = parallel.map(process, my_data, use_mpi=True, max_workers=PBS_NCPUS) ``` ## Custom backends You can integrate any parallel engine by subclassing `Parallel`: ```python { notest } from scinexus.parallel import Parallel, set_parallel_backend class DaskBackend(Parallel): def __init__(self, client): self._client = client def imap(self, f, s, max_workers=None, **kwargs): futures = self._client.map(f, list(s)) yield from self._client.gather(futures) def as_completed(self, f, s, max_workers=None, **kwargs): from dask.distributed import as_completed futures = self._client.map(f, list(s)) for future in as_completed(futures): yield future.result() def is_master_process(self): from dask.distributed import get_worker try: get_worker() return False except ValueError: return True def get_rank(self): return 0 def get_size(self): return sum(self._client.nthreads().values()) set_parallel_backend(DaskBackend(client)) ``` cogent3-scinexus-e0aee79/docs/howto/track-progress.md000066400000000000000000000144621520253266500230110ustar00rootroot00000000000000# Track progress !!! abstract "" How to choose your preferred progress bar backend and customise progress bars. `scinexus` defaults to using [tqdm](https://pypi.org/project/tqdm/) for progress bars. These behave well across terminal and notebook environments. We also support using [rich](https://pypi.org/project/rich/) for its progress bars. Both backends are used through a single API. ## Choosing the progress bar backend Use `set_progress_backend` to switch between backends. The default is `tqdm`.
```python { notest } import scinexus scinexus.set_progress_backend("rich") # switch to rich scinexus.set_progress_backend("tqdm") # switch back to tqdm scinexus.set_progress_backend(None) # reset to default (tqdm) ``` ## Getting a progress bar Use `get_progress` to obtain a `Progress` instance. Passing `show_progress=True` returns the current default backend. ```python { notest } import scinexus pbar = scinexus.get_progress(show_progress=True) for item in pbar(range(100), msg="Processing"): pass # your work here ``` You can pass keyword arguments to configure the default backend: ```python { notest } import scinexus pbar = scinexus.get_progress(show_progress=True, colour="blue", leave=True) ``` You can also pass a `Progress` instance directly: ```python { notest } from scinexus.progress import RichProgress pbar = scinexus.get_progress(show_progress=RichProgress()) ``` !!! note If you call `get_progress(show_progress=False)`, it returns `NoProgress`, which silently passes through the iterable. ## Nesting progress bars === "Using `tqdm` (default)" Create nested progress bars using `child()`. Each bar can have its own description via the `msg` keyword. Create the child once before the loop — it automatically resets to zero on each subsequent call. ```python { notest } import scinexus pbar = scinexus.get_progress(show_progress=True) child = pbar.child() for batch in pbar(range(3), msg="Outer loop"): for item in child(range(10), msg=f"Inner batch {batch}"): pass # your work here ``` === "Using `rich`" The same nesting pattern works with the `rich` backend: ```python { notest } import scinexus scinexus.set_progress_backend("rich") pbar = scinexus.get_progress(show_progress=True) child = pbar.child() for batch in pbar(range(3), msg="Outer loop"): for item in child(range(10), msg=f"Inner batch {batch}"): pass # your work here ``` `rich` children share the same `rich.progress.Progress` display instance, so all bars render together in a single live display. 
The outer bar tracks the top-level iteration. Each call to `child()` creates a new `Progress` at the next cursor position, so inner bars appear below the outer one. The child bar is reused across iterations — on the second and subsequent calls, the bar resets to zero instead of creating a new one. #### Push-based sub-contexts When you need to report fractional progress rather than iterating, use `context()`: ```python { notest } import scinexus pbar = scinexus.get_progress(show_progress=True) child = pbar.child() for batch in pbar(range(3), msg="Processing"): with child.context(msg=f"Batch {batch}") as ctx: for i in range(100): ctx.update(progress=i / 100, msg=f"Step {i}") ``` The context maps progress values from `[0.0, 1.0]` to the configured `[start, end]` range and is cleaned up automatically when the `with` block exits. ## Cleaning up Both `Progress` and `ProgressContext` support the context manager protocol. Using a progress bar as a context manager ensures that `close()` is called automatically, which finalises the display and moves the cursor past the bars. Without cleanup, leftover bars can leave the terminal cursor in the wrong position. === "Using `tqdm` (default)" ```python import scinexus with scinexus.get_progress(show_progress=True) as pbar: # (1)! child = pbar.child() for batch in pbar(range(3), msg="Outer"): for item in child(range(10), msg=f"Batch {batch}"): pass ``` 1. `close()` is called automatically and the cursor position is restored. === "Using `rich`" ```python import scinexus scinexus.set_progress_backend("rich") with scinexus.get_progress(show_progress=True) as pbar: # (1)! child = pbar.child() for batch in pbar(range(3), msg="Outer"): for item in child(range(10), msg=f"Batch {batch}"): pass ``` 1. `close()` is called automatically and the cursor position is restored. ???- tip "No context manager? No problem!" 
```python import scinexus pbar = scinexus.get_progress(show_progress=True) child = pbar.child() for batch in pbar(range(3), msg="Outer"): for item in child(range(10), msg=f"Batch {batch}"): pass pbar.close() # (1)! ``` 1. Call `close()` explicitly when you are done !!! note Calling `close()` on a `Progress` instance also closes all of its children. For standalone `ProgressContext` objects (from `context()`), use the `with` statement as shown in the [push-based sub-contexts](#push-based-sub-contexts) section. ## Customising appearance ### Persisting bars after completion By default, `tqdm` keeps the outermost bar visible after completion but clears nested bars. `rich` removes all bars. Use `leave` to control this: ```python { notest } from scinexus.progress import TqdmProgress, RichProgress # Keep all tqdm bars visible after completion pbar = TqdmProgress(leave=True) # Keep all rich bars visible after completion pbar = RichProgress(leave=True) ``` You can also set `leave` independently on child bars: ```python { notest } from scinexus.progress import TqdmProgress pbar = TqdmProgress(leave=True) child = pbar.child(leave=False) # child bars disappear, outer persists for batch in pbar(range(3), msg="Outer"): for item in child(range(10), msg=f"Batch {batch}"): pass ``` ### Setting bar colour Both backends support a `colour` parameter. For `tqdm`, this sets the bar colour directly. For `rich`, it styles the bar column when the display is auto-created. ```python { notest } from scinexus.progress import TqdmProgress, RichProgress pbar = TqdmProgress(colour="green") pbar = RichProgress(colour="cyan") ``` Colour is inherited by child bars. cogent3-scinexus-e0aee79/docs/howto/use-data-stores.md000066400000000000000000000216751520253266500230670ustar00rootroot00000000000000# Use data stores !!! 
abstract "" How to use `open_data_store` in read, write, and append modes with directory, zip, and SQLite backends, iterate over members, and inspect `.completed`, `.not_completed`, and `.summary_`. ## How do I use a data store? A data store is just a "container". To open a data store you use the `open_data_store()` function. To load the data for a member of a data store you need an appropriately selected loader type of app. ## Supported operations on a data store All data store classes can be iterated over, indexed, and checked for membership. These operations return a `DataMember` object. In addition to providing access to members, the data store classes have convenience methods for describing their contents and providing summaries of log files that are included and of the `NotCompleted` members (see not completed). ## Opening a data store Use the `open_data_store()` function, illustrated below. Use the mode argument to identify whether to open as read only (`mode="r"`), write (`mode="w"`) or append (`mode="a"`). ### Opening a read only data store We open the zipped directory described above, defining the filenames ending in `.fa` as the data store members. All files within the directory become members of the data store (unless we use the `limit` argument). ```python { linenums="1" notest } from scinexus import open_data_store dstore = open_data_store("data/raw.zip", suffix="fa", mode="r") # (1)! print(dstore) dstore.describe # (2)! m = dstore[0] # (3)! for m in dstore[:5]: # (4)! print(m) m.read()[:20] # (5)! # 1035x member # ReadOnlyDataStoreZipped(source='/Users/gavin/repos/SciNexus/docs/data/raw.zip', # members=[DataMember(data_store=/Users/gavin/repos/SciNexus/docs/data/raw.zip, # unique_id=ENSG00000157184.fa), # DataMember(data_store=/Users/gavin/repos/SciNexus/docs/data/raw.zip, # unique_id=ENSG00000131791.fa)]...) ENSG00000157184.fa ENSG00000131791.fa # ENSG00000127054.fa ENSG00000067704.fa ENSG00000182004.fa ``` 1. Open a data store. 2.
The `.describe` property summarises the contents. 3. You can index like any Python sequence. 4. Or loop over members. 5. And read data from a member. !!! note For a `DataStoreSqlite` member, the default data storage format is bytes. So reading the content of an individual record is best done using the `load_db` app. ### Making a writeable data store The creation of a writeable data store is specified with `mode="w"`, or (to append) `mode="a"`. In the former case, any existing records are overwritten. In the latter case, existing records are ignored. ## `DataStoreSqlite` stores serialised data When you specify a Sqlitedb data store as your output (by using `open_data_store()`) you write multiple records into a single file making distribution easier. One important issue to note is the process which creates a Sqlitedb "locks" the file. If that process exits unnaturally (e.g. the run that was producing it was interrupted) then the file may remain in a locked state. If the db is in this state, `scinexus` will not modify it unless you explicitly unlock it. This is represented in the display as shown below. ```python { linenums="1" notest } {'completed': 175, 'not_completed': 0, 'logs': 1, 'title': 'Unlocked db store.'} ``` To unlock, you execute the following: ```python { notest } dstore.unlock(force=True) ``` ## Interrogating run logs If you use the `apply_to()` method, a `scitrack` logfile will be stored in the data store. This includes useful information regarding the run conditions that produced the contents of the data store. ```python { linenums="1" notest } # [{'time': '2019-07-24 14:42:56', 'name': 'logs/load_unaligned-progressive_align- # write_db-pid8650.log', 'python_version': '3.7.3', 'who': 'gavin', 'command': # '/Users/gavin/miniconda3/envs/c3dev/lib/python3.7/site- # packages/ipykernel_launcher.py -f [...] ``` Log files can be accessed via a special attribute. 
```python { linenums="1" notest } # [DataMember(data_store=/Users/gavin/repos/SciNexus/docs/data/demo- # locked.sqlitedb, unique_id=logs/load_unaligned-progressive_align-write_db- # pid8650.log)] ``` Each element in that list is a `DataMember` which you can use to get the data contents. The following ```python { notest } print(dstore.logs[0].read()[:225]) ``` Produces ```python { linenums="1" notest } # 2019-07-24 14:42:56 Eratosthenes.local:8650 INFO system_details : # system=Darwin Kernel Version 18.6.0: Thu Apr 25 23:16:27 PDT 2019; # root:xnu-4903.261.4~2/RELEASE_X86_64 2019-07-24 14:42:56 # Eratosthenes.local:8650 INFO python ``` ## Citations – giving credit to package developers When apps declare citations, those citations are automatically saved alongside your results when you use `apply_to()`. ```python { linenums="1" notest } import pathlib import shutil from citeable import Software from scinexus import define_app, open_data_store from cogent3 import get_app from cogent3.app.typing import AlignedSeqsType my_cite = Software( author=["Doe, J"], title="My Sequence Filter", year=2025, ) @define_app(cite=my_cite) def strict_filter(val: AlignedSeqsType) -> AlignedSeqsType: return val.omit_bad_seqs() in_dstore = open_data_store("data/raw.zip", suffix="fa", limit=5) out_dstore = open_data_store("cited_results", suffix="fa", mode="w") loader = get_app("load_aligned", moltype="dna", format_name="fasta") writer = get_app("write_seqs", data_store=out_dstore, format_name="fasta") process = loader + strict_filter() + writer result = process.apply_to(in_dstore) result.write_bib("my_analysis.bib") print(pathlib.Path("my_analysis.bib").read_text()) # @software{cogent3, author = {Huttley, Gavin and Caley, Katherine and # Fotovat, Nabi and Ma, Stephen Ka-Wah and Koh, Moses and Morris, Richard and # McArthur, Robert and McDonald, Daniel and Jaya, Fred and Maxwell, Peter and # Martini, James and La, Thomas and Lang, Yapeng}, title = {{cogent3}: [...] 
``` The `summary_citations` property returns a table of all citations stored in the data store (line 24). Export to BibTeX with `write_bib()` (line 26). !!! note `ReadOnlyDataStoreZipped` supports reading stored citations but not writing them. cogent3-scinexus-e0aee79/docs/howto/write-a-class-app.md000066400000000000000000000070311520253266500232660ustar00rootroot00000000000000# Write a class app !!! abstract "" How to write a class-based app, either by inheriting from a `scinexus` app base class or by using the `define_app` decorator, specifying input/output type hints. ## Using inheritance from a base class ```python linenums="1" from collections.abc import Callable from citeable import Software from scinexus import ComposableApp my_cite = Software( author=["Doe, J", "Smith, A"], title="My Sequence Filter", year=2025, url="https://example.com/my-filter", version="0.1.0", ) class my_app( # (1)! ComposableApp[str, str], # (2)! cite=my_cite, # (3)! ): def __init__(self, convert: Callable[[str], str]): self.convert = convert def main(self, val: str) -> str: # (4)! return self.convert(val) ``` 1. We suggest naming your apps using the PEP8 naming style for functions (lowercase separated by underscores) because the instances will be used like functions. 2. We type hint the input / output types with the base class. 3. We assign the citation in the class definition. 4. Your class **must** have a `main()` method with type hints specified for its first argument and its return type. ## Using the `define_app` decorator How to use `@define_app` on a class with a `main()` method, configure it via `__init__` parameters, and control behaviour with the `app_type` parameter. ```python { linenums="1" notest } from collections.abc import Callable from citeable import Software from scinexus import define_app my_cite = Software( author=["Doe, J", "Smith, A"], title="My Sequence Filter", year=2025, url="https://example.com/my-filter", version="0.1.0", ) @define_app(cite=my_cite) # (1)!
class my_app: def __init__(self, convert: Callable[[str], str]): self.convert = convert def main(self, val: str) -> str: # (2)! return self.convert(val) ``` 1. The `define_app` decorator is used. You can specify the `app_type` here, which we don't in this case, and assign your citation. 2. Your class **must** have a `main()` method with type hints specified for its first argument and its return type. ### Specifying the app type The `define_app` decorator has a default `app_type` of `"generic"`. This means the app does data transformation and does not load or write data. The supported app types are indicated by the `AppType` enum: ```python { linenums="1" notest } from scinexus.composable import AppType print(list(AppType)) # [, , , ] ``` If your app is not intended to be composed sequentially with other apps, set it to non-composable: ```python from scinexus import define_app, AppType @define_app(app_type=AppType.NON_COMPOSABLE) class my_standalone_app: def main(self, val: str) -> str: return val.upper() ``` ## Handling `NotCompleted` values By default, apps skip `NotCompleted` inputs — they propagate through the pipeline without calling `main()`. If your app needs access to `NotCompleted` instances (e.g. you are developing a writer that records failures), set `skip_not_completed=False`: ```python from scinexus import define_app, NotCompleted @define_app(skip_not_completed=False) class my_writer: def main(self, val: str) -> str: if isinstance(val, NotCompleted): # handle the failure ... return val ``` cogent3-scinexus-e0aee79/docs/howto/write-a-function-app.md000066400000000000000000000042671520253266500240160ustar00rootroot00000000000000# Write a function app !!! abstract "" How to use `@define_app` on a plain function, specifying input/output type hints and turning function parameters into constructor arguments. 
```python { linenums="1" notest } from collections.abc import Callable from citeable import Software from scinexus import define_app, AppType my_cite = Software( author=["Doe, J", "Smith, A"], title="My Sequence Filter", year=2025, url="https://example.com/my-filter", version="0.1.0", ) @define_app( app_type=AppType.GENERIC, # (1)! cite=my_cite, # (2)! ) def my_app(val: str, convert: Callable[[str], str]) -> str: # (3)! return convert(val) app = my_app(str.upper) print(app("hello world")) # HELLO WORLD ``` 1. We specify the `app_type` explicitly here. 2. We assign the citation in the `define_app` decorator call. 3. The function definition has type hints for its first argument and its return type. !!! note Your function can only have one required argument. It can have any number of optional arguments. Pay attention to the order of arguments for the function! Every call to the app provides a new value of `val`, whereas `str.upper` is assigned to the parameter `convert` once, when the app is created. You can think of all of the other arguments as being arguments to a class constructor. Under the hood, `scinexus` caches these and injects them into each call of your function with new values of `val`. 
cogent3-scinexus-e0aee79/docs/images/000077500000000000000000000000001520253266500176175ustar00rootroot00000000000000cogent3-scinexus-e0aee79/docs/images/logo-bw.png000066400000000000000000000023111520253266500216700ustar00rootroot00000000000000PNG  IHDR pHYsll\.tEXtSoftwarewww.inkscape.org<VIDATH{ewSDgEai,gyDHl +Lrha+rsYJ)u!sn+}V]q{x!;}]@8s$.%0ZwT_L | ]wxlű9ɏq=lJ2 oRܖIQx I :oB, E'Ob f5!xU 8;{'9s zgO܅7'Yw0$ӫGcMF.o|LIVc_LdX_73p 8b43d_<;|Ёc/]ݻqwU=ګ\ql)XWU  QU!u`WIzG`44o4"\Sq^,OqnO#;pfUmIBEÊE;J|] DyP'$ 1YU[AgMU~sq"܏A|;|'n_oq{?@?S:43{e))f`o qvk~{&h*ޤ44sj_`͢,Ω8.⑵a㠪bb׫ގO^cFI }3W4)I Y d$p26kYs74杂PU/cS/| :ڤ`۰67$&Σ1$4̯L۪O4üi@gjnpڀ{v5Uth:Gf'dgjCҧM QM4ecFYO;j|Ḫ;7ڭz_ZU%m4AErMdJ0yh9'aknB8F#m3 #o4M߭ {5gcOzO׎> Ҵ3g^/K31IENDB`cogent3-scinexus-e0aee79/docs/images/logo-bw.svg000066400000000000000000001151231520253266500217110ustar00rootroot00000000000000 cogent3-scinexus-e0aee79/docs/images/logo-text-bw.svg000066400000000000000000001262141520253266500226760ustar00rootroot00000000000000 cogent3-scinexus-e0aee79/docs/images/logo-text-wb.svg000066400000000000000000001263011520253266500226730ustar00rootroot00000000000000 cogent3-scinexus-e0aee79/docs/images/logo-wb.png000066400000000000000000000021761520253266500217010ustar00rootroot00000000000000PNG  IHDR pHYsll\.tEXtSoftwarewww.inkscape.org< IDATHic9n۫V,hKc%hH*"A[b6%Zb-H,IC+mPZԵ(u%L3_4"Bű8uoNX~ b.h`*B;¹^:[|p'ÁXp3! a*z CjU}ek9+ئY]q^ hO%izE_ޅ~M h%-X |_$vsj5 IFeվָJ@/ ֒5 $b 6}MIz8N>Ou?NGI.=uR [n=CVfT+֮{Wa$FBbmQC1e3%Л9sCҹ5`U,J|,"#oyODe"bLjx1">}DiIwgDĔŧ#w#"ézD})=^ qWKHMAq/9wh] {t.Lkl/}k$H?IΣ jJpٔarRѰW'5$RSo7"{:>=/}ޔn]1*{I%qsu(%Pi?K+StUR;@? lIENDB`cogent3-scinexus-e0aee79/docs/images/logo-wb.svg000066400000000000000000001151231520253266500217110ustar00rootroot00000000000000 cogent3-scinexus-e0aee79/docs/index.md000066400000000000000000000072101520253266500200030ustar00rootroot00000000000000
scinexus logo scinexus logo
# About !!! abstract "*Just as `attrs` and `dataclasses` use type hints to simplify data type definition, `scinexus` uses them to simplify writing best-practice scientific algorithms.*" `scinexus` (pronounced 'sigh-nexus') is a Python framework for rapid development of data processing applications. It enables interoperability between apps through defined data types, allowing development of scientific domain app ecosystems (for examples see [cogent3](https://cogent3.org/doc/app/index-app.html) and [piqtree](https://piqtree.readthedocs.io/en/stable/)). Many scientific problems require repeating calculations across many files or database records. Such tasks suit data-level parallelism on multi-core CPUs, but writing robust, maintainable code for them is often tedious and quickly becomes complex. With `scinexus` apps, you can use a functional programming style when developing your application. Combined with `scinexus` app composition, this greatly simplifies your programming logic making it easier to understand and thus easier to explain. And as we know !!! quote If the implementation is easy to explain, it may be a good idea. -- Tim Peters, "Zen of Python" ## What you get - Type checking at composition time - Durable computing[^1] - Greatly simplified data level parallel execution - Automated logging - Automated citation tracking - Checkpointing via data stores - Customisable experience (progress bars[^2], parallelisation[^3], data store representations etc..) [^1]: Failures are automatically recorded as `NotCompleted` records which get propagated and stored in [data stores](explanation/not-completed-design.md). These records record salient details that help you identify the cause of the failure. [^2]: `tqdm` is the default because of its robustness in notebooks, but you can choose `rich`. [^3]: The default is Python’s standard library `multiprocessing` module. If you're using Jupyter Notebooks, however, it's recommended that you use `loky`. 
This is an [installation option](install.md#optional-extras) and [configuration is easy](howto/run-in-parallel.md#choosing-a-parallel-backend). ## Standalone utilities `scinexus` also provides generally useful utilities for developers of data analysis applications. Utilities for file IO, parallel execution, and progress tracking are usable independently of the app framework. ## Get started - **Install `scinexus`** -- see [Installing from PyPI](install.md) - **Build algorithms** -- see [How to write apps](howto/write-a-function-app.md) - **Build applications for others** -- see [Why composable apps?](explanation/why-composable-apps.md) - **Use existing apps** -- see [Composing apps](tutorials/composing-apps.md) ## The `scinexus` origin story The app infrastructure code was originally developed within [cogent3](https://cogent3.org), where it accumulated over seven years of development, testing, and real-world use in computational genomics before being extracted into `scinexus`. The design is mature and has underpinned analyses in published studies. We acknowledge here that many members of the `cogent3` community contributed to the code that now lives here, including [@GavinHuttley](https://github.com/GavinHuttley), [@rmcar17](https://github.com/rmcar17), [@Nick-Foto](https://github.com/Nick-Foto), [@KatherineCaley](https://github.com/KatherineCaley), [@fredjaya](https://github.com/fredjaya), and [@khiron](https://github.com/khiron). cogent3-scinexus-e0aee79/docs/install.md000066400000000000000000000016731520253266500203510ustar00rootroot00000000000000# Installation !!! abstract "" How to install `scinexus` and its optional extras for parallel execution, progress bars, and MPI support. ## Basic install ```bash pip install scinexus ``` ## Optional extras - `pip install "scinexus[loky]"` -- uses the [loky](https://loky.readthedocs.io/) library for parallel execution. 
Loky provides reusable process pools that are more robust than the stdlib `ProcessPoolExecutor`, particularly in Jupyter notebooks where standard multiprocessing can fail. Recommended for interactive and notebook-based workflows. - `pip install "scinexus[rich]"` -- also installs the `rich` package for its progress bars ([see using rich](howto/track-progress.md)) - `pip install "scinexus[mpi]"` -- MPI parallel execution via `mpi4py` You can combine extras: ```bash pip install "scinexus[loky,rich]" ``` ## Requirements - Python 3.11+ ## Verify installation ```python import scinexus print(scinexus.__version__) ``` cogent3-scinexus-e0aee79/docs/llms.txt000066400000000000000000000136051520253266500200670ustar00rootroot00000000000000# scinexus > A composable app infrastructure for scientific computing scinexus (pronounced "sigh-nexus") is a Python framework for building composable, type-checked data processing pipelines. What dataclasses and attrs are for structured data, scinexus apps are for structured algorithms. It enables interoperability between apps through defined data types, supporting scientific domain app ecosystems. ## Core Concepts ### define_app The `@define_app` decorator transforms a class (with a `main()` method) or a function into a composable app. Apps are callable and compose with `+`. ```python from scinexus import define_app @define_app class upper: def main(self, data: str) -> str: return data.upper() result = upper()("hello") # "HELLO" ``` Function-based apps are also supported: ```python @define_app def double(val: int) -> int: return val * 2 ``` ### App Types - `GENERIC` (default) -- general-purpose processing step - `LOADER` -- must be first in a composed pipeline - `WRITER` -- must be last in a composed pipeline; writes results to a data store - `NON_COMPOSABLE` -- cannot participate in `+` composition Set via `@define_app(app_type="loader")` etc. ### App Composition Apps compose with `+`. 
Type compatibility between the return type of the left app and the input type of the right app is checked at composition time. ```python @define_app def add_one(x: int) -> int: return x + 1 @define_app def to_str(x: int) -> str: return str(x) pipeline = add_one() + to_str() pipeline(5) # "6" ``` Ordering rules: LOADER must be first, WRITER must be last, GENERIC can go anywhere in between. ### NotCompleted A sentinel return type for failed computations. It propagates through pipelines without raising exceptions, enabling durable computing. ```python from scinexus import NotCompleted nc = NotCompleted("ERROR", "my_app", "something went wrong", source="input.txt") bool(nc) # False -- so it is falsy ``` If an app's `main()` raises an exception, the framework catches it and returns a `NotCompleted` instance. If an app receives a `NotCompleted` as input (and `skip_not_completed=True`, the default), it passes it through unchanged. ## Data Stores Data stores provide checkpointed, append-only storage for pipeline results. 
```python from scinexus import open_data_store # Directory-based dstore = open_data_store("results/", suffix="json", mode="w") # SQLite-based dstore = open_data_store("results.sqlitedb", mode="w") # Read-only from a zip dstore = open_data_store("results.zip") ``` Key features: - `.describe` -- summary of stored data - `.summary_not_completed` -- summary of failed results - Completed members accessed via `.completed` - Membership testing with `"identifier" in dstore` - Logs and citations are stored alongside data ## Writer Apps and apply_to Writer apps process an entire data store, with logging, checkpointing, and progress tracking: ```python @define_app(app_type="writer") class save_result: def __init__(self, data_store): self.data_store = data_store def main(self, data: str, identifier: str) -> str: self.data_store.write(unique_id=identifier, data=data) return identifier pipeline = add_one() + to_str() + save_result(dstore) pipeline.apply_to(input_dstore, parallel=True, show_progress=True) ``` ## Parallel Execution Three backends are available: ```python from scinexus import set_parallel_backend, get_parallel_backend set_parallel_backend("multiprocess") # default, uses multiprocessing set_parallel_backend("loky") # recommended for Jupyter set_parallel_backend("mpi") # for HPC clusters via mpi4py ``` Use `parallel=True` in `apply_to()` or `as_completed()` to enable parallel execution. Configure workers via `par_kw={"max_workers": 4}`. ## Progress Tracking ```python from scinexus import set_progress_backend set_progress_backend("tqdm") # default set_progress_backend("rich") # requires rich extra ``` Pass `show_progress=True` to `apply_to()` or `as_completed()`. ## Installation ``` pip install scinexus ``` Optional extras: - `pip install scinexus[loky]` -- process pool for Jupyter - `pip install scinexus[rich]` -- rich progress bars - `pip install scinexus[mpi]` -- MPI support via mpi4py Requires Python 3.11+. 
## Type System scinexus checks type compatibility when composing apps with `+`. The return type of the left app must overlap with the input type of the right app. Standard typing constructs (`Union`, `Optional`, protocols) are supported. ## Public API Direct imports from `scinexus`: - `define_app` -- decorator for creating apps - `NotCompleted`, `NotCompletedType` -- failure sentinel - `is_app`, `is_app_composable` -- introspection helpers - `AppBase`, `ComposableApp`, `LoaderApp`, `WriterApp`, `NonComposableApp` -- base classes for inheritance-based app definition - `Progress`, `ProgressContext`, `get_progress`, `set_progress_backend` -- progress tracking - `__version__` -- package version Lazy imports from `scinexus`: - `open_data_store` -- create/open data stores - `open_` -- open files with format detection - `set_parallel_backend`, `get_parallel_backend` -- parallel configuration - `set_summary_display`, `get_summary_display` -- customise data store summary output - `set_id_from_source`, `get_id_from_source` -- customise unique identifier extraction ## Documentation - How-to guides: writing function apps, writing class apps, composing apps, using data stores, handling failures, running in parallel, tracking progress, logging and citations, extending the type system, customising display, migrating from cogent3 - Tutorials: composing apps, processing a dataset - Explanations: why composable apps, app lifecycle, type system, data store model, NotCompleted design, source tracking, customisation hooks, control flow ## Links - Documentation: https://scinexus.readthedocs.io - Source: https://github.com/cogent3/scinexus - Bug tracker: https://github.com/cogent3/scinexus/issues cogent3-scinexus-e0aee79/docs/reference/000077500000000000000000000000001520253266500203105ustar00rootroot00000000000000cogent3-scinexus-e0aee79/docs/reference/app-classes.md000066400000000000000000000007161520253266500230510ustar00rootroot00000000000000# App classes Properties and methods of 
the app base classes. ::: scinexus.composable.AppBase options: show_root_heading: true ::: scinexus.composable.ComposableApp options: show_root_heading: true ::: scinexus.composable.LoaderApp options: show_root_heading: true ::: scinexus.composable.WriterApp options: show_root_heading: true ::: scinexus.composable.NonComposableApp options: show_root_heading: true cogent3-scinexus-e0aee79/docs/reference/data-stores.md000066400000000000000000000022221520253266500230560ustar00rootroot00000000000000# Data stores API for `open_data_store` and data store backends. ::: scinexus.io.open_data_store options: show_root_heading: true ::: scinexus.data_store.DataStoreABC options: show_root_heading: true ::: scinexus.data_store.DataStoreDirectory options: show_root_heading: true ::: scinexus.data_store.ReadOnlyDataStoreZipped options: show_root_heading: true ::: scinexus.sqlite_data_store.DataStoreSqlite options: show_root_heading: true ::: scinexus.data_store.DataMemberABC options: show_root_heading: true ::: scinexus.data_store.DataMember options: show_root_heading: true ::: scinexus.data_store.set_summary_display options: show_root_heading: true ::: scinexus.data_store.get_summary_display options: show_root_heading: true ::: scinexus.data_store.set_id_from_source options: show_root_heading: true ::: scinexus.data_store.get_id_from_source options: show_root_heading: true ::: scinexus.data_store.get_unique_id options: show_root_heading: true ::: scinexus.data_store.get_data_source options: show_root_heading: true cogent3-scinexus-e0aee79/docs/reference/define-app.md000066400000000000000000000004641520253266500226460ustar00rootroot00000000000000# `define_app` Full signature, parameters, and behaviour of the `define_app` decorator, plus the `AppType` enum that controls which base class is used. 
::: scinexus.composable.define_app options: show_root_heading: true ::: scinexus.composable.AppType options: show_root_heading: true cogent3-scinexus-e0aee79/docs/reference/deserialise.md000066400000000000000000000002171520253266500231230ustar00rootroot00000000000000# Deserialisation API for the extensible JSON deserialisation registry. ::: scinexus.deserialise options: show_root_heading: false cogent3-scinexus-e0aee79/docs/reference/index.md000066400000000000000000000016651520253266500217510ustar00rootroot00000000000000# API reference Complete API documentation for all public `scinexus` modules. ## App framework - [App classes](app-classes.md) -- `AppBase`, `ComposableApp`, `WriterApp`, `LoaderApp`, `NonComposableApp` - [define_app](define-app.md) -- the `define_app` decorator and `AppType` enum - [NotCompleted](not-completed.md) -- the `NotCompleted` sentinel and `NotCompletedType` enum - [Source tracking](source-proxy.md) -- `source_proxy` and `propagate_source` for data provenance - [Data stores](data-stores.md) -- `open_data_store` and data store backends ## Standalone utilities - [IO utilities](io-util.md) -- file IO with compression, atomic writes, streaming - [Parallel execution](parallel.md) -- parallel map, imap, as_completed - [Progress](progress.md) -- progress bar ABCs and backends - [Deserialisation](deserialise.md) -- JSON deserialisation registry - [Utilities](utilities.md) -- introspection helpers and type namespace registration cogent3-scinexus-e0aee79/docs/reference/io-util.md000066400000000000000000000002321520253266500222110ustar00rootroot00000000000000# IO utilities API for file IO functions that work independently of the app framework. ::: scinexus.io_util options: show_root_heading: false cogent3-scinexus-e0aee79/docs/reference/not-completed.md000066400000000000000000000004401520253266500234020ustar00rootroot00000000000000# `NotCompleted` The sentinel type for failed computations that propagates through pipelines without raising exceptions. 
::: scinexus.composable.NotCompleted options: show_root_heading: true ::: scinexus.composable.NotCompletedType options: show_root_heading: true cogent3-scinexus-e0aee79/docs/reference/parallel.md000066400000000000000000000002541520253266500224270ustar00rootroot00000000000000# Parallel execution API for parallel execution functions that work independently of the app framework. ::: scinexus.parallel options: show_root_heading: false cogent3-scinexus-e0aee79/docs/reference/progress.md000066400000000000000000000002051520253266500224730ustar00rootroot00000000000000# Progress API for progress tracking ABCs and built-in backends. ::: scinexus.progress options: show_root_heading: false cogent3-scinexus-e0aee79/docs/reference/source-proxy.md000066400000000000000000000004001520253266500233030ustar00rootroot00000000000000# Source tracking Internal utilities for tracking data provenance through app pipelines. ::: scinexus.composable.source_proxy options: show_root_heading: true ::: scinexus.composable.propagate_source options: show_root_heading: true cogent3-scinexus-e0aee79/docs/reference/utilities.md000066400000000000000000000013021520253266500226410ustar00rootroot00000000000000# Utilities API for introspection helpers and type namespace registration. 
## App introspection ::: scinexus.composable.is_app options: show_root_heading: true ::: scinexus.composable.is_app_composable options: show_root_heading: true ## Type system ::: scinexus.typing.register_type_namespace options: show_root_heading: true ::: scinexus.typing.SerialisableType options: show_root_heading: true ::: scinexus.typing.IdentifierType options: show_root_heading: true ## Introspection helpers ::: scinexus.misc.get_object_provenance options: show_root_heading: true ::: scinexus.misc.in_jupyter options: show_root_heading: true cogent3-scinexus-e0aee79/docs/scripts/000077500000000000000000000000001520253266500200415ustar00rootroot00000000000000cogent3-scinexus-e0aee79/docs/scripts/cog_utils.py000066400000000000000000000076201520253266500224100ustar00rootroot00000000000000import os import pathlib import re import sys import textwrap import urllib.request import zipfile from io import StringIO import cog ROOT_DIR = pathlib.Path(__file__).parent.parent DOCS_DIR = ROOT_DIR DATA_DIR = ROOT_DIR / "data" DATA_DIR.mkdir(exist_ok=True) DATA_URL = "https://github.com/user-attachments/files/26728407/raw.zip" DECOMPRESS_URL = ( "https://github.com/user-attachments/files/26769518/demo-locked.sqlitedb.zip" ) def count_pattern(text: str) -> int: return len(re.findall(r"# \((\d+)\)!", text)) def _register_cogent3_types() -> None: try: from cogent3.app.typing import _get_resolution_namespace from scinexus.typing import register_type_namespace register_type_namespace(_get_resolution_namespace) except ImportError: pass _register_cogent3_types() def setup_installed() -> None: zip_dest = DATA_DIR / "raw.zip" if not zip_dest.exists(): urllib.request.urlretrieve(DATA_URL, filename=zip_dest) # noqa: S310 return decompress_dest = DATA_DIR / "demo-locked.sqlitedb.zip" if not decompress_dest.exists(): urllib.request.urlretrieve(DECOMPRESS_URL, filename=decompress_dest) # noqa: S310 with zipfile.ZipFile(decompress_dest, "r") as zip_ref: zip_ref.extractall(DATA_DIR) return def 
exec_codeblock( *, src: str, lang: str = "python", width: int = 80, max_lines: int = 10, admonition: str | None = None, use_wrap: bool = True, display_src: bool = True, annotations: list[str] | None = None, ) -> None: """Execute code lines, then emit a fenced code block with source + output. Parameters ---------- lines List of code strings (one per line). lang Language for the emitted code block (default: "python"). width Wrap width for output lines. max_lines Maximum number of lines to emit for output (after wrapping). use_wrap Whether to wrap the output lines. admonition Optional admonition type to wrap the code block in (e.g. "note", "tip", etc.). display_src Whether to include the source code in the emitted code block. annotations Optional list of annotation strings to include as comments in the emitted code block. """ setup_installed() cwd = os.getcwd() # noqa: PTH109 os.chdir(ROOT_DIR) wd = os.getcwd() lines = src.splitlines() if not lines[0]: lines = lines[1:] if not lines[-1]: lines = lines[:-1] src = "\n".join(lines) buf = StringIO() sys.stdout = buf try: ns: dict = {} exec(src, ns) # noqa: S102 except Exception as exc: msg = f"Error executing code block: \n{src}\n{exc}\ncwd={wd}\n" raise Exception(msg) from exc sys.stdout = sys.__stdout__ output = [f'```{lang} {{ linenums="1" notest }}'] output_text = buf.getvalue().strip() if display_src: output.append(src) if use_wrap and output_text: output.extend( [ "", *[ f"# {l}" for l in textwrap.wrap( output_text, width=width, max_lines=max_lines ) ], ] ) elif output_text: output.extend(["", output_text]) output.append("```") if annotations: expect = count_pattern(src) if expect != len(annotations): msg = f"Number of annotations ({len(annotations)}) does not match expected ({expect}) based on pattern count in source." raise ValueError(msg) annotation_text = "\n".join(f"{i}. 
{a}" for i, a in enumerate(annotations, 1)) output.extend(["", annotation_text]) txt = "\n".join(output) if admonition: txt = textwrap.indent(txt, " ") txt = f"{admonition}\n\n{txt}" cog.outl(txt, dedent=False) os.chdir(cwd) cogent3-scinexus-e0aee79/docs/stylesheets/000077500000000000000000000000001520253266500207265ustar00rootroot00000000000000cogent3-scinexus-e0aee79/docs/stylesheets/extra.css000066400000000000000000000012011520253266500225550ustar00rootroot00000000000000@import url('https://fonts.bunny.net/css?family=orbitron:400,500,700&display=swap'); .md-header__topic:first-child .md-ellipsis { font-family: 'Orbitron', sans-serif; } [data-md-color-scheme="slate"] .md-logo img, [data-md-color-scheme="slate"] .md-nav__button.md-logo img { content: url('../images/logo-wb.png'); } [data-md-color-scheme="default"] .only-dark { display: none; } [data-md-color-scheme="slate"] .only-light { display: none; } [data-md-color-scheme="slate"] .only-dark { display: inline; } [data-md-color-scheme="slate"] { --md-mermaid-label-fg-color: #000; --md-mermaid-node-bg-color: #fff; }cogent3-scinexus-e0aee79/docs/tutorials/000077500000000000000000000000001520253266500204005ustar00rootroot00000000000000cogent3-scinexus-e0aee79/docs/tutorials/composing-apps.md000066400000000000000000000117401520253266500236640ustar00rootroot00000000000000# Composing apps !!! abstract "" Compose multiple apps into a single pipeline using the `+` operator, see what happens when types don't match, and observe how `NotCompleted` propagates through a pipeline without raising exceptions. ## Why compose? Consider an app that performs a molecular evolutionary analysis (`fit_model`) and another that extracts statistics from the result (`extract_stats`). 
You could apply them sequentially: ```python { notest } fitted = fit_model(alignment) stats = extract_stats(fitted) ``` Composability simplifies this into a single callable: ```python { notest } app = fit_model + extract_stats stats = app(alignment) ``` You can have many more apps in a composed function than just two. ## A worked example We compose three apps: a loader, a processor, and a writer. ```python { notest } from cogent3 import get_app from scinexus import open_data_store out_dstore = open_data_store(path_to_dir, suffix="fa", mode="w") loader = get_app("load_aligned", format_name="fasta", moltype="dna") cpos3 = get_app("take_codon_positions", 3) writer = get_app("write_seqs", out_dstore, format_name="fasta") ``` ### Using apps sequentially ```python { notest } data = loader("data/primate_brca1.fasta") just3rd = cpos3(data) m = writer(just3rd) ``` ### Composing into a single pipeline ```python { notest } process = loader + cpos3 + writer m = process("data/primate_brca1.fasta") ``` The result is identical, but the composed form is more concise and enables batch processing via `apply_to()`. ## Composability rules ### App type ordering Loaders and writers are special cases. If included, a loader must always be first: ```python { notest } app = a_loader + a_generic ``` If included, a writer must always be last: ```python { notest } app = a_generic + a_writer ``` Changing the order for either will raise a `TypeError`. ### Type compatibility Apps define the type of input they accept and the type of output they produce. For two apps to be composed, the output type of the app on the left must overlap with the input type of the app on the right. If they don't match, a `TypeError` is raised. ## `NotCompleted` propagation If any step in a composed pipeline returns a `NotCompleted`, subsequent steps are skipped and the `NotCompleted` is returned as the final result. 
???+ example "Condition not satisfied" ```python { linenums="1" notest } from cogent3 import get_app reader = get_app("load_aligned", format_name="fasta") select_seqs = get_app("take_named_seqs", "Mouse", "Human") app = reader + select_seqs result = app("data/primate_brca1.fasta") print(result) # (1)! # NotCompleted(type=FAIL, origin=take_named_seqs, source="primate_brca1", # message="named seq(s) {'Mouse'} not in ('FlyingLem', 'TreeShrew', 'Galago', # 'HowlerMon', 'Rhesus', 'Orangutan', 'Gorilla', 'Chimpanzee', 'Human')") ``` 1. A successful load but a failed selection — the `NotCompleted` from `select_seqs` is returned ???+ example "Caught an exception" ```python { linenums="1" notest } from cogent3 import get_app reader = get_app("load_aligned", format_name="fasta") select_seqs = get_app("take_named_seqs", "Mouse", "Human") app = reader + select_seqs result = app("primate_brca1.fasta") print(result) # (1)! # NotCompleted(type=ERROR, origin=load_aligned, source="primate_brca1", # message="Traceback (most recent call last): File # "/Users/gavin/repos/SciNexus/src/scinexus/composable.py", line 545, in __call__ # result = self.main(val, *args, **kwargs) # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File # "/Users/gavin/repos/SciNexus/.venv/lib/python3.12/site- # packages/cogent3/app/io.py", line 334, in main return _load_seqs(path, # cogent3.make_aligned_seqs, self._parser, self.moltype) # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File # "/Users/gavin/repos/SciNexus/.venv/lib/python3.12/site- [...] ``` 1. An error during load — `select_seqs` is never called. cogent3-scinexus-e0aee79/docs/tutorials/index.md000066400000000000000000000004031520253266500220260ustar00rootroot00000000000000# Tutorials Step-by-step lessons that take you from zero to productive with `scinexus`. 
- [Composing apps](composing-apps.md) -- build pipelines with `+` and type safety - [Processing a dataset](processing-a-dataset.md) -- batch processing with data stores cogent3-scinexus-e0aee79/docs/tutorials/processing-a-dataset.md000066400000000000000000000066331520253266500247470ustar00rootroot00000000000000# Processing a dataset !!! abstract "" Use `open_data_store` with a loader, processor, and writer app to batch-process a directory of files with `apply_to`, and enable progress bars and parallel execution. Then inspect results. We will translate the DNA sequences in `raw.zip` into amino acid sequences and store them in a SQLite database. We will interrogate the generated data store to get a synopsis of the results. ???+ example "Translating DNA to amino acid" ```python { linenums="1" notest } from scinexus import open_data_store from cogent3 import get_app in_dstore = open_data_store("data/raw.zip", suffix="fa") # (1)! out_dstore = open_data_store("translated.sqlitedb", mode="w") # (2)! load = get_app("load_unaligned", moltype="dna") translate = get_app("translate_seqs") write = get_app("write_db", data_store=out_dstore) app = load + translate + write # (3)! out_dstore = app.apply_to(in_dstore) # (4)! out_dstore.describe # (5)! out_dstore.validate() # (6)! out_dstore.summary_not_completed # (7)! ``` 1. Open the zipped input data store, selecting `.fa` files as members. 2. Create a writable SQLite output data store. Using a single database file is more efficient than writing many small files. 3. Compose loader, translator, and writer into a single pipeline. 4. Apply the pipeline to every member of the input data store. Results are written to `out_dstore`. 5. Summary showing counts of completed records, not-completed records, and log files. 6. Verify the integrity of all records via MD5 checksums. 7. Summary of why some records could not be processed — e.g. sequences not divisible by 3 or containing stop codons. !!!
note The `.completed` and `.not_completed` attributes give access to the different types of members, while `.members` gives them all. For example, `len(out_dstore.not_completed)` returns the count of failed records and each element is a `DataMember`. cogent3-scinexus-e0aee79/noxfile.py000066400000000000000000000103671520253266500174470ustar00rootroot00000000000000import os import pathlib import shutil import subprocess import sys import nox # on python >= 3.12 this will improve speed of test coverage a lot if sys.version_info >= (3, 12): os.environ["COVERAGE_CORE"] = "sysmon" _py_versions = range(11, 15) nox.options.default_venv_backend = "uv" @nox.session(python=False) def fmt(session: nox.Session) -> None: session.run("ruff", "check", "--fix-only", ".", external=True) session.run("ruff", "format", ".", external=True) @nox.session(python="3.14") def cogdocs(session: nox.Session) -> None: session.install("-e", ".", "--group", "dev") cmnd = 'find docs -name "*.md" | xargs uv run --group dev cog -r -I docs/scripts' subprocess.run(cmnd, check=True, shell=True) # noqa: S602 @nox.session(python=[f"3.{v}" for v in _py_versions]) def type_check(session): session.install("-e", ".", "--group", "dev") session.run("mypy", "src/scinexus/") @nox.session(python=[f"3.{v}" for v in _py_versions]) def test_types(session): session.install("-e", ".") session.run("mypy", "src/scinexus/") @nox.session(python=[f"3.{v}" for v in _py_versions]) def test(session): session.install("-e", ".", "--group", "dev") session.run("uv", "pip", "list") # doctest modules within scinexus session.chdir("src/scinexus") session.run( "pytest", "-s", "-x", "--doctest-modules", ".", ) session.chdir("../../tests") session.run( "pytest", "-s", "-x", "-m", "not slow and not mpi", *session.posargs, ) @nox.session(python=[f"3.{v}" for v in _py_versions]) def testmpi(session): session.install("-e", ".[mpi]", "--group", "dev") session.chdir("tests") py = pathlib.Path(session.bin_paths[0]) / "python" session.run( 
"mpiexec", "--oversubscribe", "-n", "4", str(py), "-m", "mpi4py.futures", "-m", "pytest", "-s", "-x", "-m", "mpi", *session.posargs, external=True, ) @nox.session(python=[f"3.{v}" for v in _py_versions]) def testcov(session): session.install("-e", ".", "--group", "dev") cover_mpi = shutil.which("mpiexec") is not None if cover_mpi: session.install("-e", ".[mpi]") cov_file = str(pathlib.Path.cwd() / ".coverage") session.env["COVERAGE_FILE"] = cov_file session.run("coverage", "erase") base = ["coverage", "run", "--source=scinexus"] # mypy via API wrapper so coverage traces plugin hook execution session.run(*base, "scripts/run_mypy_cov.py", "--no-incremental", "src/scinexus/") # doctests session.chdir("src/scinexus") session.run(*base, "--append", "-m", "pytest", "-s", "-x", "--doctest-modules", ".") # unit tests session.chdir("../../tests") session.run( *base, "--append", "-m", "pytest", "-s", "-x", "-m", "not mpi", ) # MPI tests when mpiexec is available if cover_mpi: py = pathlib.Path(session.bin_paths[0]) / "python" session.run( "mpiexec", "--oversubscribe", "-n", "4", str(py), "-m", "mpi4py.futures", "-m", *base, "--append", "-m", "pytest", "-s", "-x", "-m", "mpi", external=True, ) session.chdir("..") session.run("coverage", "report") i = 0 while i < len(session.posargs): fmt = session.posargs[i] if fmt == "html": session.run("coverage", fmt, external=True) i += 1 continue o_name = session.posargs[i + 1] session.run("coverage", fmt, o_name, external=True) i += 2 @nox.session(python=[f"3.{v}" for v in _py_versions]) def test_docs(session): session.install("-e", ".", "--group", "dev") session.run("uv", "pip", "list") # doctest modules within scinexus session.chdir("docs") session.run( "pytest", "--markdown-docs", "-m", "markdown-docs", "-x", ".", "--ignore", "scripts", *session.posargs, ) cogent3-scinexus-e0aee79/pyproject.toml000066400000000000000000000036571520253266500203510ustar00rootroot00000000000000[project] name = "scinexus" dynamic = ["version"] description 
= "A composable app infrastructure for scientific computing" requires-python = ">=3.11" license = "BSD-3-Clause" readme = "README.md" readme_renderer = "markdown" dependencies = [ "charset_normalizer", "citeable", "scitrack", "tqdm", "typeguard", ] classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Science/Research", "License :: OSI Approved :: BSD License", "Topic :: System :: Distributed Computing", "Topic :: Scientific/Engineering", "Topic :: Software Development :: Libraries :: Python Modules", "Operating System :: OS Independent", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", ] [project.urls] Documentation = "https://scinexus.readthedocs.io" "Bug Tracker" = "https://github.com/cogent3/scinexus/issues" "Source Code" = "https://github.com/cogent3/scinexus/" [project.optional-dependencies] loky = ["loky"] mpi = ["mpi4py"] rich = ["rich"] [dependency-groups] dev = [ "pymdown-extensions>=10.21.2", "zensical>=0.0.33", "cogent3[extra]>=2026.4.20a0", "pytest", "pytest-cov", "pytest-markdown-docs", "pytest-xdist", "mypy", "nox", "numpy", "rich", "ruff", "cogapp>=3.6.0", "click>=8.3.2", "mkdocstrings-python>=2.0.3", ] [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.version] path = "src/scinexus/_version.py" [tool.hatch.build.targets.wheel] packages = ["src/scinexus"] [tool.pytest.ini_options] markers = [ "slow: marks tests as slow (deselect with '-m \"not slow\"')", "mpi: marks tests requiring MPI (run with nox -s testmpi)", "internet: marks tests requiring internet access", ] testpaths = ["tests"] [tool.mypy] plugins = ["scinexus._mypy_plugin"] cogent3-scinexus-e0aee79/rtd_get_docs.py000066400000000000000000000111601520253266500204330ustar00rootroot00000000000000# this file # is directly used by .readthedocs.yaml # it extracts the built docs from the github artefact # created by 
the build_docs.yml github action import os import pathlib import time import zipfile import requests MAX_WAIT_TIME = 600 # Typically takes 5.5 minutes POLL_INTERVAL = 20 def get_rtd_version_name() -> str: return os.environ.get("READTHEDOCS_VERSION_NAME", "").lower() def get_github_token() -> str: token = os.environ.get("GITHUB_TOKEN") if token is None: token = os.environ.get("GITHUB_TOKEN_PRIVATE") if not token: msg = "GitHub token not found." raise OSError(msg) return token def get_latest_run(workflow_filename: str, headers: dict) -> dict: url = f"https://api.github.com/repos/cogent3/scinexus/actions/workflows/{workflow_filename}/runs" response = requests.get(url, headers=headers, timeout=10) # Check if we got a successful response before trying to parse JSON if not response.ok: msg = f"GitHub API request failed with status {response.status_code}: {response.text[:200]}" raise RuntimeError(msg) try: data = response.json() except requests.exceptions.JSONDecodeError as e: msg = f"Failed to parse JSON response. Status: {response.status_code}, Content: {response.text[:200]}" raise RuntimeError(msg) from e runs = data.get("workflow_runs", []) if not runs: msg = f"No workflow runs found for: '{workflow_filename}'" raise ValueError(msg) return runs[0] def wait_for_run_completion(run: dict, headers: dict) -> dict: run_id = run["id"] run_url = f"https://api.github.com/repos/cogent3/scinexus/actions/runs/{run_id}" waited = 0 while waited < MAX_WAIT_TIME: response = requests.get(run_url, headers=headers, timeout=10) if not response.ok: msg = f"GitHub API request failed with status {response.status_code}: {response.text[:200]}" raise RuntimeError(msg) try: run_status = response.json() except requests.exceptions.JSONDecodeError as e: msg = f"Failed to parse JSON response. 
Status: {response.status_code}, Content: {response.text[:200]}" raise RuntimeError(msg) from e status = run_status["status"] if status == "completed": conclusion = run_status["conclusion"] if conclusion != "success": msg = f"Latest workflow run failed with conclusion: '{conclusion}'" raise RuntimeError(msg) return run_status time.sleep(POLL_INTERVAL) waited += POLL_INTERVAL msg = "Timed out waiting for workflow run to complete." raise TimeoutError(msg) def download_and_extract_artifact(run: dict, headers: dict) -> None: artifact_name = "scinexus-docs-html" response = download_extract_artifact( run, "artifacts_url", headers, "GitHub API request failed with status " ) try: artifacts_data = response.json() except requests.exceptions.JSONDecodeError as e: msg = f"Failed to parse JSON response. Status: {response.status_code}, Content: {response.text[:200]}" raise RuntimeError(msg) from e artifacts = artifacts_data.get("artifacts", []) artifact = next((a for a in artifacts if a["name"] == artifact_name), None) if artifact is None: msg = f"Artifact '{artifact_name}' not found in the run." raise ValueError(msg) response = download_extract_artifact( artifact, "archive_download_url", headers, "Artifact download failed with status ", ) out = pathlib.Path(f"{artifact_name}.zip") out.write_bytes(response.content) with zipfile.ZipFile(out, "r") as zip_ref: zip_ref.extractall("_readthedocs/html/") out.unlink() def download_extract_artifact(arg0, arg1, headers, arg3): artifacts_url = arg0[arg1] result = requests.get(artifacts_url, headers=headers, timeout=10) if not result.ok: msg = f"{arg3}{result.status_code}: {result.text[:200]}" raise RuntimeError(msg) return result def download_and_extract_docs() -> None: version = get_rtd_version_name() if version not in ("latest", "stable"): msg = f"Unexpected version '{version}' for readthedocs." 
raise ValueError(msg) workflow_filename = "docs.yml" headers = {"Authorization": f"token {get_github_token()}"} latest_run = get_latest_run(workflow_filename, headers) completed_run = wait_for_run_completion(latest_run, headers) download_and_extract_artifact(completed_run, headers) if __name__ == "__main__": download_and_extract_docs() cogent3-scinexus-e0aee79/ruff.toml000066400000000000000000000065431520253266500172710ustar00rootroot00000000000000exclude = [ ".bzr", ".direnv", ".eggs", ".git", ".git-rewrite", ".hg", ".ipynb_checkpoints", ".mypy_cache", ".nox", ".pants.d", ".pyenv", ".pytest_cache", ".pytype", ".ruff_cache", ".svn", ".tox", ".venv", ".vscode", "__pypackages__", "_build", "buck-out", "build", "dist", "node_modules", "site-packages", "venv", "working", ] extend-exclude = ["rtd_get_docs.py"] line-length = 88 indent-width = 4 target-version = "py311" [lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or # McCabe complexity (`C901`) by default. select = ["ALL"] # ICN001 not adhering to numpy as np convention due to # name collision, where np is used as num params # N801 CamelCase is not always appropriate # PLR0913 number of arguments sometimes needs to be > 5 # FBT001 and FBT002, positional boolean arguments are allowed! # PT011, I disagree about checking error messages as well # as exception types as it doubles the "cost" of fixing typos ignore = [ "COM812", "EXE002", "FA100", "E501", "D", "N801", "ICN001", "PLR0913", "FBT001", "FBT002", "PT011", "S311", # Allow use of random "S608", # sql injection unlikely "PLR2004", # Magic number usage where appropriate "PLC0415", # Allow delayed imports ] # Allow fix for all enabled rules (when `--fix`) is provided. fixable = ["ALL"] unfixable = [] # Allow unused variables when underscore-prefixed. 
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[lint.per-file-ignores]
"tests/**/*.py" = [
    "S101",  # asserts allowed in tests...
    "INP001",  # __init__.py files are not required...
    "ANN",
    "N802",
    "N803",
    "S301",  # we allow pickling / unpickling in tests
    "SLF001",  # Allow accessing private attrs
]
"noxfile.py" = [
    "S101",  # asserts allowed in tests...
    "INP001",  # __init__.py files are not required...
    "ANN",
    "N802",
    "N803",
]
"src/scinexus/composable.py" = [
    "ANN401",  # Allow use of Any
]
"src/scinexus/deserialise.py" = [
    "ANN401",  # Allow use of Any
]
"src/scinexus/data_store.py" = [
    "ANN401",  # Allow use of Any
    "SLF001",  # Allow accessing private attrs
]
"src/scinexus/io_util.py" = [
    "ANN401",  # Allow use of Any
]
"src/scinexus/misc.py" = [
    "ANN401",  # Allow use of Any
]
"src/scinexus/io.py" = [
    "ANN401",  # Allow use of Any for generic serialisation apps
]
# The key must match the real module path (scinexus.parallel), otherwise
# the ignore silently never applies.
"src/scinexus/parallel.py" = [
    "ANN401",  # Allow use of Any
]
"src/scinexus/progress.py" = [
    "ANN401",  # Allow use of Any
    "SLF001",  # Allow accessing private attrs
]
"src/scinexus/typing.py" = [
    "ANN401",  # Allow use of Any
]

[format]
# Like Black, use double quotes for strings.
quote-style = "double"
# Like Black, indent with spaces, rather than tabs.
indent-style = "space"
# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false
# Always write LF line endings rather than auto-detecting them.
line-ending = "lf"
docstring-code-format = true
# Set the line length limit used when formatting code snippets in
# docstrings.
#
# This only has an effect when the `docstring-code-format` setting is
# enabled.
"""Run mypy via API so coverage can trace the plugin."""

import sys

import mypy.api

# mypy.api.run returns (normal report, error report, exit status)
report, errors, exit_status = mypy.api.run(sys.argv[1:])
sys.stdout.write(report)
sys.stderr.write(errors)
sys.exit(exit_status)
def __getattr__(name: str) -> "Any":
    """Resolve lazily imported package attributes on first access.

    Names listed in ``_LAZY_IMPORTS`` map to a ``(module path, attribute)``
    pair; the module is imported only when the name is requested.

    Raises
    ------
    AttributeError
        if ``name`` is not a lazily imported attribute
    """
    if name not in _LAZY_IMPORTS:
        msg = f"module {__name__!r} has no attribute {name!r}"
        raise AttributeError(msg)

    import importlib

    module_path, attr = _LAZY_IMPORTS[name]
    return getattr(importlib.import_module(module_path), attr)
class SciNexusPlugin(Plugin):
    """Mypy plugin that attaches the ``define_app`` class-decorator hook."""

    def get_class_decorator_hook_2(
        self, fullname: str
    ) -> Callable[[ClassDefContext], bool] | None:
        """Return the hook for ``define_app``, or None for any other decorator."""
        if fullname != DEFINE_APP_FULLNAME:
            return None
        return _define_app_hook


def plugin(version: str) -> type[SciNexusPlugin]:  # noqa: ARG001
    """Return the plugin class; ``version`` is accepted but unused."""
    return SciNexusPlugin
textwrap import time import traceback import types import typing from collections.abc import Callable, Iterable, Iterator from copy import copy, deepcopy from enum import Enum from pathlib import Path from typing import Any, ClassVar, Generic, Literal, Self, TypeVar, overload from uuid import uuid4 from citeable import Citation from scitrack import CachingLogger # type: ignore[import-untyped] from typeguard import TypeCheckError, check_type from scinexus import typing as snx_typing from scinexus._version import __version__ from scinexus.deserialise import register_deserialiser from scinexus.misc import docstring_to_summary_rest, get_object_provenance from scinexus.progress import Progress, get_progress from scinexus.typing import ( check_type_compatibility, get_type_display_names, resolve_type_hint, ) from scinexus.warning import deprecated_callable from .data_store import ( DataMember, DataStoreABC, get_id_from_source, ) _builtin_seqs = list, set, tuple T = TypeVar("T") R = TypeVar("R") _T = TypeVar("_T") def _make_logfile_name(process: object) -> str: text = re.split(r"\s+\+\s+", str(process)) parts = [] for part in text: if part.find("(") >= 0: part = part[: part.find("(")] parts.append(part) result = "-".join(parts) uid = str(uuid4()) return f"{result}-{uid[:8]}.log" def _get_origin(origin: typing.Any) -> str: return origin if isinstance(origin, str) else origin.__class__.__name__ class NotCompletedType(Enum): ERROR = "ERROR" FAIL = "FAIL" BUG = "BUG" class NotCompleted(int): """results that failed to complete""" type: NotCompletedType origin: str message: str source: str | None _persistent: tuple[tuple[Any, ...], dict[str, Any]] def __new__( cls, type_: NotCompletedType | str, origin: typing.Any, message: str, source: typing.Any = None, ) -> Self: """ Parameters ---------- type_ the category of failure, e.g. 
class AppType(Enum):
    """Kinds of app recognised by the app framework."""

    # may not appear on the right hand side of `+`
    LOADER = "loader"
    # may not appear on the left hand side of `+`
    WRITER = "writer"
    # the default kind assigned by define_app
    GENERIC = "generic"
    # cannot take part in composition via `+`
    NON_COMPOSABLE = "non_composable"


# Aliases to use Enum easily
LOADER = AppType.LOADER
WRITER = AppType.WRITER
GENERIC = AppType.GENERIC
NON_COMPOSABLE = AppType.NON_COMPOSABLE
orderedDict first_param_type = [p.annotation for p in params.parameters.values()][ min_params - 1 ] return_type = params.return_annotation if return_type is _no_value: msg = "must specify type hint for return type" raise TypeError(msg) if first_param_type is _no_value: msg = "must specify type hint for first parameter" raise TypeError(msg) if first_param_type is None: msg = "NoneType invalid type for first parameter" raise TypeError(msg) if return_type is None: msg = "NoneType invalid type for return value" raise TypeError(msg) if isinstance(first_param_type, str): msg = ( "Apps do not yet support string type hints " "(such as those caused by __future__ annotations). " f"Bad type hint: {first_param_type}" ) raise NotImplementedError(msg) if isinstance(return_type, str): msg = ( "Apps do not yet support string type hints " "(such as those caused by __future__ annotations). " f"Bad type hint: {return_type}" ) raise NotImplementedError(msg) return first_param_type, return_type def _get_main_hints(klass: type[Any]) -> tuple[object, object]: """return raw type hints for main method Returns ------- (first_param_type_hint, return_type_hint) """ # Check klass.main exists and is type method main_func = getattr(klass, "main", None) if ( main_func is None or not inspect.isclass(klass) or not inspect.isfunction(main_func) ): msg = f"must define a callable main() method in {klass.__name__!r}" raise ValueError(msg) first_param_type, return_type = _get_raw_hints(main_func, 2) return first_param_type, return_type def _set_hints( main_meth: Callable[..., Any], first_param_type: object, return_type: object ) -> Callable[..., Any]: """adds type hints to main""" main_meth.__annotations__["arg"] = first_param_type main_meth.__annotations__["return"] = return_type return main_meth class source_proxy(Generic[_T]): """wraps an object to track its source through app pipelines""" __slots__ = ("_obj", "_src", "_uuid") def __init__(self, obj: _T) -> None: self._obj = obj self._src = obj 
def _proxy_input(dstore: Iterable[Any]) -> list[source_proxy[Any]]:
    """Prepare members of ``dstore`` for source tracking.

    Falsy members are dropped. Members that are already a ``source_proxy``
    or that expose a ``source`` attribute are kept as they are; everything
    else is wrapped in a ``source_proxy``.
    """

    def _tracked(member: Any) -> Any:
        if isinstance(member, source_proxy) or hasattr(member, "source"):
            return member
        return source_proxy(member)

    return [_tracked(member) for member in dstore if member]
""" def __init__(self, app: "AppBase[Any, Any]", id_from_source: GetIdFuncType) -> None: self.app = app self.id_from_source = id_from_source def __call__( self, value: source_proxy[Any] | snx_typing.HasSource ) -> snx_typing.HasSource: if not isinstance(value, source_proxy): return self.app(value) result = self.app(value.obj) if self.id_from_source(result): return result value.set_obj(result) return value # Forbidden methods per app kind _FORBIDDEN_BASE = frozenset( { "__call__", "__repr__", "__str__", "__new__", "__copy__", "__eq__", "_validate_data_type", "as_completed", "check_data_type", "_get_citations", "citations", "bib", } ) _FORBIDDEN_COMPOSABLE = _FORBIDDEN_BASE | frozenset( { "__add__", "disconnect", "input", } ) _FORBIDDEN_WRITER = _FORBIDDEN_COMPOSABLE | frozenset( { "apply_to", "set_logger", } ) def _init_subclass_setup( cls: Any, app_type: AppType | Literal["loader", "writer", "generic", "non_composable"], skip_not_completed: bool, cite: Citation | None, ) -> None: """Shared setup logic for __init_subclass__ and define_app.""" app_type = AppType(app_type) if "__slots__" in cls.__dict__: msg = "slots are not currently supported" raise NotImplementedError(msg) if app_type is WRITER: forbidden = _FORBIDDEN_WRITER elif app_type is not NON_COMPOSABLE: forbidden = _FORBIDDEN_COMPOSABLE else: forbidden = _FORBIDDEN_BASE if ( app_type is not NON_COMPOSABLE and "input" in cls.__dict__ and cls.__dict__["input"] is not None ): msg = f"remove 'input' attribute in {cls.__name__!r}, reserved by the app framework" raise TypeError(msg) for meth in forbidden: if meth in cls.__dict__: val = cls.__dict__[meth] if isinstance(val, staticmethod): val = val.__func__ if inspect.isfunction(val) or isinstance(val, property): msg = f"remove {meth!r} in {cls.__name__!r}, reserved by the app framework" raise TypeError(msg) raw_input, raw_return = _get_main_hints(cls) mod = sys.modules.get(cls.__module__) if cls.__module__ else None module_globals = vars(mod) if mod else {} 
cls._input_type = resolve_type_hint(raw_input, module_globals) cls._return_type = resolve_type_hint(raw_return, module_globals) cls.app_type = app_type cls._skip_not_completed = skip_not_completed cls._check_data_type = True cls._cite = cite cls._source_wrapped = None if app_type is not LOADER: cls.input = None class AppBase(Generic[T, R]): """Base for all app types. Provides __call__, __repr__, etc. Raises ------ TypeError If a subclass defines any method name reserved by the app framework. The reserved names depend on the app type. All app types: ``__call__``, ``__repr__``, ``__str__``, ``__new__``, ``__copy__``, ``__eq__``, ``_validate_data_type``, ``as_completed``, ``check_data_type``, ``_get_citations``, ``citations``, ``bib``. Composable apps (GENERIC, LOADER, WRITER) additionally: ``__add__``, ``disconnect``, ``input``. Writer apps additionally: ``apply_to``, ``set_logger``. """ _is_intermediate_base: bool = False _skip_not_completed: bool _check_data_type: bool _source_wrapped: propagate_source | None _cite: Citation | None _input_type: type _return_type: type _init_vals: dict[str, Any] app_type: AppType input: Any main: Callable[..., Any] def __init_subclass__( cls, skip_not_completed: bool = True, cite: Citation | None = None, **kwargs: Any, ) -> None: super().__init_subclass__(**kwargs) # Skip setup for intermediate bases and classes built by define_app if "_is_intermediate_base" in cls.__dict__ or getattr( cls, "_define_app_pending", False ): return app_type = getattr(cls, "_default_app_type", GENERIC) _init_subclass_setup(cls, app_type, skip_not_completed, cite) def __new__(cls, *args: Any, **kwargs: Any) -> Self: obj = object.__new__(cls) if hasattr(cls, "_func_sig"): # we have a decorated function, the first parameter in the signature # is not given to constructor, so we create a new signature excluding that one params = cls._func_sig.parameters # type: ignore[attr-defined] init_sig = inspect.Signature(parameters=list(params.values())[1:]) bargs = 
init_sig.bind_partial(*args, **kwargs) else: init_sig = inspect.signature(cls.__init__) bargs = init_sig.bind_partial(cls, *args, **kwargs) bargs.apply_defaults() init_vals = bargs.arguments init_vals.pop("self", None) obj._init_vals = init_vals return obj def __copy__(self) -> Self: new = object.__new__(type(self)) new.__dict__.update(self.__dict__) return new def __eq__(self, other: object) -> bool: if not isinstance(other, type(self)): return False return all( v is other.__dict__.get(k) for k, v in self.__dict__.items() if k != "input" ) __hash__ = None # type: ignore[assignment] @property def check_data_type(self) -> bool: """toggle whether the type of input data matches the defined compatible types Notes ----- If False, and an invalid data type is passed, the error will still be caught, but in a potentially less informative way. For instance "'NoneType' object has no attribute 'blah'" """ return self._check_data_type @check_data_type.setter def check_data_type(self, value: bool) -> None: self._check_data_type = value head = getattr(self, "input", None) while head is not None: head._check_data_type = value head = getattr(head, "input", None) def __call__(self, val: T, *args: Any, **kwargs: Any) -> R | NotCompleted: if val is None: return NotCompleted( NotCompletedType.ERROR, self, "unexpected input value None", source=val ) if isinstance(val, NotCompleted) and self._skip_not_completed: return val if self.app_type is not LOADER and self.input: # passing to connected app val = self.input(val, *args, **kwargs) if isinstance(val, NotCompleted) and self._skip_not_completed: return val if self._check_data_type: type_checked = self._validate_data_type(val) if not type_checked: return type_checked # type: ignore[return-value] try: result = self.main(val, *args, **kwargs) except Exception: result = NotCompleted( NotCompletedType.ERROR, self, traceback.format_exc(), source=val ) if result is None: result = NotCompleted( NotCompletedType.BUG, self, "unexpected output 
value None", source=val ) return result def __repr__(self) -> str: val = f"{self.input!r} + " if self.app_type is not LOADER and self.input else "" all_args = {**self._init_vals} args_items = all_args.pop("args", None) data = ", ".join(f"{v!r}" for v in args_items) if args_items else "" kwargs_items = all_args.pop("kwargs", None) data += ( ", ".join(f"{k}={v!r}" for k, v in kwargs_items.items()) if kwargs_items else "" ) data += ", ".join(f"{k}={v!r}" for k, v in all_args.items()) data = f"{val}{self.__class__.__name__}({data})" return textwrap.fill( data, width=80, break_long_words=False, break_on_hyphens=False ) __str__ = __repr__ def _validate_data_type(self, data: Any) -> bool | NotCompleted: """checks data type matches defined compatible types using typeguard""" if isinstance(data, NotCompleted): if self._skip_not_completed: return data # skip_not_completed=False means the app handles NotCompleted itself return True if isinstance(data, source_proxy): data = data.obj if isinstance(data, _builtin_seqs) and len(data) == 0: return NotCompleted( NotCompletedType.ERROR, self, message="empty data", source=data ) try: check_type(data, self._input_type) return True except TypeCheckError: class_name = data.__class__.__name__ expected = get_type_display_names(self._input_type) msg = f"invalid data type, '{class_name}' not in {', '.join(sorted(expected))}" return NotCompleted(NotCompletedType.ERROR, self, message=msg, source=data) def as_completed( self, dstore: DataStoreABC | Iterable[Any] | str, parallel: bool = False, par_kw: dict[str, Any] | None = None, id_from_source: GetIdFuncType | None = None, show_progress: bool | Progress = False, ) -> Iterator[Any]: """invokes self composable function on the provided data store Parameters ---------- dstore a path, list of paths, or DataStore to which the process will be applied. parallel run in parallel, according to arguments in par_kwargs. 
If True, the last step of the composable function serves as the master process, with earlier steps being executed in parallel for each member of dstore. par_kw dict of values for configuring parallel execution. id_from_source extracts a unique identifier from each input. If not provided, defaults to the function registered via ``scinexus.data_store.set_id_from_source``, falling back to ``scinexus.data_store.get_unique_id``. show_progress controls progress bar display. Pass ``True`` for the default progress bar, ``False`` to disable, or a ``Progress`` instance for a custom backend. Notes ----- If run in parallel, this instance serves as the master object and aggregates results. If run in serial, results are returned in the same order as provided. """ if id_from_source is None: id_from_source = get_id_from_source() if self._source_wrapped is None: app = propagate_source( self.input if self.app_type is WRITER else self, id_from_source ) else: app = ( self.input._source_wrapped if self.app_type is WRITER else self._source_wrapped ) if isinstance(dstore, str): dstore = [dstore] elif isinstance(dstore, DataStoreABC): dstore = dstore.completed mapped = _proxy_input(dstore) if not mapped: return (_ for _ in ()) if parallel: from scinexus import parallel as snxpar par_kw = par_kw or {} to_do: typing.Iterable = snxpar.as_completed(app, mapped, **par_kw) else: to_do = map(app, mapped) progress = get_progress(show_progress) return progress(to_do, total=len(mapped)) def _get_citations(self) -> tuple[Citation, ...]: """Return citations for this app and all composed input apps.""" seen: set[Citation] = set() result: list[Citation] = [] if self._cite is not None: self._cite.app = self.__class__.__name__ seen.add(self._cite) result.append(self._cite) head = getattr(self, "input", None) while head is not None: if head._cite is not None and head._cite not in seen: head._cite.app = head.__class__.__name__ seen.add(head._cite) result.append(head._cite) head = getattr(head, "input", 
def __add__(self, other: "ComposableApp[Any, Any]") -> "ComposableApp[Any, Any]":
    """Compose ``self`` with ``other`` so ``self`` becomes the input of ``other``.

    Returns
    -------
    A copy of ``other`` whose ``input`` is a copy of ``self``.

    Raises
    ------
    TypeError
        if ``other`` is not a composable app, if ``self`` is a writer, if
        ``other`` is a loader, or if the return type of ``self`` is not
        compatible with the input type of ``other``
    ValueError
        if ``other`` compares equal to ``self``
    """
    composable_kinds = {WRITER, LOADER, GENERIC}
    if getattr(other, "app_type", None) not in composable_kinds:
        msg = f"{other!r} is not composable"
        raise TypeError(msg)

    if other == self:
        msg = "cannot add an app to itself"
        raise ValueError(msg)

    # writers must be last, loaders must be first
    if self.app_type is WRITER:
        msg = "Left hand side of add operator must not be of type writer"
        raise TypeError(msg)

    if other.app_type is LOADER:
        msg = "Right hand side of add operator must not be of type loader"
        raise TypeError(msg)

    compatible = check_type_compatibility(self._return_type, other._input_type)
    if not compatible:
        self_names = get_type_display_names(self._return_type)
        other_names = get_type_display_names(other._input_type)
        msg = (
            f"{self.__class__.__name__!r} return_type {self_names} "
            f"incompatible with {other.__class__.__name__!r} input "
            f"type {other_names}"
        )
        raise TypeError(msg)

    # compose copies so neither operand is modified
    composed = copy(other)
    composed.input = copy(self)
    return composed
No longer required since composition uses copies.""" class WriterApp(ComposableApp[T, R]): """Adds apply_to and set_logger for WRITER. Raises ------ TypeError If a subclass defines any method name reserved by the app framework: ``__call__``, ``__repr__``, ``__str__``, ``__new__``, ``__copy__``, ``__eq__``, ``_validate_data_type``, ``as_completed``, ``check_data_type``, ``_get_citations``, ``citations``, ``bib``, ``__add__``, ``disconnect``, ``input``, ``apply_to``, ``set_logger``. """ _is_intermediate_base: bool = True _default_app_type: ClassVar[AppType] = WRITER data_store: DataStoreABC logger: CachingLogger | None def apply_to( self, dstore: DataStoreABC | Iterable[Any] | str | Path, id_from_source: GetIdFuncType | None = None, parallel: bool = False, par_kw: dict[str, Any] | None = None, logger: bool | CachingLogger = True, cleanup: bool = True, show_progress: bool | Progress = False, ) -> DataStoreABC: """invokes self composable function on the provided data store Parameters ---------- dstore a path, list of paths, or DataStore to which the process will be applied. id_from_source makes the unique identifier from elements of dstore that will be used for writing results. If not provided, defaults to the function registered via ``scinexus.data_store.set_id_from_source``, falling back to ``scinexus.data_store.get_unique_id``. parallel run in parallel, according to arguments in par_kwargs. If True, the last step of the composable function serves as the master process, with earlier steps being executed in parallel for each member of dstore. par_kw dict of values for configuring parallel execution. logger Controls logging. If True (default), a CachingLogger is created automatically. If False, logging is disabled. A CachingLogger instance can be passed directly. 
cleanup after copying of log files into the data store, it is deleted from the original location show_progress controls progress bar display Returns ------- The output data store instance Notes ----- This is an append only function, meaning that if a member already exists in self.data_store for an input, it is skipped. If run in parallel, this instance spawns workers and aggregates results. """ if id_from_source is None: id_from_source = get_id_from_source() if self.app_type is WRITER: if self.input is None: msg = "writer app has no composed input" raise RuntimeError(msg) self.input._source_wrapped = propagate_source(self.input, id_from_source) self._source_wrapped = propagate_source(self, id_from_source) if isinstance(dstore, str | Path): # one filename dstore = [dstore] elif isinstance(dstore, DataStoreABC): dstore = dstore.completed # TODO this should fail if somebody provides data that cannot produce a unique_id inputs = {} for m in dstore: input_id = Path(m.unique_id) if isinstance(m, DataMember) else m input_id = id_from_source(input_id) # type: ignore[arg-type] if input_id in inputs or not input_id: msg = f"non-unique identifier {input_id!r} detected in data" raise ValueError(msg) if input_id in self.data_store: # we are assuming that this query returns True only when # an input_id is completed, we will not hit this if not_completed continue inputs[input_id] = m if ( not dstore ): # this should just return datastore, because if all jobs are done! 
def set_logger(self, logger: bool | CachingLogger = True) -> None:
    """Configure the logger used by this writer.

    Parameters
    ----------
    logger
        ``False`` disables logging (``self.logger`` becomes None), ``True``
        creates a ``CachingLogger``, and a ``CachingLogger`` instance is
        used as given.

    Raises
    ------
    TypeError
        if ``logger`` is neither a bool nor a ``CachingLogger``

    Notes
    -----
    A logger with no ``log_file_path`` is given one in the parent directory
    of ``self.data_store.source``, named by ``_make_logfile_name``.
    """
    if logger is False:
        self.logger = None
        return

    active = CachingLogger(create_dir=True) if logger is True else logger
    if not isinstance(active, CachingLogger):
        msg = f"logger must be of type CachingLogger not {type(active)}"
        raise TypeError(msg)

    if not active.log_file_path:
        log_dir = Path(self.data_store.source).parent  # type: ignore[attr-defined]
        active.log_file_path = str(log_dir / _make_logfile_name(self))

    self.logger = active
Raises ------ TypeError If a subclass defines any method name reserved by the app framework: ``__call__``, ``__repr__``, ``__str__``, ``__new__``, ``__copy__``, ``__eq__``, ``_validate_data_type``, ``as_completed``, ``check_data_type``, ``_get_citations``, ``citations``, ``bib``, ``__add__``, ``disconnect``, ``input``. Examples -------- Define a loader by inheritance:: class my_loader(LoaderApp): def main(self, path): return path """ _is_intermediate_base: bool = True _default_app_type: ClassVar[AppType] = LOADER class NonComposableApp(AppBase[T, R]): """Intermediate base class for NON_COMPOSABLE apps. Subclasses of ``NonComposableApp`` are automatically assigned ``app_type=NON_COMPOSABLE``. Non-composable apps cannot participate in pipeline composition via ``+``. Raises ------ TypeError If a subclass defines any method name reserved by the app framework: ``__call__``, ``__repr__``, ``__str__``, ``__new__``, ``__copy__``, ``__eq__``, ``_validate_data_type``, ``as_completed``, ``check_data_type``, ``_get_citations``, ``citations``, ``bib``. Examples -------- Define a non-composable app by inheritance:: class my_app(NonComposableApp): def main(self, val): return val """ _is_intermediate_base: bool = True _default_app_type: ClassVar[AppType] = NON_COMPOSABLE def _class_from_func(func: Callable[..., Any]) -> type[Any]: """make a class based on func Notes ----- produces a class consistent with the necessary properties for the define_app class decorator. 
func becomes a static method on the class """ # these methods MUST be in function scope so that separate instances are # created for each decorated function def _init(self: Any, *args: Any, **kwargs: Any) -> None: self._args = args self._kwargs = kwargs self._source_wrapped = None def _main(self: Any, arg: Any, *args: Any, **kwargs: Any) -> Any: kw_args = deepcopy(self._kwargs) kw_args = {**kw_args, **kwargs} args = (arg, *args, *deepcopy(self._args)) bound = self._func_sig.bind(*args, **kw_args) return self._user_func(**bound.arguments) module = func.__module__ # to be assigned to the generated class sig = inspect.signature(func) class_name = func.__name__ _main = _set_hints(_main, *_get_raw_hints(func, 1)) summary, body = docstring_to_summary_rest(func.__doc__ or "") func.__doc__ = None _class_dict = {"__init__": _init, "main": _main, "_user_func": staticmethod(func)} for method_name, method in _class_dict.items(): method.__name__ = method_name # type: ignore[attr-defined] method.__qualname__ = f"{class_name}.{method_name}" # type: ignore[attr-defined] result = types.new_class(class_name, (), exec_body=lambda x: x.update(_class_dict)) result.__module__ = module # necessary for pickle support result._func_sig = sig # type: ignore[attr-defined] result.__doc__ = summary result.__init__.__doc__ = body # type: ignore[misc] return result def _fix_super_class_cells( old_klass: type[Any], new_klass: type[Any], ) -> None: """Update __class__ closure cells so zero-arg super() works in the new class. When define_app rebuilds a class via types.new_class, methods copied from the original class retain closure cells pointing to the old class. Python's zero-arg super() uses the __class__ cell, so it fails when the instance is of the new class. This function patches those cells to point to new_klass. 
""" for attr in new_klass.__dict__.values(): fn = attr if isinstance(attr, types.FunctionType) else None if fn is None: # unwrap classmethod / staticmethod fn = getattr(attr, "__func__", None) if not isinstance(fn, types.FunctionType): continue closure = fn.__closure__ if closure is None: continue freevars = fn.__code__.co_freevars for i, name in enumerate(freevars): if name == "__class__": with contextlib.suppress(ValueError): if closure[i].cell_contents is old_klass: closure[i].cell_contents = new_klass @overload def define_app( klass: type[Any] | Callable[..., Any], ) -> type[ComposableApp[Any, Any]]: ... @overload def define_app( # type: ignore[overload-overlap] klass: None = None, *, app_type: Literal[AppType.NON_COMPOSABLE, "non_composable"], skip_not_completed: bool = ..., cite: Citation | None = ..., ) -> Callable[[type[Any] | Callable[..., Any]], type[AppBase[Any, Any]]]: ... @overload def define_app( # type: ignore[overload-overlap] klass: None = None, *, app_type: Literal[AppType.WRITER, "writer"], skip_not_completed: bool = ..., cite: Citation | None = ..., ) -> Callable[[type[Any] | Callable[..., Any]], type[WriterApp[Any, Any]]]: ... @overload def define_app( klass: None = None, *, app_type: AppType | Literal["loader", "writer", "generic", "non_composable"] = ..., skip_not_completed: bool = ..., cite: Citation | None = ..., ) -> Callable[[type[Any] | Callable[..., Any]], type[ComposableApp[Any, Any]]]: ... def define_app( klass: type[Any] | Callable[..., Any] | None = None, *, app_type: AppType | Literal["loader", "writer", "generic", "non_composable"] = GENERIC, skip_not_completed: bool = True, cite: Citation | None = None, ) -> type[Any]: """decorator for building callable apps Parameters ---------- klass either a class or a function. If a function, it is converted to a class with the function bound as a static method. app_type what type of app, typically you just want GENERIC. 
skip_not_completed if True (default), NotCompleted instances are returned without being passed to the app. cite a Citation instance describing the software or algorithm. If provided, its ``.app`` attribute is set to the class name. Raises ------ TypeError If the decorated class defines any method name reserved by the app framework. The reserved names depend on the app type. All app types: ``__call__``, ``__repr__``, ``__str__``, ``__new__``, ``__copy__``, ``__eq__``, ``_validate_data_type``, ``as_completed``, ``check_data_type``, ``_get_citations``, ``citations``, ``bib``. Composable apps (GENERIC, LOADER, WRITER) additionally: ``__add__``, ``disconnect``, ``input``. Writer apps additionally: ``apply_to``, ``set_logger``. Examples -------- An example app definition. >>> from scinexus.composable import define_app >>> @define_app ... class noop: ... def main(self, data: int) -> int: ... return data Notes ----- Instances of scinexus apps are callable. If an exception occurs, the app returns a ``NotCompleted`` instance with logging information. Apps defined with app_type ``LOADER``, ``GENERIC`` or ``WRITER`` can be "composed" (summed together) to produce a single callable that sequentially invokes the composed apps. For example, the independent usage of app instances ``app1`` and ``app2`` as ```python { notest } app2(app1(data)) ``` is equivalent to ```python { notest } combined = app1 + app2 combined(data) ``` The ``app_type`` attribute is used to constrain how apps can be composed. ``LOADER`` and ``WRITER`` are special cases. If included, a ``LOADER`` must always be first, e.g. ```python { notest } app = a_loader + a_generic ``` If included, a ``WRITER`` must always be last, e.g. ```python { notest } app = a_generic + a_writer ``` Changing the order for either of the above will result in a ``TypeError``. There are no constraints on ordering of ``GENERIC`` aside from compatability of their input and return types (see below). 
In order to be decorated with ``@define_app`` a class **must** - implement a method called ``main`` - type hint the first argument of ``main`` - type hint the return type for ``main`` While you can have more than one argument in ``main``, this is currently not supported in composable apps. Overlap between the return type hint and first argument hint is required for two apps to be composed together. ``define_app`` adds a ``__call__`` method which checks an input value prior to passing it to ``app.main()`` as a positional argument. The data checking results in ``NotCompleted`` being returned immediately, unless ``skip_not_completed==False``. If the input value type is consistent with the type hint on the first argument of main it is passed to ``app.main()``. If it does not match, a new ``NotCompleted`` instance is returned. """ if hasattr(klass, "app_type"): msg = ( f"The class {klass.__name__!r} is already decorated, avoid using " # type: ignore[union-attr] "inheritance from a decorated class." 
) raise TypeError( msg, ) app_type = AppType(app_type) def wrapped(klass: type[Any] | Callable[..., Any]) -> type[Any]: if inspect.isfunction(klass): klass = _class_from_func(klass) if not inspect.isclass(klass): msg = f"{klass} is not a class" raise ValueError(msg) # Select base class based on app_type base: type[AppBase[Any, Any]] if app_type is WRITER: base = WriterApp elif app_type is not NON_COMPOSABLE: base = ComposableApp else: base = AppBase if "__slots__" in klass.__dict__: msg = "slots are not currently supported" raise NotImplementedError(msg) # Get type hints before rebuilding the class raw_input, raw_return = _get_main_hints(klass) # Collect the user's class dict (excluding metaclass artefacts) original_dict = { k: v for k, v in klass.__dict__.items() if k not in ("__dict__", "__weakref__") } # Prevent __init_subclass__ from running setup (we do it below) original_dict["_define_app_pending"] = True # Preserve user-specified bases (excluding object) so super() # resolves through the original MRO extra_bases = tuple(b for b in klass.__bases__ if b is not object) # Recreate class with the base (types.new_class stores # parameterised form in __orig_bases__ for type checkers) new_klass = types.new_class( klass.__name__, (base[raw_input, raw_return], *extra_bases), # type: ignore[index] exec_body=lambda ns: ns.update(original_dict), ) new_klass.__module__ = klass.__module__ new_klass.__qualname__ = klass.__qualname__ _fix_super_class_cells(klass, new_klass) del new_klass._define_app_pending # type: ignore[attr-defined] # Run setup once with the decorator's arguments _init_subclass_setup(new_klass, app_type, skip_not_completed, cite) return new_klass return wrapped(klass) if klass else wrapped # type: ignore[return-value] def is_app_composable(obj: object) -> bool: """checks whether obj has been decorated by define_app and it's app_type attribute is not NON_COMPOSABLE""" return is_app(obj) and obj.app_type is not NON_COMPOSABLE # type: ignore[attr-defined] 
def is_app(obj: object) -> bool: """checks whether obj has been decorated by define_app""" return hasattr(obj, "app_type") @register_deserialiser(get_object_provenance(NotCompleted)) def deserialise_not_completed(data: dict[str, Any]) -> NotCompleted: """deserialising NotCompletedResult""" data.pop("version", None) init = data.pop("not_completed_construction") args = init.pop("args") kwargs = init.pop("kwargs") return NotCompleted(*args, **kwargs) cogent3-scinexus-e0aee79/src/scinexus/data_store.py000066400000000000000000000753241520253266500225640ustar00rootroot00000000000000from __future__ import annotations import contextlib import inspect import json import re import reprlib from abc import ABC, abstractmethod from collections import defaultdict from enum import Enum from functools import singledispatch from io import TextIOWrapper from pathlib import Path from typing import TYPE_CHECKING, overload from scitrack import get_text_hexdigest # type: ignore[import-untyped] from scinexus.deserialise import deserialise_object from scinexus.io_util import get_format_suffixes, open_ from scinexus.parallel import is_master_process if TYPE_CHECKING: # pragma: no cover from collections.abc import Callable, Iterator from typing import Any, Self from citeable import CitationBase NOT_COMPLETED_TABLE = "not_completed" LOG_TABLE = "logs" MD5_TABLE = "md5" # used for log files, not-completed results _special_suffixes = re.compile(r"\.(log|json)$") CITATIONS_FILE = "bibliography.citations" NoneType = type(None) class Mode(Enum): r = "r" w = "w" a = "a" APPEND = Mode.a OVERWRITE = Mode.w READONLY = Mode.r # Summary display registry _summary_display_func: Callable[..., Any] | None = None # Unique-ID extractor registry _id_from_source_func: Callable[..., Any] | None = None def set_summary_display(func: Callable[..., Any] | None) -> None: """Set the function used to display data store summaries. 
Parameters ---------- func A callable with signature ``func(data, *, name) -> Any`` where *data* is a ``dict`` or ``list[dict]`` and *name* identifies the summary method (e.g. ``"describe"``). Pass ``None`` to clear. """ global _summary_display_func # noqa: PLW0603 _summary_display_func = func def get_summary_display() -> Callable[..., Any] | None: """Return the currently registered summary display function, or ``None``.""" return _summary_display_func def _apply_summary_display(data: Any, *, name: str) -> Any: if _summary_display_func is not None: return _summary_display_func(data, name=name) return data def _summary_property(data_method: Callable[..., Any]) -> property: """Create a property that delegates to a protected data method and applies display. The *data_method* should be a method defined on ``DataStoreABC`` (or a subclass) whose name starts with ``_``. Subclasses customise the raw data by overriding the ``_``-prefixed method; the public property created here handles display wrapping automatically. """ method_name = data_method.__name__ public_name = method_name.removeprefix("_") def fget(self: DataStoreABC) -> Any: data = getattr(self, method_name)() return _apply_summary_display(data, name=public_name) return property(fget, doc=data_method.__doc__) class DataMemberABC(ABC): """Abstract base class for DataMember A data member is a handle to a record in a DataStore. It has a reference to its data store and a unique identifier. """ @property @abstractmethod def data_store(self) -> DataStoreABC: ... @property @abstractmethod def unique_id(self) -> str: ... 
# Remaining methods of the DataMemberABC class body.
def __str__(self) -> str:
    """A member prints as its unique identifier."""
    return self.unique_id

def __repr__(self) -> str:
    """Show the concrete class name, the owning store's source and the id."""
    return f"{self.__class__.__name__}(data_store={self.data_store.source}, unique_id={self.unique_id})"

def read(self) -> str | bytes:
    """Fetch this member's content by asking the owning data store for it."""
    store = self.data_store
    return store.read(self.unique_id)

def __eq__(self, other: object) -> bool:
    """Members are equal when ``other`` is an instance of this member's
    class and both the data store and the unique id compare equal.

    This is what makes ``member in list_of_members`` work.
    """
    if not isinstance(other, type(self)):
        return False
    mine = (self.data_store, self.unique_id)
    theirs = (other.data_store, other.unique_id)
    return mine == theirs

@property
def md5(self) -> str | None:
    """Checksum for this member as reported by the owning data store."""
    store = self.data_store
    return store.md5(self.unique_id)


class DataStoreABC(ABC):
    """Abstract base class for DataStore"""

    # constructor arguments captured by __new__, used by __repr__
    _init_vals: dict[str, Any]
    # member lists, initialised empty by __new__
    _completed: list[DataMemberABC]
    _not_completed: list[DataMemberABC]

    def __new__(cls, *args: Any, **kwargs: Any) -> Self:
        """Create the instance and record the arguments it was built with.

        The arguments are bound against the signature of ``cls.__init__``
        (with defaults applied) and stored, minus ``self``, on
        ``_init_vals``. The ``_completed`` and ``_not_completed`` lists are
        created empty here.
        """
        instance = object.__new__(cls)
        signature = inspect.signature(cls.__init__)
        # cls stands in for the ``self`` slot so positional args line up
        bound = signature.bind_partial(cls, *args, **kwargs)
        bound.apply_defaults()
        captured = bound.arguments
        captured.pop("self", None)
        instance._init_vals = captured
        instance._completed = []
        instance._not_completed = []
        return instance

    @property
    @abstractmethod
    def source(self) -> str | Path:
        """location used to connect to the data store, supplied by the subclass"""
        ...

    @property
    @abstractmethod
    def mode(self) -> Mode:
        """the Mode of this data store, supplied by the subclass"""
        ...

    @property
    @abstractmethod
    def limit(self) -> int | None:
        """optional integer limit, supplied by the subclass"""
        ...

    def __repr__(self) -> str:
        """Render as ``ClassName(arg=value, ...)`` from the captured arguments."""
        name = self.__class__.__name__
        pairs = [f"{k}={v}" for k, v in self._init_vals.items()]
        construction = ", ".join(pairs)
        return f"{name}({construction})"

    def __str__(self) -> str:
        """Summarise the member count, class, source and a sample of members.

        With more than two members only the first two are listed, followed
        by ``...``.
        """
        num = len(self.members)
        name = self.__class__.__name__
        if num > 2:
            sample = f"{list(self[:2])}..."
        else:
            sample = list(self)
        return f"{num}x member {name}(source='{self.source}', members={sample})"

    # typing-only stubs: an int index gives one member, a slice gives a list
    @overload
    def __getitem__(self, index: int) -> DataMemberABC: ...

    @overload
    def __getitem__(self, index: slice) -> list[DataMemberABC]: ...
# (continuation of the DataStoreABC class body)
def __getitem__(self, index: int | slice) -> DataMemberABC | list[DataMemberABC]:
    """Index or slice into the combined member list."""
    everything = self.members
    return everything[index]

def __len__(self) -> int:
    """Number of members in the combined member list."""
    everything = self.members
    return len(everything)

def __contains__(self, identifier: object) -> bool:
    """whether a member with this unique id has been stored"""
    for member in self:
        if member.unique_id == identifier:
            return True
    return False

@abstractmethod
def read(self, unique_id: str) -> str | bytes:
    """Return the content stored under ``unique_id``."""
    ...

def _check_writable(self, unique_id: str) -> None:
    """Refuse writes that the current mode does not permit.

    Raises
    ------
    OSError
        if the store is read only, or if ``unique_id`` already exists and
        the store is in append mode
    """
    if self.mode is READONLY:
        raise OSError("datastore is readonly")
    # the membership test runs before the mode test, as in the original
    if unique_id in self:
        if self.mode is APPEND:
            raise OSError("cannot overwrite existing record in append mode")

@abstractmethod
def write(self, *, unique_id: str, data: str | bytes) -> None:
    """Base behaviour only checks the store is writable for ``unique_id``."""
    self._check_writable(unique_id)

@abstractmethod
def write_not_completed(self, *, unique_id: str, data: str | bytes) -> None:
    """Base behaviour only checks the store is writable for ``unique_id``."""
    self._check_writable(unique_id)

@abstractmethod
def write_log(self, *, unique_id: str, data: str | bytes) -> None:
    """Base behaviour only checks the store is writable for ``unique_id``."""
    self._check_writable(unique_id)

@property
def members(self) -> list[DataMemberABC]:
    """Completed members followed by not-completed members."""
    done = self.completed
    pending = self.not_completed
    return done + pending

def __iter__(self) -> Iterator[DataMemberABC]:
    """Iterate over the combined member list."""
    for member in self.members:
        yield member

@property
@abstractmethod
def logs(self) -> list[DataMemberABC]:
    """Members for the stored log records."""
    ...

@property
@abstractmethod
def completed(self) -> list[DataMemberABC]:
    """Members for the completed records."""
    ...

@property
@abstractmethod
def not_completed(self) -> list[DataMemberABC]:
    """Members for the not-completed records."""
    ...
# (continuation of the DataStoreABC class body)
def _summary_logs(self) -> list[dict]:
    """returns a list of dicts summarising log files

    Each log is read as text. The first line is split on tabs and its first
    field is reported as ``time``. For every following line, the last
    tab-separated field is split once on ``" : "`` into a key and a value;
    a line without that separator is appended to the value of the most
    recent key.

    Returns
    -------
    one dict per log with the keys ``time``, ``name``, ``python_version``,
    ``who``, ``command`` and ``composable``

    Raises
    ------
    ValueError
        if a continuation line appears before any key
    KeyError
        if a log has no ``python``, ``user`` or ``command_string`` entry
    IndexError
        if a log has no lines at all
    UnicodeDecodeError
        if a log is returned as bytes that are not valid UTF-8
    """
    rows = []
    for record in self.logs:
        raw = record.read()
        # read() is typed ``str | bytes``; str() on bytes yields the
        # "b'...'" repr on a single line rather than the text, so bytes
        # are decoded explicitly
        text = raw.decode() if isinstance(raw, bytes) else str(raw)
        lines = text.splitlines()
        first = lines.pop(0).split("\t")
        row = {"time": first[0], "name": record.unique_id}
        key: str | None = None
        mapped: dict[str, str] = {}
        for line in lines:
            parts = line.split("\t")[-1].split(" : ", maxsplit=1)
            if len(parts) == 1:
                # no separator: this line continues the previous value
                if key is None:
                    msg = "malformed log data: continuation line before any key"
                    raise ValueError(msg)
                mapped[key] += parts[0]
                continue
            key = parts[0]
            mapped[key] = parts[1]
        row["python_version"] = mapped["python"]
        row["who"] = mapped["user"]
        row["command"] = mapped["command_string"]
        # the only optional entry; absent means an empty string
        row["composable"] = mapped.get("composable function", "")
        rows.append(row)
    return rows

# public property: calls _summary_logs and applies the registered display
summary_logs = _summary_property(_summary_logs)

def _summary_not_completed(self) -> list[dict]:
    """returns a list of dicts summarising not completed results"""
    return summary_not_completeds(self.not_completed)

# public property: calls _summary_not_completed and applies the display
summary_not_completed = _summary_property(_summary_not_completed)

def _describe(self) -> dict[str, object]:
    """returns counts of completed, not completed and log members"""
    num_not_completed = len(self.not_completed)
    num_completed = len(self.completed)
    num_logs = len(self.logs)
    return {
        "completed": num_completed,
        "not_completed": num_not_completed,
        "logs": num_logs,
    }

# public property: calls _describe and applies the registered display
describe = _summary_property(_describe)

@abstractmethod
def drop_not_completed(self, *, unique_id: str | None = None) -> None:
    """Remove not-completed records; implemented by subclasses."""
    ...
def _validate(self) -> dict[str, object]: correct_md5 = len(self.members) missing_md5 = 0 for m in self.members: data = m.read() md5 = self.md5(m.unique_id) if md5 is None: missing_md5 += 1 correct_md5 -= 1 elif md5 != get_text_hexdigest(data): correct_md5 -= 1 incorrect_md5 = len(self.members) - correct_md5 - missing_md5 return { "md5_correct": correct_md5, "md5_incorrect": incorrect_md5, "md5_missing": missing_md5, "has_log": len(self.logs) > 0, } def validate(self) -> dict[str, object]: return _apply_summary_display(self._validate(), name="validate") @abstractmethod def md5(self, unique_id: str) -> str | None: """ Parameters ---------- unique_id name of data store member Returns ------- md5 checksum for the member, if available, None otherwise """ def write_citations(self, *, data: tuple[CitationBase, ...]) -> None: """Write citations to the data store. Subclasses should override.""" if not data: return import warnings warnings.warn( f"{type(self).__name__!r} does not support saving citations", UserWarning, stacklevel=2, ) def _summary_citations(self) -> list[dict]: """Return a list of dicts summarising stored citations.""" if type(self)._load_citations is DataStoreABC._load_citations: import warnings warnings.warn( f"{type(self).__name__!r} does not support saving citations", UserWarning, stacklevel=2, ) citations = self._load_citations() return [{"app": c.summary()[0], "citation": c.summary()[1]} for c in citations] summary_citations = _summary_property(_summary_citations) def write_bib(self, dest_path: str | Path) -> None: """Write stored citations as a BibTeX .bib file.""" citations = self._load_citations() if not citations: import warnings warnings.warn( "No citations stored in this data store", UserWarning, stacklevel=2, ) return from citeable import write_bibtex dest_path = Path(dest_path).expanduser().absolute() write_bibtex(citations, dest_path) def _load_citations(self) -> list[CitationBase]: """Load stored citations. 
Override in subclasses.""" return [] class DataMember(DataMemberABC): """Generic DataMember class, bound to a data store. All read operations delivered by the parent.""" def __init__(self, *, data_store: DataStoreABC, unique_id: str) -> None: self._data_store = data_store self._unique_id = str(unique_id) @property def data_store(self) -> DataStoreABC: return self._data_store @property def unique_id(self) -> str: return self._unique_id def summary_not_completeds( not_completed: list[DataMemberABC], deserialise: Callable[..., Any] | None = None, ) -> list[dict]: """ Parameters ---------- not_completed list of DataMember instances for notcompleted records deserialise a callable for converting not completed contents, the result of member.read() must be a json string """ err_pat = re.compile(r"[A-Z][a-z]+[A-Z][a-z]+\:.+") types = defaultdict(list) indices = "type", "origin" num_bytes = 0 for member in not_completed: record = member.read() if deserialise: record = deserialise(record) if isinstance(record, bytes): num_bytes += 1 continue record = deserialise_object(record) key = tuple(getattr(record, k, None) for k in indices) match = err_pat.findall(record.message) types[key].append([match[-1] if match else record.message, record.source]) if num_bytes == len(not_completed): return [] rows = [] maxtring = reprlib.aRepr.maxstring reprlib.aRepr.maxstring = 45 limit_len = 45 for key in types: msg_list, src_list = list(zip(*types[key], strict=False)) messages = reprlib.repr(", ".join(m.splitlines()[-1] for m in set(msg_list))) sources = ", ".join(s.splitlines()[-1] for s in src_list if s) if len(sources) > limit_len: idx = sources.rfind(",", None, limit_len) + 1 idx = idx if idx > 0 else limit_len sources = f"{sources[:idx]} ..." 
row = { "type": getattr(key[0], "value", key[0]), "origin": key[1], "message": messages, "num": len(types[key]), "source": sources, } rows.append(row) reprlib.aRepr.maxstring = maxtring # restoring original val return rows def _tidy_and_check_suffix(suffix: str | None) -> str: """tidies suffix by removing leading wildcards and dots""" suffix = suffix or "" suffix = re.sub(r"^[\s.*]+", "", suffix) # tidy the suffix if not suffix or suffix == "*": msg = "suffix is required for DataStoreDirectory and cannot be just a wildcard" raise ValueError(msg) return suffix class DataStoreDirectory(DataStoreABC): """data store backed by a directory on the filesystem""" def __init__( self, source: str | Path, mode: Mode | str = READONLY, suffix: str | None = None, limit: int | None = None, verbose: bool = False, ) -> None: self._mode = Mode(mode) source = Path(source) self._source = source.expanduser() self.suffix = _tidy_and_check_suffix(suffix) self._verbose = verbose self._source_check_create(self._mode) self._limit = limit def __contains__(self, item: object) -> bool: if not isinstance(item, str): return False if not _special_suffixes.search(item): item = f"{item}.{self.suffix}" if self.suffix not in item else item return super().__contains__(item) def _source_check_create(self, mode: Mode) -> None: if not is_master_process(): return sub_dirs = [NOT_COMPLETED_TABLE, LOG_TABLE, MD5_TABLE] source = self.source if mode is READONLY: if not source.exists(): msg = f"'{source}' does not exist" raise OSError(msg) return if not source.exists(): source.mkdir(parents=True, exist_ok=True) for sub_dir in sub_dirs: (source / sub_dir).mkdir(parents=True, exist_ok=True) @property def source(self) -> Path: """path that references the data store""" return self._source @property def mode(self) -> Mode: """string that references datastore mode, override in subclass constructor""" return self._mode @property def limit(self) -> int | None: return self._limit def read(self, unique_id: str) -> str: 
"""reads data corresponding to identifier""" with open_(self.source / unique_id) as infile: return infile.read() def drop_not_completed(self, *, unique_id: str | None = None) -> None: """remove not-completed records from the directory Parameters ---------- unique_id if provided, only drop the record with this identifier, otherwise drop all not-completed records """ unique_id = (unique_id or "").replace(f".{self.suffix}", "") unique_id = f"{unique_id}.json" if unique_id else unique_id nc_dir = self.source / NOT_COMPLETED_TABLE md5_dir = self.source / MD5_TABLE for m in list(self.not_completed): if unique_id and not m.unique_id.endswith(unique_id): continue file = nc_dir / Path(m.unique_id).name file.unlink() md5_file = md5_dir / f"{file.stem}.txt" md5_file.unlink() self.not_completed.remove(m) if not unique_id: Path(self.source / NOT_COMPLETED_TABLE).rmdir() # reset _not_completed list to force not_completed function to make it again self._not_completed: list[DataMemberABC] = [] @property def logs(self) -> list[DataMemberABC]: log_dir = self.source / LOG_TABLE return ( [ DataMember(data_store=self, unique_id=str(Path(LOG_TABLE) / m.name)) for m in log_dir.glob("*") ] if log_dir.exists() else [] ) @property def completed(self) -> list[DataMemberABC]: if not self._completed: self._completed = [] suffix = f"*.{self.suffix}" for i, m in enumerate(self.source.glob(suffix)): if self.limit and i == self.limit: break self._completed.append(DataMember(data_store=self, unique_id=m.name)) return self._completed @property def not_completed(self) -> list[DataMemberABC]: if not self._not_completed: self._not_completed = [] for i, m in enumerate((self.source / NOT_COMPLETED_TABLE).glob("*.json")): if self.limit and i == self.limit: break self._not_completed.append( DataMember( data_store=self, unique_id=str(Path(NOT_COMPLETED_TABLE) / m.name), ), ) return self._not_completed def _write( self, *, subdir: str, unique_id: str, suffix: str, data: str, ) -> DataMember | None: 
super().write(unique_id=unique_id, data=data) # check suffix compatible with this datastore sfx, cmp = get_format_suffixes(unique_id) if sfx != suffix: unique_id = f"{Path(unique_id).stem}.{suffix}" sfx, cmp = get_format_suffixes(unique_id) unique_id = ( unique_id.replace(self.suffix, suffix) if self.suffix and self.suffix != suffix else unique_id ) if suffix != "log" and unique_id in self: return None newline = None if cmp else "\n" mode = "wt" if cmp else "w" with open_(self.source / subdir / unique_id, mode=mode, newline=newline) as out: out.write(data) if subdir == LOG_TABLE: return None if subdir == NOT_COMPLETED_TABLE: member = DataMember( data_store=self, unique_id=str(Path(NOT_COMPLETED_TABLE) / unique_id), ) elif not subdir: member = DataMember(data_store=self, unique_id=unique_id) md5 = get_text_hexdigest(data) unique_id = unique_id.replace(suffix, "txt") unique_id = unique_id if cmp is None else unique_id.replace(f".{cmp}", "") with open_(self.source / MD5_TABLE / unique_id, mode="w") as out: out.write(md5) return member def write(self, *, unique_id: str, data: str) -> DataMember: # type: ignore[override] """writes a completed record ending with .suffix Parameters ---------- unique_id unique identifier data text data to be written Returns ------- a member for this record Notes ----- Drops any not-completed member corresponding to this identifier """ member = self._write( subdir="", unique_id=unique_id, suffix=self.suffix, data=data, ) self.drop_not_completed(unique_id=unique_id) if member is not None: self._completed.append(member) return member # type: ignore[return-value] def write_not_completed(self, *, unique_id: str, data: str) -> DataMember: # type: ignore[override] """writes a not completed record as json Parameters ---------- unique_id unique identifier data text data to be written Returns ------- a member for this record """ (self.source / NOT_COMPLETED_TABLE).mkdir(parents=True, exist_ok=True) member = self._write( subdir=NOT_COMPLETED_TABLE, 
unique_id=unique_id, suffix="json", data=data, ) if member is not None: self._not_completed.append(member) return member # type: ignore[return-value] def write_log(self, *, unique_id: str, data: str) -> None: # type: ignore[override] (self.source / LOG_TABLE).mkdir(parents=True, exist_ok=True) _ = self._write(subdir=LOG_TABLE, unique_id=unique_id, suffix="log", data=data) def md5(self, unique_id: str) -> str | None: """ Parameters ---------- unique_id name of data store member Returns ------- md5 checksum for the member, if available, None otherwise """ uid_name = Path(unique_id).name md5_name = re.sub(rf"[.]({self.suffix}|json)$", ".txt", uid_name) path = self.source / MD5_TABLE / md5_name return path.read_text() if path.exists() else None def write_citations(self, *, data: tuple[CitationBase, ...]) -> None: if not data: return from citeable import to_jsons path = self.source / CITATIONS_FILE path.write_text(to_jsons(data)) def _load_citations(self) -> list[CitationBase]: from citeable import from_jsons path = self.source / CITATIONS_FILE if not path.exists(): return [] return from_jsons(path.read_text()) class ReadOnlyDataStoreZipped(DataStoreABC): """read-only data store backed by a zip archive""" def __init__( self, source: str | Path, mode: Mode | str = READONLY, suffix: str | None = None, limit: int | None = None, verbose: bool = False, ) -> None: self._mode = Mode(mode) if self._mode is not READONLY: msg = "this is a read only data store" raise ValueError(msg) self.suffix = _tidy_and_check_suffix(suffix) source = Path(source) self._source = source.expanduser() if not self._source.exists(): msg = f"{self._source!s} does not exit" raise OSError(msg) self._verbose = verbose self._limit = limit @property def limit(self) -> int | None: return self._limit @property def mode(self) -> Mode: return self._mode @property def source(self) -> Path: return self._source def read(self, unique_id: str) -> str | bytes: """reads data corresponding to identifier from the zip 
archive""" import zipfile member_path = str(Path(self.source.stem, unique_id)).replace("\\", "/") with zipfile.ZipFile(self.source) as archive: raw = archive.open(member_path) wrapped = TextIOWrapper(raw, encoding="latin-1") return wrapped.read() def _iter_matches(self, subdir: str, pattern: str) -> Iterator[Path]: import zipfile with zipfile.ZipFile(self._source) as archive: names = archive.namelist() for name in names: p = Path(name) if subdir and p.parent.name != subdir: continue if p.match(pattern) and not p.name.startswith("."): yield p @property def completed(self) -> list[DataMemberABC]: if not self._completed: pattern = f"*.{self.suffix}" self._completed = [] num_matches = 0 for name in self._iter_matches("", pattern): num_matches += 1 member = DataMember(data_store=self, unique_id=name.name) self._completed.append(member) if self.limit and num_matches >= self.limit: break return self._completed @property def not_completed(self) -> list[DataMemberABC]: if not self._not_completed: self._not_completed = [] num_matches = 0 nc_dir_path = Path(NOT_COMPLETED_TABLE) for name in self._iter_matches(NOT_COMPLETED_TABLE, "*.json"): num_matches += 1 member = DataMember( data_store=self, unique_id=str(nc_dir_path / name.name), ) self._not_completed.append(member) if self.limit and num_matches >= self.limit: break return self._not_completed @property def logs(self) -> list[DataMemberABC]: log_dir = Path(LOG_TABLE) logs: list[DataMemberABC] = [] for name in self._iter_matches(LOG_TABLE, "*"): m = DataMember(data_store=self, unique_id=str(log_dir / name.name)) logs.append(m) return logs def md5(self, unique_id: str) -> str | None: """ Parameters ---------- unique_id name of data store member Returns ------- md5 checksum for the member, if available, None otherwise """ uid_name = Path(unique_id).name md5_name = re.sub(rf"[.]({self.suffix}|json)$", ".txt", uid_name) md5_dir = Path(MD5_TABLE) for name in self._iter_matches(MD5_TABLE, md5_name): m = DataMember(data_store=self, 
unique_id=str(md5_dir / name.name)) result = m.read() return result if isinstance(result, str) else result.decode() return None def drop_not_completed(self, *, unique_id: str | None = None) -> None: """not supported on read-only zip data stores""" msg = "zip data stores are read only" raise TypeError(msg) def write(self, *, unique_id: str, data: str | bytes) -> None: msg = "zip data stores are read only" raise TypeError(msg) def write_not_completed(self, *, unique_id: str, data: str | bytes) -> None: msg = "zip data stores are read only" raise TypeError(msg) def write_log(self, *, unique_id: str, data: str | bytes) -> None: msg = "zip data stores are read only" raise TypeError(msg) def write_citations(self, *, data: tuple[CitationBase, ...]) -> None: msg = "zip data stores are read only" raise TypeError(msg) def _load_citations(self) -> list[CitationBase]: import zipfile from citeable import from_jsons target = str(Path(self.source.stem, CITATIONS_FILE)).replace("\\", "/") try: with zipfile.ZipFile(self.source) as archive: data = archive.read(target).decode("utf-8") return from_jsons(data) except KeyError: return [] def get_unique_id(name: object) -> str | None: """strips any format suffixes from name""" if (name := get_data_source(name)) is None: return None suffixes = ".".join(sfx for sfx in get_format_suffixes(name) if sfx) return re.sub(rf"[.]{suffixes}$", "", name) def set_id_from_source(func: Callable[..., Any] | None) -> None: """Register a custom function for extracting unique IDs from data objects. The registered function is consulted as the default by :meth:`AppBase.as_completed` and :meth:`WriterApp.apply_to` to derive a unique identifier for each input, and by :class:`NotCompleted` to normalise the ``source=`` keyword on error records. Pass ``None`` to clear the registration and restore the built-in :func:`get_unique_id`. 
def make_record_for_json(
    identifier: str, data: Any, completed: bool
) -> dict[str, object]:
    """returns a dict for storage as json

    Parameters
    ----------
    identifier
        name under which the record is stored
    data
        the object to store; its ``to_rich_dict()`` result is used when
        that method exists
    completed
        whether the record represents a completed result

    Notes
    -----
    The ``"data"`` entry of the returned dict is itself a json string.
    """
    try:
        data = data.to_rich_dict()
    except AttributeError:
        # no rich-dict representation available, serialise data as is
        pass
    return {
        "identifier": identifier,
        "data": json.dumps(data),
        "completed": completed,
    }
class register_deserialiser:
    """
    decorator that registers a function for inflating json-serialised objects

    Registered functions are stored in a module-level dict keyed by type
    string; ``deserialise_object()`` consults that dict. Each type string
    must uniquely identify the value of the 'type' entry of a serialised
    dict, e.g. 'scinexus.composable.NotCompleted'.

    Parameters
    ----------
    args
        type strings, each must not already be registered
    """

    def __init__(self, *args: str) -> None:
        for candidate in args:
            if not isinstance(candidate, str):
                raise TypeError(f"{candidate!r} is not a string")
            if candidate in _deserialise_func_map:
                raise ValueError(
                    f"{candidate!r} already in {list(_deserialise_func_map)}"
                )
        self._type_str = args

    def __call__(self, func: Callable[P, R]) -> Callable[P, R]:
        # every type string given at construction maps to the same function
        _deserialise_func_map.update(dict.fromkeys(self._type_str, func))
        return func
_pat = re.compile("[a-z]")


def str_to_version(v: str) -> tuple[str, ...]:
    """parse a version string into a tuple of components

    Parameters
    ----------
    v
        version string, e.g. ``'2024.8.7a3'``

    Returns
    -------
    The period-separated components of ``v`` with the first lower-case
    letter split out as its own component, e.g.
    ``('2024', '8', '7', 'a', '3')``. An empty tuple is returned when
    ``v`` contains no lower-case letter.
    """
    letter = _pat.search(v)
    if letter is None:
        return ()
    # slice v after the letter; formatting letter.end() directly would
    # insert the match *index* rather than the trailing release number
    head, tail = v[: letter.start()], v[letter.end() :]
    return tuple(f"{head}.{letter.group()}.{tail}".split("."))
class register_datastore_reader:
    """
    registration decorator for read only data store classes

    The registration key must be a string that is the file format suffix
    (more than one suffix can be registered at a time). ``None`` may also
    be registered as a key.

    Parameters
    ----------
    args
        must be unique, a preceding '.' will be added if not already present

    Raises
    ------
    TypeError
        if a key is neither None nor a string
    ValueError
        if a key is empty / white-space only, or is already registered
    """

    def __init__(self, *args: str | None) -> None:
        suffixes: list[str | None] = list(args)
        for i, sfx in enumerate(suffixes):
            if sfx is None:
                if sfx in _datastore_reader_map:
                    msg = f"{sfx!r} already in {list(_datastore_reader_map)}"
                    raise ValueError(msg)  # noqa: TRY004
                continue

            if not isinstance(sfx, str):
                msg = f"{sfx!r} is not a string"
                raise TypeError(msg)

            suffix = sfx.strip()
            if not suffix:
                # covers both "" and strings consisting only of white-space,
                # which must never end up as a registry key
                msg = "cannot have white-space suffix"
                raise ValueError(msg)

            suffix = suffix if suffix[0] == "." else f".{suffix}"
            if suffix in _datastore_reader_map:
                msg = f"{suffix!r} already in {list(_datastore_reader_map)}"
                raise ValueError(msg)

            suffixes[i] = suffix

        self._type_str = tuple(suffixes)

    def __call__(self, func: type[DataStoreABC]) -> type[DataStoreABC]:
        # register the class under every normalised key
        for type_str in self._type_str:
            _datastore_reader_map[type_str] = func
        return func
def as_dict(obj: typing.Any) -> dict[str, typing.Any]:
    """returns ``obj.to_rich_dict()`` when available, otherwise ``obj`` unchanged"""
    try:
        return obj.to_rich_dict()
    except AttributeError:
        return obj
@define_app(skip_not_completed=False)
class from_primitive:
    """recreates an object from primitive python types"""

    def __init__(self, deserialiser: Callable[..., Any] = deserialise_object) -> None:
        """
        Parameters
        ----------
        deserialiser
            callable applied to the input to rebuild the original object,
            defaults to ``deserialise_object``
        """
        self.deserialiser = deserialiser

    def main(self, data: typing.Any) -> typing.Any:
        """applies the configured deserialiser to data"""
        rebuild = self.deserialiser
        return rebuild(data)
def _get_compression_open(
    path: PathType | None = None,
    compression: str | None = None,
) -> Callable[..., Any] | None:
    """looks up the opener for a compression format

    Parameters
    ----------
    path
        file path or url, used to infer the compression suffix when
        compression is not given
    compression
        file compression suffix

    Returns
    -------
    the registered function for opening that compression format, or
    None if the compression is unknown

    Raises
    ------
    ValueError
        if neither path nor compression is provided
    """
    if not path and not compression:
        msg = "either path or compression argument must be provided"
        raise ValueError(msg)

    if compression is None and path is not None:
        # second element is the compression suffix
        compression = get_format_suffixes(path)[1]

    if compression is None:
        return None
    return _compression_handlers.get(compression)
def open_(filename: PathType, mode: str = "rt", **kwargs: Any) -> IO[Any]:
    """open that handles different compression

    Parameters
    ----------
    filename
        path or url, if a url delegates processing to open_url
    mode
        standard file opening mode
    kwargs
        passed to open functions. If ``encoding`` is provided it is used
        as given; otherwise, when reading in text mode, the encoding is
        detected from the first 100 bytes of the file.

    Returns
    -------
    an object compatible with the file protocol

    Raises
    ------
    ValueError
        if filename is empty
    """
    if not filename:
        msg = f"{filename} not a valid file name or url"
        raise ValueError(msg)

    if is_url(filename):
        return open_url(filename, mode=mode, **kwargs)  # type: ignore[arg-type]

    mode = mode or "rt"
    filename = Path(filename).expanduser()
    op = _get_compression_open(filename) or open

    encoding = kwargs.pop("encoding", None)
    need_encoding = mode.startswith("r") and "b" not in mode
    # Only sniff the encoding when the caller did not specify one. The
    # value was popped from kwargs above, so it must be tested directly.
    if need_encoding and encoding is None:
        with op(filename, mode="rb") as infile:
            data = infile.read(100)
        encoding = detect(data)["encoding"]

    return op(filename, mode, encoding=encoding, **kwargs)
""" _, compression = get_format_suffixes( url.path if isinstance(url, ParseResult) else url, ) mode = mode or "r" if "r" not in mode: msg = "opening a url only allowed in read mode" raise OSError(msg) if not is_url(url): msg = f"URL scheme must be http, https or file, not {str(url)[:20]!r}" raise OSError(msg) url_parsed = url if isinstance(url, ParseResult) else urlparse(url) response = urlopen(url_parsed.geturl(), timeout=10) encoding = response.headers.get_content_charset() if compression: opener = _get_compression_open(compression=compression) if opener is not None: response = opener(response) return response if "b" in mode else TextIOWrapper(response, encoding=encoding) def _path_relative_to_zip_parent(zip_path: Path, member_path: Path) -> Path: """returns member_path relative to zip_path Parameters ---------- zip_path: Path member_path: Path Notes ----- with zip_path = "parentdir/named.zip", then member_path="named/member.tsv" or path="member.tsv" will return "named/member.tsv" """ zip_name = zip_path.name.replace(".zip", "") if zip_name not in member_path.parts: return Path(zip_name) / member_path return Path(*member_path.parts[member_path.parts.index(zip_name) :]) class atomic_write: """performs atomic write operations, cleans up if fails""" def __init__( self, path: PathType, tmpdir: PathType | None = None, in_zip: PathType | bool | None = None, mode: str = "w", encoding: str | None = None, ) -> None: """ Parameters ---------- path path to file, or relative to directory specified by in_zip tmpdir directory where temporary file will be created in_zip path to the zip archive containing path, e.g. 
if in_zip="path/to/data.zip", then path="data/seqs.tsv" Decompressing the archive will produce the "data/seqs.tsv" mode file writing mode encoding text encoding """ path = Path(path).expanduser() _, cmp = get_format_suffixes(path) zip_path: Path | None = None if in_zip: if isinstance(in_zip, bool): zip_path = path if cmp == "zip" else None else: zip_path = Path(in_zip) if zip_path and cmp == "zip": path = Path(str(path)[: str(path).rfind(".zip")]) if zip_path: path = _path_relative_to_zip_parent(zip_path, path) self._path = path self._cmp = cmp self._mode = mode self._file: IO[Any] | None = None self._encoding = encoding self._in_zip = zip_path self._tmppath = self._make_tmppath(tmpdir) self.succeeded: bool | None = None self._close_func = ( self._close_rename_zip if zip_path else self._close_rename_standard ) def _make_tmppath(self, tmpdir: PathType | None) -> Path: """returns path of temporary file Parameters ---------- tmpdir: Path to directory Returns ------- full path to a temporary file Notes ----- Uses a random uuid as the file name, adds suffixes from path """ suffixes = ( "".join(self._path.suffixes) if not self._in_zip else "".join(self._path.suffixes[:-1]) ) parent = self._in_zip.parent if self._in_zip else self._path.parent if not parent.exists(): raise OSError(f"Parent dir '{parent}' of provided path does not exist") name = f"{uuid.uuid4()}{suffixes}" tmpdir = Path(mkdtemp(dir=parent)) if tmpdir is None else Path(tmpdir) if not tmpdir.exists(): msg = f"{tmpdir} directory does not exist" raise FileNotFoundError(msg) return tmpdir / name def _get_fileobj(self) -> IO[Any]: """returns file to be written to""" if self._file is None: self._file = open_(self._tmppath, self._mode, encoding=self._encoding) return self._file def __enter__(self) -> IO[Any]: return self._get_fileobj() def _close_rename_standard(self, src: Path) -> None: dest = Path(self._path) try: dest.unlink() except FileNotFoundError: pass finally: src.rename(dest) shutil.rmtree(src.parent) def 
def path_exists(path: PathType) -> bool:
    """whether path can be interpreted as a path and that path exists

    Notes
    -----
    Any exception raised while building or checking the path is treated
    as "does not exist".
    """
    try:
        return Path(path).exists()
    except Exception:
        return False
def iter_line_blocks(
    path: PathType,
    num_lines: int | None = 1000,
    chunk_size: int | None = 5_000_000,
) -> Iterator[list[str]]:
    """yields lists of up to num_lines lines read from path

    Parameters
    ----------
    path
        data file
    num_lines
        number of lines per block. If None, all lines are returned in a
        single block.
    chunk_size
        number of bytes to load in one go from path
    """
    block: list[str] = []
    for text in iter_splitlines(path, chunk_size=chunk_size):
        block.append(text)
        if len(block) != num_lines:
            continue
        yield block
        # start a new list so the yielded one is not mutated
        block = []

    if block:
        # trailing, partially filled block
        yield block
Notes ----- Reads ``path`` in chunks of ``chunk_size`` bytes and splits on ``delimiter``, holding any trailing partial record across chunk boundaries so that delimiters spanning a boundary are detected correctly. Peak memory is bounded by ``chunk_size`` plus the size of the largest record, rather than the full file size. Operates on raw bytes only; callers that need text decoding should do so per yielded record. Examples -------- >>> import tempfile, pathlib >>> with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f: ... _ = f.write(b">a\\nAAA>b\\nBBB>c\\nCCC") ... tmp = pathlib.Path(f.name) >>> list(iter_record_chunks(path=tmp, delimiter=b">", chunk_size=8)) [b'', b'a\\nAAA', b'b\\nBBB', b'c\\nCCC'] >>> tmp.unlink() """ if not delimiter: msg = "delimiter must be non-empty" raise ValueError(msg) if is_url(path): chunk_size = None else: path = Path(path).expanduser() if chunk_size and path.stat().st_size < chunk_size: chunk_size = None # We accommodate a chunked read falling within a delimiter # by extracting the overlap_len of the last (potentially partial) # record and prepending it to the next chunk. # We only need to keep the last len(delimiter) - 1 bytes, as a delimiter # cannot span more than that. overlap_len = len(delimiter) - 1 with open_(path, mode="rb") as infile: pending: list[bytes] = [] # carry represents the portion of the last (potentially partial) record # that we need to prepend to the next chunk. 
carry = b"" while True: chunk = infile.read() if chunk_size is None else infile.read(chunk_size) if not chunk: break parts = (carry + chunk).split(delimiter) last = parts.pop() cut = max(len(last) - overlap_len, 0) carry = last[cut:] for part in parts: pending.append(part) yield b"".join(pending) pending.clear() if cut: pending.append(last[:cut]) if pending or carry: pending.append(carry) yield b"".join(pending) cogent3-scinexus-e0aee79/src/scinexus/misc.py000066400000000000000000000045561520253266500213710ustar00rootroot00000000000000"""Selected utility functions.""" from __future__ import annotations import inspect import re from typing import TYPE_CHECKING, ParamSpec, TypeVar if TYPE_CHECKING: # pragma: no cover from collections.abc import Callable P = ParamSpec("P") R = TypeVar("R") def get_object_provenance(obj: object) -> str: """returns string of complete object provenance""" # algorithm inspired by Greg Baacon's answer to # https://stackoverflow.com/questions/2020014/get-fully-qualified-class # -name-of-an-object-in-python if isinstance(obj, type) or inspect.isfunction(obj): mod = obj.__module__ name = obj.__name__ else: mod = obj.__class__.__module__ name = obj.__class__.__name__ return name if mod is None or mod == "builtins" else f"{mod}.{name}" def extend_docstring_from( source: object, pre: bool = False ) -> Callable[[Callable[P, R]], Callable[P, R]]: def docstring_inheriting_decorator(dest: Callable[P, R]) -> Callable[P, R]: parts = [source.__doc__ or "", dest.__doc__ or ""] # trim leading/trailing blank lines from parts for i, part in enumerate(parts): lines = part.split("\n") if not lines[0].strip(): lines.pop(0) if lines and not lines[-1].strip(): lines.pop(-1) parts[i] = "\n".join(lines) if pre: parts.reverse() dest.__doc__ = "\n".join(parts) return dest return docstring_inheriting_decorator _doc_block = re.compile( r"^\s*(Parameters|Notes|Raises)", flags=re.IGNORECASE | re.MULTILINE, ) def docstring_to_summary_rest(text: str) -> tuple[str, str]: 
def in_jupyter() -> bool:
    """whether code is being executed within a jupyter notebook

    Notes
    -----
    Only checks whether the name ``get_ipython`` can be resolved.
    """
    try:
        # evaluating the bare name raises NameError when it is not defined
        get_ipython  # type: ignore[name-defined]  # noqa: B018
    except NameError:
        return False
    return True
class MultiprocessBackend(Parallel):
    """parallel backend using the stdlib ``concurrent.futures.ProcessPoolExecutor``

    Notes
    -----
    Worker processes are created with the ``"spawn"`` multiprocessing
    context. A new executor is created for every call to ``imap`` or
    ``as_completed`` and is shut down when the generator finishes.
    """

    def imap(
        self,
        f: Callable[[T], R],
        s: Iterable[T],
        max_workers: int | None = None,
        **kwargs: Any,
    ) -> Generator[R]:
        """yield results of ``f`` applied to each element of ``s``, in order

        Parameters
        ----------
        f
            function applied to each element
        s
            the work items
        max_workers
            number of worker processes, resolved by
            ``_resolve_max_workers_local``
        kwargs
            only ``chunksize`` is read; a falsy value means a default is
            chosen by ``_resolve_chunksize``
        """
        max_workers = _resolve_max_workers_local(max_workers)
        chunksize = _resolve_chunksize(s, max_workers, kwargs.get("chunksize"))
        ctx = multiprocessing.get_context("spawn")
        # the executor must stay open while results are being consumed,
        # hence the yield inside the with block
        with concurrentfutures.ProcessPoolExecutor(
            max_workers=max_workers, mp_context=ctx
        ) as executor:
            yield from executor.map(f, s, chunksize=chunksize)

    def as_completed(
        self,
        f: Callable[[T], R],
        s: Iterable[T],
        max_workers: int | None = None,
        **kwargs: Any,
    ) -> Generator[R]:
        """yield results of ``f`` applied to each element of ``s``, in completion order

        Parameters
        ----------
        f
            function applied to each element
        s
            the work items, all of which are submitted before the first
            result is yielded
        max_workers
            number of worker processes, clamped by
            ``_clamp_max_workers_local``
        kwargs
            not used by this method
        """
        max_workers = _clamp_max_workers_local(max_workers)
        ctx = multiprocessing.get_context("spawn")
        with concurrentfutures.ProcessPoolExecutor(
            max_workers=max_workers, mp_context=ctx
        ) as executor:
            # one future per work item
            to_do = [executor.submit(f, e) for e in s]
            for result in concurrentfutures.as_completed(to_do):
                # .result() re-raises any exception raised by f
                yield result.result()

    def is_master_process(self) -> bool:
        """return True if the current process has no parent process"""
        return multiprocessing.parent_process() is None

    def get_rank(self) -> int:
        """return the rank of the current process, as given by ``_get_rank_local``"""
        return _get_rank_local()

    def get_size(self) -> int:
        """return the CPU count reported by ``multiprocessing.cpu_count``"""
        return multiprocessing.cpu_count()
def as_completed(
    self,
    f: Callable[[T], R],
    s: Iterable[T],
    max_workers: int | None = None,
    **kwargs: Any,
) -> Generator[R]:
    """yield results of ``f`` applied to each element of ``s``, in completion order

    Parameters
    ----------
    f
        function applied to each element
    s
        the work items, all submitted before the first result is yielded
    max_workers
        number of worker processes, clamped by ``_clamp_max_workers_local``
    kwargs
        not used by this method
    """
    import loky  # type: ignore[import-untyped]

    worker_count = _clamp_max_workers_local(max_workers)
    with loky.get_reusable_executor(max_workers=worker_count) as pool:
        pending = []
        for item in s:
            pending.append(pool.submit(f, item))

        for done in concurrentfutures.as_completed(pending):
            # .result() re-raises any exception raised by f
            yield done.result()
def is_master_process(self) -> bool:
    """return True if the current process is the master

    Notes
    -----
    A process whose script name (``sys.argv[0]``) is ``server.py`` is
    never the master; otherwise the master is the process with
    communicator rank 0.
    """
    # NOTE(review): "server.py" is presumably the mpi4py.futures worker
    # entry point -- confirm
    script_name = sys.argv[0].rsplit(os.sep, 1)[-1]
    if script_name == "server.py":
        return False
    return self._comm.Get_rank() == 0
For parallel MPI execution, use:\n" " $ mpiexec -n python -m mpi4py.futures" " " ) if if_serial == "raise": raise RuntimeError(err_msg) if if_serial == "warn": warnings.warn(err_msg, UserWarning, stacklevel=4) def _clamp_workers(self, max_workers: int) -> int: if max_workers > self._size: warnings.warn( "max_workers too large, reducing to UNIVERSE_SIZE-1", UserWarning, stacklevel=3, ) return min(max_workers, self._size - 1) class PicklableAndCallable(Generic[P, R]): """wraps a callable so it is picklable for use with MPI executors""" def __init__(self, func: Callable[P, R]) -> None: self.func = func def __call__(self, *args: P.args, **kw: P.kwargs) -> R: return self.func(*args, **kw) BACKEND_TYPES: MappingProxyType[BackendType, type[Parallel]] = MappingProxyType( { "multiprocess": MultiprocessBackend, "loky": LokyBackend, "mpi": MPIBackend, } ) def _resolve_max_workers_local(max_workers: int | None) -> int: """resolve max_workers for local (non-MPI) backends""" cpu = multiprocessing.cpu_count() if not max_workers: return cpu if max_workers > cpu: msg = f"max_workers ({max_workers}) must be less than or equal to CPU count ({cpu})" raise ValueError(msg) return max_workers def _clamp_max_workers_local(max_workers: int | None) -> int: """clamp max_workers for local as_completed (silent, no raise)""" if not max_workers or max_workers > multiprocessing.cpu_count(): return multiprocessing.cpu_count() return max_workers def _get_rank_local() -> int: """return the rank of the current process for local backends""" process_name = multiprocessing.current_process().name return int(process_name.split("-")[-1]) if process_name != "MainProcess" else 0 def _resolve_chunksize( s: Iterable[Any], max_workers: int, chunksize: int | None ) -> int: """resolve chunksize, defaulting via get_default_chunksize for Sized inputs""" if not chunksize: return get_default_chunksize(s, max_workers) if isinstance(s, Sized) else 1 return chunksize def _validate_if_serial( if_serial: str, ) -> 
Literal["raise", "ignore", "warn"]: """validate and normalise the if_serial parameter""" if_serial = if_serial.lower() if if_serial not in ("ignore", "raise", "warn"): msg = f"invalid choice '{if_serial}'" raise ValueError(msg) return cast("Literal['raise', 'ignore', 'warn']", if_serial) def get_default_chunksize(s: Sized, max_workers: int) -> int: """compute a stable chunksize for distributing items across workers Parameters ---------- s a sized collection of work items max_workers number of worker processes """ chunksize, remainder = divmod(len(s), max_workers * 4) if remainder: chunksize += 1 return chunksize _default_backend: Parallel | None = None _mpi_backend: MPIBackend | None = None def _make_backend(backend: BackendType) -> Parallel: """create a backend instance from a backend type string""" if backend == "loky": try: import loky # noqa: F401 except ImportError: msg = 'loky is not installed, use pip install "scinexus[loky]"' raise ImportError(msg) from None elif backend == "mpi" and MPI is None: msg = 'mpi4py is not installed, use pip install "scinexus[mpi]"' raise ImportError(msg) return BACKEND_TYPES[backend]() def set_parallel_backend( backend: BackendType | Parallel | None = None, ) -> None: """set the default parallel execution backend Parameters ---------- backend a ``Parallel`` instance, a string literal (``"multiprocess"``, ``"loky"``, or ``"mpi"``), or ``None`` to reset to the default (``MultiprocessBackend``). 
""" global _default_backend # noqa: PLW0603 if backend is None or isinstance(backend, Parallel): _default_backend = backend elif backend in BACKEND_TYPES: _default_backend = _make_backend(backend) else: msg = ( f"unknown backend {backend!r}, expected 'multiprocess'," " 'loky', 'mpi', or a Parallel instance" ) raise ValueError(msg) def get_parallel_backend(backend: BackendType | None = None) -> Parallel: """return the current parallel execution backend Parameters ---------- backend if provided, return an instance of this backend type without changing the global default. This lets a package obtain the backend it needs without disrupting the settings of other packages. Returns ------- `MultiprocessBackend`` when no backend has been set and ``backend is None``. """ if backend is not None: return _make_backend(backend) global _default_backend # noqa: PLW0603 if _default_backend is None: _default_backend = MultiprocessBackend() return _default_backend def _effective_backend() -> Parallel: """return the backend for the current process context If MPI is active, always returns an ``MPIBackend`` regardless of the default -- MPI worker processes don't inherit the parent's backend setting, and introspection functions like ``get_rank()`` must use the MPI communicator to report correctly. 
""" global _mpi_backend # noqa: PLW0603 if USING_MPI: if _mpi_backend is None: _mpi_backend = MPIBackend() return _mpi_backend return get_parallel_backend() def get_rank() -> int: """Returns the rank of the current process""" return _effective_backend().get_rank() def get_size() -> int: """Returns the num cpus""" return _effective_backend().get_size() SIZE = ( COMM.Get_attr(MPI.UNIVERSE_SIZE) # type: ignore[possibly-undefined] if USING_MPI else multiprocessing.cpu_count() ) def is_master_process() -> bool: """ Evaluates if current process is master In case of MPI checks whether current process is being run on file generated by mpi4py.futures In case of Multiprocessing checks if generated process name included "ForkProcess" for Windows or "SpawnProcess" for POSIX """ return _effective_backend().is_master_process() def imap( f: Callable[[T], R], s: Iterable[T], max_workers: int | None = None, use_mpi: bool = False, if_serial: Literal["raise", "ignore", "warn"] = "raise", chunksize: int | None = None, ) -> Generator[R]: """ Parameters ---------- f function that operates on values in s s series of inputs to f max_workers maximum number of workers. Defaults to 1-maximum available. use_mpi use MPI for parallel execution. Temporarily switches to ``MPIBackend`` for the duration of the call. if_serial action to take if conditions will result in serial execution. Valid values are 'raise', 'ignore', 'warn'. Defaults to 'raise'. chunksize Size of data chunks executed by worker processes. Defaults to None where stable chunksize is determined by get_default_chunksize() Returns ------- imap and as_completed are generators yielding result of f(s[i]), map returns the result series. imap and map return results in the same order as s, as_completed returns results in the order completed (which can differ from the order in s). Notes ----- To use MPI, you must have openmpi (use conda or your preferred package manager) and mpi4py (use pip or conda) installed. 
In addition, your initial script must have a ``if __name__ == '__main__':`` block. You then invoke your program using `$ mpiexec -n python3 -m mpi4py.futures ` """ if_serial = _validate_if_serial(if_serial) if use_mpi: backend = MPIBackend() yield from backend.imap( f, s, max_workers=max_workers, if_serial=if_serial, chunksize=chunksize ) else: yield from get_parallel_backend().imap( f, s, max_workers=max_workers, chunksize=chunksize ) @extend_docstring_from(imap) def map( f: Callable[[T], R], s: Iterable[T], max_workers: int | None = None, use_mpi: bool = False, if_serial: Literal["raise", "ignore", "warn"] = "raise", chunksize: int | None = None, ) -> list[R]: return list(imap(f, s, max_workers, use_mpi, if_serial, chunksize)) @extend_docstring_from(imap, pre=True) def as_completed( f: Callable[[T], R], s: Iterable[T], max_workers: int | None = None, use_mpi: bool = False, if_serial: Literal["raise", "ignore", "warn"] = "raise", chunksize: int | None = None, ) -> Generator[R]: if_serial = _validate_if_serial(if_serial) if use_mpi: backend = MPIBackend() yield from backend.as_completed( f, s, max_workers=max_workers, if_serial=if_serial, chunksize=chunksize ) else: yield from get_parallel_backend().as_completed( f, s, max_workers=max_workers, chunksize=chunksize ) cogent3-scinexus-e0aee79/src/scinexus/progress.py000066400000000000000000000430431520253266500222740ustar00rootroot00000000000000"""Generic progress bar framework with pluggable backends.""" from __future__ import annotations import contextlib from abc import ABC, abstractmethod from collections.abc import Sized from typing import TYPE_CHECKING, Literal, TypeVar if TYPE_CHECKING: # pragma: no cover from collections.abc import Iterable, Iterator from types import TracebackType from typing import Any, Self T = TypeVar("T") ProgressType = Literal["tqdm", "rich"] class ProgressContext(ABC): """A handle for push-based progress updates.""" def __init__(self, start: float = 0.0, end: float = 1.0) -> None: """ 
Parameters ---------- start start of the progress range (0.0-1.0) end end of the progress range (0.0-1.0) """ self._start = start self._end = end def _map(self, progress: float) -> float: return self._start + progress * (self._end - self._start) @abstractmethod def update(self, *, progress: float, msg: str = "") -> None: """Update the progress bar. Parameters ---------- progress fraction complete [0.0, 1.0], mapped to [start, end] range msg description text to display """ def close(self) -> None: # noqa: B027 """Close the progress bar. Override in subclasses with cleanup.""" def __enter__(self) -> Self: """Enter the context manager, returning self.""" return self def __exit__( self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> None: """Exit the context manager, calling close().""" self.close() class Progress(ABC): """A callable that wraps an iterable to display progress.""" @abstractmethod def __call__( self, iterable: Iterable[T], *, total: int | None = None, msg: str = "", ) -> Iterator[T]: """Wrap an iterable to display a progress bar. Parameters ---------- iterable items to iterate over total total number of items. Inferred from ``len(iterable)`` when possible. msg description text to display """ @abstractmethod def child(self, *, leave: bool | None = None) -> Progress: """Return a child Progress for nested bars. Parameters ---------- leave whether the child bar persists after completion. ``None`` inherits the parent setting. """ @abstractmethod def context( self, *, msg: str = "", start: float = 0.0, end: float = 1.0, ) -> ProgressContext: """Create a push-based progress context. Parameters ---------- msg initial description start start of the progress range (0.0-1.0) end end of the progress range (0.0-1.0) """ def close(self) -> None: # noqa: B027 """Finalize progress display. 
Override in subclasses with cleanup.""" def __enter__(self) -> Self: """Enter the context manager, returning self.""" return self def __exit__( self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> None: """Exit the context manager, calling close().""" self.close() class _NoOpContext(ProgressContext): """A no-op progress context that silently ignores updates.""" def update(self, *, progress: float, msg: str = "") -> None: pass _NO_OP_CONTEXT = _NoOpContext() # pylint: disable=unused-argument # ruff: noqa: ARG002 class NoProgress(Progress): """A no-op progress wrapper that passes through the iterable unchanged.""" def __call__( self, iterable: Iterable[T], *, total: int | None = None, msg: str = "", ) -> Iterator[T]: """Yield items from the iterable unchanged.""" yield from iterable def child(self, *, leave: bool | None = None) -> NoProgress: """Return self since no-op progress needs no nesting.""" return self def context( self, *, msg: str = "", start: float = 0.0, end: float = 1.0, ) -> ProgressContext: """Return a shared no-op context that silently ignores updates.""" return _NO_OP_CONTEXT class _TqdmContext(ProgressContext): """Push-based progress context backed by a tqdm bar.""" def __init__(self, bar: Any, start: float, end: float) -> None: super().__init__(start, end) self._bar = bar def update(self, *, progress: float, msg: str = "") -> None: self._bar.n = self._map(progress) if msg: self._bar.set_description(msg) self._bar.refresh() def close(self) -> None: self._bar.close() class TqdmProgress(Progress): """Progress wrapper using tqdm.auto (handles TTY, Jupyter, etc.).""" def __init__( self, refresh_per_second: float = 10.0, bar_format: str | None = "{desc}: {bar} {n_fmt}/{total_fmt} [{elapsed}<{remaining}]", leave: bool | None = None, colour: str | None = None, bar_width: int | None = None, **tqdm_kwargs: Any, ) -> None: """ Parameters ---------- refresh_per_second how often to refresh the display 
bar_format custom bar format string passed to tqdm leave whether the bar persists after completion. ``None`` uses position-based logic (persist at position 0, clear otherwise). colour bar colour, forwarded to tqdm bar_width total width of the progress bar in characters **tqdm_kwargs additional keyword arguments (e.g. ``dynamic_ncols=True``) forwarded to tqdm """ if refresh_per_second <= 0: msg = f"refresh_per_second must be positive, got {refresh_per_second!r}" raise ValueError(msg) self._position = tqdm_kwargs.pop("position", 0) self._refresh_per_second = refresh_per_second self._bar_format = bar_format self._leave = leave self._colour = colour self._bar_width = bar_width self._tqdm_kwargs = tqdm_kwargs self._bar: Any = None self._children: list[TqdmProgress] = [] def _resolve_leave(self) -> bool: return self._leave if self._leave is not None else self._position == 0 def _make_bar(self, *, total: float | None, msg: str) -> Any: from tqdm.auto import tqdm # type: ignore[import-untyped] if self._bar_width is not None: ncols_kwargs: dict[str, Any] = { "ncols": self._bar_width, "dynamic_ncols": False, } tqdm_kw = { k: v for k, v in self._tqdm_kwargs.items() if k != "dynamic_ncols" } else: tqdm_kw = self._tqdm_kwargs ncols_kwargs = {} if "dynamic_ncols" in tqdm_kw else {"dynamic_ncols": True} return tqdm( total=total, desc=msg, position=self._position, leave=self._resolve_leave(), mininterval=1.0 / self._refresh_per_second, bar_format=self._bar_format, colour=self._colour, **ncols_kwargs, **tqdm_kw, ) def __call__( self, iterable: Iterable[T], *, total: int | None = None, msg: str = "", ) -> Iterator[T]: total = ( len(iterable) if isinstance(iterable, Sized) and total is None else total ) if self._bar is None: self._bar = self._make_bar(total=total, msg=msg) else: self._bar.n = 0 self._bar.last_print_n = 0 self._bar.total = total if msg: self._bar.set_description(msg) for item in iterable: yield item self._bar.update(1) self._bar.refresh() def close(self) -> None: 
"""Close all bars so the cursor moves past them.""" for child in reversed(self._children): if child._bar is not None: child._bar.close() child._bar = None if self._bar is not None: self._bar.close() self._bar = None def __del__(self) -> None: with contextlib.suppress(Exception): self.close() def child(self, *, leave: bool | None = None) -> TqdmProgress: """Return a child TqdmProgress at the next cursor position. Parameters ---------- leave whether the child bar persists after completion. ``None`` inherits the parent setting. """ child = TqdmProgress( refresh_per_second=self._refresh_per_second, bar_format=self._bar_format, leave=leave if leave is not None else self._leave, colour=self._colour, bar_width=self._bar_width, **self._tqdm_kwargs, ) child._position = self._position + 1 self._children.append(child) return child def context( self, *, msg: str = "", start: float = 0.0, end: float = 1.0, ) -> ProgressContext: """Create a tqdm-backed push-based progress context.""" bar = self._make_bar(total=1.0, msg=msg) return _TqdmContext(bar, start, end) class _RichContext(ProgressContext): """Push-based progress context backed by a rich progress bar.""" def __init__( self, progress: Any, task: Any, start: float, end: float, *, leave: bool, ) -> None: super().__init__(start, end) self._progress = progress self._task = task self._leave = leave def update(self, *, progress: float, msg: str = "") -> None: self._progress.update( self._task, completed=self._map(progress), description=msg or None ) def close(self) -> None: if self._leave: self._progress.update(self._task, completed=1.0) else: self._progress.remove_task(self._task) class RichProgress(Progress): """Progress wrapper using the rich library (requires ``pip install "scinexus[rich]"``).""" def __init__( self, progress: Any = None, refresh_per_second: float = 10.0, leave: bool = False, colour: str | None = None, bar_width: int | None = None, **rich_kwargs: Any, ) -> None: """ Parameters ---------- progress an existing 
``rich.progress.Progress`` instance, or None to create one on first call refresh_per_second how often to refresh the display leave whether completed bars persist in the display colour bar colour, applied when auto-creating the display bar_width width of the progress bar portion in characters **rich_kwargs additional keyword arguments (e.g. ``disable=True``) forwarded to ``rich.progress.Progress`` """ if refresh_per_second <= 0: msg = f"refresh_per_second must be positive, got {refresh_per_second!r}" raise ValueError(msg) self._progress = progress self._owns_progress = progress is None self._refresh_per_second = refresh_per_second self._leave = leave self._colour = colour self._bar_width = bar_width self._rich_kwargs = rich_kwargs self._task: Any = None self._last_total: int | None = None self._children: list[RichProgress] = [] def _ensure_progress(self) -> Any: if self._progress is None: from rich.progress import ( # type: ignore[import-not-found] BarColumn, TextColumn, TimeElapsedColumn, TimeRemainingColumn, ) from rich.progress import ( Progress as RProgress, ) bar_kwargs: dict[str, Any] = {} if self._bar_width is not None: bar_kwargs["bar_width"] = self._bar_width if self._colour is not None: bar_kwargs["complete_style"] = self._colour bar_kwargs["finished_style"] = self._colour bar_column = BarColumn(**bar_kwargs) self._progress = RProgress( TextColumn("[progress.description]{task.description}"), bar_column, TimeElapsedColumn(), TimeRemainingColumn(), refresh_per_second=self._refresh_per_second, **self._rich_kwargs, ) self._progress.start() return self._progress def __call__( self, iterable: Iterable[T], *, total: int | None = None, msg: str = "", ) -> Iterator[T]: total = ( len(iterable) if isinstance(iterable, Sized) and total is None else total ) rp = self._ensure_progress() if self._task is None: self._task = rp.add_task(msg, total=total) else: rp.reset(self._task, total=total, description=msg) self._last_total = total for item in iterable: yield item 
rp.advance(self._task) rp.refresh() if not self._leave: rp.remove_task(self._task) self._task = None def _cleanup_task(self) -> None: if self._task is not None and self._progress is not None: if self._leave and self._last_total is not None: self._progress.update(self._task, completed=self._last_total) else: self._progress.remove_task(self._task) self._task = None def close(self) -> None: """Finalize the rich progress display so the cursor moves past it.""" for child in reversed(self._children): child._cleanup_task() self._cleanup_task() if self._owns_progress and self._progress is not None: self._progress.stop() self._progress = None def __del__(self) -> None: with contextlib.suppress(Exception): self.close() def child(self, *, leave: bool | None = None) -> RichProgress: """Return a child RichProgress sharing the same display. Parameters ---------- leave whether the child bar persists after completion. ``None`` inherits the parent setting. """ child = RichProgress( progress=self._ensure_progress(), refresh_per_second=self._refresh_per_second, leave=leave if leave is not None else self._leave, colour=self._colour, bar_width=self._bar_width, **self._rich_kwargs, ) self._children.append(child) return child def context( self, *, msg: str = "", start: float = 0.0, end: float = 1.0, ) -> ProgressContext: """Create a rich-backed push-based progress context.""" rp = self._ensure_progress() task = rp.add_task(msg, total=1.0) return _RichContext(rp, task, start, end, leave=self._leave) _default_progress: Progress | None = None def set_progress_backend( progress: ProgressType | Progress | None = None, **kwargs: Any ) -> None: """Set the default Progress used when ``show_progress=True``. Parameters ---------- progress A ``Progress`` instance, a string literal (``"tqdm"`` or ``"rich"``), or ``None`` to reset to the default (``TqdmProgress``). 
**kwargs additional keyword arguments forwarded to the backend constructor when ``progress`` is ``"tqdm"`` or ``"rich"`` """ global _default_progress # noqa: PLW0603 if progress is None or isinstance(progress, Progress): _default_progress = progress elif progress == "tqdm": _default_progress = TqdmProgress(**kwargs) elif progress == "rich": try: import rich # noqa: F401 except ImportError: msg = 'rich is not installed, use pip install "scinexus[rich]"' raise ImportError(msg) from None _default_progress = RichProgress(**kwargs) else: msg = f"unknown progress type {progress!r}, expected 'tqdm', 'rich', or a Progress instance" raise ValueError(msg) def get_progress(show_progress: bool | Progress = False, **kwargs: Any) -> Progress: """Resolve a ``show_progress`` argument into a ``Progress`` instance. Parameters ---------- show_progress If a ``Progress`` instance, returned as-is. If ``True``, returns the module default (set via ``set_progress_backend``, or ``TqdmProgress``). If falsy, returns ``NoProgress``. **kwargs additional keyword arguments forwarded to the backend constructor. When a default has been set via ``set_progress_backend``, a new instance of the same type is created with these kwargs. Ignored when ``show_progress`` is a ``Progress`` instance. 
""" if isinstance(show_progress, Progress): return show_progress if not show_progress: return NoProgress() if _default_progress is not None: return type(_default_progress)(**kwargs) if kwargs else _default_progress return TqdmProgress(**kwargs) cogent3-scinexus-e0aee79/src/scinexus/py.typed000066400000000000000000000000001520253266500215370ustar00rootroot00000000000000cogent3-scinexus-e0aee79/src/scinexus/sqlite_data_store.py000066400000000000000000000422531520253266500241400ustar00rootroot00000000000000from __future__ import annotations import contextlib import datetime import os import re import sqlite3 import weakref from pathlib import Path from typing import TYPE_CHECKING, Any from scitrack import get_text_hexdigest # type: ignore[import-untyped] from scinexus.data_store import ( APPEND, LOG_TABLE, OVERWRITE, READONLY, DataMember, DataMemberABC, DataStoreABC, DataStoreDirectory, Mode, ) from scinexus.misc import extend_docstring_from if TYPE_CHECKING: # pragma: no cover from citeable import CitationBase RESULT_TABLE = "results" _MEMORY = ":memory:" _mem_pattern = re.compile(r"^\s*[:]{0,1}memory[:]{0,1}\s*$") NoneType = type(None) # dealing with python3.12 deprecation of datetime objects and their sqlite3 handling def _datetime_to_iso(timestamp: datetime.datetime) -> str: """timestamp in ISO 8601 format""" return timestamp.isoformat() sqlite3.register_adapter(datetime.datetime, _datetime_to_iso) def _datetime_from_iso(data: bytes) -> datetime.datetime: """timestamp from ISO 8601 format""" return datetime.datetime.fromisoformat(data.decode()) sqlite3.register_converter("timestamp", _datetime_from_iso) # create db def open_sqlite_db_rw(path: str | Path) -> sqlite3.Connection: """creates a new sqlitedb for read/write at path, can be an in-memory db Notes ----- This function embeds the schema. 
There are three tables: - results: analysis objects, may be completed or not completed - logs: log-file contents - state: whether db is locked to a process Returns ------- Handle to a sqlite3 session """ db = sqlite3.connect( path, isolation_level=None, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES, ) db.row_factory = sqlite3.Row create_template = "CREATE TABLE IF NOT EXISTS {};" # note it is essential to use INTEGER for the autoincrement of primary key to work creates = [ "state(state_id INTEGER PRIMARY KEY, record_type TEXT, lock_pid INTEGER)", f"{LOG_TABLE}(log_id INTEGER PRIMARY KEY, log_name TEXT, date timestamp, data BLOB)", f"{RESULT_TABLE}(record_id TEXT PRIMARY KEY, log_id INTEGER, md5 BLOB, is_completed INTEGER, data BLOB)", "citations(citation_id INTEGER PRIMARY KEY, data TEXT)", ] for table in creates: db.execute(create_template.format(table)) return db def open_sqlite_db_ro(path: str | Path) -> sqlite3.Connection: """returns db opened as read only Returns ------- Handle to a sqlite3 session """ db = sqlite3.connect( f"file:{path}?mode=ro", isolation_level=None, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES, uri=True, ) db.row_factory = sqlite3.Row if not has_valid_schema(db): msg = "database does not have a valid schema" raise ValueError(msg) return db def has_valid_schema(db: sqlite3.Connection) -> bool: # TODO: should be a full schema check query = "SELECT name FROM sqlite_master WHERE type='table'" result = db.execute(query).fetchall() table_names = {r["name"] for r in result} _required = {RESULT_TABLE, LOG_TABLE, "state"} _optional = {"citations"} return _required <= table_names <= (_required | _optional) class DataStoreSqlite(DataStoreABC): """data store backed by a SQLite database""" store_suffix = "sqlitedb" def __init__( self, source: str | Path, mode: Mode | str = READONLY, limit: int | None = None, verbose: bool = False, ) -> None: if _mem_pattern.search(str(source)): self._source: str | Path = _MEMORY else: 
source = Path(source).expanduser() self._source = ( source if source.suffix[1:] == self.store_suffix # sliced to remove "." else Path(f"{source}.{self.store_suffix}") ) self._mode = Mode(mode) if mode is not READONLY and limit is not None: msg = "Using limit argument is only valid for readonly datastores" raise ValueError( msg, ) self._limit = limit self._verbose = verbose self._db: sqlite3.Connection | None = None self._open = False self._log_id: int | None = None weakref.finalize(self, self.close) def __getstate__(self) -> dict[str, object]: return {**self._init_vals} def __setstate__(self, state: dict[str, Any]) -> None: # this will reset connections to read only db's obj = self.__class__(**state) self.__dict__.update(obj.__dict__) def __del__(self) -> None: """close the db connection when the object is deleted""" self.close() @property def source(self) -> str | Path: """string that references connecting to data store, override in subclass constructor""" return self._source @property def mode(self) -> Mode: """string that references datastore mode, override in override in subclass constructor""" return self._mode @property def limit(self) -> int | None: return self._limit @property def db(self) -> sqlite3.Connection: if self._db is None: db_func = open_sqlite_db_ro if self.mode is READONLY else open_sqlite_db_rw self._db = db_func(self.source) self._open = True self.lock() if self._db is None: msg = "database connection is unexpectedly None" raise ValueError(msg) return self._db def _init_log(self) -> None: timestamp = datetime.datetime.now(tz=datetime.UTC) self.db.execute(f"INSERT INTO {LOG_TABLE}(date) VALUES (?)", (timestamp,)) self._log_id = self.db.execute( f"SELECT log_id FROM {LOG_TABLE} where date = ?", (timestamp,), ).fetchone()["log_id"] def close(self) -> None: """close the database connection""" db: sqlite3.Connection | None = getattr(self, "_db", None) if db is None: return with contextlib.suppress(sqlite3.ProgrammingError): db.close() self._open = 
False def read(self, unique_id: str) -> str | bytes: """ identifier string formed from Path(table_name) / identifier """ uid_path = Path(unique_id) table_name = str(uid_path.parent) if table_name not in ( ".", LOG_TABLE, ): msg = f"unknown table for {str(uid_path)!r}" raise ValueError(msg) if table_name != LOG_TABLE: cmnd = f"SELECT * FROM {RESULT_TABLE} WHERE record_id = ?" result = self.db.execute(cmnd, (uid_path.name,)).fetchone() return result["data"] cmnd = f"SELECT * FROM {LOG_TABLE} WHERE log_name = ?" result = self.db.execute(cmnd, (uid_path.name,)).fetchone() return result["data"] @property def completed(self) -> list[DataMemberABC]: if not self._completed: self._completed = self._select_members( table_name=RESULT_TABLE, is_completed=True, ) return self._completed @property def not_completed(self) -> list[DataMemberABC]: """returns database records of type NotCompleted""" if not self._not_completed: self._not_completed = self._select_members( table_name=RESULT_TABLE, is_completed=False, ) return self._not_completed def _select_members( self, *, table_name: str, is_completed: bool, ) -> list[DataMemberABC]: limit = f"LIMIT {self.limit}" if self.limit else "" cmnd = self.db.execute( f"SELECT record_id FROM {table_name} WHERE is_completed=? {limit}", (is_completed,), ) return [ DataMember(data_store=self, unique_id=r["record_id"]) for r in cmnd.fetchall() ] @property def logs(self) -> list[DataMemberABC]: """returns all log records""" cmnd = self.db.execute(f"SELECT log_name FROM {LOG_TABLE}") return [ DataMember(data_store=self, unique_id=Path(LOG_TABLE) / r["log_name"]) for r in cmnd.fetchall() if r["log_name"] ] def _write( self, *, table_name: str, unique_id: str, data: str | bytes, is_completed: bool, ) -> DataMemberABC | None: """ Parameters ---------- table_name name of table to save data. It must be _RESULT_TABLE or _LOG_TABLE. unique_id unique identifier that data will be saved under. data data to be saved. 
is_completed flag to identify NotCompleted results Returns ------- DataMember instance or None when writing to _LOG_TABLE """ if self._log_id is None: self._init_log() if table_name == LOG_TABLE: # TODO how to evaluate whether writing a new log? cmnd = f"UPDATE {table_name} SET data =?, log_name =? WHERE log_id=?" self.db.execute(cmnd, (data, unique_id, self._log_id)) return None md5 = get_text_hexdigest(data) if unique_id in self and self.mode is not APPEND: cmnd = f"UPDATE {table_name} SET data= ?, log_id=?, md5=? WHERE record_id=?" self.db.execute(cmnd, (data, self._log_id, md5, unique_id)) else: cmnd = f"INSERT INTO {table_name} (record_id,data,log_id,md5,is_completed) VALUES (?,?,?,?,?)" self.db.execute(cmnd, (unique_id, data, self._log_id, md5, is_completed)) return DataMember(data_store=self, unique_id=unique_id) def drop_not_completed(self, *, unique_id: str | None = None) -> None: """remove not-completed records from the database Parameters ---------- unique_id if provided, only drop the record with this identifier, otherwise drop all not-completed records """ vals: tuple[int] | tuple[int, str] if not unique_id: cmnd = f"DELETE FROM {RESULT_TABLE} WHERE is_completed=?" vals = (0,) else: cmnd = f"DELETE FROM {RESULT_TABLE} WHERE is_completed=? AND record_id=?" 
vals = (0, unique_id) self.db.execute(cmnd, vals) self._not_completed = [] @property def _lock_id(self) -> int | None: """returns lock_pid""" result = self.db.execute("SELECT lock_pid FROM state").fetchone() return result[0] if result else result @property def locked(self) -> bool: """returns if lock_pid is NULL or doesn't exist.""" return self._lock_id is not None def lock(self) -> None: """if writable, and not locked, locks the database to this pid""" if self.mode is READONLY: return if self._db is None: msg = "database connection is unexpectedly None" raise RuntimeError(msg) result = self._db.execute("SELECT state_id,lock_pid FROM state").fetchall() locked = result[0]["lock_pid"] if result else None if locked and self.mode is OVERWRITE: msg = ( f"You are trying to OVERWRITE {str(self.source)!r} which is " "locked. Use APPEND mode or unlock." ) raise OSError( msg, ) if result: # we will update an existing state_id = result[0]["state_id"] cmnd = "UPDATE state SET lock_pid=? WHERE state_id=?" vals = [os.getpid(), state_id] else: cmnd = "INSERT INTO state(lock_pid) VALUES (?)" vals = [os.getpid()] self._db.execute(cmnd, tuple(vals)) def unlock(self, force: bool = False) -> None: """remove a lock if pid matches. If force, ignores pid. 
ignored if mode is READONLY""" if self.mode is READONLY: return lock_id = self._lock_id if lock_id is None: return if lock_id == os.getpid() or force: self.db.execute("UPDATE state SET lock_pid=NULL WHERE state_id=1") return @extend_docstring_from(DataStoreDirectory.write) def write(self, *, unique_id: str, data: str | bytes) -> DataMemberABC: # type: ignore[override] if unique_id.startswith(RESULT_TABLE): unique_id = Path(unique_id).name super().write(unique_id=unique_id, data=data) self.drop_not_completed(unique_id=unique_id) member = self._write( table_name=RESULT_TABLE, unique_id=unique_id, data=data, is_completed=True, ) if member is None: msg = "write to results table failed to produce a member" raise RuntimeError(msg) if member not in self._completed: self._completed.append(member) return member @extend_docstring_from(DataStoreDirectory.write_log) def write_log(self, *, unique_id: str, data: str | bytes) -> None: if unique_id.startswith(LOG_TABLE): unique_id = Path(unique_id).name super().write_log(unique_id=unique_id, data=data) _ = self._write( table_name=LOG_TABLE, unique_id=unique_id, data=data, is_completed=False, ) @extend_docstring_from(DataStoreDirectory.write_not_completed) def write_not_completed( # type: ignore[override] self, *, unique_id: str, data: str | bytes ) -> DataMemberABC: if unique_id.startswith(RESULT_TABLE): unique_id = Path(unique_id).name super().write_not_completed(unique_id=unique_id, data=data) member = self._write( table_name=RESULT_TABLE, unique_id=unique_id, data=data, is_completed=False, ) if member is None: msg = "write to results table failed to produce a member" raise RuntimeError(msg) self._not_completed.append(member) return member def md5(self, unique_id: str) -> str | None: """ Parameters ---------- unique_id name of data store member Returns ------- md5 checksum for the member, if available, None otherwise """ cmnd = f"SELECT * FROM {RESULT_TABLE} WHERE record_id = ?" 
result = self.db.execute(cmnd, (unique_id,)).fetchone() return result["md5"] if result else None def write_citations(self, *, data: tuple[CitationBase, ...]) -> None: if not data: return if not self._has_citations_table(): self.db.execute( "CREATE TABLE IF NOT EXISTS citations" "(citation_id INTEGER PRIMARY KEY, data TEXT)", ) from citeable import to_jsons json_data = to_jsons(data) if existing := self.db.execute("SELECT citation_id FROM citations").fetchone(): self.db.execute( "UPDATE citations SET data=? WHERE citation_id=?", (json_data, existing["citation_id"]), ) else: self.db.execute("INSERT INTO citations(data) VALUES (?)", (json_data,)) def _load_citations(self) -> list[CitationBase]: from citeable import from_jsons if not self._has_citations_table(): return [] result = self.db.execute("SELECT data FROM citations").fetchone() return from_jsons(result["data"]) if result else [] def _has_citations_table(self) -> bool: result = self.db.execute( "SELECT name FROM sqlite_master WHERE type='table' AND name='citations'", ).fetchone() return result is not None def _describe(self) -> dict[str, object]: if self.locked and self._lock_id != os.getpid(): title = f"Locked db store. Locked to pid={self._lock_id}, current pid={os.getpid()}." elif self.locked: title = "Locked to the current process." else: title = "Unlocked db store." result = super()._describe() result["title"] = title return result @property def record_type(self) -> str: """class name of completed results""" result = self.db.execute("SELECT record_type FROM state").fetchone() return result["record_type"] @record_type.setter def record_type(self, obj: object) -> None: from scinexus.misc import get_object_provenance rt = self.record_type if self.mode is OVERWRITE and rt: msg = f"cannot overwrite existing record_type {rt}" raise OSError(msg) n = get_object_provenance(obj) self.db.execute("UPDATE state SET record_type=? 
WHERE state_id=1", (n,)) def _summary_not_completed(self) -> list[dict]: """returns a list of dicts summarising not completed results""" from scinexus.data_store import summary_not_completeds from scinexus.io import DEFAULT_DESERIALISER return summary_not_completeds( self.not_completed, deserialise=DEFAULT_DESERIALISER, ) cogent3-scinexus-e0aee79/src/scinexus/typing.py000066400000000000000000000173001520253266500217370ustar00rootroot00000000000000"""defined type hints for app composability""" from __future__ import annotations from pathlib import Path from types import UnionType from typing import ( TYPE_CHECKING, Any, ForwardRef, Protocol, TypeVar, Union, get_args, get_origin, runtime_checkable, ) from scinexus.data_store import DataMemberABC if TYPE_CHECKING: from collections.abc import Callable NESTED_HINTS = (Union, UnionType, list, tuple, set) _type_namespace_providers: list[Callable[[], dict[str, type]]] = [] def register_type_namespace( provider: Callable[[], dict[str, type]], ) -> None: """register a lazy namespace provider for forward-reference resolution Parameters ---------- provider a zero-arg callable returning a dict of {name: type}. It is invoked lazily (each time _resolve_name needs a fallback) so downstream packages can defer heavy imports. Providers are responsible for their own caching. Notes ----- Registration is idempotent: re-registering the same callable is a no-op. Providers are consulted in registration order, and the first provider that yields ``name`` wins. """ if provider not in _type_namespace_providers: _type_namespace_providers.append(provider) def _clear_type_namespace_providers() -> None: """remove all registered namespace providers (intended for tests)""" _type_namespace_providers.clear() @runtime_checkable class HasSource(Protocol): @property def source(self) -> Any: ... @runtime_checkable class HasInfo(Protocol): @property def info(self) -> HasSource: ... 
@runtime_checkable class SerialisableType(Protocol): """a runtime-checkable protocol for objects that serialise to a dict Any object that implements a ``to_rich_dict`` method returning ``dict[str, object]`` satisfies this protocol. Writer apps rely on this to convert results before storing them in a data store -- both ``DataStoreDirectory`` and ``DataStoreSqlite`` call ``to_rich_dict()`` during the write path. """ def to_rich_dict(self) -> dict[str, object]: ... IdentifierType = str | Path | DataMemberABC """accepted types for identifying members of a data store A loader app can receive a file path as a string, a ``pathlib.Path``, or a ``DataMemberABC`` handle returned by iterating over a data store.""" def _resolve_name( name: str, module_globals: dict[str, object] | None = None ) -> type[Any]: """resolves a string name to a type Checks ``module_globals`` first, then falls back to any namespace providers registered via :func:`register_type_namespace`. """ if module_globals and name in module_globals: result = module_globals[name] if isinstance(result, type): return result for provider in _type_namespace_providers: ns = provider() if name in ns: result = ns[name] if isinstance(result, type): return result msg = f"cannot resolve type name {name!r}" raise TypeError(msg) def resolve_type_hint( hint: object, module_globals: dict[str, object] | None = None ) -> object: """walks a type hint tree and resolves all forward references to classes Parameters ---------- hint a type hint (TypeVar, Union, ForwardRef, str, or concrete class) module_globals optional dict of the module where the hint was defined, used to resolve forward references from user code """ # Protocol classes (like SerialisableType) -- return as-is if getattr(hint, "_is_protocol", False) and hint is not Protocol: return hint # TypeVar with __bound__ -> resolve bound class if isinstance(hint, TypeVar): if hint.__bound__: bound = hint.__bound__ if isinstance(bound, ForwardRef): bound = 
_resolve_name(bound.__forward_arg__, module_globals) return bound if hint.__constraints__: resolved = tuple( resolve_type_hint(c, module_globals) for c in hint.__constraints__ ) return Union[resolved] # type: ignore # noqa: UP007 msg = f"unconstrained TypeVar {hint!r} cannot be resolved" raise TypeError(msg) # Union / UnionType -> recurse origin = get_origin(hint) if origin is Union or isinstance(hint, UnionType): args = tuple(resolve_type_hint(a, module_globals) for a in get_args(hint)) return Union[args] # type: ignore # noqa: UP007 # Container types (list[X], tuple[X,Y], set[X]) if origin in (list, tuple, set): args = tuple(resolve_type_hint(a, module_globals) for a in get_args(hint)) return origin[args] if args else hint # ForwardRef if isinstance(hint, ForwardRef): return _resolve_name(hint.__forward_arg__, module_globals) # plain str return _resolve_name(hint, module_globals) if isinstance(hint, str) else hint def get_type_display_names(hint: object) -> frozenset[str]: """extracts human-readable class names from a resolved type hint Parameters ---------- hint a resolved type hint (one that has been through resolve_type_hint) """ names: set[str] = set() origin = get_origin(hint) if origin is Union or isinstance(hint, UnionType) or origin in (list, tuple, set): for arg in get_args(hint): names |= get_type_display_names(arg) elif isinstance(hint, type): names.add(hint.__name__) elif isinstance(hint, TypeVar): # fallback for unresolved TypeVars -- shouldn't normally happen names.add(hint.__name__) return frozenset(names) def _get_concrete_classes(hint: object) -> set[type[Any]]: """extracts concrete classes from a resolved type hint, walking Unions""" classes = set() origin = get_origin(hint) if origin is Union or isinstance(hint, UnionType): for arg in get_args(hint): classes |= _get_concrete_classes(arg) elif origin in (list, tuple, set): classes.add(origin) elif isinstance(hint, type): classes.add(hint) return classes def _is_protocol(hint: object) -> bool: 
"""checks if a type hint is or contains a runtime-checkable Protocol""" if getattr(hint, "_is_protocol", False) and hint is not Protocol: return True origin = get_origin(hint) if origin is Union or isinstance(hint, UnionType): return any(_is_protocol(a) for a in get_args(hint)) return False def check_type_compatibility(return_hint: object, input_hint: object) -> bool: """composition-time check: is the return type compatible with the input type? Parameters ---------- return_hint resolved return type of the upstream app input_hint resolved input type of the downstream app Returns ------- True if the types are compatible, False otherwise """ # typing.Any is compatible with everything if return_hint is Any or input_hint is Any: return True # If either side is or contains a Protocol, be lenient -- runtime check_type # provides the real safety net if _is_protocol(return_hint) or _is_protocol(input_hint): return True return_classes = _get_concrete_classes(return_hint) input_classes = _get_concrete_classes(input_hint) # Check if any return class is a subclass of any input class (or vice versa) for ret_cls in return_classes: for inp_cls in input_classes: try: if issubclass(ret_cls, inp_cls) or issubclass(inp_cls, ret_cls): return True except TypeError: # issubclass can fail for some types if ret_cls is inp_cls: return True return False cogent3-scinexus-e0aee79/src/scinexus/warning.py000066400000000000000000000170211520253266500220720ustar00rootroot00000000000000from __future__ import annotations import functools import inspect from typing import TYPE_CHECKING, ParamSpec, TypeVar, cast from warnings import catch_warnings, simplefilter from warnings import warn as _warn if TYPE_CHECKING: # pragma: no cover from collections.abc import Callable, Sequence from typing import Any, Literal def deprecated( _type: Literal["class", "method", "function", "argument", "module"], old: str, new: str, version: str, reason: str | None = None, stack_level: int = 4, ) -> None: """a convenience 
function for deprecating classes, functions, arguments. Parameters ---------- _type should be one of class, method, function, argument, module old, new the old and new names version the version by which support for the old name will be discontinued reason why, and what choices users have stack_level as per warnings.warn """ msg = f"{_type} {old} will be removed in version {version}, use {new} instead" if reason is not None: msg = f"{msg}\nreason={reason!r}" with catch_warnings(): simplefilter("always") _warn(msg, DeprecationWarning, stacklevel=stack_level) def discontinued( _type: Literal["class", "method", "function", "argument", "module"], old: str, version: str, reason: str | None = None, stack_level: int = 4, ) -> None: """convenience func to warn about discontinued attributes Parameters ---------- _type should be one of class, method, function, argument, module old the attributes name version the version by which support for the old name will be discontinued reason why, and what choices users have stack_level as per warnings.warn """ msg = f"{_type} {old} is discontinued and will be removed in version {version}" if reason is not None: msg = f"{msg}\nreason={reason!r}" with catch_warnings(): simplefilter("always") _warn(msg, DeprecationWarning, stacklevel=stack_level) _discontinued = discontinued # renamed to avoid name clash with discontinued argument in deprecated args decorator P = ParamSpec("P") R = TypeVar("R") def deprecated_args( *, version: str, reason: str, old_new: Sequence[tuple[str, str]] | None = None, discontinued: Sequence[str] | None = None, stack_level: int = 4, ) -> Callable[..., Any]: """ A decorator that marks specific arguments of a function as deprecated. The decorator accepts a list of 2-tuples specifying the mapping of old argument names to new argument names. When the decorated function is called with any of the old argument names, they will be replaced with their corresponding new names in the kwargs dictionary. 
Parameters ---------- version The version when the old arguments will be removed in calver format, e.g. 'YYYY.MM' reason Reason for deprecation or guidance on what to do old-new A list of deprecated old and replacement new argument names. discontinued Names of single or multiple arguments to be discontinued. This should only be applied to arguments that have no effect. Returns ------- The decorated function. Warnings -------- DeprecationWarning A warning will be raised when the decorated function is called for each deprecated argument used in the calling function. Examples -------- To use, change the signature of the function / method by removing the deprecated / discontinued arguments. Apply the decorator to the function, indicating the old and new the argument names. >>> @deprecated_args( ... version="2024.1", ... reason="Use new_name instead", ... old_new=[("old_arg", "new_arg")], ... ) ... def my_function(new_arg): ... return new_arg >>> my_function(new_arg=42) 42 """ if old_new is None: old_new = [] discontinued = [discontinued] if isinstance(discontinued, str) else discontinued old_args = dict(old_new).keys() def decorator(func: Callable[P, R]) -> Callable[P, R]: @functools.wraps(func) def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: if old_args & kwargs.keys(): for old, new in old_new: if old in kwargs: kwargs[new] = kwargs.pop(old) deprecated( "argument", old, new, version, reason, stack_level=stack_level, ) if discontinued: for dropped in discontinued: if dropped in kwargs: _discontinued( "argument", dropped, version, reason, stack_level=stack_level, ) kwargs.pop(dropped) return func(*args, **kwargs) return wrapper return decorator def deprecated_callable( *, version: str, reason: str, new: str | None = None, is_discontinued: bool = False, stack_level: int = 4, ) -> Callable[[Callable[P, R]], Callable[P, R]]: """ A decorator that marks callables (function or method) as deprecated or discontinued.. 
Parameters ---------- version The version when it will be removed in calver format, e.g. 'YYYY.MM' reason Reason for deprecation or guidance on what to do new If the callable is being replaced, this is the replacement, e.g. 'ClassName.new_method()' is_discontinued If True the callable is being discontinued. stack_level as per warnings.warn Returns ------- The decorated callable. Warnings -------- DeprecationWarning A warning will be raised when the decorated function is called. Examples -------- Here's an example of how to use the `deprecated_callable` decorator to mark the function `my_function` as deprecated in favour of a new function. >>> @deprecated_callable( ... version="2023.6", reason="function rename", new="a_function" ... ) ... def my_function(arg): ... return arg >>> callable(my_function) True """ def decorator(func: Callable[P, R]) -> Callable[P, R]: sig = set(inspect.signature(func).parameters) _type: Literal["method", "function", "class"] = ( "method" if sig & {"self", "cls", "klass"} else "function" ) old = func.__name__ if old in {"__init__", "__init_subclass__"}: # we're really deprecating a class, so get that name old = func.__qualname__.split(".")[-2] _type = "class" if not is_discontinued and new is None: msg = "Must specify new callable if deprecating." 
raise ValueError(msg) def depr_func() -> None: if is_discontinued: discontinued(_type, old, version, reason, stack_level) else: deprecated(_type, old, cast("str", new), version, reason, stack_level) @functools.wraps(func) def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: depr_func() return func(*args, **kwargs) return wrapper return decorator cogent3-scinexus-e0aee79/tests/000077500000000000000000000000001520253266500165645ustar00rootroot00000000000000cogent3-scinexus-e0aee79/tests/conftest.py000066400000000000000000000021621520253266500207640ustar00rootroot00000000000000from collections.abc import Iterator from pathlib import Path import pytest from scinexus.data_store import set_id_from_source from scinexus.parallel import set_parallel_backend from scinexus.typing import register_type_namespace try: from cogent3.app.typing import _get_resolution_namespace register_type_namespace(_get_resolution_namespace) except ImportError: pass @pytest.fixture def tmp_dir(tmp_path_factory): return tmp_path_factory.mktemp("scinexus_tests") @pytest.fixture(scope="session") def DATA_DIR(): return Path(__file__).parent / "data" @pytest.fixture def HOME_TMP_DIR() -> Iterator[Path]: """makes a temporary directory""" import tempfile home = Path("~") with tempfile.TemporaryDirectory(dir=home.expanduser()) as dn: yield home / dn @pytest.fixture def reset_id_from_source() -> Iterator[None]: """Restore the default ID extractor after the test.""" yield set_id_from_source(None) @pytest.fixture def reset_parallel_backend() -> Iterator[None]: """Restore the default parallel backend after the test.""" yield set_parallel_backend(None) cogent3-scinexus-e0aee79/tests/data/000077500000000000000000000000001520253266500174755ustar00rootroot00000000000000cogent3-scinexus-e0aee79/tests/data/brca1.fasta000066400000000000000000005102501520253266500215100ustar00rootroot00000000000000>FlyingFox TGTGGCACAAATGCTCATGCCAGCTCTTTACAGCATGAGAAC---AGTTTATTATACACTAAAGACAGAATGAATGTAGA 
AAAGACTGACTTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAACAGAACAGATGGGTTGAAACTAAGGAAA CATGTAACGAT---ATGCAGACTTCCAGCACAGAGAAAAAGGTAGTTCTGAATGCTGATCCCCTGAATGGGAGAATAGAA CTGAATAAGCAGAAACCTCCATGCTCTGACAGTCCTAGAGAT---TCTCAAGAT---ATTTCTTGGATAACACGGAATAG TAGCATACAGAAAGTTAATGAGTGGTTTTCCAGACGTGATGAAATATTAACTTCTGATGTCTCACCTGATGGGAGGTCTG AATCAAATGTG---------------GTAGAAGTTCCAAAT------GAAGTAGATGGATACTCTGGTGCTTCAGAGAAA ATAGCTTTAAAGGCCAATGATCCTCATGGTGCTTTAATGTGC------GAAAGAGTTCACTCCAAACTGGTAGAAAGTAA T---ATTGAAGATAAAATATTTGGGAAAACATATCGGAGGAAAGCAAGCCTCCCTAACTTGAGCCACATAACTGAAAATC TAATTACAGGAGCATCTGCTATAGAACCTCAGATAACACAA--------------------------------------- ---------------------GAGTATCCCCTCACAAATAAACTAAAGCGTAAAAGGAGAACTACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAATAGATTTGGCAGTTGTTCAGAAAACTCCGGAAAACATAATTGAGGAAACTGACCAAATAG AGCAGAAT---------GGTCATGTGATGAATAGTACTAATAATGGTCATGAGAATGAAACAAAAGGTGATTAT---GTT CAGAAGAAGAAAAATACAAACCCAACAGAA------TCATTGGAAAAAGAATCTACTTTCAAAACTAAAGCTGAACCTAT ATGCAGCAGCATAAGCAATATGGAACTAGAATTAAATATCCACAGTTCAAAAGCAGTTAAGAAGAATAGGCTGAGGAGGA AGTCCTCTACCAGGCATATTCATGCACTTGAACTAGTAGTCAATAGAAATCCAAGCCCACCTAATCATACTGAACTACAA ATTGATAGTTGTTCTAGCAGTGAAGAG---CTGAAGGAAAAA---AATTCTGACCAAATGCCAGTCAGACACAGCAAAAA ACTTCAATTCATAGAAGATAAAGAATCTTCAACTGGAGCCAAGAAGAATAACAAGCCAAATGAGACAATCAATAAAAGAC TTGCCAGTGATGCTTTTCCAGAATTAAATATAACAAACATACCTGGTTTTTTTACTAATGGTTCAAGTTCTAATAAACTT CAAGAGTTTGTCAATCCTAGCCTTCAAAGAGAAGAAATAAAAGAGAAC---CTAGGAACAATTCAAGTGTCTAATAGTAC CAAGGACCCCAAAATTTTGATCTTCGGTGAAGGAAGA---GGTTCACAA---ACTGATCGATCTACAGAGAGTACCAGTA TTTTATTGGTGCCTGAAACGGATTATGGCACTCAAGATAGTATCTCATTACTGGAACCTGACATCCCAGAG---AGGGTA AAG---ACAGCACCAAACCATCAT------------GCAGCAATTAAAAACCCCAGAGAACTTATTCATGGTTGT---TC TGAAGATACTAGAAATGATGCAGAGGGCTTTAAAGATCCATTGAGACGTGAAGTTAAC---TACANNNNNNNNNNN---- -----NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN NNTTCAAACCCAGGAAATCTAGAAAAGGAATGTGCAACAGGCTATGCCCACTCCAAGTCCTTGAGGAAACAAAGTCCAAA 
AGTCACTCTTGAATGTGACCGAAAAGAA---AATCAGGGAAAGAAAGAGTCTAACGTCAAGCATGTGCAGGCAGTTTATA CAACTGTAGGCTTTCCTGTGGTTTGTGAGAAAGAAAAAAAGCCAGGAGATTATGCTAAATATGGCATAAAAGAAGTCTCT AGGCTTTGTCAGTCATTTCAGTTCAGA---GAAAATGAAACTGAACTCACTATTGCAAATAAACTTGGAATTTCACAAAA CCCATATCATATGCCATCCATTCCTCCCATCAAGTCATCTGTTAAAACTACATGTAAGAAAAAT---CTGTCAGAGGAAA AGTTTGAAGAACATTCAATATCCCCTGAAAGAACAATAGGAAATGAGACCATCATTCAAAGTACAGTGGGCACAATTAGC CAAAATAACATTAGAGAAAGCACTTTTAAAGAAGGCAGCTCAAGCAGTATTTATGAAGCAGGTTCCAGTACTAACGAACT AGGCTCTAGTGTCAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTCAAGCAGAACTAAGTA GAAACAGAGGACCACAATTAAATGCTGTGCTTCAATTGGGTCTCATGCAGCCTGAAGTCTATAAGCAAAGCCTT---CCT CTAAGTAATTGTAAACATCCTGAAATAAAAAGGCAAGGAGAAAATGAAGGAGTAGTTCAGGCTGTTAATGCAGATGTCTC TCTACGTCAGATTTCAGATAACTTAGAGCAA---CCTATGGGAAACAGTAATGCTTCTCAGGTTTGTTCTGAGACACCGG ATGACCTGTTAAATGATGACAAAATAAAAGAGAATATCGGCTTTGATGAAAGTGGCATTAAGGAAAGATCTGCTGTTTTT AGCAAAAGTGTCCAGAAAGGAGAATTCAAAAGGAGCCCTAGTCCCTTAGCCCAT---ACAAGTTTGTCTCAAGGTCGCCG AAGAGGGGCTAGGAAATTAGAGTCCTCAGAAGAGGA------------- >DogFaced TGTGGCACAAATACTCATGCCAACTCATTACAGCATGAGAACAGCAGTTTATTATACACTAAAGACAGAATGAATGTAGA AAAGACTGACTTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAGCAGAACAGATGGGTTGAAACTAAGGAAA CATGTAATGAT---AGGCAGACTTCCAGCANAGAGAAAAAGGTAGTTCTGAATGCTGATCCCCTGAATGGAAGAATAAAA CTGAATAAGCAGAAACCTCCATGCTCTGACAGTCCTAGAGAT---TCCAAAGAT---ATTCCTTGGATAACACGGAATAG TAGCATACAGAAAGTTAATGAGTGGTTTTCCAGACGTGATGAAACATTAACTTCTGATGTCTTACTTGATGAGAGGTCTG AATCAAATGTG---------------GTAGAAGTTCCAAAT------GAAGTAGATGGATACTCTGGTGCTTCAGAGGAA ATAGCCTTAAAGGCCAGTGATCCTCATGGTGCTTTAATATGT------GAAAGAGTTCACTCCAAATTGATAGAAAGTAA T---ATTGAAGATAAAATATTTGGGAAAACATATCGGAGGAAAGCAAGCCTCCCTAACTTAAGCCACATAACTGAAAATC TAATTACAAGAGCATCTGCTACAGAACCTCAGATAACACAA--------------------------------------- ---------------------GAGTGCCCCCTCACAAATAAACTAAAACGTAAAAGAAGAACTACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAATAGATTTGACAACTGTTCAAAAAACTTCTGAAAATATAATTGAGGGAACTGACCAAATAG 
AGCAGAAT---------GGTCATGTGATGAATAGTTCTAATGATGGTCATGAGAATGAAACAAAAGGTGATTAT---GTT CAGAAGAAGAAAAATACAAACCCAACAGAA------TCATTGGAAAAAGAATCTGCTTTCAGAACTAAAGTTGAGTCTGT ACCCAACAACATAAGCAATGTGGAACTAGAATTAAATATTCACGGTTCAAAAGCACTCAAGAAGAATAGNCTGAGGAGGA AGTCCTNTACCAGGCATATTCATGCACTTGAACTAGTAGTCAATAGAAATTCAAGCCCACCTAATCATACTGAACTACAA ATTGATAGTTGTTCCAGCAGTGAAGAA---CTGAAGGAAAAA---AATTCTGACCGAATGCCAGACAGACACAGCAAAAA ACTTCAGTTCGTAGAAGATAAAGAATCTGCAACTGGAGCCAAGAAGAATAACATGCCAAATGAGGCAATAAATAAAAGAC TTTCCAGTGAAGCTTTTCCCGAATTAAATATAACAAACGTACCTGGTTTTTTTACTAATGGTTCAAGTTCTAATAAACGT CAAGAGTTTGTCAATCCTAGCCTTCAAGGAGAAGAAATAAAAGAGAAT---CTACGAACAATTCAAGTGTCTAATAGCAC CAAAGACCCCAAAATTCTAATCTTTGGTGAAGGAAGA---GGTTCACAA---ACTGATCGATCTACAGAGAGTACCAGTA TTTTATTGGGACCTGAAACGGATTATGGCACTCAAGATAGTATCTCATTACTGGAATCTGACATCCCAGGG---AGGGCA AAG---ACAGCACCAAACCAACATGCAGATCTGTGTGCAGCAATTGAAAACCCCAGAGAACTTATTCATGATTGT---TT TAAAGAAACTAGAAATGACACAGAGAGCTTTAAAGATCCATTGAGACATGAAGTTAAC---TCCACNNNNNNNNNN---- -----NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN NNTTCAGACCCAGGAAATCTAGAAAAGGAATGTGCAACAGGCTATGCCCACTCCAGGTCCTTGATAAAACAAAGTCCAAA AGTCACTCTTGAATGTGACCGAAAAGGA---AATCAGGGAAAGAAAGAGTCTAACATNGAGCATGTGCAGGCAGTTTATA CAACTATAGGCTTTCCTGGGGTTTCTGAGAAAGACAAAAAGCCAGGAGATTATGCCAGATATGGCATAAAAGAAGTCTCT AGGCTTTGTCAGTCATTTCAGTCTAGA---AGAAATGAAACTGAGCTCACTATTGCAAATAAACTTGGACTTTCACAAAA CCCATATCATATGCCATCCATTTCTCCCATCAAGTCATCTGTTAAAACTATATGTAAGAAAAAT---CTGTCAGAGGAAA AGTTTGAAGAACATTCAATATTCCCTGAAAGAGCAATAGGAAATGAGACCATCATTCAAAGTACAGTGGGCACAATTAGC CAAAATAACATTAGAGAAAGCACTTTTAAAGAAGGCAGCTCAAGCGGTATTTATGAAGCAGGTTCCAGTACCAATGAACT AGGCTCTAGTGTCAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTCAAGCAGAACTAAGTA GAAACAGAGGACCAAAATTAAATGCTGTGCTTCAGTTGGGTCTCATGCAGCCTGAAGTCTATGAGCAAAGCCTT---CCT CTAAGTAATTGTAAACATTCTGAAATAAAAAGGCAAGGAGAAAATGAAGGAGTGGTTCAGGCTGTTAATGCAGATGTCTC TCCANGTCAAATTTCAGATAACTTAGAGCAA---CCTATGGGAAACAGTAATATTTCTCAGGTTTGTTCTGAGACACCGG 
ATGACCTGTTAAATGATGACAAAATAAAGGACAATATCAGCTTTGATGAAAGTGGCATTCAGGAAAGATCTGCTGTTTTT AGCAAAAATGTCCAGAAAGGAGAATTCAGAAGGAGCCCTAGTCCCTTAGCCCAT---GCAAGTTTGTCTCAAGGTCGCCC AAGAAGGGC---------------------------------------- >FreeTaile TGTGGCACAGATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTACTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAGCAGCCTGGCTTAGCAAAGAGCCAGCAGAGCAGATGGGCTGAAAGTAAAGAAA CATGTAATGAT---AGGCAGACTCTCAGCACAGAGAAAAGGGTAGTTCTGAATGCTGATCCTCTGAATAGG--------- ------AGAAAAGAACCTCCAGGCTCTAACTATCCTAGAGAT---TCCCAAGAT---GTTCCTTGGATAACACGGAGTAG TAGCATACAGAAAGTTAATGAGTGGTTCTCCAGACGTGATGAAATACTAACTACTGGTGGCTCACATAACGGTAGATTTG AATCAAATGTTGAAGTAGCTGGTGCAGTAGAAGTTCCAAAT------GAAGTAGATGGATATTCTGGTTCTCCAGAGAAA ATAGCCTTAATGGCCTGTGATCCTCCTGATGCTTTAATATGT------GAAAGAGTCTCCTCTAAACCACTAGAAAGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGAAAGGCAAGCTTCCCTAACTTGAGCCACATAAGTGAAAATC TAATTATAGGAGCATCTGCTATAGAACCCCAGGTAACAAAA--------------------------------------- ---------------------GAATGTCCCCTCACAAATAAACTAAAGCGTAAAAGAAGA---ACATCAAGCCTTCATCC TGAGGATTTTATCAAGAAAGTAGATTTGGCAGTTGTTCAAAAGACTCTTGAAAAGGTAATTGAGGGAACTGACCAAATAG AACAGAAT---------GGTCATGTGATGAGTATTACTAGTAATTGTCATGAGAATAAAACAAAAGGTGATTAT---GTT CAGAAAGAGAAAAATTCTAACCCAACAGAA------TCATTGGAAAAAGAATCTGCTTTCACAACTAAAGCTGAACCTAT AAGCAGCAGCATAAGCAATATGGAACTAGAGTTAAATATCCACAGTTCAAAAGCACCTAAGAAGAATAGGCTGAAGAGGA AGTCCTCTACCAGGCACATTTATGCACTTGAACTAGTAGTCAATAGAAATCCAAGCCCACCTAATCACACTGAACTACAA ATTGATAGCTGTTCTAGCAGTGAAGAG---GTGAAGGAAAAA---AATTCTGACCAAATACCAGTCAGACACAGCAAAAA GCTTCAACTCATGGAAGGTAAAGAACCTGCAACTGGAGCCAAGAAGAGTAACAAATCAAATGAACAAATAAATAAAAAAC TTGCCAGTGATGTTTTTCCAGAACTAAACTTAACAAACATACCTGGTTTTTTAAGTAATGATTCAAGTTCTAATAAACTT AAAGAGTTTGTCAATCCTAACCTTCAAAGAGAAGAAATAACAGAGAAC---CTAGGAACAGTTCACATGTCTAATAGTAC CAAAGACCTCAAAGATCTGATATTAAGTGGAGGAAGA---AGTTTGCAA---ACTGATAGATCTATGGAGAGTACCAATA TTTTATTGGTACCTGAAACTGATTATGGCACGCAGGATAGTATCTCATTACTGGAACCTGACACCCCAGGG---AAGGCA 
AAA---AAAGCTCCAAATCAATATGCGGGTCTGTGTGCAGAAATTAAAAACCNCAAGGAACTTATCCATGGTTGT---TC TAACGATAATAGAAATGACAGAGAGGACATTAAGGATCTATTGAGACCTGAAGTTAAC---CACANNNNNNNNNNNNNNN NNNNNNNGGAAGAGAGTGAACTTGATACACAGTATTTACAGAATACATTCAAGGTTTCAAAACGTCAGTCGTTTGCTCTG TTTTCAAATCCA---------GAAAAGGAATATGCAACAGTCTATGGCCACTCCAGGTCCTTAAGGAAACAAAGTCCAAA AGTCACTCTTGAATGTGGACAAAAAGAAGAAAATCAGGGAGAGAAAGAATCTGAAATCAAGTATGTACGGGGAGTTCACA CAACTGCAGGCTTTCCTGTGGTTTGTGAGAAAGACGAAAAGCCAGAAGAATATGCCAAATGTAGCATAAAAGGAACCTCT AGCCTTTGTCAGCCACCTCAGTTCAGA---GGCAACGAAACTGAACTCACTATTGCAAATAAACCCGGAATTTCACGAAA CCCATATCATATACCATGCATTTCTCCCATCAGGTCCTTTGTTAAAACTATAAATAAGAAAAAC---CTGTCAGAGGAAA AGTTTGAGGAACATTCAGTGTCACCTGAAAGAGCAATGAGAAATGAGAAT---ATTCTAAGTACAGTGAGCCCAATTAGC CTAAATAAC---AGAGAAAGCACTTTTAAAGAAGGCAGCTCAAGC---------------------AGTACTAATGAAGT AGGCTCTAGTACCAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTCAAGCAGAACTAGGTA GAAACAGAGGATCAAAAATAAATGCTATGCTCAGATCAGGTCTCATGCAACCTGAAGTCTATAAGCCAAGCCTT---TCT GTAAGTAATTGTGAACATCCTGAAATAAAAAGGCAAGGAGAAAATGAAGGAGTAGTTCAGGCTGTTAATGCAGATTTCTC TCCATGTCAAATTTCAGATAACTTAGAACAA---TCTATGGGAAGTACTCCTGCTTCTCAGGTTTGTTCTGAGACACCAG ATGACCTGTTAAATGATGACAAAATAAAGGAGAATAGCAGCTTTGCTGGAAGTGGCATTAAGGAAAGATCTGCTATTTTT AGCAAAAGTGTCAAGAAAGAAAAATTCAGAAGGAGCCCTAGCCCCTTTGCCCAT---ACACATTTGACTCATACTCGCCA AAGAGGGGCCAGGAAATTAGAGTCCTCAG-------------------- >LittleBro TGTGGCACAGATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTACTACTCACCAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAGCAGCCTGGCTTAGCAAGGAGCCAGCAGAGCAGATGGGCTGAAAGTAAAGAAA CATGTAATGAT---AGGCAGACTCCCAGCACAGAGAAAAGGGTAATTCTGGATGCTGATCCTCTGAATGGG--------- ------GAAAAAGAACTTCCACGCTCTGACCATCCCAGAGAT---TCCCAAGAT---GTGCCTTGGATAACACGGAGCAG TAGCATACAGAAAGTTAATGAGTGGTTTTCCAGGCGTGATGAAATACTAACTTCTGATGGCTCACATAATGGCAAGTCTG AGTCAAATGCTGAAGTAGCTGGTGCAGTGGAAGTTGCAAAT------GAAGTAGATGGGTATTCTGGTTCTCCAGGGAAA ATAACCTTAATGGCCCATGATCCTCATGGTGCTTTAACCTGTGAAAGTGAAAGAGTTCACTCCAAACCAGTAGAAAGTAA 
T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCAAACTTGAGCCACATAACTAAAAATC TAATTGTAGGAGCATCTGCTATAGAACCTCAGATAGCACAA--------------------------------------- ---------------------GAGTGTCCCCTCACAAATAAACTAAAGCGTAAAAGGAGAAGTACATCNGGCCTTCATCC TGAGGATTTTATTAAGAAAGTAGATTTGGCAGTTGTTCAAAAGACTCCTGAAGAGATAATTAAGGGAACTGACCGAATAG AACAGAAT---------GGTCATGAGATGAATATTACTAATAATGATCATGAGAATGAAACAAAAGGTGATTGT---GTT CAGAAAGAGAAAAATGCTAACCTAACAGAA------TCACTGGAAAAAGAATCTGCGTTCACAAGTAAAGCTGAACCTAT AAGCAGCAGCATAAGCAATATGGAACTAGAATTAAATGTCCACAGTTCAAAAGCACCTAAGAAGAATAGGCTGAAGAGGA AGTCCTCTACCAGGCCTATTCATGCACTTGAACTAGTAGTCAATAGAAATCCGAGCCCATCTAACCATACTGAACTACAA ATTGATAGTTGTTCTAGCAGTGAAGAG---GTGAAGGAAAAA---AATTCTGACCAAATACCAGTCAGACACAGCAAGAA GCTTCAACTCATGGAAGGTAAAGAACCTGCAGCTGGAGCCAAGAAGAGTAATAAGTCAGATGAACAAATAAATAAAAAAC TTGCCAGTGATGCTTTTCTAGAACAAAACTTAACAAACATGCCTGGTGTTTTTACTAATGGTTTAAGCTCTAATAAGCTT AACGAGTTTGTCGATCCTAACCTACAAAGAGAAGAAACAGAAGAGAAC---CTAGGAGCAGTTCAAATGTCTAATAGTAC CAAAGACCTCGAAGATCTGACATTAAGTGGAGGAAGA---AGTGTGCAA---ATTGATAGATCTAAAGAGAGTACCAATA TTGTATTGGTACCTGAAACTGATTATGGCACACAGGATAGTGTCTCATTACTGGAACCTGACATCCCAGGG---AAGGCA AAA---ACAGCTCCAAATCAATGTGGGGATCTGTGTGCAGCAGTTAAAAATCCTAAAGAACTTATTCGTGGTTGT---TC TAAAGATATTAGAAATGACAGAGAGGGCTTTAAGGATCTATTGAGATGTGAAGTTAAC---CACACGCAGGAGACAAGCA TAGAAGTGGAAGAGAGTGAACTTGATACACAGGAATTACAGAATACATTCAAGGTGTCAAAGCGCCAGTCATTTGCTCTG TTTTCAAATCCA---------GAAAAGGAATGTGCAACAGCCTATGCTCACTCCCAGTCTTTAAGGAAACAAAGTCCAAA AGTCACTCTTGAATGTGGACAAAAAGAAGAAAATCAGGGAAAGAAAGAATCTAAAATCAAGCATGTACAGGCAGTTCACA CAGCTGTAGGCTTTCCTGTGGTTTGTGAGAAAGACAGAAAGCCAGGAGAGTATGCCAAATACAGCATAAAAGGAACCTCT ATGCATTGCCNGTCCTCTCAGTTCAGA---GGCAACAAAACTGAACTCACTATTACAGATAAATATGGACTTTCCCCAAA CCCATATCATATACCATCCATTTCTCCCATCAAGTCATTTGTTAAAACTGTAAGTAAGAAAAAC---CTGTCAAAGGAAA AGTTTGAGGAACATTTAGTGTCACCTGAAAGAGCAATGGGAAATGAGAAC---ATTCAAAGTACAGTGAGCCCAATTAGC CTAAGTAACATTAGAGAAAGCGCTTTTAAAGAAAGCAGCTCAAGC---------------------AGTACTAATGAAGG 
GGGCTCTAGTATCAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTCATGCAGAACTAGATA TAAACAAAGGATCAAAATTAACTGCTATGCTCAGATTAGGTCTCATGCAACCCGAAGTCTATAAGCCAAGCCTT---CCT GTAAGTAATTGTAAACATCCTGAAGTAAAAAGGCAAGGAGACAACGAAGGACTAGTTCAGGCTGTTAATGCAGACTTCCC TCCATGTCAAATTTCAGATAACCTAGAACAA---CCTATGGGAAGTAGTCCTGTTTCTCAGGTTTGTTCTGCGACACCGG ATGACTTGTTAACTGATGATGAAATAAAGGAGAATAGCGGCTTTGATGAAAGTGGCATTAANGAAAGATCTGCTGTTTTT AGCAAAGATGTTCAGAAAGAAGAATTCAGAGGGAGCCCTAGCCCCTTAGCCCAT---ACATATTTGACTCGGAGTTGCCA AAGAAGGGCCAGGAAATTAGAGTCCTCAGAAGAGGA------------- >TombBat TGTGGCACAAGTACTCATGCCAGCTCAGTACAGCATGAGAACAGCAGTTTACTACTCACTAAAGACAGAATGAACGTAGA AAAGCTTGACTTCTGTAATAAAAGCAAGCAGCCTGGCTTAGCAAGGAGCCAGCAGAGCAGATGGGCTGAAAGTAAAGAAA CATGTAATGAT---AGGCAGACTCCCAGCACAGAGAAAAGGGTAGTTGTGAATGCTGATACCCTGGATGGG--------- ------AGAAAAGAACCTCCATACTCTGACTGTCCTAATGAT---TCCCAAGAT---GTCCCTTGGATAACAGGGAATAG TAGCATACAGAAAGTTAGTGAGTGGTTTTCCAGGCGTGATGAAATATTAACTTCTGATGGATCACATGATGGGAGATCTG AATCAAATATGGAAGTAGCTGGTGCAGTAGAAGTTCCATAT------GAAGTAGATGGATATTCTGATTCTCCAGAGAAA ATAGGCTTAATGGCCAGTGATCCTCTTGGTGCTTTACTATGTGAAAGTGAAAGAATCCACTCCAAACCAGTAGAAAGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCTAACTTGAGCCATATAACTGAAAATC TAATTATAGGAGCACCTACTATACAATCTCAGATAACACAA--------------------------------------- ---------------------AATTGTCCCCTCACAGATAAACTAAAGCATAAAAGAAGAACTACATCAGGCCTTCGTCC TGAGGATTTTATCAAGAAGGTAGATTTGGCAGCTGTTCAGAAGACTCCTGAAAAGATAATTGAGGGAACTGATCAAACAG AACAGAAT---------GGTTCTGTGATGAATATTACTGATAATGGTCATGAGGATGAAACAAAATGTGATTAT---GGT CAGAAAGAGAAAAATGCTAACCCAGCAGAA------TCATTGGAAAAAGAATCTGCTTTCAGAACTAAAGCTGAACCTAT AAGCAGCAGCATAAGCAACATGGAACTAGAGTTAAATATCAACAGTTCAAAAGCACCTAAGAAGAATAGACTGAGGAGGA AGTCCTCTACCAGGCATATTTATGCACTTGAACTGGTAGTCAATAGAAATCCAAGCCCACCTAATCATATTGAACTACAA ATTGATAGTTGTTCTAGCAGTGAAGAG---GTGAAGAAAAGA---AATTCTGACCAAATACCAGTCAGGCACAGCAAAGA GCCTCAACTCGTGGAAGGTGAAGAACCTACGACTAGAGCCAAGAAG---AATAAGTCAAATGAACAAATAAATAAAAGAC 
TTGCCAGTGATACTTTTCCAGAACTAAATTTAAAAAACATACCTGGTTTTTTTACTAATGGTTCAAGTTCTAATAAACTT CAAGAGTTTGTTGATCCTAACCTTCGAAGAGAAGAAGTGGAAGAGAAC---CTAGGAACAATTCAAGTGTCTGATAGTAC CAAAGACCTCAAAGATCTGATATTAAGTGGAGGAAGA---AGTTTGCAA---ACTGATAGATCTATGGAGAGTACCAATA TTTTATTGGTACCTGAAACTGATTATGACACTCAGTATAGTATCTCATTACTGGAACCTGACACCCCAGGG---AAGGCA AAA---ACAGCACCAAGTCAACATGCGAGTCTGTGTGCAGCAATTGAAAACCCCAAGGAGTTTAACCATGGTTGT---TC TAAAGATACTAGAAGTGACACAGAGGGTGTTAAGGATCTACTGAGATGTGAAATTAAC---CNCACNNNNNNNNNNNNNN NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGAATACATTCNNGNTTTCAAAGCGTCAGTCATTTGCTCTG TTTTCAAATTCAGAAAATCCAGAAAAGGAATGTGCAACAGTCTATGCCCACTCCAAGTCCTTAAGGAAACACAGTCCAAA AGTCACTCTTGGATGTGGTCAAAAAGAAGAAAATCAGGAAGAGAAAGAATCTAAAATCAAGCATGTACAGGCTGTTCACA CAGCTGCAGGCCTTCCTGCCATTTGTGAGAAAGACAAGAAGCCAGGAGAATATGACAGATACAATATAAAAGGAATCTCT AGGCTTTGTCAGTTATCTCAGTTCAGA---GGCAATGAAACTGAACTCACTATTGAAAATAAACACAGAATTTTACAAAA CCCATATCATATATCACCCATCTCTCCCATCAGGTCATCTGTTAAAACGATAAGTAAGAAAAAC---CTGTCAGAGGAAA AGTTTGAGAAACAGTCAGTGTCACCTGAAAAAGCAATGGGAAATGAGAACATCATTCAAAGTACAGTGAGCACAATTAGC CAAAATAATGTTAGAGAAAGAGCTGTTAAAGAAGGCAGCTCAAGC---------------------AGTACTAACGAAGT AGGCTCTAGTATCGATGAAGCAGGTTCCAGT---------------------GGTAAAAACATTGGAGCAGAACTAGATA GAAACAGAGGATCAAAATTAAGTGCTGTTCTCAGATTAGGTCTCATGCAACCCGAAGTCTATAAGCCAAGCCTT---CCT ATAAGTAATTGTAAACACTCTGAAATAGAAAGGCAAGGAGAAAATGAAGTAGTAGTTCAGGCTGTTAATGCA-------- ----TGTCAAATTTCAGATAACTTAGAACAG---CCTATGGGAAGTAGTCCTGTTTCTCAGGCTTGTTCTGAGACACCAG ATGACCTATTAGATGATGACAAAATAAAGGAGAATAGCAGCTTTGCTGAAAGTGGCATTAAGGAAAGATCTGCTATTTTT AGCAAAAGTGCCCAGGAA---GAACTCAGCAGGAGCCCTAGCCCCTTAACCCAT---ACACATTTGGCTCAGGGTCAGCA GAGAAGGGCCGGGAAATTAGAGCC------------------------- >RoundEare ---------------------NGCTCATTANAGCNTGAGAACAGCAGTTTACTGCTCACTGAGGACCAGATGAGTGTGGG AAAGGCTGAATTCCGTCATGAAAGCAAGCAGCCCGGCTTAGCGAGGAGCCAGCAGAGCAGATGGGCTGAAAGTAAAGAAA CATGTGACGAT---AGGCAGGCTCCCAGCGCAGAGGAAAGGGCAGTTCTGAATGCTGATCCCCAGAATGGG--------- 
------AGGGAAGAATCTCCATCCTCTGACCACCCTAGAGAT---TCCCAAGAT---GTTCCTTGGATAACACGGAATAG CAGCATACAGAAAGTTAATGAGTGGTTTTCCAGACGTGATGAAACACGGACTTCCAACGGCTCCCACGGTGGGAGGCCTG AGTCAGACACGGAAGGAGCCGACACGGTAGAAGCTCCGGAC------GAAGTGCGTGGATGCCCTGGCTCTCCAGAGAAC ACAGCCTCGCCGGCCGGCGAGCCTCATGGCGCTTTAATGTGCGGAAGTGAAAGCGTCCACTCCAAACCAGTGGAGAGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCTAACTTGAGCCACGTAACTGACAGGC TAACTACGGGACTGTCCGCTCCAGACCCTCAGATAACACGA--------------------------------------- ---------------------GAGCGTCCCTTCACCAACAAACTAAAGCGTAAAAGGAGAACTACACCGGGCCTTCACCC AGAGGATTTTATCAAGACAGTGGATTTGACCGTTGTCCAGAAGACTCCTGAGAAGACCATTGAGGGAACTGACCAAACAG AACAGAAC---------GGTCGTGTGATGGATATTGCTAACAGTGGTCACGGGAATGAAGCAAAAGGTGATTAT---GTT CAGAATGAGAAGAGTTCTGACCCAACAGAA------TCACTGGGAGAAGAACCCGCTTTCAGAACTAAAGCTGGACCTAT AAGCAGCAGCATAAGCACCGTGGGACTAGAATTGAATGTCCACGGTTCAAAAGCGCCCAGGAAGACTAAGCGGAGGGAGA AGACTGCTGCCGAGCATACTTATGCACCTGGACTCGGGGTCAGCAAGAGCCCGAGCCCCCCTGCTCACGCCGGACTGCGG ACGGACGGTTGTTCTGGCGGCGAGGAG---GCAAAGATCGGG---AATTCTGGGCAGAGGCCAGCCAGGCGGAGCAGCAA GCTTCCGCTCGAGGAGGGTCAGGAGCCTGCAGCTGGGGCCAGCAAGGGTGACCGGTCAGATGCACCGATGAATAAGAGAC TTGCCAATGATGCTTTTCCGGAACTAAATTTAACAAGCGTATCTGCCGTTTTTACTAATGGTTCAGGTTCTACTAAACTT AAAGAGTGTGTCGATTCTAACCCTCAAGGAGAAGACACAGAAGAGAAC---CGAGGAACAGTTCAAGTGTCTAGTAGCAC CAAAGACCTCAAAGATCTGATATTCAGTGGGGGAAGA---AGTTTGCAA---ACTGACAGATCTGTGGAGAGTCCCAATA TTGNATTGGTACCTGAAACTGACTGTGACACTCAGGATAGCGTCTCCCTGCTGGCACCTGACACCCCAGGG---AAGGCA GAA---ACAGCACCAGGCCAACGTGTGGGTGGGTGCGCAGCTGCTGGAAGCCCGAAGGAACTTATCCGTGATTGT---TC GAAG------------GACACAGAGGGCGTTAAGGATCTCCCGAGATGTGAAGTTCAG---GAGACNNNNNNNNNNNNNN NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTGCTCTG TGTTCAAATCCAGGNAATCCAGGAAAGGAATGTGCAACCATGTATTCCCACTCCAGGTCCACAAGGAAACGAAGTCCAAA AGTTACTCTTGAATGTGGACGAAAAGAA---CATCAGGGAGAGAAGGAATTTAATATGGAGCGTGCGCAGCCACCTTACA CAACAGCAGGCTTTCCTAAGGGTTGTGAGAAAGACAAAACGCCAGGAGAGTGTGCCACATATACTATGAAAGGAATCTCT 
AAGCCTTGTCAGTCATCTTCGTTCAGA---GGCAATGAAACTAAACTCACTATTGAAAATAAATATGGGATTTCACAAAA CCCCTATCACATACCACCCATTTCTCCCGTCAGGTCATCTGTTAAAACTACAAGTAAGAAGAAC---CTGTCAGAGGAGA AGTGTGAGGAACATTCAGTGTCACCTGCCAGAGCCGTGGGACGTGAGAACATCATTCAGAGTACAGCGGGCACTCCTAGC CAGAACAAAACCAGGGAAAGTGCCGCCAGAGAAGGCAGCTCGAGC---------------------GGCACTAACGAAGT AGGCTCCAGT------------------------------------------GGTGAAAACGGTCAAGCAGAGCCAGGCA CAAACAGAGCATCAAAATTAAGCGCTCTTCTCAGATCAGGGCTCATGCAACCTGAAGTCTGTAAGCCGAGTCTT---CCT CTGAGTAATTGTGAAGATCCTGAAATAAAAAGGCAA---GAAGATGGGGGAGTAGTGCAGGCTGTTAATGCAGATTTCTC TCCGTGTCAAATTTTAGATAACCTAGAACAA---CCTCTGGGAAGCAGTCCCGCTTCTCGGGTTTGTTCCGAGACCCCAG ACGACCTGTTAAATGATGACAAGGTAAAGGAGGATAACAGCTTTGCTGAAGGCGGCATGAAGGACAGATCTGCTGTTTTT AGCAAAAGCGTCCTGAAA---GAATTCAGAAGGAGCCCCAGTCCCTTAGCCCAC---ACANTCTTGGCTCGGGGTCACCC CAGAAGGGCCAGGAAACTCGAGTCCTCAGAAGAGGA------------- >FalseVamp TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAATTTATTACTGACTGAAGACATAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGTGTTAGCAAGGAGCCAGCAGAGCAGATGGGCTAACAGTAAAGAAA TATGTAATGAT---AGGCAGACTCCCAACACAGAGAAAAGGGTAGTTCTGAATGCTGATCCCTTGAATGAGAGAAAAGAA CTGAATAATCAGAAACCTCTATGCTCTGACAGTCCTAGAGAT---TCCCAAGAC---GTTCCTTGGATAACACGTAATAG TAGCATACAGAAAGTTAATGAGTGGTTTTCCAGACGTGATGAAATACTAACTTCTCATGGTTCACATGATGGGACAGGTG GATCAAATACAGAAGAAGCTGGTGCAGCAGAAATTCTAAAT------GAAGTAGATGGATATTCTGGTTCTTCAGAGAAA ATAGCTTTAATGGCCAGTGATCCTCCTGGTGCTTCAATTTGTGAAAGTGAAAGAGTCTACTCCAAACCAGTAGAAAGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTACAGGAGGAAGGCAAGCCTCCCTAACTTGGGCCACAGAGCTGAAAATC TAATTATAGGAGCATCTGCTGTAGAACCTCAGATAATACAA--------------------------------------- ---------------------GAGTGTCTCCTCATAAATAAACTAAAGCGTAAAAGGAGAACTACATCAGCCCTCCATCC TGAGGATTTTATCAAGAAAGTAGATGTGGCAGTTGTTGAAAAGATTCCTGGAGATAGAATCAAGGGAACTGACCAAATAG AGCAGCAT---------GGTCATGTGATGAATATTACTAATTATGGTCATGAGAATGAAACA------------------ ------------AATGCTCACTCAACAGAA------TCACTGGACAAAGAATCTGCTTTTAGAACTAAAGCTGAACCTAT 
AAGCAGCAGTATAAGTAATATGGAACTAGAATTAAATATCCACAGTTTAAAAGCACCTAAGAAGAATAGGCTGAGGAGGA AGTCCTCTACAAGGCATATTCATGCACTTGAACTA---GTCAGTAGAAATCCAAGCCCACCTAATCATACTGAACTACAG ATTGATAGTTGTTCTAGCAATGAAGAG---GTGGAGAAGAAA---AACTCTGACCAAATGCCAGCCAGACACAACAAAAA TCTTCAACTTATAGAAGATAAAGAACCTGCAACTAGAGCTAAGAAGAGTAACAAGCCAGATGAACAAATAAATAAGAGAC TTACCAGTGATGCTTTTTCAGAACTAAATTTAACAAACACACCTGGTTTTGTTACCAACAGTTCAAATTCTGATAAACTT AAAGAGTTTGTCAATCCTAGCCTTCAAAGAGAAGAAATAGAAGAGCAT---CTGGGAACGATTAAAGTGTCTAATAGTAC CAAAGACCCCAAAGATCTGATACTAAGTGGAGGAAGA---GGTTTGCAA---ACCGATCGTTCTATGGAGAGTACCAGTA TTTCATTGGTACCTGATACTGATTATGGCACTCAGGCTAGTATGTCATTACTGGAACCTGACACCCCAGGG---AAGGTA AAA---ACAGCACCAAATCGACGTGCAGGTGTGTGTGCTGCAATTGAAAACCCCAAGGAAATTATCCATGGTTGT---TC TAAAGATACTAGAAATGACACAGGGGACTTTAAGGATCCACTAAGAGGTGAAGTTAAC---CACACNNNNNNNNNNNNNN NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN NTTTCAAATCCAGGAAATCCAGAAAAGGAATGTGCAACAGTCTGTGCCCACTCCAGTTCCTTAAGGAACGAAAGTCCAAA GGTCACTCTTGAATGTGGACAAAAAGAAGAAAATCACGGAAAGAAAGAGTCCAAAATCAAGCATGTGCAGGCAGTTCCAA CAACTGCAGGCTTTCCTGTGGTTTGTGAGAAAGAAAAAAATCCAGGAGATTATGCCAAATATACCAAAAAAGGAGTCTCT AGGCTTCGTCAGTCATCTCAGATCAGA---GGCAACAAAACCGAACTCACTGTTACAAATAAACATGGAATTTCTCAAAA CCCATATCATATACCACCCATTTCTTCCATCAGGTCATCTGTTAAAACTATATGTAAGGAAAAC---CTGTCAGAGGAAA ACCTTGAGGCATATTTGGTGTCACCTGAAAGAGCAATGGGAAATGAGAGCATTGTTCAAAGTACAGTGAGCACAGTTAGC CAAAGTAACATTAGAGAAAGCACTTTTAAAGAGGGCAGCTCCAGCAATATTTATGAAGCAGATTCCAGTGCTAATGAAGT AGGCTCTAGTATCAGTGAGGTAAGTTCCAGT---------------------GGTGAGAACATTCAAACAGAACTGGGTA GAAACCAAGGACCAAAATTAAATGCTGTGCTCAGATTAGGTCTCATGCAACCTCAAGTCTATGTGCAAAGCCTT---CCT GTAAGCAATTGTGAACATTCTGAAATAAAAAGGCAAGGAGAAAATGAAGGAGTAGTTGAGGCTGTTAATGCAGAATTCTC TCCATGTCAAACTTCAGATAACCTGGAACAA---CCTATGGGAAGTAGTCATGCTTCTCAAATTTGTTCTGAGACACCGG ATGACCTGTTAAACGATTATGAAATAAAGGAAAATGTTAGCTTT---------------AAGGAAAGATCTGCTGTTTTT AGCAAAAGTGTCCAAAAAGAAGAATTGAGTAGAAGCCCTAGCCCCTTAATCCAT---ACATGTTTAGCTCAGGGTCACCG AAGA--------------------------------------------- 
>LeafNose TGTGGCACAAATACTCATGCCAGCTCTTTACATTATGAGCACAGCAGTTTATTACTCACTGAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAGCAGCCTGGCTTAGCAAGGAGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCCAGCACAGAGGAAAATGTAGTTCTGAATACTGATCCCCTGAATGGGAGAAAAGAA CTGAATAAGCAGAAACCTCCATGCTCTGACAGTCCTAGGGAT---TCCCAAGTT---GTTGCATGGATAACACAGAATAG TAGCATACAGAAAGTTAATGAGTGGTTTTCCAGACGTGATGAAATATTAACTTCTCATAGCTCATGTTATGGGAGAGCTG AATCAAATACAGAAGTATCTGGTGCAGTAGAAGTTCCACTT------GAAGTAGATGGATTTTCTGGCTCTACAGAGAAA ATAACCTTAATGACCAGTGATCCTCATGATGCTGTAATATGTGAAAGTGGAAGAGTCCACTCCAAACCATTGGAAAGTAC T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCTAACTTCAGCCACACAACTGAAAACA TAATTATAGGAGCATCTGCTGTAGAACCTCAGATAACACAA--------------------------------------- ---------------------GAGTGTCCCCTCACAAACAAACTAAGGCGTAAGAGGCGAACTACGTCAGGCCTTCATCC TGAGGATTTCATCAAGAAAGTAGATTTGACAGTTGTTCAAAAGACTCCTGAAAAGATAATTGAGAGAACTCACCAAACAG AACAGAAT---------GGTCATGTGATGAACATTACTGATAATGGTCATGGGAATGAAACAAAAGGTGATTAT---GTT CAGAAAGAGGATAATGCTAACCCAACAGAA------TCATTGGAAAAAGAATCTGCTTTCAGAACTACAGCTGAACCTAT AAGCAGCAGTATAAGCCATATGGAACTAGAATTAAATATCCATAGTTCAAAAGCACCTAAGAAAAATAGGCTGAGGAGGA AGTCCTCTACCAGGCCTATTCATGCACTTGAACTAGTAGTCAGTGGAAATCCACGCCCACCTAGTCAGACTGAGCTACAA ATTGATAGTTGTTCTAGCAGTGAAGAG---GTGAAGAAAAAA---ATTTCTGACCAAGTGCCAGTCACACACAGCAAAAC GCTTGAACTCATAGAAGATCAAGAACCTGCAAATGGAGTCAAGAAGAGGAACAAGCCAAATGAACAAATAAATAAGAGAC TTACCAGTAATGCTTTTCCAGAACTAAATTTAACAAACATACCTGGTATATTG---AACCGTTCAAGTTCAAATAAACTT CAAGAGTTTGTCAATCCTAGCCTTCAAAGAGAAGAAATAGAAGAGAGC---CTAGGAACAATTCAAGTGTCTAATAGTAC CAAAGGCCTCAAAGATTTGATATCAAATGGGGGAAGA---GGCTTGCAA---ACTGGTCAATCTATGGAAAGTACCAGTA TTTTATTGGTACCTGATACTGATTATGGCAGTCAGGATAGTATGTCATTACTCGAACCTGACACCCCAGGG---AAGGCA AAG---ACTGCACCAAATCAACATGTGGGTGTATGTACAGCAGTTGAAAACCCCGAGGAACTTATCCATGGTGGT---TC TAATGATACTAGAAATGACACAGAGAGCTTTAAGGATTCATTGAGACATGAAGTTAAC---CACGGTCAGGAGACAAGCA TAGAAATGGAAGAGAGTGAACTTGATACACAGTATTTACAGGAAACATTCAAGGTTTCAAAGCGTCAATCATTTGCTCTG 
TTTTCAAATCCAAGAAATCAAAGAAAGGAATGTGCAACAATCCAGTCCAGGTCC------TTAAGGAAACAAAGTGCAAA AGTCACTCTTGAATGTGGACAAAAAGAAGAAAATCAGGGAAAGAAAGATTCTAAAATCAAGCTTGTACAGGCAGTTCATA CAACTGCAGGCTATCCTGTGGTTTGTGAGAAAGATGAAAATCCAGGAGATTATGCCAAATACAGCACAAAAGGAGTCTCT AGGCTTTGTCAATCATCTCTGTTCAGA---AGCAACGAAACTGAACTCACTATTGCAAATAAACATGGAATTTCTCAAAA CCCACATAATATACCACATATTTCTCCCATCAGGTCATCTGTTAAAACTGTATGTAAGAAAAAC---CTGTCAGAGGAAA ACTTGGAGGAATATCCAGTGTCACCTGAAAGAGCAATGGGAAATGAGAGCATCATTCAAAGTACAGTGAGCACAATTAGC CAAAATAACATTAGAGAAAGCACTTTTAAAGAAAGCAGCTCAAGCAATNTTTATGAAGCAGATTCCAGTACTAATGAAGT AGGCTCTAGTATCAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTCAAGCAGAACTCGGCA GAAACAGAGGACCGAAATTAAATGCTGTGCTCGGATTAGGTCTCGTGCAACCTGAAGTCTATAGGCAAAGCCTT---CCT GTAAGTAATTGTCAACATCCAGAAATAAAAAGGCAGGGAGAAAATGAAGGAATAGTTCAGGCTGTTAGTGCAGACTTCTC TCCATGTCAAATTTCAGATAACCTAGAACAA---CCTACGGGAAGTAGTCATGCTTCTCAGGTTTGTCCTGAGACACCGG ATGACCTGTTAAATGATAACGAAATAAAGGAAAATGACAGCTTTGCTGAAAGTGACATTAAGGAAAGATCTGCTGTTTTT AGCAAAAGTGTCCAGAAAGGAGAATTCAGAAGGAGCCCTAGCCCCGTAGCTCAC---ACACGTTTGGCTCAGGGTCACCA AAGACGGGCCAGGAAATTAGAGTCCTCAGAAGAGGA------------- >Horse TGTGGCACAAATACTCATGCCAGCTCATTGCAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAAGAGCCAACAGAGCAGACGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGGCCCCCAACTCAGAGGAAAAGCTAGTTCTGAATGCTGATCCGCTGTATGGGAGAGAAGAA CTGAATAAGCAGAAACCTCCACGCTCTGACAGTCCTAGAGAC---TCCCAAGAT---GTTCCTTGGATAACACTGAATAG TAGCATACAGAAAGTTAATGAGTGGTTTTCCAGAAGTGAGGAAATGTTAACTTCTGATGACTCATGTGACGGAGGGCCTG AATCAAATACAGAAGTAGCTGGTGCAGTAGAAGTTCCAAAT------GAAGTACGTGGATATTCTGGTTCTTCAGAGAAA ATAGACTTAATGGCCGGTGATCCTTCTAGTGCTTTAATATGTGAAAGTGAAAGAGTCCGCTCCAAACCAGTAGAGAATAA T---ATTGAAGATAGAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCTTCCCTAACTTGAGCCACATAACTGAAGATC TAATTATAGGGGCATCTGCTATAGAACCTCAGATTACACAA--------------------------------------- ---------------------GAGCGTCCACTCACAAATAGAGTGAAGCATAAAAGGAGAACGTCATCAGGCCTTCATCC 
TGAGGATTTTATCAAGAAAGTAGATTTGGCAGTTGTCCAAAAGACTCCTGAAAAGATAATTGAGGGAACTGACCAAATAG AGCAGAAC---------AGTCATGTGATGAATATTACTCCTAATGGTCATGGGAATGAAACAAAAGGCGATTAT---GTT CAGAATGAGAAAAACGCTTACCTAACAGAA------TCATTGGAGACAGAATCTGCTTTCAGAACTAAAGCTGAACCTAT AAGCAGCAGCATAGGCAATCTGGAACTGGAATTAAATATCCACAGTTCAAAAGCACCTAAGAAGAATAGGCTGAGGAGGA AGTCTTGTACCAAGCAGATTCATGCACTTGAACTAGTAGTCAGTAAAAATCCAAGCCCACCTAATCATACTGAACTACAA ATCGATAGTTGTTCTAGTAGTGAAGAG---ATGAAGAAAAAA---AATTCTGACCAAATGCCGGTCAGACACAGCAAAAA GCTTCAACTCATGGAAGATAAAGAACCTGCCACTGGAGCCAAAAAGAGTAACAAGCCAAATGAACAAATAAATAAAAGAC TTGCCAGTGATGCTTTTCCAGAGCTAAAATTAACAAACATACCTGGTTTTTTTACTAACTGTTCAAGTTCTAATAAACTT CATGAGTTTGTCAATCCTAGCCTTCAAAGAGAAGAAATAGAACAGAAC---CTAGGAGCAAATCGACTGTCTAATAGTGC CAAAGACCCCAAAGATCTGATATTAAGTGGAGGAAAA---TGTTTGCAA---GCTGAAAGATCTGTAGAGAGTTCCGGTA TTTCATTGGTACCTGATACTGATTATGGGACTCAGGATAGTATCTCACTGCTGGAAGCTGACACCCTAGGG---AAGGCA AAA---ACAGCACCAAATCAATGTGCAAATCTATGCGCAGCAATTGAAAACCCCAAGGAACTTACCCATGATTGT---TC TAAGGATACTAGAAATGATACACAGGGCGTTAAGGATCCATTGAGACGTGAAGTTAAC---CACACTCAGGAGACAAGCA TAGAAATGGAAGAGAGTGAATTTGATACGCAGTATCTACAGAATATGTTCAAGGTTTCAAAGCGTCAGTCATTTGCTCTG TTTTCAAATCCAGGAAGTCCAGAAAAGAAGTGTGCAACAGTCAGTGCCCACTCCAGGTCCTTAAGGAAACAAAGTCCAAA AGTCACTCTTAAATGTGGACAAAAAGAAGGAAAGGAGGGAAAGAAAGAGTCTAAAATCAAGAATGTGCAGTCAGTTCACA CAACTGTGGGCTTTCCTGTGATTTGTCAGAAAGATAAGAAGCCAGGTGACTATGTCAAATGTAGCACAAAAGAAGCCTCT AGGCTTTGTCAGTCATCTCAGTTCAGA---GGCAACGAAACNGAACTTATTACTGCAAATAAACATGGAATTTCACAAAA CCCATATTATATACCATCACTTTCTCCCATCAGGTCATCTGTTAAAACTGTATGTCAGAAAAAC---CTGCCAGAGGGAA AGCTTGAGGAACAGTCACTGTCACCTGAAAGAGCAATGGGAAATGAGAGCATTGTTCAAAGTACAGTGAGCACAATTAGC CAAAATAACATTAGAGAAAGCACATTGAAAGAAGTCAGCTCAAGC---------------------AGTATTAATGAAGT AGGCTCTAGTATTAATGAAGTAGGTTCCAGT---------------------GGTGAACACATTCAAGCAGAACTAGGCA GAAACAGAGGACCTAAATTAAATGCTATTCTCAGATTAGGTCTTATGCAACCTGAAGTCTATAAGCAAAGTCTT---CCT ATAAGTAATTGTAAACATCTGGAAATAAAAAGGCAAGGAGAAAAGGAA---GTAGTTCAGGCTGTTAACGCAGACTTTTC 
TCCGTGTCTAATTTCAGATAACCTAGAACAA---CCTATGGGAAGTAGTTGTGCTTCTCAGGTTTGTTCTGAGACACCTG ATGACCTGTTAAATGATGACGAAATAAAGGAAAATATCAGCTTTGCTGAAAGTGGCGTTAAGGAAAGATCTGCCGTTTTT AGCAAAAGCGTCCAGAAAGGAAAGTTCAGAAGGAGTCCTAGCCCTATAGGCCGT---ACGTGTTTGGCTCAGGGTCACCA AAGACGGGCCAGGAAATTAGAGTCCTCAGAAGAGAACACGTCTAGTGAG >Rhino TGTGGCACGAATACTCATGCCAGCTCATTGCAGCATGAGAACAGCAGTGTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGTGAACAGCCTGGTTTAGCCAAGAGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCCAACTCAGAGAAAAAGCTAGTTCTGAACGCTGATCCYCTGTATGGGAGAAAAGAA CTGAATAAGCAGAAACCTCCATGTTCTGACAGTCCTAGAGAT---TCCCAAGAT---ATTCCTTGGATAACACGGAATAG TAGCATACAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAAATATTAACTTCTGATGACTCACATGATGGGGGGCCTG AATCAAATACTGAAGTAGCTGGTGCAGTAGAAGTTCAAAAT------GAAGTAGATGGATATTCTGGTTCTTCAGAGAAA ATAGGCTTAATGGCCAGTGATCCTCCTGGTGCTTTAATATGTGAAAGTGAAAGAGTCCACTCCAAACCAGTAGAGAATAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCTTAACTTGAGCCACATAACTGAAGATC TAATTATAGGAGCATCTGCTATAGAATCTCAGATTACACAG--------------------------------------- ---------------------GAGCGTCCCCTCACAAATAAACTGAAGCATAAAAGGAGAACTACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGTAGATTTGGCAGTTGTCCAAAAGACTCCTGAAAAGATAATTGAGGGAACTGACCAAATAG AGCAGAAC---------GGTCGTGTGATGAGTATTGCTAATAATGGTCATGAGAATGAAACAAAAGGTGATTAT---GTT CAGAAAGAGAAAAATGCTAACCCAACAGAA------TCATTGGAGAAAGAATCTGCTTTCAGAACGAAAGCTGAACCTAT AAGCAGCAGCATAAGCAATCTGGAACTGGAATTAAATATCCACAGTTCAAAAGCACCTAAGAAGAATAGGCTGAGGAGGA AATCCTCTACCAGGCATATTCATGCACTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAATCATACTGAACTACAG GTTGATAGTTGTTCTAGCAGTGAGGAG---ATGAAGAAGAAA---AGTCCCAGCCAGGTGCCAGTCAGACATAGCAGAAA GCTTCAACTCACMGAAGATAAAGAACCCGCAGCTGGAGCCAAGAAAAGTAACAAGCCAAATGAACAGATAAATAAAAGAC TCGCCAGTGATGCTTTTCCAGAACTAAAATTAACAAACGTACCTGGTTTTTTTGCTAACTGTTCAAGTTCTAATAAACTT CAAGAGTTTGTCAATCCTAGCCTTCAAAGAGAAGACATAGAACGGAAC---CTAGGAGCAATTCAAGTGTCTAATAGTAC CAAAGACCCCGAAGATCTGATATTAAGTGGAGGAAGA---GGTTTGCAR---GCTGAAAGATCTGTAGAGAGCACCAGTA 
TTTCATTGGTACCTGATACTGATTATGGCACTCAGGGTAGTATCTCATTACTGGAAGCTGACACCCTAGGG---AAGGCA AAA---ACAGCACCAGATCAACGTGCAAGTCTATGTGCAGCAATTGAAAACCCCAAGGAACTTATCCATGATTGT---TC TAAAGATACTAGAAATGACACAGAGGGCCTTAAGGATCCATTGAGATGTGAAGTTAAC---CACACTCAGGAGACAAGCA TAGAAATGGAAGAGAGTGAACTTGATACACAGTATCTACAGAATACGTTCAAGGTTTCAAAGCGTCAGTCATTTGCTCTG TTTTCAAATTCAGGAAATCCAGAAAAGGATTGTGCAGCAGTCTCTGCCCACACCAGGTCTTTAAGGAAACCAAGTCCGAA AGTCACTCTTGAATGTGGACAAAAAGAAGAAAATCAGGGAAAGAAAGAGTCTAAAGTCAAGCATGTGCAGTCAGTTCATA CAACTGTGGACTTTCCTGTGGTTTGTCAGAAAGATAAGAAACCAGGTGATCATGTCAAATATAGCATAAAAGAACTCTCT AGGCTTTGTCAGTCATCTCAGTTCAGA---GGCAATGAAACTGAACTCATTACTGCAAATAAACGTGAACTTTCACAAAA CCTGTGTTATATACCATCACTTTCTCCCATCAGGTCATCTGTTAAAACTATATGTAAGAAAAAT---GTGTCAGAGGAAA AGCTTGAGGAACATTCAGTGTCCCCTGAAAGAGCACTGGGAAACAAGAGCGTCATTCAAAGTACAGTGAGCACAATTAGC CAAAATAACATTAGAGAAAGCACTTTTAAAGAAGTCGGCTCAAGCAGTATTAATGAAGTAGATTCCAGTACTAATGAAGT AGGCTCTAGTATTAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTGAAGCAGAGCTAGGCA GAAACAGAGGACCTAAATTAAATGCTATTCTCAGATTAGGTCTTATGCAACCTGAAGTCTATAAGCAAAGTCTT---CCA ATAAGTAATTGTAAACATCCGGAAATAAAAAGGCGAGGAGAAAATGAAGGAGTAGTTCAGCCTGTTAATGCAGATTTCTC TCCGTGTCCAATTTCAGATAACCTAGAACAA---CCTGTGGGAATTAGTTGTGCTTCTCAGGTTTGTTCTGAGACACCTG ATGACCTATTAAATGACAACGAAATAAAGGAAAATATCAGCTTTACTGAAAGTGGCATTAAGGAAAGATCTGCTATTTTT AGCAAAAGCGTCCAGAAAGGAGAATTCAGAAGGAGCCCTAGCCCTTTAGCCCGT---ACATGTTTGGCT---------CA AAGAGGGGCCAGGAAATTAGAGTCCTCAGAAGAGAACATGTCTAGTGAG >Pangolin TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATAAATGTAGA AAAGACTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCTGCAAAGCAGATGGGCTGAATGTAAAGAAA CATGTAACGAT---AGGCAGGCTCTCAGCACAGGGAAAAAGGTAGTTCTGAATGCTGATCCCCTGTGTGGGAGAAAAGAA CTGAATAAGCAGAAACCTTCATGCTCTGACAGTCCTAGAGTT---TCTCAAGAC---GTTCCTTGGATAACACTAAATAG TAGCATACRGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAAATGTTAACTTCTGATGATCCATGTGATGGGAGGTCTG AATCAAATACTGAAGTAGCTGGTGCAGTAGAAGTTCCAAAT------GAAGTAGATGAAAATTGTGGTTCTTCAGAGAAA 
AAAGACTTAATGGCCAGTGATCCTCATGATGCTTTAATACGTGAAAGTAAAAGAGGCCACTCCAAACCAGTAGAGAGTAA T---ACTGAAGATAAAATATTTGGGAAAACCTATCGGAAGAAGGCAAGTCTCTCTCACTTGAGCCACGTAACTGAAAATC TAATTATTGGAGCATTTGCAGTAGAACCTCAGATAACACAA--------------------------------------- ---------------------GAGCGTCCCCTCAGAAATAGAGTAAAGCGTAAAAGGAGAACTACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGTAGATTTGGAAGTTGTTCAAAAGACTCCTGAAAAGATAATTGAGGGAACCGACCAAATAG AGCAGAAT---------GCTCTTCTGATGAATAGAACAAATAACGGTCATAAGAATGAAACAAAAGGTGATTAT---GTT CAGAAAGAGAAAAATGCTAACCCAACAGAA------TCATTGAAAAAAGAACCTGCTTTCAGAACTAGAGCTGAACCTAT AACCAGCAATATAAGCAATATGGAGCTAGAATTAAATATCCATAGTTCAAAAGTACTTAAGAAGAATAGGCTGAGGAGGA AGTCTTCTACCAGGCACATTCATGCACTTGAACTAGTAGTCAATAGAAATCCAAGCCCCCCTAAACATACTGAACTACAA ATTGATAGTTGTTCTAGCAGTGAAGAGTTGATGAAA---------------AACCAAATACCAGTCAAACATNCCAAAAA GCTTCAACTCAAGGAAGATAGAGAACCTGCATCTAGAGCCAAGAAGAGTAACAAGCCAAATGAACAAATAAATAAAAGAT TTACCAGTGACACTTTTCCAGAACTAAATTTAACAAACATACCTGGTTTTTTTAGTAAATGTTCAAGTTCTAATAAACTT CAAGAGTTTGTTGACCCTAGCCTTCAAAGAGAAGAAATAGAAAATAAT---CTAGAAACAATTCTA------------AC CAAAGACCCCAAAGATGTGATATTACATAGAGGA------GGTTTGCAA---ACTGAAAGATGTGTAGAGAGTAACAGTA TTTCATTGGTACCTGATACTGATTACGGCACTCAGGATAGTATCTCATTACTGGAAGCTGACACACTAGGG---AAGACA AAA---ACAGCACCAAATCAATGTGCAAGTCTGTGTGCAGCAATTGAAAACCCCAAGGAACTTACCCATTGTTGT---GC TAAAGATATTAGAAATGACACAGAGAGCTTTAAGGATCTATTGAGACATGAAGATAAC---CACACTCAGGAGACAAGCA TAGGAACGGAAGAGAATAAACTTGATAAGCAGTACTTACAGAATACTTTCAGGGTTTCAAAGCATCAGTCATTTGCTCAG TTTTCAAATATGGGAAATCCAGAAAAAGAATGTGCAGCAGTCAGTGCCCACTCTGGGTCCTTAAGGAAACAAAGTCCAAA AGTCACTCTTGCATATGGACAAAAAGAAGAAAATGAGGGAAAGAAAGAGTCTGAAATCAAGCATGTTCAAACAATTCATA CAACTGCAGGCCCTCTTGTGGTTAGTCAGAAAGATGAGAAGCCAGGTGATTATGTAAAATTTGGCATAAAAGGAGTCTCT ACGCATTGTCAGTCATCTCAGTTCAGA---GGCAATAAAACTGAACTTATTTTTGCAAATAAACCTGGAATTTCACAATA CCCATATCATATATCATCAATTTCTCCCATCAGGTCATCTGTTAAAATAATATGTAAGGAAAAC---CTGTCAGAGGAAA AGTCTGTGGAACATTCGATGTCATTGGAAAAAGCAGTGGGAAACAAGAGCATCATTCAAAGTACTGTGAGCACAATTAGC 
CAAAATAACATTAAAGGAAACAATTTTAAAGATGGCAGCTCCAGCAGCATTAATGAAATAGGTTCCAGTACTAATGAAGT AGGCTCTAGTATTAATGAAGTAGGTTCCAGT---------------------GGTGAAAAAATTCAAGCAGAACTAGGTA GAAACAGAGGACCTAAATTAAGTGCTGTGCTCAGATTAGGTCTTATGCAACCTGAGGCCTATAAGCAAAATCTT---CTT ATAGGTAATCGTAAACACCCTGAAATTAAAAGACAAGGAGAAAAAGAAGGAGTAGTTCAGGTTGTTAATGCTGATTTCTC TCCATGTCTAATTTCAGATAACCTAGAACAA---CCTATGGGAAGTAGTCATGCTTCTCAGGTTTGTTCTGAGACACCTG AGGACATATTAAATGGTGATGAAATAAAGGAAAATATCGGCTTTGCTGAAAGTGGTATTAAGGAAAGATCTGCTGTTTCT AGCAAAAGTGTCCAGAAAGGAGAATTCAGAAAGAGCCCAAGCCCTGTAGTCCAT---ATGAGTTTGTCTCAGAGTCACCA AAGAAGAGCTGGGATAATAGAGTCTTCAGAAGAGAACATGTTTAGTGAG >Cat TGTGCCACAAATACTCGTGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGGCAGAATGAATGTAGA AAAGGCTGAATTCTGTAATGAAAGCAAACAGCCTGGCGTAGCAAGGAGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCCAGCACAGAGAAAAAGGTAGTTCTGAATGCTGATCCCCTGTGTAGGAGAAAA--- CTGAGTAAGCAGAAATCTCCATGCTCTGACAGTCCTAGAGAT---TCCCAAGAT---GTACCTTGGATAACACTGAATAG TAGCATACAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAAATGTTAACTTCTGATGACTCACATGATGAGGGATCTG AATCGAATCCTGGAGTAGCTGGTGCA---GAAGTTCCAAAT------GTAGTGGATGGATATTCTGGTTCTTCTGAGAAA ATAGACTTAATGGCCAGTGATCCTCATGATGCTTTGATATGTGAAAGTGAAAGAGTTCACACCAAACCAGTAGAGAGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTTCCTAACTTGAGCCACACAAGTGAAGATC TCATTATAGGAGCATGTGCTATAGAACCTCAGATAACGCAA--------------------------------------- ---------------------GCCTATCCCCTCACAAATAAAACAAAGCGTAAAAGGAGATCTACCGCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGTAGATTTGGCCATTGTTCAAAAGACTCCTGAAAAGCTAATTGAGGGAACCGATCGAATAG GGCAAAAT---------GGCCATGTGATGAATAGAACAAATAATGGTCCTGAGAATGGGACAAAAGGTGATTAT---GTT CAGAAAGAGAAAAATGCTAAGCCAACAGAA------TCATTGGAAAAAGAATCTGCTTTTAGAACCAAAGCTGAACCAAT AAGCAGCAATATAAGTAATATGGAATCAGAATTAAATAGCCACAGTTCAAGGGCACCTAAGGAGAACAGGCTGAGGAGGA AGTCCTCTACCAGGCACATGCGTGCGCTGGAATTAGTAGTCAATAGAAATCCAAGCCCACCTGATCATACCGAACTACAG ATTGATAGTTGTTCTAGCAGTGAAGAGATGGTGAAAAGAAAA---AGTTCGGAACAAATGCTAGTCAGACACAGCAAAAC ACTTCAACTCGTGGAAAATAAAGAACCTGCACCTGGAGCCAAGAAGCGTAACAAGCCAAGTAAACAAATAAATAAAAGAC 
TTGCCAGTAACACTTTTCCAGAGCTAAATTTAACAAACATACCTGGGGTTTTTACTAACTGTTCAAGTTCTAATACACTT CAAGAGTTTGTCAACCCTGGCATTCAAAGAGAAGAACTAGAAGAGAGC---CGAGGAACGATTCACGTGTCTGATAGGAC CAGAGATCCCAAAGCGCTGGTATCGAGTGGAGGAAGA---AGTTTGCAA---ACTGAAAGATCTGTAGAGAGTACCAGTA TTTCATTGGTACCTGATGCCGATTATGGCACTCAGGATAGTATCTCATTACTGGAAGCTGACACCCTAGGG---AAGGCA AAA---ACAGCACCAAATCAACGTGTGAGTCTGTGTGCAGCAATTGAAAACCCCAAGGAAGCTATCCGTGGTTGT---TC CAAAGATACTAGAAATGGCACAGAGAGTTTTACAGATCCTCTGAGACGTGAAGATANC---CATACTCAGGAGACAAGTA TAGAAATGGAAGAGAATGAACTTGATACGCAGTGGTTATACAATACGTTCAAGGGTTCAAAACGTCAGTCATTTGCTCTG TTTTCAAATCCAGGAAACCCAGAAAAGGAATGTGCTACAGCCTGGGCCCGTTCCACGTCCTTAAGGAAACAAAGTCCAAA AGTCGCTCTTGAATATGAACAAAAAGAAGAAAATCAGGGAAAGAATCAGTCTGAAATCAAGCATGTGCAGGCAGTCCGTG CAACTGCAGGCTTTTCTGCAGTCAGTCAGAAAGTGGAGAAGCCAGGTGATTATGCCAAATGTAGCATAAAAGGAGTCCCT GGGCTCTGTCAGTCATCTCAGTTCAGA---GGCAATGAAACTGAACTCTTTATTGCAAATAACCATGACATTTCAAAAAA CCCTTATCATATACCACCACTTTCTCCCATCAGATCATCTGTTAATGCTGTATGTAAGAAAAAC---CTGTCAGAGGAAA AGTTTGAGCAGCGTTCAATGTCACCTGAAAGAGCAGTGGGAAATGAGAGCGTCATTCAAAGTACAGTGAACACAATTAGC CAAAATAACATTAGAGAAAACACTTTTAAAGAAGTTAGCTCAAGCAGTGTTAATGAAGTAGGTTCCAGTGCTAATGAAGT AGGCTCTAGCATTAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTCAAACAGAACTGGGTA GAAACCGAGGACCTAAGTTAAGTGCTATGCTCAGGTTAGGTCTTATGCAACCTGAAGTCTATGAGCAAAGTCTT---CCT ATAAGTAATTGTAAATGTCCAGAAATGAAAAGGCAAGGAGAAAATGAAGGAGTAGTTCAGTCTGTTAATGCAGATTTTTC TCCATGTCTAATTTCCGATAATGTAGAACAA---CCTATGGAAAGTAGCCGTGCTTCTCAGGTTTGTTCTGAGACACCCA ATGACCTATTAAATGGTGATGAAATAAAGGGAAAAATCAGCTTTGCTGAAAGT---GCTAAGGAAAGATCTGCTGTTTTT GGCAAGAGTGTCCAGAAAGGAGAATTCAGAAGGAGCCCTAGCCCTTTAGACCAT---ACACATCTGGCTCAGGGTCACCA AACAGAGACCAGGAAGTTAGAGTCCTCAGAAGAGAACGTGTCTAGTGAG >Dog TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAACACAGAATGAATGTAGA AAAGGCTGAAATCTGTAATAACAGCAAACAGCCTGGCTTAGCAAGGAGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGATTCCCAGCACAGAGAAAAAGGTAGTTGTGAATGCTGATCTCCTGTGTGGGAGAAAAGAA CTGAATAAACAGAAACCTCCACACTCTGATAGTCCTAGAGAT---TCCCAAGAT---GTTCCTTGGATAACACTGAATAG 
TAGCATACGGAAAGTTAATGAGTGGTTTTCCAGAAGTGACGAAATATTAACTTCTGATGATTCACATGACAGAGGATCTG AATTGAATACTGAAGTAGGTGGTGCAGTAGAAGTTCCAAAT------GAAGTGGGTGAATATTCTGGTTCTTCTGAGAAA ATAGACTTAATGGCCAGTGATCCTCAGGATGCTTTCATATGTGAAAGTGAAAGAGTCCACACCAAGCCAGTAGGAGGTAA T---ATCGAAGATAAAATATTTGGAAAAACCTATCGGAGGAAGGCAAGCCTCCCTAAGGTGAGCCACACAACTGAAGTTC TAACTATAGGAGCGTGTGCTATAGAACCTCAGACAATGCAA--------------------------------------- ---------------------ACCCATCCCTTCATGAATAAAGCAGAGCATAAAAGGAGAACTACATCTAGCCTTCATCC TGAGGATTTTATCAAGAAAGTAGAGTTAGGCATTGTTCCAAAGACTCCTGAAAAGCTAATTGAGGGAATCAACCAAATCA AGCGAGAT---------GGTCATGTGATAAATATTACAAATAATGGTCCTGAGAATGAAACAGAAGGTGATTAT---GTT CAGAAAGAGAAAAATGCTAACCCAACAGAA------TCATTGGAAAAAGAATCTGCTTTTAGAACCAAAACTGAACCAAT GAGCAGCAGGATAAGCAATATGGAACTGGAATTAAATAGCTCCAGTTCAAAAGCACCTAAGAAGAACAGGCTGAGGAGGA AGTCCTCTGCCAGGCACACTTGTGCCCTTGAATTCGTAGTCAATAGAAATCTAAACCCACCTGATCATAGTGAACTACAG ATTGAAAGTTGTTCTAGCAGTGAAGAG---ATGAAGAAACAG---CATCTGGACCAAGTACCAGTCAGACACAACAAAAC ACTTCAGCTCATGCAAGATAAAGAACCTGCAGGTAGAGCTAAGAAAAGTAGTAAGCCAGGAGAACAAATAAATAAGAGAC TCGCCAGCCATGCTTTTCCAGAGCTAACTTTAACAAATGTATCTGGTTTTTTTGCTAACTATTCAAGTTCTAGTAAGCCT CAAGAGTGCATCAACCCTGGCCTTCGAAGAGAAGAAATAGAAGAGAGC---CGAAGAATGACTCAAGTGTCTGATAGTAC CAGAGATCCCAAAGAGCTGGTATTGAGTGGAGGAAGA---GGTTTGCAA---ACTGAGAGATCTGTAGAGAGTACCAGTA TTTCATTGGTACTTGATACTGATTATGGTACACAGGACAGTATCTCATTACTGGAAGCTGACACCCTGAGG---AAGGCA AAA---ACAGTATCAAATCAACAGGCGAATCTGTGTGCAACAATTGAGAACCCCAAGGAACCTATCCATGGTTGT---TC TAAGGACACTAGAAATGACACAGAGGGTTTTGTAGTTCCATTGACGTGCAAAGATAAC---CACACTCAAGAGACAAGCA TAGAAATGGAAGAGAGTGAACTTGACACGCAGTGCTTACGCAATATGTTCAAGGTTTCAAAGCGTCAGTCATTTGCTCTG TTTTCATATCCAAGAGATCCGGAAGAGGACTGTGTAACAGTCTGTCCCCGCTCTGGGGCCTTTGGAAAACAAGGTCCAAA AGTCACTCTAGAATGTGGACAGAAAGAAGAAAGTCAGGGAAAGAAAGAGTCTGAAATCAGACATGTGCAGGCAGTTCATA CAAATGCAGGCTTTTCTGCAGTTAGTCAGAAAGCTAAGAAGCCAGGCGATTTTGCCAAATGTAGCATAAAGGGAGTCTCT CGGCTTTGTCTGTCATCTCAGTTCAAA---GGCAAGGAAACTGAACTCCTTATTGCAAATTACCATGGAATTTCCCAAAA 
CCCTTATCATATACCACCACTTTCTCCCATCAGATCATGTGTTAAAACTCTATGTCAGGAAAAC---CTGTCAGAGGAAA AGTTTGAGCAACATTCAATGTCACCCGAAAGAGCAGTGGGAAATGAGAGAGTCATTCAAAGTACAGTGAGCACAATTAGC CAAAATAACATTAGAGAATGTGCTTCTAAAGAAGTCGGCTCAAGCAGTGTTAATGAAGTAGTTTCCAGTACTAATGAAGT AGGCTCTAGTGTTAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTCAAGCAGAACTAGGTA GAAACCGAGGACCTAAATTAAATGCTATGCTCAGATTAGGTCTTATGCAACCTGAAGTCTGTAAGCAAAGCCTT---TCT TTAAGTAATTGTAAACATCCAGAAATGAAATGGCAAGGACAAAGTGAAGGAGCAGTTCTGTCTGTTAGTGCAGATTTCTC TCCATGTCTGATTTCAGATAACCCAGAACAA---CCTATGGGAAGTAGTCGGTCTTCTCAGGTTTGTTCTGAGACACCTG ATGACCTATTAAATGGTGACAAAATAAAGGGAAAAGTCAGCTTTGCTGAAAGTGACATTAAGGAAAAATCTGCTGTGTTT AGCAAAAGTGTCCAGAGTGGAGAGTTCAGCAGAAGCCCTAGCCCTTCAGACCAT---ACACGTTTGGCCCAGGGTTACCA GAGAGGGACCAAGAAATTAGAGTCCTCAGAAGAGAACATGTCTAGTGAG >Llama TGTGGCACAGATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAGGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGTCTTAGCAAGGAGCCAACAGAGCAGATGGGCTGAAAGTAAGGACA CATGTAATGAT---AGGCAGACTCCCAGCACAGAGAAAAAGGTGGCTCTGAATGCCGATCGCTTATATGGGAGCGAAGAA CTGAATAAGCAGAAACCTGCATGCTCTGACAGTCCTAGAGAT---TCCCAAGAT---GTTCCTTGGATAACACTGAATAG TAGCATACAGAAAGTGAATGAGTGGTTTTCCAGAAGCGACGAAATGTTACCTTCTGATGACTCACATGAAGTGGGGCCTG AATCAAATACTGAAGTAGCTGGTGCAGTGGAAGTTCCAAAT------GAAGTAGATGGCTATTCAGGCTCTTCAGAGAAA ATAGACTTAATGGCCAGTGATCCTCATGGTGCTTTAAAATGTGAAAATGAAAAAGTCCACGCCAAACCAGTAGGGAGTAA C---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCTAACTTGACCCACAGAGCTGAAGATC TAATTCTAGGAGCATCTGTTCTAGAGCCTCAGATAACACAA--------------------------------------- ---------------------GAGCGCCCCTTCACAAATAAACTAAAGCGTAAAAGGAGAACTCTACCAGGTCTTCATCC TGAGGATTTTATCAAGAAAGTCGATTTGGCCGTTGTTCAAAAGTCTCCTGAAAAGATAATTGAGCGAACAGACCAAACAG AGCAGAAT---------GGTCATGGGATGAATATTACTAGTAATGGTCATGAGAATGAAACAAACGACGATTAT---GTT CAGAAAGAGAAAAATGCTAACCCAACAGAA------TCATTGGAAAAAGAATCTGCTTTCAAAACTAAGGCTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTAGAATTGAATACCCATAGTTCAAAAGCACCTAAG---AATAGGCCGAGGAGGA 
AGTCCTCTACCAGGCAGATTCATGCACTTGAACTAATAGTCAGTAGAAACCCAAGCCCACCTAATCACACTGAACTACAG ATTGAGAGTTGTTCCAGCAGTGAAGAG---ATGAAGGAAAAA---CATTCTGACCAAATGCCAGTCAGGCACCGCAAAAA GCTTCAGCTCACGGGAGATAAAGAACCTACGACTGGAACCAAGAAGAGTAACAAGCCACATCAACAAATAAATAAAAGAC TTGTCGGTAACACTTTTCCAGAACTAAATTTAACAAACACACCTGGTTTTTTTACTAAGTGTTCAAGTTCTAATAAACTT CAAGAGTTTGCCAATCCTAGCCTTCAAAGAGAA---------GAGAAC---CCAGGAGCAATTCAAGTATCGAACAGTAC CAAAGACCCCAAAGTTCTGATATTAAGTGGAGGGAGA---GGTTTACAA---ATTGAAAGATCTGTAGAGAATACCAGTA TTTCCTTGGTACCTGAAACTGATTATGACACTCAGGACAGTGTCTCATTACTGGAAGCTGACACCCTAGGG---AAGGCA AAA---ACAGCACCAAATCAATGTGTGAGTCTCTGTGCAGCAACTGAAAACCCCAAGGAACTTATCCATAGTTGT---TC TAAAGATACTAGAAATGGCACAGCAGGCTTTAAGGATCCATTGGGATGTGATGTTAAT---CACACTCAGGAGGCAAGCA TGGAAGTGGAAGAGAGTGAACTTGATACTCAGTATTTGCAGAATACGTTCAAGGTTTCAAAGCGTCAGACATTTGCTCTG TTTTCAAATCCAGGAGATTCAGAAAAGGAATGTGCAACGATCTATGCCCACTCTGGGCCCTTAAGGGAACAAAGTCCAAA AATCACTCTTGAATGTGGACAAAAAGAAGAAAATCAGAGAAAGAGCAAGTCTGAAATCAAGCATATGCAGGCAGTTCATA CAACTTTGAACTTTCCTGTGGTTGGTCAAAAAGATAAG---CCGAGTGATTATGCCAAATATAGCCCAAAAGGAATATCT AGGCTTTGTTGGCCATCACAGCTCAGA---GGCAATGAATCTGAGTTCATTATTGCAAATAAACATGGGATTTTACAAAA CCCATATCTTATAACATCACTTTCTCCTAACAGGTCATCTGTTAAAACTATGTGTAAGAAAAAC---CCGTCCGAGGAAA AGCTTGAGAAATGTGTCATATCACCTAAAAGAGTGATGGGAAACGAGAGCACCATTCAAAGTATAGTGAGTGCAATGAGC CAAAATAACATTAGAGAAAGCACTTTTAAAGAAGTCAGCTCAAGCAGTGTCAATGAAGTAGGTTCCAGTACTAATGAAGT AGGTTCTAGTATTAATGAAGTAGGTTCCAGT---------------------GGCGAAAACATTCAAGCAGAACTAGGTA GAAACATGGGACCTAAATTAAATGCTATGCTCAGATTAGGTCTTTTGCAACCTGAAGTCTGTAAGCAAAGTCTT---CCT GTAAGTAATTGTAAACAACCTGAAATAAAAAGGCAAGGAGAAAATGAAGGCATCTTTCAGGCTGTTAATACCGACTTCTC TCCATGTCTAATTTCAGATAACCTAGAACAA---CCTATGGGAAGCAGTCATGCTTCTCAGGTTTGTTCTGAGACACCTG ATGACCTGTTATATGATGACGAAATAAAGGAAAATACCAGCTTTGCTGAAAGTGACATTAAGGAAAGATCTGCTGTTTTT AGCAAAAGTGTCCAAAAA---GAATTCAGAAGGAGCCCTAGCCCTTTAGTGCAT---ACGCATTTGGCTCAGGGTCACCA AAGAGGGGCTAGGAAATTAGAGTCCTCAGAAGAGAATGTGTCTAGTGAG >Pig TGTGGCACAGATACTCATGCCAGCTCGTTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA 
AAAGGCTGAATTTTGTAATAAAAGCAAGCAGCCTGTCTTAGCAAAGAGCCAACAGAGCAGATGGGCTGAAAGTAAGGGCA CATGTAATGAT---AGGCAGACTCCTAACACAGAGAAAAAGGTAGTTCTGAATACTGATCTCCTGTATGGGAGAAACGAA CTGAATAAGCAGAAACCTGCGTGCTCTGACAGTCCTAGAGAT---TCCCAAGAT---GTTCCTTGGATAACATTGAATAG TAGCATACAGAAAGTTAATGAGTGGTTTTCTAGAAGCGATGAAATGTTAACTTCTGACGACTCACAGGACAGGAGGTCTG AATCAAATACTGGGGTAGCTGGTGCAGCAGAGGTTCCAAAT------GAAGCAGATGGACATTTGGGTTCTTCAGAGAAA ATAGACTTAATGGCCAGTGACCCTCATGGTGCTTTAATACGTGAACGTGAAAGAGGGCACTCCAAACCAGCAGAGAGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCTAACTTGAGCCACGTAATTGAAGATC TAATTTTAGGAGCATCTGCTGTAGAGCCTCAAATAACACAA--------------------------------------- ---------------------GAGCGCCCCCTCACAAATAAACTAAAGCGGAAAAGGAGAGGTACATCAGGCCTTGATCC CGAGGATTTTATCAAGAGAGCCGATTCGGCAGTTGTTCCAAAAACTCCTGAAAAGACAATTGAGGGAACTGATGAGACAG AGCAGAAT---------GGTCAGGGGATGAATATTACCAGTAAAGGTCATGAGAATGAAACAAAAAGTGGTAAT---GTT CAGAAAGAGAAAAATGTTAACCCAACTGCA------TCGTTGGGAAAAGAACCTGCTTTCAGAGCTAGGGCTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTAGAATTGAATATCCATGGTTCGAAAGCACCAAAG---AACAGGCTGAGGAGGA AGTCCTCTACCAGGCAGATCCATGCACTTGAACTAGTAGTCAATAGAAACCCAAGCCCACCTAGTCATACTGAGTTACAA ATTGATAGTTGTTCTAGCAGTGAAGAG---ATGAAGAAGAGA---AATTCCGACCAAGTGCCAGTCAGGCACAGCAAAAA GCTTCACCTCAGGGGAGATAAAGAACCTACAACTGGAGCCAAGAAGAATAATAGGCCACATGAACAAGTAAATAAAAGAC CTATCAGTGATGCTTTTCCAGAACTAAATTTAACAAATGTACCTGTTTCTATTACTAACTGTTCAAATTCTAATAAGCTT CAAGAATCTGTCAGTCCTAACCTTCAAAGAGAA---------GAGAAC---CTAGGGACAATTCAAGTGTCGAATAGTAA CAAAGACCCCAAAGATGTGATGTTAAGTGGAGGAAAA---GGTTTTCAA---ATTGAAAGATCTGTAGAGAATACCAGTA TTTCCCTGGTACCTGATACTGATTATGGCACTCAGGACAGTATCTCATTACTGGAAACTGACACCCTAGGA---AAGGCA AAA---ACAACACCCAATCAACATGTGAGGCTGTGTGCAGCAACTGAAAACCCCAAAGAACTTAGCCTTGGTTGT---TC TAAAGGTGTTAGAAACGACACAGGGGACTTTAAGGATCCCCTGGCTCATGATGTTAAC---CACACTCAGGAAGCAAGCA TAGAAGTGGAAGAGAATGAACTTGATACACAGTATTTGCAGAGTATGTTCAAGGTTTCAAAGCGTCAGACATTTGCTCTG TTTTCAAATCCAGCAAATCCAGAAAAGGAATGTACAACCGTCCATGCCCACTCCAAGTCCTTAAGAGAACAAAGTCCAAA 
AGTCACTCATGAAGGTGGACAAAAAGATGAAAATCAGGGAAAGAGTGAGTCTAAAGTCAAGCATGGGCAGTCAGTTCATA CAACTGTGGACTTTCTAGTGGTTGGTCAAAAGGATAAGAAGCCGAGTGATTTTGCCAAATGTGGTGCAAAAGGAGTAACT GGGCTTTATCAGACATCACAGTTCAGA---GGCCACAAAACTGAGTTCATTAATGCAAATAAACCTGGGATTTCACAAAA CCCATATGTCATACCATCCCTTTCTCCCATCAGGTCGTCTGTTAAAACTATATGTAAGAAAAAC---CTGTCAGAGGAAA AGTTTGAGGAACCTAAAATGTCACCCGAAAGAACAATGGGAAACGAGAGCATCATTCCAAATACAGAGAGCACAGTTAGC CAAAATAACATTCAAGAGAGAACTTTTAAAGAAGGCAGCTCAGGCAGTCCTAATGAAGTCGGGTCCAGTACCAACGAAGT AGGCTCTAGTATTAATGAAGTAGGTTCCAGC---------------------GGTGAAAACGTTCGAGCAGAACCAGGTA GAAACAGAGGACCTAAATTAAGTGCAATGCTCAGATTAGGTCTCATGCAACCCGAAGTTTATAAGCAAAGTCTT---CCT GTAAGTAATTGTAACCACACAGAAATAAAAAGGCAAGGAGAAAATGAAGGCATATTTCAGGCTGTTAATGCAGATTTCTC CCCATATCTAATTTCAGATAACCCCGAACAA---CCTATGGGAAGTAGTCATGCTTCTCAGATTTGTTCTGAGACACCTG ATGACCTGTTAAATGATGACAAAATAAAGGAAAATCTCAACTTTGCTGAAAGTGACGTTAAGGAAAGATCTGCTGTTTTT AGCAAAAGTGCCCAAGAAGGGGAATTTAAAAGGAGCCCTAGCCCTTTAGCCCAC---AGACGTTTGGCTCAGGGTCACCA AAGATGGGCTAGGAAATTAGAGTCTTCAGAAGAGAGTGGGTCTAGTGAG >Cow TGTGGCACAGATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTGCTCACTGAAAACAGACTGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGTCTTAGTAAAGAGCCAGCAGAGCAGATGGGCTGAAAGTAAGGGCA CATGTAAGGAT---AGGCAGATTCCCAGCACTGAGAAAAAGATAGTTCTGAATACTGATCCCCTGTACAGAAGAAAAGAA CTGCGTAAGCAGAAACCTGCATGCCCTGACAGTCCTGGAGAT---TCCCAAGAT---GTTCCTTGGGTAACCCTGAATAA TAGCATACAGAAAGTTAATGACTGGTTTTCCAGAAGTGATGAAATATTAACTTCTGATGACTCGTGCGATGGGGGGTYTG AATCAAATAATGAAGTAGCTGGTGCAGTGGAAATTCCAAAT------AAAGTAGATGGATATTCAGGTTCTTCAGAGAAA ATCAACTTAATGGCCAGTGATCCTCATGGTACTTTAATACAC------GAAAGAGTCCACTCCAAACCCGTAGAGAGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGTCAAGTCTCCCTAACTTCAGCCACATAGCTGAAGATC TAATTCTAGGCGCATTTACTGTAGAACCTCAGATAACACAA--------------------------------------- ---------------------GAGCAGCCCCTCACAAATAAACTAAAATGTAAAAGGAGAGGTACATCAGGCCTTCAGCC TGAGGATTTTATCAAGAAAGTCGATTTGACAATTGTTCCAAAGACTCCTGAAAAGATGACGGAGGGAACTGACCAAACAG AGCAGAAA---------TGTCATGGGATGAATATTACTAGTGATGGTCATGAGAATAAAACAAAACGTGATTAT---GTT 
CAGAAAGAGCAAAACGCTAACCCAGCAGAA------TCATTGGAAAAAGAATCTGTTTTCAGAACTGAGGCTGAACCTAT AAGCATCAGTATAAGCAATATGGAACTAGAATTGAATATCCACCGTTCAAAAGCACCTAAG---AATAGGCTGAAGAGAA AGTCCTCTACCAGGAAAATTCCTGAACTTGAACTAGTAGTCAGTAGAAACCCAAGTCTACCTAATCATACTGAGCTACCA ATTGATAGCAGTTCTAGCAATGAAGAG---ATGAAGAAAAAA---CATTCTAGCCAAATGCCAGTCAGGCAGAGCCAAAA GCTTCAACTCATTGGAGATAAAGAACTTACTGCTGGAGCC---AAGAATAACAAAACATATGAACAAATAAATAAAAGAC TTGCTAGTGATGCTTTTCCAGAACTAAAGTTAACAAACACACCTGGTTATTTTACTAACTGTTCTAGT------AAACCT GAAGAGTTTGTTCATCCTAGCCTTCAAAGAGAG---------GAGAAC---CTAGGAACAATTCAAGTGTCGAATAGTAC CAAAGACCCCAAAGATCTGATATTAAGAGAAGGAAAA---GCTTTGCAA---ATTGAAAGATCTGTAGAGAGTACCAATA TTTCCTTGGTTCCTGATACTGATTATAGCACTCAGGATAGTATCTCATTACTAGAAGCTAAAACCCCAGAA---AAGGCA AAG---ACTGCACCAAATCCATGTGTGAGTCTGTGTACAGCAACCAAAAACCTCAAGGAACTTATCCATAGGGAT---TT TAAAGATACCAAAAACAACACAGAGGGCTTTCAGGATCTACTGGGACATGACATTAACTACGTCATTCAGGAGACAAGCA GAGAAATGGAAGACAGTGAACTTGATACACAGTATTTGCAGAATACATTCAAGGCTTCAAAGCGTCAGACATTTGCTCTG TTTTCCAATCCAGGAAATCCACAAAAGGAATGTGCCACAGTCTTTGCCCACTCGGGGTCCTTAAGGGATCAAAGTCCAAG AGACCCCCTCAAATGCAGACAAAAAGAAGACAGTCAGGGAAAGAGTGAGTCTAAAAGCCAGCACGTGCAGGCCATTTGTA CAACAGTGCACTTTCCTGTGGCTGATCAGCAAGATAGGACGCCAGGTGACGATGCCAAATGTAGCGCAAAAGAAGTAACT AGGGTTTGTCAGTCATCACAGTTGAGA---GGCCACAAAACTGAACTTGTTTTTGCAAATAAACAAGGGGTTTCAGAAAA ACCAAATCTTATACCATCACTTTCTCCCATCAAGTCATCTGTTAAAACCATATGTAAGAAAAGC---CCATCAGAG---A AGTTTGAGGAACCTGTAACGTCACCTGAAAAAACATTGGGGAGTGAGAGCATCATTCAAAGTGCAGTGAGCACAATCAGC CAAAATAACATTCAAGAAAGCACTTTTAAAGAAGTCAGCTCAAACAGTGTAAATGAAGTAGGTTCCAGTACTAATGAAGT AGGCTCTAGTGTTAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTCAAGCAGAACCAGGTA GAAACAGAGAACCTAAATTAAGAGCTTTACTCGGATTAGGTCTTACGCAACCTGAAGTCTATAAGCAAAGTCTT---CCT GTAAGTAACTGTCACCATCCTGAAATAAAAAGGCAAGGAGAAAATGAGGACATGCCTCAGGCTGTTAAGGCAGATTTCTC CCCATGTCTAATTTCAGATAACCTCGAACAA---CCTACGGGAAGCCGTCATGCTTCTCAGGTTTGTTCTGAGACACCTG ACAACTTGTTAAATGATGATGAAATAAAAGAAAATAGCCACTTTGCTGAAAGTGACATTAAGGAAAGATCTGCTGTTTTT 
AGTGAAAGTGTCCAAAAAGGAGAATTCAGAGGGAGCCCTGGCCCTTTCACCCAT---ACACATTTGGCTCAGGGTCACCA AAGAGGGGCTGGCAAACTAGAG---TCAGAAGAGACTGTGTCTAGTGAG >Hippo TGTGGCACAGATACTCGTGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGCAGA AAAGGCTGAATTCTGTAATAAAAGCNAACAGCCTCTCTTAGCAAAGAGCCAACAGAGCAGATGGGCTGAAAGTAAGGACA CATGTAATGAT---AGACAGACTCCCAGCACAGAGAAAAAGGTAGTTCTGAACGCTGATCCCCTATACGGGAGAAAAGAA CTGAATAAGCAGAAACGTGCATGCTCTGACGGCCCTAGCGAT---TCCCAAGAT---GTTCCTTGGATAACACTGAATAG TAGCATACAGAAAGTTAATGAATGGTTTTCCAGGAGTGGCGAGATGTTAACTTCTGACGACTTATGTGTTAAGGGGTGTG AATCAAATACTGAAGTAGCTGGTGCAGCGGAAGTTCCAAAT------GAAATTGATGGGTGTTTGGGTTCTTCAGAGAAA ATAGATTTAATGGCCCGTGACCCTCGTGGTGCCTTAATACGTGAAAGTGAAAGAGTCCACGCCAAACCAGTAGAGAGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCTAACTTGAGCCACATAGCTGAAGATC TAATTAAAGGAGCATCTGCTGTAGGACCTCAGATAACACAA--------------------------------------- ---------------------GAGCGCCCCCTCACAAATAAACTAAAGCGTAAAAGGAGGCGGACATCGGGCCTTCATCC TGAGGATTTTATCAAGAAAGTCGATGCGGCAGTTGTTCCAAAGACTCCTGAAAAGATAATTGAGGGAACTGATCAAACAG AGCAGAAT------------------------------------GGTCATGAGAATGAAATGAAAGGTGATTAT---GTT CAGAAAGAAAAAAATGCTAACCCAACAGAA------TCTTTGGAAAAAGAATCTGCTTTCAGAACTAAGGCTGAACCTAT AAGCATCAGTATAAGCAATATGGAACTAGAATTGAATATCCACAGTTCAAAAGCACCTAAG---AGTACGCTGAGAAGGA AGTCCTCTACCAGGCAGATTCATGCACTTGAACTAGTAGTCAGTAGAAACCCGAGCCCACCTAATCGTACTGAACTACAA ATCGATAGTTGTTCTAGCAGTGAAGAG---ATGAAGAAAAAA---CATTCCTGCCAAATGCCAGTCAGGCACAGCAAAAA GCTTCAATTCATGGGATATAAAGAACCCGCAACTGGAGTCAGGAAGAGTAATAAACCACACGAACAAATAAATAAAACAC TTGCCAGTGGTGCTTTTCCAGAACTAAATGTAACAAACATACCTGGTTTTTTTACTAACTGTTCTAGTTCTAGTAAACTT CAAGAGGTTGTTAATCCTGGCCTTCCAAGAGAG---------GAGAAC---CTAGGAACAATTCAAGTGTCAAATAGTAC CAAAGACCCCAAAAATCTGATATTAAGTAGAGGAAAA---GGTTTGCAA---ATTGAAAGATCTATAGAGAGCACCAGTA TTTCCTTGGTACCTGATACTGATTATGGCACTCAGGACAGTATCTCATTACTGGAAGCTGACACCCTAGGG---AAGGCA AAG---ACAGCAACAAATCAACGTGTGGGTCTGTGTGCAGCAACTGAAAACCCCAAGGAACTTATCCATGGTTAT---TC 
CAAAGATACTAGAAACGACATGGACGGCGTCCAGCATCCATTGGGACAGGATGTTAAC---CACACTCAGGATGCAAGCA TAGAAGTGGAAGACAGTGAACTTGATACACAGTATTTGCAGAATACATTCAGGGTTTCAAAGCGTCAGACATTTGCCCTG TTTTCAAATCCAGGAAATCCAGGAAAGGAACGTGCAACAGTCTGTGCTCATTCCGGGTCCTTAAGGGAACAAAGTCCAAG AGTCCCTCTTGAATGCGGACAAAAAGAAGAAAATCAGGGCAAGAGTGAATCTAAA---------ATGCAGGCAATTTATA CAACTGTGGACTTTGCTGTGGCTGGTCAAAATGATAGGAAGCCGAGTGATTACACCAAATGTAGCACTAAAGGAGTAACT AGGCTTTGTCCCTCATCACAGTTTGGA---AGCAACAAAACTGAGCACATTATTGCAAATAAATATGGAATTTCACAAAA CCCATATGTTATACCATCACTTTCTCCCATCAGGTCATCTGTTAAAACTATACGGAAGAAAAAC---CTGTCAGAGGAAA AGTTTGAGGAACCTGTAGTGTCAGCTGAAAGAGCAATGGCAAATGAGAGCATCCTTCAAAGTACAGTGAACACAATTAGC CAAAATAACATTCGAGAAAACACTTTTAAAGAAGTCAGTTCAAGCAGTATTAATGAAGTAGTTTCCAGTACTAATGAAGT AGGCTCTAGTATCAGTGAAGTAGGTTCTAGT---------------------GGTGGAAACATTCAAGCAGAACTAGACA GAAAGAGAGGACCTAAACTAAGTGCTTTGCTTAGATTAGGTCTTATGCAACCTGAAGTATATAAGCAAAGTCTT---CCT GTAAGTAATTGTCAACATCCTGAAATAAAAAGGCAAGGAGAAAATGAAGGCATACTTCAGGCTGTTAATGCAGATTTCTC CCCGTGTCTAATTTCAGATAACCTAGAACAA---CCTATGGGAAGCAGTCATGCTTCTCAGGTTTGTTCTGAAACACCTG ATGATTTGTTAAATGATGACGGAATAAAGGAAAATAGCAACTTTGCTGAAAGTGACATTAAGGAAAGATCTGCTGTTTTT AGCAAAAATGTCCAAAAAGGAGAATTCAGAAGGAGCCCTGGCCCTTTAGCCCAT---ACACGTTTGGCTCAGGGTCACCA AAGAAGGGCTGGGAAATTGGAGTCCTCAGAAGA---------------- >SpermWhale TGTGGCACAGATACTCATGCCAGCTCATTACAGCATGAAAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGTCTTAGCAAAGAGCCAACAGAGCAGATGGGCTGAAAGTAAGGACA CATGTAATGAT---AGGCAGACTCCCAGTACAGAGAAAAAGGTAGTTCTGAATGCTGATCCCCTATATGGGAGAAAAGAA CTGAATAAGCAGAAATCTGCATGCTCTGACAGTCCTAGAGAT---TCCCAAGAG---TTTCCTTGGATAGCAGTGAATAG TAGCATACAGAAAGTTAATGAATGGTTTTCCAGAAGTGATGAAATGTTAACTTCTAACGACTTACGTGATGGGGGATTTG AATCAAACCCTGAAGTAGCTCGTGCAGTGGAAGTTCCACAG------GAAGTTGATGGATATTTGGGTTCTTCAGAGAAA ATAGACTTAATGGCCAGTGATCCTCATGGTGCTTTAATACGTGAAAGTGAAAGAGTCCACTCCAAACCAGTAGAGAGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCTAACTTGAGCCACGTAGCTGAAAATC 
TAATTATAGGAGCATCTACTGTAGGACCTCAGATTACACAA--------------------------------------- ---------------------GAGCGCCCCCTCACAAATAAACTAAAGCGTAAAAGGAGAAGTACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGTCGATTTGGCAGTTATTCCAAAGACTCCTGAAAAAATAATTGAGGGAACTGACCAAACAG AGCAGAAT---------GGTCATGGGGTGAATATTACTAGTAATGGTCATGAGACTGAAATGAAAGGTGATTGT---GTT CAGAAAGAGAAAAATGCTAACCTAACAGAA------TCATTGGAAAAAGAATCTGCTTTCAGAACTAAGGCTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTAGAATTGAATATCCATAGTTCAAAAGCACCTAAA---AATAGGCTGGGGNGGA TGTCCTCTTCCAGGAAGATTCATGCACTTGAACTAGTAGTCAGTAGAAACCCAAGCCCACCTAATCATACTGAACTACAA ATTGATAGTTGTTCTAGCACTGAAGAG---ATGAAGAAAAAA---CATTCCAGCCAAATGCCAGTCAGGTGCGGCAAAAA GCTTCAATTCATTGGAGATAAAGAACCTACAACTGGAGCCAAGAAGAGTAACAAGCCACATGAACAAATAAATAAAAGAC TTGACAGTGACACTTATCCAGAACTAAATTTAACAAACATACCTGTTTTTTTTACTCACTGTTCTAGTTCTAATAAACTT CAAGAGTTTGTTAATCCTAGCCTTCAAAGAGAG---------GAGAAC---CTAGGAACAATTCAAGTGTCGAATAGTAC TGAAGACCCCAAAGATCTGACATTAAGTGGAGGAAAA---GGTTTGCAA---ATTGAAAGATCTGTAGAGAGTTCCAGTA TTTCCTTGGTACCTGATACTGATTATGGCACTCAGGATAGTATCTCATTACTGGAAGCTGACACCCTAGGG---AAGGCA AAG---ACAGCACCAAATCAACATGTGAGTCTGTGTGCAGCAATTGAAAGCCCCAAGGAACTTATCCACGGTTGT---TC TAAAGATATTAGAAACGACACAGAGGACTTTAAGGATCCACTGGGACATCACGTTAAC---CACATTCAGGAGGCGAGCA CAGNNNNNNNNNNNNNNNNACTTGATACTCAGTTTTTGCAGAATATGTTCAAGGTTTCAAAGCGTCAGACGTTTGCTCTG TTTTCAAATCCAGAAAATCCAGAAAAGGAATGTGCAACAGTCTGTGCCCACTCTGGGTCCTTAAGAGAACAAAGTCCAAG AGTCCCTCTTGAATGCAGACAAAAAGAAGAAAATCAGGGAAAGAGTGAGTCTAAAATCAAGCATGTGTGGGCAATTAATA CAACTGTGGACTTCCCTGTTGCTGGTCAAAAAGATAAG---CCGAGCGATCATGCCAAACGTAGCCCCAAAAGAGTAACT AGGCTTTGTCAGTCATCACAGTTCAGA---AGCAACAAAACTGAGCTCATTATTGCAAATAAACATGGGATTTCACAAAA CCCATATCTTATACCATCACTTTCTCCCATCAGGTCATCTGTTAAAACTATATGTAAGAAAAAC---CTGTCAGAGGAAA AGTTTGCGGAACCTGTAATGTCACCTGAAAGAGCAATGGAAAACGAGAGCATCATTCAAAGTACAGTGAGCACAATCAGC CAAAATAACATTCGAGAAAGCACTTTTAAAGAAGTCAGCTCAAGCAGTACTAATGAAGTAGGTTCCAGTACCAATGAAGT AGGCTCTAGTATTAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTCAAGCAAAACTAGATA 
GAAACAGAGGACCTAAATTAAGTGCTTTGCTCAGATTAGGTCTCATGCAACCCGAAGTCTATAAGCAAAGTCTT---CCT GTAAGTAACTGTCAACTTCCTGAAATAAAAAGGCAAGGAGAAAATGAAGGCACACTTCAGGCTGTTAATGCAGATTTCTC CCCATGTCTAGTTTCAGATAACCTAGAACAA---CCTATGGGAAGAAGCCATGCTTCTCAGGTTTGCTCTGAGACATCTG ATGAGTTGTTAAATGATGACAAAATAAAGGAAAATAGCAACTTTGCTGAAAGTGACATTAAGGAAAGATCT---GTTTTT AGCAAAAGTGTCCAAAAAGGAGAATTCAGAAAGAGCACTGGCCCTTTAGCCCATCATACATGTTTGGCTCAGGGTCACGA AAGAGGGGCT---------GAGTCCTCAGAAGAGAAAGTGTCTAGTGAG >HumpbackW TGTGGCACAGATACTCATGCCAGCTCATTACAACATGAAAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGTCTCAGCAAAGAGCCAACAGAGCAGATGGGCTGAAAGTAAGGACA CATGTAATGAT---AGGCAGACTCCCAGTACAGAGAAAAAGGTAGTTCTGAATGCTGATCCCCTGTATGGGAGAAAAGAA CTGAATAAGCAGAAACCTGCATGCTCTGACAGTCCTAGAGAT---TCCCAAGAG---TTTCCTTGGATAACAGTGAATAG TCGCATACAGAAAGTTAATGAATGGTTTTCCAGAAGTGATGAAATGTTAACTTCTAACGACTCACGTGATGGGGGATTTG AATCAAACACTGAAGTAGCTTGTGCAGTGGAAGTTCCAAAG------GAAGTTGATGGATATTTGGGTTCTTCAGAGAAA ATAGACTTAATGGCCAGTGATCCTCATGGTGCTTTAATACGTGAAAGTGAAAGAGTCCACTCCAAACCAGTAGAGAGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCTAACTTGAGCCACATAGCTGAACATC TAATTATAGGAGCATCTACTGTAGAACCTCAGATAACACAA--------------------------------------- ---------------------GAGCGCCCCCTCACAAATAAACTAAAGCGTAAAAGGAGAAGTACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGTCGATTTGGCAGTTATTGCAAAGACTCCTGAAAAAATAACTGAGGGAACTGACCAAACAG AGCAGAAT---------GGTCATGGGATGCATGTTACTAGTAATGGTCCTGAGACTGAAATGAAAGATGATTAT---GTT CAGAAAGAGAAAAATGCTAACCTAACAGAG------TCATTGGAAAAACAATCTGCTTTCAGAACTAAGCCTGAACCTAT AAGCAGCAGTATAGGCAATATGGAACTAGAATTGAATATCCATAGTTCAAAAGCACCTAAA---AATAGGCTGAGGAGGA AGTCCTCTACCAGGAAGATTCATGCACTTGAACTAGTAGTCAGTAGAAACCCAAGCCCACCTAATCATACTGAACTACAA ATTGATAGTTGTTCTAGCAGTGAAGAG---ATGAAGAAAAAA---CATTCCAGCCAAATGCCAGTCAGGCACGGCAAAAA GCTTCAATTCATGGGAGATAAAGCACCTACAACTGGAGCCAAGAAGAGTAACAAGCCACACGAACAAATAAATAAAAGAC TTACCAGTGACGCTTATCCAGAACTAAATTTAACAAACATACCTGGTTTTTTTACTCACTGTTCTAGTTCTAATAAACTT 
CAAGAGTTTGTTAATCCTAGCCTTCAAAGAGAG---------GAGAAC---CTAGGAAAAATTCAAGTGTCGAATAGTAC CAAAGACCCCAAAGATCTGACATTAAGTGGAGGAAAA---GGTTTGCAA---ATTGAAAGATCTGTAGAGAGTACCGGTA TTTCCTTGGTACCTGATACTGATTATGGCACTCAGGATAGTATCTCATTACTGGAAGCTGACACCCTAGGG---AAGGCA AAG---ACAGCACCAAATCAACATGTGAGTCTGTGTGCAGCAATTGAAAGCCCCAAGGAACTTATCCATGGTTGT---TC TAAAGATATTAGAAACGACACAGAGGACTTTCAGGATCCACTGGGACATCACGTTAAC---CACATTCAGGAGGCGAGCG CAGAAATGGAAGAGAATGAACTTGATACACAGTATTTGCAGAATATGTTCAGGGTTTCAAAGCGTCAGACGTTTGTTCTG TTTTCAAATCCA---------GAAAAGGAATGTGCAACAGTCTGTGCCCGCTCTGGGTCCTTAAGAGAACAAAGTCCAAG AGTCCCTCTTGAATGCAGACAAAAAGAAGAAAATCAGGGAAAGAGTGAGTCTAAAATCAAGCATGTGCGGGCAATTAATA CAACTGTGGACTTCCCTGTTGCTGGTCAAAAAGATAAGAAGCCGAGCGATCATGCCAAACGTAGCCCAAAAAGAGTAACT AGGCTTTGTCAGTCATCACAGTTCAGA---AGCAACAAAACTGAGCTCATTATTGCAAATAAACATGGGATTTCACAAAA CCCATATCTTATACCATCACTTTCTCCCATCAGGTCATCTGTTAAAACTATATGTAAGAAAAAC---CTGTCAGAGGAAA AGTTTGAGGAACCTGTAAGGTCACCTGAAAGAGCAATGGAAAACGAGAGCATCATTCAAAGTACAGTGAGCACAATTAGC CAAAATAACATTCGAGAAAGCACTTTTAAAGAAGTCAGCTCAAGCAGTATTAATGAAGTAGGTTCCAGTACTAATGAAGT AGGCTCTAGTATTAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTCAAGCAAAACTAGRTA GAAACAGAGGACCTAAATTAAGTGCTTTGCTCAGATTAGGTCTTATGCAACCCGAAGTCTATAAGCAAAGTCTT---CCT ATAAGTAACTGTCAACTTCCTGAAATAAAAAGGCAAGGAGAAAGTGAAGGCACACTTCAGGCTGTTAATGCAGATTTCTC CCCAAGTCTAATTTCAGATAACCTAGAACAA---CCTATGGGAAGAAGCCATGCTTCTCAGGTTTGTTCTGAGACATCTG ACGAGTTTTTAAATGATGACAAAATAAGGGAAAATAGCAACTTTGCTGAAAGTGACATTAAGGAAAGATCT---GTTTTT AGCAAAAGTGTCCAAAAAGGAGAATTCAGAAGGAGCCCTGGCCCTTTAGCCCATCATACATGTTTGGCTCAGGGTCACGA AAGAGGGGCT---------GAGTCCTCAGAAGAGAATGTGTCTAGTGAG >Mole TGTGGCATAAATACTCATGCCAGCTTATTACAGCATGAAAACAGCAGTTTATTACTCACTGAAAACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGACTTAGCAAAGAGCCAGCAGAACAGATGGGCTGAAAGTAAAGAAA CATGTAATGAT---AGGCAGACTTCCAGCCCAGAGAAAAGGGTAGACCCGAATGCTGATCCCATGTATGGGAGAAAAGAA CTGAATAAGCAGAAACCTCCATGCTCTGACAGCCCCAGAAAT---TCCCAAGGT---GTTGCCTGGATAACACTGAACAG TAGCATTCGAAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAAATATTAACTTCTGATGAATCTCGTGATGGGGAGTCCC 
CATCAAATATTGAAATGACTGGTGAAGTAGAAGTTCCAAAC------ACAGTAGATGGATTTTCTGGTTCTTCAGAGAAA ATAGACTTAATGGCCAGCGATCCTCCC---GCTTTAATGTGTAAAAGTGAAAGAGTCCGCACCAAACCAGTAGAAAGTAA T---ATTGAAGATAAGATATTTGGGAAAACCTATCGGAGGAAGACAAGCCTCCCAAACTTGAGCCACGTGGCTGAATATT TAATTACAGGGACATCTGTCACAGAACCTCAGATAGTTCAA--------------------------------------- ---------------------GAGCGTCCCTTCACAAATAGATTAAAACGTAAAAGGAGAACTCTACCAGGCCTTTGTCC TGAGGATTTTATCAAGAAAGTAGATTTGGAAGTTGTTCAGAACACCCCTGAAAAGATAAGTGCGGGAACTGATCAAATGG ATCAGAGT---------GGTCAAGTGATGGATGTTGCTTATAATGGTCATGGGAATGAAACAAAACGTGATTAT---GTT CAGAAAGAGAAAAGTGCTAACCCAGCCGAA------TCTTTAGAAAAAGAATCTACCTTCAGAATTAAAGCTAAGCCCAT AAGTAGCAGTATAAGCAATATGGAACTAGAATTAAATATCCACTCTTCAAAAGCACCTAAGAAGAACAGGCTGAGGAGGA AGTCTTCTACCAAGCATATTCATGCACTTGAACTGGTGGTCAAAAGAAATCCAAGCCCACCTAATCATACAGAACTACAA ATTGATAGCTGTTCTAGCAGTGAAGAG---ATGAAGGAAAGA---AATTCCAACCAAATGCCAGTAAGACACAGCAAAAG GCTTCAACTCATGGAAGATAAAGACCCCGCAACTGAAGCCGTCAAGAGTAACAAGCCAAAAGAACAAATT---AAAAGAT TTGCCAGTGATACTTTGCCAGAACTAAATTCAACACATGTACCTGGCTTTCTTAGCAACTATTCAAGTTCTAATAACCTT GAAGAGTTTTCCAATCCTAGCCTTCAAAGAGAAGAACTAGAAGAGAAC---CTAGGAACAAATCAAGTGTCAAATAATAC CAAAGACCCCAAAGATCCGATACTTAGTGGACAAAGA---GATTTGCAA---GCTGAAAGATCTGGCGAGAGTACCAATA TTTCATTGGTACCTGAGACTGATTTTGGCAGTCAGGATAGTGTCTCATTACTGGAAATTGACATCCTAGGC---AAGGCA AAA---AAAGTGCCGAATCAGTGTGCAAGTCTGTGTACAAAAATTGAAAACTCTAAGGAACTTATTCATAGTTGT---TC TAAAGATACTAGAAATGACACACAGGGCTGTAAGGATCCACTGAAATACGAAGTTAAC---CACACTCAGGAGATAAGGA TAGAGATGGAAGAGAATGAACTTGATACACAGTATTTACAGTCTACGTTCAAAGCTTCAAAGCGTCAATCATTTGCTCTG TCTTCACATCCGGGAAATTCAGAAAAGAAATATCCCCCAGTCTCTGCCCCTTCCAGGTCCTTGAAGAGACAAACTCCAAA AATCACTCTGGAATGTGAACTGAAAGAAGAAAATCAAGGGAAGAAAGAGTCTAAAACCGAGCATGTACAGGCAGTACATA GAATTGCAGACTTAACTCTGGCTTGTCAGAACGATAAG---CCACATGATTCTGCCAAATGTAGCATAAAAGGAGTCTCT AGGCTTTGTCAGTCATCTCAGTTCAGA---GGCAACGAAACAGAAGTCATTGTTGCAAATAAACGTTTAATCTTACAAAA CCCATATCTTATTCCACCACTTTCTTCAATTCAGTCATCTGTTAAAAGTGCTTGTAAGAAAAAC---CTGGCGGAGGAAA 
AGCTTGAGGAACACTCATGGTCACCCGAAAGAGAAACAGGAAACGGGAGCATCATTGAAAGTACAGTGAGCAGGGTTAGC CAAAATAACAATAGAGAAAATGCTTTTAAGGAAGTCAGCTCAAGTAGTATTAACGAAGTAGGTTCCAGTACTAACGAAGC TGACTCTAGTATTAATGAAGTAGGTTCCAGT---------------------GGTGAAAATATTCAAGCTGAACTAGATG GAAGCAGAGGACCTAAATTAAATGCTATGCTCAGATTGGGTCTTATGCAACCTGAAGTCTATAAGCAAAGTCTT---CCT AAAAGTACTTGTAAACATCCTGAAGTTAAAAGGCAAGGAGAAAATGAAGGCATAGTTCAGGCTGTTCATACAGATTTCTT TCCATGTCTGATTTCAGATAACCAAGAACAA---CCTATGGGAAGTAGTCATGCTTCTCAGGTTTGTTCTGAGACGCCTG AGGACCTGTTAAATGACGAAATAAAGGATAATAACATCAGCTTTGCTGACAGTGGCATTAAGGAAAGATCTGCTGTTTTT AGCAAAAGTTTCCAGAAAGGAGAATTC---AGGAGCCCTAGCCTTTTAGACCAT---ACATGTTTGGCTCAGGACCACGA AAGTGGG------AAATTGGAGTCCCCAGAAGAGACTATGTCTAGTGAG >Hedgehog ---------------------------------CGTGAGAACAGCAGTTTATTACTCACTAAAGGCAAAATGAATGTAGA AAAGGCTGAATTCTGTAGTAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAACAGAGCAGATGGGCTGAAAATATGGAAA CATGTAATGAT---AGGCAGACTCCTAGCCCAGACAAAGAAGTAGATCTGAATGCTGATTCCTTATATGAGAGAAAAGAA CTAAACGAGCAGATCTCTTCATACTCCAGCAGTCCTAAAGAT---TCCAAAGAC---ATTTCTTGGGTAGCACTGAAT-- -AGCATACAGAAAGTGAATGAATGGCTTTCCAGAAGTGATGAACTGTTAACTTCTGATGACTCATATGATAAGGGATCTA AATCAAAAACTGAAGTAACTGTAACAACAGAAGTTCCAAAT------GCAATAGATAGRTTTTTTGGTTCTTCAGAGAAA ATAAACTTAACAGCCAGTGATCCTCATGTTGCTTTAATACGTGAAGGTGAAGGAGTCCACTTCAAACCAGTAAAGAATAA T---ATTGAAGATAAAATATTTGGGAAAACCTATGGGAGGAAGGCAAGCCTTCTTAATTTGAGCCACGTAACTGAAGATG TAATTATAAGG------------GAACCTCAGGTAGCCAAA--------------------------------------- ---------------------GAGCCTCTCCTTGCAACTAAATTAAAACGTAAAAGGAGAACTGATGTAGGTCTTTGTCC TGAGGATTTTATCAAGAAAGTAGATGTGGCAATTGTTCAGAAGACTCCTGAAAAGATAATCAAGAGACCTGGCCAACTGG ATCAAAGT---------GACCAAGTAATGAATATTGCTACTAATGGTCATGAAACTGAAACAAAGAGTGATTAT---GTT CAGAAGGAGAAAAATGCTAACCCAGCAGAA------TCACCAGACAAAGAATCTGCTTTTAGAAATAAACCTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTAGAATTAAATGTCCACAGTTCAAAAGCACCTAAGAAGAATAAACTCAGGAGAA AGTCATCTGCCAGGCATATTCATGCACTTGAAGTAGTAGTCAATAAAAACTCAAGCCCACCTAGCCATACAGAACTACAA 
ATTGATAGCTGTTCTAGCAATGAAGAG---ATGAGGGGGGTG---AAAGCTGATCAAATGCCAGTCAGGCACAGCAAACA ACTTCCACTCATTGAAGACAAAGAACCTGCAACTAGAGCCTTTAGTAGTAGCAAGTCAAATGAACAAATAAGTAAACAGT TTGTTGGTGAGACTTTTTCAGACCTAAATTTAACAAACATCCCTGGTTTTCTTACCAGCTGTTCAAGTTCCAATAAACAT CAGGAATGTATTAATCCTAACCTACAAAGAGAGGTATTAGGAGAAAGC---CGTGGGACTATTCAAGTGTGTAATAATAC CAAAAACCCTAAAGATCTGATAGTAAGTGGAAAGAGA---GGTTTACAA---ATTGAAAGATCTGTAGAGAGTCCT---- --------GTAGAGAACACTGACTATGGCAGTCAGGATAGTATCTCATTACTGGAAACTGATACTCTAGGG---AAGGCA AAA---AAGACACCAAATCAATGTGTAAGTCTGTGTGTAGCAACTGAAACCCCCAAAGAACTTAGCCATAGTTGT---TC TAAAGATACTAGAAATGACACTGAAGGCTTTAGGGATTCACTCAGATGTGAAGTTGAT---CACACGCAGGAGACAAGCA CAGAACTGGAGGAGAGTGAACTTGATACACAGTATTTACAGAATACCTTCAAGGTTTCAAAACGTCGGTCATTTGCTCTG TTTTCAAGTCCAGAGAATTCAGAAAAGGCATGTACAAGAGGCTCTGTCCATTCTAAGTCTTTAAGGAAACAAAGTCCAAA AGTTATTCTGGACTGTGAACAAAAAGAAGAAAATCAAGGAGAGAAAGCATCTGAAATCAAGTATTTGCCATCAGAACATA CAACTACAGGCTTTCCTGTGGTGTGTTATAAAGATACA---TCAGGTGATTATGCCAAATGTAGCGTAAAAGGAGTCTCC AGGCATTGTCAGCCATCTCGGTTCAGA---GGCAGTGAACCTGAACTCATTGTTGCAAATAAAAATTTAATTTTACAAAA CCTATATCATATACCAACACTTCCTACCATCATGTCATCTACTAAGAGTATATGCAAGAAAAAC---CTGTCTGAGGAAA AT------------TCACTGTCGCCTGAAAGAGCAGTGACAAACAAAAGCATCATCCAAAGTACAGTGAACACCATTAGC CAGACTAATGCCAGTGAAAATGCTCTTAAAGATGTCAGTTCAAGCAGTGTTAATGATATGGGTTCCAGTACTAATGAAGT AGGTTCCAGT------------------------------------------AGTGAAAACATTCAAGCTAAACTATGTA GAAACAGAGGACCCAGATTAAATGCTACTCTTAGAGCAAGTCTTATGCAACCTGAAGTCTTTGAGCAGTGTCTT---CTG ATGAGTAACTGTAAACATTCTGAAATGAAAAGACAAGGAGAAAATGAAGGTGTTATTCAGACTGTTAATATAGAATCCGA TTCATGCCCAAGACCAGATAACTTAGAACAA---CCGGTGGAAACTAGTCAT---------GTTTGTTCTGAAACACCTG ACGATCTGTTAAATGACGATGAAATGAAGGAAAATACAAGCTTTACTGAAAGTGGCATTAAGGAACAATCTGCTGTTTTT GGCAAAAGTACCCAGAAAGGT---TTCAGAAGGAGCCCTAGCCCTTTAGGCCAC---ACATGTTTGAGACAGGATCAGCA AAGAGGGGCACAAAAATCAGAGCCCTCTGAAGATTTCATGTCTAGTGAG >TreeShrew TGTGGCATAAATACTTATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAGGACAGAATGAATGTAGA 
AAAGGCTGAATTGTGTAATAAAAGCCAACAACCTGACTTAGCAAGGAGCCAGCAGAGCAGATGGACTGAAAATAAGGAAA CATGTAATGAT---AGGCAGATTCCCAGCACAGAAAAAAAGGTAGATCTAAATGCTGATCCCCTGTGTGGGAAGAAAAAA CAAGCTAAGCAGAAACYGCTATGTTCTAACAGTCCTAGAGAT---GACCAAGAT---TCTCCTTGGATAACTCTAAATAG TAGCATTCAGAAAGTTAATGAATGGTTTTCCAGAAGTGATGAAATGTTAACTTCTAACGACTCACATGATGGTGAGTCTG AA------------ATAGCTGGTGCATTYGAAGYTCCAAAT------AAAGTAGATGAATATTCTGGTTCTTCAGAAAAA ATAGACTTAATGGCCAACAATCTTCATGATGCTTTAATAAGTAAAAGTGAAGGAATCTACTCCAAACCAGTAGAGGGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAAGCAAGTCTTCCTAACTTGAGCCGTGTAACTGATGATC TAATTAGAGGGGCATTTGTTACAGAGCCTGAGATAACTCGA--------------------------------------- ---------------------GAGCGTCCCTTCACAAATAAATTAAAGCGGAAAAGGAGAACTATATCAGGCCTTCATCC TGAAGATTTTATCAAGAAAACAGATTTGGCAGTTGTTCAAAAGACTCCTGAAAAGATAAATCAGAGAACTGACCAAATAG AGCATAAT---------GGTCAGGTGATGAGTATTGCTAATAGTGGTCATGAGAATGAAACAAAAGGTGATTAT---ATT TCGAAAGAGAAAAATGCTAACCCAATGGAA------TCATTAGAAAAAGAATCTGCTCTCAGAACTAAAGCTGAGCCCAT AAGCAGCAGTGTAAGCAATATGGAACTAGAAATAAATAACCACAGTTCAGAAGCACCTAAGAAGAATAGGCTGAGGAGAA AGTTTTCTGCTAGGCATATTCGCACACTTGAACTAGTAGACAATAAAAGTCCAAGCCCACCTAATCGTACTGAACTACAA ATTGACAGTTATTCTAGCGGTGAAGAG---AGAAAGAAAAAG---GGT---GAGCAAATGCCAGTTGGACACAGCAGAAA GTTTCAACTTGAGGAAGAGAAAGAACCTACAACTGGAGCCAAGAAAAATAACCAGCCAAATACAGAAATAAGTGAAAGAC ATGCCAGTGGTGTTATCCCAGATCTGAAGTTAACAAACATACCTGGTTTTTTCACAAACTCTTCGAGTTCTAATAAACTT CCAGAATTTGTCCATCGTAGCCTTCAAAGAGAAAAA---GAAGAGAAC---CGAGAAACAATTCAAATATCCAGTAGTAC C---------AAAGATCTGGTATTAAGGGGAGAAAGG---GGTTTGCAA---GATGTAAGGTCTGCAGAGAGTACCAGTA TTTCTTTGGTACCTGATACTGATGATAACACCCAGGATAGCATCTCATTACTAGATGCTAACCCCCTAGCTAGGAAGGCA AAA---ACAGCACCAAATCAATGTGTAAATCAGAGTGCAACAACTGAAAACCCCAAGGAACTTATACACAGTTGT---TC TAAAACTACTAGGAAT------GAAGGCTTCAAGGATCCATTGAAAAGTGAAGTTAAT---CATATTCAGGAGATGAGTG TAGAAATGGAGGAGAGTGAACTTGATACTCAGTATTTACAGAATACATTCAGGAGTTCAAAGCGTCAGTCATTTGCTCTG TCTTCAAATCCAGGAAATCCAGAAAAGGAACATGTCTGTGTT-------------------------------------- 
-------------------------------AAAGAAAGTCTGAAAGAGTCTAACATCCAACATATACAGGCAGTTAGTA CC---------------ATGGTTTTTCAGAAAGATAAG---CTAGGTGATTTTGCTACATCTGGCATTAAAGAAGTCCCT AGACTTTGTCCATCATCTCAGTTCAGA---GGCAATGAAACTGATCTCATTACTGCAAATAAACCTGAAGTTTCACAAAA CCCGTATCATATGCCATTACTTTATCCTGTCAAGTCACCTATTATAACTAAAAGTAAGAAAAGC---CTGTCAGAGGAAG GGTTTGAGGAACAGGCAATGTCACTTGAAAGAGCAATGGAAAATGAGAACATCATTCAAAGTACAGTGAGCACAATTAGC CAAGATAACATTAGAGAAGGTGCTTTTAAAGAAGCCAGCTCAAGCAGTATTAATGAAATAGGTCCTAGTACTAATGAAGG AAGCTCTAGTATTAATGAGGTAGGTTCCAGT---------------------GGTGAAAACATTCAAGCAGAACTAGGTA AAAAGAGAGGATCCAAATTAAATGCTGTGCTTAGATTAGGTCTTATGCAACCCGAAGTCTATAAGCAAAGTCTT---CCT TTAAGTAATCATAATGATCCTGAAATGAAAAGACAAGAAAAAAATGAAGGAGGAGTTCAGGCTATTAAA---GATTTACC TCCATGTCTAATTTCAGATAATCAAGAGCAT------ATGGGAAGTAGCCATGCTTCTCAGATTTGTTCTGAGACACCTG ATGATCTGTTAGATGATGATGAAGGAAAAGAAAAT---AGCTTTGCTGAGGTTGATGTTAAGGAAAGATCTGCTGTTTTT GGCAAAACTGTCCAGAGAAGAGAGTTAAGAAGGAGCTCTAGCCCTTTAACTCGT---GCATGTTTGACTGAGGGTCAGCA AACAGGAGCCCAGAAATTAGATTCATCAGAAGAGAACCTATCTAGTGAG >FlyingLem TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACGCACTAAAGACAGAATAAATGTTGA AAAGACTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAGGAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACGCCCAGCACAGAGAAAAAGATAGATCTAAATGCTGATTCCCAGCATGGGAGAAAAGAA CGGAATATGCAGAAACCTCCATACCCTGAGAGTCCTAGAGAT---ACCCAAGAT---GTTCCTTGGATAACACTAAACAG CAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAAATTTTAGCTTCTGATGACTCACGTGACAGGGTGTCTG AATCAAATGCCAAAGTAGCTGGTGCATTAGAAGTTCCAAAT------GATGTAGATGGATATTCTGATTCTTCAGAGAAA GTTGATTTAATGGCCAGTGATCCTCATGATGCTTTAATATGTAAAAGTGAAAGAATCCACTCCAGACCAGTAGAGAGTAA T---ATCAAAGATAAAATATTTGGGAAAACCTATCAGAGGAAGACAAGCCTCCCTAACTTGAGCCACGTAAATGAAGATC TAATTATAGGAGCATTTGTTACAGAACCACAGATAACACAA--------------------------------------- ---------------------GAGCGTCCCCTCACAAATAAGGTAAAGCCTAAAAGGAGAACTACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGCAGACTTGGCAGTTGTTCAAAAAACTCCTGAAAAGATAAATCAGGGAATTGACCAAATGG 
AGCAGAAT---------GATCGAGTGATGAATATTATTAATAGTGGTCATGAGAATGAAACAAAGGATGATTAT---GTT CAGAAAGAGAAAAATGCTAACCCAACAGAA------TCATTGGAAAAAGAATCTGCTTTCAGAACTAAAGCAGAACCTAT AAGCAGCAGTATAAGCAATATGGAAATAGAATTAAATATCCACAATTCAAAACCATCTAAGAAGAATAGGCTGAGGAAGA TGTCCTCTACTAGGCATATTCATGCACTTGAACTAGTAGTCAATAGAAATCCAAGCCCACCTAATTATACTGAACTACAA ATTGATAGTTGTTCTAGCAGTGAAGAA---ATAGAGAAAAAA---AATTCCAGCCAAATGCCAGTCAGGCACAGCAGAAA GCTTCAACTCATGGAAAATAAAGAACCTGCAACTGGAGCCAAGAAGAGTAACAAGCCAAATGAACAAATAAGTAGAAGAC ATTCCAGTAATGCTTTCCCAGAACTGCGGTTAACAAATGTACCTGTTTTTTTTGCTAACTGTTCAAGTTCTAATAAACTT CAAGAATTTATCGATCCTAGCCTTCAAAGAGAAGAAATAGAAGAGAAC---CTAGAAACAATTCATGTGTCTAATAGTGC CAAAGACCCCAAAGATTTGGTGTTAAGTGGGGAGAAG---GGTTTGCAA---ACTGAAAGATCTGTAGAGAGTACCAGTA TTTCATTAGTACCTGATACTGATTATGGCACTCAAGACAGTATCTCAATATTAGAAGCTAACATCCTAGGG---AAGGCA AAA---ACAGCACCAAGTCAACATGCAAATCAGTGTGCAGCAATTGAAAACCCCAAAGAACTTATCCATGGTTGT---CC TAAAGGTACTAGAAATGACACAGAGGATTTTAAGGATCCATTGAGATGTGGAGTTGAC---CACATTCAGAAGACAAGCA TAGAAATGCAAGAGAGTGAACTTGATACTCAGTATTTACAAAATATATTCAAGGTTTCAAAACGTCAGTCATTTGCTCTC TTTTCAAATCCAGGAAATCCAGAAAAGGAGTGTGCAACAGTCTATGCCCACTCCAGGTTGTTAAGGAAACAAAGTCCAAA AGTCACTCCTGAATGTGAACAAAAAGAAGAAAATGAGGGAAATAAAGAGTCTAAAATCAAGCACATACAGGCAGTTAATA CCACTGTGGGCTTTTCTGTCCTTTGTCAGAATGTTAAGAAGCCAGGTGATTATGCCAAATTTAGCATTAAAGGAGTCTCT AGGCATTGTTCATCATCTCAGTTCAGA---GGCAATGAAACTGAACTCATTACTGCAAATAAACATGGAATTTTACAAAA CTCATGTCATATGTCATCACTTTCCCCCATCAGGTCATCTGTTAAAATTAAATGTAAGAAGAAC---CTGTCAGAGGAAA GGTTTGAGGAACATTCAGTGTCACCTGAAAGAGCAATGGCAAACAAGAGAATCATTCAAAGTACAGTGAACACAATTAGC CAAAATAACATTAGAGACAGTGCTTCTAAAGAAGCCAGCTCAAGCAGTATTAATGAAGTAGGTTCCAGTACTAATGAAGT AGGCTCCAGTATTAATGAAGTAGGTCCCAGT---------------------GGTGAAAACATTCAAGCAGAACTAGGTA GAAACAGAGGACCTAAATTAAGTGCTATGCTTAGATTAGGCCTCATGCAACCTGAAGTTTACAAGCAAAATCTT---CCT TTAGGTAATTGTAAACATCCTGAAATA---AGGCAAGAAGAAAATGAAGGAATAGTTCAGGCTGTTAATACAAATCTGTC TCTGTGCCTAATTTCACATAACCTCGAACAA---CCTATGGAAAGTAGTCATGCTTCCCAGGTTTGTTCTGAGACACCTG 
ATGACCTGTTAGATGGTGATGAGATAAAGGAAAACACCAGCTTTGCTGAAAGTGACAGTAAGGAAAGATCTGCTGTTTTT AGCAAAAGTGTCCAGAGAGGAGAGTTAAGCAGGAGCCCTAGCCCTTTTGCCCAA---ACATGTTTGGCTCAGGGTCACCA AAGAGGAGCCAGGAAATTAGAGTCTTCTGAAGAGAACGTATCTAGTGAG >Galago TGTGGCAAAAATACTCATGCCAGCTCATTACAGCATGAGAGCAGCAGTTTATTACTCACTAAAGACAAAATGAATGTAGA AAAGGCTGAATTTTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAACAGAGCAGATCGGCTCAAAGTAAGGAAA CATGCAATGAT---AGGCACACTTGCAGCCCTGAGCAAAAGGTAGATCTGAATACTGCTCCCCCATATGGGAGAAAAGAA CAGAATAAGGAGAAACTTCTATGCTCCAAGAATCCTAGAGAT---AGCCAAGAT---GTTCCTTGGATAACACTAAATAG CAGCATTCAGAAAGTTAATGAATGGTTTTCTAGAAGTGATGAAATGTTAACTTCTGATGACTCACATGATGAGGGTTCTG AATCACATGCTGAAGTAGCTGGAGCCTTAGAAGTTCCAAGT------GAAGTAGATGGATATTCCAGTTCCTCAGAGAAA ATAGACTTACTGGCCAGTGATCCTCATTATCCTATAATATGTAAAAGTGAAAGAGTTCACTCCAAACCAATAAAGAGTAA A---GTTGAAGATAAAATATTTGGGAAAACTTATCGGAGGAAGGCAAGCCTCCCTAACTTAAGCCATGTAACTGAAAATC TAATTATAAGAGCAGCTGCTACTGAGCCACAGATAACACAA--------------------------------------- ---------------------GAGTGTTCCCTCACAAATAAATTAAAACGTAAAAGGAGAACTACATCAGGTCTTTGTCC TGAGGATTTTATCAAGAAGGCAGATTTGGCAGTTGTTCAAAAGACACCTGAAAAGAGAATTCAGGGAACTAACCAAGTGG ATCAGAAT---------AGTCACGTGGTAAATATTACTAATAGTGGTTATGAGAATGAAACAAAAGGTGATTAT---GTT CAGAATGAAAAAAATGCTAACTCAACAGAA------TCATTGGAAAAAGAATCTTCTCTCGGAACTAAAGCTGAACCTAT AAGCAGCAGTATAAGTAATATGAAATTAGAATTAAATATTCACAATTCAAAAGCAAGTAAAAAGAAAAGGCTGAGGAAGA AGTCTTCTAGCAGGCATATTCGTGCACTTGAACTAGTAGTCAATAAAAATCCAAGCCCTCCTAATCATACCAACCTACAA ATTGACAGTTGTTCTAGCAGTGAAGAA---ATAAAGGATAAA---AGTTCTGACCAAATACCAGTCAGGCATAGCAGAAA GCCTGGACTCATGGAAGATAGAGAACCTGCAACTGGAGCCAAGAAAAGTAACAAGCCAAATGAGCAAATAAGTAAAAGAC ATGTCAGTGATACTTTCCCAGAAGTGGCATTAACAAATATATCTAGTTTTTTTACTAACTGTTCAGGTTCTAATAGACTT AAAGAATTTGTCAATCCTAGCCTTCAAAGAAAAAAAACAGAAGAGAACTTAGAAGAAACAATTCAAGTGTCTAATAGTAC CAAAGGTCCGGTGTTAAGTGGAGAAAGGGTTTTGCAA---ATTGAAAGT---GAAGAAAGATCTATAAAAAGCACCAGTA TTTCATTGGTACCTGATACTGATTATGGTACTCAGGACAGTAACTCGTTACTGAAAGTTAAAGTCTTACGG---AAGGTG 
AAA---ACAGCACCAAATAAACATGCAAGTCAGGGTACAGCCACTGAAAACCCCAAGGAACTAATCCATGGTTGC---TC TAAAGATACTGGAAATGACACAGAGGGCTATAAGGATCCATTGAGACATGAAATTAAC---CACATTCAGAAGATAAGCA TGGAAATGGAAGACAGTGAACTTGATACTCAGTATTTACAGAATACATTCAAGTTTTCAAAGCGTCAGTCGTTTGCTCTG TTTTCAAACCTAGGA---------AAGGAATGTGCAACAGTCTGTGCCCAGTCTCTCTCTGCGTCCTTAAGAAAAGGTTC AAAAGTCATTCTTGAATGTGAACAAATAGAAAATCCAGGAATGAAAGAGCCTAAAATCAAGCATATACAGGGAAATAATA TCAATACAGGCTTCTCTGTAGTTTGTCAGAAAGATAAGAAAACAGATGATTATGCCAAATACAGCATCAAAGAAGCATCT AGGTTTTGTTTGTCAAATCAGTTTCGA---GACAATGAAACTGAATCCATTACTGTAAATAAACTTGGAATTTTACAAAA CCTCTATCATATACCACCACTTTCTCCTATCAGGCTATTTGATAAAACTAAATGTAATACAAAC---CTGTTAGAGGAAA GGTTTGAAGAACATTCAGTGTTACCTGAAAAAGCAGTAGGAAACGAGAACACCGTTCCAAGTACAATGAATACAATTAAC CAAAATAAC---AGAGAAAGTGCTTATAAAGAAGCCAGTTCAAGCAGTATCAATGAAGTAAGCTCGAGTACTAATGAAGT GGGCTCCAGTGTTAACGAAGTAGGCCCCAGT---------------------AGTGAAAACATTCAAGCAGAACTAGATA AAAACAGAGGACCTAAGTTGAATGCTGTGCTTAGATTAGGTCTTATGCAACCTGAAGTCTATAAACAAAATCTT---CCT ATAAGTAATTGTGAACATCCTAAAATAAAAGGGCAAGAAGAAAATGGA---GTAGTTCAACCTGTTAATCCAGATTTTTC TTCATGTCTAATTTCAGATAACCTAGAACAA---CCTACGAGAAGTAGTCATGCTTCTCAGCTTTGTTCTGAGACACCTG ATGACTTATTAGTTGATGATGAACTAAAGGAAAATACCAGTTTTGCTGAAAATAACATTAAGGAAAGATCTGCTGTTTTT AGCAAAAATGTCATGAGAAGAGAGATTAGCAGGAGCCCTAGCCCTTTAGCCCAT---ATACATTTGACTCAGGCTCACCA AAGAGAGGTTAGGAAATTAGAGTCCTCAGAAGAGAACATGTCTAGTGAA >HowlerMon TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTGTTACTCACTAAAGACACACTGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAACATAACAGATGGGCTGAAAGTGAGGAAA CATGTAATGAT---AGGCAGACTCCCAGCACAGAGAAAAAGGTAGATGTGGATGCTGATCCCCTGCATGGGAGAAAAGAA TGGAATAAGCAGAAACCTCCGTGCTCTGAGAATCCTAGAGATGATACTGAAGAT---GTTGCTTGGATAATGCTAAATAG CAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAACTTCTGATGACTCACATGATGGGGGGTCTG AATCAAATGCCAAAGTAGCTGAAGCATTGGAAGTTCTAAAT------GAGGTAGATGGATATTCTAGTTCTTCAGAGAAA ATAGACTTACTGGCCAGTGATCCTCATGATCATTTGATATGTAAAAGTGAAAGAGTTCACTGCAAATCAGTAGAGAGTAG 
T---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCTAACTTGAGCCACGTAACTGAAAATC TAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAA--------------------------------------- ---------------------GAGCATCCTCTCACAAATAAATTAAAGCGTAAAAGGAGAGTTACATCAGGACTTCATCC TGAGGATTTTATCAAGAAAGCAGATTTGGCAGTT---CAAAAGACTCCTGAAAAGATAAATCAGGGAACTAACCAAACAG AGCGGAAT---------GATCAAGTGATGAATATTACTAACAGTGGTCATGAGAATAAAACAAAAGGTGATTCT---ATT CAGAATGAGAACAATCCTAACCCAGTAGAA------TCACTGGAAAAAGAATCA---TTCAAAAGTAAAGCTGAACCTAT AAGCAGTAGTATAAGCAATATGGAATTAGAATTGAATGTCCACAATTCCAAAGCATCTAAAAAGAATAGGCTGAGAAGGA AGTCTTCTACCAGGCATATTCATGAGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAATTATACTGAAGTACAA ATTGATAGTTGTTCTAGCAGTGAAGAG---ATAAAGAAAAAA---AATTACAACCAAATGCCAGTCAGGCACAGCAGAAA GCTACAACTCATGGAAGATAAAGAACGTGCAGCTAGAGCCAAAAAGAGTAGCAAGCCAAATGAACAAACAAGTAAAAGAC ATGCCAGTGATACTTTCCCAGAACTGAGGTTAACAAACATACCTGGTTCTTTTACTAACTGTTCAAATACTAATGAATTT AAAGAATTTGTCAATCCTAGCCTTCCAAGAGAACAAACAGAAGAGAAA---CTAGAAACAGTTAAACTGTCTAATAATGC CAAAGACCCCAAAGATCTCATGTTAAGTGGAGAAAGT---GTTTTGCAA---ATTGAAAGATCTGTAGAGAGTAGCAGTA TTTTGTTGATACCTGGTACTGATTATGGCACTCAGGAAAGTATCTCATTACTGGAAGTTAGCACTCTGGGG---AAGGCA AAA---ACAGAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGAACTAATTCATGGTTGT---TC TAAAGATACTAGAAATGGCACAGAAGGCTTGAAGTATCCATTGGGACCTGAAGTTAAC---TACAGTCAGGAAACAAGCA TAGATATGAGAGAAAGTGAACTTGATACTCAATATTTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCTG TTTTCAAATCCAGGAAATCCAGAAAAGGAATGTGCAACATTCTCTGCCTGCTCTAGGTCCTTAAAGAAACAAAGTCCAAA GGTCACTCCTGAATGTGAACAAAAGGAAGAAAATCAAGGAGAGAAAGAGTCTAATATCGAGCTTGTAGAGACAGTTAATA CCACTGCAGGCTTTCCTATGGTTTGTCAGAAAGATAAG---CCAGTTGATTATGCCAGATGT---ATCGAAGGAGGCTCT AGGCTTTGTCTATCATCTCAGTTCAGA---GGCAACGAAACTGGACTCATTATTCCAAATAAACATGGACTTTTACAGAA CCCATATCATATGTCACCGCTTATTCCCACCAGGTCATTTGTTAAAACTAAATGTAAGAAAAAC---CTGCTAGAAGAAA ACTCTGAGGAACATTCAATGTCACCTGAAAGAGCAATGGGAAACAAGAACATCATTCCAAGTACAGTGAGCACAATTAGC CATAATAAC---AGAGAAAATGCTTTTAAAGAAACCAGCTCAAGCAGTATTTATGAAGTAGGTTCCAGTACTAATGAAGC 
AGGTTCTAGTACTAATGAAGTAGGCTCCAGTATTAATGAAGTAGGTTCCAGTGATGAAAACATTCAAGCAGAGCTAGGTA GAAACAGAAGGCCAAAATTGAATGCTATGCTTAGATTAGGGCTTCTGCAACCTGAGATTTGTAAGCAAAGTCTT---CCT ATAAGTGATTGTAAACATCCTGAAATTAAAAAGCAAGAACATGAAGAA---GTAGTTCAGACTGTTAATACAGACGTCTC TCTATGTCTGATTTCATATAACCTAGAACAG---CATATGGGAAGCAGTCATACATCTCAGGTTTGTTCTGAGACACCTG ACAACCTGTTAGATGATGGTGAAATAAAGGAAGATACTAGTTTTGCTGAATATGGCATTAAGGAGACTTCTACTGTTTTT AGCAAAAGTGTCCAGAGAGGAGAGCTCAGCAGGAGCCCTAGCCCTTTCACCCAT---ACACATTTGGCTCAGGTTTACCA AAGAGGGGCCAAGAAATTAGAGTCCTCGGAAGAGAATTTATCTAGTGAG >Rhesus TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAAC---AGTTTGTTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTGGCAAGGAGCCAACATAACAGATGGACTGGAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCCAGCACAGAGAAAAAGGTAGATCTGAATGCTAATGCCCTGTATGAGAGAAAAGAA TGGAATAAGCAAAAACTGCCATGCTCTGAGAATCCTAGAGAC---ACTGAAGAT---GTTCCTTGGATAACACTAAATAG CAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAAGTTCTGATGACTCACATGATGGGGGGTCTG AATCAAATGCCAAAGTAGCTGATGTATTGGACGTTCTAAAT------GAGGTAGATGAATATTCTGGTTCTTCAGAGAAA ATAGACTTACTGGCCAGTGATCCTCATGAGCCTTTAATATGTAAAAGTGAAAGAGTTCACTCCAGTTCAGTAGAGAGTAA T---ATTAAAGACAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAACCTTCCCAATTTAAGCCATGTAACTGAAAATC TAATTATAGGAGCACTTGTTACTGAGTCACAGATAATGCAA--------------------------------------- ---------------------GAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGAACTACATCAGGTCTTCATCC TGAGGATTTTATAAAGAAAGCAGATTTGGCAGTT---CAAAAGACTCCTGAAATAATAAATCAGGGAACTAACCAAATGG AGCAGAAT---------GGTCAAGTGATGAATATTACTAATAGTGCTCATGAGAATAAAACAAAAGGTGATTCT---ATT CAGAATGAGAAAAATCCTAACCCAATAGAA------TCACTGGAAGAAGAATCTGCTTTCAAAACTAAAGCTGAACCTAT AAGCAGCAGTATAAACAATATGGAACTAGAATTAAATATCCACAATTCAAAAGCACCTAAAAAAAATAGGCTGAGGAGGA AGTCTTCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAACTGTACTGAACTACAA ATTGATAGTTGTTCTAGCAGTGAAGAG---ATAAAGAAAAAA---AATTACAACCAAATGCCAGTCAGGCACAGCAGAAA CCTACAACTCATGGAAGATAAAGAATCTGCAACTGGAGCCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGAC 
ATGCCAGTGATACTTTCCCAGAACTGAAGTTAACAAAGGTACCTGGTTCTTTTACTAACTGTTCAAATACTAGTGAA--- AAAGAATTTGTCAATCCTAGCCTTTCAAGAGAAGAAAAAGAAGAGAAA---CTAGAAACAGTTAAAGTGTCTAATAATGC CAAAGACCCCAAAGATCTCATCTTAAGTGGAGAAAGG---GTTTTACAA---ACTGAAAGATCTGTAGAGAGTAGCAGTA TTTCATTGGTACCTGGTACCGATTATGGCACTCAGGAAAGTATCTCATTACTGGAAGTTAGCACTCTAGGG---AAGGCA AAA---ACAGAACGAAATAAATGTATGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGAACTAATTCATGGTTGT---TC TGAAGATACTAGAAATGACACAGAAGGCTTTAAGTATCCATTGGGAAGTGAAGTTAAC---CACAGTCAGGAAACAAGCA TAGAAATAGAAGAAAGTGAACTTGATACTCAGTATTTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCCTTTGCTCTG TTTTCAAATCCAGGAAATCCAGAAGAGGAATGTGCAACATTCTCTGCCCACTCTAGGTCCTTAAAGAAACAAAGTCCAAA AGTTACTTCTGAATGTGAACAAAAGGAAGAAAATCAAGGAAAGAAACAGTCTAATATCAAGCCTGTACAGACAGTTAATA TCACTGCAGGCTTTTCTGTGGTTTGTCAGAAAGATAAG---CCAGTTGATAATGCCAAATGTAGTATCAAAGGAGGCTCT AGGTTTTGTCTATCATCTCAGTTCAGA---GGCAACGAAACTGGACTCATTACTCCAAATAAACATGGACTGTTACAAAA CCCATACCATATACCACCACTTTTTCCTGTCAAGTCATTTGTTAAAACTAAATGTAACAAAAAC---CTGCTAGAGGAAA ACTCTGAGGAACATTCAGTGTCACCTGAAAGAGCAGTGGGAAACAAGAACATCATTCCAAGTACAGTGAGCACAATTAGC CATAATAACATTAGAGAAAATGCTTTTAAAGAAGCCAGCTCGAGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGT GGGCTCCAGTATTAATGAAGTAGGTTCCAGT---------------------GATGAAAACATTCAAGCAGAACTAGGTA GAAACAGAGGGCCAAAATTGAATGCTGTGCTTAGATTAGGGCTTTTGCAACCTGAGGTCTGTAAACAAAGTCTT---CCT ATAAGTAATTGTAAGCATCCTGAAATAAAAAAGCAAGAACATGAAGAA---TTAGTTCAGACTGTTAATACAGACTTCTC TCCATGTCTGATTTCAGATAACCTAGAACAG---CCTATGGGAAGTAGTCATGCGTCTGAGGTTTGTTCTGAGACTCCTG ATGATCTGTTAGATGATGGTGAAATAAAGGAAGATACTAGTTTTGCTGAAAATGACATTAAGGAGAGTTCTGCTGTTTTT AGCAAAAGCATCCAGAGAGGAGAGCTCAGCAGGAGCCCTAGCCCTTTCACCCAT---ACACATTTAGCTCAGGGTTACCG AAAAGAGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAG >Orangutan TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAACATAACAGATGGGCTGGAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCCAGCACAGAAAAAAAGGTAGACCTGAATGCTGATCCCCTGTGTGAGAGAAAAGAA 
TGGAATAAGCAGAAACTGCCATGCTCAGAGAATCCTAGAGAT---ACTGAAGAT---GTTCCTTGGATAACACTAAATAG CAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGACGAACTGTTAGGTTCTGATGACTCACATGATGGGAGGTCTG AATCAAATGCCAAAGTAGCGGATGTATTGGACGTTCTAAAT------GAGGTAGATGAATATTCTGGTTCTTCAGAGAAA ATAGACTTACTGGCCAGTGATCCTCATGAGGCTTTAATTTGTAAAAGTGAAAGAGTTCACTCCAAATCAGTAGAGAGTAA T---ATTGAAGACAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCCAACTTAAGCCATGTAACTGAAAATC TAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAA--------------------------------------- ---------------------GAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGAGCTACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGCAGATTTGGCAGTT---CAAAAGACTCCTGAAATGATAAATCAGGGAACTAACCAAATGG AGCAGAAT---------GGTCAAGTGATGAATATTACTAATAGTGGTCATGAGAATAAAACAAAAGGTGATTCT---ATT CAGAATGAGAAAAATCCTAACCCAATAGAA------TCACTCGAAAAAGAATCTGCTTTCAAAACAAAAGCTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTCGAATTAAATATCCATAATTCAAAAGCACCTAAAAAGAATAGGCTGAGGAGGA AGTCTTCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAATTGTACTGAATTGCAA ATTGATAGTTGTTCTAGCAGTGAAGAG---ATAAAGAAAAAA---AAATACAACCAAATGCCAGTCAGGCACAGCAGAAA CCTACAACTCATGGAAGATAAAGAACCTGCAACTGGAGCCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGAC ATGACAGCGATACTTTCCCAGAGCTGAAGTTAACAAATGCACCTGGTTCTTTTACTAACTGTTCAAATACCAGTGAGCTT AAAGAATTTGTCAATCCTAGCCTTCCAAGAGAAGAAAAAGAAGAGAAA---CTAGGAACAGTTAAAGTGTCTAATAATGC CAAAGACCCCAAAGATCTCATGTTAAGTGGAGGAAGG---GTTTTGCAA---ACTGAAAGATCTGTAGAGAGTAGCAGTA TTTCATTGGTACCTGGTACTGATTATGGCACTCAGGAAAGTATCTCGTTACTGGAAGTTAGCACTCTAGGG---AAGGCA AAA---ACAGAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGAACTAATTCATGGTTGT---TT CAAAGATACTAGAAATGACACAGAAGGGTTTAAGTATCCATTGGGACATGAAGTTAAC---CACAGTCAGGAAACAAGCA TAGAAATGGAAGAAAGTGAACTTGATACTCAGTATTTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCTG TTTTCAAATCCAGGAAATCCAGAAGAGGAATGTGCAACATTCTCTGCCCACTCTAGGTCCTTAAAGAAACAAAGTCCAAA AGTCACTTTTGAATGTGAACAAAAGGAAGAAAATCAAGGAAAGAATGAGTCTAATATCAAGCCTGTACAGACAGCTAATA TCACTGCAGGCTTTCCTGTGGTTTGTCAGAAAGATAAG---CCAGTTGATTATGCCAAATGTAGTATCAAAGGAGGCTCT 
AGGTTTTGTCTATCATCTCAGTTCAGA---GGCAACGAAACTGGACTCATTACTCCAAATAAACATGGACTTTCACAAAA CCCATATCATATACCACCACTTTTTCCCATCAAGTCATTTGTTAAAACTAAATGTAAGAAAAAC---CTGCTAGAGGAAA ACTCTGAGGAACATTCAATGTCACCTGAAAGAGAAATGGGAAACGAGAAC---ATTCCAAGTACAGTGAGCATAATTAGC CGTAATAACATTAGAGAAAATGTTTTTAAAGAAGCCAGCTCAAGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGT GGGCTCCAGTATTAATGAAGTAGGTTCCAGT---------------------GATGAAAACATTCAAGCAGAACTAGGTA GAAGCAGAGGGCCAAAATTGAATGCTATGCTTAGATTAGGGGTTTTGCAACCTGAGGTCTATAAACAAAGTTTT---CCT GGAAGTAATGGTAAGCATCCTGAAATAAAAAAGCAAGAATATGAAGAA---GTACTTCAGACTGTTAATACAGACTTCTC TCCATGTCTGATTTCAGATAACCTAGAACAG---CCTATGAGAAGTAGTCATGCATCTCAGGTTTGTTCTGAGACACCTA ATGACCTGTTAGATGATGGTGAAATAAAGGAAGATACTAGTTTTGCTGAAAATGACATTAAGGAAAGTTCTGCTGTTTTT AGCAAAAGCGTCCAGAGAGGAGAGCTTAGCAGGAGTCCTAGCCCTTTCACCCAT---ACACATTTGGCTCAGGGTTACCG AAGAGGGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAG >Gorilla TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAACAAACAGCCTGGCTTAGCAAGGAGCCAACATAACAGATGGGCTGGAAGTAAGGAAA CATGTAATGAT---AGGCGGACTCCCAGCACAGAAAAAAAGGTAGATCTGAATGCTGATCCCCTGTGTGAGAGAAACGAA TGGAATAAGCAGAAACTGCCATGCTCAGAGAATCCTAGAGAT---ACTGAAGAT---GTTCCTTGGATAACACTAAATAG CAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAGGTTCTGATGACTCACATGATGGGGGGTCTG AATCAAATGCCAAAGTAGCTGATGTATTGGACGTTCTAAAT------GAGGTAGATGAATATTCTGGTTCTTCAGAGAAA ATAGACTTACTGGCCAGTGATCCTCATGAGGCTTTAATATGTAAAAGTGAAAGAGTTCACTCCAAATCAGTAGAGAGTAA T---ATTGAAGACAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCCAGCTTAAGCCATGTAACTGAAAATC TAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAA--------------------------------------- ---------------------GAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGAGCTACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGCAGATTTGGCAGTT---CAAAAGACTCCTGAAATGATAAATCAGGGAACTAACCAAATGG AGCAGAAT---------GGTCAAGTGATGAATATTACTAATAGTGGTCATGAGAATAAAACAAAAGGTGATTCT---ATT CAGAATGAGAAAAATCCTAACCCAATAGAA------TCACTAGAAAAAGAATCTGCTTTCAAAACGAAAGCTGAACCTAT 
AAGCAGCAGTATAAGCAATATGGAACTCGAATTAAATATCCACAATTCAAAAGCGCCTAAAAAGAATAGGCTGAGGAGGA AGTCTTCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAATTGTACTGAATTGCAA ATTGATAGTTGTTCTAGCAGTGAAGAG---ATAAAGAAAAAA---AAGTACAACCAAATGCCAGTCAGGCACAGCAGAAA CCTACAGCTCATGGAAGATAAAGAACCTGCAACTGGAGCCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGAC ATGACAGCGATACTTTCCCAGAGCTGAAGTTAACAAATGCACCTGGTTCTTTTACTAACTGTTCAAATACCAGTGAACTT AAAGAATTTGTCAATCCTAGCCTTCCAAGAGAAGAAAAAGAAGAGAAA---CTAGAAACAGTTAAAGTGTCTAATAATGC CGAAGACCCCAAAGATCTCATGTTAAGTGGAGAAAGG---GTTTTGCAA---ACTGAAAGATCTGTAGAGAGTAGCAGTA TTTCATTGGTACCTGGTACTGATTATGGCACTCAGGAAAGTATCTCGTTACTGGAAGTTAGCACTCTAGGG---AAGGCA AAA---ACAGAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGGACTAATTCATGGTTGT---TC CAAAGATACTAGAAATGACACAGAAGGCTTTAAGTATCCATTGGGACATGAAGTTAAC---CACAGTCGGGAAACAAGCA TAGAAATGGAAGAAAGTGAACTTGATGCTCAGTATTTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCTG TTTTCAAATCCAGGAAATCCAGAAGAGGAATGTGCAACATTCTCTGCCCACTCTAGGTCCTTAAAGAAACAAAGTCCAAA AGTCACTTTTGAATGTGAACAAAAGGAAGAAAATCAAGGAAAGAATGAGTCTAATATCAAGCCTGTACAGACAGTTAATA TCACTGCAGGCTTTCCTGTGGTTTGTCAGAAAGATAAG---CCAGTTGATTATGCCAAATGTAGTATCAAAGGAGGCTCT AGGTTTTGTCTATCATCTCAGTTCAGA---GGCAACGAAACTGGACTCATTACTCCAAATAAACATGGACTTTTACAAAA CCCATATCATATACCACCACTTTTTCCCATCAAGTCATTTGTTAAAACTAAATGTAAGAAAAAC---CTGCTAGAGGAAA ACTTTGAGGAACATTCAATGTCACCTGAAAGAGAAATGGGAAATGAGAAC---ATTCCAAGTACAGTGAGCACAATTAGC CGTAATAACATTAGAGAAAATGTTTTTAAAGAAGCCAGCTCAAGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGT GGGCTCCAGTATTAATGAAGTAGGTTCCAGT---------------------GATGAAAACATTCAAGCAGAACTAGGTA GAAACAGAGGGCCAAAATTGAATGCTATGCTTAGATTAGGGGTTTTGCAACCTGAGGTCTATAAACAAAGTCTT---CCT GGAAGTAATTGTAAGCATCCTGAAATAAAAAAGCAAGAATATGAAGAA---GTAGTTCAGACTGTTAATACAGATTTCTC TCCATGTCTGATTTCAGATAACTTAGAACAG---CCTATGGGAAGTAGTCATGCATCTCAGGTTTGTTCTGAGACACCTG ATGACCTGTTAGATGATGGTGAAATAAAGGAAGATACTAGTTTTGCTAAAAATGACATTAAGGAAAGTTCTGCTGTTTTT AGCAAAAGCGTCCAGAGAGGAGAGCTTAGCAGGAGTCCTAGCCCTTTCACCCAT---ACACATTTGGCTCAGGGTTACCG AAGAGGGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAG 
>Human TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAACATAACAGATGGGCTGGAAGTAAGGAAA CATGTAATGAT---AGGCGGACTCCCAGCACAGAAAAAAAGGTAGATCTGAATGCTGATCCCCTGTGTGAGAGAAAAGAA TGGAATAAGCAGAAACTGCCATGCTCAGAGAATCCTAGAGAT---ACTGAAGAT---GTTCCTTGGATAACACTAAATAG CAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAGGTTCTGATGACTCACATGATGGGGAGTCTG AATCAAATGCCAAAGTAGCTGATGTATTGGACGTTCTAAAT------GAGGTAGATGAATATTCTGGTTCTTCAGAGAAA ATAGACTTACTGGCCAGTGATCCTCATGAGGCTTTAATATGTAAAAGTGAAAGAGTTCACTCCAAATCAGTAGAGAGTAA T---ATTGAAGACAAAATATTTGGGAAAACCTATCGGAAGAAGGCAAGCCTCCCCAACTTAAGCCATGTAACTGAAAATC TAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAA--------------------------------------- ---------------------GAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGACCTACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGCAGATTTGGCAGTT---CAAAAGACTCCTGAAATGATAAATCAGGGAACTAACCAAACGG AGCAGAAT---------GGTCAAGTGATGAATATTACTAATAGTGGTCATGAGAATAAAACAAAAGGTGATTCT---ATT CAGAATGAGAAAAATCCTAACCCAATAGAA------TCACTCGAAAAAGAATCTGCTTTCAAAACGAAAGCTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTCGAATTAAATATCCACAATTCAAAAGCACCTAAAAAGAATAGGCTGAGGAGGA AGTCTTCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAATTGTACTGAATTGCAA ATTGATAGTTGTTCTAGCAGTGAAGAG---ATAAAGAAAAAA---AAGTACAACCAAATGCCAGTCAGGCACAGCAGAAA CCTACAACTCATGGAAGGTAAAGAACCTGCAACTGGAGCCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGAC ATGACAGCGATACTTTCCCAGAGCTGAAGTTAACAAATGCACCTGGTTCTTTTACTAAGTGTTCAAATACCAGTGAACTT AAAGAATTTGTCAATCCTAGCCTTCCAAGAGAAGAAAAAGAAGAGAAA---CTAGAAACAGTTAAAGTGTCTAATAATGC TGAAGACCCCAAAGATCTCATGTTAAGTGGAGAAAGG---GTTTTGCAA---ACTGAAAGATCTGTAGAGAGTAGCAGTA TTTCATTGGTACCTGGTACTGATTATGGCACTCAGGAAAGTATCTCGTTACTGGAAGTTAGCACTCTAGGG---AAGGCA AAA---ACAGAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGGACTAATTCATGGTTGT---TC CAAAGATAATAGAAATGACACAGAAGGCTTTAAGTATCCATTGGGACATGAAGTTAAC---CACAGTCGGGAAACAAGCA TAGAAATGGAAGAAAGTGAACTTGATGCTCAGTATTTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCCG 
TTTTCAAATCCAGGAAATGCAGAAGAGGAATGTGCAACATTCTCTGCCCACTCTGGGTCCTTAAAGAAACAAAGTCCAAA AGTCACTTTTGAATGTGAACAAAAGGAAGAAAATCAAGGAAAGAATGAGTCTAATATCAAGCCTGTACAGACAGTTAATA TCACTGCAGGCTTTCCTGTGGTTGGTCAGAAAGATAAG---CCAGTTGATAATGCCAAATGTAGTATCAAAGGAGGCTCT AGGTTTTGTCTATCATCTCAGTTCAGA---GGCAACGAAACTGGACTCATTACTCCAAATAAACATGGACTTTTACAAAA CCCATATCGTATACCACCACTTTTTCCCATCAAGTCATTTGTTAAAACTAAATGTAAGAAAAAT---CTGCTAGAGGAAA ACTTTGAGGAACATTCAATGTCACCTGAAAGAGAAATGGGAAATGAGAAC---ATTCCAAGTACAGTGAGCACAATTAGC CGTAATAACATTAGAGAAAATGTTTTTAAAGAAGCCAGCTCAAGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGT GGGCTCCAGTATTAATGAAATAGGTTCCAGT---------------------GATGAAAACATTCAAGCAGAACTAGGTA GAAACAGAGGGCCAAAATTGAATGCTATGCTTAGATTAGGGGTTTTGCAACCTGAGGTCTATAAACAAAGTCTT---CCT GGAAGTAATTGTAAGCATCCTGAAATAAAAAAGCAAGAATATGAAGAA---GTAGTTCAGACTGTTAATACAGATTTCTC TCCATATCTGATTTCAGATAACTTAGAACAG---CCTATGGGAAGTAGTCATGCATCTCAGGTTTGTTCTGAGACACCTG ATGACCTGTTAGATGATGGTGAAATAAAGGAAGATACTAGTTTTGCTGAAAATGACATTAAGGAAAGTTCTGCTGTTTTT AGCAAAAGCGTCCAGAAAGGAGAGCTTAGCAGGAGTCCTAGCCCTTTCACCCAT---ACACATTTGGCTCAGGGTTACCG AAGAGGGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAG >Chimpanzee TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAACATAACAGATGGGCTGGAAGTAAGGAAA CATGTAATGAT---AGGCGGACTCCCAGCACAGAAAAAAAGGTAGATCTGAATGCTGATCCCCTGTGTGAGAGAAAAGAA TGGAATAAGCAGAAACTGCCATGCTCAGAGAATCCTAGAGAT---ACTGAAGAT---GTTCCTTGGATAACACTAAATAG CAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAGGTTCTGATGACTCACATGATGGGGGGTCTG AATCAAATGCCAAAGTAGCTGATGTATTGGACGTTCTAAAT------GAGGTAGATGAATATTCTGGTTCTTCAGAGAAA ATAGACTTACTGGCCAGCGATCCTCATGAGGCTTTAATATGTAAAAGTGAAAGAGTTCACTCCAAATCAGTAGAGAGTAA T---ACTGAAGACAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCCAACTTAAGCCATGTAACTGAAAATC TAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAA--------------------------------------- ---------------------GAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGAGCTACATCAGGCCTTCATCC 
TGAGGATTTTATCAAGAAAGCAGATTTGGCAGTT---CAAAAGACTCCTGAAATGATAAATCAGGGAACTAACCAAATGG AGCAGAAT---------GGTCAAGTGATGAATATTACTAATAGTGGTCATGAGAATAAAACAAAAGGTGATTCT---ATT CAGAATGAGAAAAATCCTAACCCAATAGAA------TCACTCGAAAAAGAATCTGCTTTCAAAACGAAAGCTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTCGAATTAAATATCCACAATTCAAAAGCACCTAAAAAGAATAGGCTGAGGAGGA AGTCTTCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAATTGTACTGAATTGCAA ATTGATAGTTGTTCTAGCAGTGAAGAG---ATAAAGAAAAAA---AAGTACAACCAAATGCCAGTCAGGCACAGCAGAAA CCTACAACTCATGGAAGATAAAGAACCTGCAACTGGAGTCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGAC ATGACAGCGATACTTTCCCAGAGCTGAAGTTAACAAATGCACCTGGTTCTTTTACTAACTGTTCAAATACCAGTGAACTT AAAGAATTTGTCAATCCTAGCCTTCCAAGAGAAGAAGAAGAAGAGAAA---CTAGAAACAGTTAAAGTGTCTAATAATGC CGAAGACCCCAAAGATCTCATGTTAAGTGGAGAAAGG---GTTTTGCAA---ACTGAAAGATCTGTAGAGAGTAGCAGTA TTTCATTGGTACCTGGTACTGATTATGGCACTCAGGAAAGTATCTCGTTACTGGAAGTTAGCACTCTAGGG---AAGGCA AAA---ACAGAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGGACTAATTCATGGTTGT---TC CAAAGATACTAGAAATGACACAGAAGGCTTTAAGTATCCATTGGGACATGAAGTTAAC---CACAGTCGGGAAACAAGCA TAGAAATGGAAGAAAGTGAACTTGATGCTCAGTATTTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCTG TTTTCAAATCCAGGAAATCCAGAAGAGGAATGTGCAACATTCTCTGCCCACTGTAGGTCCTTAAAGAAACAAAGTCCAAA AGTCACTTTTGAACGTGAACAAAAGGAACAAAATCAAGGAAAGAATGAGTCTAATATCAAGCCTGTACAGACAGTTAATA TCACTGCAGGCTTTCCTGTGGTTTGTCAGAAAGATAAG---CCAGTTGATTATGCCAAATGTAGTATCAAAGGAGGCTCT AGGTTTTGTCTATCATCTCAGTTCAGA---GGCAACGAAACTGGACTCATTACTCCAAATAAACATGGACTTTTACAAAA CCCATATCATATACCACCACTTTTTCCCATCAAGTCATTTGTTAAAACTAAATGTAAGAAAAAC---CTGCTAGAGGAAA ACTTTGAGGAACATTCAATGTCACCTGAAAGAGAAATGGGAAATGAGAAC---ATTCCAAGTACAGTGAGCACAATTAGC CGTAATAACATTAGAGAAAATGTTTTTAAAGAAGCCAGCTCAAGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGT GGGCTCCAGTATTAATGAAGTAGGTTCCAGT---------------------GATGAAAACATTCAAGCAGAACTAGGTA GAAACAGAGGGCCAAAATTGAATGCTATGCTTAGATTAGGGGTTTTGCAACCTGAGGTCTATAAACAAAGTCTT---CCT GAAAGTAATTGTAAGCATCCTGAAATAAAAAAGCAAGAATATGAAGAA---GTAGTTCAGACTGTTAATACAGATTTCTC 
TCCATGTCTGATTTCAGATAACTTAGAACAG---CCTATGGGAAGTAGTCATGCATCTCAGGTTTGTTCTGAGACACCTG ATGACCTGTTAGATGATGGTGAAATAAAGGAAGATACTAGTTTTGCTGAAAATGACATTAAGGAAAGTTCTGCTGTTTTT AGCAAAAGCGTCCAGAGAGGAGAGCTTAGCAGGAGTCCTAGCCCTTTCACCCAT---ACACATTTGGCTCAGGGTTACCG AAGAGGGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAG >Jackrabbit -------------------------------------------------------------------------------- -AAGGCTGAATTCTGTAATAAGAGCAAACAGCCTGGCTTAGCAAGAAGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCCAGCACAGAGAAAAAGGTAGTTCTGAATGTTGACTGCCTGTATGGGAGAAAACAA CAGGATAAGCAGAAACCTCCATGCCCTGAGACCTCTGGAGAT---AACCAAGAT---GTTTCTTGGATAACAGTAAATAG CAGCATTCGGAAAGTTAACGAGTGGTTCTCCAGAAGTAATGAAATGTTAACTCCTGATGACTCACTTGACCGGCGGTCTG AATCAAATGCCAAAGTGGCTGGTGCATTAGAAGTCCCAAAG------GAGGTAGATGGATATTCTGGTTCTACAGAGAAA ATAGACTTACTGGCCAGTGATTCCCATAATGCTTTAATATGTGAAAGCAAAAGAGTCCATTCCAAACCAGTAGAGAATAA T---ATCAAAGATAAAATATTTGGGAAAACCTACCACAGGAAGACAAGCCTCCCTAACTTGAGCCACATAACTGAAGATC TAACTATAGGAGCATTTGCTGCGGAACCACTGGTA--------------------------------------------- ---------------------CCATGTCCCCCCGCAAATAAATTAAAGCGTAAAAGAAGAACTTCTTCAGGCCTTCAACC TGAAGATTTTATCAAGAAGGTAGATTTGGCAGTTGTTCCAAAAACCACTGCACAGATAAATCAGGGAACTGATCAAACGG TGGACAGT---------GATCAGGTGATGAATATTACTAATTGTGGTAATGAGAATGAAACAGAAGGTGACTAT---ATT CAGAAAGAGACAAATGCTAACCCAACAGAA------TCCCTAGAAAAAGACTCTTCCTTCAGAACTAAAGTTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTAGAATTAAATGTCCATAATTCAAAAAAACCCAAGAAGAATAGGCTGAGGAGGA AGTCCTCTACCAGGCGTGTTCATGCACTTGAACTAGTAGTCAATAAGAAACCGAGCCCACCTAATCATGCTGAACTACAA ATTGACAGTTGTACTAGCAGTGAAGAA---ATG------------AATTTTGACCAAATACCAGTCAGTCACAGCAGACA GGCTCAAGTCATAGAAGATACAGAACCTCCAACTGGAGCTAGGAAG---AACAAGCCAAGTGAACAAATAAGTAAAAGAC ATGCCAGTGATGTTTTCCCAGAAGTCAAATTAACAAACATACCTGGTGTTTTTACAAACTGTTCAAGTTCTAATAAACTT CAAGAATCTGTCGATCCTAGCCTTCAAAGAAGGGAAATAGAAGAGAAC---CTAGACACAGTTCAAGTGCCTAACAGTGC CATAGACCCCAAAGATCTCCTGTTATGTGGAGAAAGG---GGCTTGCAA---ACTGAAAGGTCTGCGGAGAGTACCAGTA 
TTTCACTGGTACCTGATACTGAATATGGTACCCAGGACAGCATCTCATTACTGGGCGCTAACACCCTTGGG---AAGGCA AAA---ACAGCAGCAAATGGACATGTGAGTGAGAGCACAACAATTGGAAATCCCAAGGAACTTAGCCATGATTGT---TT GAAAGATACTGGAAATGACCCAGACAACTGTAAGGATCCACTGAGAAGTGAAGTTGAC---------CAAGAGACAAGCG TAGAAATGGAAGAGAGTGAGTTTGATACTCAGTATTTACAGAATACATTCATGGGTTCAAAGCGTCGTTCATTTGCTCTG TGTTCAAAACCAGGAGATCCAGAAAAGGAATGTGCAGCAGTCTGTACCCGCTCCAACTCCTCAAGGAAACAAAGTCCAGA AGTCACTCTTGAACGTGAACAAAAAGAA---AGTCAGAGAAAGGAAGAGTGGAAAATCAGTCATGTCCAGGCAGCTGATA GCACTGTGGGCTTTCCTGTGGTGTGTCAGAAAGAAAAG---GCAGGTGATTGTGCCAAATGGAGCACTAAAGAGATCTCT AGGCTTTGTCTGTCATCTCAGTCCAAA---GGCAGTGAAACTGAGCTCATTGCTGTAAGTAAACATGGGATGTCACAAAA CCCATATCATATACCACCAATTTCTCCCATCAAGACATCTGTTAAAGCCACACGCCAGGTACAC---CTGTCAGGGGAAA GGTCTGAGGAGCATTCCGTGTCATCTGAAAGAGCAGTGGGAAGCGAGAGCATCATTCAAAGCACAGTGAGCACAATTAGC CAAGAGTACATTAGAGAAAGTGCTTTGAAAGGATTCAGCTCAAGCAGTATTAATGAAGGGGGCTCTAGTGCTAACGAAGT ATGCTCCAGTGTGAATGAAGTAGGATCCAGT---------------------GGTGAAAACATGCAAGCACAACCAGGCA GAAGCAGAGCACCTGAGTTAAATGCTGTGCTGAGAATAGGTCTTCTGCAGCCTGAAGTCTCTGAGCAAAGCCTT---CCT ATAAGTAATTCTGAACTTCCCAAACTACAAAGGCAAGGAGAAAACGAAGGAGTAGTTCAGGCTGTGAATAGAGATTTCTC TTCGTATCTGGTTCCTGATAGCCAAGAGCAG---TCTATGGGAGGAAGGCATGCTTCTCAGATTTGTTCTGAGACACCTG ATGACCTGTTAGATATTTATGAAATAAAGGAAAATACCAGCTTTGCTGAGAGCGGCATTAAGGAAAGATCTGCTGTGTTT AGTAAAAGTGTTCAGAGGGGAGAGTGCAGTAGAACCCCTAGCCCTTCAGGCCAT---GCATGTTTGGCTCAGAGTCA--- ------------------------------------------------- >FlyingSqu TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAGCAGAGCAGATGGGCTAAAAGTAAGGAAA CCTGTAATGAT---AGGCAAATTCCCAGCTCAGAGAAAAAGGTAGATTTGAATGCTGATCCCCAATATGAGAAAAAAGAA CCAAGTAAGCAGAAACATCCATGCTCTGAGAATTCCAGAGAT---ACCCAAGAT---GTTCCTTGGATAACACTAAATAG CAGCATTCGGAAAGTTAACGAGTGGTTTTCCAGAAGTGACGAAATGTTAACTTCTGATGACTCAGATGATGGGGGTTCTG AATCAAATGCTGAAATAGCTGGTATATTAGAAATTCCAAAT------GAAGTAGATGGATTTTCTGGTTCTTCGGAGAAA 
ATAGACTTGTTGGCCACTGATCCGCATAATGCTTTAATTTCTAAATGTAAAAGAGTCTGCTCCAAAGCAGTCAAGAGTAA T---ATTGAAGATAAAATATTTGGGAAAACCTATCAGAGGAAGGCAAGCCTCCCTAACTTGAGCCATATAACTGAAAATC TAATTATAGGAGCATTTGCCAGAGAACCACAAATAACACAAGAGCTTGCCAGAGAACCACAAATAACACAACAGCTTGCC AGAGAACCACAAATAACACAAGAGCGTCCCCTCACAAATAAATTAAGACGTAAAAGGAGAACTACATCATGCCTTCATCC TGAGGACTTTATCAAGAAAACAGATTTGGCAATTGTTCAAAAGACTCCTGAAAAGATAAATCAGGGAACTGACCAAATGG AACACAAT---------GATCAAGTAATGAATATTACTAATAGTGGTCAAGAGAATGAAACAAAAGTTGATTAT---GTT CAGAAAGAGAAAAATGCTAACCCAGTTGAA------TCATTGGAAAAAGAGTCTGCTTTCAGAACTAAAGCTGAACCTAT AAGCAGTAGTATAAGCAACATGGAACTAGAATTAAATATCCACAATTCAAAAGCACCTAAGAAAAATAGGCTGAGGAGGA AGTCTTCTACTAGGTACATTCATGTGCTTGAACCAGTAGTTAATAGAAATACAAGTCCACCTCATCACACTGAATTGCAA ATTGATAGTTGTACTAGTAGTGAAGAA---ATAAAGACAAGA---AATTCCAACCAAATGTCAGTCAGGCATGGCAAAAA GCTTCAGTTCATGGAAGATGCAGAACCTGCAACTGATGTCAGAAAAAGTAACAAGCCAAATGAACAAGTAAATAAGAGAC ATACCAATGATGCTTTCCCAGGACTGAAGTTAACAGGCATATCTGGTATTTTTACTAACTGCTCAAGTTCTAGTAAAGTT GAAGAATTTATCAATCCTAACCTTCAGAAAGAAGGAACAGAAGAGAAC---ATAGAAATAATTCAAGTGTCTGATAATAC CCAAGACCCCAAAGATACGGTGTTAAGTGGAGAAAGG---GTTTTGCAA---ACTGAGAGATCTGTAGAGAGTACCAGTA TTTCATTGGTACCTGATACTGATTATGGCACTCAGGACAGTATCTCATTACTGGAAGCTAACACCTTTGGA---AAAGCA AGA---ACAGCATCAAATCAACATGTTACTCAGTATGTGGCAATTGAAAATCCCAAAGAATTTGTCCATGGTCAT---TC TAAAGATACTAAAAATGACCCAGAGGGTTTCAAGGATTCATTGAGATGTGAAGTTAAC---CACATTCAAGAGACAAATG TAGAAATGGAAGAAAGTGAACTTGATACTCAGTCTTTAGAGAATACATTCCAAGTTTCAAAGCGTCAGTCATTTGCTCTG TTTTCAAATCCAGGAAATCTAGAAAAGGAATGTGCAGCCAACTATACCTCTTCCAAGTCCTTACGGAAACAAAGCCCAGA CATGACTCTTGAATGTGAGCAAGAAGAAGAAAATTGGGGAAAGAAAGAGTCTAAAATTAGGCATGTACAGGCAATTAATG CTACT------------ATGGTTTGTCAGAAAAGTAAG---CCAGATGATGATGCCCAATGTAGTGTTACAGAAGTCTCT AGAATTTTTCCATCATTTCAGTTCAGA---GACAATCTAACTAAACTCATAACTGCAGATAAACATGGAATTTCACAAAA CCCATATCATATGCCATCCATTTCTCCCAGCAGGTCATCTGTTAACACTAAATGTAGGAGAAAC---CTATCAGAAGAAA AGTTTGAGAAAGATTCAAGGTCACCTAAAGAAGCGGTGGGATATAAGAGAATCATTCAAAGTACAGTGAGCACAATTAGC 
CAAAATAACATTAGACAAAGTGCCTTTAAAGAAGCGAACTCAGGCAGTATTAATGAAGTAGGCTCTAGTACTAATGAAGT AGGCTCCAGTATTAATGAAGTAGGTTCCAGT---------------------GGTGAAAACATTCAAGCAGAACTAGGTA GAAACAGAGGACCCAAATTAAATGCTGTGCTTAGATTAGGTCTTATGCAACCTGAAGCCTGTAAACAAAATCTT------ CTAAGTAATTGTAAATACCCTGAAATAAAAAGACAAGGAGAAGATGAA---GTAGTTCAAGTTGTTAATGCAGATTTCTC TCCATGTCTAATTTCAGATGACTTAGAACAA---CCTATGGGAAATAGTCATGTCACTCAGGTTTGTTCTGAGACTCCTG ATGACCTGTTAGATGATGATGAAATACAGGAAAATACCAGCTTTGCTGAAGGTGGTATTAAGGAAAGATCTGCTGTTTTT AGTAAGAGTGTCCAAAGAGGAGAGTTTAGCAGGAGCCCGAGTCCTTTATCCCAT---ACGTCTTTGGCTCGGAGTCATCA AAAAGGGGCCAGGAAATTAGAGTCCTCAGAAGAGAGCATCTGTAGTGAG >OldWorld -------------------------------------------------------------------------------- ----------------------------------------------AGCCAACAGAGCAGGTGGGCTGAGAGCAAGGAGA GGTGCCATGAC---AGGCAGGCTCCTGGCACAGAGCAGAAGGTAGAGCTGACTGCTGAGCCCCTCCACGAGAGAAAAAGA CGGAAAAAGCAGAACCCTCCGAGCTCCGAGGCTCATGGAGGG---ACCCAGGAT---GTTCCTTGGATCACACTAAATAG CAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGACAAAGCAGTGACTTCTGACAGCACCTGTGACAGGGAGTCCA AGTCAGATGCTGAAGAAGCTGGTGTGGCAGAAGATCCAAAA------GGCCTAGAT---------GGCTCTTCAGAGAAA GTAGGCTTGCTGACCAGCAGTCCTCAAAAAGCTATAATCTGTGCCAGGGAGAGGGTCTGCTCCAAAGCACTGAAGAGTGA C---ATGGAAGATAAAGTATTTGGGAAAACCTATCAGAGGAGGGTGAGCCTCCGCAGCTCAAGCCACGGAGCTGAAAGTC TGACCGTGGGAGCGTTTGTTAGAGAGCCGCAGCTGACACTG--------------------------------------- ---------------------GAGCGCTCCTTCACAAATAAAATAAAGCGCAAGAGGAGAACCACATCGTGCCTTCATCC TGAGGACTTTGTCAAGAAAGCAGATTTGACAGTGGCTCAAAAGACTCCTGAAAAGGTAAATCAGGGAACGAAGCAAATGG AGGAGAGT---------AGTCAAGTGACAAATACTACTAATAACAGTCATGAGAATGAGACAAAAATGGGCAAT---GTT CAG------AGAAACCCTAACCCAGTACTA------TCAGTGGAAAAAGAGTCTGCTTCTGGTACTAACGCAGAGCCCTC GAGCAGCAGCATAAACAACAGGGAACTAGAATTAAACACCCCTCTTGCAAAAGAACCTAAGGAAGACAGGCTGAGGGGGA CGTCCTCAAGCAGACACAGACCT------------------------AATCCACACCTGCTGGATCACACAGAGCTGCAA AGTGGCAATTCTATCAGCAGTGAAGAA---ATAAAGGAAAAA---AGCTCCCCCCAAATGCCCATCAGGCACAGCAGGAC 
GCTGCACCTCACAGGGGCTGTGGAAGCTTCTATTGGAGCCAGGGAGAGTAAGAAGCCAGCTGGACAAGTAAGGGAGAGAC ATGCCGGGGACTCTTTCCTAGAACCAAGATTAGCAAGACTACCTGCT---TTTACTAACAGTTCAAGCCCTGATAACCTT AAAGAATTTGTCAACCTTAGTGCACAGACAGAAGAGATGGAAGAGAAC---CCAGAAACAGTACAAGTGTCCGAAAGTAC CAGAGACTCCAAAGGTCCTGTGTTAAGTGGGGAAAAG---GGAGTGCAA---ACCGAGAGGTCTATGGAGAGCACTAGCA TTTCACTAGTCCCTGACACTGACTGTGGCACTCAGGACAGTGTCTCCTTACTGGAAGCCGACAGCCTCAGG---AAGGCA CGG---AGAGCATCGCATCAGTGTATGGCTCAATATGTGGCGGTTGCGAAGCCCAAGGAACTTCTGCCTGCTTGT---TC TGAAGACACTGGAAACGGCACAGACAGCTTAAATGATCCATTGAGATGTGGAGGGAGC---CACATCCAGGAGGCAAATA TAGAAATGGAAGATAGTGAACTCGACACTCAGTATTTACAGAATACATTCCAGGCTTCAAAGCGTCAGTCATTTGCTCTC TTTTCAAATCCAGGAAACTCAAAAAAGGAATCAACAACAGTCTGTGCCCACTCTGAGTCCTTTAAGAAACAAAATCCAGA AGTCATTCCTGAATGTGAACAAACAGAAGAAAATTGGGGCAAGAAAGTGCCTAAAATTAGTTGTGTGCAAGAGAGCGCC- --------------CCTCTGGTTTCTCAGAGGGATCAG---CCAGGCACCAGCATCATATGTAGCGGCACAGGAGTCTCG AGGCTCTGTCTCTCGTCTTGGTTCACA---GGCAGCAAAACTGAACTCGTCACAGCTGACAAACATGGAATTTCACAAAA TCCATATCACATGCCATCAATTTCTCCCATCAGGCCATTTGTTAAAACTCCATGTAAGAAAACC---------------- -----------CGTTCCTCATCACCTGGAGAAGCCACAGGTAACCAGATCATCCTTCAGAGCACC--------------- ------------AGCCATCGCGCTTGCAGAGAAGCCAGCTTGGGCAGTGGGAACGAAGGGGGCTCCAGT----------- ----------------------------------------------------GGGGAGCACATTCAAGCAGAACCCAGTA GACACCAAGAGCCTGAACTA------------AGATTAGGTCTGACGCAGCCCGAAGTCTACCAGCAAAGTCTT---CCT GTAGGTGACTGTAGACATCCCGAAATACAAACACGAGGAGAAAATGGAGTGGTAGCTCAGGCTGTCCATGCAGATTTCTC TCCGTGTCTAATTTTAGATAACGTGGAACAG---CCTATGGGAAATAATCCTGCTTCTCAGATCTGTTCTGAGACGCCCG ATGACCTGTTAGATGATGAGAACAAAAAGGAAGATGCCAGCTTTGCCGAAGGTGGCATTAAGGAAACTTCTGCCATTTTT AGCAAGAGTGTCCAGACAAGACGATTCAGCAGGAGCCCCAGCCCTGTAACCAAT---ACCACTTTGGCTCAGGGTCACCG AAGAAGGGCGAGAAAACTCGAGTCTTCTGAGGAGAGCATGTCAAGTGAG >Mouse TGTGGCACAGATGCTCATGCCAGCTCATTACAGCCTGAGACCAGCAGTTTATTGCTCATTGAAGACAGAATGAATGCAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCATAGCAGTGAGCCAGCAGAGCAGATGGGCTGCAAGTAAAGGAA 
CATGTAACGAC---AGGCAGGTTCCCAGCACTGGGGAAAAGGTAGGTCCAAACGCTGACTCCCTTAGTGATAGAGAGAAG TGGACTCACCCGCAAAGTCTGTGCCCTGAGAATTCTGGAGCT---ACCACCGAT---GTTCCTTGGATAACACTAAATAG CAGCGTTCAGAAAGTTAATGAGTGGTTTTCCAGAACTGGTGAAATGTTAACTTCTGACAGCGCATCTGCCAGGAGGCACG AGTCAAATGCTGAAGCAGCTGTTGTGTTGGAAGTTTCAAAC------GAAGTGGATGGGGGTTTTAGTTCTTCAAGGAAA ACAGACTTAGTAACCCCCGACCCCCATCATACTTTAATGTGTAAAAGTGGAAGAGACTTCTCCAAACCAGTAGAGGATAA T---ATCAGTGATAAAATATTTGGGAAATCCTATCAGAGAAAGGGAAGCCGCCCTCACCTGAACCATGTGACTGAA---- --ATTATAGGCACATTTATTACAGAACCACAGATAACACAA--------------------------------------- ---------------------GAGCAGCCCTTCACAAATAAATTAAAACGTAAGAGA------AGTACATCCCTTCAACC TGAGGACTTCATCAAGAAAGCAGATTCAGCAGGTGTTCAAAGGACTCCTGACAACATAAATCAGGGAACTGACCTAATGG AGCCAAAT---------GAGCAAGCAGTGAGTACTACCAGTAACTGTCAGGAGAACAAAATAGCAGGTAGTAAT---CTC CAGAAAGAGAAAAGCGCTCATCCAACTGAA------TCATTGAGAAAGGAACCTGCTTCCACAGCAGGAGCCAAATCTAT AAGCAACAGTGTAAGTGATTTGGAGGTAGAATTAAACGTCCACAGTTCAAAAGCACCTAAGAAAAATAGGCTGAGGAGGA AGTCTTCTATCAGGTGTGCTCTTCCACTTGAACCA---ATCAGTAGAAATCCAAGCCCACCTACTTGTGCTGAGCTTCAA ATCGATAGTTGTGGTAGCAGTGAAGAA---ACAAAGAAAAAC---CATTCCAACCAACAGCCAGCCGGGCACCTTAGAGA GCCTCAACTCATCGAAGACACTGAACCTGCAGCGGATGCCAAGAAG---AACGAGCCAAATGAACACATAAGGAAGAGAC GTGCCAGCGATGCTTTCCCAGAAGAGAAATTAATGAACAAAGCTGGTTTATTAACTAGCTGTTCAAGTCCTAGAAAATCT CAAGGGCCTGTCAATCCCAGCCCTCAGAGAACAGGAACA---GAGCAA---CTTGAAACACGCCAAATGTCTGACAGTGC CAAAGAACTCGGGGATCGGGTCCTAGGAGGAGAGCCC---AGTGGCAAAACCACTGACCGATCTGAGGAGAGCACCAGCG TATCCTTGGTACCTGACACTGACTACGACACTCAGAACAGTGTCTCAGTCCTGGACGCTCACACTGTCAGA---TATGCA AGA---ACAGGATCCGCTCAGTGTATGACTCAGTTTGTAGCAAGCGAAAACCCCAAGGAACTCGTCCATGGC------TC TAACAATGCTGGGAGTGGCACAGAGGGTCTCAAGCCCCCCTTGAGACACGCGCTTAAC---CTCAGTCAGGAGAAA---G TAGAAATGGAAGACAGTGAACTTGATACTCAGTATTTGCAGAATACATTTCAAGTTTCAAAGCGTCAGTCATTTGCTTTA TTTTCAAAACCTAGAAGTCCCCAAAAGGACTGTGCT------------CACTCTGTGCCCTCAAAGGAACTGAGTCCAAA GGTGACAGCTAAAGGTAAACAAAAAGAA---CGTCAGGGACAGGAAGAATTTGAAATCAGTCACGTACAAGCAGTTGCGG 
CCACAGTGGGCTTACCTGTGCCCTGTCAAGAAGGTAAG---CTAGCTGCTGATACAATGTGT------GATAGAGGTTGT AGGCTTTGTCCATCATCTCATTACAGA---AGCGGGGAGAATGGACTCAGCGCCACAGGTAAATCAGGAATTTCACAAAA CTCACATTTTAAACAATCAGTTTCTCCCATCAGGTCATCTATAAAAACTGACAATAGGAAACCT---CTGACAGAGGGAC GATTTGAGAGACATACATCATCAACTGAGATGGCGGTGGGAAATGAGAACATTCTTCAGAGTACAGTGCACACAGTTAGC CTGAATAAC---AGAGGAAATGCTTGTCAAGAAGCCGGCTCGGGCAGTATTCATGAAGTATGTTCCACT----------- ----------------------------------------------------GGTGACTCCTTCCCAGGACAACTAGGTA GAAACAGAGGGCCTAAGGTGAACACTGTGCCTCCATTAGATAGTATGCAGCCTGGTGTCTGTCAGCAAAGTGTT---CCT GTAAGTGAT---AAGTATCTTGAAATAAAAAAGCAGGAG---------------GGTGAGGCTGTCTGTGCAGACTTCTC TCCATGTCTATTCTCAGACCATCTTGAGCAA---TCTATG---AGTGGTAAGGTTTTTCAGGTTTGCTCTGAGACACCTG ATGACCTGCTGGATGATGTTGAAATACAGGGACATACTAGCTTTGGTGAAGGTGACATAATGGAGAGATCTGCTGTCTTT AACGGAAGCATCCTGAGAAGGGAGTCCAGTAGGAGCCCTAGTCCTGTAACCCAT---GCATCGAAGTCTCAGAGTCTCCA CAGAGCGTCTAGGAAATTAGAATCGTCAGAAGAGAGCGACTCCACTGAG >Rat TGTGGCACAGATGCTCGTGCCAGCTCATTACAGCGTGGGACCCGCAGTTTATTGTTCACTGAGGACAGACTGGATGCAGA AAAGGCTGAATTCTGTGATAGAAGCAAACAGTCTGGCGCAGCAGTGAGCCAGCAGAGCAGATGGGCTGACAGTAAAGAAA CATGTAATGGC---AGGCCGGTTCCCCGCACTGAGGGAAAGGCAGATCCAAATGTGGATTCCCTCTGTGGTAGAAAGCAG TGGAATCATCCGAAAAGCCTGTGCCCTGAGAATTCTGGAGCT---ACCACTGAC---GTTCCTTGGATAACACTGAATAG CAGCATTCAGAAAGTGAATGAGTGGTTTTCCAGAACTGGTGAAATGTTAACTTCTGACAATGCATCTGACAGGAGGCCTG CGTCAAATGCAGAAGCTGCTGTTGTGTTAGAAGTTTCAAAT------GAAGTGGATGGATGTTTCAGTTCTTCAAAGAAA ATAGACTTAGTTGCCCCTGATCCCGATAATGCTGTAATGTGTACAAGTGGAAGAGACTTCTCCAAGCCAGTAGAGAATAT T---ATCAACGATAAAATATTTGGGAAAACCTATCAGAGAAAGGGAAGCCGCCCTCACTTGAACCATGTGACTGAA---- --ATTATAGGCACATTTACTACAGAACCACAGATTATACAA--------------------------------------- ---------------------GAGCAGCCCTTCACAAATAAATTAAAACGCAAAAGA------AGTACATGCCTTCATCC TGAGGACTTTATCAAGAAAGCAGATTTAACAGTTGTTCAAAGGATTTCTGAAAATTTAAATCAGGGAACTGACCAAATGG AGCCAAAT---------GACCAAGCAATGAGTATTACCAGTAACGGTCAGGAGAACAGAGCAACAGGTAATGAT---CTT CAGAGAGGGAGAAATGCTCATCCAATAGAA------TCATTGAGAAAGGAACCTGCTTTCACAGCTAAAGCCAAATCTAT 
AAGCAACAGTATAAGTGATTTGGAGGTAGAATTAAATGTTCACAGTTCAAAAGCACCTAAGAAAAATAGGCTGAGGAGGA AGTCT---ACCAGGTGTGTTCTTCCACTCGAACCA---ATCAGTAGAAATCCGAGCCCACCTACTTGTGCTGAACTTCAG ATCGAGAGTTGTGGTAGCAGTGAAGAA---ACAAAGAAAAAC---AATTCCAACCAAACCCCAGCCGGGCACATTAGAGA GCCTCAACTCATCGAAGACACAGAACCCGCAGCTGATGCCAAGAAG---AACGAGCCAAATGAACACATAAGGAAGAGAA GTGCCAGTGATGCGTTCCCAGAAGAGAAATTAATGAACAAAGCTGGTTTATTAACTAGCTGTTCAAGTCCTAGAAAGCCT CAAGGACCTGTCAATCCTAGCCCTGAGAGAAAAGGAATA---GAGCAA---CTTGAAATGTGCCAGATGCCTGATAATAA CAAAGAACTCGGGGATTTGGTCCTGGGAGGAGAGCCC---AGTGGGAAACCTACTGAACCATCTGAGGAGAGCACCAGTG TGTCCTTGGTACCCGACACAGACTACGACACCCAGAACAGTGTCTCAATACTGGAAGCGAACACTGTCAGA---TATGCA AGA---ACAGGATCAGTTCAGTGTATGACTCAGTTTGTCGCAAGTGAAAACCCCAAGGAACTTGTCCATGGT------TC TAACAATGCTGGAAGTGGCTCGGAGTGCTTCAAGCACCCATTGAGACATGAACTTAAC---CACAATCAAGAGACA---A TAGAAATGGAAGACAGTGAACTTGATACTCAGTATTTGCAGAATACATTTCAAGTTTCAAAGCGTCAGTCATTTGCTTTA TTTTCAAAACTTAGAAGTCCCCAAAAGGACTGTACTCTGGTAGGTGCCCGCTCTGTGCCCTCAAGGGAACCAAGTCCAAA GGTGACTTCTAGAGGTGAACAAAAAGAA---CGTCAGGGACAAGAAGAGTCTGAAATCAGTCATGTACAGGCAGTCACAG TCACAGTAGGCTTACCTGTGCCCTGTCAGGAAGGTAAG---CCAGGTGCTGTTACAATGTGT------GCTGATGTTTCT AGGCTTTGTCCGTCATCTCATTATAGA---AGCTGTGAGAATGGACTCAACACCACAGATAAATCTGGAATTTCACAAAA CTCACATTTTAGACAATCAGTTTCTCCCCTCAGGTCATCTATAAAAACTGACAATAGAAAAACT---CTGACAGAGGGNC GATTTGAGAAACAT---------ACTGAAAGGGGRATGGGAAATGAGACTGCTGTTCAAAGTACAATACACACAATTAGT CTAAATAAC---AGAGGAGATGCTTGTCTAGAAGCCAGCTCAGGCAGTGTTATTGAAGTACATTCCACT----------- ----------------------------------------------------GGTGAAAACGTCCAGGGGCAACTAGATA GAAACAGAGGGCCTAAGGTAAACACCGTGTCTCTATTAGATAGTACACAGCCTGGTGTCTCTAAGCAGAGTGCT---CCT GTAAGTGAT---AAGTATCTTGAAATA---AAGCAGGAG---------------AGTAAGGCTGTCAGTGCAGACTTCTC TCCATGTCTGTTCTCAGATCATCTTGAAAAA---CCTATGAGAAGTGATAAGACTTTTCAGGTTTGCTCTGAGACACCTG ATGACCTGTTGGATGATGTTGAAATACAGGAAAATGCTAGCTTCGGTGAAGGTGGCATAACGGAAAAGTCTGCTATTTTT AATGGAAGTGTCCTGAGAAGAGAGTCCAGTAGGAGCCCTAGCCCTGTAACCCAT---GCATCGAAGTCGCGGAGTCTCCA CAGAGGGTCTAGGAAATTAGAATTCTCAGAAGAGAGCGACTCCACTGAG 
>NineBande TGTGGCACAAATACTCATGCCAACTTATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGCGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCCAGCATAGAGAAAAAGGTAGATGTGGATGCTGATCCCCTGTATGGGCGAAAAGAA CTGAATAAGCAGAAACCTCCATGCTCTGAGAGTCATAGAGAT---ACCCAAGAT---ATTCCTTGGATAATGCTGAATAG TAGCATTCAGAAAGTTAACGAGTGGTTTTCCAGAGGTGATGACATATTAACTTCTGATGACTCACACGATAGGGGGTCTG AATTAAATGCAGAAGTAGCTGGTGCATTGAAAGTTTCAAAA------GAAGTAGATGAATATTCTAGTTTTTCAGAGAAG ATAGACTTAATGGCCATTAATCCTCATGATACTTTACAATTTGCAAGTGAAAGAGTCCAATTGAAACCAGCAGAGAGTAA C---ATCAAAGATAAAATATTTGGGAAAACCTATCATAGGAAGGCAAGCCTCCCTAACTTGAGCCACATAACCCGAAACC TTTTTATAGGAGCTATTGCTGCAGAGCCCAAGATAACACAA--------------------------------------- ---------------------GAGCATTCCCTCCAAAATAAAATAAAGCGTAAAAGGAGAACTGCATCAGGCCTTCGTCC TGAGGATTTATCCAAGAAAGTAGATTTGACAGTTGTTCAAAAAACCCCTGAAAAGATAAATCAGGGAACTGACCAAATGG AGCAGAAT---------GATCCAGTGATGAATATTGCTAATAGTGGTCATGAGAATGAAACAAAAGGTGATTGT---GTT CAGAAAGAGAAAAATGCTAATCCGACAGAA------TCATTGGGAAAAGAATCTGCTTTCAGAACTAAAGGCGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTAGAATTAAATATTTTAAATTCAAAAGCATCTAAGAAGAATAGGCCGAAGAGGA TGTCCTCTACCAGGCATATTCATGCACTTGAACTAGTCGGCAGTAGAAATCCAAGCCCACCTAATCATACTGAACTACAA ATTGATAGTTGTTCTAGCATTGAAGAG---ATAGAGAAAATA---AATTCTAACCAAAAGCCAATCAGACACAACAGAAT GCTTCAACTCACGAAAGAAAAAGAAACCACAACTGGAGCCAAAAAGAACAATAAGCCAAATGAACAAATAAGTGAAAGAC ATGCCAGTGATGCTTTCCTAGAACTTAAA------AATGTAACTGATTTTCTTCCTAAATGTTCAAGTTCTGATAAACTT CAAAAATTT---AATTCTAGCCTGCAAGGAGAAGTAGCA---GAGAAC---CTAGAAACAATTCAAGTGTCTGATAGTAC CAGGGACCCTGAAGATCTGGTGGTAAGTGGAGAAAAG---TGTTTGCAA---ACTGAAAGATCTGCAGAGAGTACCGGTA TTTCAGTGGTACCTGATACTGATTATGGCACTCAAGACAGTATCTCATTACTGGAAGCTGACACCCTGGGG---AAGGCA AAA---ACAGCACTAAATCAACATGTGAGTCAGTATGTAGCAATTAGAAATGCCACTGAACTTTCCCATGGTTGT---TC TAAAGACACTAGAAATGACACTGAAGATTTTAAGGATTCATTGAGACATGAAGTTAAC---CACACTCAGGGGACAAATG TTGAAATAGAAGAGAGTGAACTTGATACTCAGTATTTGCAGAATACATTCAAGATTTCAAAGCGCCAGTCATTTGCTCTG 
TTTTCGAATCCA---------GAAAATGAATGTGCAACAGTCTGTGCCCACTCCAGGTTCTTAGGGAAACAAAGTCCAAA AGTCACCTTTGAATGTAGACATAAAGAAGAAAATCAGGGGAAGAAAGAGTCTAAAATCAAACATGTGCAGGTAATTCACA CAACTGCAGGCTTTCCTATAGTTTGTCAGAAAGATAAG---CCAGGTGATTATGCCAAAGGTAGCATTCAAGGAGTCTCT AGGCTTTGTCAGTCCTCTCAGGCCAGA---GGCAATGAATCTGAACTCATTAATTCAAATGAACATGAAATTTCACAAAA CCCAGATCAAATGCCATCACTTTCTCACATGAAGTCATCTGTTAAAACTAAATGTAAGGAAAAC---CTGTCAGAGGAAA AGTTTGAGGAACTTACAGTGTCACTTGAAAGAACAATGGTAAATGAGAACATCATTCAAAGTACAGTAAGCACAATTAGC CACAGTAACATTAGAGAAAACACTTTTAAAGAAGCCAGCTCAAGCAGTATTAATGAAGTAGGGTCCAGT----------- ----------------------------------------------------GATGAGAACATTCAAGCAGAAGTAGGTA GAAACAGAGCACCTAAATTAAATGCTATGCTCAGATTAGGTCTTATGCAACCTGAAGTCTATAAGCAAAGTCTT---CCT ATAACCAATTGTAAATATCCTGAAATAAAAAGTCAAGGAGAAAATGAAGAAGCAATTCGGGCTGTTGATATAGACTTCTC TCCATGTCTAATTTCAGATAACCTACAACTA---CCTATGGGAAATAGTTGTGCTTCCCAGATTTGTTCTGAGACACCTG ATGACTTGTTAGATGATGATGAAATAAAGGAAAATAACTGCTTTGCTGAAAGTGACATTAAGGAAAGATCTGCTATTTTT AGCAAAACTGTCCAGAAAAGAGAGTTCAGAAGGAGCCCTAGCCCTTTAGTCCAT---ACAAGTTTTGCTCAGGGTCACCA AAGAAAGCCCAGGAAATTAGACTCCTCAGAAGAGGACGTATCTAGTGAG >HairyArma TGTGGCACAAATACTCATGCCAACTTATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAGTGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGCGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCCAACACAGAGAAAAAGGTAGATGTGGCTGCTGATTCCCTGTATGGGCGAAAAGAA CTGAATAAGCAGAAACTTCCATGCTCTGAGAGTCCTAGAGAT---ACCCAAGAT---ATTCCTTGGATAACGCTGAATAG TAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGACCTATTAACTTCTGATGACTCACACGATGGGGGGTCTG AATCAAAAGCAGAAGTAGCTGGTGCATTAAAAGTTCCAAAT------GAAGTAAATGGATATTCTAGTTCTTCAGAGAAG ATAGACTTATTGGCCAGTGATCCTCATAATGCTTTAATATTTGCAAGTGAAAGAGTCCAATCCAAACCAGCAGAGAGTAA C---ATCAAAGATAAAATATTTGGGAAAACCTATCACAGGAAGGCAAGCTTTCCTAACTTGAGCCACATAACTGAGGATC TTTTTATAGGAGCTATTGCTACAGAACCCAAGATAATACAA--------------------------------------- ---------------------GAGCATTCCCTCACAAATAAAATAAAGCGTAAAAGGAGAACTACGTCATGCCTTCATCC 
TGAGGATTTTATCAAGAAAGTAGATTTGACAGTTGTTCAAAAGACGCCTGAAAAGATAAATCAGGGAACTGACCAAATGG AGCAGAAT---------GATCAAGTGATGAATAGTGCTAATAGTGGTCATGAAAATGAAACAAAAGGTGATTAT---GTT CAGAAAGAGGAAAATGCTAACCCAATAGAA------TCATTGGAAAAAGAATCTGCTTTCAGAACTAAAGGTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTAGAATTAAATATCTATAATTCAAAAGCATCTAAGAAGAATAGGCTGAGGAGGA TGTCCTCTACCAGGCATATTCATGCACTTGAACTAGTAGGCAATAGAAATCCAAGCCCACCTAAACATACTGAACTACAA ATTGATAGTTGTTCTAGCACTGAAGAG---ATAGAGAAAATA---AATTCTAACCAAAAACCAATCAGACACAACAGAAT GCTTCAACTCATGAAAGAAAAAGAAAACACAACTGGAGCTAAAAAGAATAACAAGCCAAATGAACAAATAAGTGAAAGAC ATGCCAGTGATGTTTTCCCAGAACTAAAATTAACAAATGTAACCGATTTTCTTCCTAAATGTTCAAATCCTGATAAACTT CAAGAATTTGTTAATTCTAGCCTGCAAGGGGAAGTAGCA---GAGAAC---CTAGAAACAATTCAAGTGTCTGATACTAC CAGGAATCCTGAAGATCTGGTGTTAAGTGAAGGAAAG---AGTTTGCAA---ACTCAAAGGTCTGCAGAGAGTACCAGTA TTTCAGTGGTACCTGATACTGATTATGGCACTCAAGACAGTGTCTCATTACTGGAAGCTGACACCCTGGGG---AAGGCA AAA---ACAGCACTAAATCAACCTATGAGTCAGTATGCAGCAATTAAAAATGCCACTGAACTCTCCCATGGTTGT---GC TAAAGACACTAGAAATGACACTGAGGATTTTAAGGATCCGTTGAGACATGAAGTTACC---CACACTCAGGAGACTAGTG TAGAAATGGAAGAGAGTGAACTTGATACTCAGTATTTACAGAATACATTCAAGATTTCAAAGCGTCAGTCATTTGCTCTG TTTTCGAATCCA---------GAAAATGAATGCGCAACAGTGTGTGCCCACTCCAGGTTCTTAGGCAAACAAAGTCCAAA AGTCATTTTTGAATGTAGGCAAAAAGAAGAAAATCAGGGGAAGAAAGAGTCTAAAATCAAACATGTGCAGGCAGTTCATA CAACTGCAGGCTTTCCTGTAGTTTGTCAGAAAGATAAG---CCAGGTGATTATGCCAAATGTAGCATTCAAGAAGTCTCT AGGCTTTGTCAGTCCTCTCAGTTCAGA---GGCAATGAATCTGAACTCATTACTGCAAATGAACATGAAATTTCACAAAA CCCAGATCAAATGCCATCACTTTCTCACATCAGGTCATCTGTTAAAACTAAATGTAAGGAAAAC---CTGTCAGAGGAAA AGTTTGAGGAACTTACAATATCACTTGAAAGAACAGTGGGAAATGAGAACATCGTTCAAAGTACAGTAAGCACAATTAGC CACAATAACATTAGAGAAAACGCTTTTAAAGAAGCCAGCTCAAGCAGTATTAATGAAGTAGGTTCTAGT----------- ----------------------------------------------------GGTGAAAACATTCAAGCAGAACTAGGTA GAAACAGAGCACCTAAATTAAATGCTATGCTCAGATTAGGTCTTATGCAACCTGAAGTCTATAAGCAAAGTCTT---CCT ATAACTAGTTGTAAACATCCTGAAATAAAAAGGCAAGGAGAAAATGAAGAAGCAATTCAGGCTGTTGATACTGATTTCTC 
TCCACATCTAATTTCAGATAACCTAGAACTA---CCTATGGGAAATAGTCATGTTTCTCAGATTTGTTCTGAGACGCCTG ATGATTTGTTAGATGATGATGAAATAAAGGAAAATAACAGCTTTGCTGAARATGGCATTAAGGAAAGATCTGCTGTTTTT AGCAAAAGTGTCCAGAAAAGAGAGTTCAGAAGGAGTCCTAGCCCTTTAGGCCAT---ACAAGTTTGGCTCAGGGTCACCA AAGAAGGGCCAGGAAATTAGACTCCTCAGAAGAGGACGTATCTAGTGAG >Anteater TGTGGCACAAATATTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAAAGAATGTAGA AAAGGCTGAATTCTGTGATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAGCAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCACACTACCAGCACAGAGAAAAAAGTAGATGTGGATGCTGATCCCCTGCATCGGAGAAAAGAA CTGAAGAAGTGGAAATCTCCATACTCTGAGAATCCTAGAGGT---ACCCAAGAT---ATTCCTTGGATAACACTGAATAG TAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGACATATTGACTTCTAATGACTCATGCAATGAGGGGTCTG AATTAAATGCAGAAGTAGCTGATGCATCAAAAGTTCCAAAT------GACGTAGATAGATATTCTGGTTCTTCAGAGAAA ATAGACTTAATAGCCAGTGACCCTCATAATGCTTTAATATGTGCAAGTGAAAAAGTCCAGTCCAAACCAACAGAGAGTAA T---ATCAAAGATAAAATATTTGGGAAAACCTATCACAGGAAGGCAAGCCTCCCTAACTTGAGCCGTATAGCTAAAGATC TTTTTATAGGAGCTGTTGCTGCAGAACCTAAGATAACACAA--------------------------------------- ---------------------GAGCTCCCCCTGACAAATGAAATGAAGCTTAAAAGGAGGACTACATCAGGACTTCATCC TGAGGATTTTATCAAGAAAGTAGATTTGACAGTTGTTCAAAAGAAGCCTAAAATGATAAATCAGGGAACTAACCAAATAG AGCAGAAT---------TGTCAACTGATGAATATTGCCAATAGTGGTAATGAAAATGAAACAAAAGGTGATTTT---GTT CAGAAGGAGAAAAGTGCTAACCCAACAGAA------TCATCAGAAAAAGAATCTGCTTTCAGAACTAAAGGTGAACCTAT AAGTAGTAGTATAAGCAATATGAAACTAGAATTAAATACCTGCAATTCAAAAGCATCTAAGAAGAATAGGCTGAGGAGAA TGTCCTCTACCAGGCATATGCATGCACTTGAACTAGTAGCCAATAGAAAGCCAAGCCAGCCTAATCACACTGAACTACAA ATTGATAGTTGTTCTAGCAGTGAAGAA---ATAAAGAAAAAA---AAATCTGACCAAAAGCCAATAAGACACAGCAGAAC AGTTCAATTCATGAAAGATAAAGAAACTGCAATTGGAGCCAAGAAGAGTAACAAGCGAAATGAACAAATAAATAAAAGAC ATGCCAGTGATGCTTTCCCAGAACTAAATTTAACAAACGTAACTGGTTTTCCTACTAAATGTTCAAATTCTGATAAACTT CAAGAATTTGTCAATTCTAGCCTGCAAGGAGAAGCAGCA---GAGAAC---CTAGAAACAATACAAGTGTCTGATACTAC CATGGACCCTGAAGGTCTGGTATTAAGTGAAGGAAAG---AATTTGCAA---ACTGAAAGATCTGTAGAGAGCACCAGTA 
TTTCATTGGTGCCTGACACTGATTATGGCACTCAAGATAGTATCTCATTACTAGAAGCTGCTACCCTAGGG---AAAGCA AAA---GCAGCACCAAATCAACATGTGAGTCTGTGTGCAGCAGTTGGAAATGCCACTGAACTTGTCCATGGTTGT---TC TAAAGATACTAGAAATGACACTGAAGATTTTAAGGATTCATTGAGACATGAAGTTAAC---CACACACAAGGGACAGTCA TAGAAAAGGAAGAGAGTGAACTTGATACTCAGTATTTNNNNNNNNNNTACAAGATTTCAAAGCGTCAGTCATTTGCTCTG TATTCAAATTCT---------GAAAAGGAATGTGTAACAATCTGTGCCCACTCCAGGTCCGTACGGAAACAAAGTCCAAA AGTAACTTTTGACTATAGACAAAAAGAAGAAAATCAAGGAAAGAAAGAGTCTAAGATCAAACATGTGCAGGCAGTTCATA CAACCGCAGGCTTCTCTGTAGTTTGTCAGAAAGATAAGAAGCCCCATGATTATGCCAAGTGTAGCATTCAGGGAGTCTCT AAGCTTTGTGAGTCATCTCAGTTCAGA---GGCAATGAATCTGAACTCATTACTGCAAACGAACATGGAATTTCCCCAAA TCCAGATCAAATGCCATCACTTTCTCCCAACAGGTCATCTGTTAAAACTAAATATAAGAAAAAC---TTGTCAGAAGAAA GGTTTGAGGAACATACAGTGTCACTTGATAGAGCAGTGGGAAATGAGAGCATCATTCAAAGTACAGTAAGCACAATTAGC CAAAATAACATTAGAGAAAGCACTTTTAAAGAAGCCAGCTCAAGCAGTATTAATGAAGTAGGTTCCAGTATTAATGAAGT GGGTTCCAGTATTAATGAAGTGGGTTCCAGT---------------------GGTGAGAACGTTCAAGCAGAGCTAGGTA GAAACAGA---CCTAAGCTAAATGCTATGCTCAGATTAGGTCTTATGCAACCTGAAGTCTATGAGCAAAATCTT---CCT ATAACTAATTTTAAACTTTCTGAAATTAAAAAACAAGGAGAAAATGAAGAAGTAGTTCAGGCTGTTAATACAGATTTCTC CCCATGTCTAATTTCAGATAACCTAGAACTG---CCTATGGGAAGTAGTCGTGTTTCTCAGATTTGTTCTGAGACACCTG ATGACCTGTTAGATGATGATGAAATAAAGGAAAATAACAGCTTTGCTGAAAGTGGCGTTAAGGAAAGATCTGCTGTTTTT AGCAAAAGTGTCCAGAGAAGAGAATTCAGAAGGAGCCCTAGCCCTTTAGCCCAA---ACAAGTGTGGCTCAGGGTCACCA AAGAGGGGCCAGGAAATTAGCCTCCTCAGAAGNGGACNAGTCTAGNGAG >Sloth TGTGGCACATATACTCATGCCAGCTCATTACAGCGTGAGAACAGCAGTTTATTACTCACTAAAAACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAGCAGCCTGGCTTAGCAAGGAGCCAACAGAACAGATGGGCTGAAAGTAAGGAAA CACGTAATGAT---AGGCAGACTCCCAGCACAGAGAAAAAGGTAGATGTGGATGCTGATCCCCTGTATGGGCGAAAAGAA CTGAATAAGCAGAAACCTCCATGCTCTGAGAGTCCTCAAAAT---ACCCAAGAT---ATTCCTTGGATAACACTGAATAG TAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGACATACTAACTTCTGATGACTCACACAATGGGGGGTCTG AATCAAATGCAGAAGTAGTTGGTGCATTGAAAGTTCCAAAT------GAAGTAGATGGATATTCTGGTTCTTCAGAGAAG 
ATAGACTTAATAGCCAGTGATCCTCACAATGCTTTAATATTTGCAGGTGAAAGAGTCCAGTCCAAACCAACAGAGACTAA C---ATTGAAGATAAAATATTTGGGAAAACCTATCACAGGAAGGCAAGCCTCCCTAACTTGAGCCACATAGCTGAAAATC TTTTTATAGGAGCCATTGCTACAAAACCTAAGATAACACAA--------------------------------------- ---------------------GAGCACCCCCTGACAAAGAAAATAAAGCATAAAAGTAGGACTACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGTCGATTTGACAGTTGTTCAAAAGATGCCTGAAAAGATAAATCAGGGAACTGACCAAATGG AGCAGAAG------AATAGTCAAGTGATAAATATTGCTAATAGTGGTCATGAGAATGAAACAAAAGATGATTAT---GTT CAGAAAGAGAAAAATGCTAACCCAACAGAA------TCATTGGAAAAAGAATCTGCTTTCAGAACTGAAGGTGAACCCAT AAGCAGCAGTATAAGCAATATGGAACTAGAATTAAATATCTTCAATTCAAAAGTATCTAAGAAGAATAGGCTGAAGAGGA TGTCCTCTACCAGGCATATTCATGCACTTGAACTAGTAGCCAATAGAAATCCAAGCCAACCTAATCATAATGAACTACAA ATTGATAGTTGTTCTAGCAGTGAAGAG---ATAAAGAAAGAA---AATTCTGTCCAAAAGCCAATAAGGCACAGCAGAAT GCTTCAACTCCTGAAAGGTAAAGAAACTCCAACCGTAACCAAGAAGAGTAACAAGCGAAATGAACAAATAAGTAAAAGGC ATTCCAGTGATGCTTTCCCAGAACTAAATTTAACAAATGTAACTGGTTTTCTTACTAAATGTTCAAGTTCTGATAAACTT CAAGAATTTGTCAATTCTAGCCTGCAAGGAGAAGTAGCA---GAGAAC---CTAGAAACAATTCAAGTGTCTGATAGTAC CAGAGACCCCGGAGCTCTGGTGTTAAGTGGAGGAAAG---GGTTTGCAA---ACTGAAAGATCTCTAGAGAGTACCAGTA TTACAATGATACCTGAAACTGATTATGACACTCAAGACAGTATCTCCTTACTGGAAGCTGACACCCTAGGG---AAAGCA AAA---GCAGCACCAAATCAACATGTGAGTCAGTATGCAGCAATTGGAAATGCCACTAAACTTTTCCATGGTTGT---TC TAAAGATACTAGAAGTGACACTGAGAATTTTAAGGATCCATTGAGACATGAAGTTAAC---CACACACAGGAGACATTTG TAGAAATGGAAGAGAGTGAACTTGATACTCAGTATTTACAGAATACATTCAAGATTTCAAAGCGTCAATCATTTGCTCTG TTTTCAAATCCA---------GAAAAGGAATGCGCAACAGTCTCTGCCCACTCCAGGCCCTTAGGAAAACAAAGTCCAAA AGTCACTTTTGACTGTAGACAAAAAGAA---GATCAGGAGAAGAAGGAGTCTAAAATCAAACACGTGCAGGCAGTTCATA CAACTGCAGACTTTCCTGTAGTTTGTCAGAAAGATAAG---CCAGGTGATTATGCTAAATGTAGCATTCAAGGAGTCTCT AAGCTTTGTCAGTTATTTCAGTTCAGA---GGCAATGAATCTGAACCCATTACTGCAGATGAACATGAAATTTCACAAAA TCCAGATCAGATGCCATCACTTTCTCCCATGAAGTCATCTGTTAAAAGTAAATTTAAGGAAAAC---CTGTCAGAGGAAA GATTTGAGGAACATACAGTATCACTTGAAAGAGCAGTGGGAAAGGAGCACATCATTCAAAGGACAGTGAGTCCAATTAGC 
CAAAATAACATTAGAGAAAGCGCTTTTAAAGAAGCCAGCTCAAGCAGTATCAATGAAGTAGGTTCCAGTGTTAATGAAGT AGGTTCCAGTGTTAATGAAGTAGGTTCCAGT---------------------GGTGAGAACACTCAAGCAGAGCTAGGTA GAAACAGAGGATCTAAATTAAGTGCTATGCTCAGATTAAGTCTTATGCAACCTGAAGTCTATAAGCAAAGTCTT---CCT ATAACTAATTGTAAGCATCCTGAAATTAAAAAGCAAGGAGAAAATGAAGAAGTAGTTCAGGCTGTTAAAACA-------- ----TGTCTAATTTCAGATAACCTAGAACTA---CCTATGGGAAGTAGTCATGCTTCTCAGATTTGTTCTGAGACACCTG ATGATCTGTTAGATGATGGTGAAATAAAGGAAAATAACAGCTTTGCTGAAAGTGGCATTAAGGAAAGATCTGCTGTTTTT ACCAAAAGTGTCCAGAAAAGAGAGTTCAGAAGGAGCCCGAGCCCGTTAGCCCAA---ACAAGGGTCACC---------AA AAGACGGGCCAGGAAATTAGACTCCTCAGAAGAGGATGTGTCTAGTGAG >Dugong TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAATAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTCATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAGCAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCTAGCACAGAGAAAAAGGTAGATATGAATGCTAATCCATTGTATGAGAGAAAAGAA GTGAATAAGCAGAAACCTCCATGCTCCGAGAGTGTTAGAGAT---ACACAGGAT---ATTCCTTGGCTAACACTGAATAG TAGCATTCAGAAAGTTAATGAGTGGTTTTTCAGAAGTGATGGCCTG---------GATGACTTGCATGATAAGGGGTCTG AGTCAAATGCAGAAGTAGCTGGTGCTTTAGAAGTTCCAGAA------GAAGTACATGGATATTCTAGTTCTTCAGAGAAA ATAGACTTAATGGCTAGTGATCCTCATAGTGCTTTAATATGTGAAAGTGAAAGAGTCCTCTTCAAACCAGCAGAAAGTAA C---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAATTCTCCCTCATTTGAGCCATGTAACTGAAGATC TAATTATAGGAGCTGTTGCTACAGAACCTCAGATAGCACAA--------------------------------------- ---------------------GAACGTCCTCTTACAAATAAATTAAAGCGTAAAAGGAGA---ACATCAGGCCTTCATCC TGAGGATTTCATCAAGAAAGTAGATTTGGCAGTTGTTCAAAAGACTCCTGAAAAGATAAATCAAGAAACTGACCAAGTGG AGCAGAAT---------GGTCAAGTGATGAATATTGCTAATGGTGGTCATGGAAATGAAACAAAAGATGATTAT---GTT CAGAAAGAGAAGAATGCTAACCCAACAGAA------TCACTGACAAAGGAATCTGCTTTCAGAACTAAAGCTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTAGAATTAAATATGCATAATTCAAAAGCACCGAAGAAGAACAGGCTGAGGAGGA AGTCCTCTACCAGGCATATTCATGCACTTGAACTAGTAGTCAATAGAAATCCAAGTCCACCTACTCATACTGAACTACAA ATTGATAGTTGGTCTAGCAGTGAAGAG---ATAAAGAAA------AGTTCTGAGCAAAAGCCAGTCAGACACAACAGAAA 
CCTTCAACTCATGAAAAACCAAGAAACCACAACTGGAGCCAAGAAAAGTAACAAGCCAAAGGAACAAATAAGTAAAAGAC ATGCCAGTGACGCTTACCCAGAACTAAATTTAACAAGCACAACTGGCTTAATTACTAACTGTTCAAGTTCTCATAATTAT CAAGAATTT---AATCCTAGCCTTCAGGGAGAAGAAATAGAAGAAAAT---CTGGGAACAATTCAAGTGTCTAATAGAAC CAGAGACCCCGAGGATCTAGTGTTAAATGGAGGAAGA---GGTTTGCAA---ACTGAAATATCTGTTGAGAGTACCAGTA TCTCAGTGATACCTGATACTGACTATGGCAGTCAGAACAGCATCTCATTACTGGAAGCTGACACCCTCAGG---AAGGCA AAA---ACAGCACCAAATCAATGTGCAAGTCAGTGTGCAGCAATTGAAAACCCCAATGAACTTATCCATGGTTGT---CC TAAAGATACTAGAAATGACACAGAGGATTTTAAGGATCTGTTGAGATGTGAAGTTAAC---CACATTCAGGAGACGTGCG TAGAAATGGAAGACAGNGAACTTGATACTCAGTATTTGCAGAGTACATTCAAGGTTTCAAAGCGTCAGTCATTTGCTCTG TTTTCAAATCCA---------GAAAAGGAATGTGCAACAATTTGTGCCCACTCCAAGTCCTTAAGGAAACAAAGTCCAAA AGTCACTCCTGAGTATGGAGAAGAAGAAGAAAATCAAGGGAACAAAGAGTCTAAAATCAAGCATGAGCAGGCAGTTCATA CGACTGCGGGCTATCCTGAGGATTGTCAGAAAGAGAAGAAGCCAAGTGATTATACCAAAAGTAGCATCAAAGGAGTCTCT AGGCTTTGTCAGTCATCTCAGTTCAGA---GGCAGTGAATCCCAACACATTACTGCAGGTGAACATGGAATTTCACAAAA TCCAGATCAAATGCCATTGCTTTCTCCCATCAGGGCATCTGTTAAAAGT------AAGAAAAAC---TTGTCAGAAGAAA GGTTTGAGGAACATACAATATCACTTGAAAGAGCAGTAGGAAATGAGAGCATCGTTCAAAGTACAGTGAGCACAGTTAGC CAAAATGACATTAAGGAAAGTGCTTCTAAAGAAGCCAGCTCAAGCAGTATTAATGAAGTAGGTTCTAGT----------- ----------------------------------------------------GGCGAAAACATTCGAGCAGAGCTAGGTA GGAACAGAGGACCTAAATTAAATGCTGTGCTCAGATTAGGTCTTATGCAACCTGAAGTCTATAAACAAAGTCTT---CCT GTAAGTAACTGTAAACGTCCTGAAATAAAAAGGCAAGGAGAAAATGAAGGAGTAGTTCAGGATGTTAATATGGATTTCTC TCCATGTCTAATTTCAGATAACCTAGAACAA---CCTATGGGAAGTAGTCGTGCTTCTCAGATTTGTTCTGAGACTCCTG ATGACCTGTTAGATGATGATGAAATAAAGGAAAATATCAGCTTTGCTGAAAGTGGCATTAAGGAAAGATCTGCTGTTTTT AGTAAA---GACCAGAGAAGAGAGTTCAGAAGGAACCCAAGCCCTTTATCCCAT---TCAGGTTTGGCTCAGGGTCACCT AAGAGGGGCCAGGGAATTAGAGTCCTCAGACGAGAACATATCTAGTGAG >Manatee TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAATAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTCATAAAAGCAAACAGCCTGGCTTAACAAGGAGCCAGCAGAGCAGATGGGCTGAAAGTAAGGAAA 
CATGTAATGAT---AGGCAGACTCCTAGCACAGAGAAAAAGGTAGATATGAATGCTAATCCATTGTATGAGAGAAAAGAA GTGAATAAGCAGAAACCTCCATGCTCCGAGAGTGTTAGAGAT---ACACAAGAT---ATTCCTTGGCTGACACTGAATAG TAGCATTCAGAAAGTTAATGAGTGGTTTTTCAGAAGTGATGGCCTG---------GATGACTTGCATGATAAGGGGTCTG AATCAAATGCAGAAGTAGCTGGTGCATTAGAAGTTCCAGAA------GAAGTACATGGATATTCTAGTTCTTCAGAGAAA ATAGACTTAATGGCCAGTGATCCTCATAGTGCTTTAATATGTGAAAGTGAAAGAGTCCTCTCCAAACCAGCAGAAAGTAA C---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAATTCTCCCTCATTTGAGCCATGTAACTGAAGATC TAATTATAGGAGCTGTTGCTACAGAACCTCAGATAGCACAA--------------------------------------- ---------------------GAACGTCCCCTTACAAATAAATTAAAGCGTAAAAGGAGA---ACATCAGGCCTTCATCC TGAGGATTTCATCAAGAAAGTAGATTTGGCAGTTGTTCAAAAGACTCCTGAAAAGATAAATCAGGAAACTGACCAAGTGG AGCAGAAT---------GGTCAAGTGATGAATATTGCTAATGGTGGTCATGAAAATGAAACAAAAGATGATTAT---GTT CAGAAGGAGAAGAATGCTAACCCAACAGAA------TCACTGACAAAGGAATCTGCTTTCAGAACTAAAGCTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTAGAATTAAATATGCATAATTCAAAAGCACTGAAGAAGAATCGGCTGAGGAGGA AGTCCTCTACCAGGCATATTCATGCACTTGAACTAGTAGTCAATAGAAATCCAAGTCCACCTACTCATACTGAACTACAA ATTGATAGTTGGTCTAGCAGTGACGAG---ATAAAGAAA------AGTTCTGAGCAAAAGCCAGTCAGACACAACAGAAA CCTTCAACTCATGAAAAACCAAGAAACCACAACTGGAGCCAAGAAAAGTAACAAGCCAAAGGAACAAATAAGTAAAAGAC ATGCCAGTGACACTTACCCAGAACTAAATTTAACAAGCACAACTGGCTTAATTACTAACTGTTCAAGTTCTCATAATTAT CAAAAATTTGTTAATCCTAGCCTTCAGGGAGAAGAAATAGAAGAAAAT---CTGGGAGCAACTCAAGTGTCTAATAGAAC CAGAGACCCCGAGGATCTAGTGTTAAATGGAGGAAGA---GGTTTGCAA---ACTGAAATATCTGTTGAGAGTACCAGTA TCTCAGTGATACCTGATACTGATTATGGCAGTCAGAACAGCATCTCATTACTGGAAGCTGACACCCTCAGG---AAGGCA AAA---ACAGCACCAAATCACTGTGCAAGTCAGTGTGCAGCAATTGAAAACCCCAATGAACTTATCCATGGTTGT---CC TAAAGATACTAGAAATGACACAGAGGATTTTAAGGATCTGTTGAGATGTGAAGTTAAC---CACGTTCAGGAGACATGCA TAGAAATGGAAGACAGTGAACTTGATACTCAGTATTTGCAGAGTACATTCAAGGTTTCAAAGCGTCAGTCATTTGCTCTG TTTTCAAATCCA---------GAAAAGGAATGTGCAACAATTTGTGCCCACTCCAAGTCCTTAAGGAAACAAAGTCCAAA AGTCACTCCTGAGTATGGAGAAAAAGAAGAAAATCAAGGGAACAAAGAGTCTAAAATCAAGCATGAGCAGGCAGTTCATA 
CAACTGCTGGCTATCCTGAGGATTGTCAGAAAGAGAAGAAGCCAAGTGATTATACCAAATGTAGCACTAAAGGAGTCTCT AGGCTTTGTCAGTCATCTCAGTTCAGA---GGCAGTGAATCCGAACACATT---ACAGGTGAACATGGAATTTCACAAAA TCCAGATCAAATGCCATTGCTTTCTCCCATCAGGGCATCTGTTAAAAGT------AAGAAAAAC---TTGTCAGAAGAAA GGTTTGAGGAACATACAATATCACTTGAAAGAGCAGTAGGAAATGAGAGCATCGTTCAAAGTACAGTGAGCACAGTTAGC CAAAATAACATTAAGGAAAGTGCTTCTAAAGAAGCCAGCTCAAGCAGTATTAATGAAGTAGGTTCCAGTGTTAATGAAGT AGGTTCTAGT------------------------------------------GGCGAAAACATTGAAGCAGAGCTAGGTA GGAACAGAGGACCTAAATTAAATGCTGTGCTCAGATTAGGTCTTATGCAACCTGAAGTGTATAAACAAAGTCTT---CCT GTAAGTAACTGTAAACATCCTGAAATAAAAAGGCAAGGAGAAAATGAAGGAGTCGTTGAGGATGTTAATATGGATTTCTC TCCATGTCTAATTTCAGATAACCTAGAACAA---CCTATGGGAAGTAGTCGTGCTTCTCAGATTTGTTCTGAGACTCCTG ATGACCTGTTAGATGATGATGAAATAAAGGAAAATATCAGCTTTGCTGAAAGTGGCATTAAGGAAAGATCTGCTGTTTTT AGTAAA---GACCAGAGAAGAGAGTTCAGAAGGAACCCAAGCCCTTTATCCCGT---TCAGGTTTGGCTCAGGGTCACCT AAGAGGGGCCAGGGAATTAGAGTCCTCAGAAGAGAACATATCTAGTGAG >AfricanEl TGTGGCACAAATACTCATGCCAGCTCATTACAGCATAAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAACAAAAGCAAACAGCCTGGCTTAGCAAGAAGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCTAGCACAGAGAAAAAGGTAGATGTGAATGCTGATCCCTTGTATGAGAGAAAAGAA GTGAATAAGCAGAAACCTCCACGCTCTGAGAATCTTAGAGAC---ACCCAAGAT---ATTCCTTGGATAACACTGAATAG TAGCATTCAGAAAGTTAATGAGTGGTTTTTCAGAAGTGACGGCCTG---------GATGTCTTAAATGATGAGGGGCCTG AATCCAGTGCAGAAGTAGCTGGTGCATTAGAAGTTCCAAAT------GAAGTACAT------TCTAATTCTTCAGAGAAA ATAGACCTAATGGCCAGTGATCTGCATGGTGCTTTAATATGTGAAAGTGAAAGAGTCCCCTCCAAACCAGCAGAAAGTAA C---ATCGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAGGTATGTCTCATTTGAGCCACATAACTGAAGATC TGATTATGGGAGCTATTGCTTCAGAACCTCAGATAGCACGA--------------------------------------- ---------------------GAACATCCTTTTACAAATAAATTAAAGCGTAAAAGGAGA---ACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGTAGATTTGGCAGTTGTTCAAAAGATCCCTGAAAAGATAAATCAGGAAACTGACCATGTGG AGCAGAAC---------GGTCAAGTGATGAATATTGCTAATGGTGGTCGTGAGAATGAAACAAAAGGTGATTAT---GTT 
CAGAAAGAGAAGAATGCTATCCCAACAGAA------TCATTGGCAAAAGAATCTGCTTTCAGAACTAAAGCTGAACCTAT AAGCAGCAGTATAAGCAATATGGAACTAGAATTAAATATGCATAATTCAAAAGCACCAAAGAAGAATAGGCTGAGGAGGA AGTCGTCAACCAGACATATTCATGCACTTGAACTAGTAGTCAATAGAAATCCAAGTCCACCTACTCATACTGAACTACAA ATTGACAGTTGGTCTAGCAGTGAAGAG---ACAAAGAAAAAA---AGTTCTGAGCAAAAGCCAATCAGACACAACAGAAA CCTTCAACTCATGAAAAATCAAGAAACCGCAACTGGAGCCAAGAAGAGTAACAAGCCAAAGGAACAAATAAGTAAAAGAC ATGGCGCTGACTCTTACCCAGAACTACATTTAACAACCACAGCTGGCTTTATTACTAAGTGTTCAAGTTCTGATAATCTT CAAGAATTTGTCAATCCTAGCCTTCAAGGAGAGAAAACAGAAGAAAAC---CTGGAAACAATTCAAGTGTCTAATATTAC CAAAGAGCCCAAGGATCTAGTGTTAAATGGAGGAAGA---GATTTGCAA---ACCAAAAAATCTATTGAGAGTACCAATA TCTCAGTGATACCTGATACTGTTTATGGCACTCAGGACAGCGTCTCATTGCTGGGAGCTGACACCCCAGGG---AAGGCA AAA---ACAGCACCAAATCGATGTGCAAGTCAGTGTACAGCAATTGAAAACCCAAGTGAACTTACCAACAGTTGT---CC TAAAGATACTAGAAATGACACAGAGGGTTTTAAGGATCTATTGAGATGTGAAGCTAGC---CACATTCAGGAGACATGCA TAGAAATAGAAGAGAGTGAACTTGATACTCAGTATTTGCAGAGTACATTCAAGGTTTCAAAGCGTCAGTCATTTGCTCTG TTTTCAAATCCA---------GAAAAGAAATGTGCAACAATTTGTGCCCACTCCAAGTCCTTAAGGAAGCAAAGCCCAAA AGTCACTCTTGTGTGTGGAGAAAAAGAAGAAAATCAAGGGAACAGAGAGCCTAAAATCAAGCATGAGCAGGCAGTTCATA TGCCTACAGGCTATCCTGAGGCTTGTCAGAAAGAGAAG---CCAAGTGACTATACCAAATATAGCATTAAAGGAGTCTCT GGGCTTTGTCAGTCATCTCAGTTCAGA---GGCAGTGAATCTGAACTCATTACTGCAGATGGACATGGAATCTCACAAAA CCCAGATCAAATACCATCACTTTCTCCCACCAGGTCATCTGTTAAAACTAAATGTAAGACAAAC---CTGTTGGAAGAAA GGTTTGAGGAACATACAATATCACTTGAAAGAGCAATGAGAAATGAGAACGTCATTCAAAGTACAGTGAGCACAGTTAGC CAAAATAACATTAGGGAAAGTGCTTCTAAAGAAGCCAGCTCAAGCAGTATTAATGAAGTATGTTCCAGTATTAATGAAGT AGGTTCTAGT------------------------------------------GGTGAAAACATTCAAGCAGAAATAAGTA GGAAGAGAGGACCTAAATTAAATGCTGTGCTCAGATTAGGTCTTATGCAACCTGAAGTTTATAAACAAAGTCTT---CCT ATAAGTGACTGTAAACATCCTGGAATAAAAACGCAAGGAGAAAATGAAGGAGTAGTTCAGGCTGTTAATACAGATTTCTC CCCATGTCTAATTTCAGATAACCTAGAACAA---CCTGTGGGAACTAGTCGTGCTTCTCAGGTTTGTTCTGAGACTCCCG ACGACCTGTTAGATGATGATGAAATAAAGGAAAATATCAGCTTTGCTGAAAACGGCATTAAGGAAAGATCT---GTTTTT 
ATTAAAGATGACCAGAGAAGAGAGTTCAGAAGGAACCCAAGCCCTTTATCCCAT---TCAGGTTTGGCTCAGGGTTGCCT AAGAGGGGCCAGGGAATTGGAGCCCTCACAAGAGAACATATCTAG---- >AsianElep TGTGGCACAAATACTCATGCCAGCTCATTACAGCATAAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAACAAAAGCAAACAGCCTGGCTTAGCAAGAAGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCTAGCACAGAGAAAAAGGTAGATGTGAATGCTAATCCCTTGTATGAGAGAAAAGAA GTGAATAAGCAGAAACCTCCACGCTCTGAGAATCTTAGAGAC---ACCCAAGAT---ATTCCTTGGATAACACTGAATAG TAGCATTCAGAAAGTTAATGAGTGGTTTTTCAGAAGTGACGGCCTG---------GATGTCTTAAATGATGAGGGGCCTG AATCCAGTGCAGAAGTAGCTGGTGCATTAGAAGTTCCAAAT------GAAGTACAT------TCTAATTCTTCAGAGAAA ATAGACCTAATGGCCAGTGATCTGCGTGGTGCTTTAATATGTGAAAGTGAAAGAGTCCCCTCCAAACCAGCAGAAAGTAA C---ATCGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAGGTATCTCTCATTTGAGCCACATAACTGAAGATC TGATTATGGGAGCTATTGCTTCAGAACCTCAGATAGCACGA--------------------------------------- ---------------------GAACATCCTTTTACAAATAAATTAAAGCGTAAAAGGAGA---ACATCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGTAGATTTGGCAGTTGTTCAAAAGATCCCTGAAAAGATAAATCAGGAAACTGACCATGTGG AGCAGAAC---------GGTCAAGTGATGAATATTGCTAATGGTGGTCGTGAGAATGAAACAAAAGGTGATTAT---GTT CAGAAAGAGAAGAATGCTATCCCAACAGAA------TCATTGGCAAAAGAATCTGCTTTCAGAACTAAAGCTGAACCTAT AAGCAGCAGTATAAGCAATATGGAGGTAGAATTAAATATGCATAATTCAAAAGCCCCAAAGAAGAATAGGCTGAGGAGGA AGTCGTCAACCAGGCATATTCATGCACTTGAACTAGTAGTCAATAGAAATCTAAGTCCACCTACTCATACTGAACTACAA ATTGACAGTTGGTCTAGCAGTGAAGAG---ACAAAGAAAAAA---AGTTCTGAGCAAAAGCCAATCAGACACAACAGAAA CCTTCAACTCATGAAAAATCAAGAAACCGCAACTGGAGCCAAGAAGAGTAACAAGCCAAAGGAACAAATAAGTAAAAGAC ATGGCGCTGACTCTTACCCAGAACTACATTTAACAACCACAGCTGGCTTTATTACTAAGTGTTCAAGTTCTGATAATCTT CAAGAATTTGTCAATCCTAGCCTTCAAGGAGAGAAAACAGAAGAAAAC---CTGGAAACAATTCAAGTGTCTAATATTAC CAAAGAGCCCAAGGATCTAGTGTTAAATGGAGGAAGA---GATTTGCAA---ACCAAAAAATCTATTGAGAGTACCAATA TCTCAGTGATACCTGATACTGTTTATGGCACTCAGGACAGCGTCTCATTGCTGGGAGCTGACACCCCAGGG---AAGGCA AAA---ACAGCACCAAATCAATGTGCAAGTCAGTGTACAGCAATTGAAAACCCAAGTGAACTTACCAACAGTTGT---CC 
TAAAGATACTAGAAATGACACAGAGGGTTTTAAGGATCTATTGAGATGTGAAGCTAGC---CACATTCAGGAGACATGCA TAGAAATAGAAGAGAGTGAACTTGATACTCAGTATTTACAGAGTACATTCAAGGTTTCAAAGCGTCAGTCATTTGCTCTG TTTTCAAATCCA---------GAAAAGAAATGTGCAACAATTTGTGCCCACTCCAAGTCCTTAAGGAAGCAAAGCCCAAA AGTCACTCTTGTGTGTGGAGAAAAAGAAGAAAATCAAGGGAACAGAGAGCCTAAAATCAAGCATGAGCAGGCAGTTCATA TGCCTGCAGGCTATCCTGAGGCTTGTCAGAAAGAGAAG---CCAAGTGACTATACCAAATATAGCATTAAAGGAGTCTCT GGGCTTTGTCAGTCATCTCAGTTCAGA---GGCAGTGAATCTGAACTCATTACTGCAGATGGACATGGAATCTCACAAAA CCCAGATCAAATACCATCACTTTCTCCCACCAGGTCATCTGTTAAAACTAAATGTAAGACAAAC---CTGTTGGAAGAAA GGTTTGAGGAACATACAATATCACTTGAAAGAGCAATGAGAAATGAGAACGTCATTCAAAGTACAGTGAGCACAGTTAGC CAAAATAACATTAGGGAAAGTGCTTCTAAAGAAGCCAGCTCAAGCAGTATTAATGAAGTATGTTCCAGTATTAATGAAGT AGGTTCTAGT------------------------------------------GGTGAAAACATTCAAGCAGAAATAAGTA GGAAGAGAGGACCTAAATTAAATGCTGTGCTCAGATTAGGTCTTATGCAACCTGAAGTTTATAAACAAAGTCTT---CCT ATAAGTGACTGTAAACATCCTGGAATAAAAACGCAAGGAGAAAATGAAGGAGTAGTTCAGGCTGTTAATACAGATTTCTC CCCATGTCTAATTTCAGATAACCTAGAACAA---CCTGTGGGAACTAGTCGTGCTTCTCAGGTTTGTTCTGAGACTCCCG ACGACCTGTTAGATGATGATGAAATAAAGGAAAATATCAGCTTTGCTGAAAACAGCATTAAGGAAAGATCT---GTTTTT ATTAAAGATGACCAGAGAAGAGAGTTCAGAAGGAACCCAAGCCCTTTATCCCAT---TCAGGTTTGGCTCAGGGTTGCCT AAGAGGGGCCAGGGAATTGGAGCCCTCACAAGAGAACA----------- >RockHyrax TGTGGCACAGATACTTGTGCCAGCTCGTTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGGATGTAGA AAAGGCTGAATTCTGTAGTAAGAGCAAACAGCCTGGCTTAGCAAGGAGCCAACAGAGCAGATGGGCTGAGAGTACGGAAA CATGTAATGGT---AGGCAGATTCTTAGAACAGAGAAAAAGGTAGAAACGAATGCTGATCCTTTGTATGGGAAAAAAGAA GGGAATAAGCAGAAACCTCCATGCTCTGAGAGTCGCACAGAT---ACGCAAGAT---ATTCCTTGGGTAACACTGAATAA CAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGACAACCTA---------AGTGATTCACCTAGTGAGGGGTCTG AATTAAATGGAAAAGTGGCTGGTCCAGTAAAACTTCCAGGT------GAAGTACATAGATATTCTAGTTTTCCAGAGAAC ATAGATTTAATGGCCAGTGGTCCTCCTGGT------------------GAAAGAATCCCCTGCAAACCACCAAAAAGTAA C---ATCAAAGATAAAATATTTGGGAAAACCTATCAGAGGAAGGGAAGTCTCCCTCACCTGAGCCACATAACCGAAGATC 
TAGTTAAAGGCGCCATTGCTACACAACCACAGATAGCACGA--------------------------------------- ---------------------GAACATCCTCTTACAAATAAGTTACAGCATAAAAGGAGA---ACATCAGGCCTTCATCC TGAGGATTTTATCAAAAAAGCAGATTTGACAGTTGTTCAGAAGGCATCTGAGAAGATAAATCAGGAAACTGACAAAGTGG AGCAGAGT---------GGTCAAGTGATAAATATTGGTAATGGTGGTCATGAGAATGAAACAAAAGATGATTAT---GTT CTGAAAGAGAAGAATGGTAACCCAGCAGAA------TCACTGAAAAAAGAATCTGTGTTCAAAACTAAAGCTGAACCTAT AAGCAACAGTATAAGCAATATGGAACTAGAATTAAATACACATAATTCAAAATCACTGAAGAAGAACAGGCTGAGGAAGA AGTCCTCCTCCAGACATGTTCATGCCCTTGAACTGGTAGTCAATAGAAATCCAAGTTCACCACCTCATACTGGGCTACAA ATTGATAGCTGGTCTGGCAGTGAAGAA---ATGAAGAAAACA---AGTTCTGAGCAAAAGCCAGTCAGACACAACATAAA CCTCCAACTCATGAAAAACCAAGAAACCACAACTGGAGCCAAGAAGAGTAACAAGCCAAAGGAACAAATAAATAAAAGAC ACACTAGTAATCCTTACCCAGAACTAAATTTAACAAGCACAGCTGGCTTTATTACTGTATGTTCAAGTTCTGATAATCTT CAAGAACCTGTCAACCCCAGCCTTCAAGGAGAGGAAATAGAAGAAAAC---TTGGCAACAGTCCAAATGTCTAATACTGC CAAAGAACCTGAGGATCTAGAGTTAAATGGAGGGAGA---GTTTTGCAA---ACCAAAAGATCTGTTGAAAGTACTAGTA CCTCAGTGATACCTGATGCTGACTGTGGTGCTCAGGACAGCATCTCGTTACTGGAAGCTGACACTCTAGGG---AAGGCA AAA---ACAGCACCAAATCAAGGGGCAGGTCAATGTTCGGCAATCGAAAACCCCAACGAACTTATTCATGGTAGT---CC TAAAGACACTAGAAATGATATAGAGGGTTTTAAGGATCCACTGAGATGCAAAGTTAAC---CCTATTCAGGAGATATGTG TAGAAATGGATGAAAATGAACTTGATACTCAGTATATACAGAGTACATTCAAGGTTTCAAAGCGTCATTCTTTTGCTCTG TTTTCAAATCCG---------GAAAAGGAGTGTGCGACAAGTTATACCCACTCCAAGTCGTTAAGGAAAGAAAATCCCAA AGTCACTCTTCAGCGTGGAGAAGAAGAAGAAAATCAAGGGAACAAAGAATCTAAAATCAAGCAT---------GTTCATA CAACTGCAAGCTGTCCTGAGGTTTGTCAGGAAGACACAAAGCCTAGTGATCGTACTAACTGTAGTGTTAAAGGACTCCCT AGGCTTTGTCACTCATCTCAATTCAAA---GGTAGTGAGTCTGAACTCATTACTGAAGGTGAACATGGAATTGCACAAAA CCCGGATCAGATGCCATCATCTTCTCCCATCAGATCATCTGTTAACTCTAAGTGTAACAAAAAC---CTGTCAGAAGAGC GATTTAAGGAACATAAAATATTACTTGAAAGAACAACAGGAAATGAAACCATTGTCCAAAGTACAGTGAGCACAGGTAGC CAAGATAACATTAGGGGAAGTGCTTCGAAAGAAACCAGCTCAAGCAGTATTAATGAAGTAGGTTCTAGT----------- ----------------------------------------------------AGCGAAAACATTCAAGCAGAAATAAGTA 
GGAACAGAGAACCTAGATTAAATGCTGTGCTCAGGTTAGGTGTTATGCAACCTGAAGTGTATAAACAAAGTCTT---TCT ATAAGTAACTGTAAACAGCCAGAAATAAAAAAGCGAGGAGAAAATGAAGGAGTAGTTCAGGCTGTTAGTACAGATTTCTC TGCATGTCCCATTTCAGAAAACCTAGAACAA---CCTGTGAGAAGTAGTCACACTTCTCAGGTTTGTTCTCAGACTCCTG ATAACCTGTTAGATGATGATGAAATAAAGGGAAAGACTGACTTTGCTGGAAGTAGCATTAAGGACAGACCT---GTCTTT AGTAAAGATGACCAGGGAAGAGAGTTCAGAAGGAACCCAAGCCCTTTATCCCAT---TCAGGTTTGGCTCAGGGCCACCT GATAGGGGCCAGGGAATTAGAGGCCTCACAAGAGAACACATCTAGCGG- >TreeHyrax ------NCAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGGATGTAGA AAAGGCTGAATTCTGTAGTAAGAGCAAACAGCCTGGCTTAGCAAGGAGCCAACAGAGCAGATGGGCTGAGAGTACGGAAA CATGTAATGAT---AGGCAGATTCTTAGAACAGAGAAAAAGGTAGATACAAATGCTGATCCTTTGTATGGTAAAAAAGAA GGGAATAAGCAGAAACCTCCATGCTCTGAGAGTCGCACAGAT---ACGCAAGAT---ATTCCTTGGGTAACACTGAATAA CAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGACAACCTA---------AGTGATTCACCTAGTGAGGGGTCTG AATTAAATGGAAAAGTGGCTGGTCCAGTAAAGCTTCCAGGT------GAAGTACATAGATATTCTAGTTTTCCAGAGAAC ATAGATTTAATGGCCAGTGGTCCTCCTGGTGATTTAATATGTGAAAGTGAAAGAATCCCCTGCAAACCACCAAAAAGTAA C---ATCAAAGATAAAATATTTGGGAAAACCTATCAGAGGAAGGGAAGTCTCCCTCACCTGAGCCACATAACTGAAGATC TAGTTAAAGGCGCCATTACTACACAACCACAGATAGCACGA--------------------------------------- ---------------------GAACATCCTCTTACAAATAAGTTACAGCATAAAAGGAGA---ACATCAGGCCTTCATCC TGAGGATTTTATCAAAAAAGCAGATTTGACAGTTGTTCAGAAGGCATCTGAGAAGATAAATCAGGAAACTGACAAAGTGG AGCAGAGT---------GGTCAAGTGATAAATATTGCTAATGGTGGTCATGAGAAGGAAACAAAAGATGATTAT---GTT CTGAAAGAGAAGAATGGTAACCCAGCAGAA------TCACTGAAAAAAGAATCTGTGTTCAAAACTAAAGCTGAACCTAT AAGCAACAGTATAAGCAATATGGAACTAGAATTAAATACACATAATTCAAAATCACTGAAGAAGAACAGACTGAGGAAGA AGTCCTCCTCCAGACATGTTCATGCCCTTGAACTGGTAGTCAATAGAAATCCAAGTTCACCACCTCATACTGGGCTACAA ATTGATAGTTGGTCTGGCAGTGAAGAA---ATGAAGAAAACA---AGTTCTGAGCAAAAGCCAGTCAGACACAACATAAA CCTCCAACTCATGAAAAACCAAGAAACCACAACTGGAGCCAAGAAGAGTAACAAGCCAAAGGAACAAATAAATAAAAGAC ACACTAGTAATCCTTACCCAGAACTAAATTTAACAAGCACAGCTGGCTTTATTACTGCGTGTTCAAGTTCTGATAATCTT 
CAAGAACCTGTCAACCCCAGCCTTCAAGGAGAGGAAATAGAAAAAAAC---TTGGCAACAGTCCAAATGTCTAATACTGC CAAAGAACCTGAGGATCTAGAGTTAAATGGAGGGGGA---GTTTTGCAA---ACCAAAAGATCTGTTGAAAGTACCAGTA CCTCAGTGATACCTGATGCTGACTGTGGTGCTCAGGACAGTATCTCGTTACTGGAAGCTGACACTCTAGGG---AAGGCA AAA---ACAGCACCAAATCAAGGGGCAGGTCAATGTTCAGCAATCGAAAACCCCAACGAACTTATTCATGGTAGT---CC TAAAGACACTAGAAATGATATAGAGGGTTTTAAGGATCCACTGAGATGCGAAGTTAAC---CCTATTCAGGAGATATGTG TAGAAATGGATGAAAATGAACTTGATACTCAGTATATACAGAGTACATTCAAGGTTTCAAAGCGTCATTCTTTTGCTCTG TTTTCAAATCCA---------GAAAAGGAGTGTGTGAAAAGTTATACCCACTCCAAGTCGTTAAGGAAAGAAAATCCCAA AGTCACTCTTCAGCGTGGAGAAGAAGAAGAAAATCAAGGGAACAAAGAATCTAAAATCAAGCAT---------GTTCATA CAACTGCAAGCTGTCCTAAGGTTTGTCAGGAAGACGCAAAGCCTAGTGATCGTACCAACTGTAGTGTTAAAGGACTCCCT AGGCTTTGTCACTCATCTCAATTCAAA---GGTAGTGAGTCTGAACTCATTACTGAAGGTGAACATGGAATTGCACAAAA CCCAGATCAGATGCCATCATCTTCTCCCATCAGATCATCTGTTAACTCTAAGTGTAACAAAAAC---CTGTCAGAAGAGC GATTTAAGGAACATAAAATATTATTTGAAAGAACAACAGGAAATGAAACCATTGTCCAAAGTACAGTGAGCACAGGTAGC CAAGATAACATTAGGGGAAGTGCTTCGAAAGAAACCAGCTCAAGCAGTATTAATGAAGTAGGTTCTAGT----------- ----------------------------------------------------AGCGAAAACATTCAAGCAGAAATAAGTA GGAACAGAGAACCTAGATTAAATGCTGTGCTCAGGTTAGGTGTTATGCAACCTGAAGTGTATAAGCAAAGTCTT---TCT ATAAGTAACTGTAAACAGCCAGAAATAAAAAAGCAAGGAGAAAATGAAGGAGTAGTTCAGGCTGTTAGTACGGATTTCTC TGCATGTCCCATTTCAGAAAACCTAGAACAA---CCTGTGAGAAGTAGTCACACTTCTCAGGTTTGTTCTCAGACTCCTG ATAACCTGTTAGATGATGATGAAATAAAGGGAAAGACTGACTTTGCTGGAAGTAGCATTAAGGACAGACCT---GTCTTT AGTAAAGATGACCAGGGAAGAGAGTTCAGAAGGAACCCAAGCCCTTTGTCCCAT---TCAGGTTTGGCTCAGGGCCACCT GATAGGGGCCAGGGAATTAGAGNCCTCACAAGAGAACACATCTAGCAA- >Aardvark TGTGGCACAAATACTCATGCCAGCTCGTTACAGCATGAGAACAGCAGTTTATCACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGTGAGGAGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCTTAGCACAGGGGAAAAGGTAGATATGAAGGCTGATCCCTTGTATGGGAGAAAAGAA GTGAATAAGCAGAAACCTCCATGCTCTGAGAATCCTAGAGAT---ACTGAAGAT---ATTCCTTGGATAACACTGAATAG 
TAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGGCCTG---------GATGGCTCACATGATGAAGGGTCTG AATCAAATGCAGAAATAGGTGGTGCATTAGAAGTTTCAAAT------GAAGTACATAGTTACTCTGGTTCTTCAGAGAAA ATAGACTTAATGGCCAGTGAACCTCATGTTGCTTTAATATGTGAAAGTGAAAGAGTCCTCTCCAAACCAGCAGAAAGTAA C---ATCGAAGATAAGATATTTGGGAAAACCTATCGGAGGAAGTCGAGTCTCCCTAACATAAGCCATATAACTGAAGATC TAATTTTAGGAGCTATTGCTACAGAACCTCAGGTTGCACGA--------------------------------------- ---------------------GAATGTCCTCTTGGAAATAAATTAAAGCGTAAAAGGAGA---ACATCAGGCCTTCATCC TGAGGATTTTATTAAGAAAGTAGATTTGGCAGTT---CAAAAGACTCCTGAAAAGATAAATCAGGAAAATGACCAAATGG AACAAAAT---------GGTCAGGAGGAGAATATTGCTGATGGCTGTCATGAGAATGCAACAAAAGGTGAATAC---ATG CAGAAAAAGAAGAGTGCAAATCCAACAGAA------TCATTGGCAACAGAATCTGTTTTCAGAACTAAAGCTGAACCTAT AAGCAGCAGTATAAGCAATTTGGAACTAGAATTAAATACACACAATTCAAAGGCATCCAAGAAGAATAAGCTGAGGAGGA AATCCTCTACCAGGCATATTCATGCACTTGAACTAGTAGTCAATAGGAATCCAAGTCCCCCTAGTCATACTGAGTTACAG ATTGATAGCTGGCCTAGCAGTGAAGAG---TTAAAGAAAAAA---AGTTCTGAGCACAAGCCAATCAGACAGAATACAAA CCTGCAACTCATGAAAGATCAAGAGGCCACAACTGGGGCCAAGAAGAGTAACAAGCCAAATGAACAAATAAGTACAGGAC ACGCCTCTGACATTTTCCCAAAATTGAATTTAACAAACATAACTGGTTTTATTACTAATTGTTCAAGTTCTGATAATCTT CAAGAATTTGTCAATCCTAGCCTTCAAGAAGAGGAAATAAAAGAGAAC---CTGGGAACAATTCAAGTGTCTGATAGTAC CAGAGATCCTACGGATGAGGTGTTAAAC---AGAAGA---GGTTTGCAA---ACTGAAAGATCTGTGGAGAATACCAGTA TTTCAGTGAAACCTGATACTGATTATAGCACTCAGGACAGCATCTTATTACTGAAAGCTAACTCCCTAAGG---AAGGCA AAA---ACAGCACCA------------AGTCAGTGTGCAGCAATTGAAAATCCTAACAAACTTAGCCATGGTTTT---CC TAAAGATACCAGAAATGACATAGAGGGTTTTAAGGATCTATTTAGAGGTGAAGATAAC---CACGTTCAGGAGACATACA TAGAAATGGAAGAGAGTGAACTTGATACTCAGTATTTACAGAATACATTCAAGGTTTCAAAGCGTCAGTCATTTGCTCTG TTTTCAAATCCA---------GAAAAGGAATTTGCAACAGTCCATGCCCACTCCAGGTCCTTGAGGAAACAAAGTCCAAA CATCACTCTTGAGTGTGGAGAAAAAGAAGAAAATCAGGGGAACGAGGAATCTAAAATCAAGTGCGTACAGCTAGTTCTTT CAACTACAGGCTATGCTGGAGCTTGTCAGAAAGAGAAG---CCAAGTGATTATGCCAAATGTAGCATTAAAGGAGTCTCT AGACTTTGTCAGTCATCTCAATTCAGA---GGCAATGAATCTGAAATCATTACTGCAAATGAACATGGAGTCTCACAAAA 
CCTGGATCAGACACCATCACTTTCTCCCACTAGGTCATCTGTTAAAGCTAAATGTAAGACAAAT---CTGTCCAAAGAAA GATTTGAGCAACAGAAAATATCACATGAAAGAGTAATGGGAAATGAGAGCACCATTCAGAGTACAGTGGGCACAGTTAGC CAAAGTAACATTAGGGAAAGTGCTTTTAAAGAAGCTAGCTCAAGCAGTATTAATGAGGTAGGTTCCAGTGTTAATGAAAT AGGTTCTAGT------------------------------------------GGTGAAAACATTCAAGCAGAACTAGGTA AGAACAGAGGACCTAAATTGAATGCTGTGCTCAAATTATGTCTTATGCAACCTGAAGAGTATAAACAAAGTCCT---CCT ATAAGTAATTGTAAACATCCTTCAATAAAAACCCAAGGACAAAATGAAGGAGTAGTTCAGGCTGTTAATACAGGTTTTTC TTCATGTCTGATTTCAGATAACCTAGGACAA---CCTATGGGAAGCAGTCATGCTTCTCAGATTTGTTCTGAGACACCTG ATGACCTGTTAGATGATGACAAAATAAAGGAAAATACCAGCTTTGCTGTAAGTGGCATTAAGGAAAGATCTGCTGTTTTT AGTAAAGATGACCAGGAAAGAGAGTTCAGAAGGAGCCTGAGCCGTTTCTCCCAT---TCAAGTTTGGCTCAGGTTCACGT AAGAGGTGCCAGGGAATTAGAGTCCTCAGAAGAGAACATATCTAGTGAG >GoldenMol TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTGTAATAAAAACAAACAGTCTGGCTTAGCGAGGAGCCAGCAGAGCAGATGGGCTGGAAGTAAGGCAG CGTGCAATGAC---AAGCAGACTCCTAGCACACAGACAGAGCTATATAGGAGTGCTGGTCCCATGCACAGGAGAAAAGAA GTAAATAAGCTGAAATCTCCATGGTCTGAGAGTCCTGGAGCT---ACCCAAGAG---ATTCCTTGGATAACACTGAATAG TAGCATTCGGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGGCCTG---------GATGAGTCACATGATGAGGGGTCTG AATCAAATGCAGAGGTTGCTTGTAAATTCGAATTTCCCAAG------GAAGTACAAGGATATTCTAGTTCTTCAGAGAAA ATGGACTTAATGGCCAATGATCCTCATGATGCTTTAATATGTGAAAATGAGAGAATCCTCTCCAAACCAGCTGTAAGTAA C---ATTGAAGATAAAATTTTTGGGAAAACCTATCGGAGGAAGACAAGTCTCCCTAACTTGAGCCATGTAACTAAAGACC TAATGATAGGAGCTGTTGCTACAGAAGCTCAGATGGCACAA--------------------------------------- ---------------------GAACGTCCTCTTACAAATAAATTAAAGCGCAAAAGGAGA---ACCTCAGGCCTTCATCC TGAGGATTTTATCAAGAAAGCAGGCATGAAAGTTGTTCAAAAGACTCCTGAAAAGATAAATCAAGAAACTAACCAAATGG AGCAGAAT---------GATCAAGTGAGGAATATTGTTGATGGTGGTCATGAAAATGATACAAAAAGTGATTAT---GTT CAGAAAGAGAAGAGTGCTAACCCAGCCAAA------TCATTGGCAAAAGAGTCCGCTTTCACAACTAAAGCGGAACCTAT AAGCAGTAGTATAAGCAATATGGAACTAGAATTAAATATGTACAATTCTAAAGCACTGAGGAAGAATAGGTTGAGGAGGA 
AGTCCTCTTCCAGGCATATTCACACACTTGACTTGGTAGTCAATAGAAATCCAAGTCCACCTCCTTACACTGAACTACAG ATTGATAGTTGGCCTAGCAGTGAAGAA---ATAAACGAAACA---AGTTCTGAACAAAAGCCAACCAGACACAGCAGAAA CCTTCACCTCATGAAAGAACAGGAAACTGCAACTGGAACCAAGAACAGTAACAAGCCAAATGAACAAATAAGTAAAAGAC ATGCCACTTACACTTTTTCAGAACTAAACATAACAAATAGAACTGACTTTATTACTAACTGCCCACGTTCTGATAATCTT CAAGAACTTGTCAATCCTAGCCTTCAAGGAGAGGAAAGGGAAGAGAAA---TCGGAAACAATGCAAGTATATGATAACAC CAAAGAACCTGAGGATCAGGTGTTAAGTGGAAGAAGG---GATTTGCAA---ATGGAAAGATCTGTTGAGAGTACCAGTG TTTCAGTGATACCCGATACTGATCATGGCACTCAGAACCACACCTCATTACCGGAAGCTGGCACCCTCGGG---AAGGCA GAA---ACAGCACCAAATCAATGTACAAGTCAGTGTAAAGCAATTGAAAATCCCAACCAACTTATCCATGGTTGT---CC T---------AGAAATGACACAGAGGGCTTTAAGGATCTATTGAAACATGAAGTTAAA---CACAATCAGGAGACATGCA TAGAAATGGAAGAGGGTGAGCTTGATATTCAGTATTTACAGAATACATTCAAGGTTTCAAAGCGTCGGTCATTTGCTCTG TTTTCAAATCCA---------GAAAAGGAATGTGCAACAGTCAGTGCCCACTCTAGGTCCTTCAGGAAACAAAGTCCAAA AGCCACTCTTGAATGTGGCGAAAAAGAAGAAAATCAGGGGCACAAAGAGTCTAAAGTCAAGCATGTACAGGCAGTTCATA CAAGTGTGGGCTATCCTGGACTCTGTCAGAAAGAGAAG---CCAAGTGATTATACCAAAGGTAGCATTCAGGGGGCCTCT AGGCTTCATCAGTCATCTCAGTTCAGT---GGCAATGAATCTGAACAAATTACTGCAAATGAAAATGGAATTTCACAAAG CCCAGATCAAACAGCATTGCTTTCT------------------------AAATGTAAGAAAAAC---TTGTCTGAAGAAA GATTTGAAGAACGTGCAGTATCACTCGAAAAAGCAGTGGGAAATGAGAGCATCATTCAAAGTACAGTGAGCACAGTTAGC CACAATAACATTAGGGAAAGGGCTTTTAAGGAAACCAGCTCAAGTAGTACTAATGAAGTAGGTTCCAGTATTAATGAAGT AGGTTCTAGT------------------------------------------AACGAAAACATCCAAGCAGAGGTAGGTA GGAACAGAGGACCTAAGTTAAATGCTATGCTCACATTAGGTTTTATGCAACCTGAAGTCTATAAACAAAATCTT---ACT CTAAGTAATTGTAAACATCCTGAAATAACAAAGCAGGGAGACAATGAAGAAAGAGTTCAAGCTGCTGACCCAGGTTTCTC TCCGTGTCTAATTTCAGATAACCTAGAACCA---CCTATGGGAAGTAATCATGCTTCTCAGATTTGTTCTGAGACACCTG ATGACCTGTTAGATGATGATGAAATCAAAGAAAATATCAGCTTTGCTGAAAGTGGCATTAACGAAAGATCTGCAGTTTTT AGTAAAGATGACCATAGAAGACAATTCAGAAGGAACCTAAGCCCTTTATCCCAT---TTAGGTTTGACTCAGGGTCACTT AAGAGGTACCAGAGAATTAGAGTCTTCAGAAGAGAACCTGTCTAGTGAG >Madagascar 
TGTGGAACAAATACGCTTGCCAACTCATTACAGCGTGAGAACTACAGTTTATTACTCACTAAAGACAGACTGAATGTAGA AAAGGCTGGATTCTGTAATGAAAGCAAACAGCCCGGCTTAGCAAGGAGCCAACATAACAGATGGGCTGAAAGTAAGGAGA CATGTAATGAC---AAGCCGACTCCTAGCACAGAGAAAAAGGTAGATAAGAATGCTGACCCCGTGCATGGGAGAAAAGGA GTGCCTAAGCAGAAACCTCCGTGCCATGGAAGTCCTAGAAAG---AGCCACGAT---GTGCCTTGGAAAACACGGAAGAG TAGCATTTGGAAAGTCAATGAGTGGTTTTCCAAAAGCGATGGCCTG---------GGTGACTCGCATGATGAGCGGCCTG AATCAGATGCAGACGTCGCTGGGGCCTTCGAAGTTCCAGAT------GAAGCACGCGAATCTTCTAGTTCACCAGAGAAA ACAGACTTGATGGTCAGTGATCCTCATGTTCCTTTACTGCCTAAAAGTGAAATAGTCCTTTCCAAACCAGTAGAAAGTAA C---ATCGAAGACAAAATATTTGGGAAAACGTATCGGAGGAAGTCAAGTCTCCTTAACTCGAGCCATGTAACCGAAGATC TAATGGTAGGAGCTGCTGCTGCTGTAGCACCTCCGATAGCG--------------------------------------- ------------------CAAGAGCACCTTCTTACCAGTAAATTAAAACGC---AGAAGA---TCATCCGTCCTCCACCC TGAGGACTTTATCAAGAAAGCAGATTTGGCAGTTGTTGAAAAG------------ATCAACCAGGGAACTGACCAAACAC GGCCGAAG---------GGTCAAGTGGAGCATACTCCCGATGGTGGTCACGGGCATGGAACAAAAGGCGGGAAT---TGT GTTCAGAAGGAGAGTGCTGACCTGGCAGAA------TCATCGGCCAAAGAATCTGCCTTTCGAACTGTAGCTGAGCCTTT GAGCAGTAGGAAAAGCGGTATGGAACTGGAACCGACTGTGTTCAATGCAAAAGCACAN---AAGAACAGGCTGAGGAGGA AGCCCTCTGCCAGGCACATCCACCCCCTGGAACTA---GTCAACAGACAGCCAAGCCCGACGAGGCACACAGAACTGCAA ATCAGTAGCTGGCCTAGCAGTGAAGAG---CTAAAGAAAGAA---GAATCTGAGCCAAAGCCAGTCAGACGGAGCCGGAA CTTACAGCTCACGAAAGACCAAGAAATGGCCATCGGAGCCAAGAAGAATGACAAACCAAATGAAGAAAGGGGCAAAAAAC ACGCCCCTGACACTTTCCCAGAACGAAATTTAACAGACACAACTGACCTAATTACTACCTGTTCGTTTTCTGGTAATCTT CAAGCATCTGTCTGTCCTAGCCTTCCAGGGGCAAACCTGAAAGAGAAA---CTGGGAACAGTGCAGGTGTCTAATGGTAC CAGAGACCCCAAAGGTGAGGTGTTGGGTAGAGCAAGG---GGCCTGCAA---ACCGCAAGATCTGTTGAGAGTACCAGTG TTTCACTGATACCTGATGCTGATGATGGCCCTCAGGGTACCATCTCAATCATGGAAGCAGGCAACCGAGGG---GAGGCA CAA---ACAGCACCAGGTGAATGTGCAAGGCAGTGTGCAGCAACTGAAAACCCCAACAGGAGTGTCCAGGGTCTT---TC CAAAGACACTAGAAATAACAGGGAGGGCTCTGAGGATCTCTTGACACACGACGTTAAC---CACCTTCAAGAGACATGCA CAGAAATGGAAGAGAGTGAACTTGATATTCAATCCTTACAGAATACATTCAAAGTCTCAAAGCGTCAGTCATTTGCTCTG 
TCTTCAAATCCA---------GAAAAGGAGTGTGCCTCGCTCGGTGCCCGCCCT---------------------CCAAA CATTGCTCTTGAGTGTGGGGAA---GAAGGGACTGAGGAGAACAAAGAGTCTGCAGTTCGACCTGTGCAGGCAGTTCCTG CAACTGTGGGCGGTGCTGGAGGTAGTCCAACAGAGCAACAGCCACGTGATTATACCAAAAGTCACCTTCAAGGAGTGTCC AGGCTTGGTCAGGCAGCTCCATTCAGA---GGCAGTGAATCTGACCCCAGTACCTCAGTTGAACATGGGATTGTGCAGAC CTCGGAGCNTGCCCCACCACTCGCTCCCATCAGGTCATCTGTTAACAGTGAACGTAAG------------------GAAA GACTGGAGGATCATGCAGAATCACTTGAAAGAGCCTCAGAAAACGAGAGTATCATTCCAAGTACAGTGAGCACAATTAGC CAAAATACCCCGAGAGAAAATGCTTTCAAAGGAACCAGCTCAAGCAGTCTTAATGAAGTAGGTTCTAGC----------- ----------------------------------------------------AGCGAAAACATTCAAGCAGAGCTAGCTA GGAACAGAGAACCTAAATTGGATGCTGTGCTCAGGTTGGGTCTTGTGCAACCTGAAGTCTGTAAGGAAAGTCTT---CCT ATGAGGAAATGTAAGCATTCTGAAGTAAAAAGGCAAAAAGGCAATGGAGGACTAGTTCCAGCTGTTCATCCAGATTTCTC TCGCTATCTAATGTCACATAACCCAGAGCAG---CCTATGGAGGACAAT---GATCCTCAGATTTGTTCTGAGACACCTG AAGACCTGTTAGATGACAGTGAAATAAATAAAAACAGCCACTTGGTTCAAAGTGACATTAGGGAAAGATCTGCTGTTTTT AGCAAAGATAACCAGAGAAGAGATTTCAGAAGGAGCCCTGGGCCTACATCCCAT---TTAGGTTTGTCTTGGGGTCACCC NAGAGGTGCTGAAGAGTTAGAGTCCTTAG-------------------- >Tenrec TGTGGCACACGTACGCTTGCCAGCTCGGCACAGCGCGAGGACTGCAGCTTATTACTCACCGAAGACAGACTGGATGGAGA AAAGGCTGGATTCTGCAAGGAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAGCATAGCAGATGGGCTGAAAGTAGGGGAA CGGGTAATGAC---AAGCCGACTCGTAACACAGAGGAAGAGGTAGTTGTGAGCGCTGACTCCGCGCATGGGAGGAAAGGA GTGTCTAAGCAGAAACCTCCGCGCCAGAAAAGGCCCAGAAAG---AGCCATGAT---GTGCCTTGGAAAACACGGAAGAG CAGCATTTCGAAGGTTAACGAGTGGTTTTCCAAAAGCCACGGCCTG---------GGTGACTCTCGCGATGGGCGGCCTG AGTCAGGCGCAGACGTAGCTGTAGCCTTCGAAGTTCCAGAC------GAAGCATGTGAATCTTATAGTTCTCCAGAGAAA ACAGACCTGATGGCCAGGGACCCTCCTATTCCCTTACTGCATAAAAGCGCAAGAGTCTTTTCCAAACCAGTAGAAAGTAA C---ATCGAAGACAAAATATTTGGGAAAACGTATCGGAGGAAGTCAAGCCTCCTTAACTCGAGCCATGTAACTGAAGATC TGGTGCTAGGAGCTGCGGCTGCCGTAGCGCCTCCGGTAGCA--------------------------------------- ------------------CAAGAGCAGCTTCTTACCAGTAAATTAAAGCGCAAGAGGAGA---TCCTTCGTCCTGCACCC 
TGAGGACTTCATCAAGAAAGCAGATTTGGCTGTCGTTCAAAAGACTCCTGAAACGATCCATCAGGGACCTGACCCAATGC AGCCGAAG---------GGGCCAGTGCAGAAGATTCCCGATGGTGGTCCCGGGCGAGGGACAAAAGGCGGGGATCGTGTT CCGAAGGAGAAGAGTGCTAACCTGGCAGAA------ACATCGGCCAAAGAATCTGCCTTTCGAACTGTAGCTGAACCTTT GAGCAGCCGGAAAAGCAGTATGGAACTGGAGTTGCCTGTGTTCAGTTCAAGAGCGCAG---AAGAACAGGCTGAGGAGGA AGCCCTCTGCCAGGCACATCCATCCGCTGGAACTA---GTCAACAGACAACCAAGCCCGACTACGCACACGGAGCTGCAA ATCAGCAGTTGGCCTAGCAGTGAGGAG---CTAAAGAAAGAA---GAATCCGAGCCAAAGCCAATCAGACGGAGCCGGCA CTTAAAGCGGGAAAGA---------------------GCCAAGAAGAATGACAAGCCAAACGGAGAAAGGGGCAAAAAAC ACGCCCCTGACACATTCCCAGACCGAAAGTCAACAAGCACAACGGACTTAATTGCTAACTGTTCCGTTTCTGGCAGTCTT CATGGAGCTGTCTGTCCTGGCCTTCCAGGGGCAAAGCAAACAGAGAAA---CGGGGAGCAGTGCAGGTGTCTAATAGCGC CAGAGACCCCAAAGATGAGGTGTTGGGTAGAGCAAGG---GGCCTGCAA---ACTGCAAGATCTGTTGAGAGCACCAGTT TTTCACTGATACCTGCTGCTGCTGACGGCACCCCGGGTGGCGTCTCAATAATGGAAGCAGGCAGCCGAGGA---CAGGCA CAA---ACAGCACCAGATCCATGTGCAAGGCAGTGTACAGCAACGGACAACCCCAGCGAAAGTGCCCACGGTCTT---TC CAGAGATACTAGAAACAGCACGGAGGGCTTTGCGGATCTATTGACACATGGCGTTCCC---CACATCCAAGAGACATGCA CAGAAATGGAAGAGAGTGAACTTGATATTCAGTCTCTACAGAATATGTTCAAAGTCTCCAAGCGTCAGTCATTTGCTCTC TCTTCAAATCCA---------GAAAAGGAGTGTGCCACACTCTGTGCCCATCCTAGCGCCTTCAGGAAACAGAATCCAAA CGATGCTCTTGAGGGTGGAGAA---GAAGGGACTCGGGAGAACAAAGAGTCTACAATTAAGCCTGTGCGGGCAGTTCATA CCACCGTGGGC------------------ACGGAGAAGCAGCCAAGTGATTACACCAACAGTAGCTTTCAAGGAGTTTCT AGGCGTGGTCAGGCATCTCAATTCAGA---GCCAGTGAATCTGACCCCGGTACCTCAGTCGAACACGGAAGTTTGCAAAC CACAGAGCATACCCCACCACTCTCTCCCATCAGATCCTCCCTTAAAAGTAAATGTAAGGAAAAG---CTGTCAGAAGAAA GATTGGAGGATCAGGCAGAATCCCTTGAAAGAGCCTCGGGGAATGAGAGCATCATTCAAAGTACAGTGAGCACAATTAGC CAAAATACGCTTAGAGAAAATGCTTTTAAAGGAACCAGCTCAAGCAGTCTTAATGAAGTAGGTTCTAGC----------- ----------------------------------------------------AGCGAAAACATTCAGGCAGAACTAGCTA GGAACAGAGAACCTAAATTGGATGCTGTGCTCAGACTAGGACTTGTGCAGCCTGAAGGCTGTAAGGAGGAAAGTCTTCCC CTAAGGAAATGCAAGCATCCTGAAGTAAGAAGGCAAAAAGGCAATGGAGGACTAGTTCCAGCTGTCAATCCGGATCTCTC 
CCGCTATCTAATGTCACGTAACCCGGAGCAA---CCGATGGAGAGCAAC---GGTTCTCAGATTTGCTCGGAGACACCTG AAGACCTGTTAGATGACAGTGAAATAAAAAATAACAGCTACTTTGTTCAAAGTGACGTTAAGGAAAGGTCTGCAGTTTTT GGCAAAGATAACCAGAGAAGAGATTTCAGAAGGAGCCCTGGGCCTACATCCCAT---TTAGGTTTGACTTGGGGTCACCC AAGGGGTGCGGAAGAATTAGAGTCCTTAGAAGAAAGCGAAGCCAGTGAG >LesserEle TGTGGCACAGATCCTCATGCCAGCTCATTACAGCATGAGAGCAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGA AAAGGCTGAATTCTATAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAA CATGTAATGAT---AGGCAGACTCCTAGCACAGAAAAAAAGGTAGCTGTGAGTGCTGATCTTTATTATGGGATAAAAGAA GTGAATAAGCAGAAATCTCCGCACTCTGAGAGTCCAAACAAT---AACCAGAAT---ACTCCTTGGATGACATTGAACAG TAGCATTCGGAAAGTTAATGACTGGTTTTCCAGAAGCGGTGGCCTG---------GATGGCTGCCATGAT---AGATCTG AATCGAATGTAGTAATAGCTGGTGAAGTGGAAGTTCCAAAT------GAAGTACACGGATATTCTATTTATTCAGAGAAA TTAGACTTAATGCCCAACAATCCTCATGATACTTCACTGTATGAAAGTGAAAGAACCCTCTCCAAACCAGCTGAAAACAA C---ATTGAAGATAAAATATTTGGGAAAACCTATCGGAAGAAGGCAAGTCTCTCTAACTTGAGCCATGTGACTCAAGATC TCATTACAGGAGCTGTTGCTACAGAATGTCAAACAGCACAA--------------------------------------- ---------------------GAACGCCCTCTTACAAATAAATTAAAGCGTAAAAGAAGA---ACATCAGGCCTTCAGCC TGAGGATTTTATCAAGAAGGTAGATTTGACAGTTGTTCAAAAGATTCCTGAAAAGACAAATCAGGAAACTGAACAAATGG TGCAGAAT------GGTCAAGTGAAGAATAAGATTGCTAGTGGTGGTCATGAGAGTGATACAAAAGGTGATTAT---GTT CAGGAAGAGAAGAATACTAACCCAACAGAG------TCACTGGTAAGAGAATCTGATTTCAGAAATAAAGCTGAACCTAT AAGCAGCAGTATAAGCAAAATGGAGCTAGATTTAAATACACACAGTTCGGTAGCACCAAAGGAGAACAGGCTGAGGAGGA TGTCCTCAACCGGGCATGTTCACGCACTTGAACTAGTAGTCAATATAAGTCCAAGGTCACCTATCCATACTGAAGTACAG ATTGATGGTTGGTCTAGCAGTGAAGAG---AAAACGAAAAAA---CATTCCGAGCATAAGCCAGTCAGACCTAGCAGAAA CGTTTCAGTTACAACAAATCAAGAAACTGAAACTGGGTCCAAGAAGAATAACAAGCCAAATGAACAAATGAGTAAAAGAC ATGCCACTGACACTTCCCCAGGGCAC---------AACATAACTGGCTTCATTACTGACTGTTCAAATACCAGCAGTCTT CAAGAATGTTTTAATCCTAGACTTCAGGGAGAGGCAGTAGAAGACAAC---TTTGGACCAGTTCAAGTGTCTAATAGTAT CAAAGATCCCAAGGATCAAGTATTAAATAGAGGAAGG---AGTTTGCAGCACACTGGAAAATGTGTTGAGAGTACCAGTA 
TTTCAGTGTTATCTGATACTGATTATGGCACTCAGAATAGCATCTCATTACTGGAGGCTGGCACCCTAGGG---AAGGCA GAA---ACAGCACCAGATCAATGTGCAAGTCACTGTGCAGCAATGGAAAACTCCAAGAAACATGTTCATGACTTT---CC TAAAGATACTGGAAATGATACAAAGGGTTTAAAGGACTCATTGAAATGTGATGCTAGC---CACAGTAAGGATACGTACA TGGAAATGGAAGATAGTGAACTTGATACTCAGTATTTACAGCATACATTCAAGGTTTCAAAGCGTCAGTCATTTTCTCTG TTTCCAAATCCA---------GAAAGGGACTATACA-------------------------------------------- ----------------------------------------------------------AAGCTAGTGCAGCTAGTTCATA CCACTGCGAGCTATCCTGGAGCCTGTGAGGAAAAGAAA---CCAAATGATTTTACCAAATGTAGCATTAAAGGAGTCACT AGACTCTCTCATTCATCTCCATTGAGA---GGCAATGAATCTGAACTCATTACCGCAAATGAACATGAAATGTCACATAG CCCAGATCAGACACCGTCACTTTCTCCCACCAAGGTTCCTGTTAAAATGACATGGAAGAAAAACCACCTGTCAGAAGAAA ACTTTGAGGAACATACAATA------------ACAATGGGAAATGAGAGCATCGTTTATAGTCCAGTGAGCACAGTTAGC CAAAGTAATGTTAAGGGA------TATGCTCAGGCCAGTTCAAGCAGTATTAACGAAGTAGGTTCCAGTACTAATGAAGG AGGTTCCAGTATTAATGAAGTAGGTTCTAGT---------------------GGTGAAAACATCCAAGCAGAGCTCGATA GGAACAGAGGACCTGCATTAAATGCTGTACTCAGATTAGGTCTTGTGCAACCTGAAGTCTATGAACAAAGTCTT---CCT GCAGGAAATTGTAAACATCCTGAAATGACAAGGCCAGGAGACAGTGAAGGAGAAAAAAAGACTGTTAATCCAGATTTCTC TCCATGTCTACTCTCAGATAATTTAGAACAA---CCTAGGGGAAGTTGTCATGCTTCTCAGATTTGTTCTGAGACACCAG ATGACCTGTTTGGTGATGATGAAATAAAGGAAAATAGCAGCTTTGGTGAAATCAGCTTTAAAGAGAGATCTGCTGTTTTT AGTAAAGATGAGCAGGCCAGAGAGTTCAATAGGAACTCAAGCCGTTCATCTCGT---TCAGATTTGGCTCCCAGTCATCT AAGACGCGATAGGGAATTAGAGTCCTCAGAAGAGAATGTATGTA----- >GiantElep TGTGCCACAAGTATTCATGCCAGCTCATTACTGCATGAGAACAGCAGGTTATTGCTCAGTAAAGACAGAATGGATGTAGA AAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAACAGAGCAGATGGACTGAAAATAAGGAAA CATGTAATCAT---AGGCAGACTCCTAGTATAGAGAAAAAATTAGATATGAATGCTGATCCCTCTTATGGGCGAAAAGAA GTGAATAAGCAGAAATATGAATGCTCTGAAAGCCCTCAAGAT---ACCCAAGAT---ATTCCTTGGATGACACTGAATAG TAGCGTTCAGAAAGTTAATGCATGGTTTTCCAGAAGTGATGGCCTC---------GATGATGCCCCTGATGTAGGGTCTG AATCAAATACAGCAATAACTGGTGCACTGGAAGTTTCAAAG------GAAGTACATGGATATTCTAGTTCTTCAGAGAAA 
GTACACACAATGACTGGTGATCCTCAGGGTGCTTCAGTATGTGAAAGTGAAAGAGTCTTCTCTAAACCAGCAGAAAATAA C---GTTGAAGATAAAATATTTGGGAAAACCTATCGGAGAAAGTCAAGTCTCCCTAACTTGAACCATATAACGCAAGATC TCATTGTAGGAGCTGTTGCTACAGGACCTCAGACAGCACAA--------------------------------------- ---------------------GAACACCCTCTTATAAATAAACTAAAGCGTAAAAGGAGA---ATATCTGGCCTTCGTCC TGAAGATTTTATCAAGAAGGTAGATTTGACT---AGTCAGAAGACTCCTGAAAAGAGAAATCAGACCATTTACCAAGTGG AC------------------------AGTAGTATTGCTGGCGGTGGTCATGAGCATGAAACGAAAGGTGACTGT---GTT CAGAAAGAGAAGAATGTTAGCCCAGCAGAA------TCATTGACAAAAGCTTCTGCTTTCAGAACTAAGGATGAACCTAT AAGCAGCAGTGTGAGCAGTATGGAACTAGAATTAAATATGTACAGTTCGATAGCACCAAAGAAGAACAGGCTGAGAAGGA AGTCCTCTACTAAGCGTATTTGTGCACTTGAACTAGTAGTCGATAGAAATCCAACTTCACCTACCCATGCTGAGCTAGAG ATTGATAGTTGGCCTAGCAGTGAAGAG---AGAAGGGAAAAA---TGTTCAGAGTTTAACTCCGTCAGACACAGCAGAAC CTTTCAGCTTATGAAAGGTCAAGAAACTGAAATTGGAGACCAAAAGAATAATGACAAGCAAAATGAACAAATAAGTAAAA AACATGCCACTGACACTTCTCCAAAACGA------AACATAACTGGCTTTATCACAAACTGTTCAAATACTGATAATCTT CAAGAATTTGTTAATCCTAGATTTGAAGGAGAGGAAATAGCC---------GTGGGACCAATTCAAGTGTTGAATAGTAC CAAGGACCCTGGAACTCAAGTGTTAAGTAAGGAAAGG---GGTTTGCAC---ACTGGGAATTATGTTGAGAGTACCAGTA TTTCAGTAATACCTGATACTGATGATGGTATTCAAAATAGCACCTCATTACTGGAAGCTGGCACATTAGAA---AAGGCA GAAGAAACAGCATTGACTCAATGTGCAAGTCACTGTGCAACAATTTTAAACCCCAACACACATGTTCATGGCTTT---CC TAAAGATCCTGCAAATGACACAGAGGGTGTTAAGGATCTGTTGAAATGTAATCCTAAC---CACCTTCAGGATACGTGCA TAGAAATGGAAGATAGTGAACTAGATACTCAGTATTTACAGAATACATTCAAAGTTTCAAAACGTCAGTCATTTACCCTG TTTCCAAATCCA---------GAAAAAGAATGTACAACAGTCTGTGCCCACTCCAGGCTCTCAAAGAAACAAAATCCAAG TGTCACTCTTGAGTGTAGAGAAAAAAAAGAAAATCAGGGGAATGAAGAGTCTAAAATGAAACATGTGCAGGTCGTTCATA CATCTGTGCACTATCCGGGAGTTTGTGAAGAAGAGAAGATACCAGATGATCACACCAAAGTTAGCATTAAAGGGATCTCT AGGCTTTGTCAGTCATCTCCTTTCAGG---AGCAATGAATCTGAACGCATTACTGAAAATGAATGTAAAATGTCACATAG CCCAAACCAGACACCATCATTTTCTCCCACCAAGCCATCAGTTAGAACCAAATATAAGCAATTCTACTTGTCAGAAGAAG AGTTGGAGGAACATGCACTATCACCTGAAAGAGCAATGGAAAATGAGAGTTTCATTCACAGTACAGTGAACGCAGTTAGC 
CAAAATAACCTTAGGGGAAGTGTCTTTAAAGAAGGCAGCTCAAGCAGTATTAATGAAGCAGGATCCAGTACTAATGAAGT AAGTTCCAGTATTAATGAAGTATGTTCTAGT---------------------GATGAAAACATCCAAGCCGAACTAGATA GGAACAGAAGACCTGAATTAAATGCTGTACTCAGGTTAGGTCTTATGCAACCTGAAGTGTATCAACCAAGTCTT---CCA ACAAATAACTGTGAATATCTTGAAATTAGTGGGCCAGGAAAAAATGAAGGCATAATTCAGGCTGTTAATCCAGATTTTAC TCCGTGTCTGATTTCAGATAACTTAGAACAACAGCCTATGGGAAGTAATCATGCTTCTCAGATTTGTTCTGAAACACCTG ATGACCTGTTAAATGATGAGAAAAGAAATGAGAATATCAGCTTTGCTCAAATCAGCATTAAGGAACGATCCACTGTTTTT AGTAAA---GACCAGGCAAGAGAATTCAGAGGAAACCCAAACCCTTTATCCCAT---TCAGATTTGACTCAGAGTCATCT AAGAAGTGGTAGGGAATTAGAGTCATCAGAAGANAA------------- >Caenolest ---------------------NGCTCATTACTGCCTGAGATCACCAGTTTATTGCCCAACACAGACAGAATGAATGTAGA AAAGGCTGAACTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGCAACCAGCAGAGCAATCTGGATGAAAGTAAGGAAA TATGTAGTGCTAGCAAGACCCCAGGTGCAGAGGAA---CAGCCTGAGCTGAATGCCAATCATCTGTGTGAGAGGAAAGAA CTAGAGGAG---AAGCTGCAGTGCCCTGAGAGCCCCAGAGGTGATTCTGGGGACTGCCTGTCTGGAACCAAAGTGAAAAA CAGTATTCAGAAAGTTAATGAGTGGTTATTCAGAAGTAATGACGTTTTAGCCCCAGATTACTCAAGTGTTAGGAGCCATG AACAGAATGCAGAGGCAACCAATGCTTTAGAATATGGGCATGTAGAG---ACAGATGGAAATTCTAGCATTTCTGAAAAG ACTGATATGGTGGCTGACAGTACTGATGGTTCCTGGCTACATGCACCTGAAAAAAACTGCCCCAGACCGGCAGAGAGCAA CAATATTGAAGATAAAATATTTGGAAAAACCTATCGAAGAAAGTCGGTTCACCCTGTTTTGAATTACGTAACTGAAAACT TGATTGTTGAAACCATTGCTCCTGATTCTGTGATCCCTCCA--------------------------------------- ---------------------GAGCCTCTCAAAAAAACCAAATTAAAGCGTAAAAGAAAGACCATATGTRACCTGCAGCC TGAGGATTTCATCAAAAAGACAAACATTCCAGTTATTCCCAAAACCCCTGAAAAGAAAAATCACCCTGTTGACCAGATCC TTGAAAAAGAACAAAATGGCCAGGTGATGAGCACAGCTGATGGTCAGTCCAAGCAGAAAGCACCCGGTTGTCAT---GTG GGGGAACTGAAAGAGGCTCAGGCATCAGAACCATCCTCTGTAGAGAAAGGATCCACTTTCAAAACTGGATCCGAGCCTGC AGCTGGTAGCACAAACCAGAGGGAACTTGAATTAAATGGTAGAGATGCAAAAATGACAAAAAAAGACAGACCAAGGAAGA GGCCTTCAGTTAGGACTGTCTGTGCTCTGGAGCTCGTGACTGATAGACACCCAGGCTCTTCTAATGAGACAGAACTACAG ATCGATAGCTATCCCAGCAGTGAAGAG---ATAAGGAAAGGAAATAATTCTGAGCAAAAGCAAGTCAGACGCAGCAGAAG 
ACTTCAGATGCTGTCAGAA---GAAATTGCAGCAGGAGCCAAGAAGGCCCATAAGCCAGATGAAGAAGTGGAAGAGAATT GTGTCAATGAAGTTTCCCCAGAACTAAAAATAGGAAAAGTATCTGCCTGTTCTACTGACAGTCTAGCTACTGATAGTGAT CTGGTATTAGCTAGCTGCAGTTTCACAGAA---GGAGATGAAAAGAGC---CTGGAAGCCTTCCAG---------CCCAG CAGAGACCAAGAC---AGCCTGGCAGTAAGCAGAGGAGAGAAGTTGCAA---GGGAAAAGAACCAAAGGAAACATGGAGG TTTTGGAGGTTGCCAGTACTGATTGGGACACTCAGGACAGCACCTCATTGTTTCCAACCGATGTTCCCCCA---AATCCA ACA---GCAGACTCTGGCCCACACAGAAATCAACATGAGGTCATAGAAACCCCCAAGGAACTCTTTGATGATTGCTCATC CAAAAACACCGGAAGTGGTAAAGAAGATTTG---------ACAAGACAAGAAGTCAAA---AATACCTCAGAGACAACCA CAGAGATAGAGGACAGTGAACTTGATACCCAATATTTACAGAATACCTTCAAACGTTCAAAACGCCAATCATTTGCTCTG TGTCCCAGTCCAAGG------CAGACAAGTATGAAACTCCGGTCCATCCCTAGAGCTCTAAGTCATCAGAGCCCAGATAA AGCCACGAATCATGGGGGGCAGGAAAAAAAAAAGCAGGGAAGCAGAGAATCAGACAAGGCTGTGCAGCCAGAACTTGCAG TCATGAGTTCAGCTGCGGTTTGTCAGACAAAGGAGAAAAAGCCAGGTGATTATGCCAAATGTAGCACA------GTCTCT AGGCTTTGTCACATAGCTCCATTACATGATGACAATGACTGTGACCACAGTGCTGAAAACAACAAGGGAATTTCACAAGT TCCTGATCAAAAGCAATCTGTCTCTCCAGCAAAGTCATCAGCTGGTAGATCTATATACACAAAAAGCCTCCTGGAGGAAA GACTTGATGAACAGACCACATGTCCCGAAACAGTGATGGGAAATGAAAGCTTAGTCCACAGTAGCTTAAGCCTGGTTAGC CAAAGTAACAGCGGAGAATATATTTCTAAAGCAACTGACTCAAACAGATTTGTTGGTGTAGACTCTAGC----------- ----------------------------------------------------GGAGAAGGTAGTCAGGAAGAAAAAGGTG AAAACAAATTAAATCAGTCCCAAGTCTGTCAACAAAGCTT---------------------------------------- -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- ------------------------------------------------- >Phascogale ---------------------NGCTCATTACTGCCTGAAACCTCCAGTTTATTGCCCAACACAGACAGAATGTCT---GA AAAAGCTGAGCTCAGTAATAAAAGCAAACAGTCTGGCTTAGCAAGGAACCAACAGAGCAGACTGGATGAAAGTAAGGGAA 
TATGTAGTACTGGGAAGACCCCCAGTGTAGGAGAG---CAGCCTGAGTTGAATGCCCATCATCTGTGTGAGAAGCAAGAA CTAAAGGAG---AAGCCACAAGGTCCTGAAGGCCCCAGAGGAAATCCTCAGAATTGCCTATCTGGAACCAAACTGAAAAG CAGTATTCAGAAAGTTAATGAGTGGTTATCCAGGAGTAATGACATTTTAGCCTCCGATTGCTCCCTTGACAAGAATCATA AGCAGAATGCAGAGATAGCTAGTGCCTTAGAAGATGGGCATCCAGATAACTCTGATGGAAATTCTAGCATTTCTGAGAAG ACTGACGTTGTGGCTGACACTGCCGATGGAGCCTGGCTACATGTGCCTAAAAGAACTTACACCAGGCCAGCAGAAAACAA CAATATTAAAGATAAAATATTTGGAAAAACCTATCGGAGAAAGTCGGGTCACCCTAATTTGAATCACATAACTGAAAATT TGATTGTTGAGACAGTTGTTCCTGATTCTTTGGTTCCTCCA--------------------------------------- ---------------------GAACCTCTGAAAAATACCAAGTTAAAGCGTAAAAGAAAGACTATATGTGACTTGCAGCC TGAGGATTTCATTAAGAAGACG---GTTCCAGTTACTCACAAAACCCCTGAAAAGAAACATCACGCTGCTGACCAAACCC TTGAAAGAGAACAAAATGACCAAGTGATGAACATGGATAATGGTCATCCTGAACAGAAAGCACTAAATGGTCAT---GTA GGGGAAATTAAAGAAGCTCAGACATCGGAACTGTTCTCTGCAGAGAAAGAATCCACTTTCAGAACTGGAACAGAGCCTGC AGCTAGC------------------------TTAAATGGTGAAGAAACCAACATAACAAAAAAAGACAAGCTGAGGAAGC AGCCTTCATTCAAGATTGTCTGTGCTCTTGAGCTTGGAGTTGATGGAAGCCCAAGCTCTTCTAATGAGACTGAACTACAG ATCGATAGCTATCCCAGCAGTGAAGAG---ATAAGGAGAAAAAATAATTCTGAACAGAAGCAACTTAGACGCAGCAGAAG ACTTCAGCAGGTGTCAGAA---GAGATTGCAATTGGAGCCAAGAAAGCCCATAAGCCAGATGATCAAGCAGAAGAAAGTT GTATCACTGAAGTTGTCTCAGAACTAAAATTAGGAAATCTACCTGCCTGTGCTCCTGACAATGTCAGTACTGATAAGGAT CAAGTATTAGCTAGTTGCAGTTTCACA------GGAGATGAAAGGAGT---CTGGAAGTAATCCCT---------AGCAG CCAAGACCAAGAT------TTGGCATTGAGTGGAAAGGAAGGGTTGCAA---GGTGAAATATCTCAAGGAAGCCTGGGGA CTGTGGAGGTTCCTGATACTGCTTGGGACACTCAGGACAGTACCTCGTTGTTTCCAGCTGATACTCCTCAA---AATTCA AAA---CCAGGTCCCAGTCCTCACAGAAATCACTGTGAAATAATGGAAACCCCCAAGGAACTCTTAGAAAGTAGTTCATC CAAAAACACTGAAAGTAGTGTAGAAGATTTAAGGAGCCTGATGAAACAAGAAGTTAAA---AATGCCTCCAAGACAATCA CAGAAATGGAGGATAGTGAACTTGACACCCAGTATTTACAGAATACTTTCAAGCGTTCAAAGCGCCAGTCATTTGCACTG TGTTCTAGCCCAAGG------CAAGAATATGTGAAAACCTGTGCTGTCCCTGGGGCTATAAGTCAGCAAAGT-------- ----ACAGTTCGTAGGTACCAGGAAAAAGAAAAGCAAGAAAACAGAGAATCAAACAAGCCTGTGCAGCCAAAGCCTGCTG 
TTGTG---------------------ATAGAGAAGGGGAAGCCAGGTGATCATGCCAAATGGAGCACCAGAGAAGTCTCC AGGCTTTGTCACATAGCTTCATTAAATAGTGGCAATGACTGTGAACCCATTGCTGAAATCAACCAGGGAATTTCACAAGT TCCTGATCAAAACCAATCAGTTTCTCCAGCAAGGTCATCTGATAGTAAAACCATATATGCAAAAAACTTCCTGGAGGAAA GGCTTGATGAGCAAACCACGTGCCCTGAAACAACTATGGGAAATGAAAGCTTAGCCCAAAGCAGCATTAGTTTAGTTAGT CAAAGTAATAGCAGAGAATATGTTTCTAACCCAATTGACTTAATT------ATCAGCGTAGGCTCCAGT----------- ----------------------------------------------------GGAGAAGGCACTCAGGCAGAAAAAAGTG AAAACAAAGAATCTGAATTAAATACACCACCCAAATTAAAGCTTATGCAACCCCAAGTCTCTCAACAGACCTTT---CCT CAGAATAATTGCAAA----------------------------------------------------------------- -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- ------------------------------------------------- >Wombat ---------------NNTGCCAGTTCATTACTTCCTAAAACCAGCAGTTTATTGCCCAACACAGACAGAATGAATGTAGA AAAGGCTGAACTCTGCCATAAAAACAAACAGCCTGCCTTAGGAAGGAACCAAGAGAGCAGTCTCGATGAAAGTAAGGAAA TATGTAGCCCTGGGAAGACCCCAGGTGCCTGTGAG---CGGCATGAGCTGAATGACCATCATCTGTGTGAGAGGCAAGGC CTAGAGGAG---AAGCCGCAGCACCCTGAGAGCCCTAGAGGTAATCCTCAGAACTGCCTGTCTGGAACCAAACTGAAAAG CAGCATTCAGAAAGTTAATGAGTGGTTATCCAGAAGTAGTGACATTTTAGCCTCTGATAACTCCAACGGTAGGAGCCATG AGCAGAGCGCAGAGGTGCCTAGTGCCTTAGAAGATGGGCATCCAGATACCGCAGAGGGAAATTCTAGCGTTTCTGAGAAG ACTGACCCAGTGGCTGACAGCACTGATGATGCCTGGCTACATGTGCCTAAAAGAAGCTGTCTCAGGCCTGCAGAAAAC-- -AATATTGAAGATAAAATATTTGGAAAAACCTATCGGAGAAAGTCAGGTCACCCTAATTTGAATTATGTAACTGAAAACT TGTTTGCTGAAGCTGTGGCTCCTGATTCCTTGATCCCTCTA--------------------------------------- ---------------------GAGGCTCCCAAAAACACCAGGTTAAAGCGTAAAGGAAGGAGCATAGCTGACCTGCAGCC TGAGGATTTCATCAGGAAGACGGACGTTCCAGTTATTCACAAAACCCCTGAAAAGAAAAATCACTCTGTTGACCAAATTC TCAAAAGAGAACAAAGTGACCAAGTGATGAACACGGCTAACAGTCTTCCTGAGCAGAAAGCCCTAGGTGGTCAT---GTA 
GGGGAAGTGAAAGATGTTCAGGTATTAGAGCTGTTCTCTGCGGAGAAAGAATCCACTTTCAGAACTGGAACAGAGCCTGT AGCTGGCAGCACAAACCATGGGGAGCTTGAATTAAATAGTAGAAATGCCAAAATGACAAGAAAAGACAGGCTGAGGAAGA AGCCTTCAGCCAGGATCGTCCGTGCTCTTGAGCTCGTAGTTGATAGAAACCCAAGCTCTTCTAATGAGAGTGAACTGCAG ATCGATAGCTATCCCAGCAGTGAAGAG---ATAAGGAAAGGAACTAATTCTGAACAGAAGCAAATCAGACGCAGCAGAAG GCTTCGGCTGCTATCAGAA---GAAATTGTGGTTGGAGCCAAGAAGGCCCATAAGCCAGATGACCAAGCAGAAGAAAGTT GTATCAGTGAAGTTTTCCCAGAACTAAAAATAGGAAACGTGCCTGCCTGTGCTACTGACAGTCTAACTACTGATAGGGAT CAAGTGTTAGCTAGCTGCAGTTTCACAGAAGAAAGAGATGAAAGGAGC---CTGGAAGCAATCCCA---------AGCAG CAAAGACCAAGAT------CTGCCCTTGAATGGAGGGGAGGGGTTGCAA---GGTGAAAGAGCCCCAGGAAGCCTGGAGG CTTTGGAGGTTCCTGATACTGATTGGGACACTCAGGACAGTACCTCATTGTTTCCAGCTGATACTCCCCAA---AATTCA AAA---GCAGGACCCAGTCCTCACAGAAGCCACAGTGAAATAATGGAAACCCCCAAGGAACTCTTAGATGGTTGTTCATC CAAAAACACTGAAAGTGACGAAGAAGATTTGAGGAGCCTGATGAGACAGGAAGTTAAA---AATGCCTCCAAGACAACCA CAGAAATGGAGGATAGTGAACTCGACACCCAGTATTTACAAAATACCTTCAAACGTTCAAAGCGCCAGTCATTTGCTCTG CGTTCTAGCCCAAGG------CAGGAATGTAGGAAACCCTCTGCTGTCCCTGGGACTGTAAATCAGCAGAGTCCAGATAA CACCACAGATTGTGGGGGCCAGGAAAAAGAAAAGCAGGGAAACAGAGAATCAAACAAGCCTGTGTGGCCAAAGTCTGCAG TCATGAGCTTAGCTGCGGCTTGTCAGACAGAGGAGAGGAAGCCAGGTGTTTATGCCAAATGTAGCACCACAGAAGTGTCC AGGCTTTGTCACATAGCTCCATTACATGGTGTCATTGACTGTGAACACATTGCTGGAAACAACCAGGGAATTTCGCAAGT TCCTGATCAAAAACCATCAGTTTCTCCAGCAAGGTCATCTGCTAGTAAAACTATAAATACAAAAAACCTCCTGGAGGAAA GGCTTGATGAACAGACCACATGTCCTGAAACAGCTATGGGAAATGAAAGCTTAGCCCAAAGCAGCTTAAGTCCAGTAAGC CAAAGTAATAGCAGAGAATATATTTCTAAAGCAACTGACTTAAATAGATTTATCAGCATAGGCTCTAGT----------- ----------------------------------------------------GGAGAAGGCAGTCAGGCACAAAAAGGTG AAAACAAAGAATCTGAATTAAATACACAACCCAAATTAAAGCTTGTGCAACCCCAAATCTGTCAACAAAGCTTT---CCT CAGGATAATTGCAAAGAGTCTAAAAGAAAAGGGAAGGGAGGAAATGGAAAATTAGCTCAGGCCATCAGTACAGATTCATC TCCATGTTTAGAACAA------------------ACTAAAGAGAGTACACATTCTTCTCAGGTTTGTTCTGAGACACCTG ATGACCT------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- ------------------------------------------------- >Bandicoot ---------------------NACTCATTAATGCTTGAAACCAGCAGTTTATTGTCCAACATAGACAGAATGACTACAAA AAAGGCTGAACTCTGTAATAAAAGTAAAGATCCTGGCTTAGCAAGGAACCAACAGGACTGTTTGGGTGAAAGTAAAGAAA TAAGTAGCACTGTGAAGACCCCAGCTGCAGGGGAG---CAGCATAAGCTGAATGCCCACCATCTGTGTGAGAAGCAAGAA CTAGAGGTA---AAGTCACAGCACCCTGAGAGCCCCAAAGGTAGTCCTCGGAGGTGCCGGTCTGGAACCAAACTGAAAAG CAGTATTCAGAAAGTCAGTGAGTGGTTGTCTAGAAGTAATGACATTTTAAACTCTGATTATTCCCATGAGAGAAGCCAGG AGCAGAATGCAGAGATTGCTATTGCCTTAGAAGATGGGCATCTGAATACTGCTGATGGAAATTCTAGCATTTCTGAGAAG ACTGNCCTGGTGGCTGNCACCACTGAT---------------GTGCTGGAAACTAGCTTNTCCAGGCCAGCTAAAAGC-- -AATATTGAAGATAAAATATTTGGGAAGACCTATCGGAGAAAGTCANGTCACTCTAATTTGAATTATCTAACTGAAAACC TCCTTGTTGAAACTATTGCTCCTGATTCTTTGATCCCTCCA--------------------------------------- ---------------------GAATCTCTCAAGAACATCAAGTTAAAGCGAAAAAGAAAAACTATATCTGACCTGCAGCC TGAAGATTTCATCAGGAAGACAGATGTTCCAGTTAGTCACAAAACCCCTGAAAAGAAAAATCCTGCTGTTGACCAGATTC TTGAAAGAGAACAAAATGACCAAGTGATGAACACAGCTAATGGTCATCTTAAACAGAAAGCACTGGGTGATCAT---GTC AAGGAAGTGAAAGATGCTCAGGCAGCAGAACTGTTCTCTACAGAGAAAGAATCAACATTCAGGACTAGAACAGAGCCTAA AGCTGGCAGCATAGTCCACGGGGAGCTCGAATTAAATGGGAGAGGTGCCAAAATGACAAAAAAGGACAGGTTGAGGAAAA AGCCTTCAGCCAGGATTGTCCGTGCTCTTGAGCTTGTGGCTGATAAAAACTCAGGCTCTTCTAATGAGGCTGAATTACAG ATCGATAGTTATCCCAGCAGTGATGAG---ATAAGGAAAGGAAATAATTCTGAACAGAAGCAAATCAGACGCAGCAGAAG ACTTCAGCTACTGTCAGAA---GAAATTGCAGTTGGAACCAAGAAGGCCCATGAGCCATATGACCAAGCAGAAGAAAGAT GTGTCAAGAAGGTTTTCCCAGAATTGGAAATGGGAACTGTGTCTGCTGGTGCTACTGACAGCCTATCTACTGATAGGGAT GAAGTGTTAGCTAGTTGCAGTTTCACAGAT---GGAGAGGAAAGGAAC---CTGGAAGTAATCCCA---------AGCAG CAAAGACCAAGACCAAGATCTGGCATTAAGTGAAAGGGAAAGGTTGCAA---GAAAAAAGAACCCAAGGAAACCTGGAGC TTCTGGAGGTTCCAGATACTTATTGGGAAACTCAAGACAGTACTTCACTGTTTCCAGCTGAAACTCCCCAG---AATTCA AAA---GCAGGACCCAGTCCTCACAGAAGTAACTGTGAAATAATGGAATCCTCCAAAGAACTCTTCGATGCTTATTCATC 
CAAAAACACTGACAGTGGCACTGAAGGTTTG---------ATAAGACAGGAAATTAAA---TATGGCTCTGAGACATCTT CAGTAATGGAGGATAGTGAACTTGACACCCAGTATTTGCAGAATACCTTCAAACGTTCTAAGCGCCAAACATTTGCTCTG TGTTCCAACCCAAAG------CAGGAACAGATAAAACCCTGCTCTGTTCCTAGGGCTGTCAGCCACCAGAGTTCAGATAA TGCCTCAGACTGTGGGGGCCAAGAAAAAGAAAAGCAAGGAAACAGAGAATCAAATAAACCTGGCCAACTAGCATCTGCAG TCAGGAGCTCAGCTGCCACTTGTCAGACA---GAGAGGAAGCCAGGTGATCCTGACAAATGTAGCGCCACAGGAGTTTCC AGACTTTGTCACATAGCTCCATTACAAGAAGGCAATGACTGTGAATACATTGCTGGAAAAAAACAGGGAATTTCACAAGT TCCTAATCAAAAACAATCAGTTTCTCCGACAAGGTCATCAGTTAGTAAA---ATATATACAAAAAACCTCTTGAAAGATA GACTT---GAACAGACCACATGCCCTGAAACAGTTATGGGAAATGAAAGCTTAGGCCAAAGCAGCTTAAGTCCAGTTAGC CAAAATAACAGCAGAGAATATATTTCTAAAGCAACTGACTTAAATAGATTTATTAGCAGGGACTCTAGT----------- ----------------------------------------------------GGAGAAGACAGTCGGGCAGAAAAAGGTA AAATCAAAGAATCTGAATTAAATACACCAACCAAATTCAAACTTGTGCAACCACAAGTATGTCAACTAAGCTTT---TCT CAGGATAATTGCAAAGAGCCAAAAAGAAAAGGGAAAGGAGGAAATGGAATATTAGCTCTGGCCACCAGTACAGATTCATC TCCATGTTTAAAAGAA------------------ACTAAAGAGAGTACACATTCTTCTCAGG------------------ -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- ------------------------------------------------- cogent3-scinexus-e0aee79/tests/data/c_elegans_WS199_dna_shortened.fasta000066400000000000000000000001501520253266500262020ustar00rootroot00000000000000>I gcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaa gcctaagcctaagcctaagcctaagcctaagcctaagcctcogent3-scinexus-e0aee79/tests/data/formattest.fasta000066400000000000000000000060621520253266500227110ustar00rootroot00000000000000>Rhesus with extra words tgtggcacaaatactcatgccagctcattacagcatgagaac---agtttgttactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaagcaaacagcctggcttg gcaaggagccaacataacagatggactggaagtaaggaaacatgtaatgataggcagact cccagcacagagaaaaaggtagatctgaatgctaatgccctgtatgagagaaaagaatgg aataagcaaaaactgccatgctctgagaatcctagagacactgaagatgttccttgg >Manatee 
tgtggcacaaatactcatgccagctcattacagcatgagaatagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtcataaaagcaaacagcctggctta acaaggagccagcagagcagatgggctgaaagtaaggaaacatgtaatgataggcagact cctagcacagagaaaaaggtagatatgaatgctaatccattgtatgagagaaaagaagtg aataagcagaaacctccatgctccgagagtgttagagatacacaagatattccttgg >Pig tgtggcacagatactcatgccagctcgttacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattttgtaataaaagcaagcagcctgtctta gcaaagagccaacagagcagatgggctgaaagtaagggcacatgtaatgataggcagact cctaacacagagaaaaaggtagttctgaatactgatctcctgtatgggagaaacgaactg aataagcagaaacctgcgtgctctgacagtcctagagattcccaagatgttccttgg >GoldenMol tgtggcacaaatactcatgccagctcattacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaaacaaacagtctggctta gcgaggagccagcagagcagatgggctggaagtaaggcagcgtgcaatgacaagcagact cctagcacacagacagagctatataggagtgctggtcccatgcacaggagaaaagaagta aataagctgaaatctccatggtctgagagtcctggagctacccaagagattccttgg >Rat tgtggcacagatgctcgtgccagctcattacagcgtgggacccgcagtttattgttcact gaggacagactggatgcagaaaaggctgaattctgtgatagaagcaaacagtctggcgca gcagtgagccagcagagcagatgggctgacagtaaagaaacatgtaatggcaggccggtt ccccgcactgagggaaaggcagatccaaatgtggattccctctgtggtagaaagcagtgg aatcatccgaaaagcctgtgccctgagaattctggagctaccactgacgttccttgg >Human tgtggcacaaatactcatgccagctcattacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaagcaaacagcctggctta gcaaggagccaacataacagatgggctggaagtaaggaaacatgtaatgataggcggact cccagcacagaaaaaaaggtagatctgaatgctgatcccctgtgtgagagaaaagaatgg aataagcagaaactgccatgctcagagaatcctagagatactgaagatgttccttgg >Jackrabbit ------------------------------------------------------------ ---------------------aaggctgaattctgtaataagagcaaacagcctggctta gcaagaagccaacagagcagatgggctgaaagtaaggaaacatgtaatgataggcagact cccagcacagagaaaaaggtagttctgaatgttgactgcctgtatgggagaaaacaacag gataagcagaaacctccatgccctgagacctctggagataaccaagatgtttcttgg >FlyingSqu tgtggcacaaatactcatgccagctcattacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaagcaaacagcctggctta 
gcaaggagccagcagagcagatgggctaaaagtaaggaaacctgtaatgataggcaaatt cccagctcagagaaaaaggtagatttgaatgctgatccccaatatgagaaaaaagaacca agtaagcagaaacatccatgctctgagaattccagagatacccaagatgttccttgg >FreeTaile tgtggcacagatactcatgccagctcattacagcatgagaacagcagtttactactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaagcaagcagcctggctta gcaaagagccagcagagcagatgggctgaaagtaaagaaacatgtaatgataggcagact ctcagcacagagaaaagggtagttctgaatgctgatcctctgaat--------------- aggagaaaagaacctccaggctctaactatcctagagattcccaagatgttccttgg >Mole tgtggcataaatactcatgccagcttattacagcatgaaaacagcagtttattactcact gaaaacagaatgaatgtagaaaaggctgaattctgtaataaaagcaaacagcctgactta gcaaagagccagcagaacagatgggctgaaagtaaagaaacatgtaatgataggcagact tccagcccagagaaaagggtagacccgaatgctgatcccatgtatgggagaaaagaactg aataagcagaaacctccatgctctgacagccccagaaattcccaaggtgttgcctgg cogent3-scinexus-e0aee79/tests/data/formattest.fasta.gz000066400000000000000000000013401520253266500233220ustar00rootroot00000000000000Uformattest.fasta͖on0 ſ@Џ0@,a#etɀaڒH?={zm}cz^ AL`*@YP37ȅbiA_Gsx7!c!Is/MHLg݇;?s9"Z#$Mg"e` pՆ[I4Gaymc`udli!6ņ1]&\GY(} 7zFcd:R ȬVdFd+m dVFXPHdD12T"NiIJ߾Qk֬ך.h/ј&e!/#Z*Pk|hMKNZ{UZK)~l9gJdȹ<5tTBd3jώZ8a,TaϳӢB9ˌH Xހv!ch;vLg;mT:i:nm<[֭u+Vvƚ_ϑ%rjvW?h26dFA[*4|Y!wCr:ENjpḀhCFQ$CM}}{ܴ'6B3]' 8\V֊+P="h&b!3̋Rlek1Q3PW@l4g:f˳n~vx2 cogent3-scinexus-e0aee79/tests/data/long_testseqs.fasta000066400000000000000000000311711520253266500234120ustar00rootroot00000000000000>Human TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACT AAAGACAGAATGAATGTAGAAAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTA GCAAGGAGCCAACATAACAGATGGGCTGGAAGTAAGGAAACATGTAATGATAGGCGGACT CCCAGCGAAAAAAAGGTAGATCTGAATGCTGATCCCCTGTGTGAGAGAAAAGAATGGAAT AAGCAGAAACTGCCATGCTCAGAGAATCCTAGAGATACTGAAGATGTTCCTTGGATAACA CTAAATAGCAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAGGT TCTGATGACTCACATGATGGGGAGTCTGAATCAAATGCCTTGGACGTTCTAAATGAGGTA GATGAATATTCTGGTTCTTCAGAGAAAATAGACTTACTGGCCAGTGATCCTCATGAGGCT TTAATATGTGAAAGAGTTCACTCCAAATCAGTAGAGAGTAATATTGAAGACAAAATATTT 
GGGAAAACCTATCGGAAGAAGGCAAGCCTCCCCAACTTAAGCCATGTAACTGAAATTATA GGAGCATTTGTTACTGAGCCACAGATAATACAAGAGCGTCCCCTCACAAATAAATTAAAG CGTAAAAGGACATCAGGCCTTCATCCTGAGGATTTTATCAAGAAAGCAGATTTGGCAGTT CAAAAGACTCCTGAAATGATAAATCAGGGAACTAACCAAACGGAGCAGAATGGTCAAGTG ATGAATATTACTAATAGTGGTCATGAGAATAAAACAAAAGGTGATTCTATTCAGAATGAG AAAAATCCTAACCCAATAGAATCACTCGAAAAAGAATCTTTCAAAACGAAAGCTGAACCT ATAAGCAGCAGTATAAGCAATATGGAACTCGAATTAAATATCCACAATTCAAAAGCACCT AAAAAGAATCTGAGGAGGAAGTCTACCAGGCATATTCATGCGCTTGAACTAGTCAGTAGA AATCTAAGCCCACCTAATTGTACTGAATTGCAAATTGATAGTTGTTCTAGCAGTGAAGAG ATAAAGAAAAAAAAGTACAACCAAATGCCAGTCAGGCACAGCAGAAACCTACAACTCATG GAAGGTAAAGAACCTGCAACTGGAGCCAAGAAGAACAAGCCAAATGAACAGACAAGTAAA AGACATGACAGCGATACTTTCCCAGAGCTGAAGAATGCACCTGGTTCTTTTACTAAGTGT TCAAATACCAGTGAACTTAAAGAATTTAATCCTAGCCTTCCAAGAGAAGAAAAAGAGAAA CTAGAAACAGTTAAAGTGTCTAATAATGCTGAAGACCCCAAAGATCTCATGTTAAGTGGA GAAAGGGTTTTGCAAACTGAAAGATCTGTAGAGAGTAGCAGTATTTCATTGGTACCTGGT ACTGATTATGGCACTCAGGAAAGTATCTCGTTACTGGAAGTTAGCACTCTAGGGAAGGCA AAAACAGAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGGACTA ATTCATGGTTCCAAAGATAATAGAAATGACACAGAAGGCTTTAAGTATCCATTGGGACAT GAAGTTAACCACTCAAATCCAGAAGAGGAATGTGCACACTCTGGGTCCTTAAAGAAACAA AGTCCAAAAGTCACTTTTGAATGTGAACAAAAGGAAAATCAAGGAAAGAATGAGTCTAAT AAGCCTGTACAGACAGTTAATATCACTGCAGGCTTTCCTGTGGTTGGTCAGAAAGATAAG CCAGTTGATAATGCCAAATGTAAAGGAGGCTCTAGGTTTTGTCTATCATCTCAGTTCAGA GGCAACGAAACTGGACTCATTACTCCAAATAAACATGGACTTTTACAAAACCCATATCGT ATACCACCACTTTTTCCCATCAAGTCATTTGTTAAAACTAAATGTAAGAAAAATCTGCTA GAGGAAAACTTTGAGGAACATTCAATGTCACCTGAAAGAGAAATGGGAAATGAGAACATT CCAAGTACAGTGAGCACAATTAGCCGTAATAACAGAGAAAATGTTTTTAAAGAAGCCAGC TCAAGCAATATTAATGAAGTAGGTTCCAGTGATGAAAACATTCAAGCAGAACTAGGTAGA AACAGAGGGCCAAAATTGAATGCTATGCTTAGATTAGGGGTTTTGCAACCTGAGGTCTAT AAACAAAGTCTTCCTGGAAGTAATAAGCATCCTGAAATAAAAAAGCAAGAAGTTCAGACT GTTAATACAGATTTCTCTCCACTGATTTCAGATAACTTAGAACAGCCTATGAGTAGTCAT GCATCTCAGGTTTGTTCTGAGACACCTGATGACCTGTTAGATGATGGTGAAATAAAGGAA GATACTAGTTTTGCTGAAAATGACATTAAGGAAAGTTCTGCTGTTTTTAGCAAAAGCGTC 
CAGAAAGGAGAGCTTAGCAGGAGTCCTAGCCCTTTCACCCATACACATTTGGCTCAGGGT TACCGAAGAGGG >HowlerMon TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTGTTACTCACT AAAGACACACTGAATGTAGAAAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTA GCAAGGAGCCAACATAACAGATGGGCTGAAAGTGAGGAAACATGTAATGATAGGCAGACT CCCAGCGAGAAAAAGGTAGATGTGGATGCTGATCCCCTGCATGGGAGAAAAGAATGGAAT AAGCAGAAACCTCCGTGCTCTGAGAATCCTAGAGATACTGAAGATGTTGCTTGGATAATG CTAAATAGCAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAACT TCTGATGACTCACATGATGGGGGGTCTGAATCAAATGCCTTGGAAGTTCTAAATGAGGTA GATGGATATTCTAGTTCTTCAGAGAAAATAGACTTACTGGCCAGTGATCCTCATGATCAT TTGATATGTGAAAGAGTTCACTGCAAATCAGTAGAGAGTAGTATTGAAGATAAAATATTT GGGAAAACCTATCGGAGGAAGGCAAGCCTCCCTAACTTGAGCCACGTAACTGAAATTATA GGAGCATTTGTTACTGAGCCACAGATAATACAAGAGCATCCTCTCACAAATAAATTAAAG CGTAAAAGGACATCAGGACTTCATCCTGAGGATTTTATCAAGAAAGCAGATTTGGCAGTT CAAAAGACTCCTGAAAAGATAAATCAGGGAACTAACCAAACAGAGCGGAATGATCAAGTG ATGAATATTACTAACAGTGGTCATGAGAATAAAACAAAAGGTGATTCTATTCAGAATGAG AACAATCCTAACCCAGTAGAATCACTGGAAAAAGAATCATTCAAAAGTAAAGCTGAACCT ATAAGCAGTAGTATAAGCAATATGGAATTAGAATTGAATGTCCACAATTCCAAAGCATCT AAAAAGAATCTGAGAAGGAAGTCTACCAGGCATATTCATGAGCTTGAACTAGTCAGTAGA AATCTAAGCCCACCTAATTATACTGAAGTACAAATTGATAGTTGTTCTAGCAGTGAAGAG ATAAAGAAAAAAAATTACAACCAAATGCCAGTCAGGCACAGCAGAAAGCTACAACTCATG GAAGATAAAGAACGTGCAGCTAGAGCCAAAAAGAGCAAGCCAAATGAACAAACAAGTAAA AGACATGCCAGTGATACTTTCCCAGAACTGAGGAACATACCTGGTTCTTTTACTAACTGT TCAAATACTAATGAATTTAAAGAATTTAATCCTAGCCTTCCAAGAGAACAAACAGAGAAA CTAGAAACAGTTAAACTGTCTAATAATGCCAAAGACCCCAAAGATCTCATGTTAAGTGGA GAAAGTGTTTTGCAAATTGAAAGATCTGTAGAGAGTAGCAGTATTTTGTTGATACCTGGT ACTGATTATGGCACTCAGGAAAGTATCTCATTACTGGAAGTTAGCACTCTGGGGAAGGCA AAAACAGAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGAACTA ATTCATGGTTCTAAAGATACTAGAAATGGCACAGAAGGCTTGAAGTATCCATTGGGACCT GAAGTTAACTACTCAAATCCAGAAAAGGAATGTGCATGCTCTAGGTCCTTAAAGAAACAA AGTCCAAAGGTCACTCCTGAATGTGAACAAAAGGAAAATCAAGGAGAGAAAGAGTCTAAT GAGCTTGTAGAGACAGTTAATACCACTGCAGGCTTTCCTATGGTTTGTCAGAAAGATAAG CCAGTTGATTATGCCAGATGTGAAGGAGGCTCTAGGCTTTGTCTATCATCTCAGTTCAGA 
GGCAACGAAACTGGACTCATTATTCCAAATAAACATGGACTTTTACAGAACCCATATCAT ATGTCACCGCTTATTCCCACCAGGTCATTTGTTAAAACTAAATGTAAGAAAAACCTGCTA GAAGAAAACTCTGAGGAACATTCAATGTCACCTGAAAGAGCAATGGGAAACAAGAACATT CCAAGTACAGTGAGCACAATTAGCCATAATAACAGAGAAAATGCTTTTAAAGAAACCAGC TCAAGCAGTATTTATGAAGTAGGTTCCAGTGATGAAAACATTCAAGCAGAGCTAGGTAGA AACAGAAGGCCAAAATTGAATGCTATGCTTAGATTAGGGCTTCTGCAACCTGAGATTTGT AAGCAAAGTCTTCCTATAAGTGATAAACATCCTGAAATTAAAAAGCAAGAAGTTCAGACT GTTAATACAGACGTCTCTCTACTGATTTCATATAACCTAGAACAGCATATGAGCAGTCAT ACATCTCAGGTTTGTTCTGAGACACCTGACAACCTGTTAGATGATGGTGAAATAAAGGAA GATACTAGTTTTGCTGAATATGGCATTAAGGAGACTTCTACTGTTTTTAGCAAAAGTGTC CAGAGAGGAGAGCTCAGCAGGAGCCCTAGCCCTTTCACCCATACACATTTGGCTCAGGTT TACCAAAGAGGG >Mouse TGTGGCACAGATGCTCATGCCAGCTCATTACAGCCTGAGACCAGCAGTTTATTGCTCATT GAAGACAGAATGAATGCAGAAAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCATA GCAGTGAGCCAGCAGAGCAGATGGGCTGCAAGTAAAGGAACATGTAACGACAGGCAGGTT CCCAGCGGGGAAAAGGTAGGTCCAAACGCTGACTCCCTTAGTGATAGAGAGAAGTGGACT CACCCGCAAAGTCTGTGCCCTGAGAATTCTGGAGCTACCACCGATGTTCCTTGGATAACA CTAAATAGCAGCGTTCAGAAAGTTAATGAGTGGTTTTCCAGAACTGGTGAAATGTTAACT TCTGACAGCGCATCTGCCAGGAGGCACGAGTCAAATGCTTTGGAAGTTTCAAACGAAGTG GATGGGGGTTTTAGTTCTTCAAGGAAAACAGACTTAGTAACCCCCGACCCCCATCATACT TTAATGTGTGGAAGAGACTTCTCCAAACCAGTAGAGGATAATATCAGTGATAAAATATTT GGGAAATCCTATCAGAGAAAGGGAAGCCGCCCTCACCTGAACCATGTGACTGAAATTATA GGCACATTTATTACAGAACCACAGATAACACAAGAGCAGCCCTTCACAAATAAATTAAAA CGTAAGAGAAGTACATCCCTTCAACCTGAGGACTTCATCAAGAAAGCAGATTCAGCAGGT CAAAGGACTCCTGACAACATAAATCAGGGAACTGACCTAATGGAGCCAAATGAGCAAGCA GTGAGTACTACCAGTAACTGTCAGGAGAACAAAATAGCAGGTAGTAATCTCCAGAAAGAG AAAAGCGCTCATCCAACTGAATCATTGAGAAAGGAACCTTCCACAGCAGGAGCCAAATCT ATAAGCAACAGTGTAAGTGATTTGGAGGTAGAATTAAACGTCCACAGTTCAAAAGCACCT AAGAAAAATCTGAGGAGGAAGTCTATCAGGTGTGCTCTTCCACTTGAACCAATCAGTAGA AATCCAAGCCCACCTACTTGTGCTGAGCTTCAAATCGATAGTTGTGGTAGCAGTGAAGAA ACAAAGAAAAACCATTCCAACCAACAGCCAGCCGGGCACCTTAGAGAGCCTCAACTCATC GAAGACACTGAACCTGCAGCGGATGCCAAGAAGAACGAGCCAAATGAACACATAAGGAAG AGACGTGCCAGCGATGCTTTCCCAGAAGAGAAAAACAAAGCTGGTTTATTAACTAGCTGT 
TCAAGTCCTAGAAAATCTCAAGGGCCTAATCCCAGCCCTCAGAGAACAGGAACAGAGCAA CTTGAAACACGCCAAATGTCTGACAGTGCCAAAGAACTCGGGGATCGGGTCCTAGGAGGA GAGCCCAGTGGCAAAACTGACCGATCTGAGGAGAGCACCAGCGTATCCTTGGTACCTGAC ACTGACTACGACACTCAGAACAGTGTCTCAGTCCTGGACGCTCACACTGTCAGATATGCA AGAACAGGATCCGCTCAGTGTATGACTCAGTTTGTAGCAAGCGAAAACCCCAAGGAACTC GTCCATGGCTCTAACAATGCTGGGAGTGGCACAGAGGGTCTCAAGCCCCCCTTGAGACAC GCGCTTAACCTCTCAAAACCTCAAAAGGACTGTGCTCACTCTGTGCCCTCAAAGGAACTG AGTCCAAAGGTGACAGCTAAAGGTAAACAAAAAGAACGTCAGGGACAGGAAGAATTTGAA AGTCACGTACAAGCAGTTGCGGCCACAGTGGGCTTACCTGTGCCCTGTCAAGAAGGTAAG CTAGCTGCTGATACAATGTGTGATAGAGGTTGTAGGCTTTGTCCATCATCTCATTACAGA AGCGGGGAGAATGGACTCAGCGCCACAGGTAAATCAGGAATTTCACAAAACTCACATTTT AAACAATCAGTTTCTCCCATCAGGTCATCTATAAAAACTGACAATAGGAAACCTCTGACA GAGGGACGATTTGAGAGACATACATCATCAACTGAGATGGCGGTGGGAAATGAGAACCTT CAGAGTACAGTGCACACAGTTAGCCTGAATAACAGAGGAAATGCTTGTCAAGAAGCCGGC TCGGGCAGTATTCATGAAGTATGTTCCACTGGTGACTCCTTCCCAGGACAACTAGGTAGA AACAGAGGGCCTAAGGTGAACACTGTGCCTCCATTAGATAGTATGCAGCCTGGTGTCTGT CAGCAAAGTGTTCCTGTAAGTGATAAGTATCTTGAAATAAAAAAGCAGGAGGGTGAGGCT GTCTGTGCAGACTTCTCTCCACTATTCTCAGACCATCTTGAGCAATCTATGAGTGGTAAG GTTTTTCAGGTTTGCTCTGAGACACCTGATGACCTGCTGGATGATGTTGAAATACAGGGA CATACTAGCTTTGGTGAAGGTGACATAATGGAGAGATCTGCTGTCTTTAACGGAAGCATC CTGAGAAGGGAGTCCAGTAGGAGCCCTAGTCCTGTAACCCATGCATCGAAGTCTCAGAGT CTCCACAGAGCG >NineBande TGTGGCACAAATACTCATGCCAACTTATTACAGCATGAGAACAGCAGTTTATTACTCACT AAAGACAGAATGAATGTAGAAAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTA GCAAGGCGCCAACAGAGCAGATGGGCTGAAAGTAAGGAAACATGTAATGATAGGCAGACT CCCAGCGAGAAAAAGGTAGATGTGGATGCTGATCCCCTGTATGGGCGAAAAGAACTGAAT AAGCAGAAACCTCCATGCTCTGAGAGTCATAGAGATACCCAAGATATTCCTTGGATAATG CTGAATAGTAGCATTCAGAAAGTTAACGAGTGGTTTTCCAGAGGTGATGACATATTAACT TCTGATGACTCACACGATAGGGGGTCTGAATTAAATGCATTGAAAGTTTCAAAAGAAGTA GATGAATATTCTAGTTTTTCAGAGAAGATAGACTTAATGGCCATTAATCCTCATGATACT TTACAATTTGAAAGAGTCCAATTGAAACCAGCAGAGAGTAACATCAAAGATAAAATATTT GGGAAAACCTATCATAGGAAGGCAAGCCTCCCTAACTTGAGCCACATAACCCGATTTATA GGAGCTATTGCTGCAGAGCCCAAGATAACACAAGAGCATTCCCTCCAAAATAAAATAAAG 
CGTAAAAGGGCATCAGGCCTTCGTCCTGAGGATTTATCCAAGAAAGTAGATTTGACAGTT CAAAAAACCCCTGAAAAGATAAATCAGGGAACTGACCAAATGGAGCAGAATGATCCAGTG ATGAATATTGCTAATAGTGGTCATGAGAATGAAACAAAAGGTGATTGTGTTCAGAAAGAG AAAAATGCTAATCCGACAGAATCATTGGGAAAAGAATCTTTCAGAACTAAAGGCGAACCT ATAAGCAGCAGTATAAGCAATATGGAACTAGAATTAAATATTTTAAATTCAAAAGCATCT AAGAAGAATCCGAAGAGGATGTCCACCAGGCATATTCATGCACTTGAACTAGGCAGTAGA AATCCAAGCCCACCTAATCATACTGAACTACAAATTGATAGTTGTTCTAGCATTGAAGAG ATAGAGAAAATAAATTCTAACCAAAAGCCAATCAGACACAACAGAATGCTTCAACTCACG AAAGAAAAAGAAACCACAACTGGAGCCAAAAAGAATAAGCCAAATGAACAAATAAGTGAA AGACATGCCAGTGATGCTTTCCTAGAACTTAAAAATGTAACTGATTTTCTTCCTAAATGT TCAAGTTCTGATAAACTTCAAAAATTTAATTCTAGCCTGCAAGGAGAAGTAGCAGAGAAC CTAGAAACAATTCAAGTGTCTGATAGTACCAGGGACCCTGAAGATCTGGTGGTAAGTGGA GAAAAGTGTTTGCAAACTGAAAGATCTGCAGAGAGTACCGGTATTTCAGTGGTACCTGAT ACTGATTATGGCACTCAAGACAGTATCTCATTACTGGAAGCTGACACCCTGGGGAAGGCA AAAACAGCACTAAATCAACATGTGAGTCAGTATGTAGCAATTAGAAATGCCACTGAACTT TCCCATGGTTCTAAAGACACTAGAAATGACACTGAAGATTTTAAGGATTCATTGAGACAT GAAGTTAACCACTCGAATCCAGAAAATGAATGTGCACACTCCAGGTTCTTAGGGAAACAA AGTCCAAAAGTCACCTTTGAATGTAGACATAAAGAAAATCAGGGGAAGAAAGAGTCTAAA AAACATGTGCAGGTAATTCACACAACTGCAGGCTTTCCTATAGTTTGTCAGAAAGATAAG CCAGGTGATTATGCCAAAGGTCAAGGAGTCTCTAGGCTTTGTCAGTCCTCTCAGGCCAGA GGCAATGAATCTGAACTCATTAATTCAAATGAACATGAAATTTCACAAAACCCAGATCAA ATGCCATCACTTTCTCACATGAAGTCATCTGTTAAAACTAAATGTAAGGAAAACCTGTCA GAGGAAAAGTTTGAGGAACTTACAGTGTCACTTGAAAGAACAATGGTAAATGAGAACATT CAAAGTACAGTAAGCACAATTAGCCACAGTAACAGAGAAAACACTTTTAAAGAAGCCAGC TCAAGCAGTATTAATGAAGTAGGGTCCAGTGATGAGAACATTCAAGCAGAAGTAGGTAGA AACAGAGCACCTAAATTAAATGCTATGCTCAGATTAGGTCTTATGCAACCTGAAGTCTAT AAGCAAAGTCTTCCTATAACCAATAAATATCCTGAAATAAAAAGTCAAGGAATTCGGGCT GTTGATATAGACTTCTCTCCACTAATTTCAGATAACCTACAACTACCTATGAATAGTTGT GCTTCCCAGATTTGTTCTGAGACACCTGATGACTTGTTAGATGATGATGAAATAAAGGAA AATAACTGCTTTGCTGAAAGTGACATTAAGGAAAGATCTGCTATTTTTAGCAAAACTGTC CAGAAAAGAGAGTTCAGAAGGAGCCCTAGCCCTTTAGTCCATACAAGTTTTGCTCAGGGT CACCAAAGAAAG >DogFaced TGTGGCACAAATACTCATGCCAACTCATTACAGCATGAGAACAGCAGTTTATTATACACT 
AAAGACAGAATGAATGTAGAAAAGACTGACTTCTGTAATAAAAGCAAACAGCCTGGCTTA GCAAGGAGCCAGCAGAACAGATGGGTTGAAACTAAGGAAACATGTAATGATAGGCAGACT TCCAGCGAGAAAAAGGTAGTTCTGAATGCTGATCCCCTGAATGGAAGAATAAAACTGAAT AAGCAGAAACCTCCATGCTCTGACAGTCCTAGAGATTCCAAAGATATTCCTTGGATAACA CGGAATAGTAGCATACAGAAAGTTAATGAGTGGTTTTCCAGACGTGATGAAACATTAACT TCTGATGTCTTACTTGATGAGAGGTCTGAATCAAATGTGGTAGAAGTTCCAAATGAAGTA GATGGATACTCTGGTGCTTCAGAGGAAATAGCCTTAAAGGCCAGTGATCCTCATGGTGCT TTAATATGTGAAAGAGTTCACTCCAAATTGATAGAAAGTAATATTGAAGATAAAATATTT GGGAAAACATATCGGAGGAAAGCAAGCCTCCCTAACTTAAGCCACATAACTGAAATTACA AGAGCATCTGCTACAGAACCTCAGATAACACAAGAGTGCCCCCTCACAAATAAACTAAAA CGTAAAAGAACATCAGGCCTTCATCCTGAGGATTTTATCAAGAAAATAGATTTGACAACT CAAAAAACTTCTGAAAATATAATTGAGGGAACTGACCAAATAGAGCAGAATGGTCATGTG ATGAATAGTTCTAATGATGGTCATGAGAATGAAACAAAAGGTGATTATGTTCAGAAGAAG AAAAATACAAACCCAACAGAATCATTGGAAAAAGAATCTTTCAGAACTAAAGTTGAGTCT GTACCCAACAACATAAGCAATGTGGAACTAGAATTAAATATTCACGGTTCAAAAGCACTC AAGAAGAATCTGAGGAGGAAGTCCACCAGGCATATTCATGCACTTGAACTAGTCAATAGA AATTCAAGCCCACCTAATCATACTGAACTACAAATTGATAGTTGTTCCAGCAGTGAAGAA CTGAAGGAAAAAAATTCTGACCGAATGCCAGACAGACACAGCAAAAAACTTCAGTTCGTA GAAGATAAAGAATCTGCAACTGGAGCCAAGAAGAACATGCCAAATGAGGCAATAAATAAA AGACTTTCCAGTGAAGCTTTTCCCGAATTAAATAACGTACCTGGTTTTTTTACTAATGGT TCAAGTTCTAATAAACGTCAAGAGTTTAATCCTAGCCTTCAAGGAGAAGAAATAGAGAAT CTACGAACAATTCAAGTGTCTAATAGCACCAAAGACCCCAAAATTCTAATCTTTGGTGAA GGAAGAGGTTCACAAACTGATCGATCTACAGAGAGTACCAGTATTTTATTGGGACCTGAA ACGGATTATGGCACTCAAGATAGTATCTCATTACTGGAATCTGACATCCCAGGGAGGGCA AAGACAGCACCAAACCAACATGCAGATCTGTGTGCAGCAATTGAAAACCCCAGAGAACTT ATTCATGATTTTAAAGAAACTAGAAATGACACAGAGAGCTTTAAAGATCCATTGAGACAT GAAGTTAACTCCTCAGACCCAGAAAAGGAATGTGCACACTCCAGGTCCTTGATAAAACAA AGTCCAAAAGTCACTCTTGAATGTGACCGAAAAGGAAATCAGGGAAAGAAAGAGTCTAAC GAGCATGTGCAGGCAGTTTATACAACTATAGGCTTTCCTGGGGTTTCTGAGAAAGACAAA CCAGGAGATTATGCCAGATATAAAGAAGTCTCTAGGCTTTGTCAGTCATTTCAGTCTAGA AGAAATGAAACTGAGCTCACTATTGCAAATAAACTTGGACTTTCACAAAACCCATATCAT ATGCCATCCATTTCTCCCATCAAGTCATCTGTTAAAACTATATGTAAGAAAAATCTGTCA 
GAGGAAAAGTTTGAAGAACATTCAATATTCCCTGAAAGAGCAATAGGAAATGAGACCATT CAAAGTACAGTGGGCACAATTAGCCAAAATAACAGAGAAAGCACTTTTAAAGAAGGCAGC TCAAGCGGTATTTATGAAGCAGGTTCCAGTGGTGAAAACATTCAAGCAGAACTAAGTAGA AACAGAGGACCAAAATTAAATGCTGTGCTTCAGTTGGGTCTCATGCAGCCTGAAGTCTAT GAGCAAAGCCTTCCTCTAAGTAATAAACATTCTGAAATAAAAAGGCAAGGAGTTCAGGCT GTTAATGCAGATGTCTCTCCACAAATTTCAGATAACTTAGAGCAACCTATGAACAGTAAT ATTTCTCAGGTTTGTTCTGAGACACCGGATGACCTGTTAAATGATGACAAAATAAAGGAC AATATCAGCTTTGATGAAAGTGGCATTCAGGAAAGATCTGCTGTTTTTAGCAAAAATGTC CAGAAAGGAGAATTCAGAAGGAGCCCTAGTCCCTTAGCCCATGCAAGTTTGTCTCAAGGT CGCCCAAGAAGG cogent3-scinexus-e0aee79/tests/data/primate_brca1.fasta000066400000000000000000000471751520253266500232440ustar00rootroot00000000000000>Galago TGTGGCAAAAATACTCATGCCAGCTCATTACAGCATGAGAGCAGTTTATTACTCACTAAA GACAAAATGAATGTAGAAAAGGCTGAATTTTGTAATAAAAGCAAACAGCCTGGCTTAGCA AGGAGCCAACAGAGCAGATCGGCTCAAAGTAAGGAAACATGCAATGATAGGCACACTTGC AGCCCTGAGCAAAAGGTAGATCTGAATACTGCTCCCCCATATGGGAGAAAAGAACAGAAT AAGGAGAAACTTCTATGCTCCAAGAATCCTAGAGATAGCCAAGATGTTCCTTGGATAACA CTAAATAGCAGCATTCAGAAAGTTAATGAATGGTTTTCTAGAAGTGATGAAATGTTAACT TCTGATGACTCACATGATGAGGGTTCTGAATCACATGCTGAAGTAGCTGGAGCCTTAGAA GTTCCAAGTGAAGTAGATGGATATTCCAGTTCCTCAGAGAAAATAGACTTACTGGCCAGT GATCCTCATTATCCTATAATATGTAAAAGTGAAAGAGTTCACTCCAAACCAATAAAGAGT AAAGTTGAAGATAAAATATTTGGGAAAACTTATCGGAGGAAGGCAAGCCTCCCTAACTTA AGCCATGTAACTGAAAATCTAATTATAAGAGCAGCTGCTACTGAGCCACAGATAACACAA GAGTGTTCCCTCACAAATAAATTAAAACGTAAAAGGAGAACTACATCAGGTCTTTGTCCT GAGGATTTTATCAAGAAGGCAGATTTGGCAGTTCAAAAGACACCTGAAAAGAGAATTCAG GGAACTAACCAAGTGGATCAGAATAGTCACGTGGTAAATATTACTAATAGTGGTTATGAG AATGAAACAAAAGGTGATTATGTTCAGAATGAAAAAAATGCTAACTCAACAGAATCATTG GAAAAAGAATCTCTCGGAACTAAAGCTGAACCTATAAGCAGCAGTATAAGTAATATGAAA TTAGAATTAAATATTCACAATTCAAAAGCAAGTAAAAAGAAAAGGCTGAGGAAGAAGTCT TCTAGCAGGCATATTCGTGCACTTGAACTAGTAGTCAATAAAAATCCAAGCCCTCCTAAT CATACCAACCTACAAATTGACAGTTGTTCTAGCAGTGAAGAAATAAAGGATAAAAGTTCT GACCAAATACCAGTCAGGCATAGCAGAAAGCCTGGACTCATGGAAGATAGAGAACCTGCA ACTGGAGCCAAGAAAAGTAACAAGCCAAATGAGCAAATAAGTAAAAGACATGTCAGTGAT 
ACTTTCCCAGAAGTGGCATTAACAAATATATCTAGTTTTTTTACTAACTGTTCAGGTTCT AATAGAAAAGAATTTGTCAATCCTAGCCTTCAAAGAAAAAAAACAGAAGAGAACGAAGAA ACAATTCAAGTGTCTAATAGTACCAAAGGTCCGGTGTTAAGTGGAGAAAGGGTTTTGCAA ATTGAAAGTGAAGAAAGATCTATAAAAAGCACCAGTATTTCATTGGTACCTGATACTGAT TATGGTACTCAGGACAGTAACTCGTTACTGAAAGTTAAAGTCTTACGGAAGGTGAAAACA GCACCAAATAAACATGCAAGTCAGGGTACAGCCACTGAAAACCCCAAGGAACTAATCCAT GGTTGCTCTAAAGATACTGGAAATGACACAGAGGGCTATAAGGATCCATTGAGACATGAA ATTAACCACATTCAGAAGATAAGCATGGAAATGGAAGACAGTGAACTTGATACTCAGTAT TTACAGAATACATTCAAGTTTTCAAAGCGTCAGTCGTTTGCTCTGTTTTCAAACCTAGGA AAGGAATGTGCAACAGTCTGTGCCCAGTCTCTCTCTGCGTCCTTAAGAAAAGGTTCAAAA GTCATTCTTGAATGTGAACAAATAGAAAATCCAGGAATGAAAGAGCCTAAAATCAAGCAT ATACAGGGAAATAATATCAATACAGGCTTCTCTGTAGTTTGTCAGAAAGATAAGACAGAT GATTATGCCAAATACATCAAAGAAGCATCTAGGTTTTGTTTGTCAAATCAGTTTCGAGAC AATGAAACTGAATCCATTACTGTAAATAAACTTGGAATTTTACAAAACCTCTATCATATA CCACCACTTTCTCCTATCAGGCTATTTGATAAAACTAAATGTAATACAAACCTGTTAGAG GAAAGGTTTGAAGAACATTCAGTGTTACCTGAAAAAGCAGTAGGAAACGAGAACGTTCCA AGTACAATGAATACAATTAACCAAAATAACAGAGAAAGTGCTTATAAAGAAGCCAGTTCA AGCAGTATCAATGAAGTAAGCTCGAGTACTAATGAAGTGGGCTCCAGTGTTAACGAAGTA GGCCCCAGTAGTGAAAACATTCAAGCAGAACTAGATAAAAACAGAGGACCTAAGTTGAAT GCTGTGCTTAGATTAGGTCTTATGCAACCTGAAGTCTATAAACAAAATCTTCCTATAAGT AATTGTGAACATCCTAAAATAAAAGGGCAAGAAGAAAATGGAGTAGTTCAACCTGTTAAT CCAGATTTTTCTTCATGTCTAATTTCAGATAACCTAGAACAACCTACGAGAAGTAGTCAT GCTTCTCAGCTTTGTTCTGAGACACCTGATGACTTATTAGTTGATGATGAACTAAAGGAA AATACCAGTTTTGCTGAAAATAACATTAAGGAAAGATCTGCTGTTTTTAGCAAAAATGTC ATGAGAAGAGAGATTAGCAGGAGCCCTAGCCCTTTAGCCCATATACATTTGACTCAGGCT CACCAAAGAGAGGTTAGGAAATTAGAGTCCTCAGAAGAGAACATGTCTAGTGAA >HowlerMon TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGTTTGTTACTCACTAAA GACACACTGAATGTAGAAAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCA AGGAGCCAACATAACAGATGGGCTGAAAGTGAGGAAACATGTAATGATAGGCAGACTCCC AGCACAGAGAAAAAGGTAGATGTGGATGCTGATCCCCTGCATGGGAGAAAAGAATGGAAT AAGCAGAAACCTCCGTGCTCTGAGAATCCTAGAGATACTGAAGATGTTGCTTGGATAATG CTAAATAGCAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAACT 
TCTGATGACTCACATGATGGGGGGTCTGAATCAAATGCCAAAGTAGCTGAAGCATTGGAA GTTCTAAATGAGGTAGATGGATATTCTAGTTCTTCAGAGAAAATAGACTTACTGGCCAGT GATCCTCATGATCATTTGATATGTAAAAGTGAAAGAGTTCACTGCAAATCAGTAGAGAGT AGTATTGAAGATAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCTAACTTG AGCCACGTAACTGAAAATCTAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAA GAGCATCCTCTCACAAATAAATTAAAGCGTAAAAGGAGAGTTACATCAGGACTTCATCCT GAGGATTTTATCAAGAAAGCAGATTTGGCAGTTCAAAAGACTCCTGAAAAGATAAATCAG GGAACTAACCAAACAGAGCGGAATGATCAAGTGATGAATATTACTAACAGTGGTCATGAG AATAAAACAAAAGGTGATTCTATTCAGAATGAGAACAATCCTAACCCAGTAGAATCACTG GAAAAAGAATCATTCAAAAGTAAAGCTGAACCTATAAGCAGTAGTATAAGCAATATGGAA TTAGAATTGAATGTCCACAATTCCAAAGCATCTAAAAAGAATAGGCTGAGAAGGAAGTCT TCTACCAGGCATATTCATGAGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAAT TATACTGAAGTACAAATTGATAGTTGTTCTAGCAGTGAAGAGATAAAGAAAAAAAATTAC AACCAAATGCCAGTCAGGCACAGCAGAAAGCTACAACTCATGGAAGATAAAGAACGTGCA GCTAGAGCCAAAAAGAGTAGCAAGCCAAATGAACAAACAAGTAAAAGACATGCCAGTGAT ACTTTCCCAGAACTGAGGTTAACAAACATACCTGGTTCTTTTACTAACTGTTCAAATACT AATGAAAAAGAATTTGTCAATCCTAGCCTTCCAAGAGAACAAACAGAAGAGAAACTAGAA ACAGTTAAACTGTCTAATAATGCCAAAGACCCCAAAGATCTCATGTTAAGTGGAGAAAGT GTTTTGCAAATTGAAAGATCTGTAGAGAGTAGCAGTATTTTGTTGATACCTGGTACTGAT TATGGCACTCAGGAAAGTATCTCATTACTGGAAGTTAGCACTCTGGGGAAGGCAAAAACA GAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGAACTAATTCAT GGTTGTTCTAAAGATACTAGAAATGGCACAGAAGGCTTGAAGTATCCATTGGGACCTGAA GTTAACTACAGTCAGGAAACAAGCATAGATATGAGAGAAAGTGAACTTGATACTCAATAT TTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCTGTTTTCAAATCCAGGA AAGGAATGTGCAACATTCTCTGCCTGCTCTAGGTCCTTAAAGAAACAAAGTCCAAAGGTC ACTCCTGAATGTGAACAAAAGGAAGAAAATCAAGGAGAGAAAGAGTCTAATATCGAGCTT GTAGAGACAGTTAATACCACTGCAGGCTTTCCTATGGTTTGTCAGAAAGATAAGCCAGTT GATTATGCCAGATGTATCGAAGGAGGCTCTAGGCTTTGTCTATCATCTCAGTTCAGAGGC AACGAAACTGGACTCATTATTCCAAATAAACATGGACTTTTACAGAACCCATATCATATG TCACCGCTTATTCCCACCAGGTCATTTGTTAAAACTAAATGTAAGAAAAACCTGCTAGAA GAAAACTCTGAGGAACATTCAATGTCACCTGAAAGAGCAATGGGAAACAAGAACATTCCA AGTACAGTGAGCACAATTAGCCATAATAACAGAGAAAATGCTTTTAAAGAAACCAGCTCA 
AGCAGTATTTATGAAGTAGGTTCCAGTACTAATGAAGCAGGTTCTAGTACTAATGAAGTA GGCTCCAGTGATGAAAACATTCAAGCAGAGCTAGGTAGAAACAGAAGGCCAAAATTGAAT GCTATGCTTAGATTAGGGCTTCTGCAACCTGAGATTTGTAAGCAAAGTCTTCCTATAAGT GATTGTAAACATCCTGAAATTAAAAAGCAAGAACATGAAGAAGTAGTTCAGACTGTTAAT ACAGACGTCTCTCTATGTCTGATTTCATATAACCTAGAACAGCATATGGGAAGCAGTCAT ACATCTCAGGTTTGTTCTGAGACACCTGACAACCTGTTAGATGATGGTGAAATAAAGGAA GATACTAGTTTTGCTGAATATGGCATTAAGGAGACTTCTACTGTTTTTAGCAAAAGTGTC CAGAGAGGAGAGCTCAGCAGGAGCCCTAGCCCTTTCACCCATACACATTTGGCTCAGGTT TACCAAAGAGGGGCCAAGAAATTAGAGTCCTCGGAAGAGAATTTATCTAGTGAG >Rhesus TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGTTTGTTACTCACTAAA GACAGAATGAATGTAGAAAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTGGCA AGGAGCCAACATAACAGATGGACTGGAAGTAAGGAAACATGTAATGATAGGCAGACTCCC AGCACAGAGAAAAAGGTAGATCTGAATGCTAATGCCCTGTATGAGAGAAAAGAATGGAAT AAGCAAAAACTGCCATGCTCTGAGAATCCTAGAGACACTGAAGATGTTCCTTGGATAACA CTAAATAGCAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAAGT TCTGATGACTCACATGATGGGGGGTCTGAATCAAATGCCAAAGTAGCTGATGTATTGGAC GTTCTAAATGAGGTAGATGAATATTCTGGTTCTTCAGAGAAAATAGACTTACTGGCCAGT GATCCTCATGAGCCTTTAATATGTAAAAGTGAAAGAGTTCACTCCAGTTCAGTAGAGAGT AATATTAAAGACAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAACCTTCCCAATTTA AGCCATGTAACTGAAAATCTAATTATAGGAGCACTTGTTACTGAGTCACAGATAATGCAA GAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGAACTACATCAGGTCTTCATCCT GAGGATTTTATAAAGAAAGCAGATTTGGCAGTTCAAAAGACTCCTGAAATAATAAATCAG GGAACTAACCAAATGGAGCAGAATGGTCAAGTGATGAATATTACTAATAGTGCTCATGAG AATAAAACAAAAGGTGATTCTATTCAGAATGAGAAAAATCCTAACCCAATAGAATCACTG GAAGAAGAATCTTTCAAAACTAAAGCTGAACCTATAAGCAGCAGTATAAACAATATGGAA CTAGAATTAAATATCCACAATTCAAAAGCACCTAAAAAAAATAGGCTGAGGAGGAAGTCT TCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAAC TGTACTGAACTACAAATTGATAGTTGTTCTAGCAGTGAAGAGATAAAGAAAAAAAATTAC AACCAAATGCCAGTCAGGCACAGCAGAAACCTACAACTCATGGAAGATAAAGAATCTGCA ACTGGAGCCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGACATGCCAGTGAT ACTTTCCCAGAACTGAAGTTAACAAAGGTACCTGGTTCTTTTACTAACTGTTCAAATACT AGTGAAAAAGAATTTGTCAATCCTAGCCTTTCAAGAGAAGAAAAAGAAGAGAAACTAGAA 
ACAGTTAAAGTGTCTAATAATGCCAAAGACCCCAAAGATCTCATCTTAAGTGGAGAAAGG GTTTTACAAACTGAAAGATCTGTAGAGAGTAGCAGTATTTCATTGGTACCTGGTACCGAT TATGGCACTCAGGAAAGTATCTCATTACTGGAAGTTAGCACTCTAGGGAAGGCAAAAACA GAACGAAATAAATGTATGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGAACTAATTCAT GGTTGTTCTGAAGATACTAGAAATGACACAGAAGGCTTTAAGTATCCATTGGGAAGTGAA GTTAACCACAGTCAGGAAACAAGCATAGAAATAGAAGAAAGTGAACTTGATACTCAGTAT TTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCCTTTGCTCTGTTTTCAAATCCAGGA GAGGAATGTGCAACATTCTCTGCCCACTCTAGGTCCTTAAAGAAACAAAGTCCAAAAGTT ACTTCTGAATGTGAACAAAAGGAAGAAAATCAAGGAAAGAAACAGTCTAATATCAAGCCT GTACAGACAGTTAATATCACTGCAGGCTTTTCTGTGGTTTGTCAGAAAGATAAGCCAGTT GATAATGCCAAATGTATCAAAGGAGGCTCTAGGTTTTGTCTATCATCTCAGTTCAGAGGC AACGAAACTGGACTCATTACTCCAAATAAACATGGACTGTTACAAAACCCATACCATATA CCACCACTTTTTCCTGTCAAGTCATTTGTTAAAACTAAATGTAACAAAAACCTGCTAGAG GAAAACTCTGAGGAACATTCAGTGTCACCTGAAAGAGCAGTGGGAAACAAGAACATTCCA AGTACAGTGAGCACAATTAGCCATAATAACAGAGAAAATGCTTTTAAAGAAGCCAGCTCG AGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGTGGGCTCCAGTATTAATGAAGTA GGTTCCAGTGATGAAAACATTCAAGCAGAACTAGGTAGAAACAGAGGGCCAAAATTGAAT GCTGTGCTTAGATTAGGGCTTTTGCAACCTGAGGTCTGTAAACAAAGTCTTCCTATAAGT AATTGTAAGCATCCTGAAATAAAAAAGCAAGAACATGAAGAATTAGTTCAGACTGTTAAT ACAGACTTCTCTCCATGTCTGATTTCAGATAACCTAGAACAGCCTATGGGAAGTAGTCAT GCGTCTGAGGTTTGTTCTGAGACTCCTGATGATCTGTTAGATGATGGTGAAATAAAGGAA GATACTAGTTTTGCTGAAAATGACATTAAGGAGAGTTCTGCTGTTTTTAGCAAAAGCATC CAGAGAGGAGAGCTCAGCAGGAGCCCTAGCCCTTTCACCCATACACATTTAGCTCAGGGT TACCGAAAAGAGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAG >Orangutan TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGTTTATTACTCACTAAA GACAGAATGAATGTAGAAAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCA AGGAGCCAACATAACAGATGGGCTGGAAGTAAGGAAACATGTAATGATAGGCAGACTCCC AGCACAGAAAAAAAGGTAGACCTGAATGCTGATCCCCTGTGTGAGAGAAAAGAATGGAAT AAGCAGAAACTGCCATGCTCAGAGAATCCTAGAGATACTGAAGATGTTCCTTGGATAACA CTAAATAGCAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGACGAACTGTTAGGT TCTGATGACTCACATGATGGGAGGTCTGAATCAAATGCCAAAGTAGCGGATGTATTGGAC GTTCTAAATGAGGTAGATGAATATTCTGGTTCTTCAGAGAAAATAGACTTACTGGCCAGT 
GATCCTCATGAGGCTTTAATTTGTAAAAGTGAAAGAGTTCACTCCAAATCAGTAGAGAGT AATATTGAAGACAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCCAACTTA AGCCATGTAACTGAAAATCTAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAA GAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGAGCTACATCAGGCCTTCATCCT GAGGATTTTATCAAGAAAGCAGATTTGGCAGTTCAAAAGACTCCTGAAATGATAAATCAG GGAACTAACCAAATGGAGCAGAATGGTCAAGTGATGAATATTACTAATAGTGGTCATGAG AATAAAACAAAAGGTGATTCTATTCAGAATGAGAAAAATCCTAACCCAATAGAATCACTC GAAAAAGAATCTTTCAAAACAAAAGCTGAACCTATAAGCAGCAGTATAAGCAATATGGAA CTCGAATTAAATATCCATAATTCAAAAGCACCTAAAAAGAATAGGCTGAGGAGGAAGTCT TCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAAT TGTACTGAATTGCAAATTGATAGTTGTTCTAGCAGTGAAGAGATAAAGAAAAAAAAATAC AACCAAATGCCAGTCAGGCACAGCAGAAACCTACAACTCATGGAAGATAAAGAACCTGCA ACTGGAGCCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGACATGACAGCGAT ACTTTCCCAGAGCTGAAGTTAACAAATGCACCTGGTTCTTTTACTAACTGTTCAAATACC AGTGAGAAAGAATTTGTCAATCCTAGCCTTCCAAGAGAAGAAAAAGAAGAGAAACTAGGA ACAGTTAAAGTGTCTAATAATGCCAAAGACCCCAAAGATCTCATGTTAAGTGGAGGAAGG GTTTTGCAAACTGAAAGATCTGTAGAGAGTAGCAGTATTTCATTGGTACCTGGTACTGAT TATGGCACTCAGGAAAGTATCTCGTTACTGGAAGTTAGCACTCTAGGGAAGGCAAAAACA GAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGAACTAATTCAT GGTTGTTTCAAAGATACTAGAAATGACACAGAAGGGTTTAAGTATCCATTGGGACATGAA GTTAACCACAGTCAGGAAACAAGCATAGAAATGGAAGAAAGTGAACTTGATACTCAGTAT TTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCTGTTTTCAAATCCAGGA GAGGAATGTGCAACATTCTCTGCCCACTCTAGGTCCTTAAAGAAACAAAGTCCAAAAGTC ACTTTTGAATGTGAACAAAAGGAAGAAAATCAAGGAAAGAATGAGTCTAATATCAAGCCT GTACAGACAGCTAATATCACTGCAGGCTTTCCTGTGGTTTGTCAGAAAGATAAGCCAGTT GATTATGCCAAATGTATCAAAGGAGGCTCTAGGTTTTGTCTATCATCTCAGTTCAGAGGC AACGAAACTGGACTCATTACTCCAAATAAACATGGACTTTCACAAAACCCATATCATATA CCACCACTTTTTCCCATCAAGTCATTTGTTAAAACTAAATGTAAGAAAAACCTGCTAGAG GAAAACTCTGAGGAACATTCAATGTCACCTGAAAGAGAAATGGGAAACGAGAACATTCCA AGTACAGTGAGCATAATTAGCCGTAATAACAGAGAAAATGTTTTTAAAGAAGCCAGCTCA AGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGTGGGCTCCAGTATTAATGAAGTA GGTTCCAGTGATGAAAACATTCAAGCAGAACTAGGTAGAAGCAGAGGGCCAAAATTGAAT 
GCTATGCTTAGATTAGGGGTTTTGCAACCTGAGGTCTATAAACAAAGTTTTCCTGGAAGT AATGGTAAGCATCCTGAAATAAAAAAGCAAGAATATGAAGAAGTACTTCAGACTGTTAAT ACAGACTTCTCTCCATGTCTGATTTCAGATAACCTAGAACAGCCTATGAGAAGTAGTCAT GCATCTCAGGTTTGTTCTGAGACACCTAATGACCTGTTAGATGATGGTGAAATAAAGGAA GATACTAGTTTTGCTGAAAATGACATTAAGGAAAGTTCTGCTGTTTTTAGCAAAAGCGTC CAGAGAGGAGAGCTTAGCAGGAGTCCTAGCCCTTTCACCCATACACATTTGGCTCAGGGT TACCGAAGAGGGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAG >Gorilla TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGTTTATTACTCACTAAA GACAGAATGAATGTAGAAAAGGCTGAATTCTGTAATAAAAACAAACAGCCTGGCTTAGCA AGGAGCCAACATAACAGATGGGCTGGAAGTAAGGAAACATGTAATGATAGGCGGACTCCC AGCACAGAAAAAAAGGTAGATCTGAATGCTGATCCCCTGTGTGAGAGAAACGAATGGAAT AAGCAGAAACTGCCATGCTCAGAGAATCCTAGAGATACTGAAGATGTTCCTTGGATAACA CTAAATAGCAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAGGT TCTGATGACTCACATGATGGGGGGTCTGAATCAAATGCCAAAGTAGCTGATGTATTGGAC GTTCTAAATGAGGTAGATGAATATTCTGGTTCTTCAGAGAAAATAGACTTACTGGCCAGT GATCCTCATGAGGCTTTAATATGTAAAAGTGAAAGAGTTCACTCCAAATCAGTAGAGAGT AATATTGAAGACAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCCAGCTTA AGCCATGTAACTGAAAATCTAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAA GAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGAGCTACATCAGGCCTTCATCCT GAGGATTTTATCAAGAAAGCAGATTTGGCAGTTCAAAAGACTCCTGAAATGATAAATCAG GGAACTAACCAAATGGAGCAGAATGGTCAAGTGATGAATATTACTAATAGTGGTCATGAG AATAAAACAAAAGGTGATTCTATTCAGAATGAGAAAAATCCTAACCCAATAGAATCACTA GAAAAAGAATCTTTCAAAACGAAAGCTGAACCTATAAGCAGCAGTATAAGCAATATGGAA CTCGAATTAAATATCCACAATTCAAAAGCGCCTAAAAAGAATAGGCTGAGGAGGAAGTCT TCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAAT TGTACTGAATTGCAAATTGATAGTTGTTCTAGCAGTGAAGAGATAAAGAAAAAAAAGTAC AACCAAATGCCAGTCAGGCACAGCAGAAACCTACAGCTCATGGAAGATAAAGAACCTGCA ACTGGAGCCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGACATGACAGCGAT ACTTTCCCAGAGCTGAAGTTAACAAATGCACCTGGTTCTTTTACTAACTGTTCAAATACC AGTGAAAAAGAATTTGTCAATCCTAGCCTTCCAAGAGAAGAAAAAGAAGAGAAACTAGAA ACAGTTAAAGTGTCTAATAATGCCGAAGACCCCAAAGATCTCATGTTAAGTGGAGAAAGG GTTTTGCAAACTGAAAGATCTGTAGAGAGTAGCAGTATTTCATTGGTACCTGGTACTGAT 
TATGGCACTCAGGAAAGTATCTCGTTACTGGAAGTTAGCACTCTAGGGAAGGCAAAAACA GAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGGACTAATTCAT GGTTGTTCCAAAGATACTAGAAATGACACAGAAGGCTTTAAGTATCCATTGGGACATGAA GTTAACCACAGTCGGGAAACAAGCATAGAAATGGAAGAAAGTGAACTTGATGCTCAGTAT TTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCTGTTTTCAAATCCAGGA GAGGAATGTGCAACATTCTCTGCCCACTCTAGGTCCTTAAAGAAACAAAGTCCAAAAGTC ACTTTTGAATGTGAACAAAAGGAAGAAAATCAAGGAAAGAATGAGTCTAATATCAAGCCT GTACAGACAGTTAATATCACTGCAGGCTTTCCTGTGGTTTGTCAGAAAGATAAGCCAGTT GATTATGCCAAATGTATCAAAGGAGGCTCTAGGTTTTGTCTATCATCTCAGTTCAGAGGC AACGAAACTGGACTCATTACTCCAAATAAACATGGACTTTTACAAAACCCATATCATATA CCACCACTTTTTCCCATCAAGTCATTTGTTAAAACTAAATGTAAGAAAAACCTGCTAGAG GAAAACTTTGAGGAACATTCAATGTCACCTGAAAGAGAAATGGGAAATGAGAACATTCCA AGTACAGTGAGCACAATTAGCCGTAATAACAGAGAAAATGTTTTTAAAGAAGCCAGCTCA AGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGTGGGCTCCAGTATTAATGAAGTA GGTTCCAGTGATGAAAACATTCAAGCAGAACTAGGTAGAAACAGAGGGCCAAAATTGAAT GCTATGCTTAGATTAGGGGTTTTGCAACCTGAGGTCTATAAACAAAGTCTTCCTGGAAGT AATTGTAAGCATCCTGAAATAAAAAAGCAAGAATATGAAGAAGTAGTTCAGACTGTTAAT ACAGATTTCTCTCCATGTCTGATTTCAGATAACTTAGAACAGCCTATGGGAAGTAGTCAT GCATCTCAGGTTTGTTCTGAGACACCTGATGACCTGTTAGATGATGGTGAAATAAAGGAA GATACTAGTTTTGCTAAAAATGACATTAAGGAAAGTTCTGCTGTTTTTAGCAAAAGCGTC CAGAGAGGAGAGCTTAGCAGGAGTCCTAGCCCTTTCACCCATACACATTTGGCTCAGGGT TACCGAAGAGGGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAG >Human TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGTTTATTACTCACTAAA GACAGAATGAATGTAGAAAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCA AGGAGCCAACATAACAGATGGGCTGGAAGTAAGGAAACATGTAATGATAGGCGGACTCCC AGCACAGAAAAAAAGGTAGATCTGAATGCTGATCCCCTGTGTGAGAGAAAAGAATGGAAT AAGCAGAAACTGCCATGCTCAGAGAATCCTAGAGATACTGAAGATGTTCCTTGGATAACA CTAAATAGCAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAGGT TCTGATGACTCACATGATGGGGAGTCTGAATCAAATGCCAAAGTAGCTGATGTATTGGAC GTTCTAAATGAGGTAGATGAATATTCTGGTTCTTCAGAGAAAATAGACTTACTGGCCAGT GATCCTCATGAGGCTTTAATATGTAAAAGTGAAAGAGTTCACTCCAAATCAGTAGAGAGT AATATTGAAGACAAAATATTTGGGAAAACCTATCGGAAGAAGGCAAGCCTCCCCAACTTA 
AGCCATGTAACTGAAAATCTAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAA GAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGACCTACATCAGGCCTTCATCCT GAGGATTTTATCAAGAAAGCAGATTTGGCAGTTCAAAAGACTCCTGAAATGATAAATCAG GGAACTAACCAAACGGAGCAGAATGGTCAAGTGATGAATATTACTAATAGTGGTCATGAG AATAAAACAAAAGGTGATTCTATTCAGAATGAGAAAAATCCTAACCCAATAGAATCACTC GAAAAAGAATCTTTCAAAACGAAAGCTGAACCTATAAGCAGCAGTATAAGCAATATGGAA CTCGAATTAAATATCCACAATTCAAAAGCACCTAAAAAGAATAGGCTGAGGAGGAAGTCT TCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAAT TGTACTGAATTGCAAATTGATAGTTGTTCTAGCAGTGAAGAGATAAAGAAAAAAAAGTAC AACCAAATGCCAGTCAGGCACAGCAGAAACCTACAACTCATGGAAGGTAAAGAACCTGCA ACTGGAGCCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGACATGACAGCGAT ACTTTCCCAGAGCTGAAGTTAACAAATGCACCTGGTTCTTTTACTAAGTGTTCAAATACC AGTGAAAAAGAATTTGTCAATCCTAGCCTTCCAAGAGAAGAAAAAGAAGAGAAACTAGAA ACAGTTAAAGTGTCTAATAATGCTGAAGACCCCAAAGATCTCATGTTAAGTGGAGAAAGG GTTTTGCAAACTGAAAGATCTGTAGAGAGTAGCAGTATTTCATTGGTACCTGGTACTGAT TATGGCACTCAGGAAAGTATCTCGTTACTGGAAGTTAGCACTCTAGGGAAGGCAAAAACA GAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGGACTAATTCAT GGTTGTTCCAAAGATAATAGAAATGACACAGAAGGCTTTAAGTATCCATTGGGACATGAA GTTAACCACAGTCGGGAAACAAGCATAGAAATGGAAGAAAGTGAACTTGATGCTCAGTAT TTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCCGTTTTCAAATCCAGGA GAGGAATGTGCAACATTCTCTGCCCACTCTGGGTCCTTAAAGAAACAAAGTCCAAAAGTC ACTTTTGAATGTGAACAAAAGGAAGAAAATCAAGGAAAGAATGAGTCTAATATCAAGCCT GTACAGACAGTTAATATCACTGCAGGCTTTCCTGTGGTTGGTCAGAAAGATAAGCCAGTT GATAATGCCAAATGTATCAAAGGAGGCTCTAGGTTTTGTCTATCATCTCAGTTCAGAGGC AACGAAACTGGACTCATTACTCCAAATAAACATGGACTTTTACAAAACCCATATCGTATA CCACCACTTTTTCCCATCAAGTCATTTGTTAAAACTAAATGTAAGAAAAATCTGCTAGAG GAAAACTTTGAGGAACATTCAATGTCACCTGAAAGAGAAATGGGAAATGAGAACATTCCA AGTACAGTGAGCACAATTAGCCGTAATAACAGAGAAAATGTTTTTAAAGAAGCCAGCTCA AGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGTGGGCTCCAGTATTAATGAAATA GGTTCCAGTGATGAAAACATTCAAGCAGAACTAGGTAGAAACAGAGGGCCAAAATTGAAT GCTATGCTTAGATTAGGGGTTTTGCAACCTGAGGTCTATAAACAAAGTCTTCCTGGAAGT AATTGTAAGCATCCTGAAATAAAAAAGCAAGAATATGAAGAAGTAGTTCAGACTGTTAAT 
ACAGATTTCTCTCCATATCTGATTTCAGATAACTTAGAACAGCCTATGGGAAGTAGTCAT GCATCTCAGGTTTGTTCTGAGACACCTGATGACCTGTTAGATGATGGTGAAATAAAGGAA GATACTAGTTTTGCTGAAAATGACATTAAGGAAAGTTCTGCTGTTTTTAGCAAAAGCGTC CAGAAAGGAGAGCTTAGCAGGAGTCCTAGCCCTTTCACCCATACACATTTGGCTCAGGGT TACCGAAGAGGGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAG >Chimpanzee TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGTTTATTACTCACTAAA GACAGAATGAATGTAGAAAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCA AGGAGCCAACATAACAGATGGGCTGGAAGTAAGGAAACATGTAATGATAGGCGGACTCCC AGCACAGAAAAAAAGGTAGATCTGAATGCTGATCCCCTGTGTGAGAGAAAAGAATGGAAT AAGCAGAAACTGCCATGCTCAGAGAATCCTAGAGATACTGAAGATGTTCCTTGGATAACA CTAAATAGCAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAGGT TCTGATGACTCACATGATGGGGGGTCTGAATCAAATGCCAAAGTAGCTGATGTATTGGAC GTTCTAAATGAGGTAGATGAATATTCTGGTTCTTCAGAGAAAATAGACTTACTGGCCAGC GATCCTCATGAGGCTTTAATATGTAAAAGTGAAAGAGTTCACTCCAAATCAGTAGAGAGT AATACTGAAGACAAAATATTTGGGAAAACCTATCGGAGGAAGGCAAGCCTCCCCAACTTA AGCCATGTAACTGAAAATCTAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAA GAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGAGCTACATCAGGCCTTCATCCT GAGGATTTTATCAAGAAAGCAGATTTGGCAGTTCAAAAGACTCCTGAAATGATAAATCAG GGAACTAACCAAATGGAGCAGAATGGTCAAGTGATGAATATTACTAATAGTGGTCATGAG AATAAAACAAAAGGTGATTCTATTCAGAATGAGAAAAATCCTAACCCAATAGAATCACTC GAAAAAGAATCTTTCAAAACGAAAGCTGAACCTATAAGCAGCAGTATAAGCAATATGGAA CTCGAATTAAATATCCACAATTCAAAAGCACCTAAAAAGAATAGGCTGAGGAGGAAGTCT TCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAAT TGTACTGAATTGCAAATTGATAGTTGTTCTAGCAGTGAAGAGATAAAGAAAAAAAAGTAC AACCAAATGCCAGTCAGGCACAGCAGAAACCTACAACTCATGGAAGATAAAGAACCTGCA ACTGGAGTCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGACATGACAGCGAT ACTTTCCCAGAGCTGAAGTTAACAAATGCACCTGGTTCTTTTACTAACTGTTCAAATACC AGTGAAAAAGAATTTGTCAATCCTAGCCTTCCAAGAGAAGAAGAAGAAGAGAAACTAGAA ACAGTTAAAGTGTCTAATAATGCCGAAGACCCCAAAGATCTCATGTTAAGTGGAGAAAGG GTTTTGCAAACTGAAAGATCTGTAGAGAGTAGCAGTATTTCATTGGTACCTGGTACTGAT TATGGCACTCAGGAAAGTATCTCGTTACTGGAAGTTAGCACTCTAGGGAAGGCAAAAACA GAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGGACTAATTCAT 
GGTTGTTCCAAAGATACTAGAAATGACACAGAAGGCTTTAAGTATCCATTGGGACATGAA GTTAACCACAGTCGGGAAACAAGCATAGAAATGGAAGAAAGTGAACTTGATGCTCAGTAT TTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCTGTTTTCAAATCCAGGA GAGGAATGTGCAACATTCTCTGCCCACTGTAGGTCCTTAAAGAAACAAAGTCCAAAAGTC ACTTTTGAACGTGAACAAAAGGAACAAAATCAAGGAAAGAATGAGTCTAATATCAAGCCT GTACAGACAGTTAATATCACTGCAGGCTTTCCTGTGGTTTGTCAGAAAGATAAGCCAGTT GATTATGCCAAATGTATCAAAGGAGGCTCTAGGTTTTGTCTATCATCTCAGTTCAGAGGC AACGAAACTGGACTCATTACTCCAAATAAACATGGACTTTTACAAAACCCATATCATATA CCACCACTTTTTCCCATCAAGTCATTTGTTAAAACTAAATGTAAGAAAAACCTGCTAGAG GAAAACTTTGAGGAACATTCAATGTCACCTGAAAGAGAAATGGGAAATGAGAACATTCCA AGTACAGTGAGCACAATTAGCCGTAATAACAGAGAAAATGTTTTTAAAGAAGCCAGCTCA AGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGTGGGCTCCAGTATTAATGAAGTA GGTTCCAGTGATGAAAACATTCAAGCAGAACTAGGTAGAAACAGAGGGCCAAAATTGAAT GCTATGCTTAGATTAGGGGTTTTGCAACCTGAGGTCTATAAACAAAGTCTTCCTGAAAGT AATTGTAAGCATCCTGAAATAAAAAAGCAAGAATATGAAGAAGTAGTTCAGACTGTTAAT ACAGATTTCTCTCCATGTCTGATTTCAGATAACTTAGAACAGCCTATGGGAAGTAGTCAT GCATCTCAGGTTTGTTCTGAGACACCTGATGACCTGTTAGATGATGGTGAAATAAAGGAA GATACTAGTTTTGCTGAAAATGACATTAAGGAAAGTTCTGCTGTTTTTAGCAAAAGCGTC CAGAGAGGAGAGCTTAGCAGGAGTCCTAGCCCTTTCACCCATACACATTTGGCTCAGGGT TACCGAAGAGGGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAG cogent3-scinexus-e0aee79/tests/data/primates_brca1.fasta000066400000000000000000000713471520253266500234250ustar00rootroot00000000000000>TreeShrew tgtggcataaatacttatgccagctcattacagcatgagaacagcagtttattactcact aaggacagaatgaatgtagaaaaggctgaattgtgtaataaaagccaacaacctgactta gcaaggagccagcagagcagatggactgaaaataaggaaacatgtaatgataggcagatt cccagcacagaaaaaaaggtagatctaaatgctgatcccctgtgtgggaagaaaaaacaa gctaagcagaaacygctatgttctaacagtcctagagatnnngaccaagattctccttgg ataactctaaatagtagcattcagaaagttaatgaatggttttccagaagtgatgaaatg ttaacttctaacgactcacatgatggtgagtctgaannnnnnnnnnnnatagctggtgca ttygaagytccaaataaagtagatgaatattctggttcttcagaaaaaatagacttaatg gccaacaatcttcatgatgctttaataagtaaaagtgaaggaatctactccaaaccagta gagggtaatattgaagataaaatatttgggaaaacctatcggaggaaagcaagtcttcct 
aacttgagccgtgtaactgatgatctaattagaggggcatttgttacagagcctgagata actcgagagcgtcccttcacaaataaattaaagcggaaaaggagaactatatcaggcctt catcctgaagattttatcaagaaaacagatttggcagttgttcaaaagactcctgaaaag ataaatcagagaactgaccaaatagagcataatggtcaggtgatgagtattgctaatagt ggtcatgagaatgaaacaaaaggtgattatatttcgaaagagaaaaatgctaacccaatg gaatcattagaaaaagaatctgctctcagaactaaagctgagcccataagcagcagtgta agcaatatggaactagaaataaataaccacagttcagaagcacctaagaagaataggctg aggagaaagttttctgctaggcatattcgcacacttgaactagtagacaataaaagtcca agcccacctaatcgtactgaactacaaattgacagttattctagcggtgaagagagaaag aaaaagnnnggtgagcaaatgccagttggacacagcagaaagtttcaacttgaggaagag aaagaacctacaactggagccaagaaaaataaccagccaaatacagaaataagtgaaaga catgccagtggtgttatcccagatctgaagttaacaaacatacctggttttttcacaaac tcttcgagttctaataaacttccagaatttgtccatcgtagccttcaaagagaannnaaa gaagagaacnnncgagaaacaattcaaatatccagtagtaccaaannnnnnnnngatctg gtattaaggggagaannnaggggtttgcaagatgtaaggtctgcagagagtaccagtatt tctttggtacctgatactgatgataacacccaggatagcatctcattactagatgctaac cccctagctaggaaggcaaaaacagcaccaaatcaatgtgtaaatcagagtgcaacaact gaaaaccccaaggaacttatacacagttgttctaaaactactaggaatnnnnnngaaggc ttcaaggatccattgaaaagtgaagttaatcatattcaggagatgagtgtagaaatggag gagagtgaacttgatactcagtatttacagaatacattcaggagttcaaagcgtcagtca tttgctctgtcttcaaatccaggaaatccagaaaaggaacatgtctgtgttnnnnnnnnn nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn aaagaaagtctgaaagagtctaacatccaacatatacaggcagttagtaccatggttnnn nnnnnnnnnnnntttcagaaagataagnnnctaggtgattttgctacatctggcattaaa gaagtccctagactttgtccatcatctcagttcagaggcaatgaaactgatctcattact gcaaataaacctgaagtttcacaaaacccgtatcatatgccattactttatcctgtcaag tcacctattataactaaaagtaagaaaagcctgtcagaggaagggtttgaggaacaggca atgtcacttgaaagagcaatggaaaatgagaacatcattcaaagtacagtgagcacaatt agccaagataacattagagaaggtgcttttaaagaagccagctcaagcagtattaatgaa ataggtcctagtactaatgaaggaagctctagtattaatgaggtaggttccagtnnnnnn nnnnnnnnnnnnnnnggtgaaaacattcaagcagaactaggtaaaaagagaggatccaaa ttaaatgctgtgcttagattaggtcttatgcaacccgaagtctataagcaaagtcttcct 
ttaagtaatcataatgatcctgaaatgaaaagacaagaaaaaaatgaaggaggagttcag gctattaaannngatttacctccatgtctaatttcagataatcaagagcatnnnatggga agtagccatgcttctcagatttgttctgagacacctgatgatctgttagatgatgatgaa ggaaaagaaaatnnnagctttgctgaggttgatgttaaggaaagatctgctgtttttggc aaaactgtccagagaagagagttaagaaggagctctagccctttaactcgtgcatgtttg actgagggtcagcaaacaggagcccagaaattagattcatcagaagagaacctatctagt gag >Orangutan tgtggcacaaatactcatgccagctcattacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaagcaaacagcctggctta gcaaggagccaacataacagatgggctggaagtaaggaaacatgtaatgataggcagact cccagcacagaaaaaaaggtagacctgaatgctgatcccctgtgtgagagaaaagaatgg aataagcagaaactgccatgctcagagaatcctagagatnnnactgaagatgttccttgg ataacactaaatagcagcattcagaaagttaatgagtggttttccagaagtgacgaactg ttaggttctgatgactcacatgatgggaggtctgaatcaaatgccaaagtagcggatgta ttggacgttctaaatgaggtagatgaatattctggttcttcagagaaaatagacttactg gccagtgatcctcatgaggctttaatttgtaaaagtgaaagagttcactccaaatcagta gagagtaatattgaagacaaaatatttgggaaaacctatcggaggaaggcaagcctcccc aacttaagccatgtaactgaaaatctaattataggagcatttgttactgagccacagata atacaagagcgtcccctcacaaataaattaaagcgtaaaaggagagctacatcaggcctt catcctgaggattttatcaagaaagcagatttggcagttnnncaaaagactcctgaaatg ataaatcagggaactaaccaaatggagcagaatggtcaagtgatgaatattactaatagt ggtcatgagaataaaacaaaaggtgattctattcagaatgagaaaaatcctaacccaata gaatcactcgaaaaagaatctgctttcaaaacaaaagctgaacctataagcagcagtata agcaatatggaactcgaattaaatatccataattcaaaagcacctaaaaagaataggctg aggaggaagtcttctaccaggcatattcatgcgcttgaactagtagtcagtagaaatcta agcccacctaattgtactgaattgcaaattgatagttgttctagcagtgaagagataaag aaaaaaaaatacaaccaaatgccagtcaggcacagcagaaacctacaactcatggaagat aaagaacctgcaactggagccaagaagagtaacaagccaaatgaacagacaagtaaaaga catgacagcgatactttcccagagctgaagttaacaaatgcacctggttcttttactaac tgttcaaataccagtgagcttaaagaatttgtcaatcctagccttccaagagaagaaaaa gaagagaaannnctaggaacagttaaagtgtctaataatgccaaagaccccaaagatctc atgttaagtggaggannnagggttttgcaaactgaaagatctgtagagagtagcagtatt tcattggtacctggtactgattatggcactcaggaaagtatctcgttactggaagttagc 
actctagggnnnaaggcaaaaacagaaccaaataaatgtgtgagtcagtgtgcagcattt gaaaaccccaaggaactaattcatggttgtttcaaagatactagaaatgacacagaaggg tttaagtatccattgggacatgaagttaaccacagtcaggaaacaagcatagaaatggaa gaaagtgaacttgatactcagtatttgcagaatacattcaaggtttcaaagcgccagtca tttgctctgttttcaaatccaggaaatccagaagaggaatgtgcaacattctctgcccac tctaggtccttaaagaaacaaagtccaaaagtcacttttgaatgtgaacaaaaggaagaa aatcaaggaaagaatgagtctaatatcaagcctgtacagacagctaatatcactgcaggc tttcctgtggtttgtcagaaagataagnnnccagttgattatgccaaatgtagtatcaaa ggaggctctaggttttgtctatcatctcagttcagaggcaacgaaactggactcattact ccaaataaacatggactttcacaaaacccatatcatataccaccactttttcccatcaag tcatttgttaaaactaaatgtaagaaaaacctgctagaggaaaactctgaggaacattca atgtcacctgaaagagaaatgggaaacgagaacnnnattccaagtacagtgagcataatt agccgtaataacattagagaaaatgtttttaaagaagccagctcaagcaatattaatgaa gtaggttccagtactaatgaagtgggctccagtattaatgaagtaggttccagtnnnnnn nnnnnnnnnnnnnnngatgaaaacattcaagcagaactaggtagaagcagagggccaaaa ttgaatgctatgcttagattaggggttttgcaacctgaggtctataaacaaagttttcct ggaagtaatggtaagcatcctgaaataaaaaagcaagaatatgaagaannngtacttcag actgttaatacagacttctctccatgtctgatttcagataacctagaacagcctatgaga agtagtcatgcatctcaggtttgttctgagacacctaatgacctgttagatgatggtgaa ataaaggaagatactagttttgctgaaaatgacattaaggaaagttctgctgtttttagc aaaagcgtccagagaggagagcttagcaggagtcctagccctttcacccatacacatttg gctcagggttaccgaagaggggccaagaaattagagtcctcagaagagaacttatctagt gag >Rhesus tgtggcacaaatactcatgccagctcattacagcatgagaacnnnagtttgttactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaagcaaacagcctggcttg gcaaggagccaacataacagatggactggaagtaaggaaacatgtaatgataggcagact cccagcacagagaaaaaggtagatctgaatgctaatgccctgtatgagagaaaagaatgg aataagcaaaaactgccatgctctgagaatcctagagacnnnactgaagatgttccttgg ataacactaaatagcagcattcagaaagttaatgagtggttttccagaagtgatgaactg ttaagttctgatgactcacatgatggggggtctgaatcaaatgccaaagtagctgatgta ttggacgttctaaatgaggtagatgaatattctggttcttcagagaaaatagacttactg gccagtgatcctcatgagcctttaatatgtaaaagtgaaagagttcactccagttcagta gagagtaatattaaagacaaaatatttgggaaaacctatcggaggaaggcaaaccttccc 
aatttaagccatgtaactgaaaatctaattataggagcacttgttactgagtcacagata atgcaagagcgtcccctcacaaataaattaaagcgtaaaaggagaactacatcaggtctt catcctgaggattttataaagaaagcagatttggcagttnnncaaaagactcctgaaata ataaatcagggaactaaccaaatggagcagaatggtcaagtgatgaatattactaatagt gctcatgagaataaaacaaaaggtgattctattcagaatgagaaaaatcctaacccaata gaatcactggaagaagaatctgctttcaaaactaaagctgaacctataagcagcagtata aacaatatggaactagaattaaatatccacaattcaaaagcacctaaaaaaaataggctg aggaggaagtcttctaccaggcatattcatgcgcttgaactagtagtcagtagaaatcta agcccacctaactgtactgaactacaaattgatagttgttctagcagtgaagagataaag aaaaaaaattacaaccaaatgccagtcaggcacagcagaaacctacaactcatggaagat aaagaatctgcaactggagccaagaagagtaacaagccaaatgaacagacaagtaaaaga catgccagtgatactttcccagaactgaagttaacaaaggtacctggttcttttactaac tgttcannnaatactagtgaaaaagaatttgtcaatcctagcctttcaagagaagaaaaa gaagagaaannnctagaaacagttaaagtgtctaataatgccaaagaccccaaagatctc atcttaagtggagaannnagggttttacaaactgaaagatctgtagagagtagcagtatt tcattggtacctggtaccgattatggcactcaggaaagtatctcattactggaagttagc actctagggnnnaaggcaaaaacagaacgaaataaatgtatgagtcagtgtgcagcattt gaaaaccccaaggaactaattcatggttgttctgaagatactagaaatgacacagaaggc tttaagtatccattgggaagtgaagttaaccacagtcaggaaacaagcatagaaatagaa gaaagtgaacttgatactcagtatttgcagaatacattcaaggtttcaaagcgccagtcc tttgctctgttttcaaatccaggaaatccagaagaggaatgtgcaacattctctgcccac tctaggtccttaaagaaacaaagtccaaaagttacttctgaatgtgaacaaaaggaagaa aatcaaggaaagaaacagtctaatatcaagcctgtacagacagttaatatcactgcaggc ttttctgtggtttgtcagaaagataagnnnccagttgataatgccaaatgtagtatcaaa ggaggctctaggttttgtctatcatctcagttcagaggcaacgaaactggactcattact ccaaataaacatggactgttacaaaacccataccatataccaccactttttcctgtcaag tcatttgttaaaactaaatgtaacaaaaacctgctagaggaaaactctgaggaacattca gtgtcacctgaaagagcagtgggaaacaagaacatcattccaagtacagtgagcacaatt agccataataacattagagaaaatgcttttaaagaagccagctcgagcaatattaatgaa gtaggttccagtactaatgaagtgggctccagtattaatgaagtaggttccagtnnnnnn nnnnnnnnnnnnnnngatgaaaacattcaagcagaactaggtagaaacagagggccaaaa ttgaatgctgtgcttagattagggcttttgcaacctgaggtctgtaaacaaagtcttcct 
ataagtaattgtaagcatcctgaaataaaaaagcaagaacatgaagaannnttagttcag actgttaatacagacttctctccatgtctgatttcagataacctagaacagcctatggga agtagtcatgcgtctgaggtttgttctgagactcctgatgatctgttagatgatggtgaa ataaaggaagatactagttttgctgaaaatgacattaaggagagttctgctgtttttagc aaaagcatccagagaggagagctcagcaggagccctagccctttcacccatacacattta gctcagggttaccgaaaagaggccaagaaattagagtcctcagaagagaacttatctagt gag >Chimpanzee tgtggcacaaatactcatgccagctcattacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaagcaaacagcctggctta gcaaggagccaacataacagatgggctggaagtaaggaaacatgtaatgataggcggact cccagcacagaaaaaaaggtagatctgaatgctgatcccctgtgtgagagaaaagaatgg aataagcagaaactgccatgctcagagaatcctagagatnnnactgaagatgttccttgg ataacactaaatagcagcattcagaaagttaatgagtggttttccagaagtgatgaactg ttaggttctgatgactcacatgatggggggtctgaatcaaatgccaaagtagctgatgta ttggacgttctaaatgaggtagatgaatattctggttcttcagagaaaatagacttactg gccagcgatcctcatgaggctttaatatgtaaaagtgaaagagttcactccaaatcagta gagagtaatactgaagacaaaatatttgggaaaacctatcggaggaaggcaagcctcccc aacttaagccatgtaactgaaaatctaattataggagcatttgttactgagccacagata atacaagagcgtcccctcacaaataaattaaagcgtaaaaggagagctacatcaggcctt catcctgaggattttatcaagaaagcagatttggcagttnnncaaaagactcctgaaatg ataaatcagggaactaaccaaatggagcagaatggtcaagtgatgaatattactaatagt ggtcatgagaataaaacaaaaggtgattctattcagaatgagaaaaatcctaacccaata gaatcactcgaaaaagaatctgctttcaaaacgaaagctgaacctataagcagcagtata agcaatatggaactcgaattaaatatccacaattcaaaagcacctaaaaagaataggctg aggaggaagtcttctaccaggcatattcatgcgcttgaactagtagtcagtagaaatcta agcccacctaattgtactgaattgcaaattgatagttgttctagcagtgaagagataaag aaaaaaaagtacaaccaaatgccagtcaggcacagcagaaacctacaactcatggaagat aaagaacctgcaactggagtcaagaagagtaacaagccaaatgaacagacaagtaaaaga catgacagcgatactttcccagagctgaagttaacaaatgcacctggttcttttactaac tgttcaaataccagtgaacttaaagaatttgtcaatcctagccttccaagagaagaagaa gaagagaaannnctagaaacagttaaagtgtctaataatgccgaagaccccaaagatctc atgttaagtggagaannnagggttttgcaaactgaaagatctgtagagagtagcagtatt tcattggtacctggtactgattatggcactcaggaaagtatctcgttactggaagttagc 
actctagggnnnaaggcaaaaacagaaccaaataaatgtgtgagtcagtgtgcagcattt gaaaaccccaagggactaattcatggttgttccaaagatactagaaatgacacagaaggc tttaagtatccattgggacatgaagttaaccacagtcgggaaacaagcatagaaatggaa gaaagtgaacttgatgctcagtatttgcagaatacattcaaggtttcaaagcgccagtca tttgctctgttttcaaatccaggaaatccagaagaggaatgtgcaacattctctgcccac tgtaggtccttaaagaaacaaagtccaaaagtcacttttgaacgtgaacaaaaggaacaa aatcaaggaaagaatgagtctaatatcaagcctgtacagacagttaatatcactgcaggc tttcctgtggtttgtcagaaagataagnnnccagttgattatgccaaatgtagtatcaaa ggaggctctaggttttgtctatcatctcagttcagaggcaacgaaactggactcattact ccaaataaacatggacttttacaaaacccatatcatataccaccactttttcccatcaag tcatttgttaaaactaaatgtaagaaaaacctgctagaggaaaactttgaggaacattca atgtcacctgaaagagaaatgggaaatgagaacnnnattccaagtacagtgagcacaatt agccgtaataacattagagaaaatgtttttaaagaagccagctcaagcaatattaatgaa gtaggttccagtactaatgaagtgggctccagtattaatgaagtaggttccagtnnnnnn nnnnnnnnnnnnnnngatgaaaacattcaagcagaactaggtagaaacagagggccaaaa ttgaatgctatgcttagattaggggttttgcaacctgaggtctataaacaaagtcttcct gaaagtaattgtaagcatcctgaaataaaaaagcaagaatatgaagaannngtagttcag actgttaatacagatttctctccatgtctgatttcagataacttagaacagcctatggga agtagtcatgcatctcaggtttgttctgagacacctgatgacctgttagatgatggtgaa ataaaggaagatactagttttgctgaaaatgacattaaggaaagttctgctgtttttagc aaaagcgtccagagaggagagcttagcaggagtcctagccctttcacccatacacatttg gctcagggttaccgaagaggggccaagaaattagagtcctcagaagagaacttatctagt gag >Gorilla tgtggcacaaatactcatgccagctcattacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaaacaaacagcctggctta gcaaggagccaacataacagatgggctggaagtaaggaaacatgtaatgataggcggact cccagcacagaaaaaaaggtagatctgaatgctgatcccctgtgtgagagaaacgaatgg aataagcagaaactgccatgctcagagaatcctagagatnnnactgaagatgttccttgg ataacactaaatagcagcattcagaaagttaatgagtggttttccagaagtgatgaactg ttaggttctgatgactcacatgatggggggtctgaatcaaatgccaaagtagctgatgta ttggacgttctaaatgaggtagatgaatattctggttcttcagagaaaatagacttactg gccagtgatcctcatgaggctttaatatgtaaaagtgaaagagttcactccaaatcagta gagagtaatattgaagacaaaatatttgggaaaacctatcggaggaaggcaagcctcccc 
agcttaagccatgtaactgaaaatctaattataggagcatttgttactgagccacagata atacaagagcgtcccctcacaaataaattaaagcgtaaaaggagagctacatcaggcctt catcctgaggattttatcaagaaagcagatttggcagttnnncaaaagactcctgaaatg ataaatcagggaactaaccaaatggagcagaatggtcaagtgatgaatattactaatagt ggtcatgagaataaaacaaaaggtgattctattcagaatgagaaaaatcctaacccaata gaatcactagaaaaagaatctgctttcaaaacgaaagctgaacctataagcagcagtata agcaatatggaactcgaattaaatatccacaattcaaaagcgcctaaaaagaataggctg aggaggaagtcttctaccaggcatattcatgcgcttgaactagtagtcagtagaaatcta agcccacctaattgtactgaattgcaaattgatagttgttctagcagtgaagagataaag aaaaaaaagtacaaccaaatgccagtcaggcacagcagaaacctacagctcatggaagat aaagaacctgcaactggagccaagaagagtaacaagccaaatgaacagacaagtaaaaga catgacagcgatactttcccagagctgaagttaacaaatgcacctggttcttttactaac tgttcaaataccagtgaacttaaagaatttgtcaatcctagccttccaagagaagaaaaa gaagagaaannnctagaaacagttaaagtgtctaataatgccgaagaccccaaagatctc atgttaagtggagaannnagggttttgcaaactgaaagatctgtagagagtagcagtatt tcattggtacctggtactgattatggcactcaggaaagtatctcgttactggaagttagc actctagggnnnaaggcaaaaacagaaccaaataaatgtgtgagtcagtgtgcagcattt gaaaaccccaagggactaattcatggttgttccaaagatactagaaatgacacagaaggc tttaagtatccattgggacatgaagttaaccacagtcgggaaacaagcatagaaatggaa gaaagtgaacttgatgctcagtatttgcagaatacattcaaggtttcaaagcgccagtca tttgctctgttttcaaatccaggaaatccagaagaggaatgtgcaacattctctgcccac tctaggtccttaaagaaacaaagtccaaaagtcacttttgaatgtgaacaaaaggaagaa aatcaaggaaagaatgagtctaatatcaagcctgtacagacagttaatatcactgcaggc tttcctgtggtttgtcagaaagataagnnnccagttgattatgccaaatgtagtatcaaa ggaggctctaggttttgtctatcatctcagttcagaggcaacgaaactggactcattact ccaaataaacatggacttttacaaaacccatatcatataccaccactttttcccatcaag tcatttgttaaaactaaatgtaagaaaaacctgctagaggaaaactttgaggaacattca atgtcacctgaaagagaaatgggaaatgagaacnnnattccaagtacagtgagcacaatt agccgtaataacattagagaaaatgtttttaaagaagccagctcaagcaatattaatgaa gtaggttccagtactaatgaagtgggctccagtattaatgaagtaggttccagtnnnnnn nnnnnnnnnnnnnnngatgaaaacattcaagcagaactaggtagaaacagagggccaaaa ttgaatgctatgcttagattaggggttttgcaacctgaggtctataaacaaagtcttcct 
ggaagtaattgtaagcatcctgaaataaaaaagcaagaatatgaagaannngtagttcag actgttaatacagatttctctccatgtctgatttcagataacttagaacagcctatggga agtagtcatgcatctcaggtttgttctgagacacctgatgacctgttagatgatggtgaa ataaaggaagatactagttttgctaaaaatgacattaaggaaagttctgctgtttttagc aaaagcgtccagagaggagagcttagcaggagtcctagccctttcacccatacacatttg gctcagggttaccgaagaggggccaagaaattagagtcctcagaagagaacttatctagt gag >FlyingLem tgtggcacaaatactcatgccagctcattacagcatgagaacagcagtttattacgcact aaagacagaataaatgttgaaaagactgaattctgtaataaaagcaaacagcctggctta gcaaggagccaggagagcagatgggctgaaagtaaggaaacatgtaatgataggcagacg cccagcacagagaaaaagatagatctaaatgctgattcccagcatgggagaaaagaacgg aatatgcagaaacctccataccctgagagtcctagagatnnnacccaagatgttccttgg ataacactaaacagcagcattcagaaagttaatgagtggttttccagaagtgatgaaatt ttagcttctgatgactcacgtgacagggtgtctgaatcaaatgccaaagtagctggtgca ttagaagttccaaatgatgtagatggatattctgattcttcagagaaagttgatttaatg gccagtgatcctcatgatgctttaatatgtaaaagtgaaagaatccactccagaccagta gagagtaatatcaaagataaaatatttgggaaaacctatcagaggaagacaagcctccct aacttgagccacgtaaatgaagatctaattataggagcatttgttacagaaccacagata acacaagagcgtcccctcacaaataaggtaaagcctaaaaggagaactacatcaggcctt catcctgaggattttatcaagaaagcagacttggcagttgttcaaaaaactcctgaaaag ataaatcagggaattgaccaaatggagcagaatgatcgagtgatgaatattattaatagt ggtcatgagaatgaaacaaaggatgattatgttcagaaagagaaaaatgctaacccaaca gaatcattggaaaaagaatctgctttcagaactaaagcagaacctataagcagcagtata agcaatatggaaatagaattaaatatccacaattcaaaaccatctaagaagaataggctg aggaagatgtcctctactaggcatattcatgcacttgaactagtagtcaatagaaatcca agcccacctaattatactgaactacaaattgatagttgttctagcagtgaagaaatagag aaaaaaaattccagccaaatgccagtcaggcacagcagaaagcttcaactcatggaaaat aaagaacctgcaactggagccaagaagagtaacaagccaaatgaacaaataagtagaaga cattccagtaatgctttcccagaactgcggttaacaaatgtacctgttttttttgctaac tgttcaagttctaataaacttcaagaatttatcgatcctagccttcaaagagaagaaata gaagagaacnnnctagaaacaattcatgtgtctaatagtgccaaagaccccaaagatttg gtgttaagtggggagnnnaagggtttgcaaactgaaagatctgtagagagtaccagtatt tcattagtacctgatactgattatggcactcaagacagtatctcaatattagaagctaac 
atcctagggnnnaaggcaaaaacagcaccaagtcaacatgcaaatcagtgtgcagcaatt gaaaaccccaaagaacttatccatggttgtcctaaaggtactagaaatgacacagaggat tttaaggatccattgagatgtggagttgaccacattcagaagacaagcatagaaatgcaa gagagtgaacttgatactcagtatttacaaaatatattcaaggtttcaaaacgtcagtca tttgctctcttttcaaatccaggaaatccagaaaaggagtgtgcaacagtctatgcccac tccaggttgttaaggaaacaaagtccaaaagtcactcctgaatgtgaacaaaaagaagaa aatgagggaaataaagagtctaaaatcaagcacatacaggcagttaataccactgtgggc ttttctgtcctttgtcagaatgttaagaagccaggtgattatgccaaatttagcattaaa ggagtctctaggcattgttcatcatctcagttcagaggcaatgaaactgaactcattact gcaaataaacatggaattttacaaaactcatgtcatatgtcatcactttcccccatcagg tcatctgttaaaattaaatgtaagaagaacctgtcagaggaaaggtttgaggaacattca gtgtcacctgaaagagcaatggcaaacaagagaatcattcaaagtacagtgaacacaatt agccaaaataacattagagacagtgcttctaaagaagccagctcaagcagtattaatgaa gtaggttccagtactaatgaagtaggctccagtattaatgaagtaggtcccagtnnnnnn nnnnnnnnnnnnnnnggtgaaaacattcaagcagaactaggtagaaacagaggacctaaa ttaagtgctatgcttagattaggcctcatgcaacctgaagtttacaagcaaaatcttcct ttaggtaattgtaaacatcctgaaataaggnnncaagaagaaaatgaaggaatagttcag gctgttaatacaaatctgtctctgtgcctaatttcacataacctcgaacaacctatggaa agtagtcatgcttcccaggtttgttctgagacacctgatgacctgttagatggtgatgag ataaaggaaaacaccagctttgctgaaagtgacagtaaggaaagatctgctgtttttagc aaaagtgtccagagaggagagttaagcaggagccctagcccttttgcccaaacatgtttg gctcagggtcaccaaagaggagccaggaaattagagtcttctgaagagaacgtatctagt gag >Galago tgtggcaaaaatactcatgccagctcattacagcatgagagcagcagtttattactcact aaagacaaaatgaatgtagaaaaggctgaattttgtaataaaagcaaacagcctggctta gcaaggagccaacagagcagatcggctcaaagtaaggaaacatgcaatgataggcacact tgcagccctgagcaaaaggtagatctgaatactgctcccccatatgggagaaaagaacag aataaggagaaacttctatgctccaagaatcctagagatnnnagccaagatgttccttgg ataacactaaatagcagcattcagaaagttaatgaatggttttctagaagtgatgaaatg ttaacttctgatgactcacatgatgagggttctgaatcacatgctgaagtagctggagcc ttagaagttccaagtgaagtagatggatattccagttcctcagagaaaatagacttactg gccagtgatcctcattatcctataatatgtaaaagtgaaagagttcactccaaaccaata aagagtaaagttgaagataaaatatttgggaaaacttatcggaggaaggcaagcctccct 
aacttaagccatgtaactgaaaatctaattataagagcagctgctactgagccacagata acacaagagtgttccctcacaaataaattaaaacgtaaaaggagaactacatcaggtctt tgtcctgaggattttatcaagaaggcagatttggcagttgttcaaaagacacctgaaaag agaattcagggaactaaccaagtggatcagaatagtcacgtggtaaatattactaatagt ggttatgagaatgaaacaaaaggtgattatgttcagaatgaaaaaaatgctaactcaaca gaatcattggaaaaagaatcttctctcggaactaaagctgaacctataagcagcagtata agtaatatgaaattagaattaaatattcacaattcaaaagcaagtaaaaagaaaaggctg aggaagaagtcttctagcaggcatattcgtgcacttgaactagtagtcaataaaaatcca agccctcctaatcataccaacctacaaattgacagttgttctagcagtgaagaaataaag gataaaagttctgaccaaataccagtcaggcatagcagaaagcctggactcatggaagat agagaacctgcaactggagccaagaaaagtaacaagccaaatgagcaaataagtaaaaga catgtcagtgatactttcccagaagtggcattaacaaatatatctagtttttttactaac tgttcaggttctaatagacttaaagaatttgtcaatcctagccttcaaagaaaaaaaaca gaagagaacttagaagaaacaattcaagtgtctaatagtaccaaaggtccggtgttaagt ggagaaagggttttgnnncaaattgaaagtgaagaaagatctataaaaagcaccagtatt tcattggtacctgatactgattatggtactcaggacagtaactcgttactgaaagttaaa gtcttacggnnnaaggtgaaaacagcaccaaataaacatgcaagtcagggtacagccact gaaaaccccaaggaactaatccatggttgctctaaagatactggaaatgacacagagggc tataaggatccattgagacatgaaattaaccacattcagaagataagcatggaaatggaa gacagtgaacttgatactcagtatttacagaatacattcaagttttcaaagcgtcagtcg tttgctctgttttcaaacctannnnnnnnnggaaaggaatgtgcaacagtctgtgcccag tctctctctgcgtccttaagaaaaggttcaaaagtcattcttgaatgtgaacaaatagaa aatccaggaatgaaagagcctaaaatcaagcatatacagggaaataatatcaatacaggc ttctctgtagtttgtcagaaagataagaaaacagatgattatgccaaatacagcatcaaa gaagcatctaggttttgtttgtcaaatcagtttcgagacaatgaaactgaatccattact gtaaataaacttggaattttacaaaacctctatcatataccaccactttctcctatcagg ctatttgataaaactaaatgtaatacaaacctgttagaggaaaggtttgaagaacattca gtgttacctgaaaaagcagtaggaaacgagaacaccgttccaagtacaatgaatacaatt aaccaaaataacnnnagagaaagtgcttataaagaagccagttcaagcagtatcaatgaa gtaagctcgagtactaatgaagtgggctccagtgttaacgaagtaggccccagtnnnnnn nnnnnnnnnnnnnnnagtgaaaacattcaagcagaactagataaaaacagaggacctaag ttgaatgctgtgcttagattaggtcttatgcaacctgaagtctataaacaaaatcttcct 
ataagtaattgtgaacatcctaaaataaaagggcaagaagaaaatggannngtagttcaa cctgttaatccagatttttcttcatgtctaatttcagataacctagaacaacctacgaga agtagtcatgcttctcagctttgttctgagacacctgatgacttattagttgatgatgaa ctaaaggaaaataccagttttgctgaaaataacattaaggaaagatctgctgtttttagc aaaaatgtcatgagaagagagattagcaggagccctagccctttagcccatatacatttg actcaggctcaccaaagagaggttaggaaattagagtcctcagaagagaacatgtctagt gaa >Human tgtggcacaaatactcatgccagctcattacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaagcaaacagcctggctta gcaaggagccaacataacagatgggctggaagtaaggaaacatgtaatgataggcggact cccagcacagaaaaaaaggtagatctgaatgctgatcccctgtgtgagagaaaagaatgg aataagcagaaactgccatgctcagagaatcctagagatnnnactgaagatgttccttgg ataacactaaatagcagcattcagaaagttaatgagtggttttccagaagtgatgaactg ttaggttctgatgactcacatgatggggagtctgaatcaaatgccaaagtagctgatgta ttggacgttctaaatgaggtagatgaatattctggttcttcagagaaaatagacttactg gccagtgatcctcatgaggctttaatatgtaaaagtgaaagagttcactccaaatcagta gagagtaatattgaagacaaaatatttgggaaaacctatcggaagaaggcaagcctcccc aacttaagccatgtaactgaaaatctaattataggagcatttgttactgagccacagata atacaagagcgtcccctcacaaataaattaaagcgtaaaaggagacctacatcaggcctt catcctgaggattttatcaagaaagcagatttggcagttnnncaaaagactcctgaaatg ataaatcagggaactaaccaaacggagcagaatggtcaagtgatgaatattactaatagt ggtcatgagaataaaacaaaaggtgattctattcagaatgagaaaaatcctaacccaata gaatcactcgaaaaagaatctgctttcaaaacgaaagctgaacctataagcagcagtata agcaatatggaactcgaattaaatatccacaattcaaaagcacctaaaaagaataggctg aggaggaagtcttctaccaggcatattcatgcgcttgaactagtagtcagtagaaatcta agcccacctaattgtactgaattgcaaattgatagttgttctagcagtgaagagataaag aaaaaaaagtacaaccaaatgccagtcaggcacagcagaaacctacaactcatggaaggt aaagaacctgcaactggagccaagaagagtaacaagccaaatgaacagacaagtaaaaga catgacagcgatactttcccagagctgaagttaacaaatgcacctggttcttttactaag tgttcaaataccagtgaacttaaagaatttgtcaatcctagccttccaagagaagaaaaa gaagagaaannnctagaaacagttaaagtgtctaataatgctgaagaccccaaagatctc atgttaagtggagaannnagggttttgcaaactgaaagatctgtagagagtagcagtatt tcattggtacctggtactgattatggcactcaggaaagtatctcgttactggaagttagc 
actctagggnnnaaggcaaaaacagaaccaaataaatgtgtgagtcagtgtgcagcattt gaaaaccccaagggactaattcatggttgttccaaagataatagaaatgacacagaaggc tttaagtatccattgggacatgaagttaaccacagtcgggaaacaagcatagaaatggaa gaaagtgaacttgatgctcagtatttgcagaatacattcaaggtttcaaagcgccagtca tttgctccgttttcaaatccaggaaatgcagaagaggaatgtgcaacattctctgcccac tctgggtccttaaagaaacaaagtccaaaagtcacttttgaatgtgaacaaaaggaagaa aatcaaggaaagaatgagtctaatatcaagcctgtacagacagttaatatcactgcaggc tttcctgtggttggtcagaaagataagnnnccagttgataatgccaaatgtagtatcaaa ggaggctctaggttttgtctatcatctcagttcagaggcaacgaaactggactcattact ccaaataaacatggacttttacaaaacccatatcgtataccaccactttttcccatcaag tcatttgttaaaactaaatgtaagaaaaatctgctagaggaaaactttgaggaacattca atgtcacctgaaagagaaatgggaaatgagaacnnnattccaagtacagtgagcacaatt agccgtaataacattagagaaaatgtttttaaagaagccagctcaagcaatattaatgaa gtaggttccagtactaatgaagtgggctccagtattaatgaaataggttccagtnnnnnn nnnnnnnnnnnnnnngatgaaaacattcaagcagaactaggtagaaacagagggccaaaa ttgaatgctatgcttagattaggggttttgcaacctgaggtctataaacaaagtcttcct ggaagtaattgtaagcatcctgaaataaaaaagcaagaatatgaagaannngtagttcag actgttaatacagatttctctccatatctgatttcagataacttagaacagcctatggga agtagtcatgcatctcaggtttgttctgagacacctgatgacctgttagatgatggtgaa ataaaggaagatactagttttgctgaaaatgacattaaggaaagttctgctgtttttagc aaaagcgtccagaaaggagagcttagcaggagtcctagccctttcacccatacacatttg gctcagggttaccgaagaggggccaagaaattagagtcctcagaagagaacttatctagt gag >Mouse tgtggcacagatgctcatgccagctcattacagcctgagaccagcagtttattgctcatt gaagacagaatgaatgcagaaaaggctgaattctgtaataaaagcaaacagcctggcata gcagtgagccagcagagcagatgggctgcaagtaaaggaacatgtaacgacaggcaggtt cccagcactggggaaaaggtaggtccaaacgctgactcccttagtgatagagagaagtgg actcacccgcaaagtctgtgccctgagaattctggagctnnnaccaccgatgttccttgg ataacactaaatagcagcgttcagaaagttaatgagtggttttccagaactggtgaaatg ttaacttctgacagcgcatctgccaggaggcacgagtcaaatgctgaagcagctgttgtg ttggaagtttcaaacgaagtggatgggggttttagttcttcaaggaaaacagacttagta acccccgacccccatcatactttaatgtgtaaaagtggaagagacttctccaaaccagta gaggataatatcagtgataaaatatttgggaaatcctatcagagaaagggaagccgccct 
cacctgaaccatgtgactgaaattataggcacannnnnntttattacagaaccacagata acacaagagcagcccttcacaaataaattaaaacgtaagagannnnnnagtacatccctt caacctgaggacttcatcaagaaagcagattcagcaggtgttcaaaggactcctgacaac ataaatcagggaactgacctaatggagccaaatgagcaagcagtgagtactaccagtaac tgtcaggagaacaaaatagcaggtagtaatctccagaaagagaaaagcgctcatccaact gaatcattgagaaaggaacctgcttccacagcaggagccaaatctataagcaacagtgta agtgatttggaggtagaattaaacgtccacagttcaaaagcacctaagaaaaataggctg aggaggaagtcttctatcaggtgtgctcttccacttgaaccannnatcagtagaaatcca agcccacctacttgtgctgagcttcaaatcgatagttgtggtagcagtgaagaaacaaag aaaaaccattccaaccaacagccagccgggcaccttagagagcctcaactcatcgaagac actgaacctgcagcggatgccaagaagaacgagnnnccaaatgaacacataaggaagaga cgtgccagcgatgctttcccagaagagaaattaatgaacaaagctggtttattaactagc tgttcaagtcctagaaaatctcaagggcctgtcaatcccagccctcagagaacaggaaca gagcaannnnnncttgaaacacgccaaatgtctgacagtgccaaagaactcggggatcgg gtcctaggaggagagcccagtggcaaaaccactgaccgatctgaggagagcaccagcgta tccttggtacctgacactgactacgacactcagaacagtgtctcagtcctggacgctcac actgtcagannntatgcaagaacaggatccgctcagtgtatgactcagtttgtagcaagc gaaaaccccaaggaactcgtccatggctctaacaatgctgggnnnagtggcacagagggt ctcaagccccccttgagacacgcgcttaacctcagtcaggagaaannngtagaaatggaa gacagtgaacttgatactcagtatttgcagaatacatttcaagtttcaaagcgtcagtca tttgctttattttcaaaacctagaagtccccaaaaggactgtnnnnnnnnnnnngctcac tctgtgccctcaaaggaactgagtccaaaggtgacagctaaaggtaaacaaaaagaacgt cagggacaggaagaatttgaaatcagtcacgtacaagcagttgcggccacagtgggcnnn ttacctgtgccctgtcaagaaggtaagnnnctagctgctgatacaatgtgtnnnnnngat agaggttgtaggctttgtccatcatctcattacagaagcggggagaatggactcagcgcc acaggtaaatcaggaatttcacaaaactcacattttaaacaatcagtttctcccatcagg tcatctataaaaactgacaataggaaacctctgacagagggacgatttgagagacataca tcatcaactgagatggcggtgggaaatgagaacattcttcagagtacagtgcacacagtt agcctgaataacnnnagaggaaatgctnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn nnnnnnnnntgtcaagaagccggctcgggcagtattcatgaagtatgttccactnnnnnn nnnnnnnnnnnnnnnggtgactccttcccaggacaactaggtagaaacagagggcctaag gtgaacactgtgcctccattagatagtatgcagcctggtgtctgtcagcaaagtgttcct 
gtaagtgatnnnaagtatcttgaaataaaaaagnnnnnnnnnnnnnnncaggagggtgag gctgtctgtgcagacttctctccatgtctattctcagaccatcttgagcaatctatgagt ggtnnnaaggtttttcaggtttgctctgagacacctgatgacctgctggatgatgttgaa atacagggacatactagctttggtgaaggtgacataatggagagatctgctgtctttaac ggaagcatcctgagaagggagtccagtaggagccctagtcctgtaacccatgcatcgaag tctcagagtctccacagagcgtctaggaaattagaatcgtcagaagagagcgactccact gag >HowlerMon tgtggcacaaatactcatgccagctcattacagcatgagaacagcagtttgttactcact aaagacacactgaatgtagaaaaggctgaattctgtaataaaagcaaacagcctggctta gcaaggagccaacataacagatgggctgaaagtgaggaaacatgtaatgataggcagact cccagcacagagaaaaaggtagatgtggatgctgatcccctgcatgggagaaaagaatgg aataagcagaaacctccgtgctctgagaatcctagagatgatactgaagatgttgcttgg ataatgctaaatagcagcattcagaaagttaatgagtggttttccagaagtgatgaactg ttaacttctgatgactcacatgatggggggtctgaatcaaatgccaaagtagctgaagca ttggaagttctaaatgaggtagatggatattctagttcttcagagaaaatagacttactg gccagtgatcctcatgatcatttgatatgtaaaagtgaaagagttcactgcaaatcagta gagagtagtattgaagataaaatatttgggaaaacctatcggaggaaggcaagcctccct aacttgagccacgtaactgaaaatctaattataggagcatttgttactgagccacagata atacaagagcatcctctcacaaataaattaaagcgtaaaaggagagttacatcaggactt catcctgaggattttatcaagaaagcagatttggcagttnnncaaaagactcctgaaaag ataaatcagggaactaaccaaacagagcggaatgatcaagtgatgaatattactaacagt ggtcatgagaataaaacaaaaggtgattctattcagaatgagaacaatcctaacccagta gaatcactggaaaaagaannntcattcaaaagtaaagctgaacctataagcagtagtata agcaatatggaattagaattgaatgtccacaattccaaagcatctaaaaagaataggctg agaaggaagtcttctaccaggcatattcatgagcttgaactagtagtcagtagaaatcta agcccacctaattatactgaagtacaaattgatagttgttctagcagtgaagagataaag aaaaaaaattacaaccaaatgccagtcaggcacagcagaaagctacaactcatggaagat aaagaacgtgcagctagagccaaaaagagtagcaagccaaatgaacaaacaagtaaaaga catgccagtgatactttcccagaactgaggttaacaaacatacctggttcttttactaac tgttcaaatactaatgaatttaaagaatttgtcaatcctagccttccaagagaacaaaca gaagagaaannnctagaaacagttaaactgtctaataatgccaaagaccccaaagatctc atgttaagtggagaannnagtgttttgcaaattgaaagatctgtagagagtagcagtatt ttgttgatacctggtactgattatggcactcaggaaagtatctcattactggaagttagc 
actctggggnnnaaggcaaaaacagaaccaaataaatgtgtgagtcagtgtgcagcattt gaaaaccccaaggaactaattcatggttgttctaaagatactagaaatggcacagaaggc ttgaagtatccattgggacctgaagttaactacagtcaggaaacaagcatagatatgaga gaaagtgaacttgatactcaatatttgcagaatacattcaaggtttcaaagcgccagtca tttgctctgttttcaaatccaggaaatccagaaaaggaatgtgcaacattctctgcctgc tctaggtccttaaagaaacaaagtccaaaggtcactcctgaatgtgaacaaaaggaagaa aatcaaggagagaaagagtctaatatcgagcttgtagagacagttaataccactgcaggc tttcctatggtttgtcagaaagataagnnnccagttgattatgccagatgtatcgaannn ggaggctctaggctttgtctatcatctcagttcagaggcaacgaaactggactcattatt ccaaataaacatggacttttacagaacccatatcatatgtcaccgcttattcccaccagg tcatttgttaaaactaaatgtaagaaaaacctgctagaagaaaactctgaggaacattca atgtcacctgaaagagcaatgggaaacaagaacatcattccaagtacagtgagcacaatt agccataataacnnnagagaaaatgcttttaaagaaaccagctcaagcagtatttatgaa gtaggttccagtactaatgaagcaggttctagtactaatgaagtaggctccagtattaat gaagtaggttccagtgatgaaaacattcaagcagagctaggtagaaacagaaggccaaaa ttgaatgctatgcttagattagggcttctgcaacctgagatttgtaagcaaagtcttcct ataagtgattgtaaacatcctgaaattaaaaagcaagaacatgaagaannngtagttcag actgttaatacagacgtctctctatgtctgatttcatataacctagaacagcatatggga agcagtcatacatctcaggtttgttctgagacacctgacaacctgttagatgatggtgaa ataaaggaagatactagttttgctgaatatggcattaaggagacttctactgtttttagc aaaagtgtccagagaggagagctcagcaggagccctagccctttcacccatacacatttg gctcaggtttaccaaagaggggccaagaaattagagtcctcggaagagaatttatctagt gag cogent3-scinexus-e0aee79/tests/data/sample.tsv000066400000000000000000000000641520253266500215140ustar00rootroot00000000000000id name value 1 alpha 10.5 2 beta 20.3 3 gamma 30.1 cogent3-scinexus-e0aee79/tests/data/scitrack.log000066400000000000000000000124151520253266500220060ustar00rootroot000000000000002019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO system_details : system=Darwin Kernel Version 18.6.0: Thu Apr 25 23:16:27 PDT 2019; root:xnu-4903.261.4~2/RELEASE_X86_64 2019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO python : 3.7.3 2019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO user : gavin 2019-07-03 12:17:26 
eratosthenes.uds.anu.edu.au:92022 INFO command_string : /Users/gavin/miniconda3/envs/c3dev/lib/python3.7/site-packages/ipykernel_launcher.py -f /Users/gavin/Library/Jupyter/runtime/kernel-b12c134a-a2d4-4a8d-b7d2-f226e12fe628.json 2019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO composable function : load_unaligned(type='sequences', moltype='dna', format='fasta') + take_named_seqs(type='sequences', names=('Algerian mouse', 'Mouse', 'Rat'), negate=False) + min_length(type='sequences', length=600, motif_length=1, subtract_degen=True, moltype=None) + progressive_align(type='sequences') + omit_degenerates(type='aligned', moltype='dna', gap_is_degen=True, motif_length=3) + write_db(type='output', data_path='../data/rodent_cds/aligned_xy.tinydb', name_callback=None, create=True, if_exists='overwrite', suffix='json') 2019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO input : ../data/rodent_cds/raw_xy/ENSMUSG00000031379.fa 2019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO input md5sum : 3c740b2f8b4713754e23d612e2489768 2019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO output : ENSMUSG00000031379.json 2019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO output md5sum : 07babd6d7eedfe27fa1648213f4609c9 2019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO input : ../data/rodent_cds/raw_xy/ENSMUSG00000031239.fa 2019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO input md5sum : 43c78bd597e3edf7765e883aea8344b3 2019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO output : ENSMUSG00000031239.json 2019-07-03 12:17:26 eratosthenes.uds.anu.edu.au:92022 INFO output md5sum : 236beef72a6f6f5aaa7afabf79fc3e5d 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input : ../data/rodent_cds/raw_xy/ENSMUSG00000031328.fa 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input md5sum : f255615238a754aa30fa316d126fa651 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output : ENSMUSG00000031328.json 2019-07-03 
12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output md5sum : f93ccf09e267a513b72472afdb6e7229 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input : ../data/rodent_cds/raw_xy/ENSMUSG00000063663.fa 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input md5sum : 07fb8fff9da3cd37f5c3b1057236a44b 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO FALSE : min_length : 543 < min_length 600 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input : ../data/rodent_cds/raw_xy/ENSMUSG00000042225.fa 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input md5sum : 701311f0178ba16e848ed08a57a7aa47 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output : ENSMUSG00000042225.json 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output md5sum : bc0878d93bff8bc032feeebe57af642d 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input : ../data/rodent_cds/raw_xy/ENSMUSG00000035847.fa 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input md5sum : bf4a639e534aa97d2bbc215c3b1c7b23 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output : ENSMUSG00000035847.json 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output md5sum : bb218b33c4bc1fd9b1dff27a1351c8b2 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input : ../data/rodent_cds/raw_xy/ENSMUSG00000054453.fa 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input md5sum : 79b9e38b2e2d4129fb593503ea50a79f 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output : ENSMUSG00000054453.json 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output md5sum : 7c7edeb9a25fb49dff7298a4736a0a7b 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input : ../data/rodent_cds/raw_xy/ENSMUSG00000073010.fa 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input md5sum : 122875a99fcb55055f850a1de3cf09ec 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output : ENSMUSG00000073010.json 
2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output md5sum : 8da59b555fd94d22dd8a05574cd81af6 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input : ../data/rodent_cds/raw_xy/ENSMUSG00000025261.fa 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input md5sum : 5eab1a81a7e62e08607f65ff0437a14f 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output : ENSMUSG00000025261.json 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output md5sum : 9c9eba09762838f832c40383983d4221 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input : ../data/rodent_cds/raw_xy/ENSMUSG00000034055.fa 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO input md5sum : d91d611ae9f66c5152b4af0aa2f421e6 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output : ENSMUSG00000034055.json 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO output md5sum : 0f309bcc4d8856249d0f5c26fa6f455c 2019-07-03 12:17:39 eratosthenes.uds.anu.edu.au:92022 INFO TIME TAKEN : 13.204617977142334 cogent3-scinexus-e0aee79/tests/test_composable.py000066400000000000000000002300151520253266500223220ustar00rootroot00000000000000import inspect import pickle import shutil from copy import copy from pathlib import Path from pickle import dumps, loads from unittest.mock import Mock import pytest from citeable import Software from numpy import array, ndarray from scitrack import CachingLogger try: import cogent3 as c3 from cogent3.app import typing as c3types from cogent3.util.union_dict import UnionDict except ImportError: c3 = None c3types = None UnionDict = None from scinexus import open_data_store from scinexus import typing as snx_types from scinexus.composable import ( GENERIC, LOADER, NON_COMPOSABLE, WRITER, ComposableApp, LoaderApp, NonComposableApp, NotCompleted, NotCompletedType, WriterApp, _get_raw_hints, _make_logfile_name, _proxy_input, define_app, is_app, is_app_composable, propagate_source, source_proxy, ) from 
scinexus.data_store import ( DataMember, DataStoreDirectory, Mode, get_unique_id, set_id_from_source, ) from scinexus.deserialise import deserialise_object from scinexus.sqlite_data_store import DataStoreSqlite def _typed_main(self, val: int) -> int: return val def test_composable(): """correctly form string""" @define_app class app_dummyclass_1: def __init__(self, a): self.a = a def main(self, val: int) -> int: return val @define_app class app_dummyclass_2: def __init__(self, b): self.b = b def main(self, val: int) -> int: return val aseqfunc1 = app_dummyclass_1(1) aseqfunc2 = app_dummyclass_2(2) comb = aseqfunc1 + aseqfunc2 expect = "app_dummyclass_1(a=1) + app_dummyclass_2(b=2)" got = str(comb) assert got == expect def test_composables_reuse(): """apps can be reused in multiple compositions""" @define_app class app_dummyclass_1: def __init__(self, a): self.a = a def main(self, val: int) -> int: return val @define_app class app_dummyclass_2: def __init__(self, b): self.b = b def main(self, val: int) -> int: return val @define_app class app_dummyclass_3: def __init__(self, c): self.c = c def main(self, val: int) -> int: return val one = app_dummyclass_1(1) two = app_dummyclass_2(2) three = app_dummyclass_3(3) one + three two + three # reuse of three now works # originals are not mutated assert one.input is None assert two.input is None assert three.input is None def test_composable_to_self(): """this should raise a ValueError""" @define_app class app_dummyclass_1: def __init__(self, a): self.a = a def main(self, val: int) -> int: return val app1 = app_dummyclass_1(1) with pytest.raises(ValueError): _ = app1 + app1 def test_err_result(): """excercise creation of NotCompletedResult""" result = NotCompleted(NotCompletedType.FAIL, "this", "some obj") assert not result assert result.origin == "this" assert result.message == "some obj" assert result.source is None # check source correctly deduced from provided object fake_source = Mock() fake_source.source = "blah" del 
fake_source.info result = NotCompleted(NotCompletedType.FAIL, "this", "err", source=fake_source) assert result.source == "blah" try: _ = 0 msg = "error message" raise ValueError(msg) except ValueError as err: result = NotCompleted(NotCompletedType.FAIL, "this", err.args[0]) assert result.message == "error message" def test_not_completed_result(): """should survive roundtripping pickle""" err = NotCompleted(NotCompletedType.FAIL, "mytest", "can we roundtrip") p = dumps(err) new = loads(p) assert err.type == new.type assert err.message == new.message assert err.source == new.source assert err.origin == new.origin def test_composable_variable_positional_args(): """correctly associate argument vals with their names when have variable positional args""" @define_app class pos_var_pos1: def __init__(self, a, b, *args): self.a = a self.b = b self.args = args def main(self, val: int) -> int: return val instance = pos_var_pos1(2, 3, 4, 5, 6) assert instance._init_vals == {"a": 2, "b": 3, "args": (4, 5, 6)} def test_composable_minimum_parameters(): """correctly associate argument vals with their names when have variable positional args and kwargs""" def test_func1(arg1) -> int: return 1 with pytest.raises(ValueError): _, _ = _get_raw_hints(test_func1, 2) def test_composable_return_type_hint(): """correctly associate argument vals with their names when have variable positional args and kwargs""" def test_func1(arg1): return 1 with pytest.raises(TypeError): _, _ = _get_raw_hints(test_func1, 1) def test_composable_firstparam_type_hint(): """correctly associate argument vals with their names when have variable positional args and kwargs""" def test_func1(arg1) -> int: return 1 with pytest.raises(TypeError): _, _ = _get_raw_hints(test_func1, 1) def test_composable_firstparam_type_is_None(): """correctly associate argument vals with their names when have variable positional args and kwargs""" def test_func1(arg1: None) -> int: return 1 with pytest.raises(TypeError): _, _ = 
_get_raw_hints(test_func1, 1) def test_composable_return_type_is_None(): """correctly associate argument vals with their names when have variable positional args and kwargs""" def test_func1(arg1: int) -> None: return with pytest.raises(TypeError): _, _ = _get_raw_hints(test_func1, 1) def test_composable_variable_positional_args_and_kwargs(): """correctly associate argument vals with their names when have variable positional args and kwargs""" @define_app class pos_var_pos_kw2: def __init__(self, a, *args, c=False): self.a = a self.c = c self.args = args def main(self, val: int) -> int: return val instance = pos_var_pos_kw2(2, 3, 4, 5, 6, c=True) assert instance._init_vals == {"a": 2, "args": (3, 4, 5, 6), "c": True} def test_app_decoration_fails_with_slots(): with pytest.raises(NotImplementedError): @define_app class app_not_supported_slots1: __slots__ = ("a",) def __init__(self, a): self.a = a def main(self, val: int) -> int: return val def test_repeated_decoration(): @define_app class app_decorated_repeated1: def __init__(self, a): self.a = a def main(self, val: int) -> int: return val with pytest.raises(TypeError): define_app(app_decorated_repeated1) def test_recursive_decoration(): @define_app class app_docorated_recursive1: def __init__(self, a): self.a = a def main(self, val: int) -> int: define_app(app_docorated_recursive1) return val with pytest.raises(TypeError): app_docorated_recursive1().main(1) def test_inheritance_from_decorated_class(): @define_app class app_decorated_first1: def __init__(self, a): self.a = a def main(self, val: int) -> int: return val with pytest.raises(TypeError): @define_app class app_inherits_decorated1(app_decorated_first1): def __init__(self, a): self.a = a def main(self, val: int) -> int: return val def test_super_in_decorated_class_with_base(): """super() works when decorated class inherits from an undecorated base.""" class MyBase: def __init__(self, x: int) -> None: self.x = x @define_app class my_app(MyBase): def 
__init__(self, x: int = 1) -> None: super().__init__(x) def main(self, data: int) -> int: return data + self.x app = my_app(x=5) assert app.x == 5 assert app(3) == 8 def test_super_in_composableapp_subclass_with_base(): """super() works when ComposableApp subclass also inherits from another base.""" class MyBase: def __init__(self, x: int) -> None: self.x = x class my_app(ComposableApp[int, int], MyBase): def __init__(self, x: int = 1) -> None: super().__init__(x) def main(self, data: int) -> int: return data + self.x app = my_app(x=5) assert app.x == 5 assert app(3) == 8 # have to define this at module level for pickling to work @define_app def func2app(arg1: int, exponent: int) -> float: return arg1**exponent @define_app def float2int(val: float) -> int: return int(val) def test_decorate_app_function(): """works on functions now""" sqd = func2app(exponent=2) assert sqd(3) == 9 assert inspect.isclass(func2app) def test_roundtrip_decorated_function(): """decorated function can be pickled/unpickled""" sqd = func2app(exponent=2) u = pickle.loads(pickle.dumps(sqd)) assert u(4) == 16 def test_roundtrip_composed_app(): """composed app can be pickled/unpickled""" composed = func2app(exponent=2) + float2int() u = pickle.loads(pickle.dumps(composed)) assert u(3) == 9 def test_decorated_func_optional(): @define_app(app_type=NON_COMPOSABLE) def power(val: int, pow: int = 1) -> int: return val**pow sqd = power(2) assert sqd(3) == 9 def test_decorated_func_repr(): def kw(val: int = 1) -> int: return val**val def kw_kw(val: int = 1, pow: int = 1) -> int: # noqa: A002 return val**pow def pos(val: int) -> int: return val**val def pos_pos(val: int, pow: int) -> int: # noqa: A002 return val**pow def pos_kw(val: int, pow: int = 1) -> int: # noqa: A002 return val**pow fns = {fn: func for fn, func in locals().items() if callable(func)} args = {"pos": 4, "kw": {"pow": 3}} for name, func in fns.items(): app = define_app(func) if len(name.split("_")) == 1: instance = app() expect = 
f"{name}()" elif name.endswith("kw"): instance = app(**args["kw"]) expect = f"{name}(pow={args['kw']['pow']})" else: instance = app(args["pos"]) expect = f"{name}(pow={args['pos']})" assert repr(instance) == expect, name def test_decorated_func_just_args(): @define_app(app_type=NON_COMPOSABLE) def power(val: int, pow: int) -> int: # noqa: A002 return val**pow sqd = power() assert sqd(3, 3) == 27 def test_decorated_app_is_app(): """check is_app for define_app decorated apps""" @define_app class app_test_isapp1: def main(self, data: int) -> int: return data assert is_app(app_test_isapp1) def test_undecorated_app_is_not_an_app(): """check is_app for non-decorated apps""" class app_not_composable1: def main(self, data: int) -> int: return data assert not is_app(app_not_composable1) def test_add_non_composable_apps(): @define_app(app_type=NON_COMPOSABLE) class app_non_composable1: def __init__(self): pass def main(self, val: int) -> int: return val @define_app(app_type=NON_COMPOSABLE) class app_non_composable2: def __init__(self): pass def main(self, val: int) -> int: return val app_non_composable1.__add__ = ComposableApp.__add__ app_non_composable2.__add__ = ComposableApp.__add__ app1 = app_non_composable1() app2 = app_non_composable2() with pytest.raises(TypeError): app1 + app2 _types_null = (list, []), (tuple, ()) @pytest.mark.parametrize(("in_type", "input_"), _types_null) def test_handles_null_series_input(in_type, input_): """apps correctly handle null output""" @define_app def null_in(val: in_type, pow: int) -> int: # noqa: A002, ARG001 return 2 app = null_in(pow=2) got = app(input_) assert isinstance(got, NotCompleted) @pytest.mark.parametrize("ret_type", [0, array([]), [], {}]) def test_handles_null_output(ret_type): """apps correctly handle null output""" @define_app def null_out(val: ndarray, pow: int) -> int: # noqa: A002, ARG001 return ret_type app = null_out(pow=2) d = array([3, 3]) got = app(d) assert isinstance(got, type(ret_type)) def 
test_handles_None(): """apps correctly handle null output""" @define_app def none_out(val: ndarray, pow: int) -> int: # noqa: A002, ARG001 return None @define_app def take_int(val: int) -> int: return val app = none_out(pow=2) d = array([3, 3]) got = app(d) assert isinstance(got, NotCompleted) app = none_out(pow=2) + take_int() d = array([3, 3]) got = app(d) assert isinstance(got, NotCompleted) def test_validate_data_type_not_completed_pass_through(): # returns the instance of a NotCompleted created by an input @define_app def take_int1(val: int) -> int: # noqa: ARG001 return NotCompleted( NotCompletedType.ERROR, "take_int1", "external to app", source="unknown" ) @define_app def take_int2(val: int) -> int: return val app = take_int1() + take_int2() got = app(2) assert got.origin == "take_int1" @pytest.mark.parametrize( ("first", "ret"), [(tuple[set[str]], int), (int, tuple[set[str]])], ) def test_complex_type(first, ret): # deep nesting now allowed (typeguard handles arbitrary nesting) @define_app class x: def main(self, data: first) -> ret: return data @pytest.mark.parametrize("hint", [tuple[set[str]], tuple[tuple[set[str]]]]) def test_complex_type_depths(hint): # deep nesting now allowed (typeguard handles arbitrary nesting) @define_app class x: def main(self, data: hint) -> bool: # noqa: ARG002 return True @pytest.mark.parametrize("hint", [int, set[str]]) def test_complex_type_allowed_depths(hint): # allowed <=2-deep nesting of types @define_app class x: def main(self, data: hint) -> int: # noqa: ARG002 return int @pytest.mark.parametrize( "meth", [ "__call__", "__repr__", "__str__", "__new__", "__copy__", "__eq__", "__add__", "input", "apply_to", "set_logger", "_validate_data_type", "as_completed", "_get_citations", "disconnect", ], ) def test_forbidden_methods_composable_app(meth): class app_forbidden_methods1: def __init__(self, a): self.a = a def main(self, val: int) -> int: return val def function1(): pass setattr(app_forbidden_methods1, meth, function1) 
with pytest.raises(TypeError): define_app(app_type=WRITER)(app_forbidden_methods1) @pytest.mark.parametrize( "meth", [ "__call__", "__repr__", "__str__", "__new__", "__copy__", "__eq__", "_validate_data_type", "as_completed", "_get_citations", ], ) def test_forbidden_methods_non_composable_app(meth): class app_forbidden_methods2: def __init__(self, a): self.a = a def main(self, val: int) -> int: return val def function1(): pass setattr(app_forbidden_methods2, meth, function1) with pytest.raises(TypeError): define_app(app_type=NON_COMPOSABLE)(app_forbidden_methods2) def test_forbidden_input_attribute(): """user-defined input attribute (non-function) is rejected on composable apps""" class app_with_input_attr: input = "some_value" def main(self, val: int) -> int: return val with pytest.raises(TypeError): define_app(app_with_input_attr) @pytest.mark.parametrize( "meth", [ "__call__", "__repr__", "__str__", "__new__", "__copy__", "__eq__", "__add__", "_validate_data_type", "as_completed", "_get_citations", "disconnect", ], ) def test_forbidden_methods_subclass_composable(meth): def forbidden(): pass with pytest.raises(TypeError): type("bad_app", (ComposableApp,), {"main": _typed_main, meth: forbidden}) @pytest.mark.parametrize( "meth", [ "__call__", "__repr__", "__str__", "__new__", "__copy__", "__eq__", "_validate_data_type", "as_completed", "_get_citations", ], ) def test_forbidden_methods_subclass_non_composable(meth): def forbidden(): pass with pytest.raises(TypeError): type("bad_app", (NonComposableApp,), {"main": _typed_main, meth: forbidden}) @pytest.mark.parametrize( "meth", ["apply_to", "set_logger"], ) def test_forbidden_methods_subclass_writer(meth): def forbidden(): pass with pytest.raises(TypeError): type("bad_app", (WriterApp,), {"main": _typed_main, meth: forbidden}) def test_forbidden_input_attribute_subclass(): with pytest.raises(TypeError): type( "bad_app", (ComposableApp,), {"main": _typed_main, "input": "some_value"}, ) @pytest.mark.parametrize( 
"prop", ["check_data_type", "citations", "bib"], ) def test_forbidden_properties_subclass(prop): with pytest.raises(TypeError): type( "bad_app", (ComposableApp,), {"main": _typed_main, prop: property(lambda self: None)}, ) @pytest.mark.parametrize( "prop", ["check_data_type", "citations", "bib"], ) def test_forbidden_properties_define_app(prop): class my_app: def main(self, val: int) -> int: return val setattr(my_app, prop, property(lambda self: None)) with pytest.raises(TypeError): define_app(my_app) def test_skip_not_completed(): @define_app(skip_not_completed=False) def takes_not_completed(val: snx_types.SerialisableType) -> dict: return val.to_rich_dict() app = takes_not_completed() nc = NotCompleted(NotCompletedType.ERROR, "test", "for tracing", source="blah") got = app(nc) assert isinstance(got, dict) assert got == nc.to_rich_dict() def test_copies_doc_from_func(): @define_app def delme(val: snx_types.SerialisableType) -> dict: """my docstring""" return val.to_rich_dict() assert delme.__doc__ == "my docstring" @define_app def delme2(val: snx_types.SerialisableType) -> dict: """my docstring Notes ----- body """ return val.to_rich_dict() assert delme2.__doc__ == "my docstring" assert delme2.__init__.__doc__.split() == ["Notes", "-----", "body"] def test_bad_wrap(): def foo(a: "str") -> int: return int(a) with pytest.raises(NotImplementedError): define_app(foo) def bar(a: str) -> "int": return int(a) with pytest.raises(NotImplementedError): define_app(bar) def _make_cite(**kwargs): defaults = { "author": ["Doe, J"], "title": "test", "year": 2024, "url": "https://example.com", "version": "1.0", "license": "MIT", "doi": "10.0/test", "publisher": "test", } defaults.update(kwargs) return Software(**defaults) def test_single_app_with_citation(): cite = _make_cite() @define_app(cite=cite) class cited_app: def main(self, val: int) -> int: return val app = cited_app() assert app._cite is cite assert app.citations == (cite,) def test_single_app_without_citation(): 
@define_app class uncited_app: def main(self, val: int) -> int: return val app = uncited_app() assert app._cite is None assert app.citations == () def test_composed_apps_all_with_citations(): cite_a = _make_cite(title="A") cite_b = _make_cite(title="B") @define_app(cite=cite_a) class app_a: def main(self, val: int) -> int: return val @define_app(cite=cite_b) class app_b: def main(self, val: int) -> int: return val composed = app_a() + app_b() assert composed.citations == (cite_b, cite_a) def test_composed_apps_some_with_citations(): cite_a = _make_cite(title="A") @define_app(cite=cite_a) class app_c: def main(self, val: int) -> int: return val @define_app class app_d: def main(self, val: int) -> int: return val composed = app_c() + app_d() assert composed.citations == (cite_a,) def test_composed_apps_none_with_citations(): @define_app class app_e: def main(self, val: int) -> int: return val @define_app class app_f: def main(self, val: int) -> int: return val composed = app_e() + app_f() assert composed.citations == () def test_composed_three_app_chain(): cite_l = _make_cite(title="L") cite_g = _make_cite(title="G") cite_w = _make_cite(title="W") @define_app(cite=cite_l) class chain_l: def main(self, val: int) -> int: return val @define_app(cite=cite_g) class chain_g: def main(self, val: int) -> int: return val @define_app(cite=cite_w) class chain_w: def main(self, val: int) -> int: return val composed = chain_l() + chain_g() + chain_w() assert composed.citations == (cite_w, cite_g, cite_l) def test_duplicate_citation_deduplication(): cite = _make_cite() @define_app(cite=cite) class dup_a: def main(self, val: int) -> int: return val @define_app(cite=cite) class dup_b: def main(self, val: int) -> int: return val composed = dup_a() + dup_b() assert composed.citations == (cite,) def test_non_composable_app_with_citation(): cite = _make_cite() @define_app(app_type=NON_COMPOSABLE, cite=cite) class nc_cited: def main(self, val: int) -> int: return val app = nc_cited() 
assert app.citations == (cite,) def test_cite_sets_app_attribute(): cite = _make_cite() @define_app(cite=cite) class my_special_app: def main(self, val: int) -> int: return val app = my_special_app() assert app.citations[0].app == "my_special_app" def test_cite_shared_across_apps(): cite = _make_cite() @define_app(cite=cite) class app_one: def main(self, val: int) -> int: return val @define_app(cite=cite) class app_two: def main(self, val: int) -> int: return val a = app_one() b = app_two() assert a.citations[0].app == "app_one" assert b.citations[0].app == "app_two" def test_bib_single_app_with_citation(): cite = _make_cite() @define_app(cite=cite) class bib_app: def main(self, val: int) -> int: return val app = bib_app() assert app.bib == str(cite) def test_bib_app_without_citation(): @define_app class no_bib_app: def main(self, val: int) -> int: return val app = no_bib_app() assert app.bib == "" def test_bib_composed_apps(): cite_a = _make_cite(title="A") cite_b = _make_cite(title="B") @define_app(cite=cite_a) class bib_a: def main(self, val: int) -> int: return val @define_app(cite=cite_b) class bib_b: def main(self, val: int) -> int: return val composed = bib_a() + bib_b() assert str(cite_b) in composed.bib assert str(cite_a) in composed.bib assert "\n\n" in composed.bib def test_app_copy(): """shallow copy creates new instance sharing attribute references""" @define_app class app_copy_test: def __init__(self, data): self.data = data def main(self, val: int) -> int: return val original = app_copy_test([1, 2, 3]) copied = copy(original) assert copied is not original assert copied.data is original.data assert copied._init_vals is original._init_vals def test_composition_does_not_mutate_originals(): """composition uses copies so originals stay independent""" @define_app class app_mut_a: def main(self, val: int) -> int: return val @define_app class app_mut_b: def main(self, val: int) -> int: return val a = app_mut_a() b = app_mut_b() _ = a + b assert a.input is 
None assert b.input is None def test_app_reuse_in_multiple_compositions(): """same app instance can be used in multiple compositions""" @define_app class app_reuse_a: def main(self, val: int) -> int: return val @define_app class app_reuse_b: def main(self, val: int) -> int: return val @define_app class app_reuse_c: def main(self, val: int) -> int: return val a = app_reuse_a() b = app_reuse_b() c = app_reuse_c() comb1 = a + b comb2 = a + c comb3 = c + b assert comb1(1) == 1 assert comb2(1) == 1 assert comb3(1) == 1 def test_app_eq(): """__eq__ checks attribute identity""" @define_app class app_eq_test: def __init__(self, x): self.x = x def main(self, val: int) -> int: return val a = app_eq_test(1) b = copy(a) assert a == b c = app_eq_test(1) assert a != c # different instances with different attribute objects assert a != "not an app" def test_check_data_type_default_true(): @define_app class app_cdt: def main(self, val: int) -> int: return val assert app_cdt().check_data_type is True def test_check_data_type_true_rejects_bad_type(): @define_app class app_cdt: def main(self, val: int) -> int: return val app = app_cdt() app.check_data_type = True got = app("not_an_int") assert isinstance(got, NotCompleted) def test_check_data_type_false_skips_validation(): @define_app class app_cdt: def main(self, val: int) -> int: return val app = app_cdt() app.check_data_type = False got = app("42") assert got == "42" def test_check_data_type_propagates_in_composition(): @define_app class app_cdt_a: def main(self, val: int) -> int: return val @define_app class app_cdt_b: def main(self, val: int) -> int: return val @define_app class app_cdt_c: def main(self, val: int) -> int: return val composed = app_cdt_a() + app_cdt_b() + app_cdt_c() assert composed.check_data_type is True assert composed.input.check_data_type is True assert composed.input.input.check_data_type is True composed.check_data_type = False assert composed.check_data_type is False assert composed.input.check_data_type 
is False assert composed.input.input.check_data_type is False def test_check_data_type_does_not_affect_originals(): @define_app class app_cdt_a: def main(self, val: int) -> int: return val @define_app class app_cdt_b: def main(self, val: int) -> int: return val a = app_cdt_a() b = app_cdt_b() composed = a + b composed.check_data_type = False assert a.check_data_type is True assert b.check_data_type is True def test_check_data_type_false_composed_end_to_end(): @define_app class app_cdt_a: def main(self, val: int) -> int: return val @define_app class app_cdt_b: def main(self, val: int) -> int: return val composed = app_cdt_a() + app_cdt_b() composed.check_data_type = False got = composed("not_an_int") assert got == "not_an_int" def test_check_data_type_re_enable(): @define_app class app_cdt: def main(self, val: int) -> int: return val app = app_cdt() app.check_data_type = False assert app("not_int") == "not_int" app.check_data_type = True got = app("not_int") assert isinstance(got, NotCompleted) def test_make_logfile_name(): @define_app class logname_app: def main(self, val: int) -> int: return val app = logname_app() name = _make_logfile_name(app) assert name.startswith("logname_app") assert name.endswith(".log") def test_not_completed_repr(): nc = NotCompleted(NotCompletedType.ERROR, "origin", "msg", source="src") r = repr(nc) assert "ERROR" in r assert "origin" in r assert "msg" in r def test_not_completed_source_exception(): """source that raises in get_data_source results in None""" nc = NotCompleted(NotCompletedType.ERROR, "test", "msg", source=42) assert nc.source is None def test_call_with_none(): @define_app class none_app: def main(self, val: int) -> int: return val app = none_app() got = app(None) assert isinstance(got, NotCompleted) assert "None" in got.message def test_validate_data_type_source_proxy(): @define_app class proxy_app: def main(self, val: int) -> int: return val app = proxy_app() sp = source_proxy(42) got = app(sp) assert got == 42 def 
test_add_writer_lhs_raises(): @define_app(app_type=WRITER) class writer_app: def __init__(self, data_store): self.data_store = data_store def main(self, data: int, identifier: str = "") -> int: return data @define_app class generic_app: def main(self, val: int) -> int: return val ds = Mock() ds.source = "/tmp" w = writer_app(data_store=ds) g = generic_app() with pytest.raises(TypeError, match="writer"): w + g def test_add_loader_rhs_raises(): @define_app(app_type=LOADER) class loader_app: def main(self, val: str) -> int: return int(val) @define_app class generic_app: def main(self, val: int) -> int: return val g = generic_app() lo = loader_app() with pytest.raises(TypeError, match="loader"): g + lo def test_add_incompatible_types(): @define_app class str_app: def main(self, val: int) -> str: return str(val) @define_app class int_only_app: def main(self, val: int) -> int: return val with pytest.raises(TypeError, match="incompatible"): str_app() + int_only_app() def test_is_app_composable_true(): @define_app class comp_app: def main(self, val: int) -> int: return val assert is_app_composable(comp_app) def test_is_app_composable_false_non_composable(): @define_app(app_type=NON_COMPOSABLE) class nc_app: def main(self, val: int) -> int: return val assert not is_app_composable(nc_app) def test_is_app_composable_false_not_app(): assert not is_app_composable("not an app") def test_define_app_on_non_class(): with pytest.raises(ValueError, match="not a class"): define_app(42) def test_source_proxy_basic(): obj = [1, 2, 3] sp = source_proxy(obj) assert sp.obj is obj assert sp.source is obj assert isinstance(sp.uuid, str) def test_source_proxy_set_obj(): sp = source_proxy([1, 2]) sp.set_obj([3, 4]) assert sp.obj == [3, 4] assert sp.source == [1, 2] def test_source_proxy_source_setter(): sp = source_proxy("original") # property setter must be called directly because __setattr__ intercepts type(sp).source.fset(sp, "new_source") assert sp.source == "new_source" def 
test_source_proxy_getattr(): sp = source_proxy([1, 2, 3]) assert sp.count(1) == 1 def test_source_proxy_setattr(): obj = Mock() sp = source_proxy(obj) sp.value = 42 assert obj.value == 42 def test_source_proxy_bool(): assert bool(source_proxy([1])) assert not bool(source_proxy([])) def test_source_proxy_repr_str(): sp = source_proxy([1, 2]) assert repr(sp) == repr([1, 2]) assert str(sp) == str([1, 2]) def test_source_proxy_eq(): sp = source_proxy(42) assert sp == 42 assert sp != 43 def test_source_proxy_len(): sp = source_proxy([1, 2, 3]) assert len(sp) == 3 def test_source_proxy_pickle(): sp = source_proxy("hello") restored = pickle.loads(pickle.dumps(sp)) assert restored.obj == "hello" assert restored.source == "hello" def test_proxy_input_with_source(): item = Mock() item.source = "test" item.__bool__ = lambda self: True result = _proxy_input([item]) assert len(result) == 1 assert result[0] is item def test_proxy_input_without_source(): result = _proxy_input(["a", "b"]) assert len(result) == 2 assert all(isinstance(r, source_proxy) for r in result) def test_proxy_input_skips_falsy(): result = _proxy_input([0, "", "valid"]) assert len(result) == 1 def test_propagate_source_non_proxy(): @define_app class prop_app: def main(self, val: int) -> int: return val * 2 app = prop_app() ps = propagate_source(app, get_unique_id) got = ps(4) assert got == 8 def test_propagate_source_proxy_with_source(): @define_app class prop_app2: def main(self, val: str) -> str: return val.upper() app = prop_app2() class HasSource: def __init__(self, v, src): self.value = v self.source = src def upper(self): return HasSource(self.value.upper(), self.source) obj = HasSource("hello", "my_source") sp = source_proxy(obj) ps = propagate_source(app, get_unique_id) got = ps(sp) # result has source via get_unique_id, so returned directly assert not isinstance(got, source_proxy) or got.obj is not obj def test_propagate_source_proxy_no_source(): @define_app class prop_app3: def main(self, val: int) 
-> int: return val + 1 app = prop_app3() sp = source_proxy(10) ps = propagate_source(app, get_unique_id) got = ps(sp) assert isinstance(got, source_proxy) assert got.obj == 11 def test_as_completed_serial(tmp_path): @define_app class ac_app: def main(self, val: str) -> str: return val.upper() app = ac_app() results = list(app.as_completed(["a", "b", "c"], show_progress=False)) objs = [r.obj if isinstance(r, source_proxy) else r for r in results] assert "A" in objs assert len(objs) == 3 def test_as_completed_empty(): @define_app class ac_empty: def main(self, val: int) -> int: return val app = ac_empty() results = list(app.as_completed([], show_progress=False)) assert results == [] def test_as_completed_string_input(): @define_app class ac_str: def main(self, val: str) -> str: return val app = ac_str() results = list(app.as_completed("hello", show_progress=False)) assert len(results) == 1 def test_as_completed_with_progress_instance(): from scinexus.progress import NoProgress @define_app class ac_prog: def main(self, val: str) -> str: return val.upper() app = ac_prog() results = list(app.as_completed(["a", "b"], show_progress=NoProgress())) objs = [r.obj if isinstance(r, source_proxy) else r for r in results] assert len(objs) == 2 assert "A" in objs def test_as_completed_with_tqdm_progress(): from scinexus.progress import TqdmProgress @define_app class ac_tqdm: def main(self, val: int) -> int: return val * 2 app = ac_tqdm() results = list( app.as_completed([1, 2, 3], show_progress=TqdmProgress(disable=True)) ) objs = [r.obj if isinstance(r, source_proxy) else r for r in results] assert len(objs) == 3 def test_writer_set_logger_default(tmp_path): @define_app(app_type=WRITER) class w_logger: def __init__(self, data_store): self.data_store = data_store def main(self, data: int, identifier: str = "") -> int: return data ds = DataStoreDirectory(tmp_path / "out", mode=Mode.w, suffix="txt") app = w_logger(data_store=ds) app.set_logger() assert app.logger is not None 
app.logger.shutdown() def test_writer_set_logger_false(tmp_path): @define_app(app_type=WRITER) class w_logger2: def __init__(self, data_store): self.data_store = data_store def main(self, data: int, identifier: str = "") -> int: return data ds = DataStoreDirectory(tmp_path / "out", mode=Mode.w, suffix="txt") app = w_logger2(data_store=ds) app.set_logger(logger=False) assert app.logger is None def test_writer_set_logger_invalid_type(tmp_path): @define_app(app_type=WRITER) class w_logger3: def __init__(self, data_store): self.data_store = data_store def main(self, data: int, identifier: str = "") -> int: return data ds = DataStoreDirectory(tmp_path / "out", mode=Mode.w, suffix="txt") app = w_logger3(data_store=ds) with pytest.raises(TypeError, match="CachingLogger"): app.set_logger(logger="not a logger") def test_writer_apply_to(tmp_path): from scinexus.data_store import DataMember src = tmp_path / "src" src.mkdir() for i in range(3): (src / f"item_{i}.txt").write_text(f"data {i}") dstore = DataStoreDirectory(src, suffix="txt") out = tmp_path / "out" out_dstore = DataStoreDirectory(out, mode=Mode.w, suffix="txt") @define_app(app_type=LOADER) class reader: def main(self, val: DataMember) -> str: return val.read() @define_app(app_type=WRITER) class writer: def __init__(self, data_store): self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: return self.data_store.write(unique_id=identifier, data=data) process = reader() + writer(data_store=out_dstore) result = process.apply_to(dstore, logger=False, show_progress=False) assert len(result) == 3 def test_writerapp_subclass_apply_to_writes_once(tmp_path) -> None: """An inheritance-defined writer runs main() exactly once per input. Regression test for the original bug where ``class X(WriterApp)`` ended up with ``app_type=GENERIC``, causing ``apply_to`` to feed the writer's own output back into ``main`` on a second pass. 
""" src = tmp_path / "src" src.mkdir() for i in range(3): (src / f"item_{i}.txt").write_text(f"data {i}") dstore = DataStoreDirectory(src, suffix="txt") out_dstore = DataStoreDirectory(tmp_path / "out", mode=Mode.w, suffix="txt") call_count: list[str] = [] @define_app(app_type=LOADER) class reader: def main(self, val: DataMember) -> str: return val.read() class inherited_writer(WriterApp): def __init__(self, data_store: DataStoreDirectory) -> None: self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: call_count.append(identifier) return self.data_store.write(unique_id=identifier, data=data) assert inherited_writer.app_type is WRITER process = reader() + inherited_writer(data_store=out_dstore) result = process.apply_to(dstore, logger=False, show_progress=False) assert len(result) == 3 assert len(call_count) == 3 def test_apply_to_uses_registered_id_from_source( tmp_path, reset_id_from_source: None, ) -> None: """`apply_to` consults the globally registered ID extractor by default.""" src = tmp_path / "src" src.mkdir() for i in range(3): (src / f"item_{i}.txt").write_text(f"data {i}") dstore = DataStoreDirectory(src, suffix="txt") out_dstore = DataStoreDirectory(tmp_path / "out", mode=Mode.w, suffix="txt") seen_ids: list[str | None] = [] def custom(obj: object) -> str | None: result = get_unique_id(obj) seen_ids.append(result) return result set_id_from_source(custom) @define_app(app_type=LOADER) class reader: def main(self, val: DataMember) -> str: return val.read() @define_app(app_type=WRITER) class writer: def __init__(self, data_store: DataStoreDirectory) -> None: self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: return self.data_store.write(unique_id=identifier, data=data) process = reader() + writer(data_store=out_dstore) process.apply_to(dstore, logger=False, show_progress=False) # `apply_to`'s input-id loop calls `custom` directly with input paths, # so the seen IDs include the 
input filenames (suffix-stripped). assert seen_ids assert any(sid is not None and sid.startswith("item_") for sid in seen_ids) def test_apply_to_explicit_id_from_source_overrides_registered( tmp_path, reset_id_from_source: None, ) -> None: """An explicit ``id_from_source=`` argument wins over the registered func.""" registered_calls: list[object] = [] explicit_calls: list[object] = [] def registered(obj: object) -> str | None: registered_calls.append(obj) return get_unique_id(obj) def explicit(obj: object) -> str | None: explicit_calls.append(obj) return get_unique_id(obj) set_id_from_source(registered) src = tmp_path / "src" src.mkdir() (src / "a.txt").write_text("data") dstore = DataStoreDirectory(src, suffix="txt") out_dstore = DataStoreDirectory(tmp_path / "out", mode=Mode.w, suffix="txt") @define_app(app_type=LOADER) class reader: def main(self, val: DataMember) -> str: return val.read() @define_app(app_type=WRITER) class writer: def __init__(self, data_store: DataStoreDirectory) -> None: self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: return self.data_store.write(unique_id=identifier, data=data) process = reader() + writer(data_store=out_dstore) process.apply_to( dstore, id_from_source=explicit, logger=False, show_progress=False, ) assert explicit_calls, "explicit override should have been called" assert not registered_calls, ( "registered func must not be called when explicit override is supplied" ) def test_not_completed_uses_registered_id_from_source( reset_id_from_source: None, ) -> None: """`NotCompleted` normalises its source via the registered extractor.""" class WithSource: source = "path/to/file.fasta" def custom(obj: object) -> str | None: return f"custom::{obj!r}" set_id_from_source(custom) nc = NotCompleted(NotCompletedType.ERROR, "test", "msg", source=WithSource()) assert nc.source is not None assert nc.source.startswith("custom::") def test_not_completed_default_source_is_unique_id( 
reset_id_from_source: None, ) -> None: """Without a registration, `NotCompleted.source` is the unique ID form. Locks in the behaviour change: the default extractor strips file-format suffixes (``seqs.fasta`` → ``seqs``), where the prior implementation (calling ``get_data_source`` directly) returned the un-stripped form. """ class WithSource: source = "path/to/seqs.fasta" nc = NotCompleted(NotCompletedType.ERROR, "test", "msg", source=WithSource()) assert nc.source == "seqs" def test_get_main_hints_no_main(): from scinexus.composable import _get_main_hints class NoMain: pass with pytest.raises(ValueError, match="main"): _get_main_hints(NoMain) def test_source_proxy_hash(): sp = source_proxy("hello") assert isinstance(hash(sp), int) sp2 = source_proxy("hello") assert hash(sp) != hash(sp2) def test_init_subclass_slots(): with pytest.raises(NotImplementedError, match="slots"): class BadApp(ComposableApp[int, int]): __slots__ = ("x",) def main(self, val: int) -> int: return val def test_writerapp_subclass_implies_writer() -> None: """Inheriting WriterApp yields a WRITER without any class kwarg.""" class my_writer(WriterApp): def __init__(self, data_store: DataStoreDirectory) -> None: self.data_store = data_store def main(self, data: int, identifier: str = "") -> int: return data assert my_writer.app_type is WRITER def test_loaderapp_subclass_implies_loader() -> None: """Inheriting LoaderApp yields a LOADER and is not stripped of input.""" class my_loader(LoaderApp): def main(self, path: str) -> str: return path assert my_loader.app_type is LOADER # _init_subclass_setup only clears `cls.input` for non-LOADER apps. 
assert "input" not in my_loader.__dict__ def test_noncomposableapp_subclass_implies_non_composable() -> None: """Inheriting NonComposableApp yields a NON_COMPOSABLE.""" class my_app(NonComposableApp[int, int]): def main(self, val: int) -> int: return val * 2 assert my_app.app_type is NON_COMPOSABLE def test_explicit_composableapp_subclass_is_generic() -> None: """Direct ComposableApp subclasses still resolve to GENERIC.""" class my_app(ComposableApp[int, int]): def main(self, val: int) -> int: return val assert my_app.app_type is GENERIC def test_validate_data_type_not_completed_skip_true(): @define_app class skip_app: def main(self, val: int) -> int: return val app = skip_app() nc = NotCompleted(NotCompletedType.ERROR, "test", "msg") got = app._validate_data_type(nc) assert isinstance(got, NotCompleted) def test_as_completed_with_datastore(tmp_path): src = tmp_path / "src" src.mkdir() for i in range(2): (src / f"f_{i}.txt").write_text(f"content {i}") dstore = DataStoreDirectory(src, suffix="txt") @define_app class read_app: def main(self, val: str) -> str: return val app = read_app() results = list(app.as_completed(dstore, show_progress=False)) assert len(results) == 2 def test_as_completed_parallel(): app = func2app(exponent=2) results = list(app.as_completed([1, 2, 3], parallel=True, show_progress=False)) objs = [r.obj if isinstance(r, source_proxy) else r for r in results] assert sorted(objs) == [1, 4, 9] def test_writer_apply_to_no_input(tmp_path): @define_app(app_type=WRITER) class lone_writer: def __init__(self, data_store): self.data_store = data_store def main(self, data: int, identifier: str = "") -> int: return data ds = DataStoreDirectory(tmp_path / "out", mode=Mode.w, suffix="txt") app = lone_writer(data_store=ds) with pytest.raises(RuntimeError, match="no composed input"): app.apply_to(["something"], logger=False) def test_apply_to_empty_dstore(tmp_path): from scinexus.data_store import DataMember src = tmp_path / "empty" src.mkdir() dstore = 
DataStoreDirectory(src, suffix="txt") out_dstore = DataStoreDirectory(tmp_path / "out", mode=Mode.w, suffix="txt") @define_app(app_type=LOADER) class reader: def main(self, val: DataMember) -> str: return val.read() @define_app(app_type=WRITER) class writer: def __init__(self, data_store): self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: return self.data_store.write(unique_id=identifier, data=data) process = reader() + writer(data_store=out_dstore) with pytest.raises(ValueError, match="empty"): process.apply_to(dstore, logger=False, show_progress=False) def test_apply_to_skip_existing(tmp_path): from scinexus.data_store import DataMember src = tmp_path / "src" src.mkdir() for i in range(3): (src / f"item_{i}.txt").write_text(f"data {i}") dstore = DataStoreDirectory(src, suffix="txt") out = tmp_path / "out" out_dstore = DataStoreDirectory(out, mode=Mode.w, suffix="txt") @define_app(app_type=LOADER) class reader: def main(self, val: DataMember) -> str: return val.read() @define_app(app_type=WRITER) class writer: def __init__(self, data_store): self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: return self.data_store.write(unique_id=identifier, data=data) process = reader() + writer(data_store=out_dstore) process.apply_to(dstore, logger=False, show_progress=False) assert len(out_dstore) == 3 # run again — existing items should be skipped result = process.apply_to(dstore, logger=False, show_progress=False) assert len(result) == 3 def test_apply_to_with_logging(tmp_path): from scinexus.data_store import DataMember src = tmp_path / "src" src.mkdir() (src / "item.txt").write_text("data") dstore = DataStoreDirectory(src, suffix="txt") out = tmp_path / "out" out_dstore = DataStoreDirectory(out, mode=Mode.w, suffix="txt") @define_app(app_type=LOADER) class reader: def main(self, val: DataMember) -> str: return val.read() @define_app(app_type=WRITER) class writer: def __init__(self, 
data_store): self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: return self.data_store.write(unique_id=identifier, data=data) process = reader() + writer(data_store=out_dstore) result = process.apply_to(dstore, show_progress=False) assert len(result) == 1 # log file should have been written to the data store assert any("log" in str(m) for m in out_dstore.logs) def test_apply_to_logger_true(tmp_path): from scinexus.data_store import DataMember src = tmp_path / "src" src.mkdir() (src / "item.txt").write_text("data") dstore = DataStoreDirectory(src, suffix="txt") out = tmp_path / "out" out_dstore = DataStoreDirectory(out, mode=Mode.w, suffix="txt") @define_app(app_type=LOADER) class reader: def main(self, val: DataMember) -> str: return val.read() @define_app(app_type=WRITER) class writer: def __init__(self, data_store): self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: return self.data_store.write(unique_id=identifier, data=data) process = reader() + writer(data_store=out_dstore) result = process.apply_to(dstore, logger=True, show_progress=False) assert len(result) == 1 assert any("log" in str(m) for m in out_dstore.logs) def test_apply_to_logger_false(tmp_path): from scinexus.data_store import DataMember src = tmp_path / "src" src.mkdir() (src / "item.txt").write_text("data") dstore = DataStoreDirectory(src, suffix="txt") out = tmp_path / "out" out_dstore = DataStoreDirectory(out, mode=Mode.w, suffix="txt") @define_app(app_type=LOADER) class reader: def main(self, val: DataMember) -> str: return val.read() @define_app(app_type=WRITER) class writer: def __init__(self, data_store): self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: return self.data_store.write(unique_id=identifier, data=data) process = reader() + writer(data_store=out_dstore) result = process.apply_to(dstore, logger=False, show_progress=False) assert len(result) == 1 assert 
len(list(out_dstore.logs)) == 0 assert process.logger is None def test_not_completed_to_json(): """NotCompleted.to_json returns valid JSON""" import json nc = NotCompleted(NotCompletedType.ERROR, "origin", "a message") result = json.loads(nc.to_json()) assert result["type"] == "scinexus.composable.NotCompleted" def test_deserialise_not_completed(): """roundtrip NotCompleted through JSON deserialisation""" nc = NotCompleted(NotCompletedType.ERROR, "origin", "msg") data = nc.to_rich_dict() result = deserialise_object(data) assert isinstance(result, NotCompleted) assert result.message == "msg" def test_app_main_exception_returns_not_completed(): """exception in main() returns NotCompleted instead of raising""" @define_app class raises_app: def main(self, val: int) -> int: msg = "boom" raise ValueError(msg) app = raises_app() result = app(1) assert isinstance(result, NotCompleted) assert result.type is NotCompletedType.ERROR assert "boom" in result.message def test_not_completed_source_raises(): """source that raises in get_data_source sets source to None""" class BadSource: @property def source(self): msg = "broken" raise RuntimeError(msg) nc = NotCompleted(NotCompletedType.ERROR, "origin", "msg", source=BadSource()) assert nc.source is None def test_composed_skip_not_completed_input(): """NotCompleted passed as first input to composed pipeline is returned""" @define_app class first: def main(self, val: int) -> int: return val + 1 @define_app class second: def main(self, val: int) -> int: return val * 2 composed = first() + second() nc = NotCompleted(NotCompletedType.ERROR, "test", "fail") result = composed(nc) assert isinstance(result, NotCompleted) assert result is nc def test_apply_to_string_input(tmp_path): """apply_to accepts a single string path as dstore""" src = tmp_path / "src" src.mkdir() (src / "item.txt").write_text("data") out_dstore = DataStoreDirectory(tmp_path / "out", mode=Mode.w, suffix="txt") @define_app(app_type=LOADER) class reader: def main(self, 
val: str) -> str: from pathlib import Path return Path(val).read_text() @define_app(app_type=WRITER) class writer: def __init__(self, data_store): self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: return self.data_store.write(unique_id=identifier, data=data) process = reader() + writer(data_store=out_dstore) result = process.apply_to(str(src / "item.txt"), logger=False, show_progress=False) assert len(result) == 1 def test_apply_to_duplicate_id(tmp_path): """apply_to raises ValueError for duplicate identifiers""" src = tmp_path / "src" src.mkdir() for i in range(2): (src / f"item_{i}.txt").write_text(f"data {i}") dstore = DataStoreDirectory(src, suffix="txt") out_dstore = DataStoreDirectory(tmp_path / "out", mode=Mode.w, suffix="txt") @define_app(app_type=LOADER) class reader: def main(self, val: DataMember) -> str: return val.read() @define_app(app_type=WRITER) class writer: def __init__(self, data_store): self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: return self.data_store.write(unique_id=identifier, data=data) process = reader() + writer(data_store=out_dstore) with pytest.raises(ValueError, match="non-unique identifier"): process.apply_to( dstore, id_from_source=lambda _: "same_id", logger=False, show_progress=False, ) def test_apply_to_skips_existing_items(tmp_path): """apply_to skips items already in the data store""" src = tmp_path / "src" src.mkdir() for i in range(3): (src / f"item_{i}.txt").write_text(f"data {i}") dstore = DataStoreDirectory(src, suffix="txt") out = tmp_path / "out" out_dstore = DataStoreDirectory(out, mode=Mode.w, suffix="txt") @define_app(app_type=LOADER) class reader: def main(self, val: DataMember) -> str: return val.read() @define_app(app_type=WRITER) class writer: def __init__(self, data_store): self.data_store = data_store def main(self, data: str, identifier: str = "") -> DataMember: return self.data_store.write(unique_id=identifier, data=data) 
process = reader() + writer(data_store=out_dstore) # pre-write one item so it already exists out_dstore.write(unique_id="item_0", data="existing") process.apply_to(dstore, logger=False, show_progress=False) # item_0 skipped, items 1 and 2 written → total 3 assert len(out_dstore.completed) == 3 def test_triggers_bugcatcher(): """a composable that does not trap failures returns NotCompletedResult requesting bug report""" @define_app(app_type=LOADER) class reader: def main(self, val: str) -> str: return val.read() read = reader() read.main = lambda x: None got = read("somepath.fasta") # pylint: disable=not-callable assert isinstance(got, NotCompleted) assert got.type is NotCompletedType.BUG @pytest.mark.parametrize("func", [str, repr]) def test_user_function_repr(func): @define_app def bar(val: float, num=3) -> float: return val * num got = func(bar(num=3)) assert got == "bar(num=3)" @pytest.fixture(params=[Path, None, str]) def source_type(DATA_DIR, request): fname = "brca1.fasta" dstore = open_data_store(DATA_DIR, suffix="fasta") if request.param is not None: return request.param(DATA_DIR / fname) return next((m for m in dstore if m.unique_id == fname), None) def test_composable_unwraps_source_proxy_as_completed(source_type): app = c3.get_app("load_unaligned", format_name="fasta", moltype="dna") result = next(iter(app.as_completed([source_type], show_progress=False))) got = result.source assert got.endswith("brca1.fasta") assert not isinstance(got, source_proxy) def test_composable_unwraps_source_proxy_call(source_type): app = c3.get_app("load_unaligned", format_name="fasta", moltype="dna") result = app(source_type) got = result.source assert got.endswith("brca1.fasta") assert not isinstance(got, source_proxy) def test_as_completed(DATA_DIR): """correctly applies iteratively""" dstore = open_data_store(DATA_DIR, suffix="fasta", limit=3) reader = c3.get_app("load_unaligned", format_name="fasta", moltype="dna") got = list(reader.as_completed(dstore, 
show_progress=False)) assert len(got) == len(dstore) # should also be able to apply the results to another composable func min_length = c3.get_app("sample.min_length", 10) got = list(min_length.as_completed(got, show_progress=False)) assert len(got) == len(dstore) # should work on a chained function proc = reader + min_length got = list(proc.as_completed(dstore, show_progress=False)) assert len(got) == len(dstore) # and works on a list of just strings got = list(proc.as_completed([str(m) for m in dstore], show_progress=False)) assert len(got) == len(dstore) # or a single string path = str(Path(dstore[0].data_store.source) / dstore[0].unique_id) got = list(proc.as_completed(path, show_progress=False)) assert len(got) == 1 assert got[0].__class__.__name__.endswith("SequenceCollection") @pytest.mark.parametrize("data", [(), ("", "")]) def test_as_completed_empty_data(data): """correctly applies iteratively""" reader = c3.get_app("load_unaligned", format_name="fasta", moltype="dna") min_length = c3.get_app("sample.min_length", 10) proc = reader + min_length # returns empty input got = list(proc.as_completed(data)) assert got == [] _as_completed_w_wout_source_params = [{"a": 2}] if c3 is not None: _as_completed_w_wout_source_params.extend( [ UnionDict(a=2, source="blah.txt"), c3.make_aligned_seqs( {"a": "ACGT"}, info={"source": "blah.txt"}, moltype="dna", ), ], ) @pytest.mark.parametrize("data", _as_completed_w_wout_source_params) def test_as_completed_w_wout_source(data): from cogent3.app.typing import AlignedSeqsType @define_app def pass_through(val: dict | UnionDict | AlignedSeqsType) -> dict: return val app = pass_through() # pylint: disable=not-callable,no-value-for-parameter got = list(app.as_completed([data], show_progress=False)) assert bool(got), got @pytest.mark.parametrize("klass", [DataStoreDirectory, DataStoreSqlite]) @pytest.mark.parametrize("cast", [str, Path]) def test_apply_to_strings(DATA_DIR, tmp_dir, klass, cast): """apply_to handles 
non-DataMember""" dname = "test_apply_to_strings" outpath = tmp_dir / dname dstore = open_data_store(DATA_DIR, suffix="fasta", limit=3) dstore = [cast(str(m)) for m in dstore] reader = c3.get_app("load_aligned", format_name="fasta", moltype="dna") min_length = c3.get_app("min_length", 10) kwargs = {"suffix": "fasta"} if klass == DataStoreDirectory else {} writer = c3.get_app("write_seqs", data_store=klass(outpath, mode="w", **kwargs)) process = reader + min_length + writer # create paths as strings _ = process.apply_to(dstore, show_progress=False) assert len(process.data_store.logs) == 1 @pytest.mark.parametrize("klass", [DataStoreDirectory, DataStoreSqlite]) @pytest.mark.parametrize("cast", [str, Path]) def test_as_completed_strings(DATA_DIR, tmp_dir, klass, cast): """as_completed handles non-DataMember""" dname = "test_apply_to_strings" outpath = tmp_dir / dname dstore = open_data_store(DATA_DIR, suffix="fasta", limit=3) dstore = [cast(str(m)) for m in dstore] reader = c3.get_app("load_aligned", format_name="fasta", moltype="dna") min_length = c3.get_app("min_length", 10) if klass == DataStoreDirectory: writer = c3.get_app("write_seqs", klass(outpath, mode="w", suffix="fasta")) else: writer = c3.get_app("write_seqs", klass(outpath, mode="w")) orig_length = len(writer.data_store) process = reader + min_length + writer # create paths as strings got = list(process.as_completed(dstore, show_progress=False)) assert len(got) > orig_length def test_apply_to_non_unique_identifiers(tmp_dir): """should fail if non-unique names""" dstore = [ "brca1.fasta", "brca1.fasta", ] reader = c3.get_app("load_aligned", format_name="fasta", moltype="dna") min_length = c3.get_app("min_length", 10) outpath = tmp_dir / "test_apply_to_non_unique_identifiers" writer = c3.get_app( "write_seqs", DataStoreDirectory(outpath, mode="w", suffix="fasta"), ) process = reader + min_length + writer with pytest.raises(ValueError): process.apply_to(dstore) def test_apply_to_logging(DATA_DIR, tmp_dir): 
"""correctly creates log file""" dstore = open_data_store(DATA_DIR, suffix="fasta", limit=3) reader = c3.get_app("load_aligned", format_name="fasta", moltype="dna") min_length = c3.get_app("min_length", 10) out_dstore = open_data_store(tmp_dir / "delme.sqlitedb", mode="w") writer = c3.get_app("write_db", out_dstore) process = reader + min_length + writer process.apply_to(dstore, show_progress=False) # always creates a log assert len(process.data_store.logs) == 1 def test_apply_to_logger(DATA_DIR, tmp_dir): """correctly uses user provided logger""" dstore = open_data_store(DATA_DIR, suffix="fasta", limit=3) LOGGER = CachingLogger() reader = c3.get_app("load_aligned", format_name="fasta", moltype="dna") min_length = c3.get_app("min_length", 10) out_dstore = open_data_store(tmp_dir / "delme.sqlitedb", mode="w") writer = c3.get_app("write_db", out_dstore) process = reader + min_length + writer process.apply_to(dstore, show_progress=False, logger=LOGGER) assert len(process.data_store.logs) == 1 def test_apply_to_no_logger(DATA_DIR, tmp_dir): """correctly uses user provided logger""" dstore = open_data_store(DATA_DIR, suffix="fasta", limit=3) reader = c3.get_app("load_aligned", format_name="fasta", moltype="dna") min_length = c3.get_app("min_length", 10) out_dstore = open_data_store(tmp_dir / "delme.sqlitedb", mode="w") writer = c3.get_app("write_db", out_dstore) process = reader + min_length + writer process.apply_to(dstore, show_progress=False, logger=False) assert len(process.data_store.logs) == 0 assert process.logger is None @pytest.mark.parametrize("logger_val", ["somepath.log"]) def test_apply_to_invalid_logger(DATA_DIR, tmp_dir, logger_val): """incorrect logger value raises TypeError""" dstore = open_data_store(DATA_DIR, suffix="fasta", limit=3) reader = c3.get_app("load_aligned", format_name="fasta", moltype="dna") min_length = c3.get_app("min_length", 10) out_dstore = open_data_store(tmp_dir / "delme.sqlitedb", mode="w") writer = c3.get_app("write_db", 
out_dstore) process = reader + min_length + writer with pytest.raises(TypeError): process.apply_to(dstore, show_progress=False, logger=logger_val) @pytest.fixture def fasta_dir(DATA_DIR, tmp_dir): filenames = DATA_DIR.glob("*.fasta") fasta_dir = tmp_dir / "fasta" fasta_dir.mkdir(parents=True, exist_ok=True) for fn in filenames: dest = fasta_dir / fn.name dest.write_text(fn.read_text()) return fasta_dir @pytest.fixture def log_data(DATA_DIR): path = DATA_DIR / "scitrack.log" return path.read_text() @pytest.fixture def ro_dstore(fasta_dir): return DataStoreDirectory(fasta_dir, suffix="fasta", mode="r") @pytest.fixture def completed_objects(ro_dstore): return {f"{Path(m.unique_id).stem}": m.read() for m in ro_dstore} @pytest.fixture def write_dir1(tmp_dir): write_dir1 = tmp_dir / "write1" write_dir1.mkdir(parents=True, exist_ok=True) yield write_dir1 shutil.rmtree(write_dir1) @pytest.fixture def write_dir2(tmp_dir): write_dir2 = tmp_dir / "write2" write_dir2.mkdir(parents=True, exist_ok=True) yield write_dir2 shutil.rmtree(write_dir2) @pytest.fixture def nc_objects(): return { f"id_{i}": NotCompleted("ERROR", "location", "message", source=f"id_{i}") for i in range(3) } @pytest.fixture def nc_dstore(tmp_dir, nc_objects): dstore = DataStoreDirectory(tmp_dir, suffix="fasta", mode="w") for id_, data in nc_objects.items(): dstore.write_not_completed(unique_id=id_, data=data.to_json()) return dstore def test_apply_to_input_only_not_completed(DATA_DIR, nc_dstore, tmp_dir): """correctly skips notcompleted""" dstore = open_data_store(DATA_DIR, suffix="fasta", limit=3) # trigger creation of notcompleted outpath = tmp_dir / "delme.sqlitedb" out_dstore = open_data_store(outpath, mode="w") writer = c3.get_app("write_db", out_dstore) process = ( c3.get_app("load_aligned", format_name="fasta", moltype="dna") + c3.get_app("min_length", 3000) + writer ) process.apply_to(dstore, show_progress=False) assert len(out_dstore.not_completed) == len(nc_dstore) def 
test_apply_to_makes_not_completed(DATA_DIR, tmp_dir): """correctly creates notcompleted""" dstore = open_data_store(DATA_DIR, suffix="fasta", limit=3) reader = c3.get_app("load_aligned", format_name="fasta", moltype="dna") # trigger creation of notcompleted min_length = c3.get_app("min_length", 3000) out_dstore = open_data_store(tmp_dir / "delme.sqlitedb", mode="w") writer = c3.get_app("write_db", out_dstore) process = reader + min_length + writer process.apply_to(dstore, show_progress=False) assert len(out_dstore.not_completed) == 3 def test_apply_to_not_partially_done(DATA_DIR, tmp_dir): """correctly applies process when result already partially done""" dstore = open_data_store(DATA_DIR, suffix="fasta") num_records = len(dstore) reader = c3.get_app("load_aligned", format_name="fasta", moltype="dna") out_dstore = open_data_store(tmp_dir / "delme.sqlitedb", mode="w") writer = c3.get_app("write_db", out_dstore) # doing the first one # turning off warning as apps are callable _ = writer(reader(dstore[0])) # pylint: disable=not-callable writer.data_store.close() out_dstore = open_data_store(tmp_dir / "delme.sqlitedb", mode="a") writer = c3.get_app("write_db", out_dstore) process = reader + writer _ = process.apply_to(dstore, show_progress=False) assert len(out_dstore) == num_records @pytest.fixture def full_dstore(write_dir1, nc_objects, completed_objects, log_data): dstore = DataStoreDirectory(write_dir1, suffix="fasta", mode="w") for id_, data in nc_objects.items(): dstore.write_not_completed(unique_id=id_, data=data.to_json()) for id_, data in completed_objects.items(): dstore.write(unique_id=id_, data=data) dstore.write_log(unique_id="scitrack.log", data=log_data) return dstore # @pytest.mark.xfail(reason="passes except when run in full test suite") @pytest.mark.parametrize("show", [True, False]) def test_as_completed_progress(full_dstore, capsys, show): loader = c3.get_app("load_unaligned", format_name="fasta", moltype="dna") omit = c3.get_app("omit_degenerates") 
app = loader + omit list(app.as_completed(full_dstore.completed, show_progress=show)) result = capsys.readouterr().err.splitlines() if show: assert len(result) > 0 assert "6/6" in result[-1] else: assert len(result) == 0 def test_composite_pickleable(): """composable functions should be pickleable""" read = c3.get_app("load_aligned", moltype="dna") dumps(read) trans = c3.get_app("select_translatable") dumps(trans) aln = c3.get_app("progressive_align", "nucleotide") dumps(aln) just_nucs = c3.get_app("omit_degenerates", moltype="dna") dumps(just_nucs) limit = c3.get_app("fixed_length", 1000, random=True) dumps(limit) mod = c3.get_app("model", "HKY85") dumps(mod) qt = c3.get_app("quick_tree") dumps(qt) proc = read + trans + aln + just_nucs + limit + mod dumps(proc) def test_user_function(): """composable functions should be user definable""" @define_app def foo(val: c3types.AlignedSeqsType, *args, **kwargs) -> c3types.AlignedSeqsType: return val[:4] u_function = foo() aln = c3.make_aligned_seqs( [("a", "GCAAGCGTTTAT"), ("b", "GCTTTTGTCAAT")], moltype="dna", ) got = u_function(aln) assert got.to_dict() == {"a": "GCAA", "b": "GCTT"} def test_user_function_without_arg_kwargs(): """composable functions should be user definable""" @define_app def foo_without_arg_kwargs( val: c3types.AlignedSeqsType, ) -> c3types.AlignedSeqsType: return val[:4] u_function = foo_without_arg_kwargs() aln = c3.make_aligned_seqs( [("a", "GCAAGCGTTTAT"), ("b", "GCTTTTGTCAAT")], moltype="dna", ) got = u_function(aln) assert got.to_dict() == {"a": "GCAA", "b": "GCTT"} def test_user_function_multiple(): """user defined composable functions should not interfere with each other""" @define_app def foo(val: c3types.AlignedSeqsType, *args, **kwargs) -> c3types.AlignedSeqsType: return val[:4] @define_app def bar(val: c3types.AlignedSeqsType, num=3) -> c3types.PairwiseDistanceType: return val.distance_matrix(calc="hamming") u_function_1 = foo() u_function_2 = bar() aln_1 = c3.make_aligned_seqs( [("a", 
"GCAAGCGTTTAT"), ("b", "GCTTTTGTCAAT")], moltype="dna", ) data = {"s1": "ACGTACGTA", "s2": "GTGTACGTA"} aln_2 = c3.make_aligned_seqs(data, moltype="dna") got_1 = u_function_1(aln_1) got_2 = u_function_2(aln_2) assert got_1.to_dict() == {"a": "GCAA", "b": "GCTT"} assert got_2 == {("s1", "s2"): 2.0, ("s2", "s1"): 2.0} def test_concat_not_composable(): concat = c3.get_app("concat") assert not is_app_composable(concat) def test_cogent3_serialisable_compatible_with_serialisabletype(tmp_path): @define_app def foo(arg: str) -> c3types.TreeType: # we're checking composability only return c3.make_tree(arg, source="blah") foo_instance = foo() out = open_data_store(tmp_path / "out.sqlitedb", mode="w") writer = c3.get_app("write_db", data_store=out) loader = c3.get_app("load_db") app = foo_instance + writer result = app("(A:0.1,B:0.2);") # pylint: disable=not-callable got = loader(result) assert not isinstance(got, NotCompleted) @pytest.mark.parametrize("klass", [DataStoreDirectory, DataStoreSqlite]) def test_apply_to_writes_citations(DATA_DIR, tmp_dir, klass): """apply_to writes citations to the data store""" cite = _make_cite(title="Test Citation") dstore = open_data_store(DATA_DIR, suffix="fasta", limit=2) reader = c3.get_app("load_aligned", format_name="fasta", moltype="dna") @define_app(cite=cite) class cited_step: def main(self, val: c3types.AlignedSeqsType) -> c3types.AlignedSeqsType: return val outpath = ( tmp_dir / "cite_out" if klass == DataStoreDirectory else tmp_dir / "cite_out.sqlitedb" ) kwargs = {"suffix": "fasta"} if klass == DataStoreDirectory else {} out_dstore = open_data_store(outpath, mode="w", **kwargs) writer = c3.get_app("write_seqs", data_store=out_dstore) process = reader + cited_step() + writer result_dstore = process.apply_to(dstore, show_progress=False) loaded = result_dstore._load_citations() assert len(loaded) >= 1 titles = [c.title for c in loaded] assert "Test Citation" in titles @pytest.fixture def half_dstore1(write_dir1, nc_objects, 
completed_objects, log_data): dstore = DataStoreDirectory(write_dir1, suffix="fasta", mode="w") i = 0 for id_, data in nc_objects.items(): dstore.write_not_completed(unique_id=id_, data=data.to_json()) i += 1 if i >= len(nc_objects.items()) / 2: break i = 0 for id_, data in completed_objects.items(): dstore.write(unique_id=id_, data=data) i += 1 if i >= len(completed_objects.items()) / 2: break dstore.write_log(unique_id="scitrack.log", data=log_data) return dstore @pytest.fixture def half_dstore2(write_dir2, nc_objects, completed_objects, log_data): dstore = DataStoreDirectory(write_dir2, suffix="fasta", mode="w") for i, (id_, data) in enumerate(nc_objects.items()): if i >= len(nc_objects.items()) / 2: dstore.write_not_completed(unique_id=id_, data=data.to_json()) for i, (id_, data) in enumerate(completed_objects.items()): if i >= len(completed_objects.items()) / 2: dstore.write(unique_id=id_, data=data) dstore.write_log(unique_id="scitrack.log", data=log_data) return dstore def test_apply_to_only_appends(half_dstore1, half_dstore2): half_dstore1 = open_data_store( half_dstore1.source, suffix=half_dstore1.suffix, mode="a", ) reader1 = c3.get_app("load_aligned", format_name="fasta", moltype="dna") min_length1 = c3.get_app("min_length", length=10) writer1 = c3.get_app("write_seqs", data_store=half_dstore1) process1 = reader1 + min_length1 + writer1 # create paths as strings dstore1 = [ str(Path(m.data_store.source) / m.unique_id) for m in half_dstore1.completed ] # check does not modify dstore when applied to same records orig_members = {m.unique_id for m in half_dstore1.members} got = process1.apply_to(dstore1) assert {m.unique_id for m in got.members} == orig_members half_dstore2 = open_data_store( half_dstore2.source, suffix=half_dstore2.suffix, mode="a", ) reader2 = c3.get_app("load_aligned", format_name="fasta", moltype="dna") min_length2 = c3.get_app("min_length", length=10) writer2 = c3.get_app("write_seqs", data_store=half_dstore2) process2 = reader2 + 
min_length2 + writer2 # check not fail on append new records _ = process2.apply_to(dstore1) @define_app def app_1(a: int) -> list[int]: return [a] @define_app def app_2(b: list[int]) -> int: return b[0] def test_composed_app_case(): a = app_1() b = app_2() composed = a + b assert isinstance(composed, ComposableApp) cogent3-scinexus-e0aee79/tests/test_data_store.py000066400000000000000000001127761520253266500223400ustar00rootroot00000000000000import json import pathlib import shutil from itertools import product from pathlib import Path from pickle import dumps, loads import pytest from citeable import Software from scitrack import get_text_hexdigest try: import cogent3 as c3 from cogent3.util.union_dict import UnionDict except ImportError: c3 = None UnionDict = None from scinexus import open_data_store from scinexus.composable import NotCompleted, NotCompletedType from scinexus.data_store import ( APPEND, CITATIONS_FILE, MD5_TABLE, NOT_COMPLETED_TABLE, OVERWRITE, READONLY, DataStoreDirectory, ReadOnlyDataStoreZipped, get_data_source, get_id_from_source, get_summary_display, get_unique_id, load_record_from_json, make_record_for_json, set_id_from_source, set_summary_display, summary_not_completeds, ) # over-ride cogent3 setting set_summary_display(None) @pytest.fixture def tmp_dir(tmp_path_factory): return Path(tmp_path_factory.mktemp("datastore")) @pytest.fixture def fasta_dir(DATA_DIR, tmp_dir): tmp_dir = Path(tmp_dir) filenames = DATA_DIR.glob("*.fasta") fasta_dir = tmp_dir / "fasta" fasta_dir.mkdir(parents=True, exist_ok=True) for fn in filenames: dest = fasta_dir / fn.name dest.write_text(fn.read_text()) return fasta_dir @pytest.fixture def write_dir(tmp_dir): tmp_dir = Path(tmp_dir) write_dir = tmp_dir / "write" write_dir.mkdir(parents=True, exist_ok=True) yield write_dir shutil.rmtree(write_dir, ignore_errors=True) @pytest.fixture def w_dstore(write_dir): return DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) @pytest.fixture def 
ro_dstore(fasta_dir): return DataStoreDirectory(fasta_dir, suffix="fasta", mode=READONLY) @pytest.fixture def completed_objects(ro_dstore): return {f"{Path(m.unique_id).stem}": m.read() for m in ro_dstore} @pytest.fixture def nc_objects(): return { f"id_{i}": NotCompleted( NotCompletedType.ERROR, "location", "message", source=f"id_{i}" ) for i in range(3) } @pytest.fixture(scope="session") def log_data(DATA_DIR): path = DATA_DIR / "scitrack.log" return path.read_text() @pytest.fixture def full_dstore(write_dir, nc_objects, completed_objects, log_data): dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) for id_, data in nc_objects.items(): dstore.write_not_completed(unique_id=id_, data=data.to_json()) for id_, data in completed_objects.items(): dstore.write(unique_id=id_, data=data) dstore.write_log(unique_id="scitrack.log", data=log_data) return dstore @pytest.fixture def nc_dir(tmp_dir): nc_dir = tmp_dir / "nc_test" nc_dir.mkdir(parents=True, exist_ok=True) yield nc_dir shutil.rmtree(nc_dir, ignore_errors=True) @pytest.fixture def nc_dstore(DATA_DIR, nc_dir): dstore = DataStoreDirectory(nc_dir, suffix="fasta", mode=OVERWRITE) log_filename = "scitrack.log" dstore.write_log(unique_id=log_filename, data=(DATA_DIR / log_filename).read_text()) nc = [ NotCompleted( NotCompletedType.FAIL, f"dummy{i}", f"dummy_message{i}", source=f"dummy_source{i}", ) for i in range(3) ] for i, item in enumerate(nc): dstore.write_not_completed(unique_id=f"nc{i + 1}", data=item.to_json()) assert len(dstore.not_completed) == 3 filenames = DATA_DIR.glob("*.fasta") for fn in filenames: identifier = fn.name dstore.write(unique_id=identifier, data=fn.read_text()) return dstore @pytest.fixture def sample_citations(): cite1 = Software( author=["Doe, J"], title="Tool One", year=2024, url="https://example.com/one", version="1.0", license="MIT", doi="10.0/one", publisher="test", ) cite2 = Software( author=["Smith, A"], title="Tool Two", year=2024, url="https://example.com/two", 
version="2.0", license="MIT", doi="10.0/two", publisher="test", ) return (cite1, cite2) def _get_member_data(members): return {m.unique_id: m.read() for m in members} @pytest.fixture def zipped_basic(fasta_dir): path = shutil.make_archive( base_name=str(fasta_dir.parent / fasta_dir.name), format="zip", base_dir=fasta_dir.name, root_dir=fasta_dir.parent, ) return pathlib.Path(path) @pytest.fixture def zipped_full(full_dstore): source = pathlib.Path(full_dstore.source) path = shutil.make_archive( base_name=str(source.parent / source.name), format="zip", base_dir=source.name, root_dir=source.parent, ) return ReadOnlyDataStoreZipped(pathlib.Path(path), suffix="fasta") @pytest.fixture def zipped_hidden(fasta_dir): # create a hidden file hidden = fasta_dir / ".hidden.fasta" hidden.write_text(">s1\nACGT\n") path = shutil.make_archive( base_name=str(fasta_dir.parent / (fasta_dir.name + "_hidden")), format="zip", base_dir=fasta_dir.name, root_dir=fasta_dir.parent, ) hidden.unlink() return pathlib.Path(path) def test_data_member_eq(ro_dstore, fasta_dir): ro_dstore2 = DataStoreDirectory(fasta_dir, mode="r", suffix="fasta") name = "brca1.fasta" mem1 = next(m for m in ro_dstore.completed if m.unique_id == name) mem2 = next(m for m in ro_dstore2.completed if m.unique_id == name) assert mem1 != mem2 def test_fail_try_append(full_dstore, completed_objects): full_dstore._mode = APPEND id_, data = next(iter(completed_objects.items())) with pytest.raises(IOError): full_dstore.write(unique_id=id_, data=data) def test_contains(ro_dstore): """correctly identify when a data store contains a member""" assert "brca1.fasta" in ro_dstore assert "brca1" in ro_dstore def test_len(ro_dstore): """DataStore returns correct len""" expect = len(list(ro_dstore.source.glob("*.fasta"))) assert expect == len(ro_dstore) == len(ro_dstore.members) def test_getitem(ro_dstore): with pytest.raises(IndexError): _ = ro_dstore[len(ro_dstore)] last = ro_dstore[-1] first = ro_dstore[0] assert last.unique_id != 
first.unique_id def test_iterall(ro_dstore): expect = {fn.name for fn in ro_dstore.source.glob("*.fasta")} got = {m.unique_id for m in ro_dstore} assert expect == got def test_read(ro_dstore): """correctly read content""" expect = (ro_dstore.source / "brca1.fasta").read_text() got = ro_dstore.read("brca1.fasta") assert got == expect def test_pickleable_roundtrip(ro_dstore): """pickling of data stores should be reversible""" re_dstore = loads(dumps(ro_dstore)) assert str(ro_dstore) == str(re_dstore) assert ro_dstore[0].read() == re_dstore[0].read() def test_pickleable_member_roundtrip(ro_dstore): """pickling of data store members should be reversible""" re_member = loads(dumps(ro_dstore[0])) data = re_member.read() assert len(data) > 0 def test_empty_directory(fasta_dir): dstore = DataStoreDirectory(fasta_dir, suffix=".txt") assert len(dstore) == 0 def test_no_logs(ro_dstore): assert len(ro_dstore.logs) == 0 def test_no_not_completed(ro_dstore): assert len(ro_dstore.not_completed) == 0 def test_logs(nc_dstore): assert len(nc_dstore.logs) == 1 log = nc_dstore.logs[0].read() assert isinstance(log, str) def test_not_completed(nc_dstore): assert len(nc_dstore.not_completed) == 3 nc = nc_dstore.not_completed[0].read() assert isinstance(nc, str) def test_drop_not_completed(nc_dstore): num_completed = len(nc_dstore.completed) num_not_completed = len(nc_dstore.not_completed) num_md5 = len(list((nc_dstore.source / MD5_TABLE).glob("*.txt"))) assert num_not_completed == 3 assert num_completed == 6 assert len(nc_dstore) == 9 assert num_md5 == num_completed + num_not_completed nc_dstore.drop_not_completed() assert len(nc_dstore.not_completed) == 0 num_md5 = len(list((nc_dstore.source / MD5_TABLE).glob("*.txt"))) assert num_md5 == num_completed def test_write_read_only_datastore(ro_dstore): with pytest.raises(IOError): ro_dstore.write(unique_id="brca1.fasta", data="test data") def test_write(fasta_dir, w_dstore): """correctly write content""" expect = Path(fasta_dir / 
"brca1.fasta").read_text() identifier = "brca1.fasta" w_dstore.write(unique_id=identifier, data=expect) got = w_dstore.read(identifier) assert got == expect def test_multi_write(fasta_dir, w_dstore): """correctly write multiple files to data store""" expect_a = Path(fasta_dir / "brca1.fasta").read_text() expect_b = Path(fasta_dir / "primates_brca1.fasta").read_text() identifier_a = "brca2.fasta" identifier_b = "primates_brca2.fasta" w_dstore.write(unique_id=identifier_a, data=expect_a) w_dstore.write(unique_id=identifier_b, data=expect_b) got_a = w_dstore.read(identifier_a) got_b = w_dstore.read(identifier_b) assert got_a == expect_a assert got_b == expect_b def test_append(w_dstore): """correctly write content""" identifier = "test1.fasta" data = "test data" w_dstore.write(unique_id=identifier, data=data) got = w_dstore.read(identifier) assert got == data def test_no_not_completed_subdir(nc_dstore): expect = f"{len(nc_dstore.completed) + len(nc_dstore.not_completed)}x member" assert str(nc_dstore).startswith(expect) nc_dstore.drop_not_completed() assert not Path(nc_dstore.source / NOT_COMPLETED_TABLE).exists() expect = f"{len(nc_dstore.completed)}x member" assert str(nc_dstore).startswith(expect) expect = f"{len(nc_dstore)}x member" assert str(nc_dstore).startswith(expect) assert len(nc_dstore) == len(nc_dstore.completed) not_dir = nc_dstore.source / NOT_COMPLETED_TABLE not_dir.mkdir(exist_ok=True) def test_limit_datastore(nc_dstore): assert len(nc_dstore) == len(nc_dstore.completed) + len(nc_dstore.not_completed) nc_dstore._limit = len(nc_dstore.completed) // 2 nc_dstore._completed = [] nc_dstore._not_completed = [] assert len(nc_dstore.completed) == len(nc_dstore.not_completed) == nc_dstore.limit assert len(nc_dstore) == len(nc_dstore.completed) + len(nc_dstore.not_completed) nc_dstore.drop_not_completed() assert len(nc_dstore) == len(nc_dstore.completed) assert len(nc_dstore.not_completed) == 0 nc_dstore._limit = len(nc_dstore.completed) // 2 
nc_dstore._completed = [] nc_dstore._not_completed = [] assert len(nc_dstore) == len(nc_dstore.completed) == nc_dstore.limit assert len(nc_dstore.not_completed) == 0 def test_md5_sum(nc_dstore): for m in nc_dstore.members: data = m.read() md5 = nc_dstore.md5(m.unique_id) assert md5 == get_text_hexdigest(data) def test_md5_none(fasta_dir): dstore = DataStoreDirectory(fasta_dir, suffix="fasta") for m in dstore.members: assert m.md5 is None def test_md5_missing(nc_dstore): assert nc_dstore.md5("unknown") is None def test_write_if_member_exists(full_dstore, write_dir): """correctly write content""" expect = Path(write_dir / "brca1.fasta").read_text() identifier = "brca1.fasta" len_dstore = len(full_dstore) full_dstore.write(unique_id=identifier, data=expect) assert len_dstore == len(full_dstore) got = full_dstore.read(identifier) assert got == expect full_dstore._mode = OVERWRITE full_dstore.write(unique_id=identifier, data=expect) assert len_dstore == len(full_dstore) got = full_dstore.read(identifier) assert got == expect def test_write_success_replaces_not_completed(full_dstore): """correctly write content""" nc = full_dstore.not_completed[0].unique_id data = full_dstore.completed[0].read() new_id = Path(nc.replace(".json", f".{full_dstore.suffix}")).name num = len(full_dstore) full_dstore.write(unique_id=new_id, data=data) assert len(full_dstore) == num @pytest.mark.parametrize("klass", [str, Path]) def test_get_data_source_attr(klass): """handles case where input has source attribute string object or pathlib object""" class dummy: source = None obj = dummy() value = klass("some/path.txt") obj.source = value got = get_data_source(obj) assert got == "path.txt" @pytest.mark.parametrize( "name", ["path/name.txt", "path/name.gz", "path/name.fasta.gz", "name.fasta.gz"], ) def test_get_unique_id(name): got = get_unique_id(name) assert got == "name" def test_get_unique_id_none(): got = get_unique_id(None) assert got is None def 
test_set_id_from_source_returns_default_initially( reset_id_from_source: None, ) -> None: """Default extractor is `get_unique_id` when nothing is registered.""" assert get_id_from_source() is get_unique_id def test_set_id_from_source_registers_and_clears( reset_id_from_source: None, ) -> None: """A registered function replaces the default; None restores it.""" def my_extractor(obj: object) -> str | None: return f"custom-{obj}" set_id_from_source(my_extractor) assert get_id_from_source() is my_extractor assert get_id_from_source()("foo") == "custom-foo" set_id_from_source(None) assert get_id_from_source() is get_unique_id @pytest.mark.parametrize("data", [{}, set(), {"info": {}}]) def test_get_data_source_none(data): assert get_data_source(data) is None def test_load_record_from_json(): """handle different types of input""" orig = {"data": "blah", "identifier": "some.json", "completed": True} data = orig.copy() data2 = data.copy() data2["data"] = json.dumps(data) for d in (data, json.dumps(data), data2): expected = "blah" if d != data2 else json.loads(data2["data"]) id_, data_, compl = load_record_from_json(d) assert id_ == "some.json" assert data_ == expected assert compl is True def test_zipped_ro_fail(zipped_basic): with pytest.raises(ValueError): ReadOnlyDataStoreZipped(zipped_basic, suffix="fasta", mode="w") def test_zipped_ro_ioerror(): with pytest.raises(IOError): ReadOnlyDataStoreZipped("blah-1234.zip", suffix="fasta") def test_zipped_ro_basic(zipped_basic, ro_dstore): dstore = ReadOnlyDataStoreZipped(zipped_basic, suffix="fasta") assert len(dstore.completed) == len(ro_dstore.completed) assert len(dstore) == len(ro_dstore) expect = _get_member_data(ro_dstore.completed) got = _get_member_data(dstore.completed) assert expect == got expect = _get_member_data(ro_dstore.not_completed) got = _get_member_data(dstore.not_completed) assert expect == got def test_zipped_ro_basic_hidden(zipped_hidden, zipped_basic): orig = ReadOnlyDataStoreZipped(zipped_basic, 
suffix="fasta") dstore = ReadOnlyDataStoreZipped(zipped_hidden, suffix="fasta") assert len(dstore) == len(orig) assert all(not m.unique_id.startswith(".") for m in dstore) def test_zipped_ro_full(zipped_full, full_dstore): got_ids = {m.unique_id for m in zipped_full.completed} expect_ids = {m.unique_id for m in full_dstore.completed} assert got_ids == expect_ids got_ids = {m.unique_id for m in zipped_full.not_completed} expect_ids = {m.unique_id for m in full_dstore.not_completed} assert got_ids == expect_ids assert len(zipped_full) == len(full_dstore) expect = _get_member_data(full_dstore.completed) got = _get_member_data(zipped_full.completed) assert expect == got expect = _get_member_data(full_dstore.not_completed) got = _get_member_data(zipped_full.not_completed) assert expect == got def test_zipped_logs(zipped_full, full_dstore): assert len(zipped_full.logs) == len(full_dstore.logs) expect = _get_member_data(full_dstore.logs) got = _get_member_data(zipped_full.logs) assert expect == got def test_zipped_md5(zipped_full, full_dstore): expect = {m.unique_id: full_dstore.md5(m.unique_id) for m in full_dstore.completed} got = {m.unique_id: zipped_full.md5(m.unique_id) for m in zipped_full.completed} assert got == expect def test_write_citations_directory(write_dir, sample_citations): dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) dstore.write_citations(data=sample_citations) path = write_dir / CITATIONS_FILE assert path.exists() loaded = dstore._load_citations() assert len(loaded) == 2 assert loaded[0].title == "Tool One" assert loaded[1].title == "Tool Two" def test_write_citations_empty_directory(write_dir): dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) dstore.write_citations(data=()) path = write_dir / CITATIONS_FILE assert not path.exists() def test_write_bib_directory(write_dir, sample_citations): dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) dstore.write_citations(data=sample_citations) 
bib_path = write_dir / "refs.bib" dstore.write_bib(bib_path) assert bib_path.exists() content = bib_path.read_text() assert "Tool One" in content assert "Tool Two" in content def test_write_bib_no_citations(write_dir): dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) bib_path = write_dir / "refs.bib" with pytest.warns(UserWarning, match="No citations stored"): dstore.write_bib(bib_path) assert not bib_path.exists() def test_load_citations_no_file(write_dir): dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) assert dstore._load_citations() == [] def test_load_citations_zipped(write_dir, sample_citations): dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) dstore.write_citations(data=sample_citations) source = pathlib.Path(dstore.source) path = shutil.make_archive( base_name=str(source.parent / source.name), format="zip", base_dir=source.name, root_dir=source.parent, ) zipped = ReadOnlyDataStoreZipped(pathlib.Path(path), suffix="fasta") loaded = zipped._load_citations() assert len(loaded) == 2 assert loaded[0].title == "Tool One" def test_citations_file_not_in_completed(write_dir, sample_citations): """The bibliography.citations file must not appear in the completed members list.""" dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) dstore.write(unique_id="sample.fasta", data=">s1\nACGT\n") dstore.write_citations(data=sample_citations) assert (write_dir / CITATIONS_FILE).exists() dstore._completed = [] member_ids = {m.unique_id for m in dstore.completed} assert CITATIONS_FILE not in member_ids assert "sample.fasta" in member_ids @pytest.fixture def _restore_display(): """Ensure the global display function is reset after each test.""" yield set_summary_display(None) def test_summary_display_default_is_none(_restore_display): assert get_summary_display() is None def test_summary_display_set_and_get(_restore_display): def my_display(data, *, name=""): return data 
set_summary_display(my_display) assert get_summary_display() is my_display def test_summary_display_set_none_clears(_restore_display): set_summary_display(lambda data, **kw: data) set_summary_display(None) assert get_summary_display() is None def test_describe_without_display(ro_dstore, _restore_display): result = ro_dstore.describe assert isinstance(result, dict) assert "completed" in result def test_describe_with_display(ro_dstore, _restore_display): captured = {} def display(data, *, name=""): captured["data"] = data captured["name"] = name return f"DISPLAY:{name}" set_summary_display(display) result = ro_dstore.describe assert result == "DISPLAY:describe" assert isinstance(captured["data"], dict) assert "completed" in captured["data"] assert captured["name"] == "describe" def test_summary_logs_with_display(full_dstore, _restore_display): captured = {} def display(data, *, name=""): captured["data"] = data captured["name"] = name return "transformed" set_summary_display(display) result = full_dstore.summary_logs assert result == "transformed" assert captured["name"] == "summary_logs" assert isinstance(captured["data"], list) def test_validate_with_display(ro_dstore, _restore_display): captured = {} def display(data, *, name=""): captured["name"] = name return "validated" set_summary_display(display) result = ro_dstore.validate() assert result == "validated" assert captured["name"] == "validate" def test_protected_methods_bypass_display(ro_dstore, _restore_display): set_summary_display(lambda data, **kw: "SHOULD_NOT_SEE") assert isinstance(ro_dstore._describe(), dict) assert isinstance(ro_dstore._summary_logs(), list) assert isinstance(ro_dstore._summary_not_completed(), list) assert isinstance(ro_dstore._validate(), dict) def test_summary_citations_with_display(write_dir, sample_citations, _restore_display): dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) dstore.write_citations(data=sample_citations) captured = {} def display(data, *, 
name=""): captured["name"] = name captured["data"] = data return "citations_display" set_summary_display(display) result = dstore.summary_citations assert result == "citations_display" assert captured["name"] == "summary_citations" assert isinstance(captured["data"], list) def test_validate_incorrect_md5(write_dir): dstore = DataStoreDirectory(write_dir, suffix="txt", mode=OVERWRITE) dstore.write(unique_id="item.txt", data="original") # corrupt the md5 md5_path = write_dir / MD5_TABLE / "item.txt" md5_path.write_text("wrong_md5_value") result = dstore._validate() assert result["md5_incorrect"] == 1 def test_readonly_nonexistent_dir(tmp_path): with pytest.raises(OSError, match="does not exist"): DataStoreDirectory(tmp_path / "nonexistent", suffix="txt", mode=READONLY) def test_not_completed_with_limit(write_dir): dstore = DataStoreDirectory(write_dir, suffix="txt", mode=OVERWRITE) for i in range(5): nc = NotCompleted(NotCompletedType.ERROR, "test", f"msg {i}", source=f"src_{i}") dstore.write_not_completed(unique_id=f"nc_{i}.json", data=nc.to_json()) limited = DataStoreDirectory(write_dir, suffix="txt", mode=READONLY, limit=2) assert len(limited.not_completed) == 2 def test_summary_not_completeds(write_dir): dstore = DataStoreDirectory(write_dir, suffix="txt", mode=OVERWRITE) for i in range(3): nc = NotCompleted( NotCompletedType.ERROR, "myapp", f"error msg {i}", source=f"s{i}" ) dstore.write_not_completed(unique_id=f"nc_{i}.json", data=nc.to_json()) rows = summary_not_completeds(dstore.not_completed) assert len(rows) >= 1 assert rows[0]["origin"] == "myapp" assert rows[0]["num"] == 3 def test_make_record_for_json(): result = make_record_for_json("id1", {"key": "value"}, True) assert result["identifier"] == "id1" assert result["completed"] is True assert isinstance(result["data"], str) parsed = json.loads(result["data"]) assert parsed == {"key": "value"} def test_make_record_for_json_with_rich_dict(): class FakeObj: def to_rich_dict(self): return {"type": "fake", 
"data": 42} result = make_record_for_json("id2", FakeObj(), True) parsed = json.loads(result["data"]) assert parsed == {"type": "fake", "data": 42} def test_zipped_readonly_write_methods(tmp_path): src = tmp_path / "src" src.mkdir() (src / "a.txt").write_text("data") zpath = shutil.make_archive(str(src), "zip", root_dir=src.parent, base_dir=src.name) zstore = ReadOnlyDataStoreZipped(zpath, suffix="txt") with pytest.raises(TypeError): zstore.write(unique_id="x", data="d") with pytest.raises(TypeError): zstore.write_not_completed(unique_id="x", data="d") with pytest.raises(TypeError): zstore.write_log(unique_id="x", data="d") with pytest.raises(TypeError): zstore.write_citations(data=()) with pytest.raises(TypeError): zstore.drop_not_completed() def test_zipped_md5_returns_none(tmp_path): src = tmp_path / "src" src.mkdir() (src / "a.txt").write_text("data") zpath = shutil.make_archive(str(src), "zip", root_dir=src.parent, base_dir=src.name) zstore = ReadOnlyDataStoreZipped(zpath, suffix="txt") assert zstore.md5("a.txt") is None def test_zipped_load_citations_missing(tmp_path): src = tmp_path / "src" src.mkdir() (src / "a.txt").write_text("data") zpath = shutil.make_archive(str(src), "zip", root_dir=src.parent, base_dir=src.name) zstore = ReadOnlyDataStoreZipped(zpath, suffix="txt") assert zstore._load_citations() == [] def test_zipped_completed_with_limit(tmp_path): src = tmp_path / "src" src.mkdir() for i in range(5): (src / f"f_{i}.txt").write_text(f"data {i}") zpath = shutil.make_archive(str(src), "zip", root_dir=src.parent, base_dir=src.name) zstore = ReadOnlyDataStoreZipped(zpath, suffix="txt", limit=2) assert len(zstore.completed) == 2 def test_summary_logs_continuation_line(write_dir): from scitrack import CachingLogger dstore = DataStoreDirectory(write_dir, suffix="txt", mode=OVERWRITE) logger = CachingLogger(create_dir=True) log_path = write_dir / "test.log" logger.log_file_path = str(log_path) logger.log_message("a long message\nthat continues", 
label="multi") logger.shutdown() dstore.write_log(unique_id="test.log", data=log_path.read_text()) rows = dstore._summary_logs() assert len(rows) == 1 def test_data_member_str(ro_dstore): member = ro_dstore[0] assert str(member) == member.unique_id def test_data_member_repr(ro_dstore): member = ro_dstore[0] r = repr(member) assert "DataMember" in r assert member.unique_id in r def test_datastore_repr(ro_dstore): r = repr(ro_dstore) assert "DataStoreDirectory" in r assert "source=" in r def test_contains_non_string(ro_dstore): assert 42 not in ro_dstore def test_write_not_completed_readonly(ro_dstore): with pytest.raises(OSError, match="readonly"): ro_dstore.write_not_completed(unique_id="x", data="d") def test_write_log_readonly(ro_dstore): with pytest.raises(OSError, match="readonly"): ro_dstore.write_log(unique_id="x", data="d") def test_summary_logs_malformed_continuation(): from unittest.mock import MagicMock log_text = "2024-01-01\t00:00:00\n\tcontinuation without key" member = MagicMock() member.read.return_value = log_text member.unique_id = "bad.log" class FakeDS(DataStoreDirectory): @property def logs(self): return [member] fake = FakeDS.__new__(FakeDS) fake._completed = [] fake._not_completed = [] fake._init_vals = {} with pytest.raises(ValueError, match="malformed log data"): fake._summary_logs() def test_tidy_and_check_suffix_empty(): from scinexus.data_store import _tidy_and_check_suffix with pytest.raises(ValueError, match="suffix is required"): _tidy_and_check_suffix(None) with pytest.raises(ValueError, match="suffix is required"): _tidy_and_check_suffix("*") with pytest.raises(ValueError, match="suffix is required"): _tidy_and_check_suffix(".**") def test_summary_not_completeds_with_bytes(): from unittest.mock import MagicMock members = [] for i in range(3): m = MagicMock() m.read.return_value = b"binary data" m.unique_id = f"item_{i}" members.append(m) result = summary_not_completeds(members) assert result == [] def 
test_summary_not_completeds_with_deserialise(write_dir): dstore = DataStoreDirectory(write_dir, suffix="txt", mode=OVERWRITE) from scinexus.composable import NotCompleted, NotCompletedType for i in range(3): nc = NotCompleted( NotCompletedType.ERROR, "myapp", f"error msg {i}", source=f"s{i}" ) dstore.write_not_completed(unique_id=f"deser_{i}.json", data=nc.to_json()) rows = summary_not_completeds(dstore.not_completed, deserialise=lambda x: x) assert len(rows) >= 1 def test_summary_not_completeds_long_sources(write_dir): dstore = DataStoreDirectory(write_dir, suffix="txt", mode=OVERWRITE) from scinexus.composable import NotCompleted, NotCompletedType for i in range(10): long_source = f"very_long_source_name_for_item_{i}_padding" nc = NotCompleted( NotCompletedType.ERROR, "myapp", f"error msg {i}", source=long_source, ) dstore.write_not_completed(unique_id=f"long_{i}.json", data=nc.to_json()) rows = summary_not_completeds(dstore.not_completed) assert len(rows) >= 1 for row in rows: if len(row["source"]) > 45: assert row["source"].endswith("...") def test_get_data_source_data_member(ro_dstore): member = ro_dstore[0] result = get_data_source(member) assert result == member.unique_id def test_zipped_not_completed_with_limit(tmp_path): from scinexus.composable import NotCompleted, NotCompletedType src = tmp_path / "src" src.mkdir() nc_dir = src / "not_completed" nc_dir.mkdir() (src / "a.txt").write_text("data") for i in range(5): nc = NotCompleted(NotCompletedType.ERROR, "test", f"msg {i}", source=f"s{i}") (nc_dir / f"nc_{i}.json").write_text(nc.to_json()) zpath = shutil.make_archive(str(src), "zip", root_dir=src.parent, base_dir=src.name) zstore = ReadOnlyDataStoreZipped(zpath, suffix="txt", limit=2) assert len(zstore.not_completed) == 2 def test_zipped_mode_property(zipped_basic): zstore = ReadOnlyDataStoreZipped(zipped_basic, suffix="fasta") assert zstore.mode is READONLY def _make_minimal_ds(mode=OVERWRITE): from scinexus.data_store import DataStoreABC class 
MinimalDS(DataStoreABC): @property def source(self): return "test" @property def mode(self): return mode @property def limit(self): return None def read(self, unique_id): return "" def write(self, *, unique_id, data): super().write(unique_id=unique_id, data=data) def write_not_completed(self, *, unique_id, data): super().write_not_completed(unique_id=unique_id, data=data) def write_log(self, *, unique_id, data): super().write_log(unique_id=unique_id, data=data) @property def logs(self): return [] @property def completed(self): return [] @property def not_completed(self): return [] def drop_not_completed(self, *, unique_id=None): pass def md5(self, unique_id): return None return MinimalDS() def test_base_write_citations_warns(): ds = _make_minimal_ds() with pytest.warns(UserWarning, match="does not support saving citations"): ds.write_citations(data=(object(),)) def test_base_summary_citations_warns(): ds = _make_minimal_ds() with pytest.warns(UserWarning, match="does not support saving citations"): result = ds._summary_citations() assert result == [] def test_base_load_citations_returns_empty(): ds = _make_minimal_ds() assert ds._load_citations() == [] def test_base_write_not_completed_readonly(): ds = _make_minimal_ds(mode=READONLY) with pytest.raises(OSError, match="readonly"): ds.write_not_completed(unique_id="x", data="d") def test_base_write_log_readonly(): ds = _make_minimal_ds(mode=READONLY) with pytest.raises(OSError, match="readonly"): ds.write_log(unique_id="x", data="d") def test_base_write_citations_empty_data(): ds = _make_minimal_ds() ds.write_citations(data=()) @pytest.mark.mpi def test_source_check_create_not_master(tmp_path): from unittest.mock import patch from scinexus import data_store as ds_mod target = tmp_path / "should_not_exist" with patch.object(ds_mod, "is_master_process", return_value=False): dstore = DataStoreDirectory(target, suffix="txt", mode=OVERWRITE) assert not target.exists() assert dstore.source == target def 
test_write_read_not_completed(nc_dstore): nc_dstore.drop_not_completed() assert len(nc_dstore.not_completed) == 0 nc = NotCompleted("ERROR", "test", "for tracing", source="blah") writer = c3.get_app("write_seqs", data_store=nc_dstore) writer.main(nc, identifier="blah") assert len(nc_dstore.not_completed) == 1 got = nc_dstore.not_completed[0].read() assert nc.to_json() == got def test_summary_logs_missing_field(nc_dstore): log_path = Path(nc_dstore.source) / nc_dstore.logs[0].unique_id data = [ l for l in log_path.read_text().splitlines() if "composable function" not in l ] log_path.write_text("\n".join(data)) # doesn't fail because of a missing field in the log data assert isinstance(nc_dstore.summary_logs, list) @pytest.fixture def app_dstore_in(tmp_path): pytest.importorskip("cogent3") in_path = tmp_path / "in_data" in_path.mkdir(parents=True) fasta_content = ">seq\nACGT" with open(in_path / "one.fa", "w") as file: file.write(fasta_content) dstore_in = open_data_store(in_path, suffix=".fa", mode="r") dstore_out = open_data_store(tmp_path / "data_out", suffix="fa", mode="w") loader = c3.get_app("load_unaligned") writer = c3.get_app("write_seqs", dstore_out) pipe = loader + writer return pipe, dstore_in def test_write_multiple_times_apply_to(app_dstore_in): app, dstore_in = app_dstore_in app.apply_to(dstore_in) orig_length = len(app.data_store) app.apply_to(dstore_in) assert len(app.data_store) == orig_length def test_directory_data_store_write_compressed(tmp_path): out = open_data_store(base_path=tmp_path / "demo", suffix="fa.gz", mode="w") writer = c3.get_app("write_seqs", data_store=out) seqs = c3.make_aligned_seqs( {"s1": "CG--T", "s2": "CGTTT"}, moltype="dna", info={"source": "test"}, ) got = writer(seqs) # pylint: disable=not-callable assert got, got def test_apply_to_not_completed(nc_dstore, tmp_path): loader = c3.get_app("load_unaligned") num_seqs = c3.get_app("take_n_seqs", number=3, fixed_choice=False) out_dstore = open_data_store(tmp_path / "output", 
suffix="fa", mode="w") writer = c3.get_app("write_seqs", data_store=out_dstore, format_name="fasta") app = loader + num_seqs + writer fini = app.apply_to(nc_dstore) assert 0 < len(fini.completed) <= len(nc_dstore.completed) def test_summary_citations_directory(write_dir, sample_citations): dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) dstore.write_citations(data=sample_citations) cited = dstore.summary_citations assert isinstance(cited, list) assert len(cited) == 2 assert "app" in cited[0] assert "citation" in cited[0] def test_write_bib_tilde_path(write_dir, sample_citations, HOME_TMP_DIR): dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) dstore.write_citations(data=sample_citations) bib_path = f"~/{HOME_TMP_DIR.name}/refs.bib" dstore.write_bib(bib_path) expected = pathlib.Path(bib_path).expanduser() assert expected.exists() content = expected.read_text() assert "Tool One" in content assert "Tool Two" in content def test_summary_citations_zipped(write_dir, sample_citations): dstore = DataStoreDirectory(write_dir, suffix="fasta", mode=OVERWRITE) dstore.write_citations(data=sample_citations) source = pathlib.Path(dstore.source) path = shutil.make_archive( base_name=str(source), format="zip", base_dir=source.name, root_dir=source.parent, ) zipped = ReadOnlyDataStoreZipped(pathlib.Path(path), suffix="fasta") cited = zipped.summary_citations assert isinstance(cited, list) assert len(cited) == 2 def test_write_citations_zipped_raises(zipped_basic): zipped = ReadOnlyDataStoreZipped(zipped_basic, suffix="fasta") with pytest.raises(TypeError, match="read only"): zipped.write_citations(data=(None,)) def test_old_directory_store_without_citations(fasta_dir): """Opening a directory store created before citations were added works.""" # fasta_dir has .fasta files but no .citations file dstore = DataStoreDirectory(fasta_dir, suffix="fasta", mode=READONLY) assert dstore._load_citations() == [] cited = dstore.summary_citations assert 
isinstance(cited, list) assert len(cited) == 0 _dict_types = [dict] if UnionDict is not None: _dict_types.append(UnionDict) _types = tuple(product(_dict_types, (str, Path))) @pytest.mark.parametrize(("container_type", "source_stype"), _types) def test_get_data_source_dict(container_type, source_stype): """handles case where input is dict (sub)class instance with top level source key""" value = source_stype("some/path.txt") data = container_type(source=value) got = get_data_source(data) assert got == "path.txt" @pytest.mark.parametrize("klass", [str, Path]) def test_get_data_source_seqcoll(klass): """handles case where input is sequence collection object""" from cogent3 import make_unaligned_seqs value = klass("some/path.txt") obj = make_unaligned_seqs( {"seq1": "ACGG"}, moltype="dna", info={"random_key": 1234}, source=value, ) got = get_data_source(obj) assert got == "path.txt" cogent3-scinexus-e0aee79/tests/test_deserialise.py000066400000000000000000000053771520253266500225020ustar00rootroot00000000000000import json import pytest from scinexus.composable import NotCompleted from scinexus.deserialise import ( deserialise_object, get_class, register_deserialiser, str_to_version, ) def test_deserialise_python_builtins(): """any object that does not contain a type key is returned as is""" data = {"a": 123, "b": "text"} jdata = json.dumps(data) got = deserialise_object(jdata) assert got == data data = range(4) got = deserialise_object(data) assert got is data def test_custom_deserialiser(): """correctly registers a function to inflate a custom object""" @register_deserialiser("test_myfunkydata") def astuple(data): data.pop("type") return tuple(data["data"]) orig = {"type": "test_myfunkydata", "data": [1, 2, 3]} txt = json.dumps(orig) got = deserialise_object(txt) assert got == (1, 2, 3) assert isinstance(got, tuple) with pytest.raises(TypeError): @register_deserialiser(42) def bad(data): return data def test_register_deserialiser_duplicate(): """raises ValueError for 
duplicate type strings""" register_deserialiser("test_unique_type_xyz")(lambda d: d) with pytest.raises(ValueError, match="already in"): register_deserialiser("test_unique_type_xyz")(lambda d: d) def test_not_completed_deserialise(): """correctly reconstructs a NotCompleted object""" val = NotCompleted("ERROR", "nothing", "some error", source="here") expect = val.to_rich_dict() j = val.to_json() got = deserialise_object(j) assert got.to_rich_dict() == expect def test_deserialise_from_file(tmp_path): """correctly deserialises from a json file""" val = NotCompleted("ERROR", "nothing", "some error", source="here") j = val.to_json() outpath = tmp_path / "test.json" outpath.write_text(j) got = deserialise_object(outpath) assert got.to_rich_dict() == val.to_rich_dict() def test_deserialise_unknown_type(): """raises NotImplementedError for unknown type""" data = {"type": "completely.unknown.Type", "data": 42} with pytest.raises(NotImplementedError, match="completely.unknown.Type"): deserialise_object(data) def test_get_class(): """correctly imports a class from provenance string""" klass = get_class("scinexus.composable.NotCompleted") assert klass is NotCompleted def test_get_class_invalid(): """raises ValueError for invalid provenance string""" with pytest.raises(ValueError, match="invalid provenance"): get_class("nodotshere") def test_str_to_version(): """correctly parses version strings""" got = str_to_version("2024.5.8a9") assert isinstance(got, tuple) assert len(got) > 0 got = str_to_version("1.2.3") assert got == () cogent3-scinexus-e0aee79/tests/test_init.py000066400000000000000000000016261520253266500211450ustar00rootroot00000000000000import pytest import scinexus from scinexus.progress import NoProgress def test_get_progress(): result = scinexus.get_progress() assert isinstance(result, NoProgress) def test_set_default_progress(): scinexus.set_progress_backend(None) def test_lazy_import_open_data_store(): assert scinexus.open_data_store is not None def 
test_lazy_import_set_summary_display(): assert callable(scinexus.set_summary_display) def test_lazy_import_get_summary_display(): assert callable(scinexus.get_summary_display) def test_lazy_import_nonexistent(): with pytest.raises(AttributeError, match="no attribute"): scinexus.no_such_attribute # noqa: B018 @pytest.mark.parametrize( "attr", [ "open_", "open_data_store", "set_summary_display", "get_summary_display", ], ) def test_lazy_import(attr): assert callable(getattr(scinexus, attr)) cogent3-scinexus-e0aee79/tests/test_io.py000066400000000000000000000170021520253266500206040ustar00rootroot00000000000000import bz2 import gzip import json import pathlib import pickle import shutil import pytest import scinexus from scinexus.composable import NotCompleted from scinexus.data_store import DataStoreDirectory, ReadOnlyDataStoreZipped from scinexus.deserialise import deserialise_object from scinexus.io import ( DEFAULT_DESERIALISER, DEFAULT_SERIALISER, compress, decompress, from_json, from_primitive, open_data_store, pickle_it, to_json, to_primitive, ) from scinexus.sqlite_data_store import DataStoreSqlite @pytest.fixture def tmp_dir(tmp_path_factory): return tmp_path_factory.mktemp("io") @pytest.fixture(autouse=True) def workingdir(tmp_dir, monkeypatch): monkeypatch.chdir(tmp_dir) @pytest.fixture def fasta_dir(DATA_DIR, tmp_dir): tmp_dir = pathlib.Path(tmp_dir) filenames = DATA_DIR.glob("*.fasta") fasta_dir = tmp_dir / "fasta" fasta_dir.mkdir(parents=True, exist_ok=True) for fn in filenames: dest = fasta_dir / fn.name dest.write_text(fn.read_text()) return fasta_dir @pytest.fixture def zipped_full(fasta_dir): source = fasta_dir path = shutil.make_archive( base_name=str(source), format="zip", base_dir=source, root_dir=source.parent, ) return ReadOnlyDataStoreZipped(pathlib.Path(path), suffix="fasta") def test_define_data_store(fasta_dir): """returns an iterable data store""" found = open_data_store(fasta_dir, suffix=".fasta") assert len(found) > 1 found = 
open_data_store(fasta_dir, suffix=".fasta", limit=2) assert len(found) == 2 found = list(open_data_store(fasta_dir, suffix=".fasta*")) assert len(found) > 2 with pytest.raises(ValueError): open_data_store(fasta_dir, suffix="*") with pytest.raises(ValueError): open_data_store(fasta_dir) with pytest.raises(TypeError): open_data_store(fasta_dir, 1) @pytest.mark.parametrize( ("serialiser", "deserialiser"), [ (json.dumps, json.loads), (pickle.dumps, pickle.loads), (lambda x: x, deserialise_object), ], ) def test_deserialiser(serialiser, deserialiser): data = {"1": 1, "abc": [1, 2]} deserialised = from_primitive(deserialiser=deserialiser) assert deserialised(serialiser(data)) == data def test_pickle_unpickle_apps(): data = {"a": [1, 2, 3]} pkld = to_primitive() + to_json() upkld = from_json() + from_primitive() assert upkld(pkld(data)) == data def test_pickle_it_unpickleable(): def foo(): ... app = pickle_it() got = app(foo) assert isinstance(got, NotCompleted) @pytest.mark.parametrize( ("comp", "decomp"), [(bz2.compress, bz2.decompress), (gzip.compress, gzip.decompress)], ) def test_compress_decompress(comp, decomp): data = pickle.dumps({"1": 1, "abc": [1, 2]}) decompressor = decompress(decompressor=decomp) compressor = compress(compressor=comp) assert decompressor(compressor(data)) == data @pytest.mark.parametrize("data", [{"a": [0, 1]}]) def test_default_serialiser_deserialiser(data): s = DEFAULT_SERIALISER(data) ds = DEFAULT_DESERIALISER(s) assert ds == data def test_to_json(): to_j = to_json() data = {"a": [0, 1]} assert to_j(data) == json.dumps(data) def test_from_json(): from_j = from_json() assert from_j('{"a": [0, 1]}') == {"a": [0, 1]} def test_to_from_json(): to_j = to_json() from_j = from_json() app = to_j + from_j data = {"a": [0, 1]} assert app(data) == data assert app(data) is not data def test_open_suffix_dirname(tmp_dir): outpath = tmp_dir / "melsubgroup_aln_flydivas_v1.2" outpath.mkdir(exist_ok=True) dstore = open_data_store(outpath, suffix="txt") 
assert isinstance(dstore, DataStoreDirectory) def test_open_zipped(zipped_full): got = open_data_store(zipped_full.source, mode="r", suffix="fasta") assert len(got) == len(zipped_full) assert isinstance(got, type(zipped_full)) def test_open_data_store_sqlitedb(tmp_dir): path = tmp_dir / "test.sqlitedb" dstore = open_data_store(path, mode="w") assert isinstance(dstore, DataStoreSqlite) # Tests for top-level scinexus.open_data_store def test_toplevel_open_data_store(fasta_dir): """top-level open_data_store delegates to scinexus.io""" found = scinexus.open_data_store(fasta_dir, suffix="fasta") assert isinstance(found, DataStoreDirectory) assert len(found) > 1 def test_toplevel_open_data_store_sqlitedb(tmp_dir): path = tmp_dir / "test_toplevel.sqlitedb" dstore = scinexus.open_data_store(path, mode="w") assert isinstance(dstore, DataStoreSqlite) # Tests for set_summary_display / get_summary_display def test_get_summary_display_default(): """default summary display is None""" orig = scinexus.get_summary_display() try: scinexus.set_summary_display(None) assert scinexus.get_summary_display() is None finally: scinexus.set_summary_display(orig) def test_set_get_summary_display(): """set then get returns the same function""" orig = scinexus.get_summary_display() try: sentinel = lambda data, *, name: data # noqa: E731 scinexus.set_summary_display(sentinel) assert scinexus.get_summary_display() is sentinel finally: scinexus.set_summary_display(orig) def test_set_summary_display_none(): """setting None clears the display function""" orig = scinexus.get_summary_display() try: scinexus.set_summary_display(lambda data, *, name: data) scinexus.set_summary_display(None) assert scinexus.get_summary_display() is None finally: scinexus.set_summary_display(orig) def test_summary_display_applied(fasta_dir): """a registered display function is called by summary properties""" orig = scinexus.get_summary_display() calls = [] def track(data, *, name): calls.append(name) return data try: 
scinexus.set_summary_display(track) dstore = scinexus.open_data_store(fasta_dir, suffix="fasta") _ = dstore.describe assert "describe" in calls finally: scinexus.set_summary_display(orig) def test_register_datastore_reader_duplicate_none(): from scinexus.io import _datastore_reader_map, register_datastore_reader assert None in _datastore_reader_map with pytest.raises(ValueError, match="already in"): register_datastore_reader(None) def test_register_datastore_reader_non_string(): from scinexus.io import register_datastore_reader with pytest.raises(TypeError, match="is not a string"): register_datastore_reader(123) def test_register_datastore_reader_empty_string(): from scinexus.io import register_datastore_reader with pytest.raises(ValueError, match="white-space"): register_datastore_reader("") def test_register_datastore_reader_duplicate_suffix(): from scinexus.io import register_datastore_reader with pytest.raises(ValueError, match="already in"): register_datastore_reader("zip") def test_open_data_store_unknown_suffix(tmp_dir): path = tmp_dir / "test.xyz" path.write_text("data") with pytest.raises(KeyError): open_data_store(path, suffix="fasta") def test_open_data_store_no_suffix_write_mode(tmp_dir): outpath = tmp_dir / "newdir" with pytest.raises(ValueError, match="suffix is required"): open_data_store(outpath, mode="w") def test_open_data_store_memory_readonly(): with pytest.raises(NotImplementedError, match="readonly"): open_data_store(":memory:", mode="r") cogent3-scinexus-e0aee79/tests/test_io_util.py000066400000000000000000000451521520253266500216500ustar00rootroot00000000000000import bz2 import gzip import pathlib import zipfile from urllib.parse import urlparse import pytest from scinexus.composable import NotCompleted from scinexus.io_util import ( _path_relative_to_zip_parent, atomic_write, get_format_suffixes, is_url, iter_line_blocks, iter_record_chunks, iter_splitlines, open_, open_url, path_exists, ) @pytest.fixture def tmp_dir(tmp_path_factory): 
return tmp_path_factory.mktemp("test_io") @pytest.fixture def home_file(DATA_DIR, HOME_TMP_DIR): """makes a temporary directory with file""" fn = "sample.tsv" contents = (DATA_DIR / fn).read_text() (HOME_TMP_DIR / fn).write_text(contents) return str(HOME_TMP_DIR / fn) @pytest.mark.parametrize("transform", [str, pathlib.Path]) def test_open_home(DATA_DIR, home_file, transform): """expands tilde for opening / writing to home""" data_path = DATA_DIR / "sample.tsv" expect = data_path.read_text() with open_(transform(home_file)) as infile: got = infile.read() assert got == expect def test_does_not_write_if_exception(tmp_dir): """file does not exist if an exception raised before closing""" test_filepath = tmp_dir / "Atomic_write_test" with pytest.raises(AssertionError), atomic_write(test_filepath, mode="w") as f: f.write("abc") raise AssertionError assert not test_filepath.exists() @pytest.mark.parametrize("suffix", ["gz", "bz2", "zip", "lzma", "xz"]) def test_writes_compressed_formats(DATA_DIR, tmp_dir, suffix): """correctly writes / reads different compression formats""" fpath = DATA_DIR / "sample.tsv" expect = pathlib.Path(fpath).read_text() outpath = tmp_dir / f"{fpath.name}.{suffix}" with atomic_write(outpath, mode="wt") as f: f.write(expect) with open_(outpath) as infile: got = infile.read() assert got == expect, f"write failed for {suffix}" def test_atomic_invalid_parent_dir(): with pytest.raises(OSError), atomic_write("invalid_dir/test.txt") as out: out.write("will not work") def test_rename(tmp_dir): """Renames file as expected""" test_filepath = tmp_dir / "Atomic_write_test" open(test_filepath, "w").close() assert test_filepath.exists() with atomic_write(test_filepath, mode="w") as f: f.write("abc") def test_atomic_write_noncontext(tmp_dir): """atomic write works as more regular file object""" path = tmp_dir / "foo.txt" zip_path = path.parent / f"{path.name}.zip" aw = atomic_write(path, in_zip=zip_path, mode="w") aw.write("some data") aw.close() with 
open_(zip_path) as ifile: got = ifile.read() assert got == "some data" def test_open_handles_bom(tmp_dir): """handle files with a byte order mark""" text = "some text" textfile = tmp_dir / "sample.txt" textfile.write_text(text, encoding="utf-8-sig") gzip_file = tmp_dir / "sample.txt.gz" with gzip.open(gzip_file, "wt", encoding="utf-8-sig") as outfile: outfile.write(text) bzip_file = tmp_dir / "sample.txt.bz2" with bz2.open(bzip_file, "wt", encoding="utf-8-sig") as outfile: outfile.write(text) zip_file = tmp_dir / "sample.zip" with zipfile.ZipFile(zip_file, "w") as outfile: outfile.write(textfile, "sample.txt") for path in (bzip_file, gzip_file, textfile, zip_file): with open_(path) as infile: got = infile.read() assert got == text, f"failed reading {path}" @pytest.mark.parametrize("non", [None, ""]) def test_open_empty_raises(non): with pytest.raises(ValueError): open_(non) def test_aw_zip_from_path(tmp_dir): """supports inferring zip archive name from path""" path = tmp_dir / "foo.txt" zip_path = path.parent / f"{path.name}.zip" aw = atomic_write(zip_path, in_zip=True, mode="w") aw.write("some data") aw.close() with open_(zip_path) as ifile: got = ifile.read() assert got == "some data" path = tmp_dir / "foo2.txt" zip_path = path.parent / f"{path.name}.zip" aw = atomic_write(path, in_zip=zip_path, mode="w") aw.write("some data") aw.close() with open_(zip_path) as ifile: got = ifile.read() assert got == "some data" def test_expanduser(tmp_dir): """expands user correctly""" home = pathlib.Path("~").expanduser() test_filepath = tmp_dir / "Atomic_write_test" test_filepath = str(test_filepath).replace(str(home), "~") with atomic_write(test_filepath, mode="w") as f: f.write("abc") def test_path_relative_to_zip_parent(): """correctly generates member paths for a zip archive""" zip_path = pathlib.Path("some/path/to/a/data.zip") for member in ("data/member.txt", "member.txt", "a/b/c/member.txt"): got = _path_relative_to_zip_parent(zip_path, pathlib.Path(member)) assert 
got.parts[0] == "data" @pytest.mark.parametrize( ("name", "expect"), [ ("suffixes.GZ", (None, "gz")), ("suffixes.ABCD", ("abcd", None)), ("suffixes.ABCD.BZ2", ("abcd", "bz2")), ("suffixes.abcd.BZ2", ("abcd", "bz2")), ("suffixes.ABCD.bz2", ("abcd", "bz2")), ], ) def test_get_format_suffixes_returns_lower_case(name, expect): """should always return lower case""" assert get_format_suffixes(name) == expect @pytest.mark.parametrize( ("name", "expect"), [ ("no_suffixes", (None, None)), ("suffixes.gz", (None, "gz")), ("suffixes.abcd", ("abcd", None)), ("suffixes.abcd.bz2", ("abcd", "bz2")), ("suffixes.zip", (None, "zip")), ], ) def test_get_format_suffixes(name, expect): """correctly return suffixes for compressed etc.. formats""" assert get_format_suffixes(name) == expect @pytest.mark.parametrize( ("name", "expect"), [ ("no_suffixes", (None, None)), ("suffixes.gz", (None, "gz")), ("suffixes.abcd", ("abcd", None)), ("suffixes.abcd.bz2", ("abcd", "bz2")), ("suffixes.zip", (None, "zip")), ], ) def test_get_format_suffixes_pathlib(name, expect): """correctly return suffixes for compressed etc.. 
formats from pathlib""" assert get_format_suffixes(pathlib.Path(name)) == expect @pytest.mark.parametrize( ("val", "expect"), [ ({}, False), ("not an existing path", False), ("(a,b,(c,d))", False), ("(a:0.1,b:0.1,(c:0.1,d:0.1):0.1)", False), (__file__, True), (pathlib.Path(__file__), True), (NotCompleted("FAIL", "test", message="none", source="unknown"), False), ], ) def test_path_exists(val, expect): """robustly identifies whether an object is a valid path and exists""" assert path_exists(val) == expect def test_open_reads_zip(tmp_dir): """correctly reads a zip compressed file""" text_path = tmp_dir / "foo.txt" with open(text_path, "w") as f: f.write("any str") zip_path = tmp_dir / "foo.zip" with zipfile.ZipFile(zip_path, "w") as zf: zf.write(text_path) with open_(zip_path) as got: assert got.readline() == "any str" def test_open_writes_zip(tmp_dir): """correctly writes a zip compressed file""" zip_path = tmp_dir / "foo.txt.zip" with open_(zip_path, "w") as f: f.write("any str") with zipfile.ZipFile(zip_path, "r") as zf: name = zf.namelist()[0] got = zf.open(name).read() assert got == b"any str" def test_open_zip_multi(tmp_dir): """zip with multiple records cannot be opened using open_""" text_path1 = tmp_dir / "foo.txt" with open(text_path1, "w") as f: f.write("any str") text_path2 = tmp_dir / "bar.txt" with open(text_path2, "w") as f: f.write("any str") zip_path = tmp_dir / "foo.zip" with zipfile.ZipFile(zip_path, "w") as zf: zf.write(text_path1) zf.write(text_path2) with pytest.raises(ValueError): open_(zip_path) def test_open_url_write_exceptions(): """Test 'w' mode (should raise Exception)""" with pytest.raises(Exception): open_url("http://example.com/test.txt", mode="w") def test_open_url_exceptions(): """non-http(s) address for url (should raise Exception)""" with pytest.raises(Exception): open_url("ftp://example.com/test.txt") def test_iter_splitlines_one(tmp_path): path = tmp_path / "one-line.txt" value = "We have text on one line." 
path.write_text(value) got = list(iter_splitlines(path)) assert got == [value] @pytest.mark.parametrize("newline", ["\n", "\r\n"]) def test_iter_splitlines_line_diff_newline(tmp_path, newline): path = tmp_path / "multi-line.txt" value = ["We have some", "text on different lines", "which load"] with open_(path, mode="w", newline=newline) as out: out.write("\n".join(value)) got = list(iter_splitlines(path, chunk_size=5)) assert got == value @pytest.mark.parametrize("newline", ["\n", "\r\n"]) def test_iter_splitlines_file_endswith_newline(tmp_path, newline): path = tmp_path / "multi-line.txt" value = ["We have some", "text on different lines", "which load"] with open_(path, mode="w", newline=newline) as out: out.write("\n".join(value) + "\n") got = list(iter_splitlines(path, chunk_size=5)) assert got == value def test_iter_splitlines_chunk_size_exceeds_file_size(tmp_path): path = tmp_path / "multi-line.txt" value = ["We have some", "text on different lines", "which load"] path.write_text("\n".join(value)) got = list(iter_splitlines(path, chunk_size=5_000_000)) assert got == value @pytest.mark.parametrize( "value", [ "With text\nending on a\nended in newline.", "With text\nending\non a\nended in newline.", ], ) def test_iter_splitlines_chunk_endswith_newline(tmp_path, value): path = tmp_path / "multi-line.txt" value = value.splitlines() path.write_text("\n".join(value)) got = list(iter_splitlines(path, chunk_size=11)) assert got == value def test_iter_splitlines_chunk_empty_file(tmp_path): path = tmp_path / "zero.txt" path.write_text("") got = list(iter_splitlines(path)) assert not got @pytest.mark.parametrize("transform", [str, pathlib.Path]) def test_iter_splitlines_tilde(home_file, transform): expect = pathlib.Path(home_file).expanduser().read_text().splitlines() got = list(iter_splitlines(transform(home_file))) assert len(got) == len(expect) def test_iter_line_blocks_correct_size(tmp_path): path = tmp_path / "multi-line.txt" value = ["We have some", "text on 
different lines", "which load"] path.write_text("\n".join(value)) got = list(iter_line_blocks(path, num_lines=2, chunk_size=5)) expect = [value[:2], value[-1:]] assert got == expect def test_iter_line_blocks_empty(tmp_path): path = tmp_path / "zero.txt" path.write_text("") got = list(iter_line_blocks(path, num_lines=2)) assert not got def test_iter_line_blocks_one(tmp_path): path = tmp_path / "one-line.txt" value = "We have text on one line." path.write_text(value) got = list(iter_line_blocks(path, num_lines=2)) assert got == [[value]] def test_iter_line_blocks_none_num_lines(tmp_path): path = tmp_path / "multi-line.txt" value = ["We have some", "text on different lines", "which load"] path.write_text("\n".join(value)) got = list(iter_line_blocks(path, num_lines=None)) expect = [value] assert got == expect @pytest.mark.parametrize( "url", [ "http://example.com", b"file://example.txt", pathlib.Path("example.txt").absolute().as_uri(), ], ) def test_is_url(url): assert is_url(url) @pytest.mark.parametrize( "url", [ "example.txt", pathlib.Path("example.txt"), b"example.txt", r"D:\foo\example.txt", ], ) def test_not_is_url(url): assert not is_url(url) def test_open_url_local(DATA_DIR, tmp_path): """using file:///""" file_name = "sample.tsv" local_path = DATA_DIR / file_name with open_(local_path) as infile: local_data = infile.read() with open_url(local_path.absolute().as_uri()) as infile: remote_data = infile.read() assert remote_data.splitlines() == local_data.splitlines() @pytest.fixture def gzip_uri(DATA_DIR, tmp_path): inpath = DATA_DIR / "sample.tsv" data = inpath.read_text() outpath = tmp_path / "sample.tsv.gz" with open_(outpath, "wb") as outfile: outfile.write(data.encode("utf8")) return outpath.as_uri() @pytest.mark.parametrize("mode", ["r", "rb", "rt"]) def test_open_url_gzip_mode(gzip_uri, mode): with open_url(gzip_uri, mode=mode) as infile: got = infile.read() expect_type = bytes if "b" in mode else str assert isinstance(got, expect_type) @pytest.mark.slow 
@pytest.mark.parametrize( "mode", ["r", "rb", "rt", None], ) @pytest.mark.internet def test_open_url(DATA_DIR, mode): """different open mode's all work""" file_name = "formattest.fasta" remote_root = "https://github.com/user-attachments/files/20321056/{}.gz" with open_(DATA_DIR / file_name, mode=mode) as infile: local_data = infile.read() with open_url(remote_root.format(file_name), mode=mode) as infile: remote_data = infile.read() assert remote_data.splitlines() == local_data.splitlines() # Test using a ParseResult for url with open_url(urlparse(remote_root.format(file_name)), mode=mode) as infile: remote_data = infile.read() assert remote_data.splitlines() == local_data.splitlines() @pytest.mark.slow @pytest.mark.internet def test_open_url_compressed(DATA_DIR): """comparing compressed file handling""" file_name = "formattest.fasta.gz" remote_root = "https://github.com/user-attachments/files/20321056/{}" with open_(DATA_DIR / file_name) as infile: local_data = infile.read() with open_url(remote_root.format(file_name), mode="rt") as infile: remote_data = infile.read() assert remote_data.splitlines() == local_data.splitlines() def test_get_compression_open_no_args(): from scinexus.io_util import _get_compression_open with pytest.raises(ValueError, match="either path or compression"): _get_compression_open() def test_open_via_url(DATA_DIR): uri = (DATA_DIR / "sample.tsv").absolute().as_uri() with open_(uri) as infile: got = infile.read() assert len(got) > 0 def test_atomic_write_tmpdir_not_exist(tmp_path): from scinexus.io_util import atomic_write bad_tmpdir = tmp_path / "nonexistent_tmpdir" with pytest.raises(FileNotFoundError, match="does not exist"): atomic_write(tmp_path / "test.txt", tmpdir=bad_tmpdir, mode="w") def test_close_rename_zip_in_zip_none(tmp_path): path = tmp_path / "test.txt" zip_path = tmp_path / "test.zip" aw = atomic_write(path, in_zip=zip_path, mode="w") aw._in_zip = None with pytest.raises(RuntimeError, match="in_zip path is unexpectedly 
None"): aw._close_rename_zip(aw._tmppath) def test_atomic_write_exit_without_enter(tmp_path): aw = atomic_write(tmp_path / "test.txt", mode="w") with pytest.raises(ValueError, match="file object is unexpectedly None"): aw.__exit__(None, None, None) def test_iter_splitlines_url(DATA_DIR): uri = (DATA_DIR / "sample.tsv").absolute().as_uri() got = list(iter_splitlines(uri)) assert len(got) > 0 @pytest.mark.parametrize("chunk_size", [1, 16, 64, 1024, 5_000_000]) def test_iter_record_chunks_chunk_size_independence(tmp_path, chunk_size): delim = b"\n//" data = b"record1\n//record2 is longer\n//record3" path = tmp_path / "records.bin" path.write_bytes(data) got = list(iter_record_chunks(path=path, delimiter=delim, chunk_size=chunk_size)) assert got == [b"record1", b"record2 is longer", b"record3"] def test_iter_record_chunks_delimiter_spans_chunk_boundary(tmp_path): delim = b"\n//" data = b"AAAAA" + delim + b"BBBBB" + delim + b"CCCCC" path = tmp_path / "records.bin" path.write_bytes(data) chunk_size = data.index(delim) + 1 got = list(iter_record_chunks(path=path, delimiter=delim, chunk_size=chunk_size)) assert got == [b"AAAAA", b"BBBBB", b"CCCCC"] def test_iter_record_chunks_record_larger_than_chunk(tmp_path): delim = b">" record = b"x" * 1000 data = delim + record + delim + b"short" path = tmp_path / "records.bin" path.write_bytes(data) got = list(iter_record_chunks(path=path, delimiter=delim, chunk_size=16)) assert got == [b"", record, b"short"] def test_iter_record_chunks_chunk_size_one(tmp_path): delim = b">" data = b">a>b>c" path = tmp_path / "records.bin" path.write_bytes(data) got = list(iter_record_chunks(path=path, delimiter=delim, chunk_size=1)) assert got == [b"", b"a", b"b", b"c"] def test_iter_record_chunks_ends_on_delimiter(tmp_path): delim = b"\n//" data = b"record1\n//record2\n//" path = tmp_path / "records.bin" path.write_bytes(data) got = list(iter_record_chunks(path=path, delimiter=delim, chunk_size=8)) assert got == [b"record1", b"record2"] def 
test_iter_record_chunks_no_delimiter(tmp_path): data = b"no delimiter present at all" path = tmp_path / "records.bin" path.write_bytes(data) got = list(iter_record_chunks(path=path, delimiter=b">", chunk_size=8)) assert got == [data] def test_iter_record_chunks_empty_file(tmp_path): path = tmp_path / "empty.bin" path.write_bytes(b"") got = list(iter_record_chunks(path=path, delimiter=b">")) assert got == [] def test_iter_record_chunks_empty_delimiter_raises(tmp_path): path = tmp_path / "records.bin" path.write_bytes(b"anything") with pytest.raises(ValueError, match="delimiter must be non-empty"): list(iter_record_chunks(path=path, delimiter=b"")) @pytest.mark.parametrize("compression", ["gz", "bz2"]) def test_iter_record_chunks_compressed(tmp_path, compression): data = b">a\nAAA>b\nBBB>c\nCCC" path = tmp_path / f"records.bin.{compression}" with open_(path, mode="wb") as f: f.write(data) got = list(iter_record_chunks(path=path, delimiter=b">", chunk_size=4)) assert got == [b"", b"a\nAAA", b"b\nBBB", b"c\nCCC"] @pytest.mark.parametrize("chunk_size", [None, 5_000_000]) def test_iter_record_chunks_read_all(tmp_path, chunk_size): data = b">a>b>c" path = tmp_path / "records.bin" path.write_bytes(data) got = list(iter_record_chunks(path=path, delimiter=b">", chunk_size=chunk_size)) assert got == [b"", b"a", b"b", b"c"] def test_iter_record_chunks_url(tmp_path): data = b">a\nAAA>b\nBBB>c\nCCC" src = tmp_path / "records.bin" src.write_bytes(data) uri = src.absolute().as_uri() got = list(iter_record_chunks(path=uri, delimiter=b">", chunk_size=4)) assert got == [b"", b"a\nAAA", b"b\nBBB", b"c\nCCC"] cogent3-scinexus-e0aee79/tests/test_misc.py000066400000000000000000000067631520253266500211440ustar00rootroot00000000000000from gzip import GzipFile, compress import pytest import scinexus.misc as misc_module from scinexus.misc import ( docstring_to_summary_rest, extend_docstring_from, get_object_provenance, in_jupyter, ) def test_not_in_jupyter(): assert not in_jupyter() def 
test_is_in_jupyter(): misc_module.get_ipython = lambda: None try: assert in_jupyter() finally: del misc_module.get_ipython def test_get_object_provenance_builtin_instance(): assert get_object_provenance("abc") == "str" assert get_object_provenance({"a": 1}) == "dict" def test_get_object_provenance_builtin_type(): assert get_object_provenance(dict) == "dict" assert get_object_provenance(str) == "str" def test_get_object_provenance_function(): assert get_object_provenance(compress) == "gzip.compress" def test_get_object_provenance_class(): assert get_object_provenance(GzipFile) == "gzip.GzipFile" def _source(): """This is a source docstring.""" def test_extend_docstring_from_append(): @extend_docstring_from(_source) def target(): """I am target.""" assert target.__doc__ == "This is a source docstring.\nI am target." def test_extend_docstring_from_prepend(): @extend_docstring_from(_source, pre=True) def target(): """I am target.""" assert target.__doc__ == "I am target.\nThis is a source docstring." def test_extend_docstring_from_no_dest_doc(): @extend_docstring_from(_source) def target(): pass assert target.__doc__ == "This is a source docstring.\n" def test_extend_docstring_from_method_append(): class C: @extend_docstring_from(_source) def target(self): """I am target.""" assert C.target.__doc__ == "This is a source docstring.\nI am target." def test_extend_docstring_from_method_prepend(): class C: @extend_docstring_from(_source, pre=True) def target(self): """I am target.""" assert C.target.__doc__ == "I am target.\nThis is a source docstring." def test_extend_docstring_from_method_no_dest_doc(): class C: @extend_docstring_from(_source) def target(self): pass assert C.target.__doc__ == "This is a source docstring.\n" class _SourceClass: """This is a class docstring.""" def test_extend_docstring_from_class_append(): @extend_docstring_from(_SourceClass) class Target: """I am target.""" assert Target.__doc__ == "This is a class docstring.\nI am target." 
def test_extend_docstring_from_class_prepend(): @extend_docstring_from(_SourceClass, pre=True) class Target: """I am target.""" assert Target.__doc__ == "I am target.\nThis is a class docstring." def test_extend_docstring_from_class_no_dest_doc(): @extend_docstring_from(_SourceClass) class Target: pass assert Target.__doc__ == "This is a class docstring.\n" def _foo1(): """some text""" def _foo2(): """some text Notes ----- body """ def _foo3(): """ Notes ----- body """ def _foo4(): ... @pytest.mark.parametrize( ("func", "sum_exp", "body_exp"), [ (_foo1, "some text", []), (_foo2, "some text", ["Notes", "-----", "body"]), (_foo3, "", ["Notes", "-----", "body"]), (_foo4, "", []), ], ) def test_docstring_to_summary_rest(func, sum_exp, body_exp): summary, body = docstring_to_summary_rest(func.__doc__) assert summary == sum_exp assert body.split() == body_exp cogent3-scinexus-e0aee79/tests/test_mypy_typing.py000066400000000000000000000101441520253266500225650ustar00rootroot00000000000000"""Tests that type-checker support works for both inheritance and decorator paths.""" import subprocess import sys import textwrap import pytest from scinexus import ComposableApp, NonComposableApp from scinexus.composable import ( NON_COMPOSABLE, NotCompleted, NotCompletedType, define_app, ) class IntToStr(ComposableApp[int, str]): def main(self, val: int) -> str: return str(val) class StrToInt(ComposableApp[str, int]): def main(self, val: str) -> int: return int(val) class NonComp(NonComposableApp[int, int]): def main(self, val: int) -> int: return val * 2 def test_inheritance_basic_call(): app = IntToStr() result = app(42) assert result == "42" def test_inheritance_composition(): app = IntToStr() + StrToInt() result = app(42) assert result == 42 def test_inheritance_not_completed_propagation(): app = IntToStr() nc = NotCompleted(NotCompletedType.ERROR, "test", "msg") result = app(nc) assert isinstance(result, NotCompleted) def test_inheritance_non_composable(): app = NonComp() assert 
app(5) == 10 assert not hasattr(app, "__add__") or app.app_type is NON_COMPOSABLE def test_inheritance_repr(): app = IntToStr() assert "IntToStr()" in repr(app) def test_inheritance_type_validation(): app = IntToStr() result = app("wrong type") assert isinstance(result, NotCompleted) def test_inheritance_init_vals(): class WithInit(ComposableApp[int, int]): def __init__(self, factor: int = 1): self.factor = factor def main(self, val: int) -> int: return val * self.factor app = WithInit(factor=3) assert app._init_vals == {"factor": 3} assert app(5) == 15 def test_inheritance_citations(): from citeable import Software cite = Software( author=["Doe, J"], title="test", year=2024, url="https://example.com", version="1.0", license="MIT", doi="10.0/test", publisher="test", ) class Cited(ComposableApp[int, int], cite=cite): def main(self, val: int) -> int: return val app = Cited() assert app.citations == (cite,) assert cite.app == "Cited" def test_inheritance_pickle(): import pickle app = IntToStr() data = pickle.dumps(app) restored = pickle.loads(data) assert restored(42) == "42" def test_decorator_still_works(): """define_app decorator continues to produce working apps.""" @define_app class dec_app: def main(self, val: int) -> int: return val + 1 app = dec_app() assert app(1) == 2 def test_decorator_composable_with_inheritance(): """Decorator-created and inheritance-created apps compose together.""" @define_app class adder: def main(self, val: int) -> int: return val + 1 composed = adder() + IntToStr() assert composed(1) == "2" def _run_mypy(code: str, tmp_path) -> tuple[int, str]: src = tmp_path / "check.py" src.write_text(code) result = subprocess.run( [sys.executable, "-m", "mypy", "--no-error-summary", str(src)], capture_output=True, text=True, check=False, ) return result.returncode, result.stdout + result.stderr @pytest.mark.slow def test_mypy_decorator_reveal_type(tmp_path): code = textwrap.dedent("""\ from scinexus.composable import define_app @define_app class 
MyApp: def main(self, val: int) -> str: return str(val) app = MyApp() reveal_type(app(42)) """) _, output = _run_mypy(code, tmp_path) assert "str" in output or "Union" in output def test_mypy_inheritance_reveal_type(tmp_path): code = textwrap.dedent("""\ from scinexus import ComposableApp class MyApp(ComposableApp[int, str]): def main(self, val: int) -> str: return str(val) app = MyApp() reveal_type(app(42)) """) _, output = _run_mypy(code, tmp_path) # Should see str (or Union[str, NotCompleted]) — not Any assert "str" in output cogent3-scinexus-e0aee79/tests/test_parallel.py000066400000000000000000000342611520253266500217770ustar00rootroot00000000000000import multiprocessing import os import time from collections.abc import Generator from unittest.mock import patch import numpy import pytest from scinexus import parallel from scinexus.parallel import ( LokyBackend, MPIBackend, MultiprocessBackend, Parallel, PicklableAndCallable, _clamp_max_workers_local, _effective_backend, as_completed, get_default_chunksize, get_parallel_backend, get_size, set_parallel_backend, ) @pytest.fixture(autouse=True) def _reset_backend(): """Reset the module-level default after each test.""" yield set_parallel_backend(None) def get_process_value(n): # Sleep to accommodate Windows process creation overhead time.sleep(1) return (parallel.get_rank(), n) def get_ranint(n): numpy.random.seed(n) return numpy.random.randint(1, 10) def check_is_master_process(n): return parallel.is_master_process() def _double(x): return x * 2 def test_parallel_backend_abc_cannot_instantiate(): """Parallel cannot be instantiated directly""" with pytest.raises(TypeError): Parallel() def test_parallel_backend_abc_missing_methods(): """incomplete subclass raises TypeError""" class Incomplete(Parallel): def imap(self, f, s, max_workers=None, **kwargs): yield from () with pytest.raises(TypeError): Incomplete() def test_set_parallel_backend_multiprocess(): """setting 'multiprocess' returns MultiprocessBackend""" 
set_parallel_backend("multiprocess") assert isinstance(get_parallel_backend(), MultiprocessBackend) def test_set_parallel_backend_loky(): """setting 'loky' returns LokyBackend""" set_parallel_backend("loky") assert isinstance(get_parallel_backend(), LokyBackend) def test_set_parallel_backend_none_resets(): """None resets to default""" set_parallel_backend("loky") set_parallel_backend(None) assert isinstance(get_parallel_backend(), MultiprocessBackend) def test_set_parallel_backend_custom_instance(): """accepts a Parallel instance""" class Custom(Parallel): def imap(self, f, s, max_workers=None, **kwargs): yield from () def as_completed(self, f, s, max_workers=None, **kwargs): yield from () def is_master_process(self): return True def get_rank(self): return 0 def get_size(self): return 1 custom = Custom() set_parallel_backend(custom) assert get_parallel_backend() is custom def test_set_parallel_backend_invalid_string(): """invalid string raises ValueError""" with pytest.raises(ValueError, match="unknown backend"): set_parallel_backend("invalid") # type: ignore def test_set_parallel_backend_loky_not_installed(): """set_parallel_backend('loky') raises ImportError when loky is missing""" with patch.dict("sys.modules", {"loky": None}), pytest.raises(ImportError): set_parallel_backend("loky") def test_set_parallel_backend_mpi_not_available(): """set_parallel_backend('mpi') raises ImportError when mpi4py is missing""" with patch.object(parallel, "MPI", None), pytest.raises(ImportError): set_parallel_backend("mpi") def test_get_parallel_backend_default(): """returns MultiprocessBackend when nothing set""" set_parallel_backend(None) assert isinstance(get_parallel_backend(), MultiprocessBackend) def test_get_parallel_backend_caches(): """get_parallel_backend caches the default instance""" set_parallel_backend(None) b1 = get_parallel_backend() b2 = get_parallel_backend() assert b1 is b2 def test_get_parallel_backend_with_backend_multiprocess(): """returns a 
MultiprocessBackend when backend='multiprocess'""" assert isinstance(get_parallel_backend(backend="multiprocess"), MultiprocessBackend) def test_get_parallel_backend_with_backend_loky(): """returns a LokyBackend when backend='loky'""" assert isinstance(get_parallel_backend(backend="loky"), LokyBackend) def test_get_parallel_backend_with_backend_does_not_modify_default(): """passing backend does not change the global default""" set_parallel_backend("multiprocess") default_before = get_parallel_backend() get_parallel_backend(backend="loky") default_after = get_parallel_backend() assert default_before is default_after assert isinstance(default_after, MultiprocessBackend) def test_get_parallel_backend_with_backend_none(): """backend=None returns the current default""" set_parallel_backend("loky") assert isinstance(get_parallel_backend(backend=None), LokyBackend) def test_effective_backend_caches_mpi(): """_effective_backend caches the MPIBackend instance""" mock_mpi = type("FakeMPI", (), {"COMM_WORLD": None}) with ( patch.object(parallel, "USING_MPI", True), patch.object(parallel, "MPI", mock_mpi), patch.object(parallel, "_mpi_backend", None), patch.object(MPIBackend, "__init__", lambda self: None), ): b1 = _effective_backend() b2 = _effective_backend() assert b1 is b2 assert isinstance(b1, MPIBackend) def test_multiprocess_imap(): """MultiprocessBackend.imap returns ordered results""" backend = MultiprocessBackend() data = list(range(10)) result = list(backend.imap(_double, data, max_workers=1)) assert result == [x * 2 for x in data] def test_multiprocess_as_completed(): """MultiprocessBackend.as_completed returns all results""" backend = MultiprocessBackend() data = list(range(10)) result = sorted(backend.as_completed(_double, data)) assert result == sorted(x * 2 for x in data) def test_multiprocess_is_master_process(): """MultiprocessBackend.is_master_process returns True in main""" backend = MultiprocessBackend() assert backend.is_master_process() def 
test_multiprocess_get_rank(): """MultiprocessBackend.get_rank returns 0 in main process""" backend = MultiprocessBackend() assert backend.get_rank() == 0 def test_multiprocess_get_size(): """MultiprocessBackend.get_size returns cpu_count""" backend = MultiprocessBackend() assert backend.get_size() == multiprocessing.cpu_count() def test_multiprocess_max_workers_too_large(): """max_workers > cpu_count raises ValueError""" backend = MultiprocessBackend() n = multiprocessing.cpu_count() + 1 with pytest.raises(ValueError, match="max_workers"): list(backend.imap(_double, [1], max_workers=n)) def test_multiprocess_non_sized_iterable(): """imap with a generator defaults chunksize to 1""" backend = MultiprocessBackend() def gen(): yield from range(4) result = list(backend.imap(_double, gen(), max_workers=1)) assert sorted(result) == [0, 2, 4, 6] def test_multiprocess_as_completed_max_workers_clamped(): """large max_workers gets clamped""" backend = MultiprocessBackend() data = list(range(4)) result = sorted(backend.as_completed(_double, data, max_workers=9999)) assert result == sorted(x * 2 for x in data) def test_clamp_max_workers_local_valid(): """valid max_workers is returned unchanged""" result = _clamp_max_workers_local(1) assert result == 1 def test_clamp_max_workers_local_too_large(): """max_workers exceeding cpu_count is clamped to cpu_count""" cpu = multiprocessing.cpu_count() result = _clamp_max_workers_local(cpu + 1) assert result == cpu def test_loky_imap(): """LokyBackend.imap returns ordered results""" backend = LokyBackend() data = list(range(10)) result = list(backend.imap(_double, data, max_workers=1)) assert result == [x * 2 for x in data] def test_loky_as_completed(): """LokyBackend.as_completed returns all results""" backend = LokyBackend() data = list(range(10)) result = sorted(backend.as_completed(_double, data)) assert result == sorted(x * 2 for x in data) def test_loky_is_master_process(): """LokyBackend.is_master_process returns True in main""" 
backend = LokyBackend() assert backend.is_master_process() def test_loky_get_rank(): """LokyBackend.get_rank returns 0 in main process""" backend = LokyBackend() assert backend.get_rank() == 0 def test_loky_get_size(): """LokyBackend.get_size returns cpu_count""" backend = LokyBackend() assert backend.get_size() == multiprocessing.cpu_count() @pytest.mark.slow def test_create_processes(): """Processor pool should create multiple distinct processes""" max_worker_count = multiprocessing.cpu_count() - 1 index = list(range(max_worker_count)) result = parallel.map(get_process_value, index, max_workers=None) result_processes = [v[0] for v in result] result_values = [v[1] for v in result] assert sorted(result_values) == index assert len(set(result_processes)) == max_worker_count def test_random_seeding(): """Random seed should be set every function call""" index1 = [2, 3, 4, 5, 6, 7, 8, 9, 10] index2 = [2, 2, 2, 2, 2, 2, 2, 2, 2] result1 = parallel.map(get_ranint, index1, max_workers=1) result2 = parallel.map(get_ranint, index2, max_workers=1) assert result1[0] == result2[0] assert result1 != result2 def _get_rank(_x): return parallel.get_rank() def test_get_rank(): """get_rank() should return 0 on master, > 0 on workers""" assert parallel.get_rank() == 0 index = list(range(1, 5)) ranks = list(parallel.imap(_get_rank, index)) assert all(r > 0 for r in ranks) def test_is_master_process(): """is_master_process() should return True on master, False on workers""" assert parallel.is_master_process() index = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] master_processes = sum( bool(result) for result in parallel.imap(check_is_master_process, index) ) assert master_processes == 0 def test_as_completed(): """as_completed should return all results""" data = list(range(10)) result = sorted(as_completed(_double, data)) assert result == sorted(x * 2 for x in data) def test_get_size(): """get_size returns cpu_count when not using MPI""" assert get_size() == multiprocessing.cpu_count() def 
test_get_default_chunksize_exact(): """chunksize with no remainder""" assert get_default_chunksize(range(16), 4) == 1 def test_get_default_chunksize_remainder(): """chunksize rounds up when there is a remainder""" assert get_default_chunksize(range(17), 4) == 2 def test_picklable_and_callable(): """PicklableAndCallable wraps and delegates calls""" wrapped = PicklableAndCallable(_double) assert wrapped(5) == 10 def test_imap_invalid_if_serial(): """invalid if_serial raises ValueError""" with pytest.raises(ValueError, match="invalid choice"): list(parallel.imap(_double, [1], if_serial="invalid")) def test_imap_max_workers_too_large(): """max_workers > cpu_count raises ValueError""" n = multiprocessing.cpu_count() + 1 with pytest.raises(ValueError, match="max_workers"): list(parallel.imap(_double, [1], max_workers=n)) def test_as_completed_invalid_if_serial(): """invalid if_serial raises ValueError in as_completed""" with pytest.raises(ValueError, match="invalid choice"): list(as_completed(_double, [1], if_serial="invalid")) def test_as_completed_max_workers_clamped(): """large max_workers gets clamped""" data = list(range(4)) result = sorted(as_completed(_double, data, max_workers=9999)) assert result == sorted(x * 2 for x in data) def test_imap_use_mpi_when_unavailable(): """imap(use_mpi=True) raises RuntimeError when MPI unavailable""" with patch.object(parallel, "MPI", None): with pytest.raises(RuntimeError, match="Cannot use MPI"): list(parallel.imap(_double, [1], use_mpi=True)) def test_imap_non_sized_iterable(): """imap with a generator (non-Sized) defaults chunksize to 1""" def gen(): yield from range(4) result = list(parallel.imap(_double, gen(), max_workers=1)) assert sorted(result) == [0, 2, 4, 6] def test_get_rank_worker_process(): """get_rank parses rank from worker process name""" mock_process = type("FakeProcess", (), {"name": "LokyProcess-3"})() with patch("multiprocessing.current_process", return_value=mock_process): backend = MultiprocessBackend() 
assert backend.get_rank() == 3 def test_dont_use_mpi_env_var(): """DONT_USE_MPI env var disables MPI import""" import importlib with patch.dict("os.environ", {"DONT_USE_MPI": "1"}): importlib.reload(parallel) assert parallel.MPI is None assert parallel.USING_MPI is False # reload to restore original state os.environ.pop("DONT_USE_MPI", None) importlib.reload(parallel) def test_mpi_import_error_fallback(): """MPI is None when mpi4py cannot be imported""" import importlib import scinexus.parallel as par with patch.dict("sys.modules", {"mpi4py": None, "mpi4py.futures": None}): importlib.reload(par) assert par.MPI is None assert par.USING_MPI is False # reload to restore original state importlib.reload(par) def test_module_imap_delegates_to_backend(): """module-level imap delegates to the current backend""" set_parallel_backend("multiprocess") data = list(range(5)) result = list(parallel.imap(_double, data, max_workers=1)) assert result == [x * 2 for x in data] def test_module_as_completed_delegates_to_backend(): """module-level as_completed delegates to the current backend""" set_parallel_backend("multiprocess") data = list(range(5)) result = sorted(parallel.as_completed(_double, data)) assert result == sorted(x * 2 for x in data) def test_module_map_returns_list(): """module-level map returns a list""" data = list(range(5)) result = parallel.map(_double, data, max_workers=1) assert isinstance(result, list) assert result == [x * 2 for x in data] def test_imap_returns_generator(): """module-level imap returns a generator""" result = parallel.imap(_double, [1, 2, 3], max_workers=1) assert isinstance(result, Generator) def test_as_completed_returns_generator(): """module-level as_completed returns a generator""" result = parallel.as_completed(_double, [1, 2, 3]) assert isinstance(result, Generator) cogent3-scinexus-e0aee79/tests/test_parallel_mpi.py000066400000000000000000000150011520253266500226330ustar00rootroot00000000000000import sys from unittest.mock import patch 
import pytest from scinexus import parallel from scinexus.parallel import ( SIZE, as_completed, get_rank, imap, is_master_process, map, ) pytestmark = pytest.mark.mpi def _get_rank(n): # noqa: ARG001 return get_rank() def _is_master(n): # noqa: ARG001 return is_master_process() @pytest.mark.mpi def test_get_rank_mpi(): """get_rank() should return 0 on master, > 0 on MPI workers""" assert get_rank() == 0 ranks = list(imap(_get_rank, list(range(1, 5)), use_mpi=True)) assert all(r > 0 for r in ranks) @pytest.mark.mpi def test_is_master_process(): """is_master_process() should return False for all child processes""" assert is_master_process() index = list(range(1, 11)) master_processes = sum( bool(result) for result in imap(_is_master, index, use_mpi=True) ) assert master_processes == 0 def _double(x): return x * 2 @pytest.mark.mpi def test_imap_mpi(): """imap with use_mpi should process all items""" data = list(range(10)) result = list(imap(_double, data, use_mpi=True)) assert result == [x * 2 for x in data] @pytest.mark.mpi def test_map_mpi(): """map with use_mpi should return correct results""" data = list(range(10)) result = map(_double, data, use_mpi=True) assert result == [x * 2 for x in data] @pytest.mark.mpi def test_as_completed_mpi(): """as_completed with use_mpi should return all results""" data = list(range(10)) result = sorted(as_completed(_double, data, use_mpi=True)) assert result == sorted(x * 2 for x in data) @pytest.mark.mpi def test_imap_mpi_with_chunksize(): """imap with explicit chunksize under MPI""" data = list(range(20)) result = list(imap(_double, data, use_mpi=True, chunksize=5)) assert result == [x * 2 for x in data] @pytest.mark.mpi def test_imap_mpi_max_workers_warning(): """max_workers exceeding SIZE should emit a warning""" data = list(range(10)) with pytest.warns(UserWarning, match="max_workers too large"): result = list(imap(_double, data, use_mpi=True, max_workers=SIZE + 10)) assert result == [x * 2 for x in data] @pytest.mark.mpi def 
test_imap_mpi_if_serial_warn(): """if_serial='warn' should be accepted without error when SIZE > 1""" data = list(range(10)) result = list(imap(_double, data, use_mpi=True, if_serial="warn")) assert result == [x * 2 for x in data] @pytest.mark.mpi def test_imap_mpi_if_serial_ignore(): """if_serial='ignore' should be accepted without error""" data = list(range(10)) result = list(imap(_double, data, use_mpi=True, if_serial="ignore")) assert result == [x * 2 for x in data] @pytest.mark.mpi def test_imap_mpi_invalid_if_serial(): """invalid if_serial value should raise ValueError""" with pytest.raises(ValueError, match="invalid choice"): list(imap(_double, [1], use_mpi=True, if_serial="invalid")) @pytest.mark.mpi def test_imap_mpi_if_serial_raise_size_1(): """if_serial='raise' with SIZE==1 raises RuntimeError""" backend = parallel.MPIBackend() backend._size = 1 with pytest.raises(RuntimeError, match="Execution in serial"): list(backend.imap(_double, [1], if_serial="raise")) @pytest.mark.mpi def test_imap_mpi_if_serial_warn_size_1(): """if_serial='warn' with SIZE==1 emits warning""" backend = parallel.MPIBackend() backend._size = 1 with pytest.warns(UserWarning, match="Execution in serial"): with pytest.raises(ZeroDivisionError): list(backend.imap(_double, [1], if_serial="warn")) @pytest.mark.mpi def test_imap_mpi_non_sized_iterable(): """imap with a generator under MPI defaults chunksize to 1""" def gen(): yield from range(4) result = list(imap(_double, gen(), use_mpi=True)) assert sorted(result) == [0, 2, 4, 6] @pytest.mark.mpi def test_as_completed_mpi_invalid_if_serial(): """invalid if_serial raises ValueError via as_completed with MPI""" with pytest.raises(ValueError, match="invalid choice"): list(as_completed(_double, [1], use_mpi=True, if_serial="invalid")) @pytest.mark.mpi def test_as_completed_mpi_max_workers_warning(): """max_workers > SIZE emits warning in _as_completed_mpi""" data = list(range(10)) with pytest.warns(UserWarning, match="max_workers too 
large"): result = sorted( as_completed(_double, data, use_mpi=True, max_workers=SIZE + 10) ) assert result == sorted(x * 2 for x in data) @pytest.mark.mpi def test_as_completed_mpi_if_serial_raise_size_1(): """_as_completed_mpi with SIZE==1 and if_serial='raise' raises RuntimeError""" backend = parallel.MPIBackend() backend._size = 1 with pytest.raises(RuntimeError, match="Execution in serial"): list(backend.as_completed(_double, [1], if_serial="raise")) @pytest.mark.mpi def test_as_completed_mpi_if_serial_warn_size_1(): """_as_completed_mpi with SIZE==1 and if_serial='warn' emits warning""" backend = parallel.MPIBackend() backend._size = 1 with pytest.warns(UserWarning, match="Execution in serial"): with pytest.raises(ZeroDivisionError): list(backend.as_completed(_double, list(range(4)), if_serial="warn")) @pytest.mark.mpi def test_as_completed_mpi_if_serial_ignore_size_1(): """_as_completed_mpi with SIZE==1 and if_serial='ignore' does not raise serial error""" backend = parallel.MPIBackend() backend._size = 1 with pytest.raises(ZeroDivisionError): list(backend.as_completed(_double, list(range(4)), if_serial="ignore")) def test_as_completed_mpi_not_using_mpi(): """MPIBackend raises RuntimeError when MPI is None""" with patch.object(parallel, "MPI", None): with pytest.raises(RuntimeError, match="Cannot use MPI"): list(as_completed(_double, [1], use_mpi=True)) @pytest.mark.mpi def test_as_completed_mpi_non_sized_iterable(): """_as_completed_mpi with generator defaults chunksize to 1""" def gen(): yield from range(4) result = sorted(as_completed(_double, gen(), use_mpi=True)) assert result == [0, 2, 4, 6] @pytest.mark.mpi def test_mpi_get_size(): """MPIBackend.get_size returns UNIVERSE_SIZE""" backend = parallel.MPIBackend() assert backend.get_size() == SIZE @pytest.mark.mpi def test_is_master_process_mpi_server(): """is_master_process returns False when argv[0] is server.py""" backend = parallel.MPIBackend() with patch.object(sys, "argv", ["server.py"]): assert 
backend.is_master_process() is False cogent3-scinexus-e0aee79/tests/test_progress.py000066400000000000000000000647751520253266500220640ustar00rootroot00000000000000from unittest.mock import MagicMock, patch import pytest from scinexus.progress import ( NoProgress, Progress, ProgressContext, RichProgress, TqdmProgress, get_progress, set_progress_backend, ) @pytest.fixture(autouse=True) def _reset_default(): """Reset the module-level default after each test.""" yield set_progress_backend(None) def test_progress_abc_cannot_instantiate(): with pytest.raises(TypeError): Progress() def test_progress_abc_missing_call(): class Incomplete(Progress): def child(self, *, leave=None): return self with pytest.raises(TypeError): Incomplete() def test_progress_abc_missing_child(): class Incomplete(Progress): def __call__(self, iterable, *, total=None, msg=""): yield from iterable with pytest.raises(TypeError): Incomplete() def test_no_progress_yields_all_items_from_list(): np = NoProgress() assert list(np([1, 2, 3])) == [1, 2, 3] def test_no_progress_yields_all_items_from_generator(): np = NoProgress() def gen(): yield "a" yield "b" assert list(np(gen())) == ["a", "b"] def test_no_progress_child_returns_self(): np = NoProgress() assert np.child() is np def test_no_progress_total_and_msg_accepted(): np = NoProgress() assert list(np([1], total=1, msg="test")) == [1] def test_no_progress_empty_iterable(): np = NoProgress() assert list(np([])) == [] def test_no_progress_mixed_types(): np = NoProgress() data = [1, "two", 3.0, None] assert list(np(data)) == data def test_no_progress_is_progress_subclass(): assert isinstance(NoProgress(), Progress) def test_tqdm_yields_all_items(): tp = TqdmProgress(disable=True) assert list(tp([1, 2, 3], total=3)) == [1, 2, 3] def test_tqdm_yields_from_generator(): tp = TqdmProgress(disable=True) def gen(): yield "x" yield "y" assert list(tp(gen(), total=2)) == ["x", "y"] def test_tqdm_default_position_is_zero(): tp = TqdmProgress() assert tp._position 
== 0 def test_tqdm_child_increments_position(): tp = TqdmProgress() child = tp.child() assert isinstance(child, TqdmProgress) assert child._position == 1 def test_tqdm_chained_child_positions(): tp = TqdmProgress() grandchild = tp.child().child() assert grandchild._position == 2 def test_tqdm_empty_iterable(): tp = TqdmProgress(disable=True) assert list(tp([], total=0)) == [] def test_tqdm_is_progress_subclass(): assert isinstance(TqdmProgress(), Progress) def test_tqdm_total_passed_to_tqdm(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress() list(tp([1, 2], total=42, msg="testing")) mock_tqdm.assert_called_once() call_kwargs = mock_tqdm.call_args assert call_kwargs.kwargs["total"] == 42 assert call_kwargs.kwargs["desc"] == "testing" def test_tqdm_leave_true_at_position_zero(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress() list(tp([], total=0)) assert mock_tqdm.call_args.kwargs["leave"] is True def test_tqdm_leave_false_at_position_nonzero(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress() child = tp.child() list(child([], total=0)) assert mock_tqdm.call_args.kwargs["leave"] is False def test_tqdm_custom_refresh_per_second(): tp = TqdmProgress(refresh_per_second=5.0) assert tp._refresh_per_second == 5.0 @pytest.mark.parametrize("cls", [TqdmProgress, RichProgress]) @pytest.mark.parametrize("value", [0, -1.0]) def test_refresh_per_second_non_positive_raises(cls, value): with pytest.raises(ValueError, match="refresh_per_second must be positive"): cls(refresh_per_second=value) def test_tqdm_custom_bar_format(): tp = TqdmProgress(bar_format="{l_bar}{bar}") assert tp._bar_format == "{l_bar}{bar}" def test_tqdm_extra_kwargs_stored(): tp = TqdmProgress(unit="B") assert tp._tqdm_kwargs == {"unit": "B"} def test_tqdm_child_inherits_options(): tp = TqdmProgress( 
refresh_per_second=5.0, bar_format="{l_bar}", ) child = tp.child() assert child._refresh_per_second == 5.0 assert child._bar_format == "{l_bar}" def test_tqdm_child_inherits_tqdm_kwargs(): tp = TqdmProgress(unit="B") child = tp.child() assert child._tqdm_kwargs == {"unit": "B"} def test_tqdm_options_passed_to_tqdm(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress( refresh_per_second=5.0, bar_format="{l_bar}", bar_width=None, ) list(tp([], total=0)) kw = mock_tqdm.call_args.kwargs assert kw["mininterval"] == 0.2 assert kw["bar_format"] == "{l_bar}" def test_rich_yields_all_items(): rp = RichProgress(disable=True) assert list(rp([1, 2, 3], total=3, msg="test")) == [1, 2, 3] def test_rich_child_shares_progress_context(): rp = RichProgress(disable=True) list(rp([1], total=1)) child = rp.child() assert child._progress is rp._progress def test_rich_custom_refresh_per_second(): rp = RichProgress(refresh_per_second=5.0) assert rp._refresh_per_second == 5.0 def test_rich_child_inherits_refresh_per_second(): rp = RichProgress(refresh_per_second=5.0) child = rp.child() assert child._refresh_per_second == 5.0 def test_rich_empty_iterable(): rp = RichProgress(disable=True) assert list(rp([], total=0)) == [] def test_rich_is_progress_subclass(): assert isinstance(RichProgress(), Progress) def test_get_progress_false_returns_no_progress(): assert isinstance(get_progress(show_progress=False), NoProgress) def test_get_progress_true_returns_tqdm_progress(): assert isinstance(get_progress(show_progress=True), TqdmProgress) def test_get_progress_falsy_int_returns_no_progress(): assert isinstance(get_progress(0), NoProgress) def test_get_progress_passthrough_no_progress(): np = NoProgress() assert get_progress(np) is np def test_get_progress_passthrough_tqdm_progress(): tp = TqdmProgress() assert get_progress(tp) is tp def test_get_progress_default_arg_returns_no_progress(): assert isinstance(get_progress(), 
NoProgress) def test_set_default_no_progress_instance(): np = NoProgress() set_progress_backend(np) assert isinstance(get_progress(show_progress=True), NoProgress) def test_set_default_reset_with_none(): set_progress_backend(NoProgress()) set_progress_backend(None) assert isinstance(get_progress(show_progress=True), TqdmProgress) def test_set_default_preserves_specific_instance(): tp = TqdmProgress(refresh_per_second=5.0) set_progress_backend(tp) result = get_progress(show_progress=True) assert result is tp def test_set_default_false_unaffected(): set_progress_backend(TqdmProgress()) assert isinstance(get_progress(show_progress=False), NoProgress) def test_set_default_passthrough_unaffected(): np = NoProgress() set_progress_backend(TqdmProgress()) assert get_progress(np) is np def test_set_default_string_tqdm(): set_progress_backend("tqdm") assert isinstance(get_progress(show_progress=True), TqdmProgress) def test_set_default_string_rich(): set_progress_backend("rich") assert isinstance(get_progress(show_progress=True), RichProgress) def test_set_default_invalid_string_raises(): with pytest.raises(ValueError, match="unknown progress type"): set_progress_backend("invalid") def test_set_default_string_tqdm_with_kwargs(): set_progress_backend("tqdm", colour="green") result = get_progress(show_progress=True) assert isinstance(result, TqdmProgress) assert result._colour == "green" def test_set_default_string_rich_with_kwargs(): set_progress_backend("rich", colour="blue", leave=True) result = get_progress(show_progress=True) assert isinstance(result, RichProgress) assert result._colour == "blue" assert result._leave is True def test_tqdm_nested_child_yields_all(): outer = TqdmProgress(disable=True) inner = outer.child() outer_data = list(outer([1, 2], total=2)) inner_data = list(inner([3, 4], total=2)) assert outer_data == [1, 2] assert inner_data == [3, 4] def test_no_progress_nested_chain(): p = NoProgress() c = p.child() assert list(c([1, 2, 3])) == [1, 2, 3] assert c 
is p def test_get_progress_child_type(): tp = TqdmProgress() result = get_progress(tp).child() assert isinstance(result, TqdmProgress) def test_partial_iteration_cleanup(): tp = TqdmProgress(disable=True) it = tp([1, 2, 3, 4, 5], total=5) assert next(it) == 1 assert next(it) == 2 del it def test_progress_context_abc_cannot_instantiate(): with pytest.raises(TypeError): ProgressContext() def test_progress_context_abc_missing_update(): class Incomplete(ProgressContext): def close(self): pass with pytest.raises(TypeError): Incomplete() def test_progress_context_close_default_is_noop(): class MinimalCtx(ProgressContext): def update(self, *, progress, msg=""): pass ctx = MinimalCtx() ctx.close() def test_no_progress_context_returns_progress_context(): np = NoProgress() ctx = np.context() assert isinstance(ctx, ProgressContext) def test_no_progress_context_update_is_noop(): np = NoProgress() with np.context(msg="test") as ctx: ctx.update(progress=0.5, msg="halfway") def test_no_progress_context_close_is_noop(): np = NoProgress() ctx = np.context() ctx.close() ctx.close() def test_tqdm_context_returns_progress_context(): tp = TqdmProgress(disable=True) ctx = tp.context() assert isinstance(ctx, ProgressContext) ctx.close() def test_tqdm_context_as_context_manager(): tp = TqdmProgress(disable=True) with tp.context(msg="test") as ctx: ctx.update(progress=0.0, msg="start") ctx.update(progress=0.5, msg="halfway") ctx.update(progress=1.0, msg="done") def test_tqdm_context_maps_start_end(): tp = TqdmProgress(disable=True) with tp.context(start=0.0, end=0.9) as ctx: ctx.update(progress=0.5) assert ctx._bar.n == pytest.approx(0.45) def test_tqdm_context_full_range(): tp = TqdmProgress(disable=True) with tp.context(start=0.0, end=1.0) as ctx: ctx.update(progress=1.0) assert ctx._bar.n == pytest.approx(1.0) def test_tqdm_context_subrange(): tp = TqdmProgress(disable=True) with tp.context(start=0.9, end=1.0) as ctx: ctx.update(progress=0.5) assert ctx._bar.n == pytest.approx(0.95) def 
test_tqdm_context_msg_updates_description(): tp = TqdmProgress(disable=True) with tp.context() as ctx: ctx.update(progress=0.5, msg="custom message") assert "custom message" in ctx._bar.desc def test_tqdm_context_no_msg_does_not_update_description(): tp = TqdmProgress(disable=True) with tp.context(msg="initial") as ctx: ctx.update(progress=0.5, msg="updated") ctx.update(progress=0.7) def test_tqdm_context_options_passed(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress(refresh_per_second=5.0) ctx = tp.context(msg="test") kw = mock_tqdm.call_args.kwargs assert kw["total"] == 1.0 assert kw["desc"] == "test" assert kw["mininterval"] == 0.2 ctx.close() def test_rich_context_returns_progress_context(): rp = RichProgress(disable=True) ctx = rp.context() assert isinstance(ctx, ProgressContext) ctx.close() def test_rich_context_as_context_manager(): rp = RichProgress(disable=True) with rp.context(msg="test") as ctx: ctx.update(progress=0.5, msg="halfway") def test_rich_context_maps_start_end(): rp = RichProgress(disable=True) with rp.context(start=0.0, end=0.9) as ctx: ctx.update(progress=0.5) def test_rich_context_creates_progress_on_first_call(): rp = RichProgress(disable=True) assert rp._progress is None with rp.context() as ctx: ctx.update(progress=0.5) assert rp._progress is not None def test_tqdm_context_multiphase(): tp = TqdmProgress(disable=True) with tp.context() as ctx: for i in range(10): ctx.update(progress=i / 10 * 0.9, msg="Global") for i in range(10): ctx.update(progress=0.9 + i / 10 * 0.1, msg="Local") ctx.update(progress=1.0, msg="Done") assert ctx._bar.n == pytest.approx(1.0) def test_tqdm_leave_none_uses_position_logic(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress(leave=None) list(tp([], total=0)) assert mock_tqdm.call_args.kwargs["leave"] is True child = tp.child(leave=None) list(child([], total=0)) assert 
mock_tqdm.call_args.kwargs["leave"] is False def test_tqdm_leave_true_overrides_position(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress(leave=True) child = tp.child() list(child([], total=0)) assert mock_tqdm.call_args.kwargs["leave"] is True def test_tqdm_leave_false_overrides_position(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress(leave=False) list(tp([], total=0)) assert mock_tqdm.call_args.kwargs["leave"] is False def test_tqdm_leave_propagated_to_child(): tp = TqdmProgress(leave=True) child = tp.child() assert child._leave is True def test_tqdm_child_leave_override(): tp = TqdmProgress(leave=True) child = tp.child(leave=False) assert child._leave is False def test_tqdm_child_leave_none_inherits(): tp = TqdmProgress(leave=True) child = tp.child(leave=None) assert child._leave is True def test_tqdm_context_respects_leave(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress(leave=True) child = tp.child() ctx = child.context() assert mock_tqdm.call_args.kwargs["leave"] is True ctx.close() def test_tqdm_colour_none_by_default(): tp = TqdmProgress() assert tp._colour is None def test_tqdm_colour_passed_to_tqdm(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress(colour="green") list(tp([], total=0)) assert mock_tqdm.call_args.kwargs["colour"] == "green" def test_tqdm_colour_propagated_to_child(): tp = TqdmProgress(colour="green") child = tp.child() assert child._colour == "green" def test_tqdm_context_colour_passed(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress(colour="blue") ctx = tp.context() assert mock_tqdm.call_args.kwargs["colour"] == "blue" ctx.close() def test_rich_leave_false_by_default(): rp = 
RichProgress() assert rp._leave is False def test_rich_leave_false_removes_task_after_iteration(): rp = RichProgress(disable=True, leave=False) result = list(rp([1, 2], total=2)) assert result == [1, 2] assert len(rp._progress.tasks) == 0 def test_rich_leave_true_keeps_task(): rp = RichProgress(disable=True, leave=True) result = list(rp([1, 2], total=2)) assert result == [1, 2] assert len(rp._progress.tasks) == 1 assert rp._progress.tasks[0].completed == 2 def test_rich_leave_propagated_to_child(): rp = RichProgress(leave=True) child = rp.child() assert child._leave is True def test_rich_child_leave_override(): rp = RichProgress(leave=True) child = rp.child(leave=False) assert child._leave is False def test_rich_child_leave_none_inherits(): rp = RichProgress(leave=True) child = rp.child(leave=None) assert child._leave is True def test_rich_context_leave_false_removes_task(): rp = RichProgress(disable=True, leave=False) with rp.context(msg="test") as ctx: ctx.update(progress=0.5) assert len(rp._progress.tasks) == 0 def test_rich_context_leave_true_keeps_task(): rp = RichProgress(disable=True, leave=True) with rp.context(msg="test") as ctx: ctx.update(progress=0.5) assert len(rp._progress.tasks) == 1 assert rp._progress.tasks[0].completed == 1.0 def test_rich_colour_none_by_default(): rp = RichProgress() assert rp._colour is None def test_rich_colour_creates_styled_bar_column(): from rich.progress import BarColumn # type: ignore[import-not-found] rp = RichProgress(disable=True, colour="blue") rp._ensure_progress() bar_columns = [c for c in rp._progress.columns if isinstance(c, BarColumn)] assert len(bar_columns) == 1 assert bar_columns[0].complete_style == "blue" assert bar_columns[0].finished_style == "blue" def test_rich_colour_not_applied_when_progress_provided(): from rich.progress import ( # type: ignore[import-not-found] BarColumn, ) from rich.progress import ( Progress as RProgress, ) custom = RProgress(disable=True) rp = RichProgress(progress=custom, 
colour="red") result = rp._ensure_progress() assert result is custom bar_columns = [c for c in result.columns if isinstance(c, BarColumn)] for col in bar_columns: assert col.complete_style != "red" def test_rich_colour_propagated_to_child(): rp = RichProgress(colour="cyan") child = rp.child() assert child._colour == "cyan" def test_rich_default_columns_include_elapsed_and_remaining(): from rich.progress import ( # type: ignore[import-not-found] TimeElapsedColumn, TimeRemainingColumn, ) rp = RichProgress(disable=True) rp._ensure_progress() column_types = [type(c) for c in rp._progress.columns] assert TimeElapsedColumn in column_types assert TimeRemainingColumn in column_types def test_no_progress_child_accepts_leave(): np = NoProgress() assert np.child(leave=True) is np assert np.child(leave=False) is np assert np.child(leave=None) is np def test_tqdm_bar_width_default(): tp = TqdmProgress() assert tp._bar_width is None def test_tqdm_bar_width_passed_as_ncols(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress(bar_width=80) list(tp([], total=0)) kw = mock_tqdm.call_args.kwargs assert kw["ncols"] == 80 assert kw["dynamic_ncols"] is False def test_tqdm_bar_width_none_uses_dynamic_ncols(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress(bar_width=None, dynamic_ncols=True) list(tp([], total=0)) kw = mock_tqdm.call_args.kwargs assert "ncols" not in kw assert kw["dynamic_ncols"] is True def test_tqdm_bar_width_propagated_to_child(): tp = TqdmProgress(bar_width=80) child = tp.child() assert child._bar_width == 80 def test_rich_bar_width_default(): rp = RichProgress() assert rp._bar_width is None def test_rich_bar_width_applied_to_bar_column(): from rich.progress import BarColumn # type: ignore[import-not-found] rp = RichProgress(disable=True, bar_width=80) rp._ensure_progress() bar_columns = [c for c in rp._progress.columns if 
isinstance(c, BarColumn)] assert len(bar_columns) == 1 assert bar_columns[0].bar_width == 80 def test_rich_bar_width_propagated_to_child(): rp = RichProgress(bar_width=80) child = rp.child() assert child._bar_width == 80 def test_tqdm_reuses_bar_across_calls(): tp = TqdmProgress(disable=True) assert list(tp([1, 2], total=2)) == [1, 2] assert list(tp([3, 4, 5], total=3)) == [3, 4, 5] def test_tqdm_reuses_bar_single_creation(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress() list(tp([1], total=1)) list(tp([2], total=1)) mock_tqdm.assert_called_once() def test_tqdm_reset_updates_total_and_msg(): tp = TqdmProgress(disable=True) list(tp([], total=5, msg="first")) list(tp([], total=10, msg="second")) assert tp._bar.total == 10 assert tp._bar.n == 0 def test_tqdm_del_closes_bar(): with patch("tqdm.auto.tqdm") as mock_tqdm: mock_bar = MagicMock() mock_tqdm.return_value = mock_bar tp = TqdmProgress() list(tp([], total=0)) del tp mock_bar.close.assert_called_once() def test_tqdm_del_without_use_is_safe(): tp = TqdmProgress() del tp def test_tqdm_print_after_single_bar_appears_after_bar(): import io buf = io.StringIO() tp = TqdmProgress(leave=True, file=buf) list(tp([1, 2, 3], total=3, msg="step")) tp.close() print("DONE", file=buf) lines = buf.getvalue().splitlines() done_idx = next(i for i, ln in enumerate(lines) if "DONE" in ln) bar_idx = next(i for i, ln in enumerate(lines) if "step" in ln) assert done_idx > bar_idx def test_tqdm_print_after_parent_and_child_bars_appears_after_bars(): import io buf = io.StringIO() tp = TqdmProgress(leave=True, file=buf) child = tp.child(leave=True) for _ in tp([1, 2], total=2, msg="outer"): list(child([10, 20, 30], total=3, msg="inner")) tp.close() print("DONE", file=buf) lines = buf.getvalue().splitlines() done_idx = next(i for i, ln in enumerate(lines) if "DONE" in ln) outer_idx = next(i for i, ln in enumerate(lines) if "outer" in ln) inner_idx = next(i for i, ln in 
enumerate(lines) if "inner" in ln) assert done_idx > outer_idx assert done_idx > inner_idx def test_tqdm_close_as_context_manager(): import io buf = io.StringIO() with TqdmProgress(leave=True, file=buf) as tp: list(tp([1, 2, 3], total=3, msg="step")) print("DONE", file=buf) lines = buf.getvalue().splitlines() done_idx = next(i for i, ln in enumerate(lines) if "DONE" in ln) bar_idx = next(i for i, ln in enumerate(lines) if "step" in ln) assert done_idx > bar_idx def test_tqdm_close_idempotent(): import io buf = io.StringIO() tp = TqdmProgress(leave=True, file=buf) list(tp([1, 2], total=2, msg="step")) tp.close() tp.close() def test_rich_close_stops_display(): rp = RichProgress(disable=True, leave=True) list(rp([1, 2], total=2)) assert rp._progress is not None rp.close() assert rp._task is None def test_rich_close_as_context_manager(): with RichProgress(disable=True, leave=True) as rp: list(rp([1, 2, 3], total=3)) assert rp._task is None def test_rich_close_idempotent(): rp = RichProgress(disable=True, leave=True) list(rp([1, 2], total=2)) rp.close() rp.close() def test_rich_reuses_task_across_calls(): rp = RichProgress(disable=True, leave=True) assert list(rp([1, 2], total=2)) == [1, 2] assert list(rp([3, 4, 5], total=3)) == [3, 4, 5] assert len(rp._progress.tasks) == 1 def test_rich_leave_false_removes_task_across_calls(): rp = RichProgress(disable=True, leave=False) assert list(rp([1, 2], total=2)) == [1, 2] assert len(rp._progress.tasks) == 0 assert list(rp([3, 4, 5], total=3)) == [3, 4, 5] assert len(rp._progress.tasks) == 0 def test_rich_reset_updates_total(): rp = RichProgress(disable=True, leave=True) list(rp([1], total=1)) list(rp([1, 2, 3], total=3)) task = rp._progress.tasks[0] assert task.total == 3 def test_rich_del_completes_task_when_leave_true(): rp = RichProgress(disable=True, leave=True) list(rp([1, 2], total=2)) progress = rp._progress del rp assert len(progress.tasks) == 1 assert progress.tasks[0].completed == 2 def 
test_rich_cleanup_removes_task_on_interrupted_iteration(): rp = RichProgress(disable=True, leave=False) it = iter(rp([1, 2, 3], total=3)) next(it) progress = rp._progress assert rp._task is not None rp._cleanup_task() assert rp._task is None assert len(progress.tasks) == 0 def test_rich_del_without_use_is_safe(): rp = RichProgress() del rp def test_child_reuses_bar_independently(): outer = TqdmProgress(disable=True) child = outer.child() for batch in outer([1, 2], total=2): assert list(child([10, 20], total=2)) == [10, 20] def test_no_progress_multiple_calls(): np = NoProgress() assert list(np([1, 2])) == [1, 2] assert list(np([3, 4])) == [3, 4] def test_get_progress_kwargs_forwarded(): result = get_progress(show_progress=True, colour="green") assert isinstance(result, TqdmProgress) assert result._colour == "green" def test_get_progress_kwargs_with_default_creates_new_instance(): set_progress_backend("tqdm") result = get_progress(show_progress=True, colour="green") assert isinstance(result, TqdmProgress) assert result._colour == "green" def test_get_progress_kwargs_with_rich_default(): set_progress_backend("rich") result = get_progress(show_progress=True, colour="blue") assert isinstance(result, RichProgress) assert result._colour == "blue" def test_get_progress_no_kwargs_returns_default(): set_progress_backend("tqdm") default = get_progress(show_progress=True) assert default is get_progress(show_progress=True) def test_get_progress_kwargs_with_instance_ignored(): tp = TqdmProgress() assert get_progress(tp, colour="green") is tp def test_set_progress_backend_rich_not_installed(): """set_progress_backend("rich") raises ImportError when rich is missing""" with patch.dict("sys.modules", {"rich": None}), pytest.raises(ImportError): set_progress_backend("rich") def test_get_progress_kwargs_false_ignored(): result = get_progress(show_progress=False, colour="green") assert isinstance(result, NoProgress) def test_get_progress_multiple_kwargs(): result = 
get_progress(show_progress=True, colour="green", refresh_per_second=5.0) assert isinstance(result, TqdmProgress) assert result._colour == "green" assert result._refresh_per_second == 5.0 cogent3-scinexus-e0aee79/tests/test_sqlite_data_store.py000066400000000000000000000566371520253266500237240ustar00rootroot00000000000000import os import sqlite3 from pathlib import Path from pickle import dumps, loads import pytest from citeable import Software from scitrack import get_text_hexdigest from scinexus.composable import NotCompleted, NotCompletedType from scinexus.data_store import OVERWRITE, READONLY, DataMemberABC, DataStoreDirectory from scinexus.sqlite_data_store import ( _MEMORY, LOG_TABLE, RESULT_TABLE, DataStoreSqlite, has_valid_schema, open_sqlite_db_ro, open_sqlite_db_rw, ) @pytest.fixture def tmp_dir(tmp_path_factory): return Path(tmp_path_factory.mktemp("sqlitedb")) @pytest.fixture def sample_citations(): cite1 = Software( author=["Doe, J"], title="Tool One", year=2024, url="https://example.com/one", version="1.0", license="MIT", doi="10.0/one", publisher="test", ) cite2 = Software( author=["Smith, A"], title="Tool Two", year=2024, url="https://example.com/two", version="2.0", license="MIT", doi="10.0/two", publisher="test", ) return (cite1, cite2) @pytest.fixture def nc_objects(): return { f"id_{i}": NotCompleted( NotCompletedType.ERROR, "location", "message", source=f"id_{i}" ) for i in range(3) } @pytest.fixture def sql_dstore(DATA_DIR, tmp_dir): ro_dir_dstore = DataStoreDirectory(DATA_DIR, suffix="fasta") path = tmp_dir / "data.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) for m in ro_dir_dstore: dstore.write(data=m.read(), unique_id=m.unique_id) return dstore @pytest.fixture def full_dstore_sqlite(tmp_dir, nc_objects, DATA_DIR): path = tmp_dir / "full.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) for uid, obj in nc_objects.items(): dstore.write_not_completed(unique_id=uid, data=obj.to_json()) ro = DataStoreDirectory(DATA_DIR, 
suffix="fasta") for m in ro: dstore.write(unique_id=m.unique_id, data=m.read()) log_text = (DATA_DIR / "scitrack.log").read_text() dstore.write_log(unique_id="scitrack.log", data=log_text) yield dstore dstore.close() @pytest.fixture def writable_store(tmp_dir): path = tmp_dir / "writable.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") yield dstore dstore.close() @pytest.fixture def populated_store(tmp_dir): """A store with data, closed and ready for read-only access.""" path = tmp_dir / "populated.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") dstore.close() return path def test_db_creation(tmp_dir): path = tmp_dir / "test.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) assert dstore.source == path dstore.close() def test_db_init_log(tmp_dir): path = tmp_dir / "test_log.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="test_record", data="test data") assert dstore._log_id is not None dstore.close() def test_open_sqlite_db_rw(tmp_dir): path = tmp_dir / "test_rw.sqlitedb" db = open_sqlite_db_rw(path) assert has_valid_schema(db) db.close() def test_rw_sql_dstore_mem(): """in memory dstore with multiple writes verified via SQL""" dstore = DataStoreSqlite(_MEMORY, mode=OVERWRITE) records = {f"r{i}": f"data {i}" for i in range(3)} for unique_id, data in records.items(): dstore.write(data=data, unique_id=unique_id) expect = len(records) query = f"SELECT count(*) as c FROM {RESULT_TABLE} WHERE is_completed=?" 
got = dstore.db.execute(query, (1,)).fetchone()["c"] assert got == expect assert len(dstore.completed) == expect def test_not_completed(tmp_dir): """multiple not_completed records are stored and retrievable""" nc_objects = { f"id_{i}": NotCompleted( NotCompletedType.ERROR, "location", "message", source=f"id_{i}" ) for i in range(3) } path = tmp_dir / "test_nc.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) for unique_id, obj in nc_objects.items(): dstore.write_not_completed(data=obj.to_json(), unique_id=unique_id) expect = len(nc_objects) query = f"SELECT count(*) as c FROM {RESULT_TABLE} WHERE is_completed=?" got = dstore.db.execute(query, (0,)).fetchone()["c"] assert got == expect assert len(dstore.not_completed) == expect dstore.close() def test_logdata(tmp_dir, DATA_DIR): path = tmp_dir / "test_logdata.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) log_text = (DATA_DIR / "scitrack.log").read_text() dstore.write_log(unique_id="test.log", data=log_text) assert len(dstore.logs) == 1 got = dstore.logs[0].read() assert got == log_text dstore.close() def test_drop_not_completed(nc_objects): dstore = DataStoreSqlite(_MEMORY, mode=OVERWRITE) for unique_id, obj in nc_objects.items(): dstore.write_not_completed(data=obj.to_json(), unique_id=unique_id) assert len(dstore.not_completed) == len(nc_objects) dstore.drop_not_completed() assert len(dstore.not_completed) == 0 def test_contains(tmp_dir): path = tmp_dir / "test_contains.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="record1", data="data1") assert "record1" in dstore assert "record2" not in dstore dstore.close() def test_iter(tmp_dir): path = tmp_dir / "test_iter.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") dstore.write(unique_id="r2", data="d2") ids = {m.unique_id for m in dstore} assert ids == {"r1", "r2"} dstore.close() def test_members(tmp_dir): path = tmp_dir / "test_members.sqlitedb" dstore = 
DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") nc = NotCompleted(NotCompletedType.FAIL, "dummy", "msg", source="src") dstore.write_not_completed(unique_id="nc1", data=nc.to_json()) assert len(dstore.members) == 2 assert all(isinstance(m, DataMemberABC) for m in dstore) dstore.close() def test_len(tmp_dir): path = tmp_dir / "test_len.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) assert len(dstore) == 0 dstore.write(unique_id="r1", data="d1") assert len(dstore) == 1 dstore.close() def test_md5_sum(tmp_dir): path = tmp_dir / "test_md5.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) data = "test data for md5" dstore.write(unique_id="r1", data=data) md5 = dstore.md5("r1") assert md5 == get_text_hexdigest(data) dstore.close() def test_iterall(tmp_dir): path = tmp_dir / "test_iterall.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") dstore.write(unique_id="r2", data="d2") all_members = list(dstore) assert len(all_members) == 2 dstore.close() def test_read(tmp_dir): path = tmp_dir / "test_read.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) data = "test data content" dstore.write(unique_id="r1", data=data) got = dstore.read("r1") assert got == data dstore.close() def test_read_all_record_types(full_dstore_sqlite): """reading from completed, not_completed, and log records all return str""" records = [ full_dstore_sqlite.completed[0], full_dstore_sqlite.not_completed[0], full_dstore_sqlite.logs[0], ] assert all(isinstance(r.read(), str) for r in records) def test_write_success_replaces_not_completed(tmp_dir): path = tmp_dir / "test_replace_nc.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) nc = NotCompleted(NotCompletedType.FAIL, "dummy", "msg", source="src") dstore.write_not_completed(unique_id="r1", data=nc.to_json()) assert len(dstore.not_completed) == 1 dstore.write(unique_id="r1", data="completed data") assert len(dstore.not_completed) == 0 assert 
len(dstore.completed) == 1 dstore.close() def test_read_log(tmp_dir, DATA_DIR): path = tmp_dir / "test_readlog.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) log_text = (DATA_DIR / "scitrack.log").read_text() dstore.write_log(unique_id="test.log", data=log_text) got = dstore.read(str(Path("logs") / "test.log")) assert got == log_text dstore.close() @pytest.mark.parametrize("binary", [False, True]) def test_write_text_binary(binary): """correctly write content whether text or binary data""" dstore = DataStoreSqlite(_MEMORY, mode=OVERWRITE) expect = "some text data" if binary: expect = dumps(expect) m = dstore.write(unique_id="record", data=expect) got = m.read() assert got == expect def test_write_if_member_exists(tmp_dir): path = tmp_dir / "test_exists.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="data1") dstore.write(unique_id="r1", data="data2") got = dstore.read("r1") assert got == "data2" assert len(dstore.completed) == 1 dstore.close() def test_new_write_read(tmp_dir): path = tmp_dir / "test_new_wr.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="content1") dstore.write(unique_id="r2", data="content2") assert dstore.read("r1") == "content1" assert dstore.read("r2") == "content2" dstore.close() def test_read_unknown_table(tmp_dir): path = tmp_dir / "test_unknown.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") with pytest.raises(ValueError): dstore.read("unknown_table/r1") dstore.close() def test_limit_on_writable(tmp_dir): path = tmp_dir / "test_limit.sqlitedb" with pytest.raises(ValueError): DataStoreSqlite(path, mode=OVERWRITE, limit=10) @pytest.mark.parametrize("table_name", ["", RESULT_TABLE]) def test_new_write_id_includes_table(table_name): """correctly handles table name if included in unique id""" dstore = DataStoreSqlite(_MEMORY, mode=OVERWRITE) identifier = "test1.fasta" if table_name: identifier = 
str(Path(table_name) / identifier) data = "test data" m = dstore.write(unique_id=identifier, data=data) got = dstore.read(m.unique_id) assert got == data def test_is_locked(tmp_dir): path = tmp_dir / "test_locked.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") assert dstore.locked dstore.close() def test_lock_unlock(tmp_dir): path = tmp_dir / "test_lock_unlock.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") assert dstore.locked dstore.unlock() assert not dstore.locked dstore.lock() assert dstore.locked dstore.unlock() assert not dstore.locked dstore.close() def test_lock_firsttime(tmp_dir): path = tmp_dir / "test_lockfirst.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) # accessing db triggers lock _ = dstore.db assert dstore.locked # delete state row and re-lock from empty state dstore.db.execute("DELETE FROM state WHERE state_id=1") dstore.lock() assert dstore.locked dstore.unlock() assert not dstore.locked dstore.close() def test_db_without_logs(tmp_dir): path = tmp_dir / "test_nologs.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") assert len(dstore.logs) == 0 dstore.close() def test_md5_none(tmp_dir): path = tmp_dir / "test_md5none.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) assert dstore.md5("nonexistent") is None dstore.close() def test_md5_missing(tmp_dir): path = tmp_dir / "test_md5missing.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) assert dstore.md5("missing_record") is None dstore.close() def test_open_data_store_sqlitedb_err(): from scinexus.io import open_data_store with pytest.raises(NotImplementedError): open_data_store(":memory:", mode="r") def test_pickleable_roundtrip(tmp_dir): path = tmp_dir / "test_pickle.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") dstore.close() dstore2 = DataStoreSqlite(path, mode=READONLY) 
re_dstore = loads(dumps(dstore2)) assert re_dstore.read("r1") == "d1" re_dstore.close() dstore2.close() def test_pickleable_member_roundtrip(tmp_dir): path = tmp_dir / "test_pickle_member.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") dstore.close() dstore2 = DataStoreSqlite(path, mode=READONLY) member = dstore2[0] re_member = loads(dumps(member)) assert re_member.read() == "d1" re_member.data_store.close() dstore2.close() def test_getitem(tmp_dir): path = tmp_dir / "test_getitem.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") dstore.write(unique_id="r2", data="d2") first = dstore[0] assert first.unique_id == "r1" with pytest.raises(IndexError): _ = dstore[len(dstore)] dstore.close() def test_empty_data_store(tmp_dir): path = tmp_dir / "test_empty.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) assert len(dstore) == 0 dstore.close() def test_no_logs(tmp_dir): path = tmp_dir / "test_nologs2.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) assert len(dstore.logs) == 0 dstore.close() def test_no_not_completed(tmp_dir): path = tmp_dir / "test_nonc.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) assert len(dstore.not_completed) == 0 dstore.close() def test_write_read_only_datastore(tmp_dir): path = tmp_dir / "test_ro.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write(unique_id="r1", data="d1") dstore.close() ro = DataStoreSqlite(path, mode=READONLY) with pytest.raises(IOError): ro.write(unique_id="r2", data="d2") ro.close() def test_write_citations_sqlite(tmp_dir, sample_citations): path = tmp_dir / "test_cite.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write_citations(data=sample_citations) loaded = dstore._load_citations() assert len(loaded) == 2 assert loaded[0].title == "Tool One" assert loaded[1].title == "Tool Two" dstore.close() def test_write_citations_empty_sqlite(tmp_dir): path = tmp_dir / 
"test_cite_empty.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write_citations(data=()) loaded = dstore._load_citations() assert len(loaded) == 0 dstore.close() def test_write_bib_sqlite(tmp_dir, sample_citations): path = tmp_dir / "test_bib.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write_citations(data=sample_citations) bib_path = tmp_dir / "refs.bib" dstore.write_bib(bib_path) assert bib_path.exists() content = bib_path.read_text() assert "Tool One" in content assert "Tool Two" in content dstore.close() def test_summary_citations_sqlite(tmp_dir, sample_citations): path = tmp_dir / "test_sumcite.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write_citations(data=sample_citations) result = dstore.summary_citations assert isinstance(result, list) assert len(result) == 2 dstore.close() def test_describe_sqlite_with_display(tmp_dir): from scinexus.data_store import set_summary_display path = tmp_dir / "test_display.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) captured = {} def display(data, *, name=""): captured["data"] = data captured["name"] = name return "DISPLAY" set_summary_display(display) try: result = dstore.describe assert result == "DISPLAY" assert captured["name"] == "describe" assert "title" in captured["data"] assert "completed" in captured["data"] finally: set_summary_display(None) dstore.close() def test_open_sqlite_db_ro_invalid_schema(tmp_dir): path = tmp_dir / "bad_schema.sqlitedb" db = sqlite3.connect(str(path)) db.execute("CREATE TABLE IF NOT EXISTS bogus(id INTEGER PRIMARY KEY)") db.close() with pytest.raises(ValueError, match="valid schema"): open_sqlite_db_ro(path) def test_lock_raises_when_db_none(tmp_dir): path = tmp_dir / "lock_none.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) # _db is None before first access to .db property with pytest.raises(RuntimeError, match="unexpectedly None"): dstore.lock() def test_lock_overwrite_on_locked_db(tmp_dir): path = tmp_dir / 
"lock_ow.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) _ = dstore.db # opens and locks # fake a different pid in the lock dstore._db.execute( "UPDATE state SET lock_pid=? WHERE state_id=1", (os.getpid() + 1,), ) dstore2 = DataStoreSqlite(path, mode=OVERWRITE) dstore2._db = dstore._db with pytest.raises(OSError, match="locked"): dstore2.lock() dstore.close() def test_lock_update_existing_state(writable_store): writable_store.unlock() assert not writable_store.locked # re-lock: state row exists but lock_pid is NULL → UPDATE path writable_store.lock() assert writable_store.locked def test_unlock_readonly(populated_store): ro = DataStoreSqlite(populated_store, mode=READONLY) ro.unlock() # should be a no-op, no error ro.close() def test_unlock_already_unlocked(writable_store): writable_store.unlock() assert not writable_store.locked writable_store.unlock() # should be a no-op def test_write_duplicate_not_added_to_completed(writable_store): writable_store.write(unique_id="r1", data="d1_updated") assert len(writable_store.completed) == 1 def test_write_log_with_table_prefix(tmp_dir, DATA_DIR): path = tmp_dir / "log_prefix.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) log_text = (DATA_DIR / "scitrack.log").read_text() dstore.write_log(unique_id=f"{LOG_TABLE}/test.log", data=log_text) assert len(dstore.logs) == 1 dstore.close() def test_write_not_completed_with_table_prefix(tmp_dir): path = tmp_dir / "nc_prefix.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) nc = NotCompleted(NotCompletedType.FAIL, "dummy", "msg", source="src") dstore.write_not_completed(unique_id=f"{RESULT_TABLE}/nc1", data=nc.to_json()) assert len(dstore.not_completed) == 1 dstore.close() def test_write_citations_update_existing(tmp_dir, sample_citations): path = tmp_dir / "cite_update.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) dstore.write_citations(data=sample_citations) # write again to trigger UPDATE path dstore.write_citations(data=(sample_citations[0],)) 
loaded = dstore._load_citations() assert len(loaded) == 1 dstore.close() def test_load_citations_no_table(tmp_dir): path = tmp_dir / "no_cite_table.sqlitedb" # create db without citations table db = sqlite3.connect(str(path)) db.execute( "CREATE TABLE IF NOT EXISTS state" "(state_id INTEGER PRIMARY KEY, record_type TEXT, lock_pid INTEGER)", ) db.execute( f"CREATE TABLE IF NOT EXISTS {LOG_TABLE}" "(log_id INTEGER PRIMARY KEY, log_name TEXT, date timestamp, data BLOB)", ) db.execute( f"CREATE TABLE IF NOT EXISTS {RESULT_TABLE}" "(record_id TEXT PRIMARY KEY, log_id INTEGER, md5 BLOB, is_completed INTEGER, data BLOB)", ) db.close() dstore = DataStoreSqlite(path, mode=READONLY) result = dstore._load_citations() assert result == [] dstore.close() def test_describe_locked_different_pid(writable_store): writable_store._db.execute( "UPDATE state SET lock_pid=? WHERE state_id=1", (os.getpid() + 1,), ) result = writable_store._describe() assert "Locked db store" in result["title"] assert str(os.getpid() + 1) in result["title"] def test_describe_unlocked(writable_store): writable_store.unlock() result = writable_store._describe() assert result["title"] == "Unlocked db store." 
def test_record_type_getter_and_setter(writable_store): from scinexus.misc import get_object_provenance writable_store.record_type = str # set using a type object assert writable_store.record_type == get_object_provenance(str) def test_record_type_overwrite_error(writable_store): writable_store.record_type = str with pytest.raises(OSError, match="cannot overwrite"): writable_store.record_type = int def test_summary_not_completed(tmp_dir): path = tmp_dir / "summary_nc.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) nc = NotCompleted(NotCompletedType.FAIL, "dummy", "test msg", source="src") dstore.write_not_completed(unique_id="nc1", data=nc.to_json()) result = dstore._summary_not_completed() assert isinstance(result, list) assert len(result) == 1 dstore.close() def test_db_property_none_after_open(tmp_dir): from unittest.mock import patch path = tmp_dir / "db_none.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) with ( patch("scinexus.sqlite_data_store.open_sqlite_db_rw", return_value=None), patch.object(dstore, "lock"), ): with pytest.raises(ValueError, match="unexpectedly None"): _ = dstore.db def test_write_member_none(tmp_dir): from unittest.mock import patch path = tmp_dir / "write_none.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) with patch.object(dstore, "_write", return_value=None): with pytest.raises(RuntimeError, match="failed to produce a member"): dstore.write(unique_id="r1", data="d1") dstore.close() def test_write_not_completed_member_none(tmp_dir): from unittest.mock import patch path = tmp_dir / "nc_none.sqlitedb" dstore = DataStoreSqlite(path, mode=OVERWRITE) nc = NotCompleted(NotCompletedType.FAIL, "dummy", "msg", source="src") with patch.object(dstore, "_write", return_value=None): with pytest.raises(RuntimeError, match="failed to produce a member"): dstore.write_not_completed(unique_id="nc1", data=nc.to_json()) dstore.close() def test_write_citations_no_table(tmp_dir, sample_citations): path = tmp_dir / 
"no_cite_write.sqlitedb" db = sqlite3.connect(str(path)) db.execute( "CREATE TABLE IF NOT EXISTS state" "(state_id INTEGER PRIMARY KEY, record_type TEXT, lock_pid INTEGER)", ) db.execute( f"CREATE TABLE IF NOT EXISTS {LOG_TABLE}" "(log_id INTEGER PRIMARY KEY, log_name TEXT, date timestamp, data BLOB)", ) db.execute( f"CREATE TABLE IF NOT EXISTS {RESULT_TABLE}" "(record_id TEXT PRIMARY KEY, log_id INTEGER, md5 BLOB, " "is_completed INTEGER, data BLOB)", ) db.close() dstore = DataStoreSqlite(path, mode=OVERWRITE) # Replace _db with a connection to the DB without citations table # (the lazy db property would call open_sqlite_db_rw which creates it) dstore._db = sqlite3.connect( str(path), detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES, ) dstore._db.row_factory = sqlite3.Row dstore._open = True assert not dstore._has_citations_table() dstore.write_citations(data=sample_citations) assert dstore._has_citations_table() loaded = dstore._load_citations() assert len(loaded) == 2 dstore.close() cogent3-scinexus-e0aee79/tests/test_typing.py000066400000000000000000000233131520253266500215110ustar00rootroot00000000000000from pathlib import Path from typing import Any, ForwardRef, TypeVar, Union, get_args import pytest from scinexus.data_store import DataMemberABC from scinexus.typing import ( IdentifierType, SerialisableType, _clear_type_namespace_providers, _resolve_name, _type_namespace_providers, check_type_compatibility, get_type_display_names, register_type_namespace, resolve_type_hint, ) @pytest.fixture def clean_namespace_providers(): """isolate tests that register namespace providers""" saved = list(_type_namespace_providers) _clear_type_namespace_providers() try: yield finally: _clear_type_namespace_providers() for provider in saved: register_type_namespace(provider) def test_resolve_type_hint_concrete_class(): """Concrete classes pass through unchanged""" resolved = resolve_type_hint(int) assert resolved is int def test_resolve_type_hint_protocol(): 
"""Protocol classes pass through unchanged""" resolved = resolve_type_hint(SerialisableType) assert resolved is SerialisableType


def test_resolve_type_hint_union():
    """Union types are resolved recursively"""
    resolved = resolve_type_hint(IdentifierType)
    # NOTE(review): function-local import; later tests in this file call
    # get_args without one, so this is presumably redundant -- confirm
    from typing import get_args

    # compared as a set, so the order of the Union members is not asserted
    args = set(get_args(resolved))
    assert args == {str, Path, DataMemberABC}


def test_resolve_type_hint_unconstrained_typevar():
    """unconstrained TypeVar raises TypeError"""
    # no bound and no constraints
    T = TypeVar("T")
    with pytest.raises(TypeError, match="unconstrained TypeVar"):
        resolve_type_hint(T)


def test_resolve_type_hint_unresolvable():
    """Unresolvable string raises TypeError"""
    with pytest.raises(TypeError, match="cannot resolve"):
        resolve_type_hint("NoSuchType")


def test_resolve_type_hint_user_module_globals():
    """module_globals are checked first for resolution"""

    # defined locally, so the name can only be found through module_globals
    class MyCustomType:
        pass

    resolved = resolve_type_hint("MyCustomType", {"MyCustomType": MyCustomType})
    assert resolved is MyCustomType


def test_register_type_namespace_resolves_forward_ref(clean_namespace_providers):
    """a registered namespace provider supplies fallback types"""

    class MyCustomType:
        pass

    register_type_namespace(lambda: {"MyCustomType": MyCustomType})
    # no module_globals passed: the name has to come from the provider
    assert resolve_type_hint("MyCustomType") is MyCustomType


def test_register_type_namespace_is_lazy(clean_namespace_providers):
    """providers are invoked on resolution, not at registration time"""
    # mutable counter shared with the provider closure
    calls = {"count": 0}

    class MyCustomType:
        pass

    def provider():
        calls["count"] += 1
        return {"MyCustomType": MyCustomType}

    register_type_namespace(provider)
    # registering alone must not call the provider
    assert calls["count"] == 0
    resolve_type_hint("MyCustomType")
    assert calls["count"] == 1
    # a second resolution calls the provider again
    resolve_type_hint("MyCustomType")
    assert calls["count"] == 2


def test_register_type_namespace_dedupes(clean_namespace_providers):
    """re-registering the same provider is a no-op"""

    def provider():
        return {}

    register_type_namespace(provider)
    register_type_namespace(provider)
    # registered twice, present once in _type_namespace_providers
    assert _type_namespace_providers.count(provider) == 1


def test_module_globals_take_precedence_over_namespace_provider(
    clean_namespace_providers,
):
    """module_globals win over registered providers when both supply a name"""

    class FromGlobals:
        pass

    class FromProvider:
        pass

    # both sources define "Foo"; the result must be the module_globals one
    register_type_namespace(lambda: {"Foo": FromProvider})
    assert resolve_type_hint("Foo", {"Foo": FromGlobals}) is FromGlobals


def test_typevar_bound_forwardref_resolved_via_provider(clean_namespace_providers):
    """a TypeVar with a ForwardRef bound resolves via a registered provider"""

    class MyCustomType:
        pass

    register_type_namespace(lambda: {"MyCustomType": MyCustomType})
    T = TypeVar("T", bound=ForwardRef("MyCustomType"))
    assert resolve_type_hint(T) is MyCustomType


def test_unresolved_name_with_empty_providers_raises(clean_namespace_providers):
    """with no providers, an unknown name still raises TypeError"""
    with pytest.raises(TypeError, match="cannot resolve"):
        resolve_type_hint("NoSuchType")


def test_serialisable_type_not_isinstance():
    """objects without to_rich_dict do not satisfy SerialisableType"""
    assert not isinstance("hello", SerialisableType)
    assert not isinstance(42, SerialisableType)


def test_serialisable_type_custom_class():
    """custom class with to_rich_dict satisfies SerialisableType"""

    # no inheritance from SerialisableType: only the method is provided
    class MyObj:
        def to_rich_dict(self) -> dict:
            return {}

    assert isinstance(MyObj(), SerialisableType)


def test_get_type_display_names_concrete():
    """concrete class returns its name"""
    names = get_type_display_names(int)
    assert names == frozenset({"int"})


def test_get_type_display_names_union():
    """Union returns names of all constituents"""
    names = get_type_display_names(Union[str, int])  # noqa: UP007
    assert names == frozenset({"str", "int"})


def test_get_type_display_names_protocol():
    """Protocol returns its own name"""
    names = get_type_display_names(SerialisableType)
    assert names == frozenset({"SerialisableType"})


def test_get_type_display_names_typevar_fallback():
    """unresolved TypeVar returns its __name__"""
    T = TypeVar("T")
    names = get_type_display_names(T)
    assert names == frozenset({"T"})


def test_check_type_compatibility_protocol_input():
    """Protocol on input side is lenient"""
    assert check_type_compatibility(int, SerialisableType) is True


def test_check_type_compatibility_protocol_return():
    """Protocol on return side is lenient"""
    assert check_type_compatibility(SerialisableType, int) is True


def test_check_type_compatibility_incompatible():
    """incompatible concrete types"""
    assert check_type_compatibility(int, str) is False


@pytest.fixture
def broken_subclasscheck():
    """a class whose metaclass makes issubclass() raise TypeError"""

    class BadMeta(type):
        # issubclass(x, Weird) dispatches here, so it always raises
        def __subclasscheck__(cls, subclass: Any):
            msg = "broken"
            raise TypeError(msg)

    class Weird(metaclass=BadMeta):
        pass

    return Weird


def test_check_type_compatibility_issubclass_typeerror_same(broken_subclasscheck):
    """issubclass TypeError with identity match returns True"""
    # the same class object is passed on both sides
    assert check_type_compatibility(broken_subclasscheck, broken_subclasscheck) is True


def test_check_type_compatibility_issubclass_typeerror_diff(broken_subclasscheck):
    """issubclass TypeError with different classes returns False"""
    assert check_type_compatibility(int, broken_subclasscheck) is False


def test_resolve_name_non_type_in_globals():
    """non-type value in module_globals falls through to TypeError"""
    # "foo" is present in the globals, but maps to an int instance
    with pytest.raises(TypeError, match="cannot resolve"):
        _resolve_name("foo", {"foo": 42})


def test_resolve_type_hint_typevar_bound():
    """TypeVar with bound resolves to the bound type"""
    T = TypeVar("T", bound=int)
    assert resolve_type_hint(T) is int


def test_resolve_type_hint_typevar_bound_str():
    """TypeVar with string bound resolved via module_globals"""

    class Custom:
        pass

    # bound given as a plain string
    T = TypeVar("T", bound="Custom")
    resolved = resolve_type_hint(T, {"Custom": Custom})
    assert resolved is Custom


def test_resolve_type_hint_typevar_bound_forwardref():
    """TypeVar with ForwardRef bound resolved via module_globals"""

    class Custom:
        pass

    # bound given as an explicit ForwardRef rather than a string
    T = TypeVar("T", bound=ForwardRef("Custom"))
    resolved = resolve_type_hint(T, {"Custom": Custom})
    assert resolved is Custom


def test_resolve_type_hint_typevar_constraints():
    """TypeVar with constraints resolves to Union of constraints"""
    T = TypeVar("T", int, str)
    resolved = resolve_type_hint(T)
    # compared as a set, so the order of the members is not asserted
    assert set(get_args(resolved)) == {int, str}


def test_resolve_type_hint_union_type():
    """PEP 604 X | Y syntax resolves correctly"""
    resolved = resolve_type_hint(int | str)
    assert set(get_args(resolved)) == {int, str}


def test_resolve_type_hint_list():
    """list[int] resolves correctly"""
    resolved = resolve_type_hint(list[int])
    assert get_args(resolved) == (int,)


def test_resolve_type_hint_tuple():
    """tuple[str, int] resolves correctly"""
    resolved = resolve_type_hint(tuple[str, int])
    # tuple comparison: argument order is asserted here
    assert get_args(resolved) == (str, int)


def test_resolve_type_hint_set():
    """set[int] resolves correctly"""
    resolved = resolve_type_hint(set[int])
    assert get_args(resolved) == (int,)


def test_resolve_type_hint_forwardref():
    """ForwardRef resolves via module_globals"""
    resolved = resolve_type_hint(ForwardRef("int"), {"int": int})
    assert resolved is int


def test_get_type_display_names_list():
    """list[int] returns inner type names"""
    names = get_type_display_names(list[int])
    # only the element type is named, not the container
    assert names == frozenset({"int"})


def test_get_type_display_names_tuple():
    """tuple[str, int] returns all inner type names"""
    names = get_type_display_names(tuple[str, int])
    assert names == frozenset({"str", "int"})


def test_check_type_compatibility_any_return():
    """Any as return type is compatible with anything"""
    assert check_type_compatibility(Any, int) is True


def test_check_type_compatibility_any_input():
    """Any as input type is compatible with anything"""
    assert check_type_compatibility(int, Any) is True


def test_check_type_compatibility_subclass():
    """bool is subclass of int, so they are compatible"""
    assert check_type_compatibility(bool, int) is True


def test_check_type_compatibility_union_subclass(): """Union types with subclass relationship are compatible""" assert
check_type_compatibility(Union[bool, str], int) is True def test_check_type_compatibility_protocol_in_union(): """Union containing a Protocol is lenient""" assert check_type_compatibility(Union[SerialisableType, int], str) is True cogent3-scinexus-e0aee79/tests/test_warning.py000066400000000000000000000147371520253266500216560ustar00rootroot00000000000000import pickle import warnings import pytest from scinexus.warning import deprecated_args, deprecated_callable def test_function_deprecated_args(): @deprecated_args( version="a future release", reason="x and y are not descriptive", old_new=[("x", "a"), ("y", "b")], ) def changed(a: int, b: int) -> int: return a + b with pytest.deprecated_call(): expected = changed(a=5, b=3) got = changed(x=5, y=3) assert got == expected def test_function_deprecated_args_docstring(): @deprecated_args( version="a future release", reason="x and y are not descriptive", old_new=[("x", "a"), ("y", "b")], ) def changed(a: int, b: int) -> int: """This is a test function""" return a + b assert changed.__doc__ == "This is a test function" @pytest.mark.parametrize( "kwargs", [{"x": 5, "y": 3}, {"a": 5, "y": 3}, {"x": 5, "b": 3}], ) def test_function_deprecated_args_warn(kwargs): @deprecated_args( version="a future release", reason="x and y are not descriptive", old_new=[("x", "a"), ("y", "b")], ) def changed(a: int, b: int) -> int: return a + b with pytest.deprecated_call(): changed(**kwargs) def test_function_correct_args_do_not_warn(): @deprecated_args( version="a future release", reason="x and y are not descriptive", old_new=[("x", "a"), ("y", "b")], ) def changed(a: int, b: int) -> int: return a + b with warnings.catch_warnings(): warnings.simplefilter("error") changed(a=5, b=3) def test_function_deprecated_args_pickled(): @deprecated_args( version="a future release", reason="x and y are not descriptive", old_new=[("x", "a"), ("y", "b")], ) def changed(a: int, b: int) -> int: return a + b with pytest.deprecated_call(): myfunc = 
changed(x=1, y=2) pickled_func = pickle.dumps(myfunc) assert isinstance(pickled_func, bytes) unpickled_func = pickle.loads(pickled_func) assert unpickled_func == changed(a=1, b=2) class foo: def __init__(self): self.a = 0 self.b = 0 @deprecated_args( version="a future release", reason="x and y are not descriptive", old_new=[("x", "a"), ("y", "b")], ) def changed(self, a: int, b: int): """This is a test function""" self.a = a self.b = b def test_method_deprecated_args(): foo_instance = foo() foo_instance.changed(a=5, b=3) assert foo_instance.a == 5 assert foo_instance.b == 3 def test_method_deprecated_args_docstring(): assert foo.changed.__doc__ == "This is a test function" foo_instance = foo() assert foo_instance.changed.__doc__ == "This is a test function" @pytest.mark.parametrize( "kwargs", [{"x": 5, "y": 3}, {"a": 5, "y": 3}, {"x": 5, "b": 3}], ) def test_method_deprecated_args_warn(kwargs): with pytest.deprecated_call(): foo().changed(**kwargs) def test_method_correct_args_do_not_warn(): with warnings.catch_warnings(): warnings.simplefilter("error") foo().changed(a=5, b=3) def test_method_deprecated_args_pickled(): foo_instance = foo() assert foo_instance.a == 0 assert foo_instance.b == 0 with pytest.deprecated_call(): foo_instance.changed(x=1, y=2) assert foo_instance.a == 1 assert foo_instance.b == 2 pickled_foo = pickle.dumps(foo_instance) assert isinstance(pickled_foo, bytes) unpickled_foo = pickle.loads(pickled_foo) assert isinstance(unpickled_foo, foo) assert unpickled_foo.a == 1 assert unpickled_foo.b == 2 with pytest.deprecated_call(): unpickled_foo.changed(x=2, y=3) assert unpickled_foo.a == 2 assert unpickled_foo.b == 3 class foo2: @deprecated_callable(version="2023.9", reason="test meth", new="new_meth") def old_meth(self, v): return v**2 @deprecated_callable(version="2023.9", reason="redundant", is_discontinued=True) def squared(self, v): return v * v @deprecated_callable(version="2023.9", reason="test func", new="new_func") def old_func(v): return 
v**3 @deprecated_callable(version="2023.9", reason="redundant", is_discontinued=True) def cubed(v): return v * v @pytest.mark.parametrize("func", [foo2().old_meth, foo2().squared, old_func, cubed]) def test_deprecated_callable_warn(func): with pytest.deprecated_call(): func(2) @pytest.mark.parametrize("func", [cubed, old_func]) def test_method_deprecated_function_pickling(recwarn, func): pickled_func = pickle.dumps(func) assert isinstance(pickled_func, bytes) unpickled_func = pickle.loads(pickled_func) assert unpickled_func(20) == func(20) def test_method_deprecated_method_pickling(recwarn): instance = foo2() pickled = pickle.dumps(instance) assert isinstance(pickled, bytes) unpickled = pickle.loads(pickled) assert unpickled.old_meth(20) == instance.old_meth(20) assert unpickled.squared(20) == instance.squared(20) @pytest.mark.parametrize( ("func", "_type"), [(foo2().old_meth, "method"), (cubed, "function")], ) def test_deprecated_callable_resolves_type(recwarn, func, _type): func(2) assert any(_type in str(e) for e in recwarn.list), recwarn.list def test_function_deprecated_args_deprecated_callable_chained_decorators(recwarn): @deprecated_args( version="2023.6", reason="x is not descriptive", old_new=[("x", "a")] ) @deprecated_args( version="2023.6", reason="b is no longer required", discontinued=["b"] ) @deprecated_callable( version="2023.6", reason="Improved change function", new="changed2", is_discontinued=True, ) def changed(a: int) -> int: return a got = changed(x=5, b=3) assert got == 5 warn_msgs = [warning.message.args[0] for warning in recwarn.list] assert any("argument x will be removed" in w for w in warn_msgs) assert any("argument b is discontinued" in w for w in warn_msgs) assert any("function changed is discontinued" in w for w in warn_msgs) def test_class_deprecated(recwarn): class fooclass: @deprecated_callable( version="2023.6", reason="Improved change function", is_discontinued=True ) def __init__(self): ... 
fooclass() warn_msgs = [warning.message.args[0] for warning in recwarn.list] assert any("fooclass is discontinued" in w for w in warn_msgs) cogent3-scinexus-e0aee79/zensical.toml000066400000000000000000000321641520253266500201350ustar00rootroot00000000000000# ============================================================================ # # The configuration produced by default is meant to highlight the features # that Zensical provides and to serve as a starting point for your own # projects. # # ============================================================================ [project] # The site_name is shown in the page header and the browser window title # # Read more: https://zensical.org/docs/setup/basics/#site_name site_name = "scinexus" repo_url = "https://github.com/cogent3/scinexus" # The site_description is included in the HTML head and should contain a # meaningful description of the site content for use by search engines. # # Read more: https://zensical.org/docs/setup/basics/#site_description site_description = "A framework for rapid development of data processing applications." # The site_author attribute. This is used in the HTML head element. # # Read more: https://zensical.org/docs/setup/basics/#site_author site_author = "Gavin Huttley" # The site_url is the canonical URL for your site. When building online # documentation you should set this. # Read more: https://zensical.org/docs/setup/basics/#site_url #site_url = "https://www.example.com/" # The copyright notice appears in the page footer and can contain an HTML # fragment. # # Read more: https://zensical.org/docs/setup/basics/#copyright copyright = """ Copyright © 2026 The authors """ # Zensical supports both implicit navigation and explicitly defined navigation. # If you decide not to define a navigation here then Zensical will simply # derive the navigation structure from the directory structure of your # "docs_dir". 
The definition below demonstrates how a navigation structure
# can be defined using TOML syntax.
#
# Read more: https://zensical.org/docs/setup/navigation/
#
# Each entry is a single-key inline table: the key is the title shown in the
# navigation, the value is either one page or the list of pages of a section.
# Every section lists its own index.md first.
# NOTE(review): page paths are presumably relative to "docs_dir" -- confirm.
nav = [
    { "Home" = "index.md" },
    { "Install" = "install.md" },
    { "Explanation" = [
        "explanation/index.md",
        "explanation/why-composable-apps.md",
        "explanation/app-lifecycle.md",
        "explanation/flow.md",
        "explanation/type-system.md",
        "explanation/not-completed-design.md",
        "explanation/source-tracking.md",
        "explanation/customisation-hooks.md",
        "explanation/data-store-model.md",
    ] },
    { "How-to guides" = [
        "howto/index.md",
        "howto/write-a-function-app.md",
        "howto/write-a-class-app.md",
        "howto/handle-failures.md",
        "howto/use-data-stores.md",
        "howto/read-and-write-files.md",
        "howto/run-in-parallel.md",
        "howto/track-progress.md",
        "howto/log-and-cite.md",
        "howto/customise-display-and-ids.md",
        "howto/extend-type-checking.md",
        "howto/migrate-from-cogent3.md",
    ] },
    { "Tutorials" = [
        "tutorials/index.md",
        "tutorials/composing-apps.md",
        "tutorials/processing-a-dataset.md",
    ] },
    { "API reference" = [
        "reference/index.md",
        "reference/define-app.md",
        "reference/app-classes.md",
        "reference/not-completed.md",
        "reference/source-proxy.md",
        "reference/data-stores.md",
        "reference/io-util.md",
        "reference/parallel.md",
        "reference/progress.md",
        "reference/deserialise.md",
        "reference/utilities.md",
    ] },
]

# With the "extra_css" option you can add your own CSS styling to customize
# your Zensical project according to your needs. You can add any number of
# CSS files.
#
# The path provided should be relative to the "docs_dir".
#
# Read more: https://zensical.org/docs/customization/#additional-css
# extra_css = ["stylesheets/extra.css"]

# With the `extra_javascript` option you can add your own JavaScript to your
# project to customize the behavior according to your needs.
#
# The path provided should be relative to the "docs_dir".
#
# Read more: https://zensical.org/docs/customization/#additional-javascript
#extra_javascript = ["javascripts/extra.js"]
use_directory_urls = false

# ----------------------------------------------------------------------------
# Section for configuring theme options
# ----------------------------------------------------------------------------
[project.theme]
favicon = "images/logo-bw.png"
language = "en"

# Zensical provides a number of feature toggles that change the behavior
# of the documentation site. Entries that are commented out are disabled.
features = [
    # Zensical includes an announcement bar. This feature allows users to
    # dismiss it when they have read the announcement.
    # https://zensical.org/docs/setup/header/#announcement-bar
    "announce.dismiss",

    # If you have a repository configured and turn on this feature, Zensical
    # will generate an edit button for the page. This works for common
    # repository hosting services.
    # https://zensical.org/docs/setup/repository/#content-actions
    #"content.action.edit",

    # If you have a repository configured and turn on this feature, Zensical
    # will generate a button that allows the user to view the Markdown
    # code for the current page.
    # https://zensical.org/docs/setup/repository/#content-actions
    #"content.action.view",

    # Code annotations allow you to add an icon with a tooltip to your
    # code blocks to provide explanations at crucial points.
    # https://zensical.org/docs/authoring/code-blocks/#code-annotations
    "content.code.annotate",

    # This feature turns on a button in code blocks that allows users to
    # copy the content to their clipboard without first selecting it.
    # https://zensical.org/docs/authoring/code-blocks/#code-copy-button
    "content.code.copy",

    # Code blocks can include a button to allow for the selection of line
    # ranges by the user.
    # https://zensical.org/docs/authoring/code-blocks/#code-selection-button
    "content.code.select",

    # Zensical can render footnotes as inline tooltips, so the user can read
    # the footnote without leaving the context of the document.
    # https://zensical.org/docs/authoring/footnotes/#footnote-tooltips
    "content.footnote.tooltips",

    # If you have many content tabs that have the same titles (e.g., "Python",
    # "JavaScript", "Cobol"), this feature causes all of them to switch at
    # the same time when the user chooses their language in one of them.
    # https://zensical.org/docs/authoring/content-tabs/#linked-content-tabs
    "content.tabs.link",

    # With this feature enabled users can add tooltips to links that will be
    # displayed when the mouse pointer hovers over the link.
    # https://zensical.org/docs/authoring/tooltips/#improved-tooltips
    "content.tooltips",

    # With this feature enabled, Zensical will automatically hide parts
    # of the header when the user scrolls past a certain point.
    # https://zensical.org/docs/setup/header/#automatic-hiding
    "header.autohide",

    # Turn on this feature to expand all collapsible sections in the
    # navigation sidebar by default.
    # https://zensical.org/docs/setup/navigation/#navigation-expansion
    # "navigation.expand",

    # This feature turns on navigation elements in the footer that allow the
    # user to navigate to a next or previous page.
    # https://zensical.org/docs/setup/footer/#navigation
    "navigation.footer",

    # When section index pages are enabled, documents can be directly attached
    # to sections, which is particularly useful for providing overview pages.
    # https://zensical.org/docs/setup/navigation/#section-index-pages
    "navigation.indexes",

    # When instant navigation is enabled, clicks on all internal links will be
    # intercepted and dispatched via XHR without fully reloading the page.
    # https://zensical.org/docs/setup/navigation/#instant-navigation
    "navigation.instant",

    # With instant prefetching, your site will start to fetch a page once the
    # user hovers over a link. This will reduce the perceived loading time
    # for the user.
    # https://zensical.org/docs/setup/navigation/#instant-prefetching
    "navigation.instant.prefetch",

    # In order to provide a better user experience on slow connections when
    # using instant navigation, a progress indicator can be enabled.
    # https://zensical.org/docs/setup/navigation/#progress-indicator
    #"navigation.instant.progress",

    # When navigation paths are activated, a breadcrumb navigation is rendered
    # above the title of each page.
    # https://zensical.org/docs/setup/navigation/#navigation-path
    "navigation.path",

    # When pruning is enabled, only the visible navigation items are included
    # in the rendered HTML, reducing the size of the built site by 33% or more.
    # https://zensical.org/docs/setup/navigation/#navigation-pruning
    "navigation.prune",

    # When sections are enabled, top-level sections are rendered as groups in
    # the sidebar for viewports above 1220px, but remain as-is on mobile.
    # https://zensical.org/docs/setup/navigation/#navigation-sections
    "navigation.sections",

    # When tabs are enabled, top-level sections are rendered in a menu layer
    # below the header for viewports above 1220px, but remain as-is on mobile.
    # https://zensical.org/docs/setup/navigation/#navigation-tabs
    "navigation.tabs",

    # When sticky tabs are enabled, navigation tabs will lock below the header
    # and always remain visible when scrolling down.
    # https://zensical.org/docs/setup/navigation/#sticky-navigation-tabs
    "navigation.tabs.sticky",

    # A back-to-top button can be shown when the user, after scrolling down,
    # starts to scroll up again.
    # https://zensical.org/docs/setup/navigation/#back-to-top-button
    "navigation.top",

    # When anchor tracking is enabled, the URL in the address bar is
    # automatically updated with the active anchor as highlighted in the table
    # of contents.
    # https://zensical.org/docs/setup/navigation/#anchor-tracking
    "navigation.tracking",

    # When search highlighting is enabled and a user clicks on a search result,
    # Zensical will highlight all occurrences after following the link.
    # https://zensical.org/docs/setup/search/#search-highlighting
    "search.highlight",

    # When anchor following for the table of contents is enabled, the sidebar
    # is automatically scrolled so that the active anchor is always visible.
    # https://zensical.org/docs/setup/navigation/#anchor-following
    # "toc.follow",

    # When navigation integration for the table of contents is enabled, it is
    # always rendered as part of the navigation sidebar on the left.
    # https://zensical.org/docs/setup/navigation/#navigation-integration
    #"toc.integrate",
]

# ----------------------------------------------------------------------------
# You can configure your own logo to be shown in the header using the "logo"
# option in the "theme" subsection. The logo must be a relative path to a file
# in your "docs_dir", e.g., to use `docs/assets/logo.png` you would set
# `logo = "assets/logo.png"`. The same image is used for "favicon" above.
# ----------------------------------------------------------------------------
logo = "images/logo-bw.png"

# ----------------------------------------------------------------------------
# In the "font" subsection you can configure the fonts used. By default, fonts
# are loaded from Google Fonts, giving you a wide range of choices from a set
# of suitably licensed fonts. There are options for a normal text font and for
# a monospaced font used in code blocks.
# ----------------------------------------------------------------------------
#[project.theme.font]
#text = "Inter"
#code = "Jetbrains Mono"

# ----------------------------------------------------------------------------
# In the "palette" subsection you can configure options for the color scheme.
# You can configure different color schemes, e.g., to turn on dark mode,
# that the user can switch between. Each color scheme can be further
# customized.
#
# Read more:
# - https://zensical.org/docs/setup/colors/
# ----------------------------------------------------------------------------
# Two palette entries; each toggle names the scheme the user switches to.
[[project.theme.palette]]
scheme = "default"
toggle.icon = "lucide/sun"
toggle.name = "Switch to dark mode"

[[project.theme.palette]]
scheme = "slate"
toggle.icon = "lucide/moon"
toggle.name = "Switch to light mode"

# ----------------------------------------------------------------------------
# The "extra" section contains miscellaneous settings.
# ----------------------------------------------------------------------------
# Social link to the project repository. This header must stay active: while
# it was commented out, the "icon" and "link" keys below were parsed as part
# of the preceding [[project.theme.palette]] entry and no social link was
# configured.
[[project.extra.social]]
icon = "fontawesome/brands/github"
link = "https://github.com/cogent3/scinexus"

# Markdown extensions. A header with no keys enables the extension with its
# default options.
[project.markdown_extensions.pymdownx.superfences]
custom_fences = [
    { name = "mermaid", class = "mermaid", format = "pymdownx.superfences.fence_code_format" },
]

[project.markdown_extensions.pymdownx.inlinehilite]

[project.markdown_extensions.admonition]

[project.markdown_extensions.pymdownx.details]

[project.markdown_extensions.pymdownx.emoji]
emoji_index = "zensical.extensions.emoji.twemoji"
emoji_generator = "zensical.extensions.emoji.to_svg"

# API reference generation: sources are read from "src" and docstrings are
# parsed as numpy style.
[project.plugins.mkdocstrings.handlers.python]
inventories = ["https://docs.python.org/3/objects.inv"]
paths = ["src"]

[project.plugins.mkdocstrings.handlers.python.options]
docstring_style = "numpy"
inherited_members = true
show_source = false

[project.markdown_extensions.pymdownx.tabbed]
alternate_style = true

[project.markdown_extensions.footnotes]