#!/usr/bin/env python
# Archive member: iterable_io-1.0.4/src/iterableio/__init__.py

import io


class RawIterableReader(io.RawIOBase):
    """An io.RawIOBase implementation that reads from an iterable of bytes.

    In most cases, this class should not be used directly. See the included
    `open_iterable` function for a high-level interface.
    """

    def __init__(self, iterable):
        """Wrap `iterable`; its chunks are pulled lazily as data is read."""
        super().__init__()
        self._iter = iter(iterable)
        # Bytes pulled from the iterator but not yet handed to a caller.
        self._buffer = bytearray()
        # Number of bytes pulled from the iterator so far.
        self._total = 0

    def readable(self):
        """Return True: this stream can always be read from."""
        return True

    def close(self):
        """Drop the iterator and any buffered bytes, then mark as closed."""
        self._iter = None
        self._buffer = None
        super().close()

    def tell(self):
        """Return the total number of bytes that have been read.

        Raises ValueError if the reader is closed.
        """
        self._checkClosed()
        # Bytes still sitting in the buffer were pulled but never returned.
        return self._total - len(self._buffer)

    def readinto(self, b):
        """Read bytes into a pre-allocated, writable bytes-like object b.

        Chunks are pulled from the iterable until `b` can be filled or the
        iterable is exhausted; empty chunks do not signal EOF.

        Returns the number of bytes read, 0 indicates EOF.
        Raises ValueError if the reader is closed.
        """
        self._checkClosed()
        # View the target as raw bytes so that sizes are byte counts even for
        # buffers with multi-byte items (e.g. array.array('H')). The views are
        # released on exit so the caller can resize `b` afterwards.
        with memoryview(b) as view, view.cast("B") as target:
            wanted = len(target)
            if self._iter is not None:
                while len(self._buffer) < wanted:
                    try:
                        chunk = next(self._iter)
                    except StopIteration:
                        # Remember exhaustion so the iterator is never
                        # advanced again.
                        self._iter = None
                        break
                    before = len(self._buffer)
                    self._buffer += chunk
                    # Count what was actually appended, and only after the
                    # append succeeded, so tell() stays accurate.
                    self._total += len(self._buffer) - before

            available = len(self._buffer)
            if wanted >= available:
                # Everything buffered fits: hand it all over.
                target[:available] = self._buffer
                self._buffer.clear()
                return available

            # Copy just the requested prefix without an intermediate bytes
            # object; release the views before shrinking the bytearray, which
            # cannot be resized while it has live exports.
            with memoryview(self._buffer) as source, source[:wanted] as head:
                target[:wanted] = head
            del self._buffer[:wanted]
            return wanted


def open_iterable(
    iterable, mode="r", buffering=-1, encoding=None, errors=None, newline=None
):
    r"""Open an iterable of bytes to read from it using a file-like interface

    The `iterable` must be an iterable of bytes.

    mode is an optional string that specifies the mode in which the iterable
    is opened. It defaults to 'r', which means open for reading in text mode
    (the same as 'rt'). Only reading is supported. In text mode, if encoding
    is not specified the encoding used is platform dependent. (For reading raw
    bytes use binary mode and leave encoding unspecified.) The available modes
    are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (required)
    'b'       binary mode
    't'       text mode (default)
    ========= ===============================================================

    Iterables opened in binary mode (appending 'b' to the mode argument)
    return contents as bytes objects without any decoding. In text mode (the
    default), the contents of the iterable are returned as strings, the bytes
    having been first decoded using a platform-dependent encoding or using the
    specified encoding if given.

    buffering is an optional integer used to set the buffering policy. Pass 0
    to switch buffering off (only allowed in binary mode), and an integer > 0
    to indicate the size of a fixed-size chunk buffer. When no buffering
    argument is given (or it is negative), `io.DEFAULT_BUFFER_SIZE` will be
    used. On many systems, the buffer will typically be 4096 or 8192 bytes
    long.

    encoding is the str name of the encoding used to decode the bytes of the
    iterable. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be passed.
    See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how decoding errors are to be
    handled---this argument should not be used in binary mode. Pass 'strict'
    to raise a ValueError exception if there is an encoding error (the default
    of None has the same effect), or pass 'ignore' to ignore errors. Note that
    ignoring encoding errors can lead to data loss. See the documentation for
    codecs.register for a list of the permitted encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It
    works as follows:

    * If newline is None, universal newlines mode is enabled. Lines in the
      input can end in '\n', '\r', or '\r\n', and these are translated into
      '\n' before being returned to the caller.
    * If it is '', universal newline mode is enabled, but line endings are
      returned to the caller untranslated.
    * If it has any of the other legal values, input lines are only
      terminated by the given string, and the line ending is returned to the
      caller untranslated.

    open_iterable() returns a file object whose type depends on the mode, and
    through which the standard file operations such as read() are performed.
    When open_iterable() is used to open an iterable in a text mode ('rt'), it
    returns an io.TextIOWrapper. When used to open an iterable in a binary
    mode, the returned class varies: for unbuffered access, a
    RawIterableReader is returned and in buffered mode it returns an
    io.BufferedReader.

    Raises ValueError if mode is invalid or does not include 'r', if text-only
    arguments are combined with binary mode, or if unbuffered text mode is
    requested.
    """
    # This function is modeled after `io.open`, found in `Lib/_pyio.py`
    modes = set(mode)
    # Reject unknown characters as well as repeated ones (e.g. "rr").
    if modes - set("rtb") or len(mode) > len(modes):
        raise ValueError("invalid mode: '{}'".format(mode))

    reading = "r" in modes
    binary = "b" in modes
    # Text is the default whenever binary was not explicitly requested.
    text = "t" in modes or (reading and not binary)

    if not reading:
        raise ValueError("Must specify read mode")
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if text and buffering == 0:
        raise ValueError("can't have unbuffered text I/O")

    ret = RawIterableReader(iterable)
    try:
        if buffering == 0:
            # unbuffered binary mode
            return ret

        if buffering < 0:
            buffering = io.DEFAULT_BUFFER_SIZE

        ret = io.BufferedReader(ret, buffering)
        if binary:
            # buffered binary mode
            return ret

        # buffered text mode
        ret = io.TextIOWrapper(ret, encoding, errors, newline)
        ret.mode = mode
        return ret
    except BaseException:
        # Close whichever layer was built last (closing it closes the layers
        # beneath it), then let the original error propagate.
        ret.close()
        raise
encoding, errors, newline) @pytest.mark.parametrize("buffering", (0, -1, 1)) def test_reading(buffering): def gen(): yield from ( b'\x01\x02\x03\x04\x05', b"abcde", b"fghij", b"klmno", b"qrstu", b"vwxyz", b'\x06\x07\x08\x09\x10', ) _data = b"".join(gen()) with open_iterable(gen(), "rb", buffering=buffering) as i: assert i.readable() assert not i.seekable() assert not i.writable() cnt = 0 for amt in (0, 1, 2, 3, 4, 5, 10, 1, 1, 0): d = i.read(amt) assert len(d) == amt assert d == _data[cnt:cnt+amt] cnt += amt assert i.tell() == cnt assert i.read() == _data[cnt:] assert i.read() == b"" assert i.tell() == len(_data) def test_returned_class(): """Test that the correct class is returned depending on the mode and buffering spec""" assert isinstance(open_iterable([], "rb", buffering=0), RawIterableReader) assert isinstance(open_iterable([], "rb", buffering=-1), io.BufferedReader) assert isinstance(open_iterable([], "rb", buffering=1), io.BufferedReader) assert isinstance(open_iterable([], "rt", buffering=-1), io.TextIOWrapper) assert isinstance(open_iterable([], "rt", buffering=1), io.TextIOWrapper) @pytest.mark.parametrize("mode, buffering",[ ("rb", 0), ("rb", -1), ("rt", -1), ]) def test_contextmgr_close(mode, buffering): with open_iterable([], mode, buffering) as i: assert not i.closed assert i.closed @pytest.mark.parametrize("mode, buffering",[ ("rb", 0), ("rb", -1), ("rt", -1), ]) def test_unreadable_after_close(mode, buffering): i = open_iterable([b"12345"], mode, buffering) assert not i.read(0) assert i.read(1) in (b"1", "1") assert not i.closed i.close() assert i.closed with pytest.raises(ValueError, match="closed"): i.read() with pytest.raises(ValueError, match="closed"): i.tell() def test_yield_empty_bytes(): """Test that a generator is only 'done' when it stops yielding, not when it yields empty bytes""" def gen(): yield from ( b"1", b"", b"", b"", b"", b"", b"", b"", b"2", b"3", b"", b"", b"", b"", b"", b"", b"4", ) i = RawIterableReader(gen()) out = [] while 
True: b = i.read(1) if not b: break out.append(b) assert len(out) == 4 assert b"".join(out) == b"1234" def test_read_text(): def gen(): # 9 lines yielded in non-line chunks yield from ( x.encode("utf-8") for x in ( "this is a line\n", "", "", "_a", "another line\n", "another line1\n", "another line2\n", "another line_", "a", "aaaaaaa\nbbbbbbbb", "_", "1", "2", "3", "4", "5", "_line line line another line actually\n", "another line\n", "ending line\n", "actual ending line no trailing newline", ) ) real = "".join(x.decode("utf-8") for x in gen()) # read across chunks and lines with open_iterable(gen(), encoding="utf-8") as i: assert i.read(10) == real[:10] assert i.read(10) == real[10:20] with open_iterable(gen(), encoding="utf-8") as i: lines = list(i) with open_iterable(gen(), encoding="utf-8") as i: assert lines == i.readlines() assert len(lines) == len(real.splitlines()) == 9 assert "".join(lines) == real iterable_io-1.0.4/.gitignore0000644000000000000000000000041513615410400012702 0ustar00# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging build/ develop-eggs/ dist/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # Pytest .coverage .pytest_cache iterable_io-1.0.4/README.md0000644000000000000000000000527013615410400012175 0ustar00iterable-io =========== [![Status](https://github.com/pR0Ps/iterable-io/workflows/tests/badge.svg)](https://github.com/pR0Ps/iterable-io/actions/workflows/tests.yml) [![Version](https://img.shields.io/pypi/v/iterable-io.svg)](https://pypi.org/project/iterable-io/) ![Python](https://img.shields.io/pypi/pyversions/iterable-io.svg) `iterable-io` is a small Python library that provides an adapter so that it's possible to read from [iterable](https://docs.python.org/3/glossary.html#term-iterable) objects in the same way as [file-like](https://docs.python.org/3/glossary.html#term-file-object) objects. 
It is primarily useful as "glue" between two incompatible interfaces. For example, consider the case where one interface expects a file-like object to call `.read()` on, and the other only provides a generator of bytes. One way to solve this issue would be to write all the bytes in the generator to a temporary file, then provide that file instead, but if the generator produces a large amount of data then this is both slow to start and resource-intensive. This library allows streaming data between these two incompatible interfaces so that, as data is requested by `.read()`, it is pulled from the iterable. This keeps resource usage low and removes the startup delay. Installation ------------ ``` pip install iterable-io ``` Documentation ------------- The functionality of this library is accessed via a single function: `open_iterable()`. `open_iterable()` is designed to work the same way as the builtin `open()`, except that it takes an iterable to "open" instead of a file. For example, it can open the iterable in binary or text mode, has options for buffering, encoding, etc. See the docstring of `open_iterable` for more detailed documentation. Simple examples --------------- The following examples should be enough to understand in which cases `open_iterable()` would be useful and get a high-level understanding of how to use it: Read bytes from a generator of bytes: ```python gen = generate_bytes() # adapt the generator to a file-like object in binary mode # (fp.read() will return bytes) fp = open_iterable(gen, "rb") while chunk := fp.read(4096): process_chunk(chunk) ``` Read lines of text from a generator of bytes: ```python gen = generate_bytes() # adapt the generator to a file-like object in text mode # (fp.read() will return a string, fp.readline is also available) fp = open_iterable(gen, "rt", encoding="utf-8") for line in fp: process_line_of_text(line) ``` Tests ----- This package contains tests. 
To run them, install `pytest` (`pip install pytest`) and run `py.test` in the project directory. License ------- Licensed under the [GNU LGPLv3](https://www.gnu.org/licenses/lgpl-3.0.html). iterable_io-1.0.4/pyproject.toml0000644000000000000000000000236213615410400013631 0ustar00[project] name = "iterable-io" version = "1.0.4" description = "Adapt generators and other iterables to a file-like interface" urls.Repository = "https://github.com/pR0Ps/iterable-io" urls.Changelog = "https://github.com/pR0Ps/iterable-io/blob/master/CHANGELOG.md" readme = "README.md" license = "LGPL-3.0-only" classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Programming Language :: Python :: 3.15", "Operating System :: OS Independent", "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)" ] requires-python = ">=3.5" dependencies = [] [dependency-groups] test = ["pytest", "pytest-cov"] [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] packages = ["src/iterableio"] [tool.hatch.build.targets.sdist] include = ["src/", "tests/"] iterable_io-1.0.4/PKG-INFO0000644000000000000000000000741413615410400012015 0ustar00Metadata-Version: 2.4 Name: iterable-io Version: 1.0.4 Summary: Adapt generators and other iterables to a file-like interface Project-URL: Repository, https://github.com/pR0Ps/iterable-io Project-URL: Changelog, https://github.com/pR0Ps/iterable-io/blob/master/CHANGELOG.md License-Expression: LGPL-3.0-only Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) 
Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 Classifier: Programming Language :: Python :: 3.14 Classifier: Programming Language :: Python :: 3.15 Requires-Python: >=3.5 Description-Content-Type: text/markdown iterable-io =========== [![Status](https://github.com/pR0Ps/iterable-io/workflows/tests/badge.svg)](https://github.com/pR0Ps/iterable-io/actions/workflows/tests.yml) [![Version](https://img.shields.io/pypi/v/iterable-io.svg)](https://pypi.org/project/iterable-io/) ![Python](https://img.shields.io/pypi/pyversions/iterable-io.svg) `iterable-io` is a small Python library that provides an adapter so that it's possible to read from [iterable](https://docs.python.org/3/glossary.html#term-iterable) objects in the same way as [file-like](https://docs.python.org/3/glossary.html#term-file-object) objects. It is primarily useful as "glue" between two incompatible interfaces. As an example, in the case where one interface expects a file-like object to call `.read()` on, and the other only provides a generator of bytes. One way to solve this issue would be to write all the bytes in the generator to a temporary file, then provide that file instead, but if the generator produces a large amount of data then this is both slow to start, and resource-intensive. This library allows streaming data between these two incompatible interfaces so as data is requested by `.read()`, it's pulled from the iterable. This keeps resource usage low and removes the startup delay. 
Installation ------------ ``` pip install iterable-io ``` Documentation ------------- The functionality of this library is accessed via a single function: `open_iterable()`. `open_iterable()` is designed to work the same way as the builtin `open()`, except that it takes an iterable to "open" instead of a file. For example, it can open the iterable in binary or text mode, has options for buffering, encoding, etc. See the docstring of `open_iterable` for more detailed documentation. Simple examples --------------- The following examples should be enough to understand in which cases `open_iterable()` would be useful and get a high-level understanding of how to use it: Read bytes from a generator of bytes: ```python gen = generate_bytes() # adapt the generator to a file-like object in binary mode # (fp.read() will return bytes) fp = open_iterable(gen, "rb") while chunk := fp.read(4096): process_chunk(chunk) ``` Read lines of text from a generator of bytes: ```python gen = generate_bytes() # adapt the generator to a file-like object in text mode # (fp.read() will return a string, fp.readline is also available) fp = open_iterable(gen, "rt", encoding="utf-8") for line in fp: process_line_of_text(line) ``` Tests ----- This package contains tests. To run them, install `pytest` (`pip install pytest`) and run `py.test` in the project directory. License ------- Licensed under the [GNU LGPLv3](https://www.gnu.org/licenses/lgpl-3.0.html).