pax_global_header00006660000000000000000000000064151711541510014513gustar00rootroot0000000000000052 comment=35c9552a984e27a231611462841e8527aaacb774 python-sfdutf7-0.1.0/000077500000000000000000000000001517115415100144125ustar00rootroot00000000000000python-sfdutf7-0.1.0/LICENSE.txt000066400000000000000000000036441517115415100162440ustar00rootroot00000000000000Copyright (c) 2014–2022 sfdnormalize: Fredrick R. Brennan (@ctrlcctrlv) IMAPClient¹: Menno Finlay-Smits (@mjs), Carson Ip (GitHub @carsonip), Mathieu Agopian (@magopian), John Villalovos (@JohnVillalovos) All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Menno Smits nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MENNO SMITS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ¹ The version of IMAPClient this is based on is 6e6ec34b0e71975134d9492add22361ce4beb2a0: . python-sfdutf7-0.1.0/README.md000066400000000000000000000057441517115415100157030ustar00rootroot00000000000000# SFDUTF7 encoder/decoder library FontForge's Spline Font Databases have a rather unusual encoding called "UTF-7", but which is incompatible with *either* the common meaning of this word *or* the IMAP meaning. It is instead its own encoding unrelated to either, which I have implemented for use in `sfdnormalize` and other projects. ## Tests / source of truth The ultimate source of truth for this library is the implementation as it appears in [FontForge](https://github.com/fontforge/fontforge)'s `sfd.c` of [`SFDDumpUTF7Str`](https://github.com/fontforge/fontforge/blob/18225116959807bcf0276ff07f69a19b0dddfe52/fontforge/sfd.c#L207) and [`SFDReadUTF7Str`](https://github.com/fontforge/fontforge/blob/18225116959807bcf0276ff07f69a19b0dddfe52/fontforge/sfd.c#L413). The function `utf7toutf8_copy` has some involvment as well. There is no spec, if this library results in a string FontForge can decode transparently (not the same thing as "exactly the same string FontForge itself would produce"!) my implementation is right; if it doesn't then it's not, please open a bug! :-) ## License Copyright (c) 2014–2022 * sfdnormalize: Fredrick R. Brennan (@ctrlcctrlv) * IMAPClient¹: Menno Finlay-Smits (@mjs), Carson Ip (GitHub @carsonip), Mathieu Agopian (@magopian), John Villalovos (@JohnVillalovos) All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Menno Smits nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MENNO SMITS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ¹ The version of IMAPClient this is based on is 6e6ec34b0e71975134d9492add22361ce4beb2a0: . python-sfdutf7-0.1.0/pyproject.toml000066400000000000000000000001421517115415100173230ustar00rootroot00000000000000# pyproject.toml [build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" python-sfdutf7-0.1.0/requirements.txt000066400000000000000000000000001517115415100176640ustar00rootroot00000000000000python-sfdutf7-0.1.0/setup.cfg000066400000000000000000000010511517115415100162300ustar00rootroot00000000000000[metadata] name = sfdutf7 version = 0.1.0 author = Fredrick Brennan author_email = copypaste@kittens.ph description = SFDUTF7 encoding parsing library url = https://github.com/ctrlcctrlv/sfdutf7.py long_description = file: README.md long_description_content_type = text/markdown classifiers = Programming Language :: Python :: 3 License :: OSI Approved :: Apache Software License Operating System :: OS Independent [options] packages = find: python_requires = >=3.6 [options.entry_points] console_scripts = sfdutf7 = sfdutf7.__main__:main python-sfdutf7-0.1.0/setup.py000066400000000000000000000013631517115415100161270ustar00rootroot00000000000000import setuptools with open("README.md", "r") as fh: long_description = fh.read() setuptools.setup( name="sfdutf7", version="0.1.0", author="Fredrick Brennan", author_email="copypaste@kittens.ph", description="SFDUTF7 encoding parsing library", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/ctrlcctrlv/sfdutf7.py", entry_points={ "console_scripts": ["sfdutf7 = sfdutf7.__main__:main"], }, packages=setuptools.find_packages(), classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", ], python_requires=">=3.6", ) python-sfdutf7-0.1.0/sfdutf7/000077500000000000000000000000001517115415100157745ustar00rootroot00000000000000python-sfdutf7-0.1.0/sfdutf7/__init__.py000066400000000000000000000147501517115415100201140ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # This file contains two main methods used to encode and decode "SFD" UTF-7 # string, based heavily on functions written for the IMAPClient project. # # The IMAPClient library cannot be used directly due to these FontForge quirks: # * the non-use of `,` instead of `/` which was a change in RFC 3501; # * the non-use of `&` instead of `+` which was also a change in RFC 3501; # * the mandatory termination with `A`/`AA` (0x0) to make the number of # bytes even; # * the mandatory handling of both string terminated in `A` and `-`; # * the mandatory encoding of `"`. # # Copyright (c) 2014–2022 # sfdnormalize: Fredrick R. Brennan (@ctrlcctrlv) # IMAPClient¹: Menno Finlay-Smits (@mjs), Carson Ip (GitHub @carsonip), Mathieu # Agopian (@magopian), John Villalovos (@JohnVillalovos) # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Menno Smits nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL MENNO SMITS BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # ¹ The version of IMAPClient this is based on is # 6e6ec34b0e71975134d9492add22361ce4beb2a0: # . import binascii import string from . import xmlvalid PLUS_ORD = ord(b'+') DASH_ORD = ord(b'-') CAP_A_ORD = ord(b'A') QUOT_ORD = ord(b'"') EQUALS_ORD = ord(b'=') MODIFIED_B64 = (string.ascii_lowercase + string.ascii_uppercase + string.digits).encode('ascii') + b'/' def encode(s: str, quote=False) -> bytes: res = bytearray() b64_buffer = list() def consume_b64_buffer(buf): """ Consume the buffer by encoding it into a "modified base 64" ( representation and surround it with shift characters + and - """ if buf: # FontForge SFD extension: null (0x00) bytes are used to end UTF7 # buffer. In IMAP, the last (shift) character would be "-" here. ext = bytearray(b'+' + _base64_utf7_encode(buf)) # FontForge quirk: must be even len if ext[-2] == EQUALS_ORD: ext = ext[:-1] if ext[-1] == EQUALS_ORD: ext[-1] = CAP_A_ORD ext = ext + b'-' res.extend(bytes(ext)) del buf[:] for c in s: # printable ascii case should not be modified o = ord(c) # (FontForge SFD exception: # " is always encoded) if 0x20 <= o <= 0x7E and o != QUOT_ORD: consume_b64_buffer(b64_buffer) # Special case: + is used as shift character so we need to escape # it in ASCII if o == PLUS_ORD: # 0x2B = + res.extend(b"+-") else: res.append(o) # Bufferize characters that will be encoded in base64 and append them # later in the result, when iterating over ASCII character or the end # of string else: b64_buffer.append(c) # Consume the remaining buffer if the string finish with non-ASCII # characters consume_b64_buffer(b64_buffer) if quote: res.insert(0, QUOT_ORD) res.append(QUOT_ORD) return bytes(res) def decode(s: bytes, unquote=False, force_valid_xml=False) -> str: if unquote and len(s) >= 2 and s[0] == QUOT_ORD and s[-1] == QUOT_ORD: s = s[1:-1] res = [] # Store b64 substring that will be decoded once stepping on end shift char b64_buffer = bytearray() for c in s: if c == PLUS_ORD: b64_buffer.append(c) # End shift char → append the decoded buffer to the result and reset it elif (c == DASH_ORD or c not in MODIFIED_B64) and b64_buffer: # Special case: +- represents «+» escaped if len(b64_buffer) == 1: res.append("+") else: res.append(_base64_utf7_decode(b64_buffer[1:])) b64_buffer = bytearray() if c != DASH_ORD: res.append(chr(c)) # Shift character w/o anything in buffer → starts storing b64 substr # Or still buffering between the shift character and the shift back to ASCII elif c in MODIFIED_B64 and (len(b64_buffer) > 0 or b64_buffer == [PLUS_ORD]): b64_buffer.append(c) # No buffer initialized yet, should be an ASCII printable char else: res.append(chr(c)) # Decode the remaining buffer, if any if b64_buffer: res.append(_base64_utf7_decode(b64_buffer[1:])) if force_valid_xml: res = xmlvalid.force_text(res) return "".join(res) def _base64_utf7_encode(buffer: str) -> bytes: s = "".join(buffer).encode("utf-16be") return binascii.b2a_base64(s, newline=False) def _base64_utf7_decode(s: bytes) -> str: s = bytearray(s) # Cut off FontForge's final `A` or `AA` because it's invalid UTF-7. # (This quirk is because FontForge wrongly encodes the unneeded 0's.) if len(s) >= 2 and len(s) % 2 == 0 and s[-2] == CAP_A_ORD and s[-1] == CAP_A_ORD: s = s[:-2] elif s[-1] == CAP_A_ORD: s = s[:-1] if not s[-1] == DASH_ORD: s = s + b'-' s_utf7 = b'+' + s return s_utf7.decode("utf-7") python-sfdutf7-0.1.0/sfdutf7/__main__.py000066400000000000000000000012711517115415100200670ustar00rootroot00000000000000from sfdutf7 import encode, decode import argparse import sys def main(): parser = argparse.ArgumentParser(description="Encode/decode SFDUTF7") parser.add_argument( "-d", "--decode", help="Decode, not encode", action="store_true" ) parser.add_argument( "-q", "--quote", help="Quote string for storing in .sfd (or unquote quoted string)", action="store_true", ) args = parser.parse_args() inp = sys.stdin.buffer.read() if args.decode: print(decode(inp, quote=args.quote), end="") else: print(encode(inp.decode("utf-8"), quote=args.quote).decode("ascii"), end="") if __name__ == "__main__": main() python-sfdutf7-0.1.0/sfdutf7/xmlvalid.py000066400000000000000000000017511517115415100201720ustar00rootroot00000000000000# This file is part of [MFEK/sfdutf7.py](https://github.com/MFEK/sfdutf7.py); # see __init__.py for reuse information. from typing import Iterator import warnings # Removes all C0/C1 controls except tab (U+0009) and CRLF (U+000D U+000A). Cf.: # _xmlIsChar_ch = lambda c: ( ((0x9 <= (c)) and ((c) <= 0xA)) or ((c) == 0xD) or (0x20 <= (c)) ) def _str_for_str(ch: str) -> str: c = ord(ch) if c == 0x0B: # VERTICAL TABULATION warnings.warn("Replaced 0x0B with newline; suspected FontForge bug?") ret = ord("\n") elif not _xmlIsChar_ch(c): warnings.warn(f"Replaced 0x0B with newline in string {s}!") ret = ord("�") else: ret = c return chr(ret) def _force_text_validity(s: str) -> Iterator[str]: for ch in s: for cp in ch: yield _str_for_str(cp) def force_text(s: str) -> str: return "".join(_force_text_validity(s)) python-sfdutf7-0.1.0/tests/000077500000000000000000000000001517115415100155545ustar00rootroot00000000000000python-sfdutf7-0.1.0/tests/test_sfdutf7.py000066400000000000000000000041541517115415100205530ustar00rootroot00000000000000import ctypes import ctypes.util from ctypes import ( POINTER, Structure, py_object, pythonapi, c_void_p, c_int, c_char_p, pointer, ) import logging import os import sfdutf7 libc = ctypes.cdll.LoadLibrary(ctypes.util.find_library("c")) libfontforge = ctypes.cdll.LoadLibrary(ctypes.util.find_library("fontforge")) class FILE(Structure): pass fdopen = libc.fdopen fdopen.restype = POINTER(FILE) fdopen.argtypes = [c_int, c_char_p] SFDDumpUTF7Str = libfontforge.SFDDumpUTF7Str SFDDumpUTF7Str.restype = c_void_p SFDDumpUTF7Str.argtypes = [POINTER(FILE), c_char_p] SFDReadUTF7Str = libfontforge.SFDReadUTF7Str SFDReadUTF7Str.restype = c_char_p SFDReadUTF7Str.argtypes = [POINTER(FILE)] def libfontforge_sfdutf7decode(s: bytes) -> str: filename = '/tmp/sfdutf7_SFDDumpUTF7StrOUT' fp = open(filename, 'wb+') fp.write(b'"') fp.write(s) fp.write(b'"') fp.close() fp = open(filename, 'rb') c_file = fdopen(c_int(fp.fileno()), c_char_p(fp.mode.encode('ascii'))) buffer = SFDReadUTF7Str(c_file) libc.fclose(c_file) logging.debug("libfontforge_sfdutf7decode buffer: {}".format(buffer.decode('utf-8'))) return buffer.decode('utf-8') def libfontforge_sfdutf7encode(s: str) -> bytes: filename = '/tmp/sfdutf7_SFDDumpUTF7StrOUT' fp = open(filename, 'wb+') c_file = fdopen(c_int(fp.fileno()), c_char_p(fp.mode.encode('ascii'))) SFDDumpUTF7Str(c_file, c_char_p(s.encode('utf-8'))) libc.fclose(c_file) with open(filename, 'rb') as f: ret = f.read() os.unlink(filename) logging.debug("libfontforge_sfdutf7encode buffer: {}".format(ret.decode('utf-8'))) return ret test_strings = [ '"', '""', '"\n', '" ', '"A', '日本go語' ] def test_decode(): for to_encode in test_strings: encoded = libfontforge_sfdutf7encode(to_encode)[1:-1] decoded = sfdutf7.decode(encoded) assert to_encode == decoded def test_encode(): for to_encode in test_strings: encoded = sfdutf7.encode(to_encode) decoded = libfontforge_sfdutf7decode(encoded) assert to_encode == decoded