pax_global_header00006660000000000000000000000064141642626730014525gustar00rootroot0000000000000052 comment=beab328945547b0147a53655f32c5cc76ab4488b Anonip-1.1.0/000077500000000000000000000000001416426267300127505ustar00rootroot00000000000000Anonip-1.1.0/.editorconfig000066400000000000000000000003231416426267300154230ustar00rootroot00000000000000root = true [*] insert_final_newline = true end_of_line = lf charset = utf-8 trim_trailing_whitespace = true [*.py] indent_style = space indent_size = 4 max_line_length = 88 Anonip-1.1.0/.github/000077500000000000000000000000001416426267300143105ustar00rootroot00000000000000Anonip-1.1.0/.github/workflows/000077500000000000000000000000001416426267300163455ustar00rootroot00000000000000Anonip-1.1.0/.github/workflows/pypi.yml000066400000000000000000000013411416426267300200500ustar00rootroot00000000000000name: PyPI on: release: types: [created] jobs: publish: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Setup Python uses: actions/setup-python@v1 with: python-version: "3.9" - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel twine - name: Remove existing packages run: rm -rf ./dist/ ./anonip.egg-info/ ./build/ - name: Run twine env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} TWINE_NON_INTERACTIVE: true run: | python setup.py sdist bdist_wheel twine upload dist/* Anonip-1.1.0/.github/workflows/tests.yml000066400000000000000000000031571416426267300202400ustar00rootroot00000000000000name: Tests on: push: pull_request: schedule: - cron: '0 0 * * 0' jobs: tests: runs-on: ubuntu-latest strategy: matrix: python: ["2.7", "3.6", "3.7", "3.8", "3.9", "3.10", "pypy3"] steps: - uses: actions/checkout@v2 - name: Setup Python uses: actions/setup-python@v1 with: python-version: ${{ matrix.python }} - name: Install Tox and any other packages run: pip install tox - name: Run Tox run: tox -e py # Run tox using the version of Python in `PATH` flake8: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Setup Python uses: actions/setup-python@v1 with: python-version: "3.9" - name: Install Tox and any other packages run: pip install tox - name: Run Tox run: tox -e flake8 black: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Setup Python uses: actions/setup-python@v1 with: python-version: "3.9" - name: Install Tox and any other packages run: pip install tox - name: Run Tox run: tox -e black gitlint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 with: fetch-depth: 0 ref: ${{ github.event.pull_request.head.sha }} - name: Setup Python uses: actions/setup-python@v2 with: python-version: "3.9" - name: Install gitlint run: pip install gitlint - name: Run gitlint run: gitlint --contrib contrib-title-conventional-commits Anonip-1.1.0/.gitignore000066400000000000000000000003531416426267300147410ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # Distribution / packaging build/ dist/ *.egg-info/ # Environments pyenv/ venv/ .python-version # tests .tox/ .pytest_cache/ .coverage *,cover # idea .idea/ Anonip-1.1.0/.gitlint000066400000000000000000000001321416426267300144170ustar00rootroot00000000000000[general] ignore=body-is-missing, body-max-line-length, body-min-length, title-max-length Anonip-1.1.0/CHANGELOG.md000066400000000000000000000065431416426267300145710ustar00rootroot00000000000000# Changelog ## v1.1.0 ### Feature * Performance: Make column indexes zero-based internally. [`aa28386`](https://github.com/DigitaleGesellschaft/Anonip/commit/aa28386ecaefcb479f5646b4dac11a9ea6c1e4d4) * Performance: Calculate IP prefix for masking only once. [`70e4d55`](https://github.com/DigitaleGesellschaft/Anonip/commit/70e4d5504e8605691e43c8802c549556e10c2ac4) * Performance: Use a dict to store and access the IP prefixes used for masking. [`536e22b`](https://github.com/DigitaleGesellschaft/Anonip/commit/536e22b6e712040b5bf80a8566b13f783faa647f) * Performance: Avoid another useless use of str.format() in inner loop. [`d67bd11`](https://github.com/DigitaleGesellschaft/Anonip/commit/d67bd115e453dec40483ac3c64c86fc108d3d295) * Make Anonip.run() accept an optional input stream. [`3c44134`](https://github.com/DigitaleGesellschaft/Anonip/commit/3c44134c37d612326e2fd3396906d42bf53eff0d) * Add option "--input". [`1a2c4b4`](https://github.com/DigitaleGesellschaft/Anonip/commit/1a2c4b4cb7d3382ee4ee113bfcb775f38b55407d) * feat(logging): improve logging if no ip can be detected [`f9584f1`](https://github.com/DigitaleGesellschaft/Anonip/commit/f9584f16472cf6c450dd322d7754315a29ec9f32) * feat(cli): Regex based IP detection [`bf37456`](https://github.com/DigitaleGesellschaft/Anonip/commit/bf3745692a6acca9e99c2845c9f03d26861725ba) ### Fix * Fix warning message if column was not found. [`3831a29`](https://github.com/DigitaleGesellschaft/Anonip/commit/3831a29087dbacc222c19d036d9c528d0f35583f) * Fix: Log-messages get formatted even if not output. [`a4a0448`](https://github.com/DigitaleGesellschaft/Anonip/commit/a4a04483935f95d0d3889b8fa6b6976fe4491fa3) * Fix: Terminates if empty or all-white-space line is read. [`ddbfc86`](https://github.com/DigitaleGesellschaft/Anonip/commit/ddbfc860388d1cfc8a02f78eeb5908c8559db926) * fix(properties): use setter for columns property [`43b7002`](https://github.com/DigitaleGesellschaft/Anonip/commit/43b7002f5dcbb8f285a594605135cb2584c82b85) * fix(cli): handle KeyboardInterrupt [`b82da7a`](https://github.com/DigitaleGesellschaft/Anonip/commit/b82da7a659b9a47c6178d4868e9a6b5cb9549eed) * Bugfix: IPv6 address masquerading with python2.7 [`556b132`](https://github.com/DigitaleGesellschaft/Anonip/commit/556b1324b4d1ca159e2e0dc47918c378dbcc69c0) * fix(python2): Fix reading from stdin [`4ecfd91`](https://github.com/DigitaleGesellschaft/Anonip/commit/4ecfd91bc52e005fcaa188ff873a28a59cf6757e) * fix(logging) Avoid side-effect during module import [`474125d`](https://github.com/DigitaleGesellschaft/Anonip/commit/474125d1bf39c9b4bfee882a45aab7fb1b13b875) ### Docs * Readme: Remove useless use of `cat`. [`25939fc`](https://github.com/DigitaleGesellschaft/Anonip/commit/25939fc737bc5ad3a94d5c4ca87914f1d59e14bf) * Readme: Slightly restructure and update the Usage section. [`900263c`](https://github.com/DigitaleGesellschaft/Anonip/commit/900263cbd733a9504a62b05a3f1fde29d09df08a) * Readme: Add usage for nginx. [`3c0e4dd`](https://github.com/DigitaleGesellschaft/Anonip/commit/3c0e4dd057efa8fb5b7e1eb62b70f8fc6c6bb99c) * docs: fix ipaddress module link [`4c7628c`](https://github.com/DigitaleGesellschaft/Anonip/commit/4c7628ca11c370e259030188f033cc34af1ae07f) * docs: fix link for coverage shield [`1387f2e`](https://github.com/DigitaleGesellschaft/Anonip/commit/1387f2ea657e95f98eb568083d947cb0ab3fdce8) ## v1.0.0 Rewrite using the ipaddress module. Anonip-1.1.0/LICENSE.txt000066400000000000000000000030271416426267300145750ustar00rootroot00000000000000Copyright (c) 2013 - 2016, Swiss Privacy Foundation 2016 - 2019, Digitale Gesellschaft All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Anonip-1.1.0/MANIFEST.in000066400000000000000000000001071416426267300145040ustar00rootroot00000000000000include LICENSE.txt include README.md include tests.py include tox.ini Anonip-1.1.0/README.md000066400000000000000000000150001416426267300142230ustar00rootroot00000000000000# anonip [![PyPI](https://img.shields.io/pypi/v/anonip.svg)](https://pypi.org/project/anonip/) [![Python versions](https://img.shields.io/pypi/pyversions/anonip.svg)](https://pypi.org/project/anonip/) [![Build Status](https://github.com/DigitaleGesellschaft/Anonip/workflows/Tests/badge.svg)](https://github.com/DigitaleGesellschaft/Anonip/actions?query=workflow%3ATests) [![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen.svg)](https://github.com/DigitaleGesellschaft/Anonip/blob/master/setup.cfg#L58) [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/DigitaleGesellschaft/Anonip) [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) Digitale Gesellschaft https://www.digitale-gesellschaft.ch Formerly Swiss Privacy Foundation https://www.privacyfoundation.ch/ ## Description Anonip is a tool to anonymize IP addresses in log files. It masks the last bits of IPv4 and IPv6 addresses. That way most of the relevant information is preserved, while the IP-address does not match a particular individuum anymore. Depending on your webserver software, the log entries may directly get piped to Anonip. The unmasked IP addresses will never be written to any file. Using shell redirects, it's also possible to rewrite existing log files. ## Features - Masks IP addresses in log files - Configurable amount of masked bits - The column containing the IP address can freely be chosen - Alternatively use a regex to point anonip to the location(s) of the IP(s). See [this RFC](https://github.com/DigitaleGesellschaft/Anonip/issues/44) for more information. - Works for both access.log- and error.log files ## Officially supported python versions - 2.7 - 3.6 - 3.7 - 3.8 - 3.9 - 3.10 ## Dependencies If you're using python version >=3.3, there are no external dependencies. For python versions <3.3: - [ipaddress module](https://github.com/kwi-dk/py2-ipaddress) ## Invocation ``` usage: anonip.py [-h] [-4 INTEGER] [-6 INTEGER] [-i INTEGER] [-o FILE] [--input FILE] [-c INTEGER [INTEGER ...]] [-l STRING] [--regex STRING [STRING ...]] [-r STRING] [-p] [-d] [-v] Anonip is a tool to anonymize IP-addresses in log files. optional arguments: -h, --help show this help message and exit -4 INTEGER, --ipv4mask INTEGER truncate the last n bits (default: 12) -6 INTEGER, --ipv6mask INTEGER truncate the last n bits (default: 84) -i INTEGER, --increment INTEGER increment the IP address by n (default: 0) -o FILE, --output FILE file to write to --input FILE File or FIFO to read from (default: stdin) -c INTEGER [INTEGER ...], --column INTEGER [INTEGER ...] assume IP address is in column n (1-based indexed; default: 1) -l STRING, --delimiter STRING log delimiter (default: " ") --regex STRING [STRING ...] regex for detecting IP addresses (use instead of -c) -r STRING, --replace STRING replacement string in case address parsing fails (Example: 0.0.0.0) -p, --skip-private do not mask addresses in private ranges. See IANA Special-Purpose Address Registry. -d, --debug print debug messages -v, --version show program's version number and exit Example-usage in apache-config: CustomLog "| /path/to/anonip.py [OPTIONS] --output /path/to/log" combined ``` ## Usage ``` shell /path/to/anonip.py [OPTIONS] < /path/to/orig_log --output /path/to/log ``` or using shell redirects only (mind the redirected output is appending): ``` shell /path/to/anonip.py [OPTIONS] < /path/to/orig_log >> /path/to/log ``` ### With Apache In the Apache configuration (or the one of a vhost) the log output needs to get piped to anonip like this: ``` CustomLog "|/path/to/anonip.py [OPTIONS] --output /path/to/log" combined ErrorLog "|/path/to/anonip.py [OPTIONS] --output /path/to/error_log" ``` That's it! All the IP addresses will be masked in the log now. ### With nginx nginx does not support spawning a process it then pipes to. Thus you need to create a named pipe (file-based FIFO) and start the processes yourself, along this lines: ``` shell mkfifo /path/to/log.fifo /path/to/error_log.fifo /path/to/anonip.py [OPTIONS] --output /path/to/log < /path/to/log.fifo & /path/to/anonip.py [OPTIONS] --output /path/to/error_log < /path/to/error_log.fifo & ``` As you can see, you need to start a separate process for each access-log file and for each error-log file. In the nginx configuration (or the one of a vhost) the log output needs to be set to the named pipe like this: ``` access_log /path/to/log.fifo; error_log /path/to/error_log.fifo; ``` ### As a python module Read from stdin: ``` python from anonip import Anonip anonip = Anonip() for line in anonip.run(): print(line) ``` Manually feed lines: ``` python from anonip import Anonip data = ['1.1.1.1', '2.2.2.2', '3.3.3.3'] anonip = Anonip() for line in data: print(anonip.process_line(line)) ``` ### Python 2 or 3? For compatibility reasons, anonip uses the shebang `#! /usr/bin/env python`. This will default to python2 on all Linux distributions except for Arch Linux. The performance of anonip can be improved by running it with python3. If python3 is available on your system, you should preferrably invoke anonip like this: ``` shell python3 -m anonip [OPTIONS] ``` or ``` shell python3 /path/to/anonip.py [OPTIONS] ``` ## Motivation In most cases IP addresses are personal data as they refer to individuals (or at least their Internet connection). IP addresses - and the data associated with them - may therefore only be lawfully processed in accordance with the principles of the applicable data protection laws. Storage of log files from web servers, for example, is only permitted within close time limits or with the voluntary consent of the persons concerned (as long as the information about the IP address is linkable to a person). Anonip tries to avoid exactly that, but without losing the benefit of those log files. With the masking of the last bits of IP addresses, we're still able to distinguish the log entries up to a certain degree. Compared to the entire removal of the IP-adresses, we're still able to make a rough geolocating as well as a reverse DNS lookup. But the otherwise distinct IP addresses do not match a particular individuum anymore. Anonip-1.1.0/anonip.py000077500000000000000000000356671416426267300146320ustar00rootroot00000000000000#! /usr/bin/env python # -*- coding: utf-8 -*- """ An IP address anonymizer Digitale Gesellschaft https://www.digitale-gesellschaft.ch. Special thanks to: Thomas B. and Fabio R. Copyright (c) 2013 - 2016, Swiss Privacy Foundation 2016 - 2019, Digitale Gesellschaft 2019, Hartmut Goebel All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ from __future__ import print_function, unicode_literals import argparse import logging import re import sys from io import open try: import ipaddress except ImportError: # pragma: no cover # Could happen with python < 3.3 print("\033[31;1mError: Module ipaddress not found.\033[0m", file=sys.stderr) sys.exit(1) try: from urllib.parse import urlparse except ImportError: # pragma: no cover # compatibility for python < 3 from urlparse import urlparse if sys.version_info[0] >= 3: # pragma: no cover # compatibility for python < 3 unicode = str __title__ = "anonip" __description__ = "Anonip is a tool to anonymize IP-addresses in log files." __version__ = "1.1.0" __license__ = "BSD" __author__ = "Digitale Gesellschaft" logger = logging.getLogger(__name__) class Anonip(object): def __init__( self, columns=None, ipv4mask=12, ipv6mask=84, increment=0, delimiter=" ", replace=None, regex=None, skip_private=False, ): """ Main class for anonip. :param columns: list of int, 1-based column numbers :param ipv4mask: int :param ipv6mask: int :param increment: int :param delimiter: str :param replace: str :param skip_private: bool """ self.columns = columns self._prefixes = {} # next two lines will fill the values self.ipv4mask = ipv4mask self.ipv6mask = ipv6mask self.increment = increment self.delimiter = delimiter self.replace = replace self.regex = regex self.skip_private = skip_private @property def columns(self): return self._columns @columns.setter def columns(self, columns): # change columns to be 0-based self._columns = [c - 1 for c in columns] if columns else [0] @property def ipv4mask(self): return self._ipv4mask @ipv4mask.setter def ipv4mask(self, mask): self._ipv4mask = mask self._prefixes[4] = 32 - mask @property def ipv6mask(self): return self._ipv6mask @ipv6mask.setter def ipv6mask(self, mask): self._ipv6mask = mask self._prefixes[6] = 128 - mask def run(self, input_file=None): """ Generator that reads from file handle (defaults to stdin) and loops forever. Yields anonymized log lines. :param input_file: file handle to read from (default: sys.stdin) :return: None """ if not input_file: # Assign here instead of using a default parameter value # to allow "late binding". input_file = sys.stdin line = input_file.readline() while line: line = line.rstrip() if line.strip() == "": logger.debug("Empty line detected. Doing nothing.") yield line line = input_file.readline() continue logger.debug("Got line: %r", line) yield self.process_line(line) line = input_file.readline() def process_ip(self, ip): """ Process a single ip. :param ip: /32 ipaddress.IPv4Network or /128 ipaddress.IPv6Network :return: ipaddress.IPv4Address or ipaddress.IPv6Address """ if self.skip_private and ip[0].is_private: return ip[0] else: trunc_ip = self.truncate_address(ip) if self.increment: try: trunc_ip = trunc_ip + self.increment except ipaddress.AddressValueError: logger.error( "Could not increment IP %s by %s", trunc_ip, self.increment ) return trunc_ip def process_line_regex(self, line): """ This function processes a single line based on the provided regex. It returns the anonymized log line as string. :param line: str :return: str """ match = re.match(self.regex, line) if not match: logger.debug("Regex did not match!") return line groups = match.groups() for m in set(groups): if not m: continue ip_str, ip = self.extract_ip(m) if ip: trunc_ip = self.process_ip(ip) line = line.replace(ip_str, str(trunc_ip)) elif self.replace: line = line.replace(m, self.replace) return line def process_line_column(self, line): """ This function processes a single line based on the provided columns. It returns the anonymized log line as string. :param line: str :return: str """ loglist = line.split(self.delimiter) for index in self.columns: try: column = loglist[index] except IndexError: logger.warning("Column %s does not exist!", index + 1) continue else: if column == "": logger.debug("Column %s is empty.", index + 1) continue ip_str, ip = self.extract_ip(column) if ip: trunc_ip = self.process_ip(ip) loglist[index] = column.replace(ip_str, str(trunc_ip)) elif self.replace: loglist[index] = self.replace return self.delimiter.join(loglist) def process_line(self, line): """ This function processes a single line. It returns the anonymized log line as string. :param line: str :return: str """ if self.regex: return self.process_line_regex(line) return self.process_line_column(line) @staticmethod def extract_ip(column): """ This function extracts the ip from the column and returns it. It can handle following ip formats: - 192.168.100.200 - 192.168.100.200:80 - 192.168.100.200] - 192.168.100.200:80] - 2001:0db8:85a3:0000:0000:8a2e:0370:7334 - [2001:0db8:85a3:0000:0000:8a2e:0370:7334] - [2001:0db8:85a3:0000:0000:8a2e:0370:7334]] - [2001:0db8:85a3:0000:0000:8a2e:0370:7334]:443 - [2001:0db8:85a3:0000:0000:8a2e:0370:7334]:443] :param column: str :return: tuple ( ip str, /32 ipaddress.IPv4Network or /128 ipaddress.IPv6Network) or (None, None) """ # first we try if the whole column is just the ip try: ip = ipaddress.ip_network(unicode(column)) return column, ip except ValueError: # then we try if the ip has the port appended and/or a trailing ']' try: # strip additional ']' from column. Ugly but functional if (column.startswith("[") and column.endswith("]]")) or ( not column.startswith("[") and column.endswith("]") ): column = column[:-1] parsed = urlparse("//{}".format(column)) new_column = parsed.hostname ip = ipaddress.ip_network(unicode(new_column)) return new_column, ip except Exception as e: logger.warning(e) return None, None def truncate_address(self, ip): """ Do the actual masking of the IP address :param ip: ipaddress object :return: ipaddress object """ return ip.supernet(new_prefix=self._prefixes[ip.version])[0] def _validate_ipmask(mask, bits=32): """ Verify if the supplied ip mask is valid. :param mask: the provided ip mask :param bits: 32 for ipv4, 128 for ipv6 :return: int """ msg = "must be an integer between 1 and {}".format(bits) try: mask = int(mask) except ValueError: raise argparse.ArgumentTypeError(msg) if not 0 < mask <= bits: raise argparse.ArgumentTypeError(msg) return mask def _validate_integer_ht_0(value): """ Validate if given string is a number higher than 0. :param value: str or int :return: int """ msg = "must be a positive integer" try: value = int(value) except ValueError: raise argparse.ArgumentTypeError(msg) if not value >= 1: raise argparse.ArgumentTypeError(msg) return value def regex_arg_type(value): try: re.compile(value) except re.error as e: msg = "must be a valid regex." if hasattr(e, "msg"): # pragma: no cover # not available on py27 msg = "must be a valid regex. Error: {}".format(e.msg) raise argparse.ArgumentTypeError(msg) return value def parse_arguments(args): """ Parse all given arguments. :param args: list :return: argparse.Namespace """ parser = argparse.ArgumentParser( description=__description__, epilog="Example-usage in apache-config:\n" 'CustomLog "| /path/to/anonip.py ' '[OPTIONS] --output /path/to/log" ' "combined\n ", formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( "-4", "--ipv4mask", metavar="INTEGER", help="truncate the last n bits (default: %(default)s)", type=lambda x: _validate_ipmask(x, 32), ) parser.set_defaults(ipv4mask=12) parser.add_argument( "-6", "--ipv6mask", type=lambda x: _validate_ipmask(x, 128), metavar="INTEGER", help="truncate the last n bits (default: %(default)s)", ) parser.set_defaults(ipv6mask=84) parser.add_argument( "-i", "--increment", metavar="INTEGER", type=lambda x: _validate_integer_ht_0(x), help="increment the IP address by n (default: %(default)s)", ) parser.set_defaults(increment=0) parser.add_argument("-o", "--output", metavar="FILE", help="file to write to") parser.add_argument( "--input", metavar="FILE", help="File or FIFO to read from (default: stdin)" ) parser.add_argument( "-c", "--column", metavar="INTEGER", dest="columns", nargs="+", type=lambda x: _validate_integer_ht_0(x), help="assume IP address is in column n (1-based indexed; default: 1)", ) parser.add_argument( "-l", "--delimiter", metavar="STRING", type=str, help='log delimiter (default: " ")', ) parser.add_argument( "--regex", metavar="STRING", nargs="+", help="regex for detecting IP addresses (use optionally instead of -c)", type=regex_arg_type, ) parser.add_argument( "-r", "--replace", metavar="STRING", help="replacement string in case address parsing fails (Example: 0.0.0.0)", ) parser.add_argument( "-p", "--skip-private", dest="skip_private", action="store_true", help="do not mask addresses in private ranges. " "See IANA Special-Purpose Address Registry.", ) parser.add_argument( "-d", "--debug", action="store_true", help="print debug messages" ) parser.add_argument("-v", "--version", action="version", version=__version__) args = parser.parse_args(args) if args.regex and (args.columns is not None or args.delimiter is not None): raise parser.error( 'Ambiguous arguments: When using "--regex", "-c" and "-l" can\'t be used.' ) if not args.regex and args.columns is None: args.columns = [1] if not args.regex and args.delimiter is None: args.delimiter = " " if args.regex: try: args.regex = re.compile(r"|".join(args.regex)) except re.error: # pragma: no cover raise argparse.ArgumentTypeError("Failed to compile concatenated regex!") return args def main(): """ Main CLI function for anonip. """ args = parse_arguments(sys.argv[1:]) logging.basicConfig() if args.debug: logger.level = logging.DEBUG else: logger.level = logging.WARNING anonip = Anonip( args.columns, args.ipv4mask, args.ipv6mask, args.increment, args.delimiter, args.replace, args.regex, args.skip_private, ) input_file = output_file = None try: if args.input: input_file = open(args.input, "r") if args.output: output_file = open(args.output, "a") else: output_file = sys.stdout for line in anonip.run(input_file): print(unicode(line), file=output_file) # TODO: when dropping support for Python <= 3.3, move the # flush into the print() output_file.flush() except IOError as err: # pragma: no cover logger.error(err) except KeyboardInterrupt: # pragma: no cover pass finally: if args.input and input_file: input_file.close() if args.output and output_file: output_file.close() if __name__ == "__main__": # pragma: no cover main() Anonip-1.1.0/conftest.py000066400000000000000000000002231416426267300151440ustar00rootroot00000000000000import sys import pytest @pytest.fixture() def backup_and_restore_sys_argv(): old_sys_argv = sys.argv yield sys.argv = old_sys_argv Anonip-1.1.0/setup.cfg000066400000000000000000000020771416426267300145770ustar00rootroot00000000000000[bdist_wheel] universal = 1 [metadata] description-file = README.md license_files = LICENSE.txt [flake8] ignore = # whitespace before ':' E203, # too many leading ### in a block comment E266, # line too long (managed by black) E501, # Line break occurred before a binary operator (this is not PEP8 compatible) W503, # do not enforce existence of docstrings D100, D101, D102, D103, D104, D105, D106, D107, # needed because of https://github.com/ambv/black/issues/144 D202, # other string does contain unindexed parameters P103 max-line-length = 88 max-complexity = 10 doctests = True show-source = True statistics = True [tool:isort] known_first_party=anonip known_third_party=pytest multi_line_output=3 include_trailing_comma=True force_grid_wrap=0 combine_as_imports=True line_length=88 [tool:pytest] addopts = --cov=anonip --cov-report=term-missing --no-cov-on-fail [coverage:paths] source=. [coverage:run] branch = True [coverage:report] precision = 2 fail_under = 100 show_missing = True Anonip-1.1.0/setup.py000066400000000000000000000021151416426267300144610ustar00rootroot00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- """ setup.py for anonip """ from io import open from os import path from setuptools import setup here = path.abspath(path.dirname(__file__)) with open(path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() setup( name="anonip", version="1.1.0", description="Anonip is a tool to anonymize IP-addresses in log-files.", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/DigitaleGesellschaft/Anonip", author="Digitale Gesellschaft", license="BSD", classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: System Administrators", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: Implementation :: PyPy", ], install_requires=['ipaddress; python_version<"3.3"'], py_modules=["anonip"], entry_points={"console_scripts": ["anonip = anonip:main"]}, ) Anonip-1.1.0/tests.py000077500000000000000000000261031416426267300144710ustar00rootroot00000000000000#! /usr/bin/env python # -*- coding: utf-8 -*- """ Tests for anonip. """ from __future__ import print_function, unicode_literals import argparse import logging import re import sys from io import StringIO import pytest import anonip @pytest.mark.parametrize( "ip,v4mask,v6mask,expected", [ ("192.168.100.200", 12, 84, "192.168.96.0"), ("192.168.100.200:80", 12, 84, "192.168.96.0:80"), ("192.168.100.200]", 12, 84, "192.168.96.0]"), ("192.168.100.200:80]", 12, 84, "192.168.96.0:80]"), ("192.168.100.200", 0, 84, "192.168.100.200"), ("192.168.100.200", 4, 84, "192.168.100.192"), ("192.168.100.200", 8, 84, "192.168.100.0"), ("192.168.100.200", 24, 84, "192.0.0.0"), ("192.168.100.200", 32, 84, "0.0.0.0"), ("no_ip_address", 12, 84, "no_ip_address"), ("2001:0db8:85a3:0000:0000:8a2e:0370:7334", 12, 84, "2001:db8:85a0::"), ( "[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:443", 12, 84, "[2001:db8:85a0::]:443", ), ("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]", 12, 84, "[2001:db8:85a0::]"), ("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]]", 12, 84, "[2001:db8:85a0::]]"), ( "[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:443]", 12, 84, "[2001:db8:85a0::]:443]", ), ( "2001:0db8:85a3:0000:0000:8a2e:0370:7334", 12, 0, "2001:db8:85a3::8a2e:370:7334", ), ( "2001:0db8:85a3:0000:0000:8a2e:0370:7334", 12, 4, "2001:db8:85a3::8a2e:370:7330", ), ( "2001:0db8:85a3:0000:0000:8a2e:0370:7334", 12, 8, "2001:db8:85a3::8a2e:370:7300", ), ( "2001:0db8:85a3:0000:0000:8a2e:0370:7334", 12, 24, "2001:db8:85a3::8a2e:300:0", ), ("2001:0db8:85a3:0000:0000:8a2e:0370:7334", 12, 32, "2001:db8:85a3::8a2e:0:0"), ("2001:0db8:85a3:0000:0000:8a2e:0370:7334", 12, 62, "2001:db8:85a3::"), ("2001:0db8:85a3:0000:0000:8a2e:0370:7334", 12, 128, "::"), (" foo", 12, 84, " foo"), ], ) def test_process_line(ip, v4mask, v6mask, expected): a = anonip.Anonip(ipv4mask=v4mask, ipv6mask=v6mask) assert a.process_line(ip) == expected @pytest.mark.parametrize( "ip,increment,expected", [ ("192.168.100.200", 3, "192.168.96.3"), ("192.168.100.200", 284414028745874325, "192.168.96.0"), ], ) def test_increment(ip, increment, expected): a = anonip.Anonip(increment=increment) assert a.process_line(ip) == expected @pytest.mark.parametrize( "line,columns,expected", [ ( "192.168.100.200 some string with öéäü", None, "192.168.96.0 some string with öéäü", ), ( "some 192.168.100.200 string with öéäü", [2], "some 192.168.96.0 string with öéäü", ), ( "some string 192.168.100.200 with öéäü", [3], "some string 192.168.96.0 with öéäü", ), ( "192.168.100.200 192.168.11.222 192.168.123.234", [1, 2, 3], "192.168.96.0 192.168.0.0 192.168.112.0", ), ( "192.168.100.200 192.168.11.222 192.168.123.234", [9999], "192.168.100.200 192.168.11.222 192.168.123.234", ), ], ) def test_column(line, columns, expected): a = anonip.Anonip(columns=columns) assert a.process_line(line) == expected @pytest.mark.parametrize( "line,regex,expected,replace", [ ( '3.3.3.3 - - [20/May/2015:21:05:01 +0000] "GET / HTTP/1.1" 200 13358 "-" "useragent"', re.compile( r"(?:^([^,]+) - - |.* - somefixedstring: ([^,]+) - .* - ([^,]+))" ), '3.3.0.0 - - [20/May/2015:21:05:01 +0000] "GET / HTTP/1.1" 200 13358 "-" "useragent"', None, ), ( "blabla/ 3.3.3.3 /blublu", re.compile(r"^blabla/ ([^,]+) /blublu"), "blabla/ 3.3.0.0 /blublu", None, ), ( "1.1.1.1 - somefixedstring: 2.2.2.2 - some random stuff - 3.3.3.3", re.compile(r"^([^,]+) - somefixedstring: ([^,]+) - .* - ([^,]+)"), "1.1.0.0 - somefixedstring: 2.2.0.0 - some random stuff - 3.3.0.0", None, ), ( "some line that doesn't match the provided regex", re.compile(r"^([^,]+) - somefixedstring: ([^,]+) - .* - ([^,]+)"), "some line that doesn't match the provided regex", None, ), ( "match but no ip/ notanip /blublu", re.compile(r"^match but no ip/ ([^,]+) /blublu"), "match but no ip/ notanip /blublu", None, ), ( "match but no ip/ notanip /blublu", re.compile(r"^match but no ip/ ([^,]+) /blublu"), "match but no ip/ yeah /blublu", "yeah", ), ], ) def test_regex(line, regex, expected, replace): a = anonip.Anonip(regex=regex, replace=replace) assert a.process_line(line) == expected def test_replace(): a = anonip.Anonip(replace="replacement") assert a.process_line("bla something") == "replacement something" def test_delimiter(): a = anonip.Anonip(delimiter=";") assert ( a.process_line("192.168.100.200;some;string;with;öéäü") == "192.168.96.0;some;string;with;öéäü" ) def test_private(): a = anonip.Anonip(skip_private=True) assert a.process_line("192.168.100.200") == "192.168.100.200" def test_run(monkeypatch): a = anonip.Anonip() monkeypatch.setattr( "sys.stdin", StringIO("192.168.100.200\n1.2.3.4\n \n9.8.130.6\n") ) lines = [line for line in a.run()] assert lines == ["192.168.96.0", "1.2.0.0", "", "9.8.128.0"] def test_run_with_input_file(): a = anonip.Anonip() input_file = StringIO("192.168.100.200\n1.2.3.4\n \n9.8.130.6\n") lines = [line for line in a.run(input_file)] assert lines == ["192.168.96.0", "1.2.0.0", "", "9.8.128.0"] @pytest.mark.parametrize( "args,attribute,expected", [ (["-c", "3", "5"], "columns", [3, 5]), (["-4", "24"], "ipv4mask", 24), (["-6", "64"], "ipv6mask", 64), ], ) def test_cli_generic_args(args, attribute, expected): assert getattr(anonip.parse_arguments(args), attribute) == expected @pytest.mark.parametrize( "args,success", [ ([], True), (["--regex", "test"], True), (["-c", "4"], True), (["--regex", "test", "-c", "3"], False), (["--regex", "test", "-l", ";"], False), (["--regex", "test", "-l", ";", "-c", "4"], False), ], ) def test_cli_args_ambiguity(args, success): if success: anonip.parse_arguments(args) return with pytest.raises(SystemExit) as e: anonip.parse_arguments(args) assert e.value.code == 2 @pytest.mark.parametrize( "args,expected", [ (["--regex", "test"], "test"), (["--regex", "foo", "bar", "baz"], "foo|bar|baz"), ], ) def test_regex_concat(args, expected): args = anonip.parse_arguments(args) assert args.regex == re.compile(expected) @pytest.mark.parametrize( "value,valid,bits", [ ("1", True, 32), ("0", False, 32), ("33", False, 32), ("string", False, 32), ("129", False, 128), ], ) def test_cli_validate_ipmask(value, valid, bits): if valid: assert anonip._validate_ipmask(value, bits) == int(value) else: with pytest.raises(argparse.ArgumentTypeError): anonip._validate_ipmask(value, bits) @pytest.mark.parametrize( "value,valid", [("1", True), ("0", False), ("-1", False), ("string", False)] ) def test_cli_validate_integer_ht_0(value, valid): if valid: assert anonip._validate_integer_ht_0(value) == int(value) else: with pytest.raises(argparse.ArgumentTypeError): anonip._validate_integer_ht_0(value) @pytest.mark.parametrize("value,valid", [("valid (.*)", True), ("\\9", False)]) def test_regex_arg_type(value, valid): if valid: assert anonip.regex_arg_type(value) == value else: with pytest.raises(argparse.ArgumentTypeError): anonip.regex_arg_type(value) @pytest.mark.parametrize("to_file", [False, True]) @pytest.mark.parametrize("debug,log_level", [(False, 30), (True, 10)]) def test_main( to_file, debug, log_level, backup_and_restore_sys_argv, capsys, monkeypatch, tmp_path, ): log_file = tmp_path / "anonip.log" sys.argv = [ "anonip.py", "-c", "2", "-4", "12", "-6", "42", "-i", "1", "-l", ";", "-r", "replace", "-p", ] if to_file: sys.argv += ["-o", str(log_file)] if debug: sys.argv.append("-d") monkeypatch.setattr( "sys.stdin", StringIO( "string;192.168.100.200\n" "string;1.2.3.4\n" "string;2001:0db8:85a3:0000:0000:8a2e:0370:7334\n" "string;2a00:1450:400a:803::200e\n" "string;string\n" ), ) anonip.main() if to_file: with log_file.open() as f: lines = [line.rstrip("\n") for line in f.readlines()] else: captured = capsys.readouterr() lines = captured.out.split("\n")[:-1] assert lines == [ "string;192.168.100.200", "string;1.2.0.1", "string;2001:db8:85a3::8a2e:370:7334", "string;2a00:1450:400a:803::1", "string;replace", ] logger = logging.getLogger("anonip") assert logger.level == log_level def test_main_reading_from_input_file(tmp_path, capsys, backup_and_restore_sys_argv): input_filename = tmp_path / "anonip-input.txt" input_filename.write_text( "192.168.100.200 string\n" "1.2.3.4 string\n" "2001:0db8:85a3:0000:0000:8a2e:0370:7334 string\n" "2a00:1450:400a:803::200e string\n" ) sys.argv = ["anonip.py", "--input", str(input_filename), "-d"] anonip.main() captured = capsys.readouterr() lines = captured.out.split("\n")[:-1] assert lines == [ "192.168.96.0 string", "1.2.0.0 string", "2001:db8:85a0:: string", "2a00:1450:4000:: string", ] def test_prefixes_dict(): a = anonip.Anonip(ipv4mask=11, ipv6mask=83) prefixes = a._prefixes assert len(prefixes) == 2 assert 4 in prefixes and bool(prefixes[4]) assert 6 in prefixes and bool(prefixes[6]) def test_properties_v4(): a = anonip.Anonip(ipv4mask=11, ipv6mask=83) assert a.ipv4mask == 11 assert a._prefixes[4] == 21 def test_properties_v6(): a = anonip.Anonip(ipv4mask=11, ipv6mask=83) assert a.ipv6mask == 83 assert a._prefixes[6] == 45 def test_properties_columns(): a = anonip.Anonip() assert a.columns == [0] a.columns = [5, 6] assert a.columns == [4, 5] Anonip-1.1.0/tox.ini000066400000000000000000000014171416426267300142660ustar00rootroot00000000000000# Tox (https://tox.readthedocs.io/) is a tool for running tests # in multiple virtualenvs. This configuration file will run the # test suite on all supported python versions. To use it, "pip install tox" # and then run "tox" from this directory. # # See also https://tox.readthedocs.io/en/latest/config.html for more # configuration options. [tox] envlist = py{27,36,37,38,39,310}, pypy3, flake8, black [testenv] deps= pytest pytest-cov commands=pytest -r a -vv tests.py anonip.py [testenv:flake8] deps= pytest flake8 # can be removed, once flake8-isort dependency is resolved (https://github.com/gforcada/flake8-isort/issues/88) isort<5 flake8-isort flake8-bugbear commands=flake8 [testenv:black] deps= black commands=black --check --diff ./