pax_global_header00006660000000000000000000000064151572642320014521gustar00rootroot0000000000000052 comment=83f2d6a52abd3b071bcaf078ce124a0c18921c28 sferik-buftok-3bc82aa/000077500000000000000000000000001515726423200147615ustar00rootroot00000000000000sferik-buftok-3bc82aa/.github/000077500000000000000000000000001515726423200163215ustar00rootroot00000000000000sferik-buftok-3bc82aa/.github/FUNDING.yml000066400000000000000000000000211515726423200201270ustar00rootroot00000000000000github: [sferik] sferik-buftok-3bc82aa/.github/workflows/000077500000000000000000000000001515726423200203565ustar00rootroot00000000000000sferik-buftok-3bc82aa/.github/workflows/gem_push.yml000066400000000000000000000017041515726423200227120ustar00rootroot00000000000000name: Push gem to RubyGems on: push: tags: - "v*" permissions: contents: read jobs: push: if: github.repository == 'sferik/buftok' runs-on: ubuntu-latest environment: name: rubygems.org url: https://rubygems.org/gems/buftok permissions: contents: write id-token: write steps: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ruby bundler-cache: true - uses: rubygems/configure-rubygems-credentials@v1.0.0 - name: Update RubyGems run: gem update --system - name: Build gem run: bundle exec rake build - name: Sign gem with Sigstore run: gem exec sigstore-cli sign pkg/*.gem --bundle pkg/buftok.gem.sigstore.json - name: Push gem run: gem push pkg/*.gem --attestation pkg/buftok.gem.sigstore.json - name: Wait for release run: gem exec rubygems-await pkg/*.gem sferik-buftok-3bc82aa/.github/workflows/lint.yml000066400000000000000000000005341515726423200220510ustar00rootroot00000000000000name: Lint on: push: branches: [ master ] pull_request: branches: [ master ] jobs: lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Set up Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ruby bundler-cache: true - name: Lint run: bundle exec rake lint 
sferik-buftok-3bc82aa/.github/workflows/mutant.yml000066400000000000000000000005701515726423200224130ustar00rootroot00000000000000name: Mutation Testing on: push: branches: [ master ] pull_request: branches: [ master ] jobs: mutant: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Set up Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ruby bundler-cache: true - name: Mutation testing run: bundle exec rake mutant sferik-buftok-3bc82aa/.github/workflows/test.yml000066400000000000000000000016151515726423200220630ustar00rootroot00000000000000# This workflow uses actions that are not certified by GitHub. # They are provided by a third-party and are governed by # separate terms of service, privacy policy, and support # documentation. # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby name: Ruby on: push: branches: [ master ] pull_request: branches: [ master ] jobs: test: runs-on: ubuntu-latest strategy: matrix: ruby-version: ['3.2', '3.3', '3.4', '4.0'] steps: - uses: actions/checkout@v6 - name: Set up Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby-version }} bundler-cache: true # runs 'bundle install' and caches installed gems automatically - name: Run tests run: bundle exec rake sferik-buftok-3bc82aa/.github/workflows/typecheck.yml000066400000000000000000000005561515726423200230660ustar00rootroot00000000000000name: Type Check on: push: branches: [ master ] pull_request: branches: [ master ] jobs: typecheck: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Set up Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ruby bundler-cache: true - name: Type check run: bundle exec rake steep sferik-buftok-3bc82aa/.github/workflows/yardstick.yml000066400000000000000000000006401515726423200230760ustar00rootroot00000000000000name: Documentation Coverage on: push: branches: [ master 
] pull_request: branches: [ master ] jobs: yardstick: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Set up Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ruby bundler-cache: true - name: Verify YARD documentation coverage run: bundle exec rake verify_measurements sferik-buftok-3bc82aa/.gitignore000066400000000000000000000002501515726423200167460ustar00rootroot00000000000000*.gem *.rbc .bundle .config .yardoc Gemfile.lock InstalledFiles _yardoc coverage doc/ lib/bundler/man measurements/ pkg rdoc spec/reports test/tmp test/version_tmp tmp sferik-buftok-3bc82aa/.rubocop.yml000066400000000000000000000005121515726423200172310ustar00rootroot00000000000000plugins: - rubocop-minitest - rubocop-performance - rubocop-rake AllCops: NewCops: enable SuggestExtensions: false TargetRubyVersion: 3.2 Metrics/BlockLength: Exclude: - Rakefile Style/StringLiterals: EnforcedStyle: double_quotes Style/TernaryParentheses: EnforcedStyle: require_parentheses_when_complex sferik-buftok-3bc82aa/CHANGELOG.md000066400000000000000000000063711515726423200166010ustar00rootroot00000000000000# Changelog All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
## [Unreleased] ## [1.0.1] - 2026-03-20 ### Changed - Improve gem push workflow security and reliability - Add top-level `permissions: contents: read` and scope `contents: write` to the job - Restrict workflow to `sferik/buftok` repository - Add `rubygems.org` deployment environment - Pin `rubygems/configure-rubygems-credentials` to v1.0.0 - Sign gem with Sigstore before pushing - Push gem with `--attestation` flag - Simplify release steps and remove manual git config ## [1.0.0] - 2026-03-20 ### Added - RBS type signatures in `sig/buftok.rbs` with Steep for strict type checking - RuboCop, Standard, rubocop-minitest, rubocop-performance, and rubocop-rake for linting - Mutant for mutation testing with 100% coverage - GitHub Actions workflows for linting, type checking, and mutation testing - `.github/FUNDING.yml` for GitHub Sponsors - Gemspec metadata (`allowed_push_host`, `changelog_uri`, `documentation_uri`, `funding_uri`, `homepage_uri`, `rubygems_mfa_required`, `source_code_uri`, `bug_tracker_uri`) - `CHANGELOG.md` ### Changed - Require Ruby >= 3.2 - Require RubyGems >= 3.0 - Test against Ruby 3.2, 3.3, 3.4, and 4.0 (drop EOL 2.6, 2.7, 3.0) - Update `actions/checkout` to v6 and `ruby/setup-ruby` to v1 - Replace test-unit with Minitest 6 - Replace `inject` with `sum` in `size` method - Use `@tail.clear` instead of `String.new` in `flush` (drop Ruby 1.8.7 workaround) - Move development dependencies from gemspec to Gemfile - Bump rake from `~> 10.0` to `>= 13` - Extract `rejoin_split_delimiter` and `consolidate_input` private methods - Update copyright years to 2006-2026 - Rename Erik Michaels-Ober to Erik Berlin ### Fixed - Typo in test comment ("Desipte" -> "Despite") ## [0.3.0] - 2021-03-25 ### Added - `Buftok` constant as an alias for `BufferedTokenizer` - `BufferedTokenizer#size` method to determine internal buffer size - GitHub Actions CI workflow - Support for `frozen_string_literal` ### Changed - Replace Ruby license with MIT license - Modernize gemspec - 
Remove Travis CI in favor of GitHub Actions - Update supported Ruby versions to 2.6, 2.7, 3.0 ## [0.2.0] - 2013-11-22 ### Added - Tests - Benchmark rake task - Support for multi-character delimiters split across chunks - Section on supported Ruby versions in README ### Changed - Use global input delimiter `$/` as default instead of hard-coded `"\n"` - Unified handling of single/multi-character delimiters ## [0.1.0] - 2013-11-20 ### Added - Initial release of BufferedTokenizer - Line-based tokenization with configurable delimiter - `extract` method for incremental tokenization - `flush` method to retrieve remaining buffer contents [Unreleased]: https://github.com/sferik/buftok/compare/v1.0.1...HEAD [1.0.1]: https://github.com/sferik/buftok/compare/v1.0.0...v1.0.1 [1.0.0]: https://github.com/sferik/buftok/compare/v0.3.0...v1.0.0 [0.3.0]: https://github.com/sferik/buftok/compare/v0.2.0...v0.3.0 [0.2.0]: https://github.com/sferik/buftok/compare/v0.1...v0.2.0 [0.1.0]: https://github.com/sferik/buftok/releases/tag/v0.1 sferik-buftok-3bc82aa/CONTRIBUTING.md000066400000000000000000000037231515726423200172170ustar00rootroot00000000000000## Contributing In the spirit of [free software][free-sw], **everyone** is encouraged to help improve this project. Here are some ways *you* can contribute: [free-sw]: http://www.fsf.org/licensing/essays/free-sw.html * Use alpha, beta, and pre-release versions. * Report bugs. * Suggest new features. * Write or edit documentation. * Write specifications. * Write code (**no patch is too small**: fix typos, add comments, clean up inconsistent whitespace). * Refactor code. * Fix [issues][]. * Review patches. [issues]: https://github.com/sferik/buftok/issues ## Submitting an Issue We use the [GitHub issue tracker][issues] to track bugs and features. Before submitting a bug report or feature request, check to make sure it hasn't already been submitted. 
When submitting a bug report, please include a [Gist][] that includes a stack trace and any details that may be necessary to reproduce the bug, including your gem version, Ruby version, and operating system. Ideally, a bug report should include a pull request with failing specs. [gist]: https://gist.github.com/ ## Submitting a Pull Request 1. [Fork the repository.][fork] 2. [Create a topic branch.][branch] 3. Add specs for your unimplemented feature or bug fix. 4. Run `bundle exec rake test`. If your specs pass, return to step 3. 5. Implement your feature or bug fix. 6. Run `bundle exec rake test`. If your specs fail, return to step 5. 7. Run `open coverage/index.html`. If your changes are not completely covered by your tests, return to step 3. 8. Run `RUBYOPT=W2 bundle exec rake test 2>&1 | grep buftok`. If your changes produce any warnings, return to step 5. 9. Add documentation for your feature or bug fix. 10. Run `bundle exec rake yard`. If your changes are not 100% documented, go back to step 9. 11. Commit and push your changes. 12. 
[Submit a pull request.][pr] [fork]: http://help.github.com/fork-a-repo/ [branch]: http://learn.github.com/p/branching.html [pr]: http://help.github.com/send-pull-requests/ sferik-buftok-3bc82aa/Gemfile000066400000000000000000000005101515726423200162500ustar00rootroot00000000000000# frozen_string_literal: true source "https://rubygems.org" gemspec gem "minitest", "~> 6" gem "mutant" gem "mutant-minitest" gem "rake", ">= 13" gem "rubocop" gem "rubocop-minitest" gem "rubocop-performance" gem "rubocop-rake" gem "simplecov" gem "standard" gem "standard-performance" gem "steep" gem "yard" gem "yardstick" sferik-buftok-3bc82aa/LICENSE.txt000066400000000000000000000021261515726423200166050ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2006-2026 Tony Arcieri, Martin Emde, Erik Berlin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
sferik-buftok-3bc82aa/README.md000066400000000000000000000074371515726423200162530ustar00rootroot00000000000000# BufferedTokenizer [![Gem Version](http://img.shields.io/gem/v/buftok.svg)][gem] [![Test](https://github.com/sferik/buftok/actions/workflows/test.yml/badge.svg)][test] [![Lint](https://github.com/sferik/buftok/actions/workflows/lint.yml/badge.svg)][lint] [![Type Check](https://github.com/sferik/buftok/actions/workflows/typecheck.yml/badge.svg)][typecheck] [![Mutation Testing](https://github.com/sferik/buftok/actions/workflows/mutant.yml/badge.svg)][mutant] [![Documentation Coverage](https://github.com/sferik/buftok/actions/workflows/yardstick.yml/badge.svg)][yardstick] [gem]: https://rubygems.org/gems/buftok [test]: https://github.com/sferik/buftok/actions/workflows/test.yml [lint]: https://github.com/sferik/buftok/actions/workflows/lint.yml [typecheck]: https://github.com/sferik/buftok/actions/workflows/typecheck.yml [mutant]: https://github.com/sferik/buftok/actions/workflows/mutant.yml [yardstick]: https://github.com/sferik/buftok/actions/workflows/yardstick.yml ###### Statefully split input data by a specifiable token BufferedTokenizer takes a delimiter upon instantiation, or acts line-based by default. It allows input to be spoon-fed from some outside source which receives arbitrary length datagrams which may-or-may-not contain the token by which entities are delimited. It's useful any time you need to extract delimited messages from a stream of chunked data. 
## Examples ### TCP Server Process newline-delimited commands from a TCP client: ```ruby require "socket" require "buftok" server = TCPServer.new(4000) loop do client = server.accept tokenizer = BufferedTokenizer.new("\n") while (data = client.readpartial(4096)) tokenizer.extract(data).each do |line| puts "Received: #{line}" end end rescue EOFError client.close end ``` ### Streaming IO Read a large file in chunks without loading it all into memory: ```ruby require "buftok" tokenizer = BufferedTokenizer.new("\n") File.open("large_log_file.txt") do |file| while (chunk = file.read(8192)) tokenizer.extract(chunk).each do |line| process_log_line(line) end end end # Don't forget to flush any remaining data remaining = tokenizer.flush process_log_line(remaining) unless remaining.empty? ``` > [!IMPORTANT] > Always call `flush` when you're done reading from the stream to process any > remaining data that didn't end with a delimiter. ### Custom Delimiters Parse a stream using a multi-character delimiter: ```ruby require "buftok" tokenizer = BufferedTokenizer.new("\r\n\r\n") chunks = ["HTTP/1.1 200 OK\r\n", "Content-Type: text/plain\r\n\r\n", "Hello"] chunks.each do |chunk| tokenizer.extract(chunk).each do |headers| puts "Headers: #{headers}" end end puts "Body so far: #{tokenizer.flush}" ``` > [!TIP] > Multi-character delimiters that get split across chunks are handled > automatically — no special handling is needed on your end. ## Supported Ruby Versions This library aims to support and is [tested against][test] the following Ruby implementations: * Ruby 3.2 * Ruby 3.3 * Ruby 3.4 * Ruby 4.0 If something doesn't work on one of these interpreters, it's a bug. This code will likely still work on older Ruby versions but support will not be provided for end-of-life versions. If you would like this library to support another Ruby version, you may volunteer to be a maintainer. Being a maintainer entails making sure all tests run and pass on that implementation. 
# frozen_string_literal: true

require "bundler"
require "rake/testtask"
require "rubocop/rake_task"
require "standard/rake"
require "yard"
require "yardstick/rake/measurement"
require "yardstick/rake/verify"

# Running plain `rake` executes the whole quality pipeline.
task default: %i[test lint steep yard verify_measurements mutant]

# Documentation coverage: measure it, and fail below 100%.
Yardstick::Rake::Measurement.new
Yardstick::Rake::Verify.new { |verify| verify.threshold = 100 }

RuboCop::RakeTask.new

desc "Run RuboCop and Standard"
task lint: %i[rubocop standard]

desc "Type check with Steep"
task(:steep) { sh "steep check" }

desc "Run mutation testing"
task(:mutant) { sh "mutant run" }

Bundler::GemHelper.install_tasks

# Override release task to skip gem push (handled by GitHub Actions with attestations)
Rake::Task["release"].clear
desc "Build gem and create tag (gem push handled by CI)"
task release: %w[build release:guard_clean release:source_control_push]

YARD::Rake::YardocTask.new

Rake::TestTask.new :test do |t|
  t.libs << "lib"
  t.test_files = FileList["test/**/*.rb"]
end

desc "Benchmark the current implementation"
task :bench do
  require "benchmark"
  require File.expand_path("lib/buftok", __dir__)

  count = 50_000
  delimiter = "\n\n"

  # Announce, then build `count` frozen chunks of growing length. Once every
  # `frequency` chunks one chunk ends with "\n" and the next one starts with
  # "\n", so the two-character delimiter straddles a chunk boundary.
  build_chunks = lambda do |frequency|
    puts "generating #{count} strings, with #{delimiter.inspect} every #{frequency} strings..."
    Array.new(count) do |i|
      position = i % frequency
      lead = (position == 1) ? "\n" : ""
      trail = position.zero? ? "\n" : ""
      (lead + ("s" * i) + trail).freeze
    end
  end

  # Data sets are generated up front, in the same order as they are reported.
  workloads = [1000, 10].map { |frequency| [frequency, build_chunks.call(frequency)] }

  Benchmark.bmbm do |x|
    workloads.each do |frequency, chunks|
      x.report("1 char, freq: #{frequency}") do
        tokenizer = BufferedTokenizer.new
        chunks.each { |chunk| tokenizer.extract(chunk) }
      end

      x.report("2 char, freq: #{frequency}") do
        tokenizer = BufferedTokenizer.new(delimiter)
        chunks.each { |chunk| tokenizer.extract(chunk) }
      end
    end
  end
end
"https://rubydoc.info/gems/buftok/", "funding_uri" => "https://github.com/sponsors/sferik/", "homepage_uri" => spec.homepage, "rubygems_mfa_required" => "true", "source_code_uri" => spec.homepage } end sferik-buftok-3bc82aa/lib/000077500000000000000000000000001515726423200155275ustar00rootroot00000000000000sferik-buftok-3bc82aa/lib/buftok.rb000066400000000000000000000113421515726423200173470ustar00rootroot00000000000000# frozen_string_literal: true # Statefully split input data by a specifiable token # # BufferedTokenizer takes a delimiter upon instantiation, or acts line-based # by default. It allows input to be spoon-fed from some outside source which # receives arbitrary length datagrams which may-or-may-not contain the token # by which entities are delimited. # # @example # tokenizer = BufferedTokenizer.new("\n") # tokenizer.extract("foo\nbar") #=> ["foo"] # tokenizer.extract("baz\n") #=> ["barbaz"] # tokenizer.flush #=> "" class BufferedTokenizer # Limit passed to String#split to preserve trailing empty fields SPLIT_LIMIT = -1 # Return the delimiter overlap length # # The number of characters at the end of a chunk that may contain a # partial delimiter, equal to delimiter.length - 1. # # @example # BufferedTokenizer.new("<>").overlap #=> 1 # # @return [Integer] delimiter.length - 1 # # @api public attr_reader :overlap # Create a new BufferedTokenizer # # Operates on lines delimited by a delimiter, which is by default "\n". # # The input buffer is stored as an array. This is by far the most efficient # approach given language constraints (in C a linked list would be a more # appropriate data structure). Segments of input data are stored in a list # which is only joined when a token is reached, substantially reducing the # number of objects required for the operation. 
# # @example # tokenizer = BufferedTokenizer.new("<>") # # @param delimiter [String] the token delimiter (default: "\n") # # @return [BufferedTokenizer] # # @api public def initialize(delimiter = "\n") @delimiter = delimiter @input = [] @tail = +"" @overlap = @delimiter.length - 1 end # Return the byte size of the internal buffer # # Size is not cached and is determined every time this method is called # in order to optimize throughput for extract. # # @example # tokenizer = BufferedTokenizer.new # tokenizer.extract("foo") # tokenizer.size #=> 3 # # @return [Integer] # # @api public def size @tail.length + @input.sum(&:length) end # Extract tokenized entities from the input data # # Extract takes an arbitrary string of input data and returns an array of # tokenized entities, provided there were any available to extract. This # makes for easy processing of datagrams using a pattern like: # # tokenizer.extract(data).map { |entity| Decode(entity) }.each { ... } # # Using -1 makes split return "" if the token is at the end of # the string, meaning the last element is the start of the next chunk. # # @example # tokenizer = BufferedTokenizer.new # tokenizer.extract("foo\nbar") #=> ["foo"] # # @param data [String] a chunk of input data # # @return [Array] complete tokens extracted from the input # # @api public def extract(data) data = rejoin_split_delimiter(data) @input << @tail entities = data.split(@delimiter, SPLIT_LIMIT) @tail = entities.shift # : String consolidate_input(entities) if entities.length.positive? entities end # Flush the contents of the input buffer # # Return the contents of the input buffer even though a token has not # yet been encountered, then reset the buffer. 
# # @example # tokenizer = BufferedTokenizer.new # tokenizer.extract("foo\nbar") # tokenizer.flush #=> "bar" # # @return [String] the buffered input # # @api public def flush @input << @tail buffer = @input.join @input.clear @tail = +"" buffer end private # Rejoin a delimiter that was split across two chunks # # When the delimiter is longer than one character, it may be split across # two successive chunks. Transfer the trailing overlap from @tail back onto # the front of the incoming data so that split can find the full delimiter. # # @param data [String] incoming data # # @return [String] data with any split delimiter prefix restored # # @api private def rejoin_split_delimiter(data) if @overlap.positive? tail_end = @tail[-@overlap..] @tail.slice!(-@overlap, @overlap) tail_end ? tail_end + data : data else data end end # Consolidate the input buffer into the first entity # # Once at least one delimiter has been found, join the accumulated input # buffer with the first entity and move the trailing partial into @tail. 
# # @param entities [Array] split entities # # @return [void] # # @api private def consolidate_input(entities) @input << @tail entities.unshift @input.join @input.clear @tail = entities.pop # : String end end # Alias for {BufferedTokenizer}, matching the gem name Buftok = BufferedTokenizer sferik-buftok-3bc82aa/mutant.yml000066400000000000000000000003171515726423200170150ustar00rootroot00000000000000--- usage: opensource environment_variables: MUTANT: "true" integration: name: minitest includes: - lib requires: - buftok mutation: operators: full matcher: subjects: - "BufferedTokenizer*" sferik-buftok-3bc82aa/sig/000077500000000000000000000000001515726423200155435ustar00rootroot00000000000000sferik-buftok-3bc82aa/sig/buftok.rbs000066400000000000000000000007271515726423200175530ustar00rootroot00000000000000class BufferedTokenizer SPLIT_LIMIT: Integer @delimiter: String @input: Array[String] @tail: String @overlap: Integer attr_reader overlap: Integer def initialize: (?String delimiter) -> void def size: () -> Integer def extract: (String data) -> Array[String] def flush: () -> String private def consolidate_input: (Array[String] entities) -> void def rejoin_split_delimiter: (String data) -> String end Buftok: singleton(BufferedTokenizer) sferik-buftok-3bc82aa/test/000077500000000000000000000000001515726423200157405ustar00rootroot00000000000000sferik-buftok-3bc82aa/test/test_buftok.rb000066400000000000000000000020131515726423200206120ustar00rootroot00000000000000# frozen_string_literal: true require_relative "test_helper" class BufferedTokenizer class InitTest < Minitest::Test cover BufferedTokenizer def test_buftok_alias assert_same BufferedTokenizer, Buftok end def test_split_limit_constant assert_equal(-1, BufferedTokenizer::SPLIT_LIMIT) end def test_default_delimiter_is_newline tokenizer = BufferedTokenizer.new assert_equal %w[a], tokenizer.extract("a\nb") assert_equal "b", tokenizer.flush end def test_custom_delimiter tokenizer = BufferedTokenizer.new(",") 
# frozen_string_literal: true

require_relative "test_helper"

class BufferedTokenizer
  # Behavioural tests for BufferedTokenizer#extract: single-chunk splitting,
  # buffering across chunks, and delimiters that straddle chunk boundaries.
  class ExtractTest < Minitest::Test
    cover BufferedTokenizer

    def test_returns_complete_tokens
      assert_equal %w[foo], BufferedTokenizer.new.extract("foo\nbar")
    end

    def test_returns_multiple_tokens
      assert_equal %w[foo bar], BufferedTokenizer.new.extract("foo\nbar\nbaz")
    end

    def test_buffers_partial_input
      tokenizer = BufferedTokenizer.new

      assert_equal [], tokenizer.extract("hel")
      assert_equal [], tokenizer.extract("lo")
      assert_equal %w[hello], tokenizer.extract("\n")
    end

    def test_joins_buffered_input_with_first_token
      tokenizer = BufferedTokenizer.new
      tokenizer.extract("foo\nbar")

      assert_equal %w[barbaz qux], tokenizer.extract("baz\nqux\nquu")
    end

    def test_only_delimiter
      assert_equal [""], BufferedTokenizer.new.extract("\n")
    end

    def test_consecutive_delimiters
      assert_equal ["", ""], BufferedTokenizer.new.extract("\n\n")
    end

    def test_trailing_delimiter
      assert_equal %w[line1 line2], BufferedTokenizer.new.extract("line1\nline2\n")
    end

    def test_two_char_delimiter
      assert_equal ["", "foo\n"], BufferedTokenizer.new("<>").extract("<>foo\n<>")
    end

    def test_two_char_delimiter_multiple_tokens
      assert_equal %w[ab cd], BufferedTokenizer.new("<>").extract("ab<>cd<>")
    end

    def test_two_char_delimiter_subsequent_chunk
      tokenizer = BufferedTokenizer.new("<>")
      tokenizer.extract("<>foo\n<>")

      assert_equal %w[bar], tokenizer.extract("bar<>baz")
    end

    def test_three_char_delimiter_split_across_chunks
      tokenizer = BufferedTokenizer.new("|||")

      assert_equal [], tokenizer.extract("foo|")
      assert_equal [], tokenizer.extract("|")
      assert_equal %w[foo], tokenizer.extract("|bar")
    end

    def test_split_delimiter_recombines
      tokenizer = BufferedTokenizer.new("<>")

      assert_equal [], tokenizer.extract("foo<")
      assert_equal %w[foo], tokenizer.extract(">bar<")
      # The buffered "<" is not followed by ">", so it stays part of the token.
      assert_equal %w[bar<baz qux], tokenizer.extract("baz<>qux<>")
    end

    def test_split_delimiter_across_multiple_chunks
      tokenizer = BufferedTokenizer.new("<>")

      assert_equal [], tokenizer.extract("x<")
      assert_equal %w[x], tokenizer.extract(">y<")
      assert_equal %w[y], tokenizer.extract(">")
    end

    def test_single_char_delimiter_does_not_slice
      tokenizer = BufferedTokenizer.new("\n")

      assert_equal [], tokenizer.extract("ab")
      assert_equal %w[ab], tokenizer.extract("\n")
    end

    def test_single_char_delimiter_preserves_data
      tokenizer = BufferedTokenizer.new("\n")

      assert_equal %w[abc], tokenizer.extract("abc\ndef")
      assert_equal %w[def], tokenizer.extract("\n")
    end
  end
end
# frozen_string_literal: true

require_relative "test_helper"

class BufferedTokenizer
  # Tests for BufferedTokenizer#size and for how the internal @input / @tail
  # buffers grow and reset as chunks are extracted.
  class SizeTest < Minitest::Test
    cover BufferedTokenizer

    def test_zero_initially
      assert_equal 0, BufferedTokenizer.new.size
    end

    def test_after_partial_input
      assert_equal 3, feed(BufferedTokenizer.new, "foo").size
    end

    def test_accumulates_across_extracts
      assert_equal 6, feed(BufferedTokenizer.new, "foo", "bar").size
    end

    def test_includes_partial_delimiter
      assert_equal 4, feed(BufferedTokenizer.new("<>"), "foo<").size
    end

    def test_after_token_found
      assert_equal 4, feed(BufferedTokenizer.new("<>"), "foo<", ">bar<").size
    end

    def test_zero_after_complete_tokens
      tokenizer = feed(BufferedTokenizer.new("<>"), "foo<", ">bar<", "baz<>qux<>")

      assert_equal 0, tokenizer.size
    end

    def test_input_buffer_grows_without_delimiter
      tokenizer = feed(BufferedTokenizer.new, "abc")

      assert_equal 1, input_buffer(tokenizer).length
    end

    def test_input_buffer_accumulates
      tokenizer = feed(BufferedTokenizer.new, "abc", "def")

      assert_equal 2, input_buffer(tokenizer).length
    end

    def test_input_buffer_clears_on_delimiter
      tokenizer = feed(BufferedTokenizer.new, "abc", "def\nghi")

      assert_empty input_buffer(tokenizer)
    end

    def test_single_char_delimiter_preserves_tail_in_input
      tokenizer = feed(BufferedTokenizer.new("\n"), "abc")
      # Snapshot the tail before the next chunk pushes it into @input.
      tail_before = tokenizer.instance_variable_get(:@tail).dup
      feed(tokenizer, "def")

      assert_includes input_buffer(tokenizer), tail_before
    end

    private

    # Run each chunk through extract in order and hand the tokenizer back.
    def feed(tokenizer, *chunks)
      chunks.each { |chunk| tokenizer.extract(chunk) }
      tokenizer
    end

    # Peek at the tokenizer's private segment list.
    def input_buffer(tokenizer)
      tokenizer.instance_variable_get(:@input)
    end
  end
end