pax_global_header00006660000000000000000000000064150540063570014517gustar00rootroot0000000000000052 comment=6bee3d73760494cb656ea0a55088a7c0af69a45b golang-github-agnivade-levenshtein-1.2.1/000077500000000000000000000000001505400635700203455ustar00rootroot00000000000000golang-github-agnivade-levenshtein-1.2.1/.github/000077500000000000000000000000001505400635700217055ustar00rootroot00000000000000golang-github-agnivade-levenshtein-1.2.1/.github/workflows/000077500000000000000000000000001505400635700237425ustar00rootroot00000000000000golang-github-agnivade-levenshtein-1.2.1/.github/workflows/ci.yml000066400000000000000000000007661505400635700250710ustar00rootroot00000000000000on: [push, pull_request] name: CI jobs: test: strategy: matrix: go-version: [1.22.x, 1.23.x] os: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.os }} steps: - name: Install Go uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} - name: Checkout code uses: actions/checkout@v4 - name: Lint run: make lint - name: Test run: make test - name: Install run: make install golang-github-agnivade-levenshtein-1.2.1/.gitignore000066400000000000000000000001461505400635700223360ustar00rootroot00000000000000coverage.txt fuzz/fuzz-fuzz.zip fuzz/corpus/corpus/* fuzz/corpus/suppressions/* fuzz/corpus/crashes/* golang-github-agnivade-levenshtein-1.2.1/License.txt000066400000000000000000000020731505400635700224720ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2015 Agniva De Sarker Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. golang-github-agnivade-levenshtein-1.2.1/Makefile000066400000000000000000000003111505400635700220000ustar00rootroot00000000000000all: test install install: go install lint: gofmt -l -s -w . && go vet . test: go test -race -v -coverprofile=coverage.txt -covermode=atomic bench: go test -run=XXX -bench=. -benchmem -count=5 golang-github-agnivade-levenshtein-1.2.1/README.md000066400000000000000000000046041505400635700216300ustar00rootroot00000000000000levenshtein ![Build Status](https://github.com/agnivade/levenshtein/actions/workflows/ci.yml/badge.svg) [![Go Report Card](https://goreportcard.com/badge/github.com/agnivade/levenshtein)](https://goreportcard.com/report/github.com/agnivade/levenshtein) [![PkgGoDev](https://pkg.go.dev/badge/github.com/agnivade/levenshtein)](https://pkg.go.dev/github.com/agnivade/levenshtein) =========== [Go](http://golang.org) package to calculate the [Levenshtein Distance](http://en.wikipedia.org/wiki/Levenshtein_distance) The library is fully capable of working with non-ascii strings. But the strings are not normalized. That is left as a user-dependant use case. Please normalize the strings before passing it to the library if you have such a requirement. - https://blog.golang.org/normalization #### Limitation As a performance optimization, the library can handle strings only up to 65536 characters (runes). If you need to handle strings larger than that, please pin to version 1.0.3. Install ------- go get github.com/agnivade/levenshtein Example ------- ```go package main import ( "fmt" "github.com/agnivade/levenshtein" ) func main() { s1 := "kitten" s2 := "sitting" distance := levenshtein.ComputeDistance(s1, s2) fmt.Printf("The distance between %s and %s is %d.\n", s1, s2, distance) // Output: // The distance between kitten and sitting is 3. } ``` Benchmarks ---------- ``` name time/op Simple/ASCII-4 330ns ± 2% Simple/French-4 617ns ± 2% Simple/Nordic-4 1.16µs ± 4% Simple/Tibetan-4 1.05µs ± 1% name alloc/op Simple/ASCII-4 96.0B ± 0% Simple/French-4 128B ± 0% Simple/Nordic-4 192B ± 0% Simple/Tibetan-4 144B ± 0% name allocs/op Simple/ASCII-4 1.00 ± 0% Simple/French-4 1.00 ± 0% Simple/Nordic-4 1.00 ± 0% Simple/Tibetan-4 1.00 ± 0% ``` Comparisons with other libraries -------------------------------- ``` name time/op Leven/ASCII/agniva-4 353ns ± 1% Leven/ASCII/arbovm-4 485ns ± 1% Leven/ASCII/dgryski-4 395ns ± 0% Leven/French/agniva-4 648ns ± 1% Leven/French/arbovm-4 791ns ± 0% Leven/French/dgryski-4 682ns ± 0% Leven/Nordic/agniva-4 1.28µs ± 1% Leven/Nordic/arbovm-4 1.52µs ± 1% Leven/Nordic/dgryski-4 1.32µs ± 1% Leven/Tibetan/agniva-4 1.12µs ± 1% Leven/Tibetan/arbovm-4 1.31µs ± 0% Leven/Tibetan/dgryski-4 1.16µs ± 0% ``` golang-github-agnivade-levenshtein-1.2.1/go.mod000066400000000000000000000002761505400635700214600ustar00rootroot00000000000000module github.com/agnivade/levenshtein go 1.21 require ( github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 ) golang-github-agnivade-levenshtein-1.2.1/go.sum000066400000000000000000000007141505400635700215020ustar00rootroot00000000000000github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= golang-github-agnivade-levenshtein-1.2.1/levenshtein.go000066400000000000000000000050301505400635700232160ustar00rootroot00000000000000// Package levenshtein is a Go implementation to calculate Levenshtein Distance. // // Implementation taken from // https://gist.github.com/andrei-m/982927#gistcomment-1931258 package levenshtein import "unicode/utf8" // minLengthThreshold is the length of the string beyond which // an allocation will be made. Strings smaller than this will be // zero alloc. const minLengthThreshold = 32 // ComputeDistance computes the levenshtein distance between the two // strings passed as an argument. The return value is the levenshtein distance // // Works on runes (Unicode code points) but does not normalize // the input strings. See https://blog.golang.org/normalization // and the golang.org/x/text/unicode/norm package. func ComputeDistance(a, b string) int { if len(a) == 0 { return utf8.RuneCountInString(b) } if len(b) == 0 { return utf8.RuneCountInString(a) } if a == b { return 0 } // We need to convert to []rune if the strings are non-ASCII. // This could be avoided by using utf8.RuneCountInString // and then doing some juggling with rune indices, // but leads to far more bounds checks. It is a reasonable trade-off. s1 := []rune(a) s2 := []rune(b) // swap to save some memory O(min(a,b)) instead of O(a) if len(s1) > len(s2) { s1, s2 = s2, s1 } // remove trailing identical runes. for i := 0; i < len(s1); i++ { if s1[len(s1)-1-i] != s2[len(s2)-1-i] { s1 = s1[:len(s1)-i] s2 = s2[:len(s2)-i] break } } // Remove leading identical runes. for i := 0; i < len(s1); i++ { if s1[i] != s2[i] { s1 = s1[i:] s2 = s2[i:] break } } lenS1 := len(s1) lenS2 := len(s2) // Init the row. var x []uint16 if lenS1+1 > minLengthThreshold { x = make([]uint16, lenS1+1) } else { // We make a small optimization here for small strings. // Because a slice of constant length is effectively an array, // it does not allocate. So we can re-slice it to the right length // as long as it is below a desired threshold. x = make([]uint16, minLengthThreshold) x = x[:lenS1+1] } // we start from 1 because index 0 is already 0. for i := 1; i < len(x); i++ { x[i] = uint16(i) } // make a dummy bounds check to prevent the 2 bounds check down below. // The one inside the loop is particularly costly. _ = x[lenS1] // fill in the rest for i := 1; i <= lenS2; i++ { prev := uint16(i) for j := 1; j <= lenS1; j++ { current := x[j-1] // match if s2[i-1] != s1[j-1] { current = min(x[j-1]+1, prev+1, x[j]+1) } x[j-1] = prev prev = current } x[lenS1] = prev } return int(x[lenS1]) } golang-github-agnivade-levenshtein-1.2.1/levenshtein_test.go000066400000000000000000000124361505400635700242650ustar00rootroot00000000000000package levenshtein_test import ( "testing" agnivade "github.com/agnivade/levenshtein" arbovm "github.com/arbovm/levenshtein" dgryski "github.com/dgryski/trifles/leven" ) func TestSanity(t *testing.T) { tests := []struct { a, b string want int }{ {"", "hello", 5}, {"hello", "", 5}, {"hello", "hello", 0}, {"ab", "aa", 1}, {"ab", "ba", 2}, {"ab", "aaa", 2}, {"bbb", "a", 3}, {"kitten", "sitting", 3}, {"distance", "difference", 5}, {"levenshtein", "frankenstein", 6}, {"resume and cafe", "resumes and cafes", 2}, {"a very long string that is meant to exceed", "another very long string that is meant to exceed", 6}, } for i, d := range tests { n := agnivade.ComputeDistance(d.a, d.b) if n != d.want { t.Errorf("Test[%d]: ComputeDistance(%q,%q) returned %v, want %v", i, d.a, d.b, n, d.want) } } } func TestUnicode(t *testing.T) { tests := []struct { a, b string want int }{ // Testing acutes and umlauts {"resumé and café", "resumés and cafés", 2}, {"resume and cafe", "resumé and café", 2}, {"Hafþór Júlíus Björnsson", "Hafþor Julius Bjornsson", 4}, // Only 2 characters are less in the 2nd string {"།་གམ་འས་པ་་མ།", "།་གམའས་པ་་མ", 2}, } for i, d := range tests { n := agnivade.ComputeDistance(d.a, d.b) if n != d.want { t.Errorf("Test[%d]: ComputeDistance(%q,%q) returned %v, want %v", i, d.a, d.b, n, d.want) } } } // Benchmarks // ---------------------------------------------- var sink int func BenchmarkSimple(b *testing.B) { tests := []struct { a, b string name string }{ // ASCII {a: "levenshtein", b: "frankenstein", name: "ASCII"}, // Testing acutes and umlauts {a: "resumé and café", b: "resumés and cafés", name: "French"}, {a: "Hafþór Júlíus Björnsson", b: "Hafþor Julius Bjornsson", name: "Nordic"}, // Long strings { a: "a very long string that is meant to exceed", b: "another very long string that is meant to exceed", name: "Long lead", }, { a: "a very long string with a word in the middle that is different", b: "a very long string with some text in the middle that is different", name: "Long middle", }, { a: "a very long string with some text at the end that is not the same", b: "a very long string with some text at the end that is very different", name: "Long trail", }, { a: "+a very long string with different leading and trailing characters+", b: "-a very long string with different leading and trailing characters-", name: "Long diff", }, // Only 2 characters are less in the 2nd string {a: "།་གམ་འས་པ་་མ།", b: "།་གམའས་པ་་མ", name: "Tibetan"}, } tmp := 0 for _, test := range tests { b.Run(test.name, func(b *testing.B) { for n := 0; n < b.N; n++ { tmp = agnivade.ComputeDistance(test.a, test.b) } }) } sink = tmp } func BenchmarkAll(b *testing.B) { tests := []struct { a, b string name string }{ // ASCII {"levenshtein", "frankenstein", "ASCII"}, // Testing acutes and umlauts {"resumé and café", "resumés and cafés", "French"}, {"Hafþór Júlíus Björnsson", "Hafþor Julius Bjornsson", "Nordic"}, // Only 2 characters are less in the 2nd string {"།་གམ་འས་པ་་མ།", "།་གམའས་པ་་མ", "Tibetan"}, } tmp := 0 for _, test := range tests { b.Run(test.name, func(b *testing.B) { b.Run("agniva", func(b *testing.B) { for n := 0; n < b.N; n++ { tmp = agnivade.ComputeDistance(test.a, test.b) } }) b.Run("arbovm", func(b *testing.B) { for n := 0; n < b.N; n++ { tmp = arbovm.Distance(test.a, test.b) } }) b.Run("dgryski", func(b *testing.B) { for n := 0; n < b.N; n++ { tmp = dgryski.Levenshtein([]rune(test.a), []rune(test.b)) } }) }) } sink = tmp } // Fuzzing // ---------------------------------------------- func FuzzComputeDistanceDifferent(f *testing.F) { testcases := []struct{ a, b string }{ {"levenshtein", "frankenstein"}, {"resumé and café", "resumés and cafés"}, {"Hafþór Júlíus Björnsson", "Hafþor Julius Bjornsson"}, {"།་གམ་འས་པ་་མ།", "།་གམའས་པ་་མ"}, {`_p~𕍞`, `b잖PwN`}, {`7ȪJR`, `6L)wӝ`}, {`_p~𕍞`, `Y>q8օ݌`}, } for _, tc := range testcases { f.Add(tc.a, tc.b) } f.Fuzz(func(t *testing.T, a, b string) { n := agnivade.ComputeDistance(a, b) if n < 0 { t.Errorf("Distance can not be negative: %d, a: %q, b: %q", n, a, b) } if n > len(a)+len(b) { t.Errorf("Distance can not be greater than sum of lengths of a and b: %d, a: %q, b: %q", n, a, b) } }) } func FuzzComputeDistanceEqual(f *testing.F) { testcases := []string{ "levenshtein", "frankenstein", "resumé and café", "resumés and cafés", "Hafþór Júlíus Björnsson", "Hafþor Julius Bjornsson", "།་གམ་འས་པ་་མ།", "།་གམའས་པ་་མ", } for _, tc := range testcases { f.Add(tc) } f.Fuzz(func(t *testing.T, a string) { n := agnivade.ComputeDistance(a, a) if n != 0 { t.Errorf("Distance must be zero: %d, a: %q", n, a) } }) }