fastbloom-0.17.0/.cargo_vcs_info.json0000644000000001361046102023000131310ustar { "git": { "sha1": "5bbfc14f98b4fc4cd5a124626174fdf54e0b0c3d" }, "path_in_vcs": "" }fastbloom-0.17.0/.github/workflows/rust.yml000064400000000000000000000032661046102023000170230ustar 00000000000000name: Rust on: push: branches: [ "main" ] pull_request: branches: [ "main" ] env: CARGO_TERM_COLOR: always jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install cargo-hack run: cargo install cargo-hack - name: Build run: cargo hack build --verbose --release --feature-powerset --exclude-features loom - name: Tests run: cargo hack test --verbose --release --feature-powerset --exclude-features loom - name: Install thumb target run: rustup target add thumbv7em-none-eabi - name: 32-bit embedded check run: cargo check --target thumbv7em-none-eabi --no-default-features - name: Loom build run: cargo hack build --lib --bins --tests --feature-powerset --exclude-features serde,rand,default - name: Loom tests run: cargo hack test loom --lib --bins --tests --feature-powerset --exclude-features serde,rand,default msrv: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions-rs/toolchain@v1 with: toolchain: "1.70" override: true - name: Install cargo-hack run: cargo install cargo-hack --version 0.6.37 --force --locked - name: Build run: cargo hack build --verbose --release --feature-powerset --exclude-features loom - name: Tests run: cargo hack test --verbose --release --feature-powerset --exclude-features loom - name: Install thumb target run: rustup target add thumbv7em-none-eabi - name: 32-bit embedded check run: cargo check --target thumbv7em-none-eabi --no-default-features fastbloom-0.17.0/.gitignore000064400000000000000000000000101046102023000136560ustar 00000000000000/target fastbloom-0.17.0/Cargo.lock0000644000000425051046102023000111120ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 3 [[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "bitflags" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" [[package]] name = "cc" version = "1.2.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" dependencies = [ "shlex", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "fastbloom" version = "0.17.0" dependencies = [ "fastrand", "foldhash", "libm", "loom", "portable-atomic", "rand", "serde", "serde_cbor", "siphasher", ] [[package]] name = "fastrand" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "foldhash" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" [[package]] name = "generator" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d18470a76cb7f8ff746cf1f7470914f900252ec36bbc40b569d74b1258446827" dependencies = [ "cc", "cfg-if", "libc", "log", "rustversion", "windows", ] [[package]] name = "getrandom" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" dependencies = [ "cfg-if", "libc", "wasi", "windows-targets", ] [[package]] name = "half" version = "1.8.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" [[package]] name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" version = "0.2.170" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" [[package]] name = "libm" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "log" version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "loom" version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" dependencies = [ "cfg-if", "generator", "scoped-tls", "tracing", "tracing-subscriber", ] [[package]] name = "matchers" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" dependencies = [ "regex-automata 0.1.10", ] [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "nu-ansi-term" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" dependencies = [ "overload", "winapi", ] [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "overload" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "pin-project-lite" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "portable-atomic" version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" dependencies = [ "serde", ] [[package]] name = "ppv-lite86" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] [[package]] name = "rand" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ "rand_chacha", "rand_core", "zerocopy", ] [[package]] name = "rand_chacha" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7a509b1a2ffbe92afab0e55c8fd99dea1c280e8171bd2d88682bb20bc41cbc2c" dependencies = [ "getrandom", "zerocopy", ] [[package]] name = "regex" version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", "regex-automata 0.4.9", "regex-syntax 0.8.5", ] [[package]] name = "regex-automata" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" dependencies = [ "regex-syntax 0.6.29", ] [[package]] name = "regex-automata" version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", "regex-syntax 0.8.5", ] [[package]] name = "regex-syntax" version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustversion" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" [[package]] name = "scoped-tls" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" [[package]] name = "serde" version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_cbor" version = "0.11.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" dependencies = [ "half", "serde", ] [[package]] name = "serde_derive" version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "sharded-slab" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" dependencies = [ "lazy_static", ] [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "siphasher" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" dependencies = [ "serde", ] [[package]] name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "syn" version = "2.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "thread_local" version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ "cfg-if", ] [[package]] name = "tracing" version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", "tracing-core", ] [[package]] name = "tracing-core" 
version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" dependencies = [ "once_cell", "valuable", ] [[package]] name = "tracing-log" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" dependencies = [ "log", "once_cell", "tracing-core", ] [[package]] name = "tracing-subscriber" version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "matchers", "nu-ansi-term", "once_cell", "regex", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", ] [[package]] name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "valuable" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" [[package]] name = "wasi" version = "0.13.3+wasi-0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" dependencies = [ "wit-bindgen-rt", ] [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" version = "0.61.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" dependencies = [ "windows-collections", "windows-core", "windows-future", "windows-link", "windows-numerics", ] [[package]] name = "windows-collections" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" dependencies = [ "windows-core", ] [[package]] name = "windows-core" version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement", "windows-interface", "windows-link", "windows-result", "windows-strings", ] [[package]] name = "windows-future" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ "windows-core", "windows-link", "windows-threading", ] [[package]] name = "windows-implement" version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "windows-interface" version = "0.59.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "windows-link" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] name = "windows-numerics" version = 
"0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ "windows-core", "windows-link", ] [[package]] name = "windows-result" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ "windows-link", ] [[package]] name = "windows-strings" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ "windows-link", ] [[package]] name = "windows-targets" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows-threading" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" dependencies = [ "windows-link", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "wit-bindgen-rt" version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" dependencies = [ "bitflags", ] [[package]] name = "zerocopy" version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" dependencies = [ "proc-macro2", "quote", "syn", ] fastbloom-0.17.0/Cargo.toml0000644000000037551046102023000111410ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) 
dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.70" name = "fastbloom" version = "0.17.0" authors = ["tomtomwombat"] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "The fastest Bloom filter in Rust. No accuracy compromises. Full concurrency support and compatible with any hasher." homepage = "https://github.com/tomtomwombat/fastbloom/" readme = "README.md" keywords = [ "data-structures", "bloom-filter", "bloomfilter", "no_std", ] categories = [ "data-structures", "caching", "algorithms", ] license = "MIT OR Apache-2.0" repository = "https://github.com/tomtomwombat/fastbloom/" [badges.maintenance] status = "actively-developed" [features] default = [ "std", "rand", ] loom = ["dep:loom"] rand = [ "std", "dep:rand", ] serde = [ "dep:serde", "siphasher/serde_std", "portable-atomic/serde", ] std = ["portable-atomic/std"] [lib] name = "fastbloom" path = "src/lib.rs" [dependencies.foldhash] version = "0.2.0" default-features = false [dependencies.libm] version = "0.2" [dependencies.loom] version = "0.7.2" optional = true [dependencies.portable-atomic] version = "1.13.1" features = ["fallback"] default-features = false [dependencies.rand] version = "0.9.0" optional = true [dependencies.serde] version = "1.0.203" features = ["derive"] optional = true [dependencies.siphasher] version = "1.0.0" default-features = false [dev-dependencies.fastrand] version = "2.3.0" [dev-dependencies.serde_cbor] version = "0.11.2" fastbloom-0.17.0/Cargo.toml.orig000064400000000000000000000024051046102023000145670ustar 00000000000000[package] name = "fastbloom" version = "0.17.0" edition = "2021" rust-version = "1.70" authors = ["tomtomwombat"] description = "The fastest Bloom filter in Rust. No accuracy compromises. 
Full concurrency support and compatible with any hasher." license = "MIT OR Apache-2.0" homepage = "https://github.com/tomtomwombat/fastbloom/" repository = "https://github.com/tomtomwombat/fastbloom/" keywords = ["data-structures", "bloom-filter", "bloomfilter", "no_std"] categories = ["data-structures", "caching", "algorithms"] readme = "README.md" [badges] maintenance = { status = "actively-developed" } [features] default = ["std", "rand"] std = ["portable-atomic/std"] loom = ["dep:loom"] rand = ["std", "dep:rand"] serde = ["dep:serde", "siphasher/serde_std", "portable-atomic/serde"] [dependencies] foldhash = { version = "0.2.0", default-features = false } loom = { version = "0.7.2", optional = true } rand = { version = "0.9.0", optional = true } serde = { version = "1.0.203", features = ["derive"], optional = true } siphasher = { version = "1.0.0", default-features = false } libm = "0.2" portable-atomic = { version = "1.13.1", default-features = false, features = ["fallback"] } [dev-dependencies] fastrand = "2.3.0" serde_cbor = "0.11.2" fastbloom-0.17.0/LICENSE-APACHE000064400000000000000000000232511046102023000136260ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
END OF TERMS AND CONDITIONSfastbloom-0.17.0/LICENSE-MIT000064400000000000000000000020711046102023000133330ustar 00000000000000Copyright (c) 2023 Thomas Pendock Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.fastbloom-0.17.0/README.md000064400000000000000000000150051046102023000131570ustar 00000000000000# fastbloom [![Github](https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github)](https://github.com/tomtomwombat/fastbloom) [![Crates.io](https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust)](https://crates.io/crates/fastbloom) [![docs.rs](https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs)](https://docs.rs/fastbloom) ![Downloads](https://img.shields.io/crates/d/fastbloom?style=for-the-badge) The fastest Bloom filter in Rust. No accuracy compromises. Full concurrency support and compatible with any hasher. 
## Overview fastbloom is a fast, flexible, and accurate Bloom filter implemented in Rust. fastbloom's default hasher is SipHash-1-3 using randomized keys but can be seeded or configured to use any hasher. fastbloom is 2-20 times faster and orders of magnitude more accurate than existing Bloom filter implementations. fastbloom's `AtomicBloomFilter` is a concurrent Bloom filter that avoids lock contention. ## Usage Due to a different (improved!) algorithm in 0.17.x, the serialized form of Bloom filters is incompatible with prior versions. ```toml # Cargo.toml [dependencies] fastbloom = "0.17.0" ``` Basic usage: ```rust use fastbloom::BloomFilter; let mut filter = BloomFilter::with_num_bits(1024).expected_items(2); filter.insert("42"); filter.insert("🦀"); ``` Instantiate with a target false positive rate: ```rust use fastbloom::BloomFilter; let filter = BloomFilter::with_false_pos(0.001).items(["42", "🦀"].iter()); assert!(filter.contains("42")); assert!(filter.contains("🦀")); ``` Use any hasher: ```rust use fastbloom::BloomFilter; use foldhash::fast::RandomState; let filter = BloomFilter::with_num_bits(1024) .hasher(RandomState::default()) .items(["42", "🦀"].iter()); ``` Full concurrency support. `AtomicBloomFilter` is a drop-in replacement for `RwLock<BloomFilter>` because all methods take `&self`: ```rust use fastbloom::AtomicBloomFilter; let filter = AtomicBloomFilter::with_num_bits(1024).expected_items(2); filter.insert("42"); filter.insert("🦀"); ``` ## Background Bloom filters are space-efficient approximate membership set data structures supported by an underlying bit array to track item membership. To insert/check membership, a number of bits are set/checked at positions based on the item's hash. False positives from a membership check are possible, but false negatives are not. Once constructed, neither the Bloom filter's underlying memory usage nor number of bits per item change. 
[See more.](https://en.wikipedia.org/wiki/Bloom_filter) ```text hash(4) ──────┬─────┬───────────────┐ ↓ ↓ ↓ 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 ↑ ↑ ↑ └───────────┴───────────┴──── hash(3) (not in the set) ``` ## Implementation fastbloom is blazingly fast because it efficiently derives many index bits from **only one real hash per item** and leverages other research findings on Bloom filters. fastbloom employs "hash composition" on two 32-bit halves of an original 64-bit hash. Each subsequent hash is derived by combining the original hash value with a different constant using modular arithmetic and bitwise operations. This results in a set of hash functions that are effectively independent and uniformly distributed, even though they are derived from the same original hash function. Computing the composition of two original hashes is faster than re-computing the hash with a different seed. This technique is [explained in depth in this paper.](https://www.eecs.harvard.edu/~michaelm/postscripts/rsa2008.pdf) ## Speed - AMD Ryzen 9 5900X 12-Core Processor (3.70 GHz) - 64-bit operating system, x64-based processor ![perf-non-member](https://github.com/user-attachments/assets/a825d2f7-4cd7-4b6b-a447-fd7f3dab356b) ![perf-member](https://github.com/user-attachments/assets/998f9470-b91f-460c-ad2d-1f197160c210) > Hashers used: > - xxhash: sbbf > - Sip1-3: bloom, bloomfilter, probabilistic-collections > - foldhash: fastbloom > > [Benchmark source](https://github.com/tomtomwombat/bench-bloom-filters) ## Accuracy fastbloom does not compromise accuracy. Below is a comparison of false positive rates with other Bloom filter crates: ![fp](https://github.com/user-attachments/assets/17ddaab7-c63f-456a-9e2e-b22c7f994386) [Benchmark source](https://github.com/tomtomwombat/bench-bloom-filters) ## Available Features - **`rand`** - Enabled by default, this has the `DefaultHasher` source its random state using `thread_rng()` instead of hardware sources. 
Getting entropy from a user-space source is considerably faster, but requires additional dependencies to achieve this. Disabling this feature by using `default-features = false` makes `DefaultHasher` source its entropy using `foldhash`, which will have a much simpler code footprint at the expense of speed. - **`serde`** - `BloomFilter`s implement `Serialize` and `Deserialize` when possible. - **`loom`** - `AtomicBloomFilter`s use [loom](https://github.com/tokio-rs/loom) atomics, making it compatible with loom testing. ## References - [Bloom filter - Wikipedia](https://en.wikipedia.org/wiki/Bloom_filter) - [Bloom filters debunked: Dispelling 30 Years of bad math with Coq!](https://gopiandcode.uk/logs/log-bloomfilters-debunked.html) - [Bloom Filter Interactive Demonstration](https://www.jasondavies.com/bloomfilter/) - [Cache-, Hash- and Space-Efficient Bloom Filters](https://web.archive.org/web/20070623102632/http://algo2.iti.uni-karlsruhe.de/singler/publications/cacheefficientbloomfilters-wea2007.pdf) - [Less hashing, same performance: Building a better Bloom filter](https://www.eecs.harvard.edu/~michaelm/postscripts/rsa2008.pdf) - [A fast alternative to the modulo reduction](https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/) ## License Licensed under either of * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. ## Contribution Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
fastbloom-0.17.0/src/bit_vector.rs000064400000000000000000000133701046102023000152000ustar 00000000000000use crate::AtomicU64; use alloc::{boxed::Box, vec::Vec}; use portable_atomic::Ordering::Relaxed; /// A bit vector partitioned in to `u64` blocks. #[derive(Debug, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub(crate) struct BitVec { bits: Box<[u64]>, } /// A bit vector partitioned in to `u64` blocks. #[derive(Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub(crate) struct AtomicBitVec { bits: Box<[AtomicU64]>, } macro_rules! impl_bitvec { ($name:ident, $bits:ty) => { impl $name { #[inline(always)] pub(crate) const fn len(&self) -> usize { self.bits.len() } #[inline(always)] pub(crate) const fn num_bits(&self) -> usize { self.len() * u64::BITS as usize } #[inline(always)] pub(crate) fn as_slice(&self) -> &[$bits] { &self.bits } #[inline(always)] pub(crate) fn iter(&self) -> impl Iterator + '_ { self.bits.iter().map(Self::fetch) } #[inline(always)] pub(crate) fn check(&self, index: usize) -> bool { let (index, bit) = coord(index); Self::fetch(&self.bits[index]) & bit > 0 } } impl FromIterator for $name { fn from_iter>(iter: I) -> Self { let mut bits = iter.into_iter().map(Self::new).collect::>(); bits.shrink_to_fit(); Self { bits: bits.into() } } } impl PartialEq for $name { fn eq(&self, other: &Self) -> bool { if self.len() != other.len() { return false; } core::iter::zip(self.iter(), other.iter()).all(|(l, r)| l == r) } } impl Eq for $name {} }; } impl_bitvec!(BitVec, u64); impl_bitvec!(AtomicBitVec, AtomicU64); impl BitVec { #[inline(always)] fn new(x: u64) -> u64 { x } #[inline(always)] fn fetch(x: &u64) -> u64 { *x } #[inline(always)] pub(crate) fn set(&mut self, index: usize) -> bool { let (index, bit) = coord(index); let previously_contained = self.bits[index] & bit > 0; self.bits[index] |= bit; previously_contained } #[inline(always)] pub(crate) fn clear(&mut self) { for i in 
0..self.len() { self.bits[i] = 0; } } #[inline(always)] pub(crate) fn union(&mut self, other: &BitVec) { assert_eq!(self.len(), other.len(), "expected same length"); for i in 0..self.len() { self.bits[i] |= other.bits[i]; } } #[inline(always)] pub(crate) fn intersect(&mut self, other: &BitVec) { assert_eq!(self.len(), other.len(), "expected same length"); for i in 0..self.len() { self.bits[i] &= other.bits[i]; } } } impl AtomicBitVec { #[inline] fn new(x: u64) -> AtomicU64 { AtomicU64::new(x) } #[inline] fn fetch(x: &AtomicU64) -> u64 { x.load(Relaxed) } #[inline] pub(crate) fn set(&self, index: usize) -> bool { let (index, bit) = coord(index); self.bits[index].fetch_or(bit, Relaxed) & bit > 0 } #[inline] pub(crate) fn clear(&self) { for i in 0..self.len() { self.bits[i].store(0, Relaxed); } } #[inline] pub(crate) fn union(&self, other: &AtomicBitVec) { assert_eq!(self.len(), other.len(), "expected same length"); for i in 0..self.len() { let x = other.bits[i].load(Relaxed); self.bits[i].fetch_or(x, Relaxed); } } #[inline] pub(crate) fn intersect(&self, other: &AtomicBitVec) { assert_eq!(self.len(), other.len(), "expected same length"); for i in 0..self.len() { let x = other.bits[i].load(Relaxed); self.bits[i].fetch_and(x, Relaxed); } } } impl Clone for AtomicBitVec { fn clone(&self) -> Self { self.iter().collect() } } #[inline] fn coord(index: usize) -> (usize, u64) { (index >> 6, 1u64 << (index & 0b111111)) } macro_rules! 
impl_tests { ($modname:ident, $name:ident) => { #[allow(unused_mut)] #[cfg(not(feature = "loom"))] #[cfg(test)] mod $modname { use super::*; use core::iter::repeat; #[test] fn test_to_from_vec() { let size = 42; let b: BitVec = repeat(0).take(size).collect(); assert_eq!(b.num_bits(), b.len() * 64); assert!(size <= b.len()); assert!((size + 64) > b.len()); } #[test] fn test_only_random_inserts_are_contained() { let mut vec: BitVec = repeat(0).take(80).collect(); let mut control = Vec::with_capacity(1000); let mut rng = fastrand::Rng::with_seed(4364); for _ in 0..1000 { let index = rng.usize(0..vec.num_bits()); if !control.contains(&index) { assert!(!vec.check(index)); } control.push(index); vec.set(index); assert!(vec.check(index)); } } } }; } impl_tests!(non_atomic, BitVec); impl_tests!(atomic, AtomicBitVec); fastbloom-0.17.0/src/builder.rs000064400000000000000000000355401046102023000144710ustar 00000000000000use crate::{math::*, AtomicBloomFilter, BloomFilter, BuildHasher, DefaultHasher}; use alloc::vec::Vec; use core::{cmp::max, f64::consts::LN_2, hash::Hash}; macro_rules! builder_with_bits { ($name:ident, $($m:ident)?, $bloom:ident) => { /// A Bloom filter builder with an immutable number of bits. /// #[doc = concat!("This type can be used to construct an instance of [`", stringify!($bloom), "`] via the builder pattern.")] /// /// # Examples /// ``` #[doc = concat!("use fastbloom::", stringify!($bloom), ";")] /// #[doc = concat!("let builder = ", stringify!($bloom), "::with_num_bits(1024);")] #[doc = concat!("let builder = ", stringify!($bloom), "::from_vec(vec![0; 8]);")] /// ``` #[derive(Debug, Clone)] pub struct $name { pub(crate) data: Vec, pub(crate) hasher: S, } impl PartialEq for $name { fn eq(&self, other: &Self) -> bool { self.data == other.data } } impl Eq for $name {} impl $name { /// Sets the seed for this builder. The later constructed Bloom filter /// will use this seed when hashing items. 
/// /// # Examples /// /// ``` #[doc = concat!("use fastbloom::", stringify!($bloom), ";")] /// #[doc = concat!("let bloom = ", stringify!($bloom), "::with_num_bits(1024).seed(&1).hashes(4);")] /// ``` pub fn seed(mut self, seed: &u128) -> Self { self.hasher = DefaultHasher::seeded(&seed.to_be_bytes()); self } } impl $name { /// Sets the hasher for this builder. The later constructed Bloom filter will use /// this hasher when inserting and checking items. /// /// # Examples /// /// ``` #[doc = concat!("use fastbloom::", stringify!($bloom), ";")] /// use foldhash::fast::RandomState; /// #[doc = concat!("let bloom = ", stringify!($bloom), "::with_num_bits(1024).hasher(RandomState::default()).hashes(4);")] /// ``` pub fn hasher(self, hasher: H) -> $name { $name:: { data: self.data, hasher, } } /// "Consumes" this builder, using the provided `num_hashes` to return an #[doc = concat!("empty [`", stringify!($bloom), "`].")] /// /// Note: if `num_hashes` is 0, it is treated as 1. Bloom filters with 0 /// hashes per item are practically useless, and disallowing this case /// enables further optimizations. /// /// # Examples /// ``` #[doc = concat!("use fastbloom::", stringify!($bloom), ";")] /// #[doc = concat!("let bloom = ", stringify!($bloom), "::with_num_bits(1024).hashes(4);")] /// ``` pub fn hashes(self, num_hashes: u32) -> $bloom { $bloom { bits: self.data.into_iter().collect(), num_hashes_minus_one: max(1, num_hashes) - 1, hasher: self.hasher, } } /// "Consumes" this builder, using the provided `expected_items` to return an #[doc = concat!("empty [`", stringify!($bloom), "`]. The number of hashes is optimized based on `expected_items`")] #[doc = concat!("to maximize Bloom filter accuracy (minimize false positives chance on [`", stringify!($bloom), "::contains`]).")] /// More or less than `expected_items` may be inserted into Bloom filter. /// /// Note: `expected_items` will internally be set to 1 if 0 is specified. 
/// /// # Examples /// /// ``` #[doc = concat!("use fastbloom::", stringify!($bloom), ";")] /// #[doc = concat!("let bloom = ", stringify!($bloom), "::with_num_bits(1024).expected_items(500);")] /// ``` pub fn expected_items(self, expected_items: usize) -> $bloom { let expected_items = max(1, expected_items); let hashes = optimal_hashes(self.data.len() * 64, expected_items); self.hashes(hashes) } #[doc = concat!("\"Consumes\" this builder and constructs a [`", stringify!($bloom), "`] containing")] /// all values in `items`. The number of hashes per item /// is optimized based on `items.len()` to maximize Bloom filter accuracy #[doc = concat!("(minimize false positives chance on [`", stringify!($bloom), "::contains`]).")] /// /// # Examples /// /// ``` #[doc = concat!("use fastbloom::", stringify!($bloom), ";")] /// #[doc = concat!("let bloom = ", stringify!($bloom), "::with_num_bits(1024).items([1, 2, 3].iter());")] /// ``` pub fn items<'a, H: Hash + 'a, I: IntoIterator>>( self, items: I, ) -> $bloom { let into_iter = items.into_iter(); let $($m)? filter = self.expected_items(into_iter.len()); filter.insert_all(into_iter); filter } } }; } builder_with_bits!(BuilderWithBits, mut, BloomFilter); builder_with_bits!(AtomicBuilderWithBits, , AtomicBloomFilter); macro_rules! builder_with_fp { ($name:ident, $($m:ident)?, $bloom:ident) => { /// A Bloom filter builder with an immutable false positive rate. /// /// This type can be used to construct an instance of [`BloomFilter`] via the builder pattern. /// /// # Examples /// /// ``` #[doc = concat!("use fastbloom::", stringify!($bloom), ";")] /// #[doc = concat!("let builder = ", stringify!($bloom), "::with_false_pos(0.01);")] /// ``` #[derive(Debug, Clone)] pub struct $name { pub(crate) desired_fp_rate: f64, pub(crate) hasher: S, } impl PartialEq for $name { fn eq(&self, other: &Self) -> bool { self.desired_fp_rate == other.desired_fp_rate } } impl Eq for $name {} impl $name { /// Sets the seed for this builder. 
The later constructed Bloom filter /// will use this seed when hashing items. /// /// # Examples /// /// ``` #[doc = concat!("use fastbloom::", stringify!($bloom), ";")] /// #[doc = concat!("let bloom = ", stringify!($bloom), "::with_false_pos(0.001).seed(&1).expected_items(100);")] /// ``` pub fn seed(mut self, seed: &u128) -> Self { self.hasher = DefaultHasher::seeded(&seed.to_be_bytes()); self } } impl $name { #[doc = concat!("Sets the hasher for this builder. The later constructed [`", stringify!($bloom), "`] will use")] /// this hasher when inserting and checking items. /// /// # Examples /// /// ``` #[doc = concat!("use fastbloom::", stringify!($bloom), ";")] /// use foldhash::fast::RandomState; /// #[doc = concat!("let bloom = ", stringify!($bloom), "::with_false_pos(0.001).hasher(RandomState::default()).expected_items(100);")] /// ``` pub fn hasher(self, hasher: H) -> $name { $name:: { desired_fp_rate: self.desired_fp_rate, hasher, } } /// "Consumes" this builder, using the provided `expected_items` to return an #[doc = concat!("empty [`", stringify!($bloom), "`]. The number of hashes is optimized based on `expected_items`")] #[doc = concat!("to maximize Bloom filter accuracy (minimize false positives chance on [`", stringify!($bloom), "::contains`]).")] /// More or less than `expected_items` may be inserted into Bloom filter. /// /// Note: `expected_items` will internally be set to 1 if 0 is specified. 
/// /// # Examples /// /// ``` #[doc = concat!("use fastbloom::", stringify!($bloom), ";")] /// #[doc = concat!("let bloom = ", stringify!($bloom), "::with_false_pos(0.001).expected_items(500);")] /// ``` pub fn expected_items(self, expected_items: usize) -> $bloom { let expected_items = max(1, expected_items); let num_bits = optimal_size(expected_items, self.desired_fp_rate); $bloom::new_builder(num_bits) .hasher(self.hasher) .expected_items(expected_items) } #[doc = concat!("\"Consumes\" this builder and constructs a [`", stringify!($bloom), "`] containing")] /// all values in `items`. The number of hashes per item and underlying memory /// is optimized based on `items.len()` to meet the desired false positive rate. /// /// # Examples /// /// ``` #[doc = concat!("use fastbloom::", stringify!($bloom), ";")] /// #[doc = concat!("let bloom = ", stringify!($bloom), "::with_false_pos(0.001).items([1, 2, 3].iter());")] /// ``` pub fn items<'a, H: Hash + 'a, I: IntoIterator>>( self, items: I, ) -> $bloom { let into_iter = items.into_iter(); let $($m)? filter = self.expected_items(into_iter.len()); filter.insert_all(into_iter); filter } } }; } builder_with_fp!(BuilderWithFalsePositiveRate, mut, BloomFilter); builder_with_fp!(AtomicBuilderWithFalsePositiveRate, , AtomicBloomFilter); /// Returns the optimal (for false positive rate) number of hashes to perform for an item given the expected number of items in the bloom filter. pub fn optimal_hashes(num_bits: usize, num_items: usize) -> u32 { // Proof: . let num_bits = num_bits as f64; let hashes = LN_2 * num_bits / num_items as f64; max(round(hashes) as u32, 1) } /// Returns the smallest size in bits of a Bloom filter containing `num_items` items to achieve the target false positive rate. 
pub fn optimal_size(num_items: usize, fp: f64) -> usize { let num_items = num_items as f64; let log2_2 = LN_2 * LN_2; let result = 8 * ceil(num_items * ln(fp) / (-8.0 * log2_2)) as usize; max(result, 64) } /// Returns the probability of a "1" bit in the Bloom filter. pub fn expected_density(hashes: u32, bits: usize, items: usize) -> f64 { let total_sets = (items * hashes as usize) as f64; let bits = bits as f64; let prob_set = 1.0 / bits; let prob_not_set = 1.0 - prob_set; let prob_all_not_set = crate::math::pow(prob_not_set, total_sets); 1.0 - prob_all_not_set } /// Returns the expected false positive rate of a Bloom filter. pub fn expected_false_pos(hashes: u32, density: f64) -> f64 { crate::math::pow(density, hashes as f64) } #[cfg(test)] mod tests { use super::*; // optimal size should produce a FP the same. #[test] fn test_expected_false_pos() { for items_mag in 1..=32 { let items = 2usize.pow(items_mag); for fp_mag in 1..=16 { let target_fp = 1.0f64 / 10u64.pow(fp_mag) as f64; let size = optimal_size(items, target_fp); let thresh = if size < 256 { 0.1 // If size is tool small results too sensitive } else { 0.01 }; let h = optimal_hashes(size, items); let density = expected_density(h, size, items); let expected_fp = expected_false_pos(h, density); let err = (expected_fp - target_fp) / target_fp; assert!(err < thresh); } } } fn density_err(d: f64) -> f64 { (0.5 - d).abs() } #[test] fn test_optimal_hashes() { for bits_mag in 6..=16 { let bits = 2usize.pow(bits_mag); for items_mag in 1..=16 { let items = 2usize.pow(items_mag); let h = optimal_hashes(bits, items); // Too sensitive to rounding errors if h > 1000 { continue; } let density = expected_density(h, bits, items); assert!(density_err(density) <= density_err(expected_density(h + 1, bits, items))); assert!(density_err(density) <= density_err(expected_density(h - 1, bits, items))); } } } } #[cfg(test)] mod for_accuracy_tests { use crate::BloomFilter; #[test] fn data_size() { let size_bits = 512 * 1000; let 
bloom = BloomFilter::with_num_bits(size_bits).hashes(4); assert_eq!(bloom.num_bits(), size_bits); } #[test] fn specified_hashes() { for num_hashes in 1..1000 { assert_eq!( num_hashes, BloomFilter::with_num_bits(1) .hashes(num_hashes) .num_hashes() ); assert_eq!( num_hashes, BloomFilter::with_num_bits(1) .hashes(num_hashes) .num_hashes() ); } } } #[cfg(test)] mod for_size_tests { use crate::{AtomicBloomFilter, BloomFilter}; #[test] fn test_size() { let _: BloomFilter = BloomFilter::new_with_false_pos(0.0001).expected_items(10000); } #[test] fn test_zero_hashes() { let bloom = BloomFilter::with_num_bits(512).hashes(0); assert_eq!(bloom.num_hashes(), 1); let bloom = AtomicBloomFilter::with_num_bits(512).hashes(0); assert_eq!(bloom.num_hashes(), 1); } } fastbloom-0.17.0/src/hasher.rs000064400000000000000000000134731046102023000143160ustar 00000000000000use core::hash::{BuildHasher, Hasher}; use siphasher::sip::SipHasher13; #[derive(Clone, Debug, Default, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct CloneBuildHasher { hasher: H, } impl CloneBuildHasher { #[allow(dead_code)] fn new(hasher: H) -> Self { Self { hasher } } } impl BuildHasher for CloneBuildHasher { type Hasher = H; #[inline] fn build_hasher(&self) -> Self::Hasher { self.hasher.clone() } } /// The default hasher for `BloomFilter`. /// /// `DefaultHasher` has a faster `build_hasher` than `std::collections::hash_map::RandomState` or `SipHasher13`. /// This is important because `build_hasher` is called once for every actual hash. 
pub type DefaultHasher = CloneBuildHasher; impl DefaultHasher { pub fn seeded(seed: &[u8; 16]) -> Self { Self { hasher: RandomDefaultHasher::seeded(seed), } } } #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct RandomDefaultHasher(SipHasher13); impl RandomDefaultHasher { #[inline] pub fn seeded(seed: &[u8; 16]) -> Self { Self(SipHasher13::new_with_key(seed)) } } impl Default for RandomDefaultHasher { #[inline] fn default() -> Self { #[cfg(not(feature = "rand"))] { use foldhash::fast::RandomState; // create two random states let state_a = RandomState::default(); let state_b = RandomState::default(); // combine the two random states into a single 128-bit seed let low = state_a.build_hasher().finish() as u128; let high = state_b.build_hasher().finish() as u128; Self::seeded(&((high << 64) | low).to_ne_bytes()) } #[cfg(feature = "rand")] { let mut seed = [0u8; 16]; use rand::RngCore; rand::rng().fill_bytes(&mut seed); Self::seeded(&seed) } } } impl Hasher for RandomDefaultHasher { #[inline] fn finish(&self) -> u64 { self.0.finish() } #[inline] fn write(&mut self, bytes: &[u8]) { self.0.write(bytes) } #[inline] fn write_u8(&mut self, i: u8) { self.0.write_u8(i) } #[inline] fn write_u16(&mut self, i: u16) { self.0.write_u16(i) } #[inline] fn write_u32(&mut self, i: u32) { self.0.write_u32(i) } #[inline] fn write_u64(&mut self, i: u64) { self.0.write_u64(i) } #[inline] fn write_u128(&mut self, i: u128) { self.0.write_u128(i) } #[inline] fn write_usize(&mut self, i: usize) { self.0.write_usize(i) } #[inline] fn write_i8(&mut self, i: i8) { self.0.write_i8(i) } #[inline] fn write_i16(&mut self, i: i16) { self.0.write_i16(i) } #[inline] fn write_i32(&mut self, i: i32) { self.0.write_i32(i) } #[inline] fn write_i64(&mut self, i: i64) { self.0.write_i64(i) } #[inline] fn write_i128(&mut self, i: i128) { self.0.write_i128(i) } #[inline] fn write_isize(&mut self, i: isize) { self.0.write_isize(i) } } #[cfg(test)] mod 
test { use crate::hasher::RandomDefaultHasher; use core::hash::Hasher; use siphasher::sip::SipHasher13; fn hash_all(mut x: impl Hasher) -> u64 { x.write(&[1; 16]); x.write_u8(1); x.write_u16(1); x.write_u32(1); x.write_u64(1); x.write_u128(1); x.write_usize(1); x.write_i8(1); x.write_i16(1); x.write_i32(1); x.write_i64(1); x.write_i128(1); x.write_isize(1); x.finish() } #[test] fn test_hasher() { let h1 = RandomDefaultHasher::seeded(&[0; 16]); let h2 = SipHasher13::new_with_key(&[0; 16]); assert_eq!(hash_all(h1), hash_all(h2),); } #[test] fn test_random_default_hasher() { // two different instances of RandomDefaultHasher should have different seeds let h1 = RandomDefaultHasher::default(); let h2 = RandomDefaultHasher::default(); assert_ne!(h1.finish(), h2.finish()); // same seed value should result in the same hash let h3 = RandomDefaultHasher::seeded(&[0; 16]); let h4 = RandomDefaultHasher::seeded(&[0; 16]); assert_eq!(h3.finish(), h4.finish()); // different seed value should result in different hash let h5 = RandomDefaultHasher::seeded(&[1; 16]); let h6 = RandomDefaultHasher::seeded(&[2; 16]); assert_ne!(h5.finish(), h6.finish()); } } #[derive(Clone, Copy)] pub(crate) struct DoubleHasher { h1: u64, h2: u64, } impl DoubleHasher { /// The first two hashes of the value, h1 and h2. /// /// Subsequent hashes, h, are efficiently derived from these two using `next_hash`. /// /// This strategy is a modified version of . #[inline] pub(crate) fn new(h1: u64) -> Self { // 0xffff_ffff_ffff_ffff / 0x517c_c1b7_2722_0a95 = π let h2 = h1.wrapping_mul(0x51_7c_c1_b7_27_22_0a_95); Self { h1, h2 } } /// "Double hashing" produces a new hash efficiently from two orignal hashes. /// /// Modified from . 
#[inline] pub(crate) fn next(&mut self) -> u64 { self.h1 = self.h1.rotate_left(5).wrapping_add(self.h2); self.h1 } } fastbloom-0.17.0/src/lib.rs000064400000000000000000000776771046102023000136320ustar 00000000000000#![allow(rustdoc::bare_urls)] #![warn(unreachable_pub)] #![doc = include_str!("../README.md")] #![cfg_attr(not(feature = "std"), no_std)] extern crate alloc; use alloc::vec::Vec; use core::hash::{BuildHasher, Hash, Hasher}; use core::iter::repeat; mod hasher; pub use hasher::DefaultHasher; use hasher::DoubleHasher; mod builder; pub use builder::{ expected_density, expected_false_pos, optimal_hashes, optimal_size, AtomicBuilderWithBits, AtomicBuilderWithFalsePositiveRate, BuilderWithBits, BuilderWithFalsePositiveRate, }; mod bit_vector; use bit_vector::{AtomicBitVec, BitVec}; mod math; #[cfg(feature = "loom")] pub(crate) use loom::sync::atomic::AtomicU64; #[cfg(not(feature = "loom"))] pub(crate) use portable_atomic::AtomicU64; #[cfg(all(feature = "loom", feature = "serde"))] compile_error!("features `loom` and `serde` are mutually exclusive"); macro_rules! impl_bloom { ($name:ident, $builder_bits:ident, $builder_fp:ident, $bitvec:ident, $bits:ty, $ismut:literal, $($m:ident)?) => { /// A space efficient approximate membership set data structure. /// False positives from [`contains`](Self::contains) are possible, but false negatives /// are not, i.e. [`contains`](Self::contains) for all items in the set is guaranteed to return /// true, while [`contains`](Self::contains) for all items not in the set probably return false. /// /// [`Self`] is supported by an underlying bit vector to track item membership. /// To insert, a number of bits are set at positions based on the item's hash in the underlying bit vector. /// To check membership, a number of bits are checked at positions based on the item's hash in the underlying bit vector. /// /// Once constructed, neither the Bloom filter's underlying memory usage nor number of bits per item change. 
/// /// # Examples /// Basic usage: /// ```rust #[doc = concat!("use fastbloom::", stringify!($name), ";")] /// #[doc = concat!("let ", $ismut, "filter = ", stringify!($name), "::with_num_bits(1024).expected_items(2);")] /// filter.insert("42"); /// filter.insert("🦀"); /// ``` /// Instantiate with a target false positive rate: /// ```rust #[doc = concat!("use fastbloom::", stringify!($name), ";")] /// #[doc = concat!("let filter = ", stringify!($name), "::with_false_pos(0.001).items([\"42\", \"🦀\"].iter());")] /// assert!(filter.contains("42")); /// assert!(filter.contains("🦀")); /// ``` /// Use any hasher: /// ```rust #[doc = concat!("use fastbloom::", stringify!($name), ";")] /// use foldhash::fast::RandomState; /// #[doc = concat!("let filter = ", stringify!($name), "::with_num_bits(1024)")] /// .hasher(RandomState::default()) /// .items(["42", "🦀"].iter()); /// ``` #[derive(Debug, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct $name { bits: $bitvec, num_hashes_minus_one: u32, hasher: S, } impl $name { fn new_builder(num_bits: usize) -> $builder_bits { assert!(num_bits > 0); // Only available in rust 1.73+ // let num_u64s = num_bits.div_ceil(64); let num_u64s = (num_bits + 64 - 1) / 64; $builder_bits { data: repeat(0).take(num_u64s).collect(), hasher: Default::default(), } } fn new_from_vec(vec: Vec) -> $builder_bits { assert!(!vec.is_empty()); $builder_bits { data: vec, hasher: Default::default(), } } fn new_with_false_pos(fp: f64) -> $builder_fp { assert!(fp > 0.0); $builder_fp { desired_fp_rate: fp, hasher: Default::default(), } } /// Creates a new builder instance to construct a [`Self`] with a target false positive rate of `fp`. /// The memory size of the underlying bit vector is dependent on the false positive rate and the expected number of items. /// # Panics /// Panics if the false positive rate, `fp`, is 0. 
/// /// # Examples /// ``` #[doc = concat!("use fastbloom::", stringify!($name), ";")] #[doc = concat!("let filter = ", stringify!($name), "::with_false_pos(0.001).expected_items(1000);")] /// ``` pub fn with_false_pos(fp: f64) -> $builder_fp { $name::new_with_false_pos(fp) } /// Creates a builder instance to construct a [`Self`] with `num_bits` number of bits for tracking item membership. /// # Panics /// Panics if the number of bits, `num_bits`, is 0. /// /// # Examples /// ``` #[doc = concat!("use fastbloom::", stringify!($name), ";")] #[doc = concat!("let filter = ", stringify!($name), "::with_num_bits(1024).hashes(4);")] /// ``` pub fn with_num_bits(num_bits: usize) -> $builder_bits { $name::new_builder(num_bits) } /// Creates a builder instance to construct a [`Self`] initialized with bit vector `bit_vec`. /// /// # Panics /// Panics if the bit vector, `bit_vec`, is empty. /// # Examples /// ``` #[doc = concat!("use fastbloom::", stringify!($name), ";")] /// #[doc = concat!("let orig = ", stringify!($name), "::with_false_pos(0.001).seed(&42).items([1, 2].iter());")] /// let num_hashes = orig.num_hashes(); #[doc = concat!("let new = ", stringify!($name), "::from_vec(orig.iter().collect()).seed(&42).hashes(num_hashes);")] /// /// assert!(new.contains(&1)); /// assert!(new.contains(&2)); /// ``` pub fn from_vec(bit_vec: Vec) -> $builder_bits { $name::new_from_vec(bit_vec) } } impl $name { /// Checks if an element is possibly in the Bloom filter. /// /// # Returns /// /// `true` if the item is possibly in the Bloom filter, `false` otherwise. /// /// # Examples /// /// ``` #[doc = concat!("use fastbloom::", stringify!($name), ";")] /// #[doc = concat!("let bloom = ", stringify!($name), "::with_num_bits(1024).items([1, 2, 3].iter());")] /// assert!(bloom.contains(&1)); /// ``` #[inline] pub fn contains(&self, val: &(impl Hash + ?Sized)) -> bool { self.contains_hash(self.source_hash(val)) } /// Checks if the hash of an element is possibly in the Bloom filter. 
/// That is, the element is pre-hashed and all subsequent hashes are derived from this "source" hash. /// /// # Returns /// /// `true` if the item is possibly in the Bloom filter, `false` otherwise. #[inline] pub fn contains_hash(&self, hash: u64) -> bool { match self.bits.check(index(self.num_bits(), hash)) { false => false, true => { let mut hasher = DoubleHasher::new(hash); (0..self.num_hashes_minus_one).all(|_| { let h = hasher.next(); self.bits.check(index(self.num_bits(), h)) }) } } } /// Returns the number of hashes per item. #[inline] pub fn num_hashes(&self) -> u32 { self.num_hashes_minus_one + 1 } /// Returns the total number of in-memory bits supporting the Bloom filter. pub fn num_bits(&self) -> usize { self.bits.num_bits() } /// Returns an iterator over the raw bit values of this Bloom filter. #[inline] pub fn iter(&self) -> impl Iterator + '_ { self.bits.iter() } /// Returns the underlying slice of this Bloom filter's bit contents. #[inline] pub fn as_slice(&self) -> &[$bits] { self.bits.as_slice() } /// Returns the hash of `val` using this Bloom filter's hasher. /// The resulting value can be used in [`Self::contains_hash`] or [`Self::insert_hash`]. /// All subsequent hashes are derived from this source hash. /// This is useful for pre-computing hash values in order to store them or send them over the network. #[inline] pub fn source_hash(&self, val: &(impl Hash + ?Sized)) -> u64 { let mut state = self.hasher.build_hasher(); val.hash(&mut state); state.finish() } /// Returns the expected false positive rate of this Bloom filter when it contains `num_items` items. pub fn expected_false_pos(&self, num_items: usize) -> f64 { let density = crate::expected_density(self.num_hashes(), self.num_bits(), num_items); crate::expected_false_pos(self.num_hashes(), density) } /// Inserts an element into the Bloom filter. /// /// # Returns /// /// `true` if the item may have been previously in the Bloom filter (indicating a potential false positive), /// `false` otherwise.
/// /// # Examples /// ``` #[doc = concat!("use fastbloom::", stringify!($name), ";")] /// #[doc = concat!("let ", $ismut, "bloom = ", stringify!($name), "::with_num_bits(1024).hashes(4);")] /// bloom.insert(&2); /// assert!(bloom.contains(&2)); /// ``` #[inline] pub fn insert(&$($m)? self, val: &(impl Hash + ?Sized)) -> bool { self.insert_hash(self.source_hash(val)) } /// Inserts the hash of an element into the Bloom filter. /// That is, the element is pre-hashed and all subsequent hashes are derived from this "source" hash. /// /// # Returns /// /// `true` if the item may have been previously in the Bloom filter (indicating a potential false positive), /// `false` otherwise. #[inline] pub fn insert_hash(&$($m)? self, hash: u64) -> bool { let mut previously_contained = true; previously_contained &= self.bits.set(index(self.num_bits(), hash)); let mut hasher = DoubleHasher::new(hash); for _ in 0..self.num_hashes_minus_one { let h = hasher.next(); previously_contained &= self.bits.set(index(self.num_bits(), h)); } previously_contained } /// Inserts all the items in `iter` into `self`. #[inline] pub fn insert_all<'a, T: Hash + 'a, I: IntoIterator>(&$($m)? self, iter: I) { for val in iter { self.insert(val); } } /// Clears all of the bits in the Bloom filter, removing all items. #[inline] pub fn clear(&$($m)? self) { self.bits.clear(); } /// Unions `other` into `self`. The hashers of both Bloom filters must be identical (this is not enforced!). /// /// # Panics /// Panics if the other Bloom filter has a different number of bits or hashes than `self`.
/// /// # Example /// ``` #[doc = concat!("use fastbloom::", stringify!($name), ";")] /// #[doc = concat!("let ", $ismut, "bloom = ", stringify!($name), "::with_num_bits(4096).seed(&1).hashes(4);")] #[doc = concat!("let ", $ismut, "other = ", stringify!($name), "::with_num_bits(4096).seed(&1).hashes(4);")] /// for x in 0..=1000 { /// bloom.insert(&x); /// } /// for x in 500..=1500 { /// other.insert(&x); /// } /// bloom.union(&other); /// /// for x in 0..=2000 { /// assert_eq!(bloom.contains(&x), bloom.contains(&x) || other.contains(&x)); /// } /// ``` #[inline] pub fn union(&$($m)? self, other: &Self) { assert_eq!( self.num_hashes(), other.num_hashes(), "expected same number of hashes" ); self.bits.union(&other.bits); } /// Intersects `other` onto `self`. The hashers of both Bloom filters must be identical (this is not enforced!). /// /// # Panics /// Panics if the other Bloom filter has a different number of bits or hashes than `self`. /// /// # Example /// ``` #[doc = concat!("use fastbloom::", stringify!($name), ";")] /// #[doc = concat!("let ", $ismut, "bloom = ", stringify!($name), "::with_num_bits(4096).seed(&1).hashes(4);")] #[doc = concat!("let ", $ismut, "other = ", stringify!($name), "::with_num_bits(4096).seed(&1).hashes(4);")] /// for x in 0..=1000 { /// bloom.insert(&x); /// } /// for x in 500..=1500 { /// other.insert(&x); /// } /// bloom.intersect(&other); /// /// for x in 0..=2000 { /// assert_eq!(bloom.contains(&x), bloom.contains(&x) && other.contains(&x)); /// } /// ``` #[inline] pub fn intersect(&$($m)?
self, other: &Self) { assert_eq!( self.num_hashes(), other.num_hashes(), "expected same number of hashes" ); self.bits.intersect(&other.bits); } } impl Extend for $name where T: Hash, { #[inline] fn extend>(&mut self, iter: I) { for val in iter { self.insert(&val); } } } impl PartialEq for $name { fn eq(&self, other: &Self) -> bool { self.bits == other.bits && self.num_hashes() == other.num_hashes() } } impl Eq for $name {} }; } impl_bloom!( BloomFilter, BuilderWithBits, BuilderWithFalsePositiveRate, BitVec, u64, "mut ", mut ); impl_bloom!( AtomicBloomFilter, AtomicBuilderWithBits, AtomicBuilderWithFalsePositiveRate, AtomicBitVec, AtomicU64, "", ); /// Returns the bit index for an item's hash. /// The returned bit index is always in the range `0..num_bits`. /// This implementation is a more performant alternative to `hash % num_bits`: /// the 64-bit hash is multiplied by `num_bits` in 128-bit arithmetic and the upper 64 bits of the product are kept. #[inline] pub(crate) fn index(num_bits: usize, hash: u64) -> usize { ((hash as u128 * num_bits as u128) >> 64) as usize } macro_rules! impl_tests { ($modname:ident, $name:ident) => { #[allow(unused_mut)] #[cfg(not(feature = "loom"))] #[cfg(test)] mod $modname { use super::*; use alloc::format; trait Seeded: BuildHasher { fn seeded(seed: &[u8; 16]) -> Self; } impl Seeded for DefaultHasher { fn seeded(seed: &[u8; 16]) -> Self { Self::seeded(seed) } } const TRIALS: usize = 20_000_000; fn false_pos_rate(filter: &$name) -> f64 { let mut total = 0; let mut false_positives = 0; for x in non_member_nums() { total += 1; false_positives += filter.contains_hash(x) as usize; } (false_positives as f64) / (total as f64) } fn member_nums(num: usize) -> impl Iterator { random_numbers(num, 5) } fn non_member_nums() -> impl Iterator { random_numbers(TRIALS, 7) } fn random_numbers(num: usize, seed: u64) -> impl Iterator { let mut rng = fastrand::Rng::with_seed(seed); (0..=num).map(move |_| rng.u64(..)) } #[test] fn test_to_from_vec() { fn to_from_(size: usize) { let mut b = $name::new_builder(size).seed(&1).hashes(3); b.extend(member_nums(1000)); let b2 =
$name::new_from_vec(b.iter().collect()) .seed(&1) .hashes(3); assert_eq!(b, b2); assert_eq!(b.num_bits(), b.bits.len() * 64); assert!(size <= b.bits.len() * 64); assert!((size + u64::BITS as usize) > b.bits.len() * 64); } for size in 1..=10009 { to_from_(size); } } #[test] fn first_insert_false() { let mut filter = $name::with_num_bits(1202).expected_items(4); assert!(!filter.insert(&5)); } #[test] fn target_fp_is_accurate_actual() { target_fp_is_accurate(1..=8, 3..=6, |bloom: &mut $name, num_items: usize| { for x in member_nums(num_items) { bloom.insert_hash(x); } false_pos_rate(&bloom) }); } #[test] fn target_fp_is_accurate_expected() { target_fp_is_accurate(1..=8, 2..=7, |bloom: &mut $name, num_items: usize| { bloom.expected_false_pos(num_items) }); } fn target_fp_is_accurate( target_fp_range: core::ops::RangeInclusive, num_items_range: core::ops::RangeInclusive, measure_fp: fn(&mut $name, usize) -> f64, ) { // actual false pos is at most 2x as high as expected // this is slightly higher to account for random variance and limited time to sample false pos rate. 
let thresh = 1.0f64; // fp: 10%, 1%, 0.1%, etc for fp_mag in target_fp_range { let fp = 1.0f64 / 10u64.pow(fp_mag) as f64; // Expected items: 10, 100, 1000, etc for num_items_mag in num_items_range.clone() { let num_items = 10usize.pow(num_items_mag); let allocated_bytes = crate::optimal_size(num_items, fp) >> 3; // Make sure we don't allocate too much assert!(allocated_bytes < 64_000_000, "About to allocate {} bytes", allocated_bytes); let mut filter = $name::new_with_false_pos(fp) .seed(&42) .expected_items(num_items); let sample_fp = measure_fp(&mut filter, num_items); let err = (sample_fp - fp) / fp; let size_bits = filter.num_bits(); assert!(sample_fp < fp || err < thresh, "err {err:}, thresh {thresh:}, num_items: {num_items:}, size bits: {size_bits:}, fp: {fp:}, sample fp: {sample_fp:}"); } } } #[test] fn nothing_after_clear() { for mag in 1..6 { let size = 10usize.pow(mag); for bloom_size_mag in 6..10 { let num_blocks_bytes = 1 << bloom_size_mag; let num_bits = num_blocks_bytes * 8; let mut filter = $name::new_builder(num_bits) .seed(&7) .expected_items(size); filter.extend(member_nums(size)); assert!(filter.num_hashes() > 0); filter.clear(); assert!(member_nums(size).all(|x| !filter.contains(&x))); } } } #[test] fn random_inserts_always_contained() { for mag in 1..6 { let size = 10usize.pow(mag); for bloom_size_mag in 6..10 { let num_blocks_bytes = 1 << bloom_size_mag; let num_bits = num_blocks_bytes * 8; let mut filter = $name::new_builder(num_bits).expected_items(size); filter.extend(member_nums(size)); assert!(member_nums(size).all(|x| filter.contains(&x))); assert!(member_nums(size).all(|x| filter.insert(&x))); } } } #[test] fn test_optimal_hashes_is_optimal() { fn test_optimal_hashes_is_optimal_() { let sizes = [1000, 2000, 5000, 6000, 8000, 10000]; for num_items in sizes { let num_bits = 65000 * 8; let mut filter = $name::new_builder(num_bits) .hasher(H::seeded(&[42; 16])) .expected_items(num_items); filter.extend(member_nums(num_items)); let 
fp_to_beat = false_pos_rate(&filter); let optimal_hashes = filter.num_hashes(); for num_hashes in [optimal_hashes - 1, optimal_hashes + 1] { let mut test_filter = $name::new_builder(num_bits) .hasher(H::seeded(&[42; 16])) .hashes(num_hashes); test_filter.extend(member_nums(num_items)); let fp = false_pos_rate(&test_filter); assert!(fp_to_beat <= fp); } } } test_optimal_hashes_is_optimal_::(); } #[test] fn seeded_is_same() { let num_bits = 1 << 10; let sample_vals = member_nums(1000).collect::>(); for x in 0u8..32 { let seed = x as u128; assert_eq!( $name::new_builder(num_bits) .seed(&seed) .items(sample_vals.iter()), $name::new_builder(num_bits) .seed(&seed) .items(sample_vals.iter()) ); assert!( !($name::new_builder(num_bits) .seed(&(seed + 1)) .items(sample_vals.iter()) == $name::new_builder(num_bits) .seed(&seed) .items(sample_vals.iter())) ); } } #[test] fn false_pos_decrease_with_size() { for mag in 5..6 { let size = 10usize.pow(mag); let mut prev_fp = 1.0; for num_bits_mag in 9..22 { let num_bits = 1 << num_bits_mag; let mut filter = $name::new_builder(num_bits).expected_items(size); for x in member_nums(size) { filter.insert_hash(x); } let fp = false_pos_rate(&filter); let err = format!( "size: {size:}, num_bits: {num_bits:}, {:.6}, {:?}", fp, filter.num_hashes(), ); assert!( fp <= prev_fp, "{}", err ); prev_fp = fp; } } } fn assert_even_distribution(distr: &[u64], err: f64) { assert!(err > 0.0 && err < 1.0); let expected: i64 = (distr.iter().sum::() / (distr.len() as u64)) as i64; let thresh = (expected as f64 * err) as i64; for x in distr { let diff = (*x as i64 - expected).abs(); assert!( diff <= thresh, "{x:?} deviates from {expected:?}\nDistribution: {distr:?}" ); } } #[test] fn test_seeded_hash_from_hashes_depth() { for size in [1, 10, 100, 1000] { let mut rng = fastrand::Rng::with_seed(524323); let mut hasher = DoubleHasher::new(rng.u64(..)); let mut seeded_hash_counts: Vec<_> = repeat(0).take(size).collect(); for _ in 0..(size * 10_000) { let hi = 
hasher.next(); seeded_hash_counts[(hi as usize) % size] += 1; } assert_even_distribution(&seeded_hash_counts, 0.05); } } #[test] fn test_debug() { let filter = $name::with_num_bits(1).hashes(1); assert!(!format!("{:?}", filter).is_empty()); } #[test] fn test_clone() { let filter = $name::with_num_bits(4).hashes(4); let mut cloned = filter.clone(); assert_eq!(filter, cloned); cloned.insert(&42); assert!(filter != cloned); } #[test] fn eq_constructors_num_bits() { assert_eq!( $name::with_num_bits(4).hashes(4), $name::new_builder(4).hashes(4), ); } #[test] fn eq_constructors_false_pos() { assert_eq!( $name::with_false_pos(0.4), $name::new_with_false_pos(0.4), ); } #[test] fn eq_constructors_from_vec() { assert_eq!( $name::from_vec(repeat(42).take(42).collect()), $name::new_from_vec(repeat(42).take(42).collect()), ); } #[test] fn test_rebuilt_from_vec() { for num in [1, 10, 1000, 100_000] { for fp in [0.1, 0.01, 0.0001, 0.0000001] { let mut b = $name::with_false_pos(fp) .seed(&42) .expected_items(num); b.extend(member_nums(num)); let orig_hashes = b.num_hashes(); let new = $name::from_vec(b.iter().collect()) .seed(&42) .hashes(orig_hashes); assert!(member_nums(num).all(|x| new.contains(&x))); } } } #[cfg(feature = "serde")] #[test] fn test_serde() { for num in [1, 10, 1000, 100_000] { for fp in [0.1, 0.01, 0.0001, 0.0000001] { let mut before = $name::with_false_pos(fp) .seed(&42) .expected_items(num); before.extend(member_nums(num)); let s = serde_cbor::to_vec(&before).unwrap(); let mut after: $name = serde_cbor::from_slice(&s).unwrap(); assert_eq!(before, after); before.extend(member_nums(num * 2)); after.extend(member_nums(num * 2)); assert_eq!(before, after); } } } } }; } impl_tests!(non_atomic, BloomFilter); impl_tests!(atomic, AtomicBloomFilter); #[cfg(not(feature = "loom"))] #[cfg(test)] mod atomic_parity_tests { #[cfg(feature = "serde")] #[test] fn serde_parity() { use super::*; for num_bits in [64, 1024, 4096, 1 << 16] { for seed in 4..=18 { let mut non = 
BloomFilter::with_num_bits(num_bits) .seed(&seed) .expected_items(100); non.extend(0..=100); let mut atomic = AtomicBloomFilter::with_num_bits(num_bits) .seed(&seed) .expected_items(100); atomic.extend(0..=100); let non_bytes = serde_cbor::to_vec(&non).unwrap(); let atomic_bytes = serde_cbor::to_vec(&atomic).unwrap(); assert_eq!(non_bytes, atomic_bytes); let non_from_atomic: BloomFilter = serde_cbor::from_slice(&atomic_bytes).unwrap(); let atomic_from_non: AtomicBloomFilter = serde_cbor::from_slice(&non_bytes).unwrap(); assert_eq!(non_from_atomic, non); assert_eq!(atomic_from_non, atomic); } } } } #[cfg(feature = "loom")] #[cfg(test)] mod loom_tests { use super::*; #[test] fn test_loom() { loom::model(|| { let b = loom::sync::Arc::new(AtomicBloomFilter::with_num_bits(128).seed(&42).hashes(2)); let expected = AtomicBloomFilter::with_num_bits(128).seed(&42).hashes(2); for x in 1..=3 { expected.insert(&x); } let handles: Vec<_> = [(1..=2), (2..=3)] .into_iter() .map(|data| { let v = b.clone(); loom::thread::spawn(move || { for x in data { v.insert(&x); } }) }) .collect(); for handle in handles { handle.join().unwrap(); } let res = b.iter().collect::>(); assert_eq!(res, expected.iter().collect::>()); }); } } fastbloom-0.17.0/src/math.rs000064400000000000000000000014241046102023000137660ustar 00000000000000#[cfg(feature = "std")] #[inline] pub(crate) fn ln(x: f64) -> f64 { x.ln() } #[cfg(not(feature = "std"))] #[inline] pub(crate) fn ln(x: f64) -> f64 { libm::log(x) } #[cfg(feature = "std")] #[inline] pub(crate) fn ceil(x: f64) -> f64 { x.ceil() } #[cfg(not(feature = "std"))] #[inline] pub(crate) fn ceil(x: f64) -> f64 { libm::ceil(x) } #[cfg(feature = "std")] #[inline] pub(crate) fn pow(b: f64, p: f64) -> f64 { b.powf(p) } #[cfg(not(feature = "std"))] #[inline] pub(crate) fn pow(b: f64, p: f64) -> f64 { libm::pow(b, p) } #[cfg(feature = "std")] #[inline] pub(crate) fn round(x: f64) -> f64 { x.round() } #[cfg(not(feature = "std"))] #[inline] pub(crate) fn round(x: f64) 
-> f64 { libm::round(x) }