rapidhash-4.4.1/.cargo_vcs_info.json0000644000000001471046102023000130310ustar { "git": { "sha1": "b49ea242c1aa907f2827654f240081c4dd1d2f9a" }, "path_in_vcs": "rapidhash" }rapidhash-4.4.1/Cargo.lock0000644000000361721046102023000110130ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "anstyle" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anyhow" version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "assert_cmd" version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c5bcfa8749ac45dd12cb11055aeeb6b27a3895560d60d71e3c23bf979e60514" dependencies = [ "anstyle", "bstr", "libc", "predicates", "predicates-core", "predicates-tree", "wait-timeout", ] [[package]] name = "bitflags" version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "bstr" version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" dependencies = [ "memchr", "regex-automata", "serde", ] [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "difflib" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] name = "equivalent" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", "windows-sys", ] [[package]] name = "fastrand" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "foldhash" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "getrandom" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", "wasip2", ] [[package]] name = "getrandom" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" dependencies = [ "cfg-if", "libc", "r-efi", "wasip2", "wasip3", ] [[package]] name = "hashbrown" version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "foldhash", ] [[package]] name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "id-arena" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" [[package]] name = 
"indexmap" version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", "serde", "serde_core", ] [[package]] name = "itoa" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "leb128fmt" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "libc" version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "linux-raw-sys" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "log" version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "ppv-lite86" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ "zerocopy", ] [[package]] name = "predicates" version = "3.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ada8f2932f28a27ee7b70dd6c1c39ea0675c55a36879ab92f3a715eaa1e63cfe" dependencies = [ "anstyle", "difflib", "predicates-core", ] [[package]] name = "predicates-core" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cad38746f3166b4031b1a0d39ad9f954dd291e7854fcc0eed52ee41a0b50d144" [[package]] name = "predicates-tree" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0de1b847b39c8131db0467e9df1ff60e6d0562ab8e9a16e568ad0fdb372e2f2" dependencies = [ "predicates-core", "termtree", ] [[package]] name = "prettyplease" version = "0.2.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", "syn", ] [[package]] name = "proc-macro2" version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] [[package]] name = "r-efi" version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.9.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] [[package]] name = "rapidhash" version = "4.4.1" dependencies = [ "assert_cmd", "rand", "rand_core", "rustversion", "tempfile", ] [[package]] name = "regex-automata" version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" [[package]] name = "rustix" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", "windows-sys", ] [[package]] name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "semver" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" [[package]] name = "serde" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", ] [[package]] name = "serde_core" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", "serde", "serde_core", "zmij", ] [[package]] name = "syn" version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "tempfile" version = "3.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", "getrandom 0.4.1", "once_cell", "rustix", "windows-sys", ] [[package]] name = "termtree" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" [[package]] name = "unicode-ident" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-xid" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" [[package]] name = "wait-timeout" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" dependencies = [ "libc", ] [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasip3" version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-encoder" version = 
"0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" dependencies = [ "leb128fmt", "wasmparser", ] [[package]] name = "wasm-metadata" version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", "indexmap", "wasm-encoder", "wasmparser", ] [[package]] name = "wasmparser" version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ "bitflags", "hashbrown 0.15.5", "indexmap", "semver", ] [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-sys" version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ "windows-link", ] [[package]] name = "wit-bindgen" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" dependencies = [ "wit-bindgen-rust-macro", ] [[package]] name = "wit-bindgen-core" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" dependencies = [ "anyhow", "heck", "wit-parser", ] [[package]] name = "wit-bindgen-rust" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", "heck", "indexmap", "prettyplease", "syn", "wasm-metadata", "wit-bindgen-core", "wit-component", ] [[package]] name = 
"wit-bindgen-rust-macro" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" dependencies = [ "anyhow", "prettyplease", "proc-macro2", "quote", "syn", "wit-bindgen-core", "wit-bindgen-rust", ] [[package]] name = "wit-component" version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", "bitflags", "indexmap", "log", "serde", "serde_derive", "serde_json", "wasm-encoder", "wasm-metadata", "wasmparser", "wit-parser", ] [[package]] name = "wit-parser" version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", "id-arena", "indexmap", "log", "semver", "serde", "serde_derive", "serde_json", "unicode-xid", "wasmparser", ] [[package]] name = "zerocopy" version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" rapidhash-4.4.1/Cargo.toml0000644000000034051046102023000110270ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) 
dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.71.0" name = "rapidhash" version = "4.4.1" authors = ["Liam Gray "] build = false exclude = [ "Cargo.lock", "fuzz", ] publish = true autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "An extremely fast, high quality, platform-independent hashing algorithm." documentation = "https://docs.rs/rapidhash" readme = "README.md" keywords = [ "hash", "hasher", "no-std", "rapidhash", ] categories = [ "algorithms", "no-std", ] license = "MIT OR Apache-2.0" repository = "https://github.com/hoxxep/rapidhash" [package.metadata.docs.rs] all-features = true rustdoc-args = [ "--cfg", "docsrs", ] [features] default = ["std"] nightly = [] rand = [ "dep:rand", "std", ] rng = ["dep:rand_core"] std = [] unsafe = [] [lib] name = "rapidhash" path = "src/lib.rs" [[bin]] name = "rapidhash" path = "src/main.rs" [[test]] name = "cli" path = "tests/cli.rs" [dependencies.rand] version = "0.9" optional = true [dependencies.rand_core] version = "0.9" optional = true default-features = false [dependencies.rustversion] version = "1.0" [dev-dependencies.assert_cmd] version = "2.1.1" [dev-dependencies.rand] version = "0.9.0" [dev-dependencies.tempfile] version = "3.14.0" rapidhash-4.4.1/Cargo.toml.orig000064400000000000000000000027551046102023000144750ustar 00000000000000[package] name = "rapidhash" version = "4.4.1" edition = "2021" publish = true rust-version = "1.71.0" # const split_at and read_unaligned are our limitations authors = ["Liam Gray "] description = "An extremely fast, high quality, platform-independent hashing algorithm." 
repository = "https://github.com/hoxxep/rapidhash" documentation = "https://docs.rs/rapidhash" license = "MIT OR Apache-2.0" readme = "README.md" keywords = ["hash", "hasher", "no-std", "rapidhash"] categories = ["algorithms", "no-std"] exclude = ["Cargo.lock", "fuzz"] [lib] # Test docs using cargo-docs. Command: # RUSTDOCFLAGS="--cfg docsrs" cargo +nightly docs -- --all-feature [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] [features] default = ["std"] std = [] # enable std library for RapidHashMap and RapidHashSet helpers rand = ["dep:rand", "std"] # enable the rand library for initializing RandomState rng = ["dep:rand_core"] # enable RapidRng, a rand Rng-compatible fast PRNG using rapidrng nightly = [] # enable nightly features, faster str hashing and likely/unlikely hints unsafe = [] # enable unsafe pointer arithmetic; this is used to check for bounds-check regressions [dependencies] rustversion = "1.0" rand = { version = "0.9", optional = true } rand_core = { version = "0.9", default-features = false, optional = true } [dev-dependencies] rapidhash-c = { path = "../rapidhash-c" } rand = "0.9.0" tempfile = "3.14.0" # testing rapidhash_file assert_cmd = "2.1.1" rapidhash-4.4.1/LICENSE-APACHE000064400000000000000000000251361046102023000135300ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2024 Liam Gray Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. rapidhash-4.4.1/LICENSE-MIT000064400000000000000000000020561046102023000132340ustar 00000000000000Copyright (c) 2024 Liam Gray Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. rapidhash-4.4.1/README.md000064400000000000000000000570341046102023000130650ustar 00000000000000# rapidhash – portable rust hashing A rust implementation of [rapidhash](https://github.com/Nicoshev/rapidhash), the official successor to [wyhash](https://github.com/wangyi-fudan/wyhash). 
- **High quality** – the fastest hash to pass all [SMHasher](https://github.com/rurban/smhasher) and [SMHasher3](https://gitlab.com/fwojcik/smhasher3) tests, with near-ideal collision probability. - **Very fast** – significant throughput improvement over wyhash and foldhash. - **Platform independent and no-std compatible** – stable hash output on all platforms with no dependency on vectorized or cryptographic hardware instructions. Optimized for both AMD64 and AArch64. - **Official successor to wyhash** with improved speed, quality, and compatibility. - **Run-time and compile-time hashing** – the hash implementation is fully `const`. - **Idiomatic** `std::hash::Hasher` compatible hasher for `HashMap` and `HashSet`. - **Non-cryptographic** – "minimally DoS resistant" in the same manner as foldhash. - **Streamable** – incremental and `Read`-based hashing for large files and other streams. - **CLI tool** for hashing files or stdin. **Sponsored by [Upon](https://uponvault.com?utm_source=github&utm_campaign=rapidhash)**, inheritance vaults for your digital life. Ensure your family can access your devices, accounts, and assets when the unexpected happens. ## Usage ### In-Memory Hashing The in-memory hasher follows rust's `std::hash` traits. The underlying hash function may change between minor versions and is only suitable for in-memory use (e.g. `HashMap`, `HashSet`). Available in `rapidhash::fast` and `rapidhash::quality` flavours. - `RapidHasher`: a `std::hash::Hasher` compatible hasher using the rapidhash algorithm. - `RandomState`: a `std::hash::BuildHasher` that initializes the hasher with a random seed and secrets. - `GlobalState`: a `std::hash::BuildHasher` that initializes the hasher with a global seed and secrets, randomized once per process. - `SeedableState`: a `std::hash::BuildHasher` that initializes the hasher with a custom seed and secrets. - `RapidHashMap` / `RapidHashSet`: helper types using `fast::RandomState` with `HashMap` and `HashSet`. 
```rust use rapidhash::RapidHashMap; // A HashMap using RapidHasher for fast in-memory hashing. let mut map = RapidHashMap::default(); map.insert("key", "value"); ``` ```rust use std::hash::BuildHasher; use rapidhash::quality::SeedableState; // Using the RapidHasher directly for in-memory hashing. let hasher = SeedableState::fixed(); assert_eq!(hasher.hash_one(b"hello world"), 3348275917668072623); ``` ### Portable Hashing Fully compatible with the C++ rapidhash algorithms. Methods are provided for all rapidhash V1, V2, and V3 (with micro/nano) variants. These are stable functions whose output will not change between crate versions. ```rust use rapidhash::v3::{rapidhash_v3_seeded, rapidhash_v3_file_seeded, RapidSecrets, RapidStreamHasherV3}; /// Set your global hashing secrets. /// - For HashDoS resistance, choose a randomized secret. /// - For C++ compatibility, use the `seed_cpp` method or `DEFAULT_RAPID_SECRETS`. const SECRETS: RapidSecrets = RapidSecrets::seed(0x123456); // Bulk: hash a complete byte slice. let bulk = rapidhash_v3_seeded(b"hello world", &SECRETS); // Stream: write chunks of any size, same output regardless of chunk boundaries. let mut hasher = RapidStreamHasherV3::new(&SECRETS); hasher.write(b"hello "); hasher.write(b"world"); let stream = hasher.finish(); // Read: hash from any `Read` source (files, cursors, etc.). let read = rapidhash_v3_file_seeded(std::io::Cursor::new(b"hello world"), &SECRETS).unwrap(); assert_eq!(bulk, stream); assert_eq!(bulk, read); ``` See the [`portable-hash` crate](https://github.com/hoxxep/portable-hash?tab=readme-ov-file#whats-wrong-with-the-stdhash-traits) for why using the standard library hashing traits is not recommended for portable hashing. Rapidhash is planning to implement the `PortableHash` and `PortableHasher` traits in a future release. ### CLI Rapidhash can be installed as a CLI tool to hash files or stdin. Not a cryptographic hash, but much faster than one. 
Fully compatible with the C++ rapidhash V1, V2, and V3 algorithms. Output is the decimal `u64` hash value. ```shell # install cargo install rapidhash # hash a file (output: 8543579700415218186) rapidhash --v3 example.txt # hash stdin (output: 8543579700415218186) echo "example" | rapidhash --v3 ``` ## Features - `default`: `std` - `std`: Enables the `RapidHashMap` and `RapidHashSet` helper types. - `rand`: Enables using the `rand` library to more securely initialize `RandomState`. Includes the `rand` crate dependency. - `rng`: Enables `RapidRng`, a fast, non-cryptographic PRNG based on rapidrng. Includes the `rand_core` crate dependency. - `unsafe`: Uses unsafe pointer arithmetic to skip some unnecessary bounds checks for a small 3-4% performance improvement. - `nightly`: Enable nightly-only features for even faster hashing, such as overriding `Hasher::write_str` and likely hints. ## Benchmarks In our benchmarking, rapidhash is one of the fastest general-purpose non-cryptographic hash functions. It places second to gxhash on some benchmarks, but gxhash is not portable and requires AES instructions to compile. ![Hashing Benchmarks](https://github.com/hoxxep/rapidhash/raw/master/docs/bench_hash_aarch64_apple_m1_max.svg) Rapidhash uses raw throughput benchmarks (the charts) to measure performance over various input sizes, and the [foldhash benchmark suite](https://github.com/orlp/foldhash?tab=readme-ov-file#performance) (the txt tables) to measure workloads that are closer to real-world usage. The foldhash suite benchmarks hashers by measuring raw hash throughput, hashmap lookup miss, hashmap lookup hit, and hashmap insertion performance on a wide variety of commonly hashed types. The benchmarks have been compiled with and without `-C target-cpu=native` on a variety of platforms to demonstrate rapidhash's strong all-round performance. The full results are available in the [docs folder](https://github.com/hoxxep/rapidhash/tree/master/docs) and are summarised below.
aarch64 Apple M1 Max ```text ┌────────────────┬─────────────┬─────────────┬────────────┬────────────┬────────┬───────┬─────────┐ │ metric ┆ rapidhash-f ┆ rapidhash-q ┆ foldhash-f ┆ foldhash-q ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪═════════════╪═════════════╪════════════╪════════════╪════════╪═══════╪═════════╡ │ avg_rank ┆ 2.11 ┆ 3.53 ┆ 2.84 ┆ 4.62 ┆ 2.88 ┆ 5.05 ┆ 6.97 │ │ geometric_mean ┆ 4.29 ┆ 4.82 ┆ 4.83 ┆ 5.24 ┆ 5.50 ┆ 5.94 ┆ 22.17 │ └────────────────┴─────────────┴─────────────┴────────────┴────────────┴────────┴───────┴─────────┘ ``` ![Hashing Benchmarks](https://github.com/hoxxep/rapidhash/raw/master/docs/bench_hash_aarch64_apple_m1_max.svg)
aarch64 Apple M1 Max (target-cpu=native) ```text ┌────────────────┬─────────────┬─────────────┬────────────┬────────────┬────────┬────────┬───────┬─────────┐ │ metric ┆ rapidhash-f ┆ rapidhash-q ┆ foldhash-f ┆ foldhash-q ┆ gxhash ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪═════════════╪═════════════╪════════════╪════════════╪════════╪════════╪═══════╪═════════╡ │ avg_rank ┆ 2.23 ┆ 3.94 ┆ 3.30 ┆ 5.08 ┆ 4.69 ┆ 3.16 ┆ 5.64 ┆ 7.97 │ │ geometric_mean ┆ 4.25 ┆ 4.79 ┆ 4.79 ┆ 5.19 ┆ 4.93 ┆ 5.48 ┆ 5.91 ┆ 21.99 │ └────────────────┴─────────────┴─────────────┴────────────┴────────────┴────────┴────────┴───────┴─────────┘ ``` ![Hashing Benchmarks](https://github.com/hoxxep/rapidhash/raw/master/docs/bench_hash_aarch64_apple_m1_max_native.svg)
aarch64 AWS Graviton3 ```text ┌────────────────┬─────────────┬─────────────┬────────────┬────────────┬────────┬───────┬─────────┐ │ metric ┆ rapidhash-f ┆ rapidhash-q ┆ foldhash-f ┆ foldhash-q ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪═════════════╪═════════════╪════════════╪════════════╪════════╪═══════╪═════════╡ │ avg_rank ┆ 2.27 ┆ 3.88 ┆ 3.08 ┆ 4.66 ┆ 2.11 ┆ 5.05 ┆ 6.97 │ │ geometric_mean ┆ 7.82 ┆ 9.03 ┆ 8.53 ┆ 9.66 ┆ 8.02 ┆ 10.98 ┆ 29.31 │ └────────────────┴─────────────┴─────────────┴────────────┴────────────┴────────┴───────┴─────────┘ ``` ![Hashing Benchmarks](https://github.com/hoxxep/rapidhash/raw/master/docs/bench_hash_aarch64_aws_graviton3.svg)
aarch64 AWS Graviton3 (target-cpu=native) ```text ┌────────────────┬─────────────┬─────────────┬────────────┬────────────┬────────┬────────┬───────┬─────────┐ │ metric ┆ rapidhash-f ┆ rapidhash-q ┆ foldhash-f ┆ foldhash-q ┆ gxhash ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪═════════════╪═════════════╪════════════╪════════════╪════════╪════════╪═══════╪═════════╡ │ avg_rank ┆ 2.59 ┆ 4.20 ┆ 3.38 ┆ 5.28 ┆ 4.09 ┆ 2.50 ┆ 5.98 ┆ 7.97 │ │ geometric_mean ┆ 7.84 ┆ 8.97 ┆ 8.56 ┆ 9.68 ┆ 8.59 ┆ 8.15 ┆ 11.16 ┆ 32.59 │ └────────────────┴─────────────┴─────────────┴────────────┴────────────┴────────┴────────┴───────┴─────────┘ ``` ![Hashing Benchmarks](https://github.com/hoxxep/rapidhash/raw/master/docs/bench_hash_aarch64_aws_graviton3_native.svg)
x86_64 AMD EPYC 9R14 ```text ┌────────────────┬─────────────┬─────────────┬────────────┬────────────┬────────┬───────┬─────────┐ │ metric ┆ rapidhash-f ┆ rapidhash-q ┆ foldhash-f ┆ foldhash-q ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪═════════════╪═════════════╪════════════╪════════════╪════════╪═══════╪═════════╡ │ avg_rank ┆ 2.05 ┆ 3.75 ┆ 2.81 ┆ 4.42 ┆ 3.09 ┆ 4.91 ┆ 6.97 │ │ geometric_mean ┆ 4.67 ┆ 5.38 ┆ 5.27 ┆ 5.99 ┆ 6.13 ┆ 6.50 ┆ 23.66 │ └────────────────┴─────────────┴─────────────┴────────────┴────────────┴────────┴───────┴─────────┘ ``` ![Hashing Benchmarks](https://github.com/hoxxep/rapidhash/raw/master/docs/bench_hash_x86_64_amd_epyc_9R14.svg)
x86_64 AMD EPYC 9R14 (target-cpu=native) ```text ┌────────────────┬─────────────┬─────────────┬────────────┬────────────┬────────┬────────┬───────┬─────────┐ │ metric ┆ rapidhash-f ┆ rapidhash-q ┆ foldhash-f ┆ foldhash-q ┆ gxhash ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪═════════════╪═════════════╪════════════╪════════════╪════════╪════════╪═══════╪═════════╡ │ avg_rank ┆ 2.56 ┆ 4.36 ┆ 3.45 ┆ 5.38 ┆ 4.31 ┆ 3.36 ┆ 4.61 ┆ 7.97 │ │ geometric_mean ┆ 4.68 ┆ 5.34 ┆ 5.24 ┆ 5.91 ┆ 5.01 ┆ 5.98 ┆ 5.63 ┆ 25.75 │ └────────────────┴─────────────┴─────────────┴────────────┴────────────┴────────┴────────┴───────┴─────────┘ ``` ![Hashing Benchmarks](https://github.com/hoxxep/rapidhash/raw/master/docs/bench_hash_x86_64_amd_epyc_9R14_native.svg)
x86_64 Intel Xeon Platinum 8488C ```text ┌────────────────┬─────────────┬─────────────┬────────────┬────────────┬────────┬───────┬─────────┐ │ metric ┆ rapidhash-f ┆ rapidhash-q ┆ foldhash-f ┆ foldhash-q ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪═════════════╪═════════════╪════════════╪════════════╪════════╪═══════╪═════════╡ │ avg_rank ┆ 1.86 ┆ 3.83 ┆ 2.86 ┆ 4.50 ┆ 2.95 ┆ 5.03 ┆ 6.97 │ │ geometric_mean ┆ 4.52 ┆ 5.18 ┆ 4.95 ┆ 5.55 ┆ 5.67 ┆ 6.33 ┆ 20.24 │ └────────────────┴─────────────┴─────────────┴────────────┴────────────┴────────┴───────┴─────────┘ ``` ![Hashing Benchmarks](https://github.com/hoxxep/rapidhash/raw/master/docs/bench_hash_x86_64_intel_xeon_8488c.svg)
x86_64 Intel Xeon Platinum 8488C (target-cpu=native) ```text ┌────────────────┬─────────────┬─────────────┬────────────┬────────────┬────────┬────────┬───────┬─────────┐ │ metric ┆ rapidhash-f ┆ rapidhash-q ┆ foldhash-f ┆ foldhash-q ┆ gxhash ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪═════════════╪═════════════╪════════════╪════════════╪════════╪════════╪═══════╪═════════╡ │ avg_rank ┆ 2.38 ┆ 4.69 ┆ 3.52 ┆ 5.30 ┆ 4.08 ┆ 3.39 ┆ 4.69 ┆ 7.97 │ │ geometric_mean ┆ 4.46 ┆ 5.09 ┆ 4.88 ┆ 5.42 ┆ 4.73 ┆ 5.58 ┆ 5.26 ┆ 21.34 │ └────────────────┴─────────────┴─────────────┴────────────┴────────────┴────────┴────────┴───────┴─────────┘ ``` ![Hashing Benchmarks](https://github.com/hoxxep/rapidhash/raw/master/docs/bench_hash_x86_64_intel_xeon_8488c_native.svg)
Benchmark notes - Hash throughput does not measure hash "quality", and many of the benchmarked functions fail the [SMHasher3 hash quality benchmarks](https://gitlab.com/fwojcik/smhasher3). Rapidhash is the fastest hash to pass all quality benchmarks. Hash quality affects hashmap performance, as well as algorithms that benefit from high quality hash functions such as HyperLogLog and MinHash. - **Comparison to foldhash**: Rapidhash uses the same integer buffer construction as foldhash, but is notably faster when hashing strings by making use of the rapidhash algorithm. Rapidhash also offers portable and streaming hash flavours. - **Comparison to gxhash**: gxhash achieves its high throughput by using AES instructions and consistently outperforms the other accelerated hashers (ahash, t1ha, xxhash3_64). It's a great hash function, but is not a portable hash function, requiring `target-cpu=native` or specific feature flags to compile. Gxhash is a great choice for applications that can guarantee the availability of AES instructions and mostly hash strings, but rapidhash may be preferred for hashing tuples and structs, or by libraries that aim to support a wide range of platforms. - The default rust hasher (SipHasher) unexpectedly appears to run consistently faster _without_ `target-cpu=native` on various x86 and ARM chips. - Benchmark your own use case, with your real world dataset! We suggest experimenting with different hash functions to see which one works best for your use case. Rapidhash is great for fast general-purpose hashing in libraries and applications that only need minimal DoS resistance, but certain hashers will outperform for specific use cases. - We recommend using `lto = "fat"` and `codegen-units = 1` in your `Cargo.toml` release and bench profiles to ensure consistent inlining, application performance, and benchmarking results. For example: ```toml [profile.release] opt-level = 3 lto = "fat" codegen-units = 1 ```
## Minimal DoS Resistance Rapidhash is a keyed hash function and the rust implementation deviates from its C++ counterpart by also randomising the secrets array. The algorithm primarily relies on the same 128-bit folded multiply mixing step used by foldhash and ahash's fallback algorithm. It aims to be immune to length extension and re-ordering attacks. We believe rapidhash is a minimally DoS resistant hash function, such that a non-interactive attacker cannot trivially create collisions if they do not know the seed or secrets. The adverb "minimally" is used to describe that rapidhash is not a cryptographic hash, it is possible to construct collisions if the seed or secrets are known, and it may be possible for an interactive attacker to learn the seed by observing hash outputs or application response times over a large number of inputs. Provided rapidhash has been instantiated through `RandomState` or `RapidSecrets` using a randomized secret seed, we believe rapidhash is minimally resistant to hash DoS attacks. ## Rapidhash Versioning ### Portable Hashing C++ compatibility is presented in `rapidhash::v1`, `rapidhash::v2`, and `rapidhash::v3` modules. The output for these is guaranteed to be stable between major crate versions. Rapidhash V3 is the recommended, fastest, and most recent version of the hash. Streaming is only possible with the rapidhash V3 algorithm. Others are provided for backwards compatibility. ### In-Memory Hashing Rust hashing traits (`RapidHasher`, `RandomState`, etc.) are implemented in `rapidhash::fast`, `rapidhash::quality`, and `rapidhash::inner` modules. These are not guaranteed to give a consistent hash output between platforms, compiler versions, or crate versions as the rust `Hasher` trait [is not suitable](https://github.com/hoxxep/portable-hash/?tab=readme-ov-file#whats-wrong-with-the-stdhash-traits) for portable hashing. - Use `rapidhash::fast` for optimal hashing speed with a slightly lower hash quality. 
Best for most datastructures such as HashMap and HashSet usage. - Use `rapidhash::quality` where statistical hash quality is the priority, such as HyperLogLog or MinHash algorithms. - Use `rapidhash::inner` to set advanced parameters to configure the hash function specifically to your use case. ## Crate Versioning The minimum supported Rust version (MSRV) is 1.71.0. The rapidhash crate follows this versioning scheme: - **Major**: breaking API changes, MSRV bumps, or any changes to `rapidhash_v*` output. - **Minor**: API additions/deprecations, or changes to `RapidHasher` output. - **Patch**: bug fixes and performance improvements. Portable hash outputs (e.g. `rapidhash_v3`) are guaranteed to be stable. In-memory hash outputs (e.g. `RapidHasher`) may change between minor versions to allow freely improving performance. ## License and Acknowledgements This project is licensed under both the MIT and Apache-2.0 licenses. You are free to choose either license. With thanks to [Nicolas De Carli](https://github.com/Nicoshev) for the original [rapidhash](https://github.com/Nicoshev/rapidhash) C++ implementation, which is licensed under the [MIT License](https://github.com/Nicoshev/rapidhash/blob/master/LICENSE). With thanks to [Orson Peters](https://github.com/orlp) for his work on [foldhash](https://github.com/orlp/foldhash), which inspired much of the integer hashing optimisations in this crate. Some of the RapidHasher string hashing [optimisations](https://github.com/orlp/foldhash/pull/35) have also made their way back into foldhash as a thanks. With thanks to [Justin Bradford](https://github.com/jabr) for letting us use the rapidhash crate name 🍻 rapidhash-4.4.1/src/collections.rs000064400000000000000000000051561046102023000152570ustar 00000000000000use crate::fast::RandomState; /// A [`std::collections::HashMap`] that uses the [`crate::fast::RandomState`] hasher. 
/// /// # Example /// ``` /// use rapidhash::{HashMapExt, RapidHashMap}; /// /// let mut map = RapidHashMap::default(); /// map.insert(42, "the answer"); /// /// // with capacity /// let mut map = RapidHashMap::with_capacity(10); /// map.insert(42, "the answer"); /// ``` pub type RapidHashMap = std::collections::HashMap; /// A [`std::collections::HashSet`] that uses the [`crate::fast::RandomState`] hasher. /// /// # Example /// ``` /// use rapidhash::{HashSetExt, RapidHashSet}; /// /// let mut set = RapidHashSet::default(); /// set.insert("the answer"); /// /// // with capacity /// let mut set = RapidHashSet::with_capacity(10); /// set.insert("the answer"); /// ``` pub type RapidHashSet = std::collections::HashSet; /// A trait for creating a [`RapidHashMap`] with a specified capacity and hasher. pub trait HashMapExt { /// Create a new [`RapidHashMap`] with the default capacity and hasher. fn new() -> Self; /// Create a new [`RapidHashMap`] with the given capacity and hasher. fn with_capacity(capacity: usize) -> Self; } impl HashMapExt for RapidHashMap { #[inline] fn new() -> Self { RapidHashMap::default() } #[inline] fn with_capacity(capacity: usize) -> Self { RapidHashMap::with_capacity_and_hasher(capacity, RandomState::default()) } } /// A trait for creating a [`RapidHashSet`] with a specified capacity and hasher. pub trait HashSetExt { /// Create a new [`RapidHashSet`] with the default capacity and hasher. fn new() -> Self; /// Create a new [`RapidHashSet`] with the given capacity and hasher. 
fn with_capacity(capacity: usize) -> Self; } impl HashSetExt for RapidHashSet { #[inline] fn new() -> Self { RapidHashSet::default() } #[inline] fn with_capacity(capacity: usize) -> Self { RapidHashSet::with_capacity_and_hasher(capacity, RandomState::default()) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_hashmap_new() { let mut map = RapidHashMap::new(); map.insert("key", "value"); assert_eq!(map.get("key"), Some(&"value")); assert_eq!(map.get("na"), None); } #[test] fn test_hashset_new() { let mut set = RapidHashSet::new(); set.insert("value"); assert!(set.contains("value")); assert!(!set.contains("na")); } #[test] fn test_hashmap_size() { assert_eq!(core::mem::size_of::>(), 40); } } rapidhash-4.4.1/src/fast.rs000064400000000000000000000066421046102023000136770ustar 00000000000000//! In-memory hashing: RapidHasher with a focus on speed. //! //! Designed to maximize hashmap fetch and insert performance on most datasets. //! //! This is a specific instantiation of the [`crate::inner`] module with the following settings: //! - `AVALANCHE` is disabled. //! - `SPONGE` is enabled. //! - `COMPACT` is disabled, unless building for WASM targets. //! - `PROTECTED` is disabled. const AVALANCHE: bool = false; const SPONGE: bool = true; const COMPACT: bool = cfg!(target_family = "wasm"); const PROTECTED: bool = false; use crate::inner; /// A [`std::hash::Hasher`] inspired by [`crate::v3::rapidhash_v3`] with a focus on speed and /// throughput. /// /// This is an alias for [inner::RapidHasher] with the following settings: /// - `AVALANCHE` is disabled. /// - `SPONGE` is enabled. /// - `COMPACT` is disabled. /// - `PROTECTED` is disabled. /// /// Use [`crate::quality::RapidHasher`] for a higher quality hash output where necessary. pub type RapidHasher<'s> = inner::RapidHasher<'s, AVALANCHE, SPONGE, COMPACT, PROTECTED>; /// A rapidhash equivalent to [`std::hash::RandomState`] that uses a random seed and secrets for /// minimal DoS resistance. 
/// /// This initializes a [`RapidHasher`] with the following settings: /// - `AVALANCHE` is disabled. /// - `SPONGE` is enabled. /// - `COMPACT` is disabled, unless building for WASM targets. /// - `PROTECTED` is disabled. /// /// Use [`crate::quality::RandomState`] for a higher quality but slower hash output where desirable. pub type RandomState = inner::RandomState; /// A [`std::hash::BuildHasher`] that uses user-provided seed and secrets. /// /// We recommend using [`RandomState`] or [`GlobalState`] instead for most use cases. /// /// This initializes a [`RapidHasher`] with the following settings: /// - `AVALANCHE` is disabled. /// - `SPONGE` is enabled. /// - `COMPACT` is disabled, unless building for WASM targets. /// - `PROTECTED` is disabled. /// /// Use [`crate::quality::SeedableState`] for a higher quality but slower hash output where desirable. pub type SeedableState<'secrets> = inner::SeedableState<'secrets, AVALANCHE, SPONGE, COMPACT, PROTECTED>; /// A [`std::hash::BuildHasher`] that uses a global seed and secrets, randomized only once on startup. /// /// All instances of GlobalState will use the same global seed and secrets for the lifetime of the /// program. This provides minimal HashDoS resistance by randomizing the seed and secrets between /// application runs. /// /// This initializes a [`RapidHasher`] with the following settings: /// - `AVALANCHE` is disabled. /// - `SPONGE` is enabled. /// - `COMPACT` is disabled, unless building for WASM targets. /// - `PROTECTED` is disabled. /// /// Use [`crate::quality::GlobalState`] for a higher quality but slower hash output where desirable.
pub type GlobalState = inner::GlobalState; #[cfg(any(feature = "std", docsrs))] #[deprecated(since = "0.4.0", note = "Please use the top-level rapidhash::RapidHashMap instead")] pub use crate::RapidHashMap; #[cfg(any(feature = "std", docsrs))] #[deprecated(since = "0.4.0", note = "Please use the top-level rapidhash::RapidHashSet instead")] pub use crate::RapidHashSet; #[cfg(any(feature = "std", docsrs))] #[deprecated(since = "0.4.0", note = "Please use the top-level rapidhash::HashMapExt instead")] pub use crate::HashMapExt; #[cfg(any(feature = "std", docsrs))] #[deprecated(since = "0.4.0", note = "Please use the top-level rapidhash::HashSetExt instead")] pub use crate::HashSetExt; rapidhash-4.4.1/src/inner/mix_np.rs000064400000000000000000000131641046102023000153440ustar 00000000000000//! Internal module that provides the folded multiply. /// NON-PORTABLE 64*64 to 128 bit multiply /// /// Returns the (low, high) 64 bits of the 128 bit result. /// /// # Non-portable version /// This version is not portable across all architectures and is intended for use only on the /// in-memory hash functions. /// /// # From the C code: /// Calculates 128-bit C = *A * *B. /// /// When RAPIDHASH_FAST is defined: /// Overwrites A contents with C's low 64 bits. /// Overwrites B contents with C's high 64 bits. /// /// When RAPIDHASH_PROTECTED is defined: /// Xors and overwrites A contents with C's low 64 bits. /// Xors and overwrites B contents with C's high 64 bits. 
#[inline(always)] #[must_use] pub(super) const fn rapid_mum_np(a: u64, b: u64) -> (u64, u64) { #[cfg(any( all( target_pointer_width = "64", not(any(target_arch = "sparc64", target_arch = "wasm64")), ), target_arch = "aarch64", target_arch = "x86_64", all(target_family = "wasm", target_feature = "wide-arithmetic"), ))] { let r = (a as u128).wrapping_mul(b as u128); if !PROTECTED { (r as u64, (r >> 64) as u64) } else { (a ^ r as u64, b ^ (r >> 64) as u64) } } #[cfg(not(any( all( target_pointer_width = "64", not(any(target_arch = "sparc64", target_arch = "wasm64")), ), target_arch = "aarch64", target_arch = "x86_64", all(target_family = "wasm", target_feature = "wide-arithmetic"), )))] { // u64 x u64 -> u128 product is quite expensive on 32-bit. // We approximate it by expanding the multiplication and eliminating // carries by replacing additions with XORs: // (2^32 hx + lx)*(2^32 hy + ly) = // 2^64 hx*hy + 2^32 (hx*ly + lx*hy) + lx*ly ~= // 2^64 hx*hy ^ 2^32 (hx*ly ^ lx*hy) ^ lx*ly // Which when folded becomes: // (hx*hy ^ lx*ly) ^ (hx*ly ^ lx*hy).rotate_right(32) let lx = a as u32; let ly = b as u32; let hx = (a >> 32) as u32; let hy = (b >> 32) as u32; let ll = (lx as u64).wrapping_mul(ly as u64); let lh = (lx as u64).wrapping_mul(hy as u64); let hl = (hx as u64).wrapping_mul(ly as u64); let hh = (hx as u64).wrapping_mul(hy as u64); if !PROTECTED { ((hh ^ ll), (hl ^ lh).rotate_right(32)) } else { // If protected, we XOR the inputs with the results. // This is to ensure that the inputs are not recoverable from the output. ((a ^ hh ^ ll), (b ^ hl ^ lh).rotate_right(32)) } } } /// NON-PORTABLE Folded 64-bit multiply. [rapid_mum] then XOR the results together. /// /// # Non-portable version /// This version is not portable across all architectures and is intended for use only on the /// in-memory hash functions. 
#[inline(always)] #[must_use] pub(super) const fn rapid_mix_np(a: u64, b: u64) -> u64 { #[cfg(any( all( target_pointer_width = "64", not(any(target_arch = "sparc64", target_arch = "wasm64")), ), target_arch = "aarch64", target_arch = "x86_64", all(target_family = "wasm", target_feature = "wide-arithmetic"), ))] { let r = (a as u128).wrapping_mul(b as u128); if !PROTECTED { (r as u64) ^ (r >> 64) as u64 } else { (a ^ r as u64) ^ (b ^ (r >> 64) as u64) } } #[cfg(not(any( all( target_pointer_width = "64", not(any(target_arch = "sparc64", target_arch = "wasm64")), ), target_arch = "aarch64", target_arch = "x86_64", all(target_family = "wasm", target_feature = "wide-arithmetic"), )))] { // u64 x u64 -> u128 product is quite expensive on 32-bit. // We approximate it by expanding the multiplication and eliminating // carries by replacing additions with XORs: // (2^32 hx + lx)*(2^32 hy + ly) = // 2^64 hx*hy + 2^32 (hx*ly + lx*hy) + lx*ly ~= // 2^64 hx*hy ^ 2^32 (hx*ly ^ lx*hy) ^ lx*ly // Which when folded becomes: // (hx*hy ^ lx*ly) ^ (hx*ly ^ lx*hy).rotate_right(32) let lx = a as u32; let ly = b as u32; let hx = (a >> 32) as u32; let hy = (b >> 32) as u32; let ll = (lx as u64).wrapping_mul(ly as u64); let lh = (lx as u64).wrapping_mul(hy as u64); let hl = (hx as u64).wrapping_mul(ly as u64); let hh = (hx as u64).wrapping_mul(hy as u64); if !PROTECTED { (hh ^ ll) ^ (hl ^ lh).rotate_right(32) } else { // If protected, we XOR the inputs with the results. // This is to ensure that the inputs are not recoverable from the output. 
(a ^ hh ^ ll) ^ (b ^ hl ^ lh).rotate_right(32) } } } #[cfg(test)] mod tests { use super::*; #[test] #[cfg(any( all( target_pointer_width = "64", not(any(target_arch = "sparc64", target_arch = "wasm64")), ), target_arch = "aarch64", target_arch = "x86_64", all(target_family = "wasm", target_feature = "wide-arithmetic"), ))] fn test_rapid_mum() { let (a, b) = rapid_mum_np::(0, 0); assert_eq!(a, 0); assert_eq!(b, 0); let (a, b) = rapid_mum_np::(100, 100); assert_eq!(a, 10000); assert_eq!(b, 0); let (a, b) = rapid_mum_np::(u64::MAX, 2); assert_eq!(a, u64::MAX - 1); assert_eq!(b, 1); } } rapidhash-4.4.1/src/inner/mod.rs000064400000000000000000000265061046102023000146350ustar 00000000000000//! In-memory hashing: RapidHasher with full configurability via compile-time arguments. //! //! This module contains the Hasher, BuildHasher, HashMap, HashSet, and RandomState //! implementations. It is recommended to use [crate::fast] or [crate::quality], but for the //! advanced user, [crate::inner] can be used directly to customise the compile time options to //! modify the hash function. //! //! Each structure may have the compile time const generics: //! - `AVALANCHE`: Whether to use a final avalanche mix step, required to pass SMHasher3. This //! option changes the hash output. Enabled on [crate::quality], disabled on [crate::fast]. //! - `SPONGE`: Allow RapidHasher to cache integers into a 128-bit buffer to perform a single //! folded multiply step on the entire buffer. If disabled, a mix step is performed on each //! individual integer. This changes the hash output when hashing integers. Enabled on both //! [crate::quality] and [crate::fast]. //! - `COMPACT`: Reduce the code size of the hasher by preventing manually unrolled loops. This does //! _not_ affect the hash output. Disabled on both [crate::quality] and [crate::fast]. //! - `PROTECTED`: When performing the folded multiply mix step, XOR the a and b back into their //! 
original values to make it harder for an attacker to generate collisions. This changes the //! hash output. Disabled on both [crate::quality] and [crate::fast]. //! //! The `RapidHasher` struct is _inspired by_ rapidhash, but is not a direct port and will output //! different hash values. It keeps the same hasher quality but uses various optimisations to //! improve performance when used in the Rust Hasher trait. //! //! The output values of functions in the `inner` module are not guaranteed to be stable between //! versions. Please use the `v1`, `v2`, or `v3` modules for stable output values between rapidhash //! crate versions. mod rapid_const; mod rapid_hasher; mod state; pub(crate) mod seeding; mod mix_np; mod seed; mod read_np; #[doc(inline)] pub use rapid_hasher::*; #[doc(inline)] pub use state::*; #[doc(inline)] use seed::*; #[cfg(test)] mod tests { extern crate std; use std::hash::{BuildHasher, Hash, Hasher}; use std::collections::BTreeSet; use rand::Rng; use crate::inner::mix_np::rapid_mix_np; use super::seed::{DEFAULT_RAPID_SECRETS, DEFAULT_SEED}; use super::rapid_const::{rapidhash_rs, rapidhash_rs_seeded}; type RapidHasher = super::RapidHasher<'static, true, true, true, false>; type SeedableState = super::SeedableState<'static, true, true, true, false>; #[derive(Hash)] struct Object { string: &'static str, } /// `#[derive(Hash)]` on a `&str` field hashes the string bytes followed by a `0xFF` suffix (not a length prefix) unless the `nightly` feature is enabled, check understanding.
#[cfg(target_endian = "little")]
#[test]
fn derive_hash_works() {
    // The expected value depends on the `nightly` feature; the manual replay below is gated on
    // the same feature.
    #[cfg(not(feature = "nightly"))]
    const EXPECTED: u64 = 7608958509739739138;
    #[cfg(feature = "nightly")]
    const EXPECTED: u64 = 8977256838778740407;

    let object = Object { string: "hello world" };

    // Hash through the derived `Hash` implementation.
    let derived = {
        let mut hasher = RapidHasher::default();
        object.hash(&mut hasher);
        hasher.finish()
    };
    assert_eq!(derived, EXPECTED);

    // Replay the equivalent writes by hand: the raw string bytes, plus a trailing `0xFF` byte
    // when the `nightly` feature is off.
    let manual = {
        let mut hasher = RapidHasher::default();
        hasher.write(object.string.as_bytes());
        #[cfg(not(feature = "nightly"))]
        {
            hasher.write_u8(0xFF);
        }
        hasher.finish()
    };
    assert_eq!(manual, EXPECTED);
}

/// Check RapidHasher is equivalent to the raw rapidhash for a single byte stream.
///
/// Also check that the hash is unique for different byte streams.
#[test]
fn all_sizes() {
    let mut seen = BTreeSet::new();

    for size in 0..=1024 {
        let mut data = std::vec![0; size];
        rand::rng().fill(data.as_mut_slice());

        // One-shot hash versus a hasher fed the same bytes in a single `write`.
        let expected = rapidhash_rs(&data);
        let streamed = {
            let mut hasher = RapidHasher::default();
            hasher.write(&data);
            hasher.finish()
        };
        assert_eq!(expected, streamed, "Failed on size {}", size);

        // `insert` returns false when the value is already present, i.e. on a collision.
        assert!(seen.insert(expected), "Duplicate for size {}", size);
    }
}

/// Ensure that changing a single bit flips at least 8 bits in the resulting hash, and on
/// average flips half of the bits.
///
/// These tests are not deterministic, but should fail with a very low probability.
#[test] fn flip_bit_trial() { use rand::Rng; let mut flips = std::vec![]; for len in 1..=512 { let mut data = std::vec![0; len]; rand::rng().fill(&mut data[..]); let hash = rapidhash_rs(&data); for byte in 0..len { for bit in 0..8 { let mut data = data.clone(); data[byte] ^= 1 << bit; let new_hash = rapidhash_rs(&data); assert_ne!(hash, new_hash, "Flipping byte {} bit {} did not change hash for input len {}", byte, bit, len); let xor = hash ^ new_hash; let flipped = xor.count_ones() as u64; assert!(xor.count_ones() >= 8, "Flipping bit {byte}:{bit} changed only {flipped} bits"); flips.push(flipped); } } } let average = flips.iter().sum::() as f64 / flips.len() as f64; assert!(average > 31.95 && average < 32.05, "Did not flip an average of half the bits. average: {average}, expected: 32.0"); } /// Helper method for [flip_bit_trial_streaming]. Hashes a byte stream in u8 chunks. fn streaming_hash(data: &[u8]) -> u64 { let mut hasher = RapidHasher::default(); for byte in data { hasher.write_u8(*byte); } hasher.finish() } /// Ensure various subsequent `write_u8` calls produce a stable result. /// /// Used to help diagnose an issue using rapidhash for PHF. 
#[test] fn sponge_buffer_stability() { use std::collections::HashSet; /// Simulate the UniCase Ascii/Unicode string hashing fn manual_string_hash(data: &[u8]) -> u64 { // ensure avalanche is disabled, sponge enabled to match PHF let mut hasher = crate::inner::SeedableState::<'static, false, true, false, false>::fixed().build_hasher(); for byte in data { hasher.write_u8(*byte); } hasher.write_u8(0xFF); // prefix freedom hasher.finish() } let mut hashes = HashSet::new(); for len in 1..=64 { for byte in 0u8..=255 { // don't randomized the data, simply extend an extra byte each time let data = std::vec![byte; len]; let hash1 = manual_string_hash(&data); let hash2 = manual_string_hash(&data); assert_eq!(hash1, hash2, "Mismatch for length {}", len); assert!(!hashes.contains(&hash1), "Duplicate hash at length {}", len); hashes.insert(hash1); } } } /// The same as [flip_bit_trial], but against our streaming implementation, to ensure that /// reusing the `a`, `b`, and `seed` state is not causing glaringly obvious issues. /// /// This test is not a substitute for SMHasher or similar. /// /// These tests are not deterministic, but should fail with a very low probability. 
#[test] fn flip_bit_trial_streaming() { use rand::Rng; let mut flips = std::vec![]; for len in 1..=300 { let mut data = std::vec![0; len]; rand::rng().fill(&mut data[..]); let hash = streaming_hash(&data); for byte in 0..len { for bit in 0..8 { let mut data = data.clone(); data[byte] ^= 1 << bit; // check that the hash changed let new_hash = streaming_hash(&data); assert_ne!(hash, new_hash, "Flipping bit {byte}:{bit} for input len {len} did not change hash"); // track how many bits were flipped let xor = hash ^ new_hash; let flipped = xor.count_ones() as u64; assert!(xor.count_ones() >= 8, "Flipping bit {byte}:{bit} for input len {len} changed only {flipped} bits"); flips.push(flipped); } } } // check that on average half of the bits were flipped let average = flips.iter().sum::() as f64 / flips.len() as f64; assert!(average > 31.95 && average < 32.05, "Did not flip an average of half the bits. average: {average}, expected: 32.0"); } /// Compare to the C rapidhash implementation to ensure we match perfectly. 
#[cfg(target_endian = "little")] #[test] fn compare_to_c() { use rand::Rng; use rapidhash_c::rapidhashcc_rs; for len in 0..=512 { let mut data = std::vec![0; len]; rand::rng().fill(&mut data[..]); for byte in 0..len { for bit in 0..8 { let mut data = data.clone(); data[byte] ^= 1 << bit; let rust_hash = rapidhash_rs_seeded(&data, &DEFAULT_RAPID_SECRETS); let mut c_hash = rapidhashcc_rs(&data, DEFAULT_SEED); // TODO: remove this hack; it's to make it work with how the Hasher avalanches c_hash = rapid_mix_np::(c_hash, DEFAULT_RAPID_SECRETS.secrets[1]); assert_eq!(rust_hash, c_hash, "Mismatch with input {} byte {} bit {}", len, byte, bit); let mut rust_hasher = SeedableState::fixed().build_hasher(); rust_hasher.write(&data); let rust_hasher_hash = rust_hasher.finish(); assert_eq!(rust_hash, rust_hasher_hash, "Hasher mismatch with input {} byte {} bit {}", len, byte, bit); } } } } #[test] fn disambiguation_check() { use std::vec::Vec; let hasher = SeedableState::default(); let a = [std::vec![1], std::vec![2, 3]]; let b = [std::vec![1, 2], std::vec![3]]; assert_ne!(hasher.hash_one(a), hasher.hash_one(b)); let a = [std::vec![], std::vec![1]]; let b = [std::vec![1], std::vec![]]; assert_ne!(hasher.hash_one(a), hasher.hash_one(b)); let a: [Vec>; 2] = [std::vec![], std::vec![std::vec![]]]; let b: [Vec>; 2] = [std::vec![std::vec![]], std::vec![]]; assert_ne!(hasher.hash_one(a), hasher.hash_one(b)); let a = ["abc", "def"]; let b = ["fed", "abc"]; assert_ne!(hasher.hash_one(a), hasher.hash_one(b)); let a = ["abc", "def"]; let b = ["abcd", "ef"]; assert_ne!(hasher.hash_one(a), hasher.hash_one(b)); let a = [1u8, 2]; let b = [2u8, 1]; assert_ne!(hasher.hash_one(a), hasher.hash_one(b)); let a = [1u16, 2]; let b = [2u16, 1]; assert_ne!(hasher.hash_one(a), hasher.hash_one(b)); let a = [1u32, 2]; let b = [2u32, 1]; assert_ne!(hasher.hash_one(a), hasher.hash_one(b)); let a = [1u64, 2]; let b = [2u64, 1]; assert_ne!(hasher.hash_one(a), hasher.hash_one(b)); let a = [1u128, 2]; let b = 
[2u128, 1]; assert_ne!(hasher.hash_one(a), hasher.hash_one(b)); } } rapidhash-4.4.1/src/inner/rapid_const.rs000064400000000000000000000314261046102023000163600ustar 00000000000000use crate::util::hints::{assume, likely, unlikely}; use super::mix_np::{rapid_mix_np, rapid_mum_np}; use super::read_np::{read_u32_np, read_u64_np}; #[cfg(test)] use super::{DEFAULT_RAPID_SECRETS, RapidSecrets}; /// This is a somewhat arbitrary cutoff for the long path. /// /// It's dependent on the cost of the function call, register clobbering, setup/teardown of the 7 /// independent lanes etc. The current value should be reached by testing against the /// hash/rapidhash-f/medium benchmarks, and may benefit from being tuned per target platform. const COLD_PATH_CUTOFF: usize = 400; /// Rapidhash a single byte stream, matching the C++ implementation, with the default seed. /// /// Fixed length inputs will greatly benefit from inlining with [rapidhash_rs_inline] instead. #[inline] #[cfg(test)] pub(crate) const fn rapidhash_rs(data: &[u8]) -> u64 { rapidhash_rs_inline::(data, &DEFAULT_RAPID_SECRETS) } /// Rapidhash a single byte stream, matching the C++ implementation, with a custom seed. /// /// Fixed length inputs will greatly benefit from inlining with [rapidhash_rs_inline] instead. #[inline] #[cfg(test)] pub(crate) const fn rapidhash_rs_seeded(data: &[u8], secrets: &RapidSecrets) -> u64 { rapidhash_rs_inline::(data, secrets) } /// Rapidhash a single byte stream, matching the C++ implementation. /// /// Is marked with `#[inline(always)]` to force the compiler to inline and optimize the method. /// Can provide large performance uplifts for fixed-length inputs at compile time. 
// NOTE(review): `AVALANCHE` and the arguments of the `::(..)` turbofish calls in this file are
// not declared in this copy of the source; presumably they are const generic parameters whose
// `<...>` lists were lost — confirm against the upstream file.
#[inline(always)]
#[cfg(test)]
pub(crate) const fn rapidhash_rs_inline(data: &[u8], secrets: &RapidSecrets) -> u64 {
    // split the secrets struct into the pre-mixed seed and the array of secrets
    let seed = secrets.seed;
    let secrets = &secrets.secrets;
    let hash = rapidhash_core::(seed, secrets, data);
    if AVALANCHE {
        // extra mixing step on top of the core hash, always using the default secrets
        rapid_mix_np::(hash, super::DEFAULT_RAPID_SECRETS.secrets[1])
    } else {
        hash
    }
}

/// Mix `data` into `seed` using `secrets` and return the resulting hash state.
///
/// Inputs of at most 16 bytes are handled inline; anything longer is forwarded to
/// [rapidhash_core_17_288].
#[inline(always)]
#[must_use]
pub(super) const fn rapidhash_core(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 {
    if likely(data.len() <= 16) {
        // an empty input leaves both words at zero
        let mut a = 0;
        let mut b = 0;

        if likely(data.len() >= 8) {
            // 8..=16 bytes: the first and last 8 bytes, which overlap for lengths below 16
            a = read_u64_np(data, 0);
            b = read_u64_np(data, data.len() - 8);
        } else if likely(data.len() >= 4) {
            // 4..=7 bytes: the first and last 4 bytes, which overlap for lengths below 8
            a = read_u32_np(data, 0) as u64;
            b = read_u32_np(data, data.len() - 4) as u64;
        } else if !data.is_empty() {
            // 1..=3 bytes: first and last byte go into `a`, the middle byte into `b`
            a = ((data[0] as u64) << 45) | data[data.len() - 1] as u64;
            b = data[data.len() >> 1] as u64;
        }

        // the input length is folded into the seed before finishing
        seed = seed.wrapping_add(data.len() as u64);
        rapidhash_finish::(a, b , seed, secrets)
    } else {
        // SAFETY: we have just checked the length is >16
        unsafe { rapidhash_core_17_288::(seed, secrets, data) }
    }
}

// Never inline this, keep the small string path as small as possible to improve the inlining
// chances of the write_length_prefix and finish functions. If those two don't get inlined, the
// overall performance can be 5x worse when hashing a single string under 100 bytes. Inputs of at
// most COLD_PATH_CUTOFF bytes pay the cost of one extra if, and longer inputs pay one more
// function call, but this is nominal in comparison to the overall hashing cost.
//
// Callers must guarantee `data.len() > 16`.
//
// NOTE(review): the `288` in the function name does not match the current COLD_PATH_CUTOFF;
// presumably it is a historical value — confirm before relying on the name.
#[cold]
#[inline(never)]
#[must_use]
const unsafe fn rapidhash_core_17_288(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 {
    // SAFETY: we promise to never call this with <=16 length data to omit some bounds checks.
    // This is really intended for codegen-units >1 and/or no LTO.
    assume(data.len() > 16);

    // This branch is a hack to move the function prologue/epilogue (stack spilling) into the >48
    // path as it otherwise unnecessarily hurts the <48 path.
    // - It slows down aarch64 >48 inputs, where there is no stack spill.
    // - It speeds up x86_64 by removing the stack spill on the <48 path, but is
    //   slightly slower on the >48 path... I cannot figure out how to move the spill into the >48
    //   path only but then re-use the already cached <48 path finish. Ideas welcome.
    // - It makes minimal difference on WASM.
    #[cfg(not(target_arch = "aarch64"))]
    if likely(data.len() <= 48) {
        // SAFETY: data.len() is guaranteed to be >16
        return rapidhash_final_48::(seed, secrets, data, data);
    }

    // `slice` is the not-yet-consumed tail of `data`; `data` itself is kept for the final reads
    let mut slice = data;
    if unlikely(data.len() > 48) {
        if unlikely(data.len() > COLD_PATH_CUTOFF) {
            // SAFETY: data.len() is guaranteed to be >COLD_PATH_CUTOFF
            return rapidhash_core_cold::(seed, secrets, data);
        }

        // three independent lanes, each consuming 16 bytes of every 48-byte chunk
        let mut see1 = seed;
        let mut see2 = seed;

        while slice.len() >= 48 {
            seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed);
            see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1);
            see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2);
            let (_, split) = slice.split_at(48);
            slice = split;
        }

        // fold the lanes back into a single seed
        seed ^= see1 ^ see2;
    }

    // SAFETY: data.len() is guaranteed to be >16 (and >48 on the targets where the early return
    // above is compiled in)
    rapidhash_final_48::(seed, secrets, slice, data)
}

/// The long path, intentionally kept cold because at this length of data the function call is
/// minor, but the complexity of this function — if it were inlined — could prevent x.hash() from
/// being inlined which would have a much higher penalty and prevent other optimisations.
#[cold] #[inline(never)] #[must_use] const unsafe fn rapidhash_core_cold(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 { // SAFETY: we promise to never call this with <=COLD_PATH_CUTOFF length data to omit some bounds checks assume(data.len() > COLD_PATH_CUTOFF); let mut slice = data; let mut see1 = seed; let mut see2 = seed; let mut see3 = seed; let mut see4 = seed; let mut see5 = seed; let mut see6 = seed; if !COMPACT { while slice.len() >= 224 { seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); see3 = rapid_mix_np::(read_u64_np(slice, 48) ^ secrets[3], read_u64_np(slice, 56) ^ see3); see4 = rapid_mix_np::(read_u64_np(slice, 64) ^ secrets[4], read_u64_np(slice, 72) ^ see4); see5 = rapid_mix_np::(read_u64_np(slice, 80) ^ secrets[5], read_u64_np(slice, 88) ^ see5); see6 = rapid_mix_np::(read_u64_np(slice, 96) ^ secrets[6], read_u64_np(slice, 104) ^ see6); seed = rapid_mix_np::(read_u64_np(slice, 112) ^ secrets[0], read_u64_np(slice, 120) ^ seed); see1 = rapid_mix_np::(read_u64_np(slice, 128) ^ secrets[1], read_u64_np(slice, 136) ^ see1); see2 = rapid_mix_np::(read_u64_np(slice, 144) ^ secrets[2], read_u64_np(slice, 152) ^ see2); see3 = rapid_mix_np::(read_u64_np(slice, 160) ^ secrets[3], read_u64_np(slice, 168) ^ see3); see4 = rapid_mix_np::(read_u64_np(slice, 176) ^ secrets[4], read_u64_np(slice, 184) ^ see4); see5 = rapid_mix_np::(read_u64_np(slice, 192) ^ secrets[5], read_u64_np(slice, 200) ^ see5); see6 = rapid_mix_np::(read_u64_np(slice, 208) ^ secrets[6], read_u64_np(slice, 216) ^ see6); let (_, split) = slice.split_at(224); slice = split; } if likely(slice.len() >= 112) { seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], 
read_u64_np(slice, 24) ^ see1); see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); see3 = rapid_mix_np::(read_u64_np(slice, 48) ^ secrets[3], read_u64_np(slice, 56) ^ see3); see4 = rapid_mix_np::(read_u64_np(slice, 64) ^ secrets[4], read_u64_np(slice, 72) ^ see4); see5 = rapid_mix_np::(read_u64_np(slice, 80) ^ secrets[5], read_u64_np(slice, 88) ^ see5); see6 = rapid_mix_np::(read_u64_np(slice, 96) ^ secrets[6], read_u64_np(slice, 104) ^ see6); let (_, split) = slice.split_at(112); slice = split; } } else { while slice.len() >= 112 { seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); see3 = rapid_mix_np::(read_u64_np(slice, 48) ^ secrets[3], read_u64_np(slice, 56) ^ see3); see4 = rapid_mix_np::(read_u64_np(slice, 64) ^ secrets[4], read_u64_np(slice, 72) ^ see4); see5 = rapid_mix_np::(read_u64_np(slice, 80) ^ secrets[5], read_u64_np(slice, 88) ^ see5); see6 = rapid_mix_np::(read_u64_np(slice, 96) ^ secrets[6], read_u64_np(slice, 104) ^ see6); let (_, split) = slice.split_at(112); slice = split; } } if !COMPACT { if slice.len() >= 48 { seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; if slice.len() >= 48 { seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; } } } else { while 
slice.len() >= 48 { seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); see1 = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[1], read_u64_np(slice, 24) ^ see1); see2 = rapid_mix_np::(read_u64_np(slice, 32) ^ secrets[2], read_u64_np(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; } } see3 ^= see4; see5 ^= see6; seed ^= see1; see3 ^= see2; seed ^= see5; seed ^= see3; rapidhash_final_48::(seed, secrets, slice, data) } #[inline(always)] #[must_use] const unsafe fn rapidhash_final_48(mut seed: u64, secrets: &[u64; 7], slice: &[u8], data: &[u8]) -> u64 { // SAFETY: the final 48 byte handling is only called with >16 length data assume(data.len() > 16); if likely(slice.len() > 16) { seed = rapid_mix_np::(read_u64_np(slice, 0) ^ secrets[0], read_u64_np(slice, 8) ^ seed); if likely(slice.len() > 32) { seed = rapid_mix_np::(read_u64_np(slice, 16) ^ secrets[0], read_u64_np(slice, 24) ^ seed); } } let a = read_u64_np(data, data.len() - 16); let b = read_u64_np(data, data.len() - 8); seed = seed.wrapping_add(data.len() as u64); rapidhash_finish::(a, b, seed, secrets) } #[inline(always)] #[must_use] const fn rapidhash_finish(mut a: u64, mut b: u64, seed: u64, secrets: &[u64; 7]) -> u64 { a ^= secrets[0]; b ^= seed; (a, b) = rapid_mum_np::(a, b); if AVALANCHE { // we keep RAPID_CONST constant as the XOR 0xaa can be done in a single instruction on some // platforms, whereas it would require an additional load for a random secret. rapid_mix_np::(a ^ 0xaaaaaaaaaaaaaaaa ^ seed, b ^ secrets[1]) } else { a ^ b } } rapidhash-4.4.1/src/inner/rapid_hasher.rs000064400000000000000000000240331046102023000165000ustar 00000000000000use core::hash::Hasher; use super::DEFAULT_RAPID_SECRETS; use super::mix_np::rapid_mix_np; use super::rapid_const::rapidhash_core; use super::seed::rapidhash_seed; use crate::util::hints::likely; /// This function needs to be as small as possible to have as high a chance of being inlined as /// possible. 
///
/// We try to generate the least amount of LLVM-IR code to reduce the inlining cost. Rust should
/// remove the const statements before generating the LLVM-IR.
// Macro arguments: `$write_num` is the name of the generated method, `$int` the integer type it
// accepts, and `$unsigned` the unsigned type `i` is cast to before it is widened.
macro_rules! write_num {
    ($write_num:ident, $int:ident, $unsigned:ident) => {
        /// Write an integer to the hasher, marked as `#[inline(always)]`.
        #[inline(always)]
        fn $write_num(&mut self, i: $int) {
            // width of `$int` in bits
            const N: u8 = core::mem::size_of::<$int>() as u8 * 8;

            if SPONGE {
                // This early u128 conversion seems to be important on x86, as if it impacts the
                // LLVM inlining cost too much to have it inside the if statement...
                // The compiler also converts an i32 -> i128 -> u128 unless we coerce it into its
                // unsigned type first.
                let bytes = (i as $unsigned) as u128;

                if likely(self.sponge_len + N <= 128) {
                    // HOT: add the bytes into the sponge
                    // (placed directly above the `sponge_len` bits that are already buffered)
                    self.sponge |= bytes << self.sponge_len;
                    self.sponge_len += N;
                } else {
                    // COLD: sponge is full, so we need to flush it
                    // (taken whenever the new value does not fit in the remaining bits: the
                    // buffered 128 bits are mixed into the seed as two u64 halves and the sponge
                    // restarts holding only the new value)
                    let a = self.sponge as u64;
                    let b = (self.sponge >> 64) as u64;
                    self.seed = rapid_mix_np::(a ^ self.seed, b ^ self.secrets[0]);
                    self.sponge = bytes;
                    self.sponge_len = N;
                }
            } else {
                // slower but high-quality rapidhash
                // (every integer is mixed into the seed immediately, no buffering)
                let bytes = (i as $unsigned) as u64;
                self.seed = rapid_mix_np::(bytes ^ self.secrets[0], bytes ^ self.seed);
            }
        }
    };
}

/// A [Hasher] trait compatible hasher that uses the [rapidhash](https://github.com/Nicoshev/rapidhash)
/// algorithm, and uses `#[inline(always)]` for all methods.
///
/// Using `#[inline(always)]` can deliver a large performance improvement when hashing complex
/// objects, but should be benchmarked for your specific use case. If you have HashMaps for many
/// different types this may come at the cost of some binary size increase.
///
/// See [crate::fast::RandomState] for usage with [std::collections::HashMap].
///
/// # Example
/// ```
/// use std::hash::Hasher;
///
/// use rapidhash::quality::RapidHasher;
///
/// let mut hasher = RapidHasher::default();
/// hasher.write(b"hello world");
/// let hash = hasher.finish();
/// ```
#[derive(Copy, Clone)]
pub struct RapidHasher<'s, const AVALANCHE: bool, const SPONGE: bool, const COMPACT: bool = false, const PROTECTED: bool = false> {
    /// Running hash state; every write mixes into it and `finish` derives the output from it.
    seed: u64,
    /// The secrets mixed in alongside the data, borrowed for the lifetime of the hasher.
    secrets: &'s [u64; 7],
    /// Buffer of integer writes that have not been mixed into `seed` yet (used when `SPONGE`).
    sponge: u128,
    /// Number of bits of `sponge` currently in use.
    sponge_len: u8,
}

impl<'s, const AVALANCHE: bool, const SPONGE: bool, const COMPACT: bool, const PROTECTED: bool> RapidHasher<'s, AVALANCHE, SPONGE, COMPACT, PROTECTED> {
    /// Default `RapidHasher` seed.
    pub const DEFAULT_SEED: u64 = super::seed::DEFAULT_SEED;

    /// Create a new [RapidHasher] with a custom seed.
    ///
    /// See instead [crate::quality::RandomState::new] or [crate::fast::RandomState::new] for a random
    /// seed and random secret initialization, for minimal DoS resistance.
    #[inline(always)]
    #[must_use]
    pub const fn new(mut seed: u64) -> Self {
        // do most of the rapidhash_seed initialization here to avoid doing it on each int
        seed = rapidhash_seed(seed);
        Self::new_precomputed_seed(seed, &DEFAULT_RAPID_SECRETS.secrets)
    }

    /// Create a new [RapidHasher] from a seed that is stored as-is (no `rapidhash_seed` step is
    /// applied here) and the given secrets, starting with an empty sponge.
    #[inline(always)]
    #[must_use]
    pub(super) const fn new_precomputed_seed(seed: u64, secrets: &'s [u64; 7]) -> Self {
        Self {
            seed,
            secrets,
            sponge: 0,
            sponge_len: 0,
        }
    }

    /// Create a new [RapidHasher] using the default seed and secrets.
    #[inline(always)]
    #[must_use]
    pub const fn default_const() -> Self {
        Self::new(Self::DEFAULT_SEED)
    }
}

// NOTE(review): the `impl` below names AVALANCHE/SPONGE/COMPACT/PROTECTED without declaring them;
// presumably the `impl<const ...>` parameter list was lost from this copy — confirm upstream.
impl Default for RapidHasher<'_, AVALANCHE, SPONGE, COMPACT, PROTECTED> {
    /// Create a new [RapidHasher] with the default seed.
    ///
    /// See [crate::inner::RandomState] for a [std::hash::BuildHasher] that initializes with a random
    /// seed.
    #[inline(always)]
    fn default() -> Self {
        Self::new(super::seed::DEFAULT_SEED)
    }
}

/// This implementation implements methods for all integer types as the compiler will (hopefully...)
/// inline and heavily optimize the rapidhash_core for each.
/// Where the bytes length is known the
/// compiler can make significant optimisations and saves us writing them out by hand.
impl Hasher for RapidHasher<'_, AVALANCHE, SPONGE, COMPACT, PROTECTED> {
    /// Produce the final hash value, marked as `#[inline(always)]`.
    #[inline(always)]
    fn finish(&self) -> u64 {
        // written to minimise the LLVM-IR lines, as rust should remove the const if statements
        #[allow(clippy::collapsible_else_if)]
        if SPONGE {
            if !AVALANCHE {
                if self.sponge_len > 0 {
                    // flush whatever is still buffered in the sponge; that mix is the result
                    let a = self.sponge as u64;
                    let b = (self.sponge >> 64) as u64;
                    rapid_mix_np::(a ^ self.seed, b ^ self.secrets[0])
                } else {
                    self.seed
                }
            } else {
                let mut seed = self.seed;
                if self.sponge_len > 0 {
                    // flush whatever is still buffered in the sponge into the seed
                    let a = self.sponge as u64;
                    let b = (self.sponge >> 64) as u64;
                    seed = rapid_mix_np::(a ^ self.seed, b ^ self.secrets[0]);
                }

                // FUTURE: revisit when write_str is stable, as we'd want to move this into the
                // above if statement
                rapid_mix_np::(seed, DEFAULT_RAPID_SECRETS.secrets[1])
            }
        } else {
            if !AVALANCHE {
                // no sponge and no avalanche: the running seed is the hash
                self.seed
            } else {
                // one final mix with the default secrets
                rapid_mix_np::(self.seed, DEFAULT_RAPID_SECRETS.secrets[1])
            }
        }
    }

    /// Write a byte slice to the hasher, marked as `#[inline(always)]`.
    // NOTE(review): this mixes straight into `seed` and neither reads nor flushes the sponge, so
    // integers still buffered there are only mixed in later — confirm this is intended.
    #[inline(always)]
    fn write(&mut self, bytes: &[u8]) {
        self.seed = rapidhash_core::(self.seed, self.secrets, bytes);
    }

    // Integer writes: method name, accepted type, and the unsigned type it is cast to first.
    write_num!(write_u8, u8, u8);
    write_num!(write_u16, u16, u16);
    write_num!(write_u32, u32, u32);
    write_num!(write_u64, u64, u64);
    write_num!(write_usize, usize, usize);
    write_num!(write_i8, i8, u8);
    write_num!(write_i16, i16, u16);
    write_num!(write_i32, i32, u32);
    write_num!(write_i64, i64, u64);
    write_num!(write_isize, isize, usize);

    /// Write an int to the hasher, marked as `#[inline(always)]`.
    // The value is split into its low and high 64-bit halves and mixed into `seed` directly; like
    // `write`, this does not go through the sponge.
    #[inline(always)]
    fn write_u128(&mut self, i: u128) {
        let a = i as u64;
        let b = (i >> 64) as u64;
        self.seed = rapid_mix_np::(a ^ self.seed, b ^ self.secrets[0]);
    }

    /// Write an int to the hasher, marked as `#[inline(always)]`.
    // Same as `write_u128`, after reinterpreting the value as unsigned.
    #[inline(always)]
    fn write_i128(&mut self, i: i128) {
        let a = (i as u128) as u64;
        let b = (i as u128 >> 64) as u64;
        self.seed = rapid_mix_np::(a ^ self.seed, b ^ self.secrets[0]);
    }

    /// Write a length prefix to the hasher, marked as `#[inline(always)]`.
    #[cfg(feature = "nightly")]
    #[inline(always)]
    fn write_length_prefix(&mut self, len: usize) {
        self.write_usize(len);
    }

    /// Write a string to the hasher, without adding a length prefix as rapidhash already mixes in
    /// the byte length to prevent length extension attacks, marked as `#[inline(always)]`.
    #[cfg(feature = "nightly")]
    #[inline(always)]
    fn write_str(&mut self, s: &str) {
        self.write(s.as_bytes());
    }
}

#[cfg(test)]
mod tests {
    extern crate std;

    use std::hash::BuildHasher;
    use crate::fast::SeedableState;
    use super::*;

    // Pins the in-memory size of the hasher to 48 bytes.
    #[test]
    fn test_hasher_size() {
        assert_eq!(core::mem::size_of::>(), 48);
    }

    /// Test that writing a single u64 outputs the same as writing the equivalent bytes.
    ///
    /// Does not apply if the algorithm is using a sponge.
    #[ignore]
    #[test]
    fn test_hasher_write_u64() {
        assert_eq!((8 & 24) >> (8 >> 3), 4);

        let ints = [1234u64, 0, 1, u64::MAX, u64::MAX - 2385962040453523];

        for int in ints {
            // hash the little-endian bytes through `write` ...
            let mut hasher = RapidHasher::::default();
            hasher.write(int.to_le_bytes().as_slice());
            let a = hasher.finish();

            assert_eq!(int.to_le_bytes().as_slice().len(), 8);

            // ... and the same value through `write_u64`
            let mut hasher = RapidHasher::::default();
            hasher.write_u64(int);
            let b = hasher.finish();

            assert_eq!(a, b, "Mismatching hash for u64 with input {int}");
        }
    }

    /// Check the number of collisions when writing numbers.
#[test] #[ignore] #[cfg(feature = "std")] fn test_num_collisions() { let builder = SeedableState::default(); let mut collisions = 0; let mut set = std::collections::HashSet::new(); for i in 0..=u16::MAX { let hash_u16 = builder.hash_one(i) & 0xFFFFFF; if set.contains(&hash_u16) { collisions += 1; } else { set.insert(hash_u16); } // if i < 256 { // let hash_u8 = builder.hash_one(i as u8) & 0xFFFF; // if set.contains(&hash_u8) { // collisions += 1; // } else { // set.insert(hash_u8); // } // } } assert_eq!(collisions, 0, "Collisions found when hashing numbers with seed {builder:?}"); } } rapidhash-4.4.1/src/inner/read_np.rs000064400000000000000000000075241046102023000154650ustar 00000000000000//! Internal module for reading unaligned bytes from a slice into `u64` and `u32` values. //! //! This is a non-portable implementation specifically designed for `RapidHasher`. /// A macro for assertions that can be disabled with the `unsafe` feature. These should all be /// elided at compile-time anyway. macro_rules! unsafe_assert { ($cond:expr) => { #[cfg(feature = "unsafe")] { debug_assert!($cond); } #[cfg(not(feature = "unsafe"))] { assert!($cond); } }; } /// Unsafe but const-friendly unaligned bytes to u64. The compiler can't seem to remove the bounds /// checks for small integers because we do some funky bit shifting in the indexing. /// /// SAFETY: `slice` must be at least `offset+8` bytes long, which we guarantee in this rapidhash /// implementation. #[inline(always)] pub(crate) const fn read_u64_np(slice: &[u8], offset: usize) -> u64 { unsafe_assert!(slice.len() >= 8 + offset); // SAFETY: read_u64_np must always be called in a manner that guarantees the above assertions unsafe { core::ptr::read_unaligned(slice.as_ptr().add(offset) as *const u64) } } /// Unsafe but const-friendly unaligned bytes to u32. The compiler can't seem to remove the bounds /// checks for small integers because we do some funky bit shifting in the indexing. 
/// /// SAFETY: `slice` must be at least `offset+4` bytes long, which we guarantee in this rapidhash /// implementation. #[inline(always)] pub(crate) const fn read_u32_np(slice: &[u8], offset: usize) -> u32 { unsafe_assert!(slice.len() >= 4 + offset); // SAFETY: read_u32_np must always be called in a manner that guarantees the above assertions unsafe { core::ptr::read_unaligned(slice.as_ptr().add(offset) as *const u32) } } #[cfg(test)] mod tests { use super::*; #[cfg(target_endian = "little")] #[test] fn test_read_u32_np() { let bytes = &[23, 145, 3, 34]; assert_eq!(read_u32_np(bytes, 0), 570659095); let bytes = &[24, 54, 3, 23, 145, 3, 34]; assert_eq!(read_u32_np(bytes, 3), 570659095); assert_eq!(read_u32_np(&[0, 0, 0, 0], 0), 0); assert_eq!(read_u32_np(&[1, 0, 0, 0], 0), 1); assert_eq!(read_u32_np(&[12, 0, 0, 0], 0), 12); assert_eq!(read_u32_np(&[0, 10, 0, 0], 0), 2560); } #[cfg(target_endian = "little")] #[test] fn test_read_u64_np() { let bytes = [23, 145, 3, 34, 0, 0, 0, 0, 0, 0, 0].as_slice(); assert_eq!(read_u64_np(bytes, 0), 570659095); let bytes = [1, 2, 3, 23, 145, 3, 34, 0, 0, 0, 0, 0, 0, 0].as_slice(); assert_eq!(read_u64_np(bytes, 3), 570659095); let bytes = [0, 0, 0, 0, 0, 0, 0, 0].as_slice(); assert_eq!(read_u64_np(bytes, 0), 0); } #[cfg(target_endian = "little")] #[cfg(feature = "std")] #[test] fn test_u32_to_u128_delta() { fn formula(len: u64) -> u64 { (len & 24) >> (len >> 3) } fn formula2(len: u64) -> u64 { match len { 8.. 
=> 4, _ => 0, } } let inputs: std::vec::Vec = (4..=16).collect(); let outputs: std::vec::Vec = inputs.iter().map(|&x| formula(x)).collect(); let expected = std::vec![0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4]; assert_eq!(outputs, expected); assert_eq!(outputs, inputs.iter().map(|&x| formula2(x)).collect::>()); } #[test] #[should_panic] #[cfg(any(test, not(feature = "unsafe")))] fn test_read_u32_np_to_short_panics() { let bytes = [23, 145, 0].as_slice(); assert_eq!(read_u32_np(bytes, 0), 0); } #[test] #[should_panic] #[cfg(any(test, not(feature = "unsafe")))] fn test_read_u64_np_to_short_panics() { let bytes = [23, 145, 0].as_slice(); assert_eq!(read_u64_np(bytes, 0), 0); } } rapidhash-4.4.1/src/inner/seed.rs000064400000000000000000000121661046102023000147730ustar 00000000000000//! Reliable seeding and secrets generation for the hash functions. // Allow dead code as we don't export the unstable rapidhash_rs or the RapidSecrets asa they aren't // used in the RapidHasher API yet. #![allow(dead_code)] use crate::util::mix::rapid_mix; /// The default seed used in the C++ implementation. pub(crate) const DEFAULT_SEED: u64 = 0; /// Used only for generating random secrets. pub(crate) const DEFAULT_SECRETS: [u64; 7] = [ 0x2d358dccaa6c78a5, 0x8bb84b93962eacc9, 0x4b33a62ed433d4a3, 0x4d5a2da51de1aa47, 0xa0761d6478bd642f, 0xe7037ed1a0b428db, 0x90ed1765281c388c, ]; /// The default rapidhash secrets used in the C++ implementation. /// /// We recommend generating your own secrets using the [`RapidSecrets::seed`] method to avoid /// trivial collision attacks if you need minimal HashDoS protection. pub const DEFAULT_RAPID_SECRETS: RapidSecrets = RapidSecrets::seed_cpp(DEFAULT_SEED); /// Hold the seed and secrets to be used by rapidhash. /// /// RapidSecrets premix the seed and generate a set of other secrets based on the seed that are all /// used in the hashing process. 
There are some quality checks on the random values to ensure a /// reasonable distribution of entropy in the generated secrets. /// /// Constructing this struct is fairly cheap, but unnecessary in the critical path. We therefore /// recommend instantiating it once and re-using the same instance for any persistent hashing. The /// `seed` method is marked `const` to also do so at compile time. /// /// # Minimal HashDoS Protection /// We recommend changing the default seed and secrets to avoid trivial collision /// attacks. For persistent hashing, you can hard code your own randomized seed at compile time. /// /// ```rust /// use rapidhash::v3::RapidSecrets; /// const DEFAULT_SECRETS: RapidSecrets = RapidSecrets::seed(0x123456); // <-- change this value! /// /// /// Export your chosen rapidhash version and secrets for use throughout your project. /// pub fn rapidhash(data: &[u8]) -> u64 { /// rapidhash::v3::rapidhash_v3_seeded(data, &DEFAULT_SECRETS) /// } /// ``` /// /// TODO: serde or serialization support. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct RapidSecrets { /// The core rapidhash seed. pub seed: u64, /// The secrets, effectively other seeds used in the hashing process. pub secrets: [u64; 7], } impl RapidSecrets { /// Generate secrets from a given randomized seed. /// /// Note the chosen seed will be pre-mixed to further randomize it, and the secrets will be /// computed based on the seed. /// /// If compatibility with the C++ implementation is required, use the `seed_cpp` method instead. 
#[inline]
pub const fn seed(seed: u64) -> Self {
    let seed = premix_seed(seed, 0);

    // Each secret is the premix of the value before it (the first one starts from the premixed
    // seed), passing its own index as the second argument. A `while` loop is used because `for`
    // is not available in a `const fn`.
    let mut secrets = [0u64; 7];
    let mut previous = seed;
    let mut index = 0;
    while index < secrets.len() {
        previous = premix_seed(previous, index);
        secrets[index] = previous;
        index += 1;
    }

    Self { seed, secrets }
}

/// Returns a copy of these secrets whose seed has been premixed once more.
///
/// The secrets array is carried over unchanged, which makes this a cheap way to give another
/// in-memory HashMap its own seed.
#[inline(always)]
pub const fn reseed(&self) -> Self {
    let seed = premix_seed(self.seed, 6);
    let secrets = self.secrets;
    Self { seed, secrets }
}

/// Builds a `RapidSecrets` whose seed and secrets are compatible with the C++ implementation.
///
/// Only the seed is derived from the argument; the secrets are **the default secrets**. This
/// leaves the result open to some trivial collision attacks, as both the seed and the secrets
/// need to be randomised for minimal HashDoS resistance.
#[inline(always)]
pub const fn seed_cpp(seed: u64) -> Self {
    let seed = rapidhash_seed(seed);
    Self {
        seed,
        secrets: DEFAULT_SECRETS,
    }
}

/// Creates a new `RapidSecrets` instance with a randomized seed and secrets.
///
/// The quality of the randomness will be better with the `rand` feature enabled.
#[inline] pub fn random() -> Self { let seed = crate::inner::seeding::seed::get_seed(); let secrets = crate::inner::seeding::secrets::get_secrets(); Self { seed, secrets: *secrets, } } } #[inline(always)] pub(super) const fn rapidhash_seed(seed: u64) -> u64 { seed ^ rapid_mix::(seed ^ DEFAULT_SECRETS[2], DEFAULT_SECRETS[1]) } #[inline] const fn premix_seed(mut seed: u64, i: usize) -> u64 { seed ^= rapid_mix::(seed ^ DEFAULT_SECRETS[2], DEFAULT_SECRETS[i]); // ensure the seeds are of reasonable non-zero quality const HI: u64 = 0xFFFF << 48; const MI: u64 = 0xFFFF << 24; const LO: u64 = 0xFFFF; if (seed & HI) == 0 { seed |= 1u64 << 63; } if (seed & MI) == 0 { seed |= 1u64 << 31; } if (seed & LO) == 0 { seed |= 1u64; } seed } rapidhash-4.4.1/src/inner/seeding.rs000064400000000000000000000302121046102023000154610ustar 00000000000000//! Internal module for seeding the hash functions. //! //! Located here instead of `util` to make use of the non-portable mix functions. /// Don't want to have a recursive import here, so we copy it... const DEFAULT_SECRETS: [u64; 7] = [ 0x2d358dccaa6c78a5, 0x8bb84b93962eacc9, 0x4b33a62ed433d4a3, 0x4d5a2da51de1aa47, 0xa0761d6478bd642f, 0xe7037ed1a0b428db, 0x90ed1765281c388c, ]; pub(crate) mod seed { use crate::inner::mix_np::rapid_mix_np; use super::DEFAULT_SECRETS; #[inline] pub fn get_seed() -> u64 { // this would all be so much easier if the rust std exposed how it does RandomState // we take the stack pointer as a rather poor but cheap source of entropy let mut seed = 0; let arbitrary = core::ptr::addr_of!(seed) as u64; // with std we avoid using global atomics #[cfg(feature = "std")] { use core::cell::Cell; thread_local! 
{ static RANDOM_SEED: Cell = const { Cell::new(0) } } seed = RANDOM_SEED.with(|cell| { let mut seed = cell.get(); seed = rapid_mix_np::(seed ^ DEFAULT_SECRETS[1], arbitrary ^ DEFAULT_SECRETS[0]); cell.set(seed); seed }); } // without std we fall back to a global atomic and accept the chance of // race conditions, but don't consider this an issue // // Most targets without atomics can still do atomic load/store, but just can't // do atomic compare-and-swap instructions. So this should still compile/work... #[cfg(not(feature = "std"))] { use core::sync::atomic::{AtomicUsize, Ordering}; static RANDOM_SEED: AtomicUsize = AtomicUsize::new(0); seed = RANDOM_SEED.load(Ordering::Relaxed) as u64; seed = rapid_mix_np::(seed ^ DEFAULT_SECRETS[1], arbitrary ^ DEFAULT_SECRETS[0]); RANDOM_SEED.store(seed as usize, Ordering::Relaxed); } seed ^ rapid_mix_np::(seed ^ DEFAULT_SECRETS[2], DEFAULT_SECRETS[1]) } #[cfg(test)] mod tests { use super::get_seed; #[test] fn test_get_seed() { let seed1 = get_seed(); let seed2 = get_seed(); assert_ne!(seed1, seed2, "get_seed should return different values on subsequent calls"); } } } #[cfg(not(target_has_atomic = "ptr"))] pub(crate) mod secrets { #[inline(always)] pub fn get_secrets() -> &'static [u64; 7] { // Platforms that do not support atomic pointers have no global secret storage, // so we always return the default rapidhash secrets. &crate::inner::seed::DEFAULT_RAPID_SECRETS.secrets } #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct GlobalSecrets { _only_uses_default_secrets: (), } impl GlobalSecrets { /// Set up the global secrets if they are not already initialized. #[inline(always)] pub fn new() -> Self { Self { _only_uses_default_secrets: (), } } /// Get the global secrets, which are guaranteed to be initialized, but these will /// be the default rapidhash secrets as this target does not support atomic pointers. 
#[inline(always)] pub fn get(self) -> &'static [u64; 7] { get_secrets() } /// Get the fixed seed, which is guaranteed to be initialized. #[inline(always)] pub fn get_global_seed(self) -> u64 { // rapidhash v1 seed as default 0xbdd89aa982704029 } } } #[cfg(target_has_atomic = "ptr")] pub(crate) mod secrets { use core::cell::UnsafeCell; use core::sync::atomic::{AtomicUsize, Ordering}; use crate::util::mix::rapid_mix; use super::DEFAULT_SECRETS; /// A hacky sync-friendly, std-free, OnceCell that sadly needs unsafe inspired by foldhash's /// `seed.rs` which includes some similar bodges. struct SecretStorage { state: AtomicUsize, seed: UnsafeCell, secrets: UnsafeCell<[u64; 7]>, } unsafe impl Sync for SecretStorage {} static SECRET_STORAGE: SecretStorage = SecretStorage { state: AtomicUsize::new(0), seed: UnsafeCell::new(0), secrets: UnsafeCell::new([0; 7]), }; enum SecretStorageStates { Uninitialized = 0, Initializing = 1, Initialized = 2, } #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct GlobalSecrets { _private: (), } impl GlobalSecrets { /// Set up the global secrets if they are not already initialized. #[inline(always)] pub fn new() -> Self { if SECRET_STORAGE.state.load(Ordering::Acquire) != SecretStorageStates::Initialized as usize { initialize_secrets(); } Self { _private: () } } /// Get the global secrets, which are guaranteed to be initialized. #[inline(always)] pub fn get(self) -> &'static [u64; 7] { // SAFETY: The secrets are guaranteed to be initialized before being accessed // as we cannot construct this struct without first calling `new()` unsafe { &*SECRET_STORAGE.secrets.get() } } /// Get the fixed seed, which is guaranteed to be initialized. #[inline(always)] pub fn get_global_seed(self) -> u64 { // SAFETY: The secrets are guaranteed to be initialized before being accessed // as we cannot construct this struct without first calling `new()` unsafe { *SECRET_STORAGE.seed.get() } } } /// Get the global secrets, slow(ish). 
/// Get the global secrets, slow(ish).
///
/// Short for `GlobalSecrets::new().get()`, so the secrets are initialised on first use.
#[inline(always)]
pub fn get_secrets() -> &'static [u64; 7] {
    GlobalSecrets::new().get()
}

/// Populate `SECRET_STORAGE`, or wait until another thread has finished doing so.
///
/// The candidate seed and secrets are generated *before* the state is touched, so the thread
/// that wins the compare-exchange only has to copy them in. Threads that lose the race discard
/// their candidates and return once they observe the `Initialized` state.
#[cold]
#[inline(never)]
fn initialize_secrets() {
    let seed = generate_random();
    let secrets = create_secrets(seed);
    // A named constant is needed so the state can be used as a `match` pattern below.
    const INITIALIZED: usize = SecretStorageStates::Initialized as usize;

    loop {
        // Try to move Uninitialized -> Initializing. The weak variant may fail spuriously,
        // in which case we fall through to the spin arm and retry.
        match SECRET_STORAGE.state.compare_exchange_weak(
            SecretStorageStates::Uninitialized as usize,
            SecretStorageStates::Initializing as usize,
            Ordering::Acquire,
            Ordering::Acquire,
        ) {
            // This thread is the first to initialize, so we can safely set the secrets
            Ok(_) => {
                // SAFETY: only the thread that moved the state out of `Uninitialized` reaches
                // this arm, so it is the only writer. Readers are expected to touch the cells
                // only after observing `Initialized` (see `GlobalSecrets::new`).
                unsafe {
                    *SECRET_STORAGE.seed.get() = seed;
                    *SECRET_STORAGE.secrets.get() = secrets;
                }
                // Release store: publishes the writes above to threads that load the state
                // with Acquire.
                SECRET_STORAGE.state.store(SecretStorageStates::Initialized as usize, Ordering::Release);
                break;
            }

            // Another thread has initialized for us, so we're done.
            Err(INITIALIZED) => {
                return;
            }

            // We are spinning here until the other thread is done initializing. This should
            // be very fast, as the initializing thread should only be copying the already
            // generated secrets for a few instructions.
            _ => core::hint::spin_loop(),
        }
    }
}

/// Derive the seven secrets from `seed`.
///
/// Each round mixes the running seed with one of the default secrets and stores the result, so
/// the same input seed always produces the same array.
// NOTE(review): `rapid_mix::(` is reproduced as found. The generic argument between `::` and `(`
// appears to be missing from this copy of the file; confirm against upstream.
fn create_secrets(mut seed: u64) -> [u64; 7] {
    let mut secrets = [0u64; 7];
    for i in 0..secrets.len() {
        // Masks for bits 48..=63, bits 24..=39 and bits 0..=15 respectively.
        const HI: u64 = 0xFFFF << 48;
        const MI: u64 = 0xFFFF << 24;
        const LO: u64 = 0xFFFF;

        seed = rapid_mix::(seed ^ DEFAULT_SECRETS[0], DEFAULT_SECRETS[i]);

        // ensure at least one high, middle, and low bit is set for a semi-decent secret
        if (seed & HI) == 0 {
            seed |= 1u64 << 63;
        }
        if (seed & MI) == 0 {
            seed |= 1u64 << 31;
        }
        if (seed & LO) == 0 {
            seed |= 1u64;
        }

        secrets[i] = seed;
    }
    secrets
}
/// Generate a random number, trying our best to make this a good random number.
///
/// With the `rand` feature this simply defers to `rand::random()`. Otherwise the value is mixed
/// together from the addresses of a stack local, a static and this function, plus (with `std`)
/// a heap address and, where the platform allows it, the system time.
///
/// To only be called sparingly as it's fairly slow.
// NOTE(review): `rapid_mix::(` is reproduced as found. The generic argument between `::` and `(`
// appears to be missing from this copy of the file; confirm against upstream.
pub fn generate_random() -> u64 {
    #[cfg(feature = "rand")]
    {
        rand::random()
    }

    #[cfg(not(feature = "rand"))]
    {
        // trying our best to generate a good random number on all platforms
        let mut seed = DEFAULT_SECRETS[0];
        // Three addresses from different regions: stack, static data and code.
        let stack_ptr = core::ptr::addr_of!(seed) as u64;
        let static_ptr = &DEFAULT_SECRETS as *const _ as usize as u64;
        let function_ptr = generate_random as *const () as usize as u64;

        seed = rapid_mix::(seed ^ DEFAULT_SECRETS[4], stack_ptr ^ DEFAULT_SECRETS[1]);
        seed = rapid_mix::(seed ^ DEFAULT_SECRETS[5], function_ptr ^ DEFAULT_SECRETS[2]);
        seed = rapid_mix::(seed ^ DEFAULT_SECRETS[6], static_ptr ^ DEFAULT_SECRETS[3]);

        #[cfg(feature = "std")]
        {
            // we can allocate to add extra noise
            // (only the address of the temporary box is used; it is freed straight away)
            let box_ptr = &*Box::new(1u64) as *const _ as usize as u64;
            seed = rapid_mix::(seed ^ DEFAULT_SECRETS[4], box_ptr ^ DEFAULT_SECRETS[1]);
        }

        #[cfg(all(
            feature = "std",
            not(any(
                miri,
                all(target_family = "wasm", target_os = "unknown"),
                target_os = "zkvm"
            ))
        ))]
        {
            // we can use the system time for extra noise
            seed = crate::rng::rapidrng_time(&mut seed);
        }

        // final avalanche step
        seed = rapid_mix::(seed ^ DEFAULT_SECRETS[6], DEFAULT_SECRETS[0]);
        seed
    }
}
super::create_secrets(seed + 1); assert_ne!(secrets1, secrets3, "create_secrets should not return the same value for different seeds"); } // Check that the secrets are well-formed for secret in secrets1.iter() { const HI: u64 = 0xFFFF << 48; const MI: u64 = 0xFFFF << 24; const LO: u64 = 0xFFFF; assert_ne!(*secret & HI, 0, "Secret should have a high bit set"); assert_ne!(*secret & MI, 0, "Secret should have a middle bit set"); assert_ne!(*secret & LO, 0, "Secret should have a low bit set"); } // Check that the secrets are unique let mut unique_secrets = BTreeSet::new(); for secret in secrets1.iter() { unique_secrets.insert(*secret); } assert_eq!(unique_secrets.len(), secrets1.len(), "Secrets should be unique across both calls"); } #[test] #[cfg(feature = "std")] fn test_generate_random() { let random1 = super::generate_random(); let random2 = super::generate_random(); assert_ne!(random1, random2, "generate_random should return different values on subsequent calls"); } } } rapidhash-4.4.1/src/inner/state/global_state.rs000064400000000000000000000064461046102023000176370ustar 00000000000000use core::hash::BuildHasher; use core::fmt::Formatter; use crate::inner::RapidHasher; use crate::inner::seeding::secrets::GlobalSecrets; /// A [`BuildHasher`] that uses a global seed and secrets, randomized only once on startup. /// /// The global secrets are randomized on the first instantiation, and then every subsequent instance /// of GlobalState will re-use the same seed and secrets, ensuring consistent hash outputs for the /// duration of the program. #[derive(Copy, Clone, Eq, PartialEq)] pub struct GlobalState { /// The global secrets is a zero-sized type to keep HashMap small. secrets: GlobalSecrets, } impl GlobalState { /// Create a new global state with a global seed and secrets. /// /// The seed and secrets will be randomized on the first instantiation of `GlobalState`, but all /// subsequent instances will share the same seed and secrets. 
/// Equivalent to [`GlobalState::new`].
///
/// Warning: the shared seed and secrets are only randomized on platforms that support atomic
/// pointers. Other targets fall back to the fixed default seed and the default secrets.
impl Default for GlobalState {
    #[inline(always)]
    fn default() -> Self {
        Self::new()
    }
}
}"); } } rapidhash-4.4.1/src/inner/state/mod.rs000064400000000000000000000002461046102023000157460ustar 00000000000000mod random_state; mod seedable_state; mod global_state; pub use global_state::GlobalState; pub use random_state::RandomState; pub use seedable_state::SeedableState; rapidhash-4.4.1/src/inner/state/random_state.rs000064400000000000000000000105711046102023000176510ustar 00000000000000use core::hash::{BuildHasher}; use core::fmt::Formatter; use crate::inner::RapidHasher; use crate::inner::seeding::secrets::GlobalSecrets; /// A [`std::hash::RandomState`] compatible hasher that initializes a [`RapidHasher`] with a random /// seed and random global secrets. /// /// This is designed to provide some HashDoS resistance by using a random seed per hashmap, and /// a global random set of secrets. /// /// # Portability /// /// On most target platforms, the secrets are randomly initialized once and cached globally for the /// lifetime of the program using a mix of ASLR and other entropy sources. The seed is randomly /// initialized for each new instance of `RandomState` using only ASLR and a mixing step. /// /// On targets without atomic pointer support, the global secrets will not be randomized, and /// instead will fall back to the default secrets. This means these platforms will not have minimal /// HashDoS resistance guarantees. If this is important for your application, please raise a GitHub /// issue to improve support for these platforms. /// /// # Example /// ```rust /// use std::collections::HashMap; /// use std::hash::Hasher; /// /// use rapidhash::quality::RandomState; /// /// let mut map = HashMap::with_hasher(RandomState::default()); /// map.insert(42, "the answer"); /// ``` #[derive(Copy, Clone, Eq, PartialEq)] pub struct RandomState { seed: u64, /// The global secrets is a zero-sized type to keep HashMap small. secrets: GlobalSecrets, } impl RandomState { /// Create a new random state with a random seed. 
/// Equivalent to [`RandomState::new`].
///
/// Warning: the per-instance seed is randomized on every call, but the global secrets are only
/// randomized on platforms that support atomic pointers. Other targets use the default secrets.
impl Default for RandomState {
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}
}"); } } rapidhash-4.4.1/src/inner/state/seedable_state.rs000064400000000000000000000164631046102023000201430ustar 00000000000000use core::hash::BuildHasher; use core::fmt::Formatter; use crate::inner::RapidHasher; use crate::inner::seeding::secrets::GlobalSecrets; /// A [`std::hash::BuildHasher`] that initializes a [`RapidHasher`] with a user-provided seed and /// secrets. /// /// `SeedableState` should rarely be used as providing DoS resistance requires a randomized seed and /// secrets. Users should instead prefer either: /// * [`crate::inner::GlobalState`], which uses a global random seed and secrets initialized once at /// program start. /// * [`crate::inner::RandomState`], which uses a random seed per instance and global random secrets. /// /// The lifetime `'s` is for the reference to the secrets. When using [`SeedableState::random`] or /// [`SeedableState::fixed`] secrets, this lifetime will be `'static`. /// /// # Example /// ``` /// use std::collections::HashMap; /// use std::hash::Hasher; /// /// use rapidhash::quality::SeedableState; /// /// let mut map = HashMap::with_hasher(SeedableState::default()); /// map.insert(42, "the answer"); /// ``` #[derive(Copy, Clone, Eq, PartialEq)] pub struct SeedableState<'s, const AVALANCHE: bool, const SPONGE: bool, const COMPACT: bool = false, const PROTECTED: bool = false> { seed: u64, secrets: &'s [u64; 7], } impl<'s, const AVALANCHE: bool, const SPONGE: bool, const COMPACT: bool, const PROTECTED: bool> Default for SeedableState<'s, AVALANCHE, SPONGE, COMPACT, PROTECTED> { /// Create a new [SeedableState] with a random seed. See [SeedableState::random] for more details. #[inline] fn default() -> Self { Self::random() } } impl<'s, const AVALANCHE: bool, const SPONGE: bool, const COMPACT: bool, const PROTECTED: bool> SeedableState<'s, AVALANCHE, SPONGE, COMPACT, PROTECTED> { /// Create a new seedable state with a custom seed and automatically generated secrets. /// /// The seed will be pre-mixed to improve entropy. 
The global secrets are randomly generated /// once at program start, and then will be re-used for all subsequent calls to this function. /// /// # Example /// ``` /// use core::hash::BuildHasher; /// use rapidhash::quality::SeedableState; /// /// let state = SeedableState::new(0); /// /// let hash: u64 = state.hash_one(b"hello"); /// println!("hash: {hash}"); /// ``` pub fn new(seed: u64) -> Self { Self { seed: crate::inner::seed::rapidhash_seed(seed), secrets: GlobalSecrets::new().get(), } } /// Create a new seedable state with a random seed. /// /// This is slower than using [`crate::inner::RandomState`], please use that instead. #[inline] pub fn random() -> Self { Self { seed: crate::inner::seeding::seed::get_seed(), secrets: GlobalSecrets::new().get(), } } /// Create a new seedable state with the default seed and secrets. /// /// Using the default secrets does not offer HashDoS resistance, but they will be fixed between /// different runs of the program. /// /// Please note that `fast::RapidHasher` and `quality::RapidHasher` are **not guaranteed** to /// produce the same hash outputs between different crate versions, compiler versions, or /// platforms. /// /// Also see [`GlobalState`] for a faster zero-sized alternative that uses global secrets that /// are fixed only for the lifetime of the program. #[inline] pub fn fixed() -> Self { Self { seed: crate::inner::seed::rapidhash_seed(crate::inner::seed::DEFAULT_SEED), secrets: &crate::inner::seed::DEFAULT_SECRETS, } } /// Create a new seedable state with a custom seed and secrets. /// /// ## Warning /// This constructor uses the provided `seed` and `secrets` as the initial state /// **without any pre-mixing or validation**. Supplying low-entropy or structured /// values (e.g., `0`, all-zero arrays, counters, timestamps) can produce /// degenerate hashing (high collision rates or identical outputs). /// /// ### Requirements /// - `seed` and `secrets` **must not** be zero; avoid any all-zero/near-zero state. 
/// - Generate `seed` and `secrets` with a **cryptographically secure PRNG** and /// treat them as **independent** for each hasher instance. /// - Do not derive successive seeds from predictable data (time, PID/TID, memory /// addresses) or by simple incrementation. /// /// ### Recommendation /// If you cannot pre-mix the seed yourself, use [`SeedableState::new`] instead. /// /// ### Example (secure generation) /// ```rust /// use core::hash::BuildHasher; /// use rapidhash::quality::SeedableState; /// /// // randomly generate secrets /// let seed: u64 = rand::random(); /// let secrets: [u64; 7] = rand::random(); /// /// // create the state /// let state = SeedableState::custom(seed, &secrets); /// /// // hash using the state /// let hash: u64 = state.hash_one(b"hello"); /// println!("hash: {hash}"); /// ``` #[inline] pub fn custom(seed: u64, secrets: &'s [u64; 7]) -> Self { Self { seed, secrets, } } /// Deprecated and renamed to [`SeedableState::custom`]. #[deprecated(since = "4.1.0", note = "Use custom() or new() instead.")] #[inline] pub fn with_seed(seed: u64, secrets: &'s [u64; 7]) -> Self { Self::custom(seed, secrets) } } impl<'s, const AVALANCHE: bool, const SPONGE: bool, const COMPACT: bool, const PROTECTED: bool> BuildHasher for SeedableState<'s, AVALANCHE, SPONGE, COMPACT, PROTECTED> { type Hasher = RapidHasher<'s, AVALANCHE, SPONGE, COMPACT, PROTECTED>; #[inline(always)] fn build_hasher(&self) -> Self::Hasher { RapidHasher::new_precomputed_seed(self.seed, self.secrets) } } impl<'s, const AVALANCHE: bool, const SPONGE: bool, const COMPACT: bool, const PROTECTED: bool> core::fmt::Debug for SeedableState<'s, AVALANCHE, SPONGE, COMPACT, PROTECTED> { fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { f.debug_struct("SeedableState").finish_non_exhaustive() } } #[cfg(test)] mod tests { use core::hash::BuildHasher; type SeedableState<'s> = super::SeedableState<'s, false, true, false, false>; #[test] fn test_random_init() { 
assert_eq!(core::mem::size_of::(), 16); let state1 = SeedableState::random(); let state2 = SeedableState::random(); let finish1a = state1.hash_one(b"hello"); let finish1b = state1.hash_one(b"hello"); let finish2a = state2.hash_one(b"hello"); assert_eq!(finish1a, finish1b); assert_ne!(finish1a, finish2a); } #[test] fn test_fixed_init() { assert_eq!(core::mem::size_of::(), 16); let state1 = SeedableState::fixed(); let state2 = SeedableState::fixed(); let finish1a = state1.hash_one(b"hello"); let finish1b = state1.hash_one(b"hello"); let finish2a = state2.hash_one(b"hello"); assert_eq!(finish1a, finish1b); assert_eq!(finish1a, finish2a); } #[test] fn test_debug() { extern crate alloc; let state = SeedableState::random(); let debug_str = alloc::format!("{:?}", state); assert_eq!(debug_str, "SeedableState { .. }"); } } rapidhash-4.4.1/src/lib.rs000064400000000000000000000012771046102023000135070ustar 00000000000000#![cfg_attr(docsrs, doc = include_str!("../README.md"))] #![cfg_attr(not(docsrs), doc = "# Rapidhash")] #![cfg_attr(docsrs, feature(doc_cfg))] #![cfg_attr(docsrs, doc(auto_cfg(hide(docsrs))))] #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(feature = "nightly", feature(likely_unlikely))] #![cfg_attr(feature = "nightly", feature(hasher_prefixfree_extras))] #![deny(missing_docs)] #![deny(unused_must_use)] #![allow(clippy::manual_hash_one)] pub(crate) mod util; pub mod v1; pub mod v2; pub mod v3; pub mod inner; pub mod fast; pub mod quality; #[cfg(any(feature = "std", docsrs))] mod collections; pub mod rng; #[doc(inline)] #[cfg(any(feature = "std", docsrs))] pub use collections::*; rapidhash-4.4.1/src/main.rs000064400000000000000000000245271046102023000136700ustar 00000000000000/// Command-line tool for rapidhash. /// /// Rapidhash produces a `u64` hash value, and terminal output is a decimal string of the hash /// value. 
/// Command-line tool for rapidhash.
///
/// Rapidhash produces a `u64` hash value, and terminal output is a decimal string of the hash
/// value.
///
/// # Install
/// ```shell
/// cargo install rapidhash
/// ```
///
/// # Usage
///
/// ## Hashing files
///
/// On rapidhash V1 and V2:
/// This will first check the metadata of the file to get the length, and then stream the file.
///
/// On rapidhash V3:
/// This will stream the file using an 8KB buffer without any metadata checks.
///
/// ```bash
/// rapidhash example.txt
/// 8543579700415218186
/// ```
///
/// ## Hashing stdin
///
/// On rapidhash V1 and V2:
/// Because of how rapidhash is seeded using the data length, the length must be known at the start
/// of the stream. Therefore, reading from stdin is not recommended as it will cache the entire
/// input in memory before being able to hash it.
///
/// On rapidhash V3:
/// This will stream from stdin using an 8KB buffer.
///
/// ```shell
/// echo "example" | rapidhash
/// 8543579700415218186
/// ```
///
/// # Panics
///
/// Panics when built without the `std` feature, when more than one version flag is given, or
/// when the named file cannot be opened.
pub fn main() {
    #[cfg(not(feature = "std"))]
    {
        panic!("CLI must be compiled with the `std` feature. Try: `cargo install rapidhash --all-features`");
    }

    #[cfg(feature = "std")]
    {
        let args: Vec<String> = std::env::args().collect();

        // TODO: multiple output types (hex, decimal)
        // TODO: --seed arg, with hex input support

        // The `is_empty` check also guards the `args[1..]` slice below on platforms that
        // pass no program name.
        if args.iter().any(|a| a == "--help" || a == "-h") || args.is_empty() {
            println!("Usage: rapidhash [opts] [filename]");
            println!();
            println!(" --v1 Use v1 hashing algorithm (no streaming)");
            println!(" --v2.0 Use v2.0 hashing algorithm (no streaming)");
            println!(" --v2.1 Use v2.1 hashing algorithm (no streaming)");
            println!(" --v2.2 Use v2.2 hashing algorithm (no streaming)");
            println!(" --v3 Use v3 hashing algorithm (default)");
            println!("[opts]");
            println!(" --protected Use the protected variant (default: false)");
            println!("[filename]");
            println!(" Providing a filename is optional and will read a file directly. Otherwise input");
            println!(" is read from stdin.");
            println!();
            println!("Note that only some rapidhash versions support streaming, others require");
            println!("buffering the entire input in memory.");
            println!();
            println!("Docs: https://github.com/hoxxep/rapidhash?tab=readme-ov-file#cli");
            return;
        }

        // file name is the first non-option argument
        let filename = args.iter().skip(1).find(|a| !a.starts_with('-'));

        // get the rapidhash version from the command line arguments
        let version = RapidhashVersion::new(&args[1..])
            .expect("You must specify a single rapidhash version to use. See --help for more.");

        let hash: u64 = match filename {
            None => version.hash_stdin(),
            #[allow(unreachable_code)]
            #[allow(unused_variables)]
            Some(filename) => {
                let mut file = std::fs::File::open(filename).expect("Could not open file.");
                version.hash_file(&mut file)
            }
        };

        println!("{hash}");
    }
}
std::io::Read; match self { RapidhashVersion::V1 { protected } => { let mut buffer = Vec::with_capacity(1024); std::io::stdin().read_to_end(&mut buffer).expect("Could not read from stdin."); if *protected { rapidhash::v1::rapidhash_v1_inline::(&buffer, &rapidhash::v1::DEFAULT_RAPID_SECRETS) } else { rapidhash::v1::rapidhash_v1_inline::(&buffer, &rapidhash::v1::DEFAULT_RAPID_SECRETS) } }, RapidhashVersion::V2 { protected, version } => { let mut buffer = Vec::with_capacity(1024); std::io::stdin().read_to_end(&mut buffer).expect("Could not read from stdin."); match version { 0 => { if *protected { rapidhash::v2::rapidhash_v2_inline::<0, true, false, true>(&buffer, &rapidhash::v2::DEFAULT_RAPID_SECRETS) } else { rapidhash::v2::rapidhash_v2_inline::<0, true, false, false>(&buffer, &rapidhash::v2::DEFAULT_RAPID_SECRETS) } } 1 => { if *protected { rapidhash::v2::rapidhash_v2_inline::<1, true, false, true>(&buffer, &rapidhash::v2::DEFAULT_RAPID_SECRETS) } else { rapidhash::v2::rapidhash_v2_inline::<1, true, false, false>(&buffer, &rapidhash::v2::DEFAULT_RAPID_SECRETS) } } 2 => { if *protected { rapidhash::v2::rapidhash_v2_inline::<2, true, false, true>(&buffer, &rapidhash::v2::DEFAULT_RAPID_SECRETS) } else { rapidhash::v2::rapidhash_v2_inline::<2, true, false, false>(&buffer, &rapidhash::v2::DEFAULT_RAPID_SECRETS) } } _ => { panic!("Unsupported v2 version: {version}. 
Supported versions are 0, 1, and 2."); } } }, RapidhashVersion::V3 { protected } => { if *protected { rapidhash::v3::rapidhash_v3_file_inline::<_, true>(std::io::stdin(), &rapidhash::v3::DEFAULT_RAPID_SECRETS) .expect("Could not read from stdin.") } else { rapidhash::v3::rapidhash_v3_file_inline::<_, false>(std::io::stdin(), &rapidhash::v3::DEFAULT_RAPID_SECRETS) .expect("Could not read from stdin.") } }, } } #[allow(deprecated)] pub fn hash_file(&self, reader: &mut std::fs::File) -> u64 { match self { RapidhashVersion::V1 { protected } => { if *protected { rapidhash::v1::rapidhash_v1_file_inline::(reader, &rapidhash::v1::DEFAULT_RAPID_SECRETS) .expect("Failed to hash file.") } else { rapidhash::v1::rapidhash_v1_file_inline::(reader, &rapidhash::v1::DEFAULT_RAPID_SECRETS) .expect("Failed to hash file.") } }, RapidhashVersion::V2 { protected, version } => { match version { 0 => { if *protected { rapidhash::v2::rapidhash_v2_file_inline::<0, true>(reader, &rapidhash::v2::DEFAULT_RAPID_SECRETS) .expect("Failed to hash file.") } else { rapidhash::v2::rapidhash_v2_file_inline::<0, false>(reader, &rapidhash::v2::DEFAULT_RAPID_SECRETS) .expect("Failed to hash file.") } } 1 => { if *protected { rapidhash::v2::rapidhash_v2_file_inline::<1, true>(reader, &rapidhash::v2::DEFAULT_RAPID_SECRETS) .expect("Failed to hash file.") } else { rapidhash::v2::rapidhash_v2_file_inline::<1, false>(reader, &rapidhash::v2::DEFAULT_RAPID_SECRETS) .expect("Failed to hash file.") } } 2 => { if *protected { rapidhash::v2::rapidhash_v2_file_inline::<2, true>(reader, &rapidhash::v2::DEFAULT_RAPID_SECRETS) .expect("Failed to hash file.") } else { rapidhash::v2::rapidhash_v2_file_inline::<2, false>(reader, &rapidhash::v2::DEFAULT_RAPID_SECRETS) .expect("Failed to hash file.") } } _ => { panic!("Unsupported v2 version: {version}. 
//! In-memory hashing: RapidHasher with a focus on hash quality.
//!
//! Designed to produce minimal hash collisions.
//!
//! This is a specific instantiation of the [`crate::inner`] module with the following settings:
//! - `AVALANCHE` is enabled.
//! - `SPONGE` is enabled.
//! - `COMPACT` is disabled, unless building for WASM targets.
//! - `PROTECTED` is disabled.

const AVALANCHE: bool = true;
const SPONGE: bool = true;
const COMPACT: bool = cfg!(target_family = "wasm");
const PROTECTED: bool = false;

use crate::inner;

/// A [std::hash::Hasher] inspired by [`crate::v3::rapidhash_v3`] with a focus on output hash
/// quality.
///
/// This is an alias for [inner::RapidHasher] with the following settings:
/// - `AVALANCHE` is enabled.
/// - `SPONGE` is enabled.
/// - `COMPACT` is disabled, unless building for WASM targets.
/// - `PROTECTED` is disabled.
///
/// Use [`crate::fast::RapidHasher`] for a lower quality but faster hash output where desirable.
pub type RapidHasher<'s> = inner::RapidHasher<'s, AVALANCHE, SPONGE, COMPACT, PROTECTED>;

/// A rapidhash equivalent to [`std::hash::RandomState`] that uses a random seed and secrets for
/// minimal DoS resistance.
///
/// This initializes a [`RapidHasher`] with the following settings:
/// - `AVALANCHE` is enabled.
/// - `SPONGE` is enabled.
/// - `COMPACT` is disabled, unless building for WASM targets.
/// - `PROTECTED` is disabled.
///
/// Use [crate::fast::RandomState] for a lower quality but faster hash output where desirable.
pub type RandomState = inner::RandomState;

/// A [`std::hash::BuildHasher`] that uses user-provided seed and secrets.
///
/// We recommend using [`RandomState`] or [`GlobalState`] instead for most use cases.
///
/// This initializes a [RapidHasher] with the following settings:
/// - `AVALANCHE` is enabled.
/// - `SPONGE` is enabled.
/// - `COMPACT` is disabled, unless building for WASM targets.
/// - `PROTECTED` is disabled.
///
/// Use [`crate::fast::SeedableState`] for a lower quality but faster hash output where desirable.
pub type SeedableState<'secrets> = inner::SeedableState<'secrets, AVALANCHE, SPONGE, COMPACT, PROTECTED>;

/// A [`std::hash::BuildHasher`] that uses a global seed and secrets, randomized only once on startup.
///
/// All instances of GlobalState will use the same global seed and secrets for the lifetime of the
/// program. This provides minimal HashDoS resistance by randomizing the seed and secrets between
/// application runs.
///
/// This initializes a [`RapidHasher`] with the following settings:
/// - `AVALANCHE` is enabled.
/// - `SPONGE` is enabled.
/// - `COMPACT` is disabled, unless building for WASM targets.
/// - `PROTECTED` is disabled.
///
/// Use [`crate::fast::GlobalState`] for a lower quality but faster hash output where desirable.
pub type GlobalState = inner::GlobalState;
/// Generate a random number using rapidhash mixing.
///
/// This RNG is deterministic and optimized for throughput. It is not a cryptographic random number
/// generator.
///
/// This implementation is equivalent in logic and performance to
/// [wyhash::wyrng](https://docs.rs/wyhash/latest/wyhash/fn.wyrng.html) and
/// [fastrand::u64](https://docs.rs/fastrand/latest/fastrand/), but uses rapidhash
/// constants/secrets.
///
/// The weakness with this RNG is that at best it's a single cycle over the u64 space, as the seed
/// is simply a position in a constant sequence. Future work could involve using a wider state to
/// ensure we can generate many different sequences.
// NOTE(review): `rapid_mix::(` is reproduced as found. The generic argument between `::` and `(`
// appears to be missing from this copy of the file; confirm against upstream.
#[inline]
pub fn rapidrng_fast(seed: &mut u64) -> u64 {
    // Advance the state by a fixed odd-looking constant, then mix it to produce the output.
    *seed = seed.wrapping_add(RAPID_SECRET[0]);
    rapid_mix::(*seed, *seed ^ RAPID_SECRET[1])
}

/// A lower quality version of [`rapidrng_fast`] that's slightly faster, with optimisations for
/// u32 platforms and those without wide-arithmetic support.
///
/// This is not a portable RNG, as it will produce different results on different platforms. Use
/// [`rapidrng_fast`] if stable outputs are required.
///
/// Used in the rapidhash WASM benchmarks.
#[inline]
pub fn rapidrng_fast_not_portable(seed: &mut u64) -> u64 {
    // Same state advance as `rapidrng_fast`; only the output mix differs.
    *seed = seed.wrapping_add(RAPID_SECRET[0]);
    rapid_mix_np_low_quality(*seed, RAPID_SECRET[1])
}
/// A very fast low-quality mixing function used only for the ultra-fast PRNG.
///
/// Uses the standard `rapid_mix` for 64-bit architectures, and otherwise uses a very cheap
/// u32-mix for platforms without wide-arithmetic support. This is even cheaper/lower quality than
/// `rapid_mix_np`.
///
/// Exactly one of the two `cfg` blocks below is compiled in; their conditions are complements.
// NOTE(review): `rapid_mix::(` is reproduced as found. The generic argument between `::` and `(`
// appears to be missing from this copy of the file; confirm against upstream.
#[inline(always)]
fn rapid_mix_np_low_quality(x: u64, y: u64) -> u64 {
    #[cfg(any(
        all(
            target_pointer_width = "64",
            not(any(target_arch = "sparc64", target_arch = "wasm64")),
        ),
        target_arch = "aarch64",
        target_arch = "x86_64",
        all(target_family = "wasm", target_feature = "wide-arithmetic"),
    ))]
    {
        rapid_mix::(x, y)
    }

    #[cfg(not(any(
        all(
            target_pointer_width = "64",
            not(any(target_arch = "sparc64", target_arch = "wasm64")),
        ),
        target_arch = "aarch64",
        target_arch = "x86_64",
        all(target_family = "wasm", target_feature = "wide-arithmetic"),
    )))]
    {
        // u64 x u64 -> u128 product is prohibitively expensive on 32-bit.
        // Decompose into 32-bit parts.
        let lx = x as u32;
        let ly = y as u32;
        let hx = (x >> 32) as u32;
        let hy = (y >> 32) as u32;

        // u32 x u32 -> u64: multiply the low half of one input with the high half of the other.
        let afull = (lx as u64) * (hy as u64);
        let bfull = (hx as u64) * (ly as u64);

        // Combine, swapping low/high of one of them so the upper bits of the
        // product of one combine with the lower bits of the other.
        afull ^ bfull.rotate_right(32)
    }
}
/// /// # Example /// ```rust /// use rapidhash::rng::{rapidrng_fast, rapidrng_time}; /// /// // choose a non-deterministic random seed (50-100ns) /// let mut seed = rapidrng_time(&mut 0); /// /// // rapid fast deterministic random numbers (~1ns/iter) /// for _ in 0..10 { /// println!("{}", rapidrng_fast(&mut seed)); /// } /// ``` #[cfg(any( all( feature = "std", not(any( miri, all(target_family = "wasm", target_os = "unknown"), target_os = "zkvm" )) ), docsrs ))] #[inline] pub fn rapidrng_time(seed: &mut u64) -> u64 { let time = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap(); // NOTE limited entropy: only a few of the time.as_secs bits will change between calls, and the // time.subsec_nanos may only have milli- or micro-second precision on some platforms. // This is why we further stretch the teed with multiple rounds of rapid_mix. let mut teed = (time.as_secs() << 32) | time.subsec_nanos() as u64; teed = rapid_mix::(teed ^ RAPID_SECRET[0], *seed ^ RAPID_SECRET[1]); *seed = rapid_mix::(teed ^ RAPID_SECRET[0], RAPID_SECRET[2]); rapid_mix::(*seed, *seed ^ RAPID_SECRET[1]) } /// A random number generator that uses the rapidhash mixing algorithm. /// /// This deterministic RNG is optimized for speed and throughput. This is not a cryptographic random /// number generator. /// /// This RNG is compatible with [`rand_core::RngCore`] and [`rand_core::SeedableRng`]. /// /// # Example /// ```rust /// use rapidhash::rng::RapidRng; /// /// let mut rng = RapidRng::default(); /// println!("{}", rng.next()); /// ``` #[derive(Clone, Copy, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)] pub struct RapidRng { seed: u64, } #[cfg(any( all( feature = "std", not(any( miri, all(target_family = "wasm", target_os = "unknown"), target_os = "zkvm" )) ), docsrs ))] impl Default for RapidRng { /// Create a new random number generator. /// /// With `std` enabled, the seed is generated using the current system time via [rapidrng_time]. 
/// /// Without `std`, the seed is set to the default seed. #[inline] fn default() -> Self { let mut seed = RAPID_SEED; Self { seed: rapidrng_time(&mut seed), } } } #[cfg(not(any( all( feature = "std", not(any( miri, all(target_family = "wasm", target_os = "unknown"), target_os = "zkvm" )) ), docsrs )))] impl Default for RapidRng { /// Create a new random number generator. /// /// With `std` enabled, the seed is generated using the current system time via [rapidrng_time]. /// /// Without `std`, the seed is set to [RAPID_SEED]. #[inline] fn default() -> Self { Self { seed: RAPID_SEED, } } } impl RapidRng { /// Create a new random number generator from a specified seed. /// /// Also see [RapidRng::default()] with the `std` feature enabled for seed randomisation based /// on the current time. #[inline] pub fn new(seed: u64) -> Self { Self { seed, } } /// Export the current state of the random number generator. #[inline] pub fn state(&self) -> [u8; 8] { self.seed.to_le_bytes() } /// Get the next random number from this PRNG and iterate the state. 
#[inline]
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> u64 {
        rapidrng_fast(&mut self.seed)
    }
}

#[cfg(feature = "rng")]
impl RngCore for RapidRng {
    #[inline]
    fn next_u32(&mut self) -> u32 {
        // Keep the low 32 bits of a full 64-bit draw.
        self.next_u64() as u32
    }

    #[inline]
    fn next_u64(&mut self) -> u64 {
        self.next()
    }

    #[inline]
    fn fill_bytes(&mut self, dest: &mut [u8]) {
        impls::fill_bytes_via_next(self, dest)
    }
}

#[cfg(feature = "rng")]
impl SeedableRng for RapidRng {
    type Seed = [u8; 8];

    #[inline]
    fn from_seed(seed: Self::Seed) -> Self {
        // Little-endian, mirroring `RapidRng::state`, so exporting and re-importing round-trips.
        Self {
            seed: u64::from_le_bytes(seed),
        }
    }

    #[inline]
    fn seed_from_u64(state: u64) -> Self {
        Self::new(state)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[cfg(feature = "rng")]
    #[test]
    fn test_rapidrng() {
        let mut rng = RapidRng::new(0);
        let x = rng.next();
        let y = rng.next();
        assert_ne!(x, 0);
        assert_ne!(x, y);
    }

    #[cfg(all(feature = "rng", feature = "std"))]
    #[test]
    fn bit_flip_trial() {
        let cycles = 100_000;
        let mut seen = std::collections::HashSet::with_capacity(cycles);
        let mut flips = std::vec::Vec::with_capacity(cycles);
        let mut rng = RapidRng::new(0);

        let mut prev = 0;
        for _ in 0..cycles {
            let next = rng.next_u64();

            let xor = prev ^ next;
            let flipped = xor.count_ones() as u64;
            assert!(xor.count_ones() >= 10, "Flipping bit changed only {} bits", flipped);
            flips.push(flipped);

            // `insert` returns false when the value was already present.
            assert!(seen.insert(next), "RapidRngFast produced a duplicate value");

            prev = next;
        }

        let average = flips.iter().sum::<u64>() as f64 / flips.len() as f64;
        assert!(average > 31.95 && average < 32.05, "Did not flip an average of half the bits. average: {}, expected: 32.0", average);
    }

    #[cfg(feature = "std")]
    #[test]
    fn bit_flip_trial_fast() {
        let cycles = 100_000;
        let mut seen = std::collections::HashSet::with_capacity(cycles);
        let mut flips = std::vec::Vec::with_capacity(cycles);

        let mut prev = 0;
        for _ in 0..cycles {
            // `prev` doubles as the generator state: it is advanced in place by the call.
            let next = rapidrng_fast(&mut prev);

            let xor = prev ^ next;
            let flipped = xor.count_ones() as u64;
            assert!(xor.count_ones() >= 10, "Flipping bit changed only {} bits", flipped);
            flips.push(flipped);

            // `insert` returns false when the value was already present.
            assert!(seen.insert(next), "rapidrng_fast produced a duplicate value");

            prev = next;
        }

        let average = flips.iter().sum::<u64>() as f64 / flips.len() as f64;
        assert!(average > 31.95 && average < 32.05, "Did not flip an average of half the bits. average: {}, expected: 32.0", average);
    }

    // Gated like `rapidrng_time` itself (minus `docsrs`): the function is compiled out under
    // miri, wasm32-unknown and zkvm even when `std` is enabled, so the test must be too.
    #[cfg(all(
        feature = "std",
        not(any(
            miri,
            all(target_family = "wasm", target_os = "unknown"),
            target_os = "zkvm"
        ))
    ))]
    #[test]
    fn bit_flip_trial_time() {
        let cycles = 100_000;
        let mut seen = std::collections::HashSet::with_capacity(cycles);
        let mut flips = std::vec::Vec::with_capacity(cycles);

        let mut prev = 0;
        for _ in 0..cycles {
            let next = rapidrng_time(&mut prev);

            let xor = prev ^ next;
            let flipped = xor.count_ones() as u64;
            assert!(xor.count_ones() >= 10, "Flipping bit changed only {} bits", flipped);
            flips.push(flipped);

            // `insert` returns false when the value was already present.
            assert!(seen.insert(next), "rapidrng_time produced a duplicate value");

            prev = next;
        }

        let average = flips.iter().sum::<u64>() as f64 / flips.len() as f64;
        assert!(average > 31.95 && average < 32.05, "Did not flip an average of half the bits. average: {}, expected: 32.0", average);
    }

    /// detects a cycle at: 4294967296:1751221902
    /// note that we're detecting _seed_ cycles, not output values.
#[test] #[ignore] fn find_cycle() { let mut fast = 0; let mut slow = 0; let mut power: u64 = 1; let mut lam: u64 = 1; rapidrng_fast(&mut fast); while fast != slow { if power == lam { slow = fast; power *= 2; lam = 0; } rapidrng_fast(&mut fast); lam += 1; } panic!("Cycle found after {power}:{lam} iterations."); } #[cfg(feature = "rng")] #[test] #[ignore] fn find_cycle_slow() { let mut rng = RapidRng::new(0); let mut power: u64 = 1; let mut lam: u64 = 1; let mut fast = rng.next_u64(); let mut slow = 0; while fast != slow { if power == lam { slow = fast; power *= 2; lam = 0; } fast = rng.next_u64(); lam += 1; } assert!(false, "Cycle found after {power}:{lam} iterations."); } #[cfg(feature = "rng")] #[test] fn test_construction() { let mut rng = RapidRng::default(); assert_ne!(rng.next(), 0); } } rapidhash-4.4.1/src/util/hints.rs000064400000000000000000000030411046102023000150320ustar 00000000000000/// Wraps the `core::hint::likely` intrinsic if the `nightly` feature is enabled. #[inline(always)] pub(crate) const fn likely(x: bool) -> bool { #[cfg(feature = "nightly")] { core::hint::likely(x) } #[cfg(not(feature = "nightly"))] { if !x { cold_path(); } x } } /// Wraps the `core::hint::unlikely` intrinsic if the `nightly` feature is enabled. #[inline(always)] pub(crate) const fn unlikely(x: bool) -> bool { #[cfg(feature = "nightly")] { core::hint::unlikely(x) } #[cfg(not(feature = "nightly"))] { if x { cold_path(); } x } } #[allow(dead_code)] #[cold] #[inline(always)] const fn cold_path() {} /// Provides a stable `assume` function that uses `core::hint::assert_unchecked` when the stable /// rust compiler supports it. /// /// This is particularly relevant when LLVM isn't able to specialise the >16 input functions. This /// often happens with the default release profile, which uses a large number of codegen units and /// LTO off. 
#[cfg_attr(not(docsrs), rustversion::since(1.81))] #[inline(always)] pub(crate) const unsafe fn assume(cond: bool) { debug_assert!(cond); core::hint::assert_unchecked(cond); } /// Provides a stable `assume` function that uses `core::hint::assert_unchecked` when the stable /// rust compiler supports it. #[cfg_attr(not(docsrs), rustversion::before(1.81))] #[cfg_attr(docsrs, cfg(not(docsrs)))] #[inline(always)] pub(crate) const unsafe fn assume(cond: bool) { debug_assert!(cond); } rapidhash-4.4.1/src/util/macros.rs000064400000000000000000000142051046102023000151750ustar 00000000000000/// Compare a Rust hash to the C implementation, and the COMPACT version. macro_rules! compare_to_c { ($test:ident, $rust_fn:path, $compact_fn:path, $cc_fn:ident) => { #[test] fn $test() { use rand::Rng; use rapidhash_c::$cc_fn; // test zero-length input let rust_hash = $rust_fn(&[], &DEFAULT_RAPID_SECRETS); let compact_hash = $compact_fn(&[], &DEFAULT_RAPID_SECRETS); let c_hash = $cc_fn(&[], DEFAULT_SEED); assert_eq!(rust_hash, c_hash, "Mismatch with C on zero len input"); assert_eq!(rust_hash, compact_hash, "Mismatch with COMPACT on zero len input"); // test up to 512 bytes for len in 0..=512 { let mut data = std::vec![0; len]; rand::rng().fill(&mut data[..]); for byte in 0..len { for bit in 0..8 { let mut data = data.clone(); data[byte] ^= 1 << bit; let rust_hash = $rust_fn(&data, &DEFAULT_RAPID_SECRETS); let compact_hash = $compact_fn(&data, &DEFAULT_RAPID_SECRETS); let c_hash = $cc_fn(&data, DEFAULT_SEED); assert_eq!(rust_hash, c_hash, "Mismatch with C on input {} byte {} bit {}", len, byte, bit); assert_eq!(rust_hash, compact_hash, "Mismatch with COMPACT on input {} byte {} bit {}", len, byte, bit); } } } } }; } /// Check that flipping a single bit changes enough bits of output. macro_rules! 
flip_bit_trial { ($test:ident, $hash:path) => { #[test] fn $test() { use rand::Rng; let mut flips = std::vec![]; for len in 1..=256 { let mut data = std::vec![0; len]; rand::rng().fill(&mut data[..]); let hash = $hash(&data, &DEFAULT_RAPID_SECRETS); for byte in 0..len { for bit in 0..8 { let mut data = data.clone(); data[byte] ^= 1 << bit; let new_hash = $hash(&data, &DEFAULT_RAPID_SECRETS); assert_ne!(hash, new_hash, "Flipping byte {} bit {} did not change hash for input len {}", byte, bit, len); let xor = hash ^ new_hash; let flipped = xor.count_ones() as u64; assert!(xor.count_ones() >= 8, "Flipping bit {byte}:{bit} changed only {flipped} bits"); flips.push(flipped); } } } let average = flips.iter().sum::() as f64 / flips.len() as f64; assert!(average > 31.95 && average < 32.05, "Did not flip an average of half the bits. average: {average}, expected: 32.0"); let mut hashes_seen = std::collections::HashSet::new(); // "ray casting" -> flip a single bit across the whole range, using a repeating pattern // which simulates swapped bits. The previous part of the test uses randomized data // which would not simulate bytes swapping positions. for len in 1..=512 { // should ensure that the patterns won't collide when we flip a bit, eg. 0x00 and // 0x01 will naturally collide when we flip the last bit of 0x00 for pattern in [0x00, 0xAA, 0x53] { let data = std::vec![pattern; len]; for byte in 0..len { for bit in 0..8 { // cast a single bit along the whole data let mut data = data.clone(); data[byte] ^= 1 << bit; // ensure hash is unique let new_hash = $hash(&data, &DEFAULT_RAPID_SECRETS); assert!(!hashes_seen.contains(&new_hash), "Hash collision detected for input len vec![{pattern}; {len}] at pos {byte}:{bit}: hash {new_hash} already seen"); hashes_seen.insert(new_hash); } } } } } }; } macro_rules! 
compare_rapidhash_file { ($test:ident, $hash:path, $file:path) => { #[test] fn $test() { use rand::RngCore; const LENGTH: usize = 1024; for len in 1..=LENGTH { let mut data = vec![0u8; len]; rand::rng().fill_bytes(&mut data); let mut file = tempfile::tempfile().unwrap(); file.write_all(&data).unwrap(); file.seek(SeekFrom::Start(0)).unwrap(); assert_eq!( $hash(&data, &DEFAULT_RAPID_SECRETS), $file(&mut file, &DEFAULT_RAPID_SECRETS).unwrap(), "Mismatch for input len: {}", &data.len() ); } } }; } macro_rules! compare_rapid_stream_hasher { ($test:ident, $hash:path, $hasher:path) => { #[test] fn $test() { extern crate alloc; use rand::RngCore; type H<'a> = $hasher; // test every length and every chunking size for the stream hasher for len in 0..1024 { let mut data = alloc::vec![0u8; len]; rand::rng().fill_bytes(&mut data); let expected_hash = $hash(&data, &DEFAULT_RAPID_SECRETS); let mut hasher = H::new(&DEFAULT_RAPID_SECRETS); for chunk_size in 1..512 { for chunk in data.chunks(chunk_size) { hasher.write(chunk); } let actual_hash = hasher.finish(); assert_eq!(expected_hash, actual_hash, "Mismatch for input len: {} and chunk size: {}", len, chunk_size); hasher.reset(); } } } }; } pub(crate) use compare_to_c; pub(crate) use flip_bit_trial; pub(crate) use compare_rapidhash_file; pub(crate) use compare_rapid_stream_hasher; rapidhash-4.4.1/src/util/mix.rs000064400000000000000000000030421046102023000145030ustar 00000000000000//! Internal module that provides the folded multiply. /// 64*64 to 128 bit multiply /// /// Returns the (low, high) 64 bits of the 128 bit result. /// /// # From the C code: /// Calculates 128-bit C = *A * *B. /// /// When RAPIDHASH_FAST is defined: /// Overwrites A contents with C's low 64 bits. /// Overwrites B contents with C's high 64 bits. /// /// When RAPIDHASH_PROTECTED is defined: /// Xors and overwrites A contents with C's low 64 bits. /// Xors and overwrites B contents with C's high 64 bits. 
#[inline(always)] #[must_use] pub(crate) const fn rapid_mum(a: u64, b: u64) -> (u64, u64) { let r = (a as u128).wrapping_mul(b as u128); if !PROTECTED { (r as u64, (r >> 64) as u64) } else { (a ^ r as u64, b ^ (r >> 64) as u64) } } /// Folded 64-bit multiply. [rapid_mum] then XOR the results together. #[inline(always)] #[must_use] pub(crate) const fn rapid_mix(a: u64, b: u64) -> u64 { let r = (a as u128).wrapping_mul(b as u128); if !PROTECTED { (r as u64) ^ (r >> 64) as u64 } else { (a ^ r as u64) ^ (b ^ (r >> 64) as u64) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_rapid_mum() { let (a, b) = rapid_mum::(0, 0); assert_eq!(a, 0); assert_eq!(b, 0); let (a, b) = rapid_mum::(100, 100); assert_eq!(a, 10000); assert_eq!(b, 0); let (a, b) = rapid_mum::(u64::MAX, 2); assert_eq!(a, u64::MAX - 1); assert_eq!(b, 1); } } rapidhash-4.4.1/src/util/mod.rs000064400000000000000000000002321046102023000144630ustar 00000000000000//! Utility functions and types for Rapidhash. All should be marked `pub(crate)`. pub mod mix; pub mod read; #[cfg(test)] pub mod macros; pub mod hints; rapidhash-4.4.1/src/util/read.rs000064400000000000000000000076561046102023000146400ustar 00000000000000//! Internal module for reading unaligned bytes from a slice into `u64` and `u32` values. /// A macro for assertions that can be disabled with the `unsafe` feature. These should all be /// elided at compile-time anyway. macro_rules! unsafe_assert { ($cond:expr) => { #[cfg(feature = "unsafe")] { debug_assert!($cond); } #[cfg(not(feature = "unsafe"))] { assert!($cond); } }; } /// Unsafe but const-friendly unaligned bytes to u64. The compiler can't seem to remove the bounds /// checks for small integers because we do some funky bit shifting in the indexing. /// /// SAFETY: `slice` must be at least `offset+8` bytes long, which we guarantee in this rapidhash /// implementation. 
#[inline(always)]
pub(crate) const fn read_u64(slice: &[u8], offset: usize) -> u64 {
    // Phrased as a subtraction so that a huge `offset` cannot wrap `8 + offset` and slip past
    // the check in builds without overflow checks.
    unsafe_assert!(offset <= slice.len() && slice.len() - offset >= 8);
    // SAFETY: callers guarantee `offset + 8 <= slice.len()`; the assertion above enforces it
    // unless it is compiled down to a `debug_assert!` in a release build. `read_unaligned` has no
    // alignment requirement.
    let val = unsafe { core::ptr::read_unaligned(slice.as_ptr().add(offset) as *const u64) };
    val.to_le() // swap bytes on big-endian systems to get the same u64 value
}

/// Unsafe but const-friendly unaligned bytes to u32. The compiler can't seem to remove the bounds
/// checks for small integers because we do some funky bit shifting in the indexing.
///
/// SAFETY: `slice` must be at least `offset+4` bytes long, which we guarantee in this rapidhash
/// implementation.
#[inline(always)]
pub(crate) const fn read_u32(slice: &[u8], offset: usize) -> u32 {
    // Overflow-proof form of `slice.len() >= 4 + offset`, see `read_u64`.
    unsafe_assert!(offset <= slice.len() && slice.len() - offset >= 4);
    // SAFETY: callers guarantee `offset + 4 <= slice.len()`; the assertion above enforces it
    // unless it is compiled down to a `debug_assert!` in a release build. `read_unaligned` has no
    // alignment requirement.
    let val = unsafe { core::ptr::read_unaligned(slice.as_ptr().add(offset) as *const u32) };
    val.to_le() // swap bytes on big-endian systems to get the same u32 value
}

/// Reads two little-endian `u32`s and packs them into one `u64`: the value at `offset_top`
/// becomes the high 32 bits and the value at `offset_bot` the low 32 bits.
///
/// Only used in rapidhash V1
#[inline(always)]
pub(crate) const fn read_u32_combined(slice: &[u8], offset_top: usize, offset_bot: usize) -> u64 {
    debug_assert!(slice.len() >= 4 + offset_top && slice.len() >= 4 + offset_bot);
    let top = read_u32(slice, offset_top) as u64;
    let bot = read_u32(slice, offset_bot) as u64;
    (top << 32) | bot
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_read_u32() {
        let bytes = &[23, 145, 3, 34];
        assert_eq!(read_u32(bytes, 0), 570659095);

        let bytes = &[24, 54, 3, 23, 145, 3, 34];
        assert_eq!(read_u32(bytes, 3), 570659095);

        assert_eq!(read_u32(&[0, 0, 0, 0], 0), 0);
        assert_eq!(read_u32(&[1, 0, 0, 0], 0), 1);
        assert_eq!(read_u32(&[12, 0, 0, 0], 0), 12);
        assert_eq!(read_u32(&[0, 10, 0, 0], 0), 2560);
    }

    #[test]
    fn test_read_u64() {
        let bytes = [23, 145, 3, 34, 0, 0, 0, 0, 0, 0, 0].as_slice();
        assert_eq!(read_u64(bytes, 0), 570659095);

        let bytes = [1, 2, 3, 23, 145, 3, 34, 0, 0, 0, 0, 0, 0, 0].as_slice();
        assert_eq!(read_u64(bytes, 3), 570659095);

        let bytes = [0, 0, 0, 0, 0, 0, 0, 0].as_slice();
        assert_eq!(read_u64(bytes, 0), 0);
    }

    #[cfg(feature = "std")]
#[test] fn test_u32_to_u128_delta() { fn formula(len: u64) -> u64 { (len & 24) >> (len >> 3) } fn formula2(len: u64) -> u64 { match len { 8.. => 4, _ => 0, } } let inputs: std::vec::Vec = (4..=16).collect(); let outputs: std::vec::Vec = inputs.iter().map(|&x| formula(x)).collect(); let expected = std::vec![0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4]; assert_eq!(outputs, expected); assert_eq!(outputs, inputs.iter().map(|&x| formula2(x)).collect::>()); } #[test] #[should_panic] #[cfg(any(test, not(feature = "unsafe")))] fn test_read_u32_to_short_panics() { let bytes = [23, 145, 0].as_slice(); assert_eq!(read_u32(bytes, 0), 0); } #[test] #[should_panic] #[cfg(any(test, not(feature = "unsafe")))] fn test_read_u64_to_short_panics() { let bytes = [23, 145, 0].as_slice(); assert_eq!(read_u64(bytes, 0), 0); } } rapidhash-4.4.1/src/v1/mod.rs000064400000000000000000000046171046102023000140470ustar 00000000000000//! Portable hashing: rapidhash V1 algorithm. //! //! For new code, please use [`crate::v3`] instead, as it is a superior hashing algorithm. mod rapid_const; #[cfg(any(feature = "std", docsrs))] mod rapid_file; mod seed; #[doc(inline)] pub use rapid_const::*; #[doc(inline)] #[cfg(any(feature = "std", docsrs))] pub use rapid_file::*; #[doc(inline)] pub use seed::*; #[cfg(test)] mod tests { #![allow(deprecated)] extern crate std; use crate::util::macros::{compare_to_c, flip_bit_trial}; use super::*; flip_bit_trial!(flip_bit_trial_v1, rapidhash_v1_inline::); flip_bit_trial!(flip_bit_trial_v1_bug, rapidhash_v1_inline::); compare_to_c!(compare_to_c_v1, rapidhash_v1_inline::, rapidhash_v1_inline::, rapidhashcc_v1); #[test] fn test_v1_bug() { fn rapidhash_bug(data: &str) -> u64 { rapidhash_v1_inline::(data.as_bytes(), &DEFAULT_RAPID_SECRETS) } // The v1.x.x bug was for the 48-byte case // The v2.x.x attempted fix ended up not hashing a bunch of data beyond 48 bytes... 
:facepalm: assert_eq!(5006746792674864303, rapidhash_bug("\n")); assert_eq!(4933522537766704430, rapidhash_bug("abcdef\n")); assert_eq!(3345456103814863532, rapidhash_bug("abcdefghijklmnopqrstuvwxyz12345678901234567890\n")); assert_eq!(8825074939507110130, rapidhash_bug("abcdefghijklmnopqrstuvwxyz123456789012345678901\n")); assert_eq!(2762901732509801681, rapidhash_bug("abcdefghijklmnopqrstuvwxyz1234567890123456789012\n")); assert_eq!( 934306286158757431, rapidhash_bug("abcdefghijklmnopqrstuvwxyz12345678901234567890123\n")); } #[test] fn test_hardcoded_v1() { assert_eq!(6516417773221693515, rapidhash_v1(&[])); assert_eq!(5006746792674864303, rapidhash_v1("\n".as_bytes())); assert_eq!(15965596575264898037, rapidhash_v1("something\n".as_bytes())); // below is 47 bytes, an extra character would hit the V1 bug assert_eq!(10644405912457645442, rapidhash_v1("abcdefghijklmnopqrstuvwxyz01234567890123456789\n".as_bytes())); assert_eq!(7545813847373533788, rapidhash_v1("abcdefghijklmnopqrstuvwxyz012345678901234567890abcdefghijklmnopqrstuvwxyz012345678901234567890abcdefghijklmnopqrstuvwxyz012345678901234567890\n".as_bytes())); } } rapidhash-4.4.1/src/v1/rapid_const.rs000064400000000000000000000146101046102023000155670ustar 00000000000000use crate::util::mix::{rapid_mix, rapid_mum}; use crate::util::read::{read_u32_combined, read_u64}; use super::{DEFAULT_RAPID_SECRETS, RapidSecrets}; /// Rapidhash a single byte stream, matching the C++ implementation. #[inline] pub const fn rapidhash_v1(data: &[u8]) -> u64 { rapidhash_v1_inline::(data, &DEFAULT_RAPID_SECRETS) } /// Rapidhash a single byte stream, matching the C++ implementation, with a custom seed. #[inline] pub const fn rapidhash_v1_seeded(data: &[u8], secrets: &RapidSecrets) -> u64 { rapidhash_v1_inline::(data, secrets) } /// Rapidhash a single byte stream, matching the C++ implementation. /// /// Is marked with `#[inline(always)]` to force the compiler to inline and optimize the method. 
/// Can provide large performance uplifts for inputs where the length is known at compile time. /// /// Compile time arguments: /// - `AVALANCHE`: Perform an extra mix step to avalanche the bits for higher hash quality. Enabled /// by default to match the C++ implementation. /// - `COMPACT`: Generates fewer instructions at compile time with less manual loop unrolling, but /// may be slower on some platforms. Disabled by default. /// - `PROTECTED`: Slightly stronger hash quality and DoS resistance by performing two extra XOR /// instructions on every mix step. Disabled by default. /// - `V1_BUG`: True to re-introduce the bug that was present on 48 byte length inputs in the /// 1.x crate versions for backwards compatibility with the old rust implementation. #[inline(always)] pub const fn rapidhash_v1_inline(data: &[u8], secrets: &RapidSecrets) -> u64 { rapidhash_core::(secrets.seed, &secrets.secrets, data) } #[inline(always)] pub(super) const fn rapidhash_core(mut seed: u64, secrets: &[u64; 3], data: &[u8]) -> u64 { let mut a = 0; let mut b = 0; seed ^= data.len() as u64; if data.len() <= 16 { // deviation from the C++ impl computes delta as follows // let delta = (data.len() & 24) >> (data.len() >> 3); // this is equivalent to "match {..8=>0, 8..=>4}" // and so using the extra if-else statement is equivalent and allows the compiler to skip // some unnecessary bounds checks while still being safe rust. 
if data.len() >= 8 { // len is 4..=16 let plast = data.len() - 4; let delta = 4; a ^= read_u32_combined(data, 0, plast); b ^= read_u32_combined(data, delta, plast - delta); } else if data.len() >= 4 { let plast = data.len() - 4; let delta = 0; a ^= read_u32_combined(data, 0, plast); b ^= read_u32_combined(data, delta, plast - delta); } else if !data.is_empty() { // len is 1..=3 let len = data.len(); a ^= ((data[0] as u64) << 56) | ((data[len >> 1] as u64) << 32) | data[len - 1] as u64; // b = 0; } } else { let mut slice = data; // the v1.x.x versions of rapidhash had a bug where this if statement was omitted, which // caused the hash to be incorrect for 48 byte inputs. The v2.x.x versions of this crate // incorrectly handled the V1_BUG... The v3.x.x versions of this crate should now match // the buggy v1.x.x crate version when V1_BUG=true. Kicking myself for this one. if slice.len() > 48 || V1_BUG { // most CPUs appear to benefit from this unrolled loop let mut see1 = seed; let mut see2 = seed; if !COMPACT { while slice.len() >= 96 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); seed = rapid_mix::(read_u64(slice, 48) ^ secrets[0], read_u64(slice, 56) ^ seed); see1 = rapid_mix::(read_u64(slice, 64) ^ secrets[1], read_u64(slice, 72) ^ see1); see2 = rapid_mix::(read_u64(slice, 80) ^ secrets[2], read_u64(slice, 88) ^ see2); let (_, split) = slice.split_at(96); slice = split; } if slice.len() >= 48 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; } } else { while slice.len() >= 48 { seed = rapid_mix::(read_u64(slice, 0) ^ 
secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; } } seed ^= see1 ^ see2; } if slice.len() > 16 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[2], read_u64(slice, 8) ^ seed ^ secrets[1]); if slice.len() > 32 { seed = rapid_mix::(read_u64(slice, 16) ^ secrets[2], read_u64(slice, 24) ^ seed); } } a ^= read_u64(data, data.len() - 16); b ^= read_u64(data, data.len() - 8); } a ^= secrets[1]; b ^= seed; let (a2, b2) = rapid_mum::(a, b); a = a2; b = b2; if AVALANCHE { rapidhash_finish::(a, b, data.len() as u64, secrets) } else { a ^ b } } #[inline(always)] pub(super) const fn rapidhash_finish(a: u64, b: u64, len: u64, secrets: &[u64; 3]) -> u64 { rapid_mix::(a ^ secrets[0] ^ len, b ^ secrets[1]) } rapidhash-4.4.1/src/v1/rapid_file.rs000064400000000000000000000155071046102023000153660ustar 00000000000000use std::fs::File; use std::io::{BufReader, Read}; use crate::util::mix::{rapid_mix, rapid_mum}; use crate::util::read::{read_u32_combined, read_u64}; use super::{DEFAULT_RAPID_SECRETS, RapidSecrets, rapidhash_finish}; /// Rapidhash a file, matching the C++ implementation. /// /// This method will check the metadata for a file length, and then stream the file with a /// [BufReader] to compute the hash. This avoids loading the entire file into memory. #[inline] #[deprecated(note = "Rapidhash V1 is not a streaming algorithm. We recommend using V3 instead.")] pub fn rapidhash_v1_file(data: &mut File) -> std::io::Result { #[allow(deprecated)] rapidhash_v1_file_inline::(data, &DEFAULT_RAPID_SECRETS) } /// Rapidhash a file, matching the C++ implementation, with a custom seed. /// /// This method will check the metadata for a file length, and then stream the file with a /// [BufReader] to compute the hash. This avoids loading the entire file into memory. 
#[inline] #[deprecated(note = "Rapidhash V1 is not a streaming algorithm. We recommend using V3 instead.")] pub fn rapidhash_v1_file_seeded(data: &mut File, secrets: &RapidSecrets) -> std::io::Result { #[allow(deprecated)] rapidhash_v1_file_inline::(data, secrets) } /// Rapidhash a file, matching the C++ implementation. /// /// This method will check the metadata for a file length, and then stream the file with a /// [BufReader] to compute the hash. This avoids loading the entire file into memory. /// /// We could easily add more ways to read other streams that can be converted to a [BufReader], /// but the length must be known at the start of the stream due to how rapidhash is seeded using /// the data length. Raise a [GitHub](https://github.com/hoxxep/rapidhash) issue if you have a /// use case to support other stream types. /// /// Is marked with `#[inline(always)]` to force the compiler to inline and optimize the method. /// Can provide large performance uplifts for inputs where the length is known at compile time. #[inline(always)] #[deprecated(note = "Rapidhash V1 is not a streaming algorithm. 
We recommend using V3 instead.")] pub fn rapidhash_v1_file_inline(data: &mut File, secrets: &RapidSecrets) -> std::io::Result { let len = data.metadata()?.len(); let mut reader = BufReader::new(data); let hash = rapidhash_file_core::(secrets.seed, &secrets.secrets, len as usize, &mut reader)?; Ok(hash) } #[inline(always)] fn rapidhash_file_core(mut seed: u64, secrets: &[u64; 3], len: usize, iter: &mut BufReader<&mut File>) -> std::io::Result { let mut a = 0; let mut b = 0; seed ^= len as u64; if len <= 16 { let mut data = [0u8; 16]; iter.read_exact(&mut data[0..len])?; // deviation from the C++ impl computes delta as follows // let delta = (data.len() & 24) >> (data.len() >> 3); // this is equivalent to "match {..8=>0, 8..=>4}" // and so using the extra if-else statement is equivalent and allows the compiler to skip // some unnecessary bounds checks while still being safe rust. if len >= 8 { // len is 8..=16 let plast = len - 4; let delta = 4; a ^= read_u32_combined(&data, 0, plast); b ^= read_u32_combined(&data, delta, plast - delta); } else if len >= 4 { // len is 4..=7 let plast = len - 4; let delta = 0; a ^= read_u32_combined(&data, 0, plast); b ^= read_u32_combined(&data, delta, plast - delta); } else if len > 0 { // len is 1..=3 a ^= ((data[0] as u64) << 56) | ((data[len >> 1] as u64) << 32) | data[len - 1] as u64; // b = 0; } } else { let mut remaining = len; let mut buf = [0u8; 192]; // slice is a view on the buffer that we use for reading into, and reading from, depending // on the stage of the loop. 
let mut slice = &mut buf[..96]; let end; if remaining > 48 { // because we're using a buffered reader, it might be worth unrolling this loop further let mut see1 = seed; let mut see2 = seed; while remaining >= 96 { // read into and process using the first half of the buffer iter.read_exact(slice)?; seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); seed = rapid_mix::(read_u64(slice, 48) ^ secrets[0], read_u64(slice, 56) ^ seed); see1 = rapid_mix::(read_u64(slice, 64) ^ secrets[1], read_u64(slice, 72) ^ see1); see2 = rapid_mix::(read_u64(slice, 80) ^ secrets[2], read_u64(slice, 88) ^ see2); remaining -= 96; } // remaining might be up to 95 bytes, so we read into the second half of the buffer, // which allows us to negative index safely in the final a and b xor using `end`. slice = &mut buf[96..96 + remaining]; iter.read_exact(slice)?; end = 96 + remaining; if remaining >= 48 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); slice = &mut buf[96 + 48..96 + remaining]; remaining -= 48; } seed ^= see1 ^ see2; } else { end = remaining; slice = &mut buf[..remaining]; iter.read_exact(slice)?; } if remaining > 16 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[2], read_u64(slice, 8) ^ seed ^ secrets[1]); if remaining > 32 { seed = rapid_mix::(read_u64(slice, 16) ^ secrets[2], read_u64(slice, 24) ^ seed); } } a ^= read_u64(&buf, end - 16); b ^= read_u64(&buf, end - 8); } a ^= secrets[1]; b ^= seed; (a, b) = rapid_mum::(a, b); Ok(rapidhash_finish::(a, b, len as u64, secrets)) } #[cfg(test)] mod tests { #![allow(deprecated)] use std::io::{Seek, SeekFrom, Write}; use 
crate::util::macros::compare_rapidhash_file; use crate::v1::rapidhash_v1_inline; use super::*; compare_rapidhash_file!(compare_rapidhash_v1_file, rapidhash_v1_inline::, rapidhash_v1_file_inline::); } rapidhash-4.4.1/src/v1/seed.rs000064400000000000000000000104131046102023000141770ustar 00000000000000//! Reliable seeding and secrets generation for the hash functions. use crate::util::mix::rapid_mix; /// The default seed used in the C++ implementation. pub(crate) const DEFAULT_SEED: u64 = 0xbdd89aa982704029; /// Used only for generating random secrets. const DEFAULT_SECRETS: [u64; 3] = [ 0x2d358dccaa6c78a5, 0x8bb84b93962eacc9, 0x4b33a62ed433d4a3, ]; /// The default rapidhash secrets used in the C++ implementation. /// /// We recommend generating your own secrets using the [`crate::v3::RapidSecrets::seed`] method to avoid /// trivial collision attacks if you need minimal HashDoS protection. pub const DEFAULT_RAPID_SECRETS: RapidSecrets = RapidSecrets::seed_cpp(DEFAULT_SEED); /// Hold the seed and secrets to be used by rapidhash. /// /// RapidSecrets premix the seed and generate a set of other secrets based on the seed that are all /// used in the hashing process. There are some quality checks on the random values to ensure a /// reasonable distribution of entropy in the generated secrets. /// /// Constructing this struct is fairly cheap, but unnecessary in the critical path. We therefore /// recommend instantiating it once and re-using the same instance for any persistent hashing. The /// `seed` method is marked `const` to also do so at compile time. /// /// # Minimal HashDoS Protection /// We recommend changing the default seed and secrets must be changed to avoid trivial collision /// attacks. For persistent hashing, you can hard code your own randomized seed at compile time. /// /// ```rust /// use rapidhash::v1::RapidSecrets; /// const DEFAULT_SECRETS: RapidSecrets = RapidSecrets::seed(0x123456); // <-- change this value! 
/// /// /// Export your chosen rapidhash version and secrets for use throughout your project. /// pub fn rapidhash(data: &[u8]) -> u64 { /// rapidhash::v1::rapidhash_v1_seeded(data, &DEFAULT_SECRETS) /// } /// ``` /// /// TODO: serde or serialization support. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct RapidSecrets { /// The core rapidhash seed. pub seed: u64, /// The secrets, effectively other seeds used in the hashing process. pub secrets: [u64; 3], } impl RapidSecrets { /// Generate secrets from a given randomized seed. /// /// Note the chosen seed will be pre-mixed to further randomized it, and the secrets will be /// computed based on the seed. /// /// If compatibility with the C++ implementation is required, use the `seed_cpp` method instead. #[inline] pub const fn seed(seed: u64) -> Self { let seed = premix_seed(seed, 0); let mut secrets = [0; 3]; secrets[0] = premix_seed(seed, 0); secrets[1] = premix_seed(secrets[0], 1); secrets[2] = premix_seed(secrets[1], 2); Self { seed, secrets } } /// Creates a new `RapidSecrets` instance with a different seed and the same secrets. /// /// This is useful for in-memory hashing, so we can quickly use a different seed for other /// HashMaps. #[inline(always)] pub const fn reseed(&self) -> Self { Self { seed: premix_seed(self.seed, 6), secrets: self.secrets, } } /// Creates a new `RapidSecrets` instance using a seed and secrets that are compatible with the /// C++ implementation. /// /// Note that these **use the default secrets** and therefore are liable to some trivial /// collision attacks, as randomising both the seed and secrets is necessary to provide minimal /// HashDoS resistance. 
#[inline(always)]
    pub const fn seed_cpp(seed: u64) -> Self {
        Self {
            // The caller's seed is mixed once by `rapidhash_seed`; the secrets are the built-in
            // defaults, copied unchanged.
            seed: rapidhash_seed(seed),
            secrets: DEFAULT_SECRETS,
        }
    }
}

/// Derives the seed stored by `seed_cpp`: the input XORed with a mix of itself and the first two
/// default secrets.
// NOTE(review): `rapid_mix::(` carries an empty turbofish here and in `premix_seed` below — the
// generic argument list appears to have been lost in extraction; confirm against upstream.
#[inline(always)]
const fn rapidhash_seed(seed: u64) -> u64 {
    seed ^ rapid_mix::(seed ^ DEFAULT_SECRETS[0], DEFAULT_SECRETS[1])
}

/// Mixes `seed` with `DEFAULT_SECRETS[i]` and then repairs weak results.
///
/// `i` indexes `DEFAULT_SECRETS` directly, so it must be smaller than that array's length; an
/// out-of-range index panics.
///
/// After mixing, three 16-bit windows of the value are inspected (bits 48-63, bits 24-39 and
/// bits 0-15). If a window is entirely zero, one bit inside it is forced on (bit 63, bit 31 and
/// bit 0 respectively).
#[inline]
const fn premix_seed(mut seed: u64, i: usize) -> u64 {
    seed ^= rapid_mix::(seed ^ DEFAULT_SECRETS[2], DEFAULT_SECRETS[i]);

    // ensure the seeds are of reasonable non-zero quality
    const HI: u64 = 0xFFFF << 48;
    const MI: u64 = 0xFFFF << 24;
    const LO: u64 = 0xFFFF;

    if (seed & HI) == 0 {
        seed |= 1u64 << 63;
    }

    if (seed & MI) == 0 {
        seed |= 1u64 << 31;
    }

    if (seed & LO) == 0 {
        seed |= 1u64;
    }

    seed
}
rapidhash-4.4.1/src/v2/mod.rs000064400000000000000000000022651046102023000140450ustar 00000000000000
//! Portable hashing: rapidhash V2.2 algorithm.
//!
//! For new code, please use [`crate::v3`] instead, as it is a superior hashing algorithm.

mod rapid_const;
#[cfg(any(feature = "std", docsrs))]
mod rapid_file;
mod seed;

#[doc(inline)]
pub use rapid_const::*;
#[doc(inline)]
#[cfg(any(feature = "std", docsrs))]
pub use rapid_file::*;
#[doc(inline)]
pub use seed::*;

#[cfg(test)]
mod tests {
    extern crate std;

    use crate::util::macros::{compare_to_c, flip_bit_trial};
    use super::*;

    // One trial per supported minor version; the first const argument selects the minor version.
    flip_bit_trial!(flip_bit_trial_v2_0, rapidhash_v2_inline::<0, true, false, false>);
    flip_bit_trial!(flip_bit_trial_v2_1, rapidhash_v2_inline::<1, true, false, false>);
    flip_bit_trial!(flip_bit_trial_v2_2, rapidhash_v2_inline::<2, true, false, false>);

    // Each comparison passes two instantiations that differ only in the third const argument
    // (presumably the default and the `COMPACT` code paths — confirm against the macro).
    compare_to_c!(compare_to_c_v2_0, rapidhash_v2_inline::<0, true, false, false>, rapidhash_v2_inline::<0, true, true, false>, rapidhashcc_v2);
    compare_to_c!(compare_to_c_v2_1, rapidhash_v2_inline::<1, true, false, false>, rapidhash_v2_inline::<1, true, true, false>, rapidhashcc_v2_1);
    compare_to_c!(compare_to_c_v2_2, rapidhash_v2_inline::<2, true, false, false>, rapidhash_v2_inline::<2, true, true, false>, rapidhashcc_v2_2);
}
rapidhash-4.4.1/src/v2/rapid_const.rs000064400000000000000000000254611046102023000155760ustar 00000000000000
use crate::util::mix::{rapid_mix, rapid_mum};
use crate::util::read::{read_u32, read_u64};
use super::{DEFAULT_RAPID_SECRETS, RapidSecrets};

/// Hashes a single byte stream with rapidhash V2.2, matching the C++ implementation, using the
/// default seed.
///
/// This is `rapidhash_v2_inline::<2, true, false, false>` applied with `DEFAULT_RAPID_SECRETS`.
/// See [rapidhash_v2_inline] to compute the hash value using V2.0, V2.1 or V2.2.
///
/// Fixed length inputs will greatly benefit from inlining with [rapidhash_v2_inline] instead.
#[inline]
pub const fn rapidhash_v2_2(data: &[u8]) -> u64 {
    rapidhash_v2_inline::<2, true, false, false>(data, &DEFAULT_RAPID_SECRETS)
}

/// Hashes a single byte stream with rapidhash V2.2, matching the C++ implementation, using the
/// caller's seed and secrets.
///
/// This is `rapidhash_v2_inline::<2, true, false, false>` applied with `secrets`.
/// See [rapidhash_v2_inline] to compute the hash value using V2.0, V2.1 or V2.2.
///
/// Fixed length inputs will greatly benefit from inlining with [rapidhash_v2_inline] instead.
#[inline]
pub const fn rapidhash_v2_2_seeded(data: &[u8], secrets: &RapidSecrets) -> u64 {
    rapidhash_v2_inline::<2, true, false, false>(data, secrets)
}

/// Hashes a single byte stream with rapidhash V2, matching the C++ implementation.
///
/// Is marked with `#[inline(always)]` to force the compiler to inline and optimize the method.
/// Can provide large performance uplifts for fixed-length inputs at compile time.
///
/// Compile time arguments:
/// - `MINOR`: the minor version of the rapidhash algorithm:
///     - 0: v2.0
///     - 1: v2.1
///     - 2: v2.2
/// - `AVALANCHE`: Perform an extra mix step to avalanche the bits for higher hash quality. Enabled
///     by default to match the C++ implementation.
/// - `COMPACT`: Generates fewer instructions at compile time with less manual loop unrolling, but
///     may be slower on some platforms. Disabled by default.
/// - `PROTECTED`: Slightly stronger hash quality and DoS resistance by performing two extra XOR
///     instructions on every mix step. Disabled by default.
#[inline(always)] pub const fn rapidhash_v2_inline(data: &[u8], secrets: &RapidSecrets) -> u64 { rapidhash_core::(secrets.seed, &secrets.secrets, data) } #[inline(always)] pub(super) const fn rapidhash_core(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 { if MINOR > 2 { panic!("rapidhash_core unsupported minor version. Supported versions are 0, 1, and 2."); } let mut a = 0; let mut b = 0; seed ^= data.len() as u64; if data.len() <= 16 { if data.len() >= 4 { if data.len() >= 8 { let plast = data.len() - 8; a ^= read_u64(data, 0); b ^= read_u64(data, plast); } else { let plast = data.len() - 4; a ^= read_u32(data, 0) as u64; b ^= read_u32(data, plast) as u64; } } else if !data.is_empty() { if MINOR < 2 { a ^= ((data[0] as u64) << 56) | ((data[data.len() >> 1] as u64) << 32) | data[data.len() - 1] as u64; } else { a ^= ((data[0] as u64) << 56) | data[data.len() - 1] as u64; b ^= data[data.len() >> 1] as u64; } } } else if (MINOR == 0 && data.len() <= 56) || (MINOR > 0 && data.len() <= 64) { // len is 17..=64 return rapidhash_core_17_64::(seed, secrets, data); } else { return rapidhash_core_cold::(seed, secrets, data); } a ^= secrets[1]; b ^= seed; (a, b) = rapid_mum::(a, b); if AVALANCHE { rapidhash_finish::(a, b, data.len() as u64, secrets) } else { a ^ b } } #[inline] // intentionally not always const fn rapidhash_core_17_64(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 { let mut a = 0; let mut b = 0; let slice = data; seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); if slice.len() > 32 { seed = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ seed); if slice.len() > 48 { let index: usize = if MINOR < 2 { 0 } else { 1 }; seed = rapid_mix::(read_u64(slice, 32) ^ secrets[index], read_u64(slice, 40) ^ seed); } } a ^= read_u64(data, data.len() - 16); b ^= read_u64(data, data.len() - 8); a ^= secrets[1]; b ^= seed; (a, b) = rapid_mum::(a, b); if AVALANCHE { rapidhash_finish::(a, b, data.len() as u64, 
secrets) } else { a ^ b } } /// The long path, intentionally kept cold because at this length of data the function call is /// minor, but the complexity of this function — if it were inlined — could prevent x.hash() from /// being inlined which would have a much higher penalty and prevent other optimisations. #[cold] const fn rapidhash_core_cold(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 { let mut a = 0; let mut b = 0; let mut slice = data; // most CPUs appear to benefit from this unrolled loop let mut see1 = seed; let mut see2 = seed; let mut see3 = seed; let mut see4 = seed; let mut see5 = seed; let mut see6 = seed; if !COMPACT { while slice.len() >= 224 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); see3 = rapid_mix::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); see4 = rapid_mix::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); see5 = rapid_mix::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ see5); see6 = rapid_mix::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ see6); seed = rapid_mix::(read_u64(slice, 112) ^ secrets[0], read_u64(slice, 120) ^ seed); see1 = rapid_mix::(read_u64(slice, 128) ^ secrets[1], read_u64(slice, 136) ^ see1); see2 = rapid_mix::(read_u64(slice, 144) ^ secrets[2], read_u64(slice, 152) ^ see2); see3 = rapid_mix::(read_u64(slice, 160) ^ secrets[3], read_u64(slice, 168) ^ see3); see4 = rapid_mix::(read_u64(slice, 176) ^ secrets[4], read_u64(slice, 184) ^ see4); see5 = rapid_mix::(read_u64(slice, 192) ^ secrets[5], read_u64(slice, 200) ^ see5); see6 = rapid_mix::(read_u64(slice, 208) ^ secrets[6], read_u64(slice, 216) ^ see6); let (_, split) = slice.split_at(224); slice = split; } if slice.len() >= 112 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); 
see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); see3 = rapid_mix::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); see4 = rapid_mix::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); see5 = rapid_mix::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ see5); see6 = rapid_mix::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ see6); let (_, split) = slice.split_at(112); slice = split; } if slice.len() >= 48 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; if slice.len() >= 48 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; } } } else { while slice.len() >= 112 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); see3 = rapid_mix::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); see4 = rapid_mix::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); see5 = rapid_mix::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ see5); see6 = rapid_mix::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ see6); let (_, split) = slice.split_at(112); slice = split; } while slice.len() >= 48 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) 
^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; } } see3 ^= see4; see5 ^= see6; seed ^= see1; see3 ^= see2; seed ^= see5; seed ^= see3; if slice.len() > 16 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[2], read_u64(slice, 8) ^ seed); if slice.len() > 32 { seed = rapid_mix::(read_u64(slice, 16) ^ secrets[2], read_u64(slice, 24) ^ seed); } } a ^= read_u64(data, data.len() - 16); b ^= read_u64(data, data.len() - 8); a ^= secrets[1]; b ^= seed; (a, b) = rapid_mum::(a, b); if AVALANCHE { rapidhash_finish::(a, b, data.len() as u64, secrets) } else { a ^ b } } #[inline(always)] pub(super) const fn rapidhash_finish(a: u64, b: u64, len: u64, secrets: &[u64; 7]) -> u64 { rapid_mix::(a ^ 0xaaaaaaaaaaaaaaaa ^ len, b ^ secrets[1]) } rapidhash-4.4.1/src/v2/rapid_file.rs000064400000000000000000000242051046102023000153620ustar 00000000000000use std::fs::File; use std::io::{BufReader, Read}; use crate::util::mix::{rapid_mix, rapid_mum}; use crate::util::read::{read_u32, read_u64}; use super::{DEFAULT_RAPID_SECRETS, RapidSecrets, rapidhash_finish}; /// Rapidhash V2.2 a file, matching the C++ implementation. /// /// See [rapidhash_v2_file_inline] to compute the hash value using V2.0 or V2.2. /// /// This method will check the metadata for a file length, and then stream the file with a /// [BufReader] to compute the hash. This avoids loading the entire file into memory. #[inline] #[deprecated(note = "Rapidhash V2 is not a streaming algorithm. We recommend using V3 instead.")] pub fn rapidhash_v2_2_file(data: &mut File) -> std::io::Result { #[allow(deprecated)] rapidhash_v2_file_inline::<2, false>(data, &DEFAULT_RAPID_SECRETS) } /// Rapidhash V2.2 a file, matching the C++ implementation, with a custom seed. /// /// See [rapidhash_v2_file_inline] to compute the hash value using V2.0 or V2.2. 
/// /// This method will check the metadata for a file length, and then stream the file with a /// [BufReader] to compute the hash. This avoids loading the entire file into memory. #[inline] #[deprecated(note = "Rapidhash V2 is not a streaming algorithm. We recommend using V3 instead.")] pub fn rapidhash_v2_2_file_seeded(data: &mut File, secrets: &RapidSecrets) -> std::io::Result { #[allow(deprecated)] rapidhash_v2_file_inline::<2, false>(data, secrets) } /// Rapidhash V2 a file, matching the C++ implementation. (2.0, 2.1, and 2.2 supported) /// /// This method will check the metadata for a file length, and then stream the file with a /// [BufReader] to compute the hash. This avoids loading the entire file into memory. /// /// We could easily add more ways to read other streams that can be converted to a [BufReader], /// but the length must be known at the start of the stream due to how rapidhash is seeded using /// the data length. Raise a [GitHub](https://github.com/hoxxep/rapidhash) issue if you have a /// use case to support other stream types. /// /// Is marked with `#[inline(always)]` to force the compiler to inline and optimize the method. /// Can provide large performance uplifts for inputs where the length is known at compile time. /// /// `MINOR` is the minor version of the rapidhash algorithm: /// - 0: v2.0 /// - 1: v2.1 /// - 2: v2.2 #[inline(always)] #[deprecated(note = "Rapidhash V2 is not a streaming algorithm. We recommend using V3 instead.")] pub fn rapidhash_v2_file_inline(data: &mut File, secrets: &RapidSecrets) -> std::io::Result { let len = data.metadata()?.len(); let mut reader = BufReader::new(data); let hash = rapidhash_file_core::(secrets.seed, &secrets.secrets, len as usize, &mut reader)?; Ok(hash) } #[inline(always)] fn rapidhash_file_core(mut seed: u64, secrets: &[u64; 7], len: usize, iter: &mut BufReader<&mut File>) -> std::io::Result { if MINOR > 2 { panic!("rapidhash_file_core does not support minor version {}. 
Supported versions are 0, 1, and 2.", MINOR); } let mut a = 0; let mut b = 0; seed ^= len as u64; if len <= 16 { let mut buf = [0u8; 16]; iter.read_exact(&mut buf[0..len])?; let data = &buf[..len]; if data.len() >= 4 { if data.len() >= 8 { let plast = data.len() - 8; a = read_u64(data, 0); b = read_u64(data, plast); } else { let plast = data.len() - 4; a = read_u32(data, 0) as u64; b = read_u32(data, plast) as u64; } } else if !data.is_empty() { if MINOR < 2 { a = ((data[0] as u64) << 56) | ((data[data.len() >> 1] as u64) << 32) | data[data.len() - 1] as u64; } else { a = ((data[0] as u64) << 56) | data[data.len() - 1] as u64; b = data[data.len() >> 1] as u64; } } } else if (MINOR >= 1 && len > 64) || (MINOR == 0 && len > 56) { let mut remaining = len; let mut buf = [0u8; 448]; // slice is a view on the buffer that we use for reading into, and reading from, depending // on the stage of the loop. let mut slice = &mut buf[..224]; // because we're using a buffered reader, it might be worth unrolling this loop further let mut see1 = seed; let mut see2 = seed; let mut see3 = seed; let mut see4 = seed; let mut see5 = seed; let mut see6 = seed; while remaining >= 224 { // read into and process using the first half of the buffer iter.read_exact(slice)?; seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); see3 = rapid_mix::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); see4 = rapid_mix::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); see5 = rapid_mix::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ see5); see6 = rapid_mix::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ see6); seed = rapid_mix::(read_u64(slice, 112) ^ secrets[0], read_u64(slice, 120) ^ seed); see1 = rapid_mix::(read_u64(slice, 128) ^ secrets[1], read_u64(slice, 136) ^ 
see1); see2 = rapid_mix::(read_u64(slice, 144) ^ secrets[2], read_u64(slice, 152) ^ see2); see3 = rapid_mix::(read_u64(slice, 160) ^ secrets[3], read_u64(slice, 168) ^ see3); see4 = rapid_mix::(read_u64(slice, 176) ^ secrets[4], read_u64(slice, 184) ^ see4); see5 = rapid_mix::(read_u64(slice, 192) ^ secrets[5], read_u64(slice, 200) ^ see5); see6 = rapid_mix::(read_u64(slice, 208) ^ secrets[6], read_u64(slice, 216) ^ see6); remaining -= 224; } // remaining might be up to 224 bytes, so we read into the second half of the buffer, // which allows us to negative index safely in the final a and b xor using `end`. slice = &mut buf[224..224 + remaining]; iter.read_exact(slice)?; let end = 224 + remaining; if slice.len() >= 112 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); see3 = rapid_mix::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); see4 = rapid_mix::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); see5 = rapid_mix::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ see5); see6 = rapid_mix::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ see6); slice = &mut slice[112..remaining]; remaining -= 112; } if remaining >= 48 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); slice = &mut slice[48..remaining]; remaining -= 48; if remaining >= 48 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); slice = &mut slice[48..remaining]; remaining -= 48; } } 
see3 ^= see4; see5 ^= see6; seed ^= see1; see3 ^= see2; seed ^= see5; seed ^= see3; if remaining > 16 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[2], read_u64(slice, 8) ^ seed); if remaining > 32 { seed = rapid_mix::(read_u64(slice, 16) ^ secrets[2], read_u64(slice, 24) ^ seed); } } a ^= read_u64(&buf, end - 16); b ^= read_u64(&buf, end - 8); } else { let data = &mut [0u8; 64]; iter.read_exact(&mut data[0..len])?; let slice = &data[..len]; seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); if slice.len() > 32 { seed = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ seed); if slice.len() > 48 { let index: usize = if MINOR < 2 { 0 } else { 1 }; seed = rapid_mix::(read_u64(slice, 32) ^ secrets[index], read_u64(slice, 40) ^ seed); } } a = read_u64(slice, slice.len() - 16); b = read_u64(slice, slice.len() - 8); } a ^= secrets[1]; b ^= seed; (a, b) = rapid_mum::(a, b); let hash = rapidhash_finish::(a, b, len as u64, secrets); Ok(hash) } #[cfg(test)] mod tests { #![allow(deprecated)] use std::io::{Seek, SeekFrom, Write}; use crate::util::macros::compare_rapidhash_file; use crate::v2::rapidhash_v2_inline; use super::*; compare_rapidhash_file!(compare_rapidhash_v2_0_file, rapidhash_v2_inline::<0, true, false, false>, rapidhash_v2_file_inline::<0, false>); compare_rapidhash_file!(compare_rapidhash_v2_1_file, rapidhash_v2_inline::<1, true, false, false>, rapidhash_v2_file_inline::<1, false>); compare_rapidhash_file!(compare_rapidhash_v2_2_file, rapidhash_v2_inline::<2, true, false, false>, rapidhash_v2_file_inline::<2, false>); } rapidhash-4.4.1/src/v2/seed.rs000064400000000000000000000110161046102023000142000ustar 00000000000000//! Reliable seeding and secrets generation for the hash functions. use crate::util::mix::rapid_mix; /// The default seed used in the C++ implementation. pub(crate) const DEFAULT_SEED: u64 = 0; /// Used only for generating random secrets. 
const DEFAULT_SECRETS: [u64; 7] = [ 0x2d358dccaa6c78a5, 0x8bb84b93962eacc9, 0x4b33a62ed433d4a3, 0x4d5a2da51de1aa47, 0xa0761d6478bd642f, 0xe7037ed1a0b428db, 0x90ed1765281c388c, ]; /// The default rapidhash secrets used in the C++ implementation. /// /// We recommend generating your own secrets using the [`crate::v3::RapidSecrets::seed`] method to avoid /// trivial collision attacks if you need minimal HashDoS protection. pub const DEFAULT_RAPID_SECRETS: RapidSecrets = RapidSecrets::seed_cpp(DEFAULT_SEED); /// Hold the seed and secrets to be used by rapidhash. /// /// RapidSecrets premix the seed and generate a set of other secrets based on the seed that are all /// used in the hashing process. There are some quality checks on the random values to ensure a /// reasonable distribution of entropy in the generated secrets. /// /// Constructing this struct is fairly cheap, but unnecessary in the critical path. We therefore /// recommend instantiating it once and re-using the same instance for any persistent hashing. The /// `seed` method is marked `const` to also do so at compile time. /// /// # Minimal HashDoS Protection /// We recommend changing the default seed and secrets must be changed to avoid trivial collision /// attacks. For persistent hashing, you can hard code your own randomized seed at compile time. /// /// ```rust /// use rapidhash::v2::RapidSecrets; /// const DEFAULT_SECRETS: RapidSecrets = RapidSecrets::seed(0x123456); // <-- change this value! /// /// /// Export your chosen rapidhash version and secrets for use throughout your project. /// pub fn rapidhash(data: &[u8]) -> u64 { /// rapidhash::v2::rapidhash_v2_2_seeded(data, &DEFAULT_SECRETS) /// } /// ``` /// /// TODO: serde or serialization support. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct RapidSecrets { /// The core rapidhash seed. pub seed: u64, /// The secrets, effectively other seeds used in the hashing process. 
pub secrets: [u64; 7], } impl RapidSecrets { /// Generate secrets from a given randomized seed. /// /// Note the chosen seed will be pre-mixed to further randomized it, and the secrets will be /// computed based on the seed. /// /// If compatibility with the C++ implementation is required, use the `seed_cpp` method instead. #[inline] pub const fn seed(seed: u64) -> Self { let seed = premix_seed(seed, 0); let mut secrets = [0; 7]; secrets[0] = premix_seed(seed, 0); secrets[1] = premix_seed(secrets[0], 1); secrets[2] = premix_seed(secrets[1], 2); secrets[3] = premix_seed(secrets[2], 3); secrets[4] = premix_seed(secrets[3], 4); secrets[5] = premix_seed(secrets[4], 5); secrets[6] = premix_seed(secrets[5], 6); Self { seed, secrets } } /// Creates a new `RapidSecrets` instance with a different seed and the same secrets. /// /// This is useful for in-memory hashing, so we can quickly use a different seed for other /// HashMaps. #[inline(always)] pub const fn reseed(&self) -> Self { Self { seed: premix_seed(self.seed, 6), secrets: self.secrets, } } /// Creates a new `RapidSecrets` instance using a seed and secrets that are compatible with the /// C++ implementation. /// /// Note that these **use the default secrets** and therefore are liable to some trivial /// collision attacks, as randomising both the seed and secrets is necessary to provide minimal /// HashDoS resistance. 
#[inline(always)]
    pub const fn seed_cpp(seed: u64) -> Self {
        Self {
            // The caller's seed is mixed once by `rapidhash_seed`; the secrets are the built-in
            // defaults, copied unchanged.
            seed: rapidhash_seed(seed),
            secrets: DEFAULT_SECRETS,
        }
    }
}

/// Derives the seed stored by `seed_cpp`: the input XORed with a mix of itself,
/// `DEFAULT_SECRETS[2]` and `DEFAULT_SECRETS[1]`.
// NOTE(review): `rapid_mix::(` carries an empty turbofish here and in `premix_seed` below — the
// generic argument list appears to have been lost in extraction; confirm against upstream.
const fn rapidhash_seed(seed: u64) -> u64 {
    seed ^ rapid_mix::(seed ^ DEFAULT_SECRETS[2], DEFAULT_SECRETS[1])
}

/// Mixes `seed` with `DEFAULT_SECRETS[i]` and then repairs weak results.
///
/// `i` indexes `DEFAULT_SECRETS` directly, so it must be smaller than that array's length; an
/// out-of-range index panics.
///
/// After mixing, three 16-bit windows of the value are inspected (bits 48-63, bits 24-39 and
/// bits 0-15). If a window is entirely zero, one bit inside it is forced on (bit 63, bit 31 and
/// bit 0 respectively).
#[inline]
const fn premix_seed(mut seed: u64, i: usize) -> u64 {
    seed ^= rapid_mix::(seed ^ DEFAULT_SECRETS[2], DEFAULT_SECRETS[i]);

    // ensure the seeds are of reasonable non-zero quality
    const HI: u64 = 0xFFFF << 48;
    const MI: u64 = 0xFFFF << 24;
    const LO: u64 = 0xFFFF;

    if (seed & HI) == 0 {
        seed |= 1u64 << 63;
    }

    if (seed & MI) == 0 {
        seed |= 1u64 << 31;
    }

    if (seed & LO) == 0 {
        seed |= 1u64;
    }

    seed
}
rapidhash-4.4.1/src/v3/mod.rs000064400000000000000000000061311046102023000140420ustar 00000000000000
//! Portable hashing: rapidhash V3 algorithm.

mod rapid_const;
#[cfg(any(feature = "std", docsrs))]
mod rapid_file;
mod seed;
mod rapid_stream_hasher;

#[doc(inline)]
pub use rapid_const::*;
#[doc(inline)]
#[cfg(any(feature = "std", docsrs))]
pub use rapid_file::*;
#[doc(inline)]
pub use rapid_stream_hasher::*;
#[doc(inline)]
pub use seed::*;

#[cfg(test)]
mod tests {
    extern crate std;

    use rand::Rng;
    use crate::util::macros::{compare_to_c, flip_bit_trial};
    use super::*;

    // NOTE(review): every function path in the macro calls below ends in a bare `::` — the
    // turbofish argument lists appear to have been lost in extraction; confirm against upstream.
    flip_bit_trial!(flip_bit_trial_v3, rapidhash_v3_inline::);
    flip_bit_trial!(flip_bit_trial_v3_micro, rapidhash_v3_micro_inline::);
    flip_bit_trial!(flip_bit_trial_v3_nano, rapidhash_v3_nano_inline::);

    compare_to_c!(compare_to_c_v3, rapidhash_v3_inline::, rapidhash_v3_inline::, rapidhashcc_v3);
    compare_to_c!(compare_to_c_v3_micro, rapidhash_v3_micro_inline::, rapidhash_v3_micro_inline::, rapidhashcc_v3_micro);
    compare_to_c!(compare_to_c_v3_nano, rapidhash_v3_nano_inline::, rapidhash_v3_nano_inline::, rapidhashcc_v3_nano);

    /// Checks that the main rapidhash V3 output matches micro for inputs up to 80 bytes and nano
    /// for inputs up to 48 bytes, and that the outputs differ for longer inputs.
#[test] fn compare_micro_nano_v3() { // test zero-length input let hash_v3 = rapidhash_v3_inline::(&[], &DEFAULT_RAPID_SECRETS); let hash_micro = rapidhash_v3_micro_inline::(&[], &DEFAULT_RAPID_SECRETS); let hash_nano = rapidhash_v3_nano_inline::(&[], &DEFAULT_RAPID_SECRETS); assert_eq!(hash_v3, hash_micro, "Mismatch with micro on zero length input"); assert_eq!(hash_v3, hash_nano, "Mismatch with nano on zero length input"); for len in 0..=82 { let mut data = std::vec![0; len]; rand::rng().fill(&mut data[..]); for byte in 0..len { for bit in 0..8 { let mut data = data.clone(); data[byte] ^= 1 << bit; let hash_v3 = rapidhash_v3_inline::(&data, &DEFAULT_RAPID_SECRETS); let hash_micro = rapidhash_v3_micro_inline::(&data, &DEFAULT_RAPID_SECRETS); let hash_nano = rapidhash_v3_nano_inline::(&data, &DEFAULT_RAPID_SECRETS); if len <= 80 { assert_eq!(hash_v3, hash_micro, "Mismatch with mico on input {} byte {} bit {}", len, byte, bit); } else { assert_ne!(hash_v3, hash_micro, "Micro should mismatch on input {} byte {} bit {}", len, byte, bit); } if len <= 48 { assert_eq!(hash_v3, hash_nano, "Mismatch with nano on input {} byte {} bit {}", len, byte, bit); } else { assert_ne!(hash_v3, hash_nano, "Nano should mismatch on input {} byte {} bit {}", len, byte, bit); } } } } } } rapidhash-4.4.1/src/v3/rapid_const.rs000064400000000000000000000403441046102023000155740ustar 00000000000000use crate::util::hints::{assume, likely, unlikely}; use crate::util::mix::{rapid_mix, rapid_mum}; use crate::util::read::{read_u32, read_u64}; use super::{DEFAULT_RAPID_SECRETS, RapidSecrets}; /// Rapidhash V3 a single byte stream, matching the C++ implementation, with the default seed. /// /// Fixed length inputs will greatly benefit from inlining with [rapidhash_v3_inline] instead. #[inline] pub const fn rapidhash_v3(data: &[u8]) -> u64 { rapidhash_v3_inline::(data, &DEFAULT_RAPID_SECRETS) } /// Rapidhash V3 a single byte stream, matching the C++ implementation, with a custom seed. 
///
/// Fixed length inputs will greatly benefit from inlining with [rapidhash_v3_inline] instead.
// NOTE(review): throughout this span, calls such as `rapidhash_v3_inline::(` carry an empty
// turbofish and the generic functions show no const parameter list — both appear to have been
// lost in extraction; confirm against the upstream source.
#[inline]
pub const fn rapidhash_v3_seeded(data: &[u8], secrets: &RapidSecrets) -> u64 {
    rapidhash_v3_inline::(data, secrets)
}

/// Hashes a single byte stream with rapidhash V3, matching the C++ implementation.
///
/// Is marked with `#[inline(always)]` to force the compiler to inline and optimize the method.
/// Can provide large performance uplifts for fixed-length inputs at compile time.
///
/// Compile time arguments:
/// - `AVALANCHE`: Perform an extra mix step to avalanche the bits for higher hash quality. Enabled
///     by default to match the C++ implementation.
/// - `COMPACT`: Generates fewer instructions at compile time with less manual loop unrolling, but
///     may be slower on some platforms. Disabled by default.
/// - `PROTECTED`: Slightly stronger hash quality and DoS resistance by performing two extra XOR
///     instructions on every mix step. Disabled by default.
#[inline(always)]
pub const fn rapidhash_v3_inline(data: &[u8], secrets: &RapidSecrets) -> u64 {
    rapidhash_core::(secrets.seed, &secrets.secrets, data)
}

/// Rapidhash V3 Micro, a very compact version of the rapidhash algorithm.
///
/// WARNING: This produces a different output from `rapidhash_v3`.
///
/// Designed for HPC and server applications, where cache misses cause a noticeable performance
/// detriment. Compiles to ~140 instructions without stack usage, both on x86-64 and aarch64.
/// Faster for sizes up to 512 bytes, just 15%-20% slower for inputs above 1kb.
///
/// Compile time arguments:
/// - `AVALANCHE`: Perform an extra mix step to avalanche the bits for higher hash quality. Enabled
///     by default to match the C++ implementation.
/// - `PROTECTED`: Slightly stronger hash quality and DoS resistance by performing two extra XOR
///     instructions on every mix step. Disabled by default.
#[inline(always)]
pub const fn rapidhash_v3_micro_inline(data: &[u8], seed: &RapidSecrets) -> u64 {
    rapidhash_micro_core::(seed.seed, &seed.secrets, data)
}

/// Rapidhash V3 Nano, a very compact version of the rapidhash algorithm.
///
/// WARNING: This produces a different output from `rapidhash_v3`.
///
/// Designed for mobile and embedded applications, where keeping a small code size is a top
/// priority. This should compile to less than 100 instructions with minimal stack usage, both on
/// x86-64 and aarch64. The fastest for sizes up to 48 bytes, but may be considerably slower for
/// larger inputs.
///
/// Compile time arguments:
/// - `AVALANCHE`: Perform an extra mix step to avalanche the bits for higher hash quality. Enabled
///     by default to match the C++ implementation.
/// - `PROTECTED`: Slightly stronger hash quality and DoS resistance by performing two extra XOR
///     instructions on every mix step. Disabled by default.
#[inline(always)]
pub const fn rapidhash_v3_nano_inline(data: &[u8], seed: &RapidSecrets) -> u64 {
    rapidhash_nano_core::(seed.seed, &seed.secrets, data)
}

/// Core of the V3 hash.
///
/// Inputs of at most 16 bytes are hashed right here; anything longer is handed to
/// `rapidhash_core_cold`, whose result is returned directly.
#[inline(always)]
pub(super) const fn rapidhash_core(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 {
    let mut a;
    let mut b;

    let remainder;
    if likely(data.len() <= 16) {
        a = 0;
        b = 0;

        if data.len() >= 4 {
            // The length is folded into the seed only on this 4..=16 byte path.
            seed ^= data.len() as u64;
            if data.len() >= 8 {
                // 8..=16 bytes: first and last 8 bytes (the two reads overlap below 16 bytes).
                let plast = data.len() - 8;
                a ^= read_u64(data, 0);
                b ^= read_u64(data, plast);
            } else {
                // 4..=7 bytes: first and last 4 bytes (the two reads overlap below 8 bytes).
                let plast = data.len() - 4;
                a ^= read_u32(data, 0) as u64;
                b ^= read_u32(data, plast) as u64;
            }
        } else if !data.is_empty() {
            // 1..=3 bytes: first and last byte go into `a`, the middle byte into `b`.
            a ^= ((data[0] as u64) << 45) | data[data.len() - 1] as u64;
            b ^= data[data.len() >> 1] as u64;
        }

        // The finishing step below receives the full input length on this path.
        remainder = data.len() as u64;
    } else {
        // SAFETY: we have just verified that data.len() > 16
        unsafe {
            return rapidhash_core_cold::(seed, secrets, data);
        }
    }

    a ^= secrets[1];
    b ^= seed;

    (a, b) = rapid_mum::(a, b);

    // With `AVALANCHE` a final `rapidhash_finish` step is applied; otherwise the two halves are
    // simply XORed together.
    if AVALANCHE {
        rapidhash_finish::(a, b, remainder, secrets)
    } else {
        a ^ b
    }
}

// This is sadly a fat
function with a lot of calling overhead because it clobbers registers. // Great for reaching max performance on 1kB+ inputs, but not great for 25 byte // inputs... We therefore mark this as #[inline] to let the compiler decide whether to inline it or // not, if it knows the input size. If the input size is known to be <112, there's a lot to gain // through inlining and optimising away the 7 data-independent execution paths. The RapidHasher // deviates from the V3 implementation here because of this! #[inline] const unsafe fn rapidhash_core_cold(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 { // SAFETY: we promise to never call this with <=16 length data to omit some bounds checks. // This is really intended for codegen-units >1 and/or no LTO. assume(data.len() > 16); let mut a = 0; let mut b = 0; let mut slice = data; if unlikely(slice.len() > 112) { // most CPUs appear to benefit from this unrolled loop let mut see1 = seed; let mut see2 = seed; let mut see3 = seed; let mut see4 = seed; let mut see5 = seed; let mut see6 = seed; if !COMPACT { while slice.len() > 224 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); see3 = rapid_mix::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); see4 = rapid_mix::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); see5 = rapid_mix::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ see5); see6 = rapid_mix::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ see6); seed = rapid_mix::(read_u64(slice, 112) ^ secrets[0], read_u64(slice, 120) ^ seed); see1 = rapid_mix::(read_u64(slice, 128) ^ secrets[1], read_u64(slice, 136) ^ see1); see2 = rapid_mix::(read_u64(slice, 144) ^ secrets[2], read_u64(slice, 152) ^ see2); see3 = rapid_mix::(read_u64(slice, 160) ^ secrets[3], read_u64(slice, 168) ^ 
see3); see4 = rapid_mix::(read_u64(slice, 176) ^ secrets[4], read_u64(slice, 184) ^ see4); see5 = rapid_mix::(read_u64(slice, 192) ^ secrets[5], read_u64(slice, 200) ^ see5); see6 = rapid_mix::(read_u64(slice, 208) ^ secrets[6], read_u64(slice, 216) ^ see6); let (_, split) = slice.split_at(224); slice = split; } if slice.len() > 112 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); see3 = rapid_mix::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); see4 = rapid_mix::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); see5 = rapid_mix::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ see5); see6 = rapid_mix::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ see6); let (_, split) = slice.split_at(112); slice = split; } } else { while slice.len() > 112 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); see3 = rapid_mix::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); see4 = rapid_mix::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); see5 = rapid_mix::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ see5); see6 = rapid_mix::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ see6); let (_, split) = slice.split_at(112); slice = split; } } seed ^= see1; see2 ^= see3; see4 ^= see5; seed ^= see6; see2 ^= see4; seed ^= see2; } if slice.len() > 16 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[2], read_u64(slice, 8) ^ seed); if slice.len() > 32 { seed = rapid_mix::(read_u64(slice, 16) ^ secrets[2], read_u64(slice, 24) ^ seed); if slice.len() > 48 { seed = rapid_mix::(read_u64(slice, 32) ^ secrets[1], 
read_u64(slice, 40) ^ seed); if slice.len() > 64 { seed = rapid_mix::(read_u64(slice, 48) ^ secrets[1], read_u64(slice, 56) ^ seed); if slice.len() > 80 { seed = rapid_mix::(read_u64(slice, 64) ^ secrets[2], read_u64(slice, 72) ^ seed); if slice.len() > 96 { seed = rapid_mix::(read_u64(slice, 80) ^ secrets[1], read_u64(slice, 88) ^ seed); } } } } } } a ^= read_u64(data, data.len() - 16) ^ slice.len() as u64; b ^= read_u64(data, data.len() - 8); a ^= secrets[1]; b ^= seed; (a, b) = rapid_mum::(a, b); if AVALANCHE { rapidhash_finish::(a, b, slice.len() as u64, secrets) } else { a ^ b } } const fn rapidhash_micro_core(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 { let mut a = 0; let mut b = 0; let remainder; if likely(data.len() <= 16) { if data.len() >= 4 { seed ^= data.len() as u64; if data.len() >= 8 { let plast = data.len() - 8; a ^= read_u64(data, 0); b ^= read_u64(data, plast); } else { let plast = data.len() - 4; a ^= read_u32(data, 0) as u64; b ^= read_u32(data, plast) as u64; } } else if !data.is_empty() { a ^= ((data[0] as u64) << 45) | data[data.len() - 1] as u64; b ^= data[data.len() >> 1] as u64; } remainder = data.len() as u64; } else { let mut slice = data; if unlikely(slice.len() > 80) { let mut see1 = seed; let mut see2 = seed; let mut see3 = seed; let mut see4 = seed; while slice.len() > 80 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); see3 = rapid_mix::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ see3); see4 = rapid_mix::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ see4); let (_, split) = slice.split_at(80); slice = split; } seed ^= see1; see2 ^= see3; seed ^= see4; seed ^= see2; } if slice.len() > 16 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[2], read_u64(slice, 8) ^ seed); if slice.len() > 32 { seed = 
rapid_mix::(read_u64(slice, 16) ^ secrets[2], read_u64(slice, 24) ^ seed); if slice.len() > 48 { seed = rapid_mix::(read_u64(slice, 32) ^ secrets[1], read_u64(slice, 40) ^ seed); if slice.len() > 64 { seed = rapid_mix::(read_u64(slice, 48) ^ secrets[1], read_u64(slice, 56) ^ seed); } } } } remainder = slice.len() as u64; a ^= read_u64(data, data.len() - 16) ^ remainder; b ^= read_u64(data, data.len() - 8); } a ^= secrets[1]; b ^= seed; (a, b) = rapid_mum::(a, b); if AVALANCHE { rapidhash_finish::(a, b, remainder, secrets) } else { a ^ b } } const fn rapidhash_nano_core(mut seed: u64, secrets: &[u64; 7], data: &[u8]) -> u64 { let mut a = 0; let mut b = 0; let remainder; if likely(data.len() <= 16) { if data.len() >= 4 { seed ^= data.len() as u64; if data.len() >= 8 { let plast = data.len() - 8; a ^= read_u64(data, 0); b ^= read_u64(data, plast); } else { let plast = data.len() - 4; a ^= read_u32(data, 0) as u64; b ^= read_u32(data, plast) as u64; } } else if !data.is_empty() { a ^= ((data[0] as u64) << 45) | data[data.len() - 1] as u64; b ^= data[data.len() >> 1] as u64; } remainder = data.len() as u64; } else { let mut slice = data; if unlikely(slice.len() > 48) { let mut see1 = seed; let mut see2 = seed; while slice.len() > 48 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ seed); see1 = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ see1); see2 = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ see2); let (_, split) = slice.split_at(48); slice = split; } seed ^= see1; seed ^= see2; } if slice.len() > 16 { seed = rapid_mix::(read_u64(slice, 0) ^ secrets[2], read_u64(slice, 8) ^ seed); if slice.len() > 32 { seed = rapid_mix::(read_u64(slice, 16) ^ secrets[2], read_u64(slice, 24) ^ seed); } } remainder = slice.len() as u64; a ^= read_u64(data, data.len() - 16) ^ remainder; b ^= read_u64(data, data.len() - 8); } a ^= secrets[1]; b ^= seed; (a, b) = rapid_mum::(a, b); if AVALANCHE { 
rapidhash_finish::(a, b, remainder, secrets) } else { a ^ b } } #[inline(always)] pub(super) const fn rapidhash_finish(a: u64, b: u64, remainder: u64, secrets: &[u64; 7]) -> u64 { rapid_mix::(a ^ 0xaaaaaaaaaaaaaaaa, b ^ secrets[1] ^ remainder) } rapidhash-4.4.1/src/v3/rapid_file.rs000064400000000000000000000061461046102023000153670ustar 00000000000000use std::io::Read; use crate::util::hints::{likely, unlikely}; use super::{DEFAULT_RAPID_SECRETS, RapidSecrets, RapidStreamHasherInlineV3}; /// Rapidhash a stream or file, matching the C++ implementation. /// /// This is a streaming implementation of rapidhash v3. It will produce exactly the same output as /// [`crate::v3::rapidhash_v3`], but accepts a streaming `Read` interface. /// /// This implementation makes use of the incremental [`RapidStreamHasherInlineV3`] interface, which /// may be preferred over a `Read` interface for some streaming use cases. #[inline] pub fn rapidhash_v3_file(data: R) -> std::io::Result { rapidhash_v3_file_inline::(data, &DEFAULT_RAPID_SECRETS) } /// Rapidhash a stream or file, matching the C++ implementation, with a custom seed. /// /// This is a streaming implementation of rapidhash v3. It will produce exactly the same output as /// [`crate::v3::rapidhash_v3_seeded`], but accepts a streaming `Read` interface. /// /// This implementation makes use of the incremental [`RapidStreamHasherInlineV3`] interface, which /// may be preferred over a `Read` interface for some streaming use cases. #[inline] pub fn rapidhash_v3_file_seeded(data: R, secrets: &RapidSecrets) -> std::io::Result { rapidhash_v3_file_inline::(data, secrets) } /// Rapidhash a stream or file, matching the C++ implementation. /// /// This is a streaming implementation of rapidhash v3. It will produce exactly the same output as /// [`crate::v3::rapidhash_v3_inline`], but accepts a streaming `Read` interface. /// /// Is marked with `#[inline(always)]` to force the compiler to inline and optimize the method. 
/// /// This implementation makes use of the incremental [`RapidStreamHasherInlineV3`] interface, which /// may be preferred over a `Read` interface for some streaming use cases. #[inline(always)] pub fn rapidhash_v3_file_inline(mut data: R, secrets: &RapidSecrets) -> std::io::Result { let mut hasher = RapidStreamHasherInlineV3::::new(secrets); let mut buf = [0u8; 8 * 1024]; // TODO(v5): make the buffer size configurable. let mut pos = 0; loop { let n = data.read(&mut buf[pos..])?; pos += n; // The Read interface _forces_ us to copy into `buf`, but we then want to avoid the // double-copy into the `RapidStreamHasher` buffer too. So if an interface is giving us // lots of small reads, it's better to cache these all in the `buf` so that the // `hasher.write` call will zero-copy most of the buffer in 112 byte chunks. if likely(n > 0 && pos < buf.len()) { continue; } hasher.write(&buf[..pos]); if unlikely(n == 0) { break; } pos = 0; } Ok(hasher.finish()) } #[cfg(test)] mod tests { use std::io::{Seek, SeekFrom, Write}; use crate::util::macros::compare_rapidhash_file; use crate::v3::rapidhash_v3_inline; use super::*; compare_rapidhash_file!(compare_rapidhash_v1_file, rapidhash_v3_inline::, rapidhash_v3_file_inline::<_, false>); } rapidhash-4.4.1/src/v3/rapid_stream_hasher.rs000064400000000000000000000331401046102023000172670ustar 00000000000000use crate::util::hints::{likely, unlikely}; use crate::util::mix::{rapid_mix, rapid_mum}; use crate::util::read::{read_u32, read_u64}; use crate::v3::rapid_const::rapidhash_finish; use crate::v3::RapidSecrets; /// A bytewise-style incremental interface for rapidhash. /// /// This interface guarantees incremental inputs are the same as a bulk hash of the same bytes. /// /// See [`RapidStreamHasherInlineV3`] for more details, or view [`crate::v3::rapidhash_v3_file`] for a /// `Read`-based incremental interface. 
/// /// This is a type alias for [`RapidStreamHasherInlineV3`] that sets: /// - `AVALANCHE`: `true` /// - `PROTECTED`: `false` pub type RapidStreamHasherV3<'a> = RapidStreamHasherInlineV3<'a, true, false>; /// A bytewise-style incremental interface for rapidhash. /// /// This interface guarantees incremental inputs are the same as a bulk hash of the same bytes. /// /// See [`crate::v3::rapidhash_v3_file`] for an alternative `Read`-based incremental interface. /// /// ## Speed /// /// `RapidStreamHasher` is slower than `rapidhash_v3` due to the extra overhead from the incremental /// interface. Where possible, we recommend using `rapidhash_v3` for bulk hashing. /// /// This will copy bytes, except where written chunks are larger than 112 bytes. Larger chunks /// will perform better than smaller chunks by avoiding copying. /// /// ## Portability /// /// `RapidStreamHasher` does not implement `std::hash::Hasher` and is specially designed to produce /// stable hashes across platforms and compiler versions. Any changes to hash output in /// `RapidStreamHasher` will result in a major crate bump. /// /// We're aiming to support the [portable-hash crate](https://github.com/hoxxep/portable-hash) in /// the future to enable `derive(PortableHash)` on user-defined types. Please leave a comment or /// upvote if this would be useful to you on a large project. /// /// ## Example /// /// ```rust /// use rapidhash::v3::{rapidhash_v3_seeded, RapidSecrets, RapidStreamHasherV3}; /// /// let secrets = RapidSecrets::seed(0); /// let data: &[u8] = [0, 1, 2, 3, 4, 5, 6, 7].as_slice(); /// /// // classic rapidhash v3 /// let expected_hash = rapidhash_v3_seeded(data, &secrets); /// /// // incremental rapidhash v3 /// let mut hasher = RapidStreamHasherV3::new(&secrets); /// hasher.write(&data[0..3]); /// hasher.write(&data[3..6]); /// hasher.write(&data[6..]); /// let actual_hash = hasher.finish(); /// /// // equal hashes! 
/// assert_eq!(expected_hash, actual_hash); /// ``` pub struct RapidStreamHasherInlineV3<'a, const AVALANCHE: bool, const PROTECTED: bool> { seed: u64, secrets: &'a [u64; 7], state: RapidStreamChunkState, /// We treat this as an array with two parts, `[CHUNK_PREV] + [CHUNK]` where /// the `CHUNK_PREV` is the final 16 bytes of the preceding chunk, and /// the `CHUNK` is the latest 112 byte block that we're appending `data` to /// before processing once the block has been filled (or `finish()` is /// called). Rapidhash in its longest form processes 112 byte blocks. buffer: [u8; CHUNK_PREV + CHUNK_SIZE], } /// The size of a single rapidhash processing chunk. const CHUNK_SIZE: usize = 112; /// The minimum tail we must keep in the buffer for processing. const CHUNK_PREV: usize = 16; /// The intermediate hasher state for any full 112-byte chunks that have been written. /// /// This is separated to allow mutably borrowing the state and buffer at the same time. struct RapidStreamChunkState { seeds: [u64; 7], /// `buffer_len` **excludes** the `CHUNK_PREV` bytes buffer_len: usize, /// Have we processed a full 112-byte chunk? processed: bool, } impl<'a, const AVALANCHE: bool, const PROTECTED: bool> RapidStreamHasherInlineV3<'a, AVALANCHE, PROTECTED> { /// Create a new `RapidStreamHasher` with seed and secrets. #[inline(always)] pub fn new(secrets: &'a RapidSecrets) -> Self { Self { seed: secrets.seed, secrets: &secrets.secrets, state: RapidStreamChunkState::new(secrets.seed), buffer: [0; CHUNK_PREV + CHUNK_SIZE], } } /// Write data to the stream hasher. #[inline(always)] pub fn write(&mut self, data: &[u8]) { // if this data doesn't fit in the remaining buffer, slow-path to write the buffer chunk and // any full chunks we can process from `data`. 
if unlikely(CHUNK_SIZE < self.state.buffer_len + data.len()) { self.write_inner(data); return; } // fast inlined path for copying into the buffer let start = CHUNK_PREV + self.state.buffer_len; let end = start + data.len(); self.buffer[start..end].copy_from_slice(data); self.state.buffer_len += data.len(); } /// Write cold path that we keep separate so the copy logic is fast. #[inline] fn write_inner(&mut self, data: &[u8]) { // set up arrays: chunk_prev as buffer[..16] and chunk_buffer as buffer[16..] let (chunk_prev, chunk_curr) = self.buffer.split_at_mut(CHUNK_PREV); let chunk_prev: &mut [u8; CHUNK_PREV] = chunk_prev.try_into().unwrap(); let chunk_buffer: &mut [u8; CHUNK_SIZE] = chunk_curr.try_into().unwrap(); // write buffer up to 112 bytes let copy_bytes = CHUNK_SIZE - self.state.buffer_len; let start = self.state.buffer_len; chunk_buffer[start..].copy_from_slice(&data[..copy_bytes]); debug_assert_eq!(CHUNK_SIZE, self.state.buffer_len + copy_bytes); // write buffer chunk self.state.chunk_write(self.secrets, chunk_buffer); // write large data chunks without copying // Keep back the last chunk when chunk-aligned: rapidhash v3 uses `pos + 112 < len` // (not <=), so the final 112 bytes must go through the tail path in finish(). 
let remaining_data = &data[copy_bytes..]; let stop = (remaining_data.len().saturating_sub(1) / CHUNK_SIZE) * CHUNK_SIZE; let mut chunk_last = None; let mut pos = 0; while pos < stop { let chunk = remaining_data[pos..pos + CHUNK_SIZE].try_into().unwrap(); chunk_last = Some(chunk); self.state.chunk_write(self.secrets, chunk); pos += CHUNK_SIZE; } let unprocessed_data = &remaining_data[pos..]; // copy the final 16 data bytes from the previous chunk if let Some(chunk) = chunk_last { // if the last full chunk was from `data` chunk_prev.copy_from_slice(&chunk[CHUNK_SIZE - CHUNK_PREV..]); } else { // otherwise the last chunk was from the buffer let trailing_end = chunk_buffer.len() - CHUNK_PREV; chunk_prev.copy_from_slice(&chunk_buffer[trailing_end..]); } // write remainder into the buffer chunk_buffer[..unprocessed_data.len()].copy_from_slice(unprocessed_data); self.state.buffer_len = unprocessed_data.len(); } /// Finalize a hash from the hasher state. #[inline(always)] #[must_use] pub fn finish(&self) -> u64 { let mut seed = self.seed; let mut a; let mut b; let remainder; if likely(!self.state.processed && self.state.buffer_len <= 16) { // short <= 16 pass only if we haven't processed a full chunk yet let data = &self.buffer[CHUNK_PREV..CHUNK_PREV + self.state.buffer_len]; if data.len() >= 4 { seed ^= data.len() as u64; if data.len() >= 8 { let plast = data.len() - 8; a = read_u64(data, 0); b = read_u64(data, plast); } else { let plast = data.len() - 4; a = read_u32(data, 0) as u64; b = read_u32(data, plast) as u64; } } else if !data.is_empty() { a = ((data[0] as u64) << 45) | data[data.len() - 1] as u64; b = data[data.len() >> 1] as u64; } else { a = 0; b = 0; } remainder = data.len() as u64; } else { if self.state.processed { // merge independent lanes if we'd previously processed a full 112 byte chunk seed = self.state.seeds[0] ^ self.state.seeds[1] ^ self.state.seeds[2] ^ self.state.seeds[3] ^ self.state.seeds[4] ^ self.state.seeds[5] ^ self.state.seeds[6]; } // the 
>16 tail is the same whether we've processed a full chunk or not let slice = &self.buffer[CHUNK_PREV..CHUNK_PREV + self.state.buffer_len]; if slice.len() > 16 { seed = rapid_mix::(read_u64(slice, 0) ^ self.secrets[2], read_u64(slice, 8) ^ seed); if slice.len() > 32 { seed = rapid_mix::(read_u64(slice, 16) ^ self.secrets[2], read_u64(slice, 24) ^ seed); if slice.len() > 48 { seed = rapid_mix::(read_u64(slice, 32) ^ self.secrets[1], read_u64(slice, 40) ^ seed); if slice.len() > 64 { seed = rapid_mix::(read_u64(slice, 48) ^ self.secrets[1], read_u64(slice, 56) ^ seed); if slice.len() > 80 { seed = rapid_mix::(read_u64(slice, 64) ^ self.secrets[2], read_u64(slice, 72) ^ seed); if slice.len() > 96 { seed = rapid_mix::(read_u64(slice, 80) ^ self.secrets[1], read_u64(slice, 88) ^ seed); } } } } } } // the final 16 bytes may read from the CHUNK_PREV part of the buffer let data = &self.buffer[..CHUNK_PREV + self.state.buffer_len]; a = read_u64(data, data.len() - 16) ^ slice.len() as u64; b = read_u64(data, data.len() - 8); // passed to rapidhash_finish remainder = self.state.buffer_len as u64; } a ^= self.secrets[1]; b ^= seed; (a, b) = rapid_mum::(a, b); if AVALANCHE { rapidhash_finish::(a, b, remainder, self.secrets) } else { a ^ b } } /// Reuse the buffer within this RapidStreamHasher. #[inline(always)] pub fn reset(&mut self) { self.state.reset(self.seed); } } impl RapidStreamChunkState { #[inline(always)] pub fn new(seed: u64) -> Self { Self { seeds: [seed; 7], processed: false, buffer_len: 0, } } /// Write a 112-len chunk to the internal state. 
#[inline(always)] fn chunk_write(&mut self, secrets: &[u64; 7], chunk: &[u8; 112]) { let slice = chunk.as_slice(); self.seeds[0] = rapid_mix::(read_u64(slice, 0) ^ secrets[0], read_u64(slice, 8) ^ self.seeds[0]); self.seeds[1] = rapid_mix::(read_u64(slice, 16) ^ secrets[1], read_u64(slice, 24) ^ self.seeds[1]); self.seeds[2] = rapid_mix::(read_u64(slice, 32) ^ secrets[2], read_u64(slice, 40) ^ self.seeds[2]); self.seeds[3] = rapid_mix::(read_u64(slice, 48) ^ secrets[3], read_u64(slice, 56) ^ self.seeds[3]); self.seeds[4] = rapid_mix::(read_u64(slice, 64) ^ secrets[4], read_u64(slice, 72) ^ self.seeds[4]); self.seeds[5] = rapid_mix::(read_u64(slice, 80) ^ secrets[5], read_u64(slice, 88) ^ self.seeds[5]); self.seeds[6] = rapid_mix::(read_u64(slice, 96) ^ secrets[6], read_u64(slice, 104) ^ self.seeds[6]); self.processed = true; } /// Reuse the buffer within this RapidStreamHasher. #[inline(always)] pub fn reset(&mut self, seed: u64) { self.seeds = [seed; 7]; self.processed = false; self.buffer_len = 0; } } #[cfg(test)] mod tests { use crate::util::macros::compare_rapid_stream_hasher; use crate::v3::{rapidhash_v3_inline, DEFAULT_RAPID_SECRETS}; use super::*; compare_rapid_stream_hasher!(compare_stream_hasher_v3, rapidhash_v3_inline::, RapidStreamHasherV3<'a>); compare_rapid_stream_hasher!(compare_stream_hasher_v3_protected, rapidhash_v3_inline::, RapidStreamHasherInlineV3::<'a, true, true>); compare_rapid_stream_hasher!(compare_stream_hasher_v3_no_avalanche, rapidhash_v3_inline::, RapidStreamHasherInlineV3::<'a, false, false>); #[test] fn test_rapid_stream_hasher() { let secrets = DEFAULT_RAPID_SECRETS; let data: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7]; let expected_hash = rapidhash_v3_inline::(data, &secrets); let mut hasher = RapidStreamHasherV3::new(&secrets); hasher.write(data); assert_eq!(expected_hash, hasher.finish()); hasher.reset(); hasher.write(&data[..1]); hasher.write(&data[1..3]); hasher.write(&data[3..6]); hasher.write(&data[6..]); assert_eq!(expected_hash, 
hasher.finish()); } #[test] fn test_chunk_writing() { let secrets = DEFAULT_RAPID_SECRETS; let mut hasher = RapidStreamHasherV3::new(&secrets); let mut data = [0; 128]; for i in 0..data.len() { data[i] = i as u8; } hasher.write(&data); assert_eq!(&hasher.buffer[..32], &data[128-32..]); } } rapidhash-4.4.1/src/v3/seed.rs000064400000000000000000000117021046102023000142030ustar 00000000000000//! Reliable seeding and secrets generation for the hash functions. use crate::util::mix::rapid_mix; /// The default seed used in the C++ implementation. pub(crate) const DEFAULT_SEED: u64 = 0; /// Used only for generating random secrets. const DEFAULT_SECRETS: [u64; 7] = [ 0x2d358dccaa6c78a5, 0x8bb84b93962eacc9, 0x4b33a62ed433d4a3, 0x4d5a2da51de1aa47, 0xa0761d6478bd642f, 0xe7037ed1a0b428db, 0x90ed1765281c388c, ]; /// The default rapidhash secrets used in the C++ implementation. /// /// We recommend generating your own secrets using the [`RapidSecrets::seed`] method to avoid /// trivial collision attacks if you need minimal HashDoS protection. pub const DEFAULT_RAPID_SECRETS: RapidSecrets = RapidSecrets::seed_cpp(DEFAULT_SEED); /// Hold the seed and secrets to be used by rapidhash. /// /// RapidSecrets premix the seed and generate a set of other secrets based on the seed that are all /// used in the hashing process. There are some quality checks on the random values to ensure a /// reasonable distribution of entropy in the generated secrets. /// /// Constructing this struct is fairly cheap, but unnecessary in the critical path. We therefore /// recommend instantiating it once and re-using the same instance for any persistent hashing. The /// `seed` method is marked `const` to also do so at compile time. /// /// # Minimal HashDoS Protection /// We recommend changing the default seed and secrets must be changed to avoid trivial collision /// attacks. For persistent hashing, you can hard code your own randomized seed at compile time. 
/// /// ```rust /// use rapidhash::v3::RapidSecrets; /// const DEFAULT_SECRETS: RapidSecrets = RapidSecrets::seed(0x123456); // <-- change this value! /// /// /// Export your chosen rapidhash version and secrets for use throughout your project. /// pub fn rapidhash(data: &[u8]) -> u64 { /// rapidhash::v3::rapidhash_v3_seeded(data, &DEFAULT_SECRETS) /// } /// ``` /// /// TODO: serde or serialization support. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct RapidSecrets { /// The core rapidhash seed. pub seed: u64, /// The secrets, effectively other seeds used in the hashing process. pub secrets: [u64; 7], } impl RapidSecrets { /// Generate secrets from a given randomized seed. /// /// Note the chosen seed will be pre-mixed to further randomized it, and the secrets will be /// computed based on the seed. /// /// If compatibility with the C++ implementation is required, use the `seed_cpp` method instead. #[inline] pub const fn seed(seed: u64) -> Self { let seed = premix_seed(seed, 0); let mut secrets = [0; 7]; secrets[0] = premix_seed(seed, 0); secrets[1] = premix_seed(secrets[0], 1); secrets[2] = premix_seed(secrets[1], 2); secrets[3] = premix_seed(secrets[2], 3); secrets[4] = premix_seed(secrets[3], 4); secrets[5] = premix_seed(secrets[4], 5); secrets[6] = premix_seed(secrets[5], 6); Self { seed, secrets } } /// Creates a new `RapidSecrets` instance with a different seed and the same secrets. /// /// This is useful for in-memory hashing, so we can quickly use a different seed for other /// HashMaps. #[inline(always)] pub const fn reseed(&self) -> Self { Self { seed: premix_seed(self.seed, 6), secrets: self.secrets, } } /// Creates a new `RapidSecrets` instance using a seed and secrets that are compatible with the /// C++ implementation. /// /// Note that these **use the default secrets** and therefore are liable to some trivial /// collision attacks, as randomising both the seed and secrets is necessary to provide minimal /// HashDoS resistance. 
#[inline(always)] pub const fn seed_cpp(seed: u64) -> Self { Self { seed: rapidhash_seed(seed), secrets: DEFAULT_SECRETS, } } /// Creates a new `RapidSecrets` instance with a randomized seed and secrets. /// /// The quality of the randomness will be better with the `rand` feature enabled. #[inline] pub fn random() -> Self { let seed = crate::inner::seeding::seed::get_seed(); let secrets = crate::inner::seeding::secrets::get_secrets(); Self { seed, secrets: *secrets, } } } #[inline(always)] const fn rapidhash_seed(seed: u64) -> u64 { seed ^ rapid_mix::(seed ^ DEFAULT_SECRETS[2], DEFAULT_SECRETS[1]) } #[inline] const fn premix_seed(mut seed: u64, i: usize) -> u64 { seed ^= rapid_mix::(seed ^ DEFAULT_SECRETS[2], DEFAULT_SECRETS[i]); // ensure the seeds are of reasonable non-zero quality const HI: u64 = 0xFFFF << 48; const MI: u64 = 0xFFFF << 24; const LO: u64 = 0xFFFF; if (seed & HI) == 0 { seed |= 1u64 << 63; } if (seed & MI) == 0 { seed |= 1u64 << 31; } if (seed & LO) == 0 { seed |= 1u64; } seed } rapidhash-4.4.1/tests/cli.rs000064400000000000000000000043301046102023000140540ustar 00000000000000//! Test the rapidhash CLI tool. //! //! Installation: cargo install rapidhash //! Usage example: rapidhash --v3 [filename] use std::fs::File; use std::io::Write; use assert_cmd::Command; use tempfile::tempdir; use rapidhash::v1::rapidhash_v1; use rapidhash::v2::rapidhash_v2_inline; use rapidhash::v3::rapidhash_v3; /// Test: `echo "test input" | rapidhash --v3` /// /// Note `echo` appends a newline character at the end of the input. 
#[test] #[cfg(feature = "std")] fn cli_stdin() { let input = "test input\n"; let expected = rapidhash_v3("test input\n".as_bytes()).to_string(); Command::new(assert_cmd::cargo_bin!("rapidhash")) .args(&["--v3"]) .write_stdin(input) .assert() .success() .stdout(format!("{expected}\n")); } /// Test: `rapidhash --v3 file.txt` #[test] #[cfg(feature = "std")] fn cli_file() { let input = "test input\n"; let expected = rapidhash_v3(input.as_bytes()).to_string(); let dir = tempdir().unwrap(); let file_path = dir.path().join("test.txt"); let mut file = File::create_new(file_path.clone()).unwrap(); file.write_all(input.as_bytes()).unwrap(); file.flush().unwrap(); Command::new(assert_cmd::cargo_bin!("rapidhash")) .args(&["--v3", file_path.to_str().unwrap()]) .assert() .success() .stdout(format!("{expected}\n")); } /// Test all rapidhash versions. #[test] #[cfg(feature = "std")] fn cli_versions() { let input = "test input\n".as_bytes(); let versions = [ ("--v1", rapidhash_v1(input).to_string()), ("--v2.0", rapidhash_v2_inline::<0, true, false, false>(input, &rapidhash::v2::DEFAULT_RAPID_SECRETS).to_string()), ("--v2.1", rapidhash_v2_inline::<1, true, false, false>(input, &rapidhash::v2::DEFAULT_RAPID_SECRETS).to_string()), ("--v2.2", rapidhash_v2_inline::<2, true, false, false>(input, &rapidhash::v2::DEFAULT_RAPID_SECRETS).to_string()), ("--v3", rapidhash_v3(input).to_string()), ]; for (flag, expected) in versions { Command::new(assert_cmd::cargo_bin!("rapidhash")) .args(&[flag]) .write_stdin(input) .assert() .success() .stdout(format!("{}\n", expected)); } }