pax_global_header00006660000000000000000000000064151352342210014510gustar00rootroot0000000000000052 comment=5c4fab06f7aae8e084522d9b595bba1d187acf01 xsync-4.4.0/000077500000000000000000000000001513523422100126615ustar00rootroot00000000000000xsync-4.4.0/.github/000077500000000000000000000000001513523422100142215ustar00rootroot00000000000000xsync-4.4.0/.github/workflows/000077500000000000000000000000001513523422100162565ustar00rootroot00000000000000xsync-4.4.0/.github/workflows/build-32-bit.yml000066400000000000000000000006411513523422100210770ustar00rootroot00000000000000name: build-32-bit on: [push] jobs: test: runs-on: ubuntu-latest strategy: matrix: go-version: [1.25.x] name: Build with Go ${{ matrix.go-version }} 32-bit steps: - uses: actions/checkout@v3 - name: Install Go uses: actions/setup-go@v3 with: go-version: ${{ matrix.go-version }} - name: Run tests run: GOARCH=386 go test -v ./... xsync-4.4.0/.github/workflows/build.yml000066400000000000000000000010361513523422100201000ustar00rootroot00000000000000name: build on: [push] jobs: test: runs-on: ubuntu-latest strategy: matrix: go-version: [1.24.x, 1.25.x] name: Build with Go ${{ matrix.go-version }} steps: - uses: actions/checkout@v3 - name: Install Go uses: actions/setup-go@v3 with: go-version: ${{ matrix.go-version }} - name: Run vet run: go vet ./... - name: Run tests run: go test -v ./... - name: Run tests with race detector run: go test -timeout 10m -race -v ./... 
xsync-4.4.0/.github/workflows/coverage.yml000066400000000000000000000010651513523422100205760ustar00rootroot00000000000000name: report-coverage on: [push] jobs: test: runs-on: ubuntu-latest strategy: matrix: go-version: [1.25.x] name: Build with Go ${{ matrix.go-version }} steps: - uses: actions/checkout@v3 - name: Install Go uses: actions/setup-go@v3 with: go-version: ${{ matrix.go-version }} - name: Run coverage run: go test -race -coverprofile=coverage.out -covermode=atomic - name: Publish to Codecov uses: codecov/codecov-action@v3 with: files: coverage.out xsync-4.4.0/.github/workflows/lint.yml000066400000000000000000000005751513523422100177560ustar00rootroot00000000000000name: lint on: [push] jobs: test: runs-on: ubuntu-latest strategy: matrix: go-version: [1.25.x] name: Build with Go ${{ matrix.go-version }} steps: - uses: actions/checkout@v3 - name: Install Go uses: actions/setup-go@v3 with: go-version: ${{ matrix.go-version }} - name: Run vet run: go vet . xsync-4.4.0/.gitignore000066400000000000000000000004151513523422100146510ustar00rootroot00000000000000# Binaries for programs and plugins *.exe *.exe~ *.dll *.so *.dylib # Test binary, built with `go test -c` *.test # Output of the go coverage tool, specifically when used with LiteIDE *.out # Dependency directories (remove the comment below to include it) # vendor/ xsync-4.4.0/BENCHMARKS.md000066400000000000000000000160451513523422100146260ustar00rootroot00000000000000# xsync benchmarks If you're interested in `MapOf` comparison with some of the popular concurrent hash maps written in Go, check [this](https://github.com/cornelk/hashmap/pull/70) and [this](https://github.com/alphadose/haxmap/pull/22) PRs. The below results were obtained for xsync v2.3.1 on a c6g.metal EC2 instance (64 CPU, 128GB RAM) running Linux and Go 1.19.3. I'd like to thank [@felixge](https://github.com/felixge) who kindly ran the benchmarks. 
The following commands were used to run the benchmarks: ```bash $ go test -run='^$' -cpu=1,2,4,8,16,32,64 -bench . -count=30 -timeout=0 | tee bench.txt $ benchstat bench.txt | tee benchstat.txt ``` The below sections contain some of the results. Refer to [this gist](https://gist.github.com/puzpuzpuz/e62e38e06feadecfdc823c0f941ece0b) for the complete output. Please note that `MapOf` got a number of optimizations since v2.3.1, so the current result is likely to be different. ### Counter vs. atomic int64 ``` name time/op Counter 27.3ns ± 1% Counter-2 27.2ns ±11% Counter-4 15.3ns ± 8% Counter-8 7.43ns ± 7% Counter-16 3.70ns ±10% Counter-32 1.77ns ± 3% Counter-64 0.96ns ±10% AtomicInt64 7.60ns ± 0% AtomicInt64-2 12.6ns ±13% AtomicInt64-4 13.5ns ±14% AtomicInt64-8 12.7ns ± 9% AtomicInt64-16 12.8ns ± 8% AtomicInt64-32 13.0ns ± 6% AtomicInt64-64 12.9ns ± 7% ``` Here `time/op` stands for average time spent on operation. If you divide `10^9` by the result in nanoseconds per operation, you'd get the throughput in operations per second. Thus, the ideal theoretical scalability of a concurrent data structure implies that the reported `time/op` decreases proportionally with the increased number of CPU cores. On the contrary, if the measured time per operation increases when run on more cores, it means performance degradation. ### MapOf vs. 
sync.Map 1,000 `[int, int]` entries with a warm-up, 100% Loads: ``` IntegerMapOf_WarmUp/reads=100% 24.0ns ± 0% IntegerMapOf_WarmUp/reads=100%-2 12.0ns ± 0% IntegerMapOf_WarmUp/reads=100%-4 6.02ns ± 0% IntegerMapOf_WarmUp/reads=100%-8 3.01ns ± 0% IntegerMapOf_WarmUp/reads=100%-16 1.50ns ± 0% IntegerMapOf_WarmUp/reads=100%-32 0.75ns ± 0% IntegerMapOf_WarmUp/reads=100%-64 0.38ns ± 0% IntegerMapStandard_WarmUp/reads=100% 55.3ns ± 0% IntegerMapStandard_WarmUp/reads=100%-2 27.6ns ± 0% IntegerMapStandard_WarmUp/reads=100%-4 16.1ns ± 3% IntegerMapStandard_WarmUp/reads=100%-8 8.35ns ± 7% IntegerMapStandard_WarmUp/reads=100%-16 4.24ns ± 7% IntegerMapStandard_WarmUp/reads=100%-32 2.18ns ± 6% IntegerMapStandard_WarmUp/reads=100%-64 1.11ns ± 3% ``` 1,000 `[int, int]` entries with a warm-up, 99% Loads, 0.5% Stores, 0.5% Deletes: ``` IntegerMapOf_WarmUp/reads=99% 31.0ns ± 0% IntegerMapOf_WarmUp/reads=99%-2 16.4ns ± 1% IntegerMapOf_WarmUp/reads=99%-4 8.42ns ± 0% IntegerMapOf_WarmUp/reads=99%-8 4.41ns ± 0% IntegerMapOf_WarmUp/reads=99%-16 2.38ns ± 2% IntegerMapOf_WarmUp/reads=99%-32 1.37ns ± 4% IntegerMapOf_WarmUp/reads=99%-64 0.85ns ± 2% IntegerMapStandard_WarmUp/reads=99% 121ns ± 1% IntegerMapStandard_WarmUp/reads=99%-2 109ns ± 3% IntegerMapStandard_WarmUp/reads=99%-4 115ns ± 4% IntegerMapStandard_WarmUp/reads=99%-8 114ns ± 2% IntegerMapStandard_WarmUp/reads=99%-16 105ns ± 2% IntegerMapStandard_WarmUp/reads=99%-32 97.0ns ± 3% IntegerMapStandard_WarmUp/reads=99%-64 98.0ns ± 2% ``` 1,000 `[int, int]` entries with a warm-up, 75% Loads, 12.5% Stores, 12.5% Deletes: ``` IntegerMapOf_WarmUp/reads=75%-reads 46.2ns ± 1% IntegerMapOf_WarmUp/reads=75%-reads-2 36.7ns ± 2% IntegerMapOf_WarmUp/reads=75%-reads-4 22.0ns ± 1% IntegerMapOf_WarmUp/reads=75%-reads-8 12.8ns ± 2% IntegerMapOf_WarmUp/reads=75%-reads-16 7.69ns ± 1% IntegerMapOf_WarmUp/reads=75%-reads-32 5.16ns ± 1% IntegerMapOf_WarmUp/reads=75%-reads-64 4.91ns ± 1% IntegerMapStandard_WarmUp/reads=75%-reads 156ns ± 0% 
IntegerMapStandard_WarmUp/reads=75%-reads-2 177ns ± 1% IntegerMapStandard_WarmUp/reads=75%-reads-4 197ns ± 1% IntegerMapStandard_WarmUp/reads=75%-reads-8 221ns ± 2% IntegerMapStandard_WarmUp/reads=75%-reads-16 242ns ± 1% IntegerMapStandard_WarmUp/reads=75%-reads-32 258ns ± 1% IntegerMapStandard_WarmUp/reads=75%-reads-64 264ns ± 1% ``` ### MPMCQueue vs. Go channels Concurrent producers and consumers (1:1), queue/channel size 1,000, some work done by both producers and consumers: ``` QueueProdConsWork100 252ns ± 0% QueueProdConsWork100-2 206ns ± 5% QueueProdConsWork100-4 136ns ±12% QueueProdConsWork100-8 110ns ± 6% QueueProdConsWork100-16 108ns ± 2% QueueProdConsWork100-32 102ns ± 2% QueueProdConsWork100-64 101ns ± 0% ChanProdConsWork100 283ns ± 0% ChanProdConsWork100-2 406ns ±21% ChanProdConsWork100-4 549ns ± 7% ChanProdConsWork100-8 754ns ± 7% ChanProdConsWork100-16 828ns ± 7% ChanProdConsWork100-32 810ns ± 8% ChanProdConsWork100-64 832ns ± 4% ``` ### RBMutex vs. sync.RWMutex The writer locks on each 100,000 iteration with some work in the critical section for both readers and the writer: ``` RBMutexWorkWrite100000 146ns ± 0% RBMutexWorkWrite100000-2 73.3ns ± 0% RBMutexWorkWrite100000-4 36.7ns ± 0% RBMutexWorkWrite100000-8 18.6ns ± 0% RBMutexWorkWrite100000-16 9.83ns ± 3% RBMutexWorkWrite100000-32 5.53ns ± 0% RBMutexWorkWrite100000-64 4.04ns ± 3% RWMutexWorkWrite100000 121ns ± 0% RWMutexWorkWrite100000-2 128ns ± 1% RWMutexWorkWrite100000-4 124ns ± 2% RWMutexWorkWrite100000-8 101ns ± 1% RWMutexWorkWrite100000-16 92.9ns ± 1% RWMutexWorkWrite100000-32 89.9ns ± 1% RWMutexWorkWrite100000-64 88.4ns ± 1% ``` xsync-4.4.0/LICENSE000066400000000000000000000261351513523422100136750ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. xsync-4.4.0/README.md000066400000000000000000000260101513523422100141370ustar00rootroot00000000000000[![GoDoc reference](https://img.shields.io/badge/godoc-reference-blue.svg)](https://pkg.go.dev/github.com/puzpuzpuz/xsync/v4) [![GoReport](https://goreportcard.com/badge/github.com/puzpuzpuz/xsync/v4)](https://goreportcard.com/report/github.com/puzpuzpuz/xsync/v4) [![codecov](https://codecov.io/gh/puzpuzpuz/xsync/branch/main/graph/badge.svg)](https://codecov.io/gh/puzpuzpuz/xsync) # xsync Concurrent data structures for Go. Aims to provide more scalable alternatives for some of the data structures from the standard `sync` package, but not only. Apart from direct library dependencies, `xsync` data structures can also be met in-code in other libraries like [Otter](https://github.com/maypok86/otter/blob/8c526307556486ea0337280a4211135720bc29cc/internal/hashmap/map.go) caching library. 
Covered with tests following the approach described [here](https://puzpuzpuz.dev/testing-concurrent-code-for-fun-and-profit). ## Benchmarks Benchmark results may be found [here](BENCHMARKS.md). I'd like to thank [@felixge](https://github.com/felixge) who kindly ran the benchmarks on a beefy multicore machine. Also, a non-scientific, unfair benchmark comparing Java's [j.u.c.ConcurrentHashMap](https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/concurrent/ConcurrentHashMap.html) and `xsync.Map` is available [here](https://puzpuzpuz.dev/concurrent-map-in-go-vs-java-yet-another-meaningless-benchmark). ## Usage The latest xsync major version is v4, so `/v4` suffix should be used when importing the library: ```go import ( "github.com/puzpuzpuz/xsync/v4" ) ``` Minimal required Golang version is 1.24. *Note for pre-v4 users*: the main change between v3 and v4 is removal of non-generic data structures and some improvements in `Map` API. The old `*Of` types are kept as type aliases for the renamed data structures to simplify the migration, e.g. `MapOf` is an alias for `Map`. While the API has some breaking changes, the migration should be trivial. ### Counter A `Counter` is a striped `int64` counter inspired by the `j.u.c.a.LongAdder` class from the Java standard library. ```go c := xsync.NewCounter() // increment and decrement the counter c.Inc() c.Dec() // read the current value v := c.Value() ``` Works better in comparison with a single atomically updated `int64` counter in high contention scenarios. ### Map A `Map` is like a concurrent hash table-based map. It follows the interface of `sync.Map` with a number of valuable extensions like `Compute` or `Size`. 
```go m := xsync.NewMap[string, string]() m.Store("foo", "bar") v, ok := m.Load("foo") s := m.Size() ``` `Map` uses a modified version of the Cache-Line Hash Table (CLHT) data structure: https://github.com/LPD-EPFL/CLHT CLHT is built around the idea of organizing the hash table in cache-line-sized buckets, so that on all modern CPUs update operations complete with minimal cache-line transfer. Also, `Load` operations are obstruction-free and involve no writes to shared memory, hence no mutexes or any other sort of locks. Due to this design, in all considered scenarios `Map` outperforms `sync.Map`. `Map` also uses cooperative parallel rehashing: this means that the goroutines executing write operations may participate in a concurrent rehashing instead of waiting for it to finish. Apart from CLHT, `Map` borrows ideas from Java's `j.u.c.ConcurrentHashMap` (immutable K/V pair structs instead of atomic snapshots) and C++'s `absl::flat_hash_map` a.k.a. SwissTable (meta memory and SWAR-based lookups). `Map` uses Go's built-in hash function, which has DDoS protection. It uses `maphash.Comparable` as the default hash function. This means that each map instance gets its own seed number and the hash function uses that seed for hash code calculation. Besides the `Range` and `All` methods available for map iteration, there is also the `ToPlainMap` utility function to convert a `Map` to Go's built-in `map`: ```go m := xsync.NewMap[int, int]() m.Store(42, 42) pm := xsync.ToPlainMap(m) ``` For bulk conditional deletions, `DeleteMatching` can be used. This method is handy in caching use cases when it's necessary to delete stale entries: ```go m.DeleteMatching(func(key int, value int) (delete, stop bool) { return key%2 == 0, false // delete even keys }) ``` For high-performance iteration, `RangeRelaxed` (and `AllRelaxed` for Go 1.23+ iterators) can be used. Unlike `Range`, it is lock-free. 
However, it has relaxed consistency: the same key may be visited more than once if it is concurrently deleted and re-inserted during the iteration. ```go m.RangeRelaxed(func(key int, value int) bool { // process entry return true // continue iteration }) ``` ### UMPSCQueue A `UMPSCQueue` is an unbounded multi-producer single-consumer concurrent queue. This means that multiple goroutines can publish items to the queue while not more than a single goroutine must be consuming those items. Unlike bounded queues, this one puts no limit to the queue capacity. ```go q := xsync.NewUMPSCQueue[string]() // producer inserts an item into the queue; doesn't block // safe to invoke from multiple goroutines inserted := q.Enqueue("bar") // consumer obtains an item from the queue // must be called from a single goroutine item := q.Dequeue() // string ``` `UMPSCQueue` is meant to serve as a replacement for a channel. However, crucially, it has infinite capacity. This is a very bad idea in many cases as it means that it never exhibits backpressure. In other words, if nothing is consuming elements from the queue, it will eventually consume all available memory and crash the process. However, there are also cases where this is desired behavior as it means the queue will dynamically allocate more memory to store temporary bursts, allowing producers to never block while the consumer catches up. The backing data structure is represented as a singly linked list of large segments. Each segment is a slice of `T` along with a corresponding `sync.WaitGroup` for each index. Producers use an atomic counter to determine the unique index in the segment where they will write their value, and mark the corresponding wait group as done after having written the value. The consumer simply keeps track of the index it wants to read and waits for the corresponding wait group to complete. Neither operation acquires a lock and therefore performs quite well under highly contentious loads. 
Note however that because no locks are acquired, it is unsafe for multiple goroutines to consume from the queue. Consumers must explicitly synchronize between themselves. This allows setups with a single consumer to never acquire a lock, significantly speeding up consumption. ### SPSCQueue A `SPSCQueue` is a bounded single-producer single-consumer concurrent queue. This means that not more than a single goroutine must be publishing items to the queue while not more than a single goroutine must be consuming those items. ```go q := xsync.NewSPSCQueue[string](1024) // producer inserts an item into the queue // optimistic insertion attempt; doesn't block inserted := q.TryEnqueue("bar") // consumer obtains an item from the queue // optimistic obtain attempt; doesn't block item, ok := q.TryDequeue() // string ``` The queue is based on the data structure from this [article](https://rigtorp.se/ringbuffer). The idea is to reduce the CPU cache coherency traffic by keeping cached copies of read and write indexes used by producer and consumer respectively. Make sure to implement proper back-off strategy to handle failed optimistic operation attempts. The most basic back-off would be calling `runtime.Gosched()`. ### MPMCQueue A `MPMCQueue` is a bounded multi-producer multi-consumer concurrent queue. ```go q := xsync.NewMPMCQueue[string](1024) // producer optimistically inserts an item into the queue // optimistic insertion attempt; doesn't block inserted := q.TryEnqueue("bar") // consumer obtains an item from the queue // optimistic obtain attempt; doesn't block item, ok := q.TryDequeue() // string ``` The queue is based on the algorithm from the [MPMCQueue](https://github.com/rigtorp/MPMCQueue) C++ library which in its turn references D.Vyukov's [MPMC queue](https://www.1024cores.net/home/lock-free-algorithms/queues/bounded-mpmc-queue). 
According to the following [classification](https://www.1024cores.net/home/lock-free-algorithms/queues), the queue is array-based, fails on overflow, provides causal FIFO, has blocking producers and consumers. The idea of the algorithm is to allow parallelism for concurrent producers and consumers by introducing the notion of tickets, i.e. values of two counters, one per producers/consumers. An atomic increment of one of those counters is the only noticeable contention point in queue operations. The rest of the operation avoids contention on writes thanks to the turn-based read/write access for each of the queue items. In essence, `MPMCQueue` is a specialized queue for scenarios where there are multiple concurrent producers and consumers of a single queue running on a large multicore machine. To get the optimal performance, you may want to set the queue size to be large enough, say, an order of magnitude greater than the number of producers/consumers, to allow producers and consumers to progress with their queue operations in parallel most of the time. Other than that, make sure to implement proper back-off strategy to handle failed optimistic operation attempts. The most basic back-off would be calling `runtime.Gosched()`. ### RBMutex A `RBMutex` is a reader-biased reader/writer mutual exclusion lock. The lock can be held by many readers or a single writer. ```go mu := xsync.NewRBMutex() // reader lock calls return a token t := mu.RLock() // the token must be later used to unlock the mutex mu.RUnlock(t) // writer locks are the same as in sync.RWMutex mu.Lock() mu.Unlock() ``` `RBMutex` is based on a modified version of BRAVO (Biased Locking for Reader-Writer Locks) algorithm: https://arxiv.org/pdf/1810.01553.pdf The idea of the algorithm is to build on top of an existing reader-writer mutex and introduce a fast path for readers. On the fast path, reader lock attempts are sharded over an internal array based on the reader identity (a token in the case of Golang). 
This means that readers do not contend over a single atomic counter, as is done in, say, `sync.RWMutex`, allowing for better scalability in terms of cores. Hence, by design, `RBMutex` is a specialized mutex for scenarios, such as caches, where the vast majority of locks are acquired by readers and write lock acquire attempts are infrequent. In such scenarios, `RBMutex` should perform better than the `sync.RWMutex` on large multicore machines. `RBMutex` extends `sync.RWMutex` internally and uses it as the "reader bias disabled" fallback, so the same semantics apply. The only noticeable difference is in the reader tokens returned by `RLock` and passed to `RUnlock`. Apart from blocking methods, `RBMutex` also has methods for optimistic locking: ```go mu := xsync.NewRBMutex() if locked, t := mu.TryRLock(); locked { // critical reader section... mu.RUnlock(t) } if mu.TryLock() { // critical writer section... mu.Unlock() } ``` ## License Licensed under Apache v2. xsync-4.4.0/counter.go000066400000000000000000000043611513523422100146730ustar00rootroot00000000000000package xsync import ( "sync" "sync/atomic" ) // pool for P tokens var ptokenPool sync.Pool // a P token is used to point at the current OS thread (P) // on which the goroutine is run; exact identity of the thread, // as well as P migration tolerance, is not important since // it's used as a best-effort mechanism for assigning // concurrent operations (goroutines) to different stripes of // the counter type ptoken struct { idx uint32 // Padding to prevent false sharing. _ [cacheLineSize - 4]byte } // A Counter is a striped int64 counter. // // Should be preferred over a single atomically updated int64 // counter in high contention scenarios. // // A Counter must not be copied after first use. type Counter struct { stripes []cstripe mask uint32 } type cstripe struct { c int64 // Padding to prevent false sharing. _ [cacheLineSize - 8]byte } // NewCounter creates a new Counter instance. 
func NewCounter() *Counter { nstripes := nextPowOf2(parallelism()) c := Counter{ stripes: make([]cstripe, nstripes), mask: nstripes - 1, } return &c } // Inc increments the counter by 1. func (c *Counter) Inc() { c.Add(1) } // Dec decrements the counter by 1. func (c *Counter) Dec() { c.Add(-1) } // Add adds the delta to the counter. func (c *Counter) Add(delta int64) { t, ok := ptokenPool.Get().(*ptoken) if !ok { t = new(ptoken) t.idx = runtime_cheaprand() } for { stripe := &c.stripes[t.idx&c.mask] cnt := atomic.LoadInt64(&stripe.c) if atomic.CompareAndSwapInt64(&stripe.c, cnt, cnt+delta) { break } // Give a try with another randomly selected stripe. t.idx = runtime_cheaprand() } ptokenPool.Put(t) } // Value returns the current counter value. // The returned value may not include all of the latest operations in // presence of concurrent modifications of the counter. func (c *Counter) Value() int64 { v := int64(0) for i := 0; i < len(c.stripes); i++ { stripe := &c.stripes[i] v += atomic.LoadInt64(&stripe.c) } return v } // Reset resets the counter to zero. // This method should only be used when it is known that there are // no concurrent modifications of the counter. func (c *Counter) Reset() { for i := 0; i < len(c.stripes); i++ { stripe := &c.stripes[i] atomic.StoreInt64(&stripe.c, 0) } } xsync-4.4.0/counter_test.go000066400000000000000000000044321513523422100157310ustar00rootroot00000000000000package xsync_test import ( "runtime" "sync/atomic" "testing" . 
"github.com/puzpuzpuz/xsync/v4" ) func TestCounterInc(t *testing.T) { c := NewCounter() for i := range 100 { if v := c.Value(); v != int64(i) { t.Fatalf("got %v, want %d", v, i) } c.Inc() } } func TestCounterDec(t *testing.T) { c := NewCounter() for i := range 100 { if v := c.Value(); v != int64(-i) { t.Fatalf("got %v, want %d", v, -i) } c.Dec() } } func TestCounterAdd(t *testing.T) { c := NewCounter() for i := range 100 { if v := c.Value(); v != int64(i*42) { t.Fatalf("got %v, want %d", v, i*42) } c.Add(42) } } func TestCounterReset(t *testing.T) { c := NewCounter() c.Add(42) if v := c.Value(); v != 42 { t.Fatalf("got %v, want %d", v, 42) } c.Reset() if v := c.Value(); v != 0 { t.Fatalf("got %v, want %d", v, 0) } } func parallelIncrementor(c *Counter, numIncs int, cdone chan bool) { for range numIncs { c.Inc() } cdone <- true } func doTestParallelIncrementors(t *testing.T, numModifiers, gomaxprocs int) { runtime.GOMAXPROCS(gomaxprocs) c := NewCounter() cdone := make(chan bool) numIncs := 10_000 for range numModifiers { go parallelIncrementor(c, numIncs, cdone) } // Wait for the goroutines to finish. 
for range numModifiers { <-cdone } expected := int64(numModifiers * numIncs) if v := c.Value(); v != expected { t.Fatalf("got %d, want %d", v, expected) } } func TestCounterParallelIncrementors(t *testing.T) { defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) doTestParallelIncrementors(t, 4, 2) doTestParallelIncrementors(t, 16, 4) doTestParallelIncrementors(t, 64, 8) } func benchmarkCounter(b *testing.B, writeRatio int) { c := NewCounter() runParallel(b, func(pb *testing.PB) { foo := 0 for pb.Next() { foo++ if writeRatio > 0 && foo%writeRatio == 0 { c.Value() } else { c.Inc() } } _ = foo }) } func BenchmarkCounter(b *testing.B) { benchmarkCounter(b, 10000) } func benchmarkAtomicInt64(b *testing.B, writeRatio int) { var c int64 runParallel(b, func(pb *testing.PB) { foo := 0 for pb.Next() { foo++ if writeRatio > 0 && foo%writeRatio == 0 { atomic.LoadInt64(&c) } else { atomic.AddInt64(&c, 1) } } _ = foo }) } func BenchmarkAtomicInt64(b *testing.B) { benchmarkAtomicInt64(b, 10000) } xsync-4.4.0/example_test.go000066400000000000000000000062301513523422100157030ustar00rootroot00000000000000package xsync_test import ( "errors" "fmt" "github.com/puzpuzpuz/xsync/v4" ) func ExampleMapOf_Compute() { counts := xsync.NewMap[int, int]() // Store a new value. v, ok := counts.Compute(42, func(oldValue int, loaded bool) (newValue int, op xsync.ComputeOp) { // loaded is false here. newValue = 42 op = xsync.UpdateOp return }) // v: 42, ok: true fmt.Printf("v: %v, ok: %v\n", v, ok) // Update an existing value. v, ok = counts.Compute(42, func(oldValue int, loaded bool) (newValue int, op xsync.ComputeOp) { // loaded is true here. newValue = oldValue + 42 op = xsync.UpdateOp return }) // v: 84, ok: true fmt.Printf("v: %v, ok: %v\n", v, ok) // Set a new value or keep the old value conditionally. 
var oldVal int minVal := 63 v, ok = counts.Compute(42, func(oldValue int, loaded bool) (newValue int, op xsync.ComputeOp) { oldVal = oldValue if !loaded || oldValue < minVal { newValue = minVal op = xsync.UpdateOp return } // Here, the value is already greater than minVal, so instead of // updating the map, do nothing. op = xsync.CancelOp return }) // v: 84, ok: true, oldVal: 84 fmt.Printf("v: %v, ok: %v, oldVal: %v\n", v, ok, oldVal) // Delete an existing value. v, ok = counts.Compute(42, func(oldValue int, loaded bool) (newValue int, op xsync.ComputeOp) { // loaded is true here. op = xsync.DeleteOp return }) // v: 84, ok: false fmt.Printf("v: %v, ok: %v\n", v, ok) // Propagate an error from the compute function to the outer scope. var err error v, ok = counts.Compute(42, func(oldValue int, loaded bool) (newValue int, op xsync.ComputeOp) { if oldValue == 42 { err = errors.New("something went wrong") return 0, xsync.CancelOp // no need to create a key/value pair } newValue = 0 op = xsync.UpdateOp return }) fmt.Printf("err: %v\n", err) } func ExampleMap_DeleteMatching() { m := xsync.NewMap[string, int]() m.Store("alice", 10) m.Store("bob", 20) m.Store("carol", 30) m.Store("dave", 40) // Delete entries with value greater than 25. deleted := m.DeleteMatching(func(key string, value int) (delete, stop bool) { return value > 25, false }) fmt.Printf("deleted: %d\n", deleted) fmt.Printf("size: %d\n", m.Size()) // Output: // deleted: 2 // size: 2 } func ExampleMap_Range() { m := xsync.NewMap[string, int]() m.Store("alice", 10) m.Store("bob", 20) m.Store("carol", 30) // Range iterates over all entries in the map. // It acquires bucket locks to ensure each key is visited at most once. 
sum := 0 m.Range(func(key string, value int) bool { sum += value return true // continue iteration }) fmt.Printf("sum: %d\n", sum) // Output: // sum: 60 } func ExampleMap_RangeRelaxed() { m := xsync.NewMap[string, int]() m.Store("alice", 10) m.Store("bob", 20) m.Store("carol", 30) // RangeRelaxed is a faster, lock-free alternative to Range. // It does not acquire locks. However, the same key may be // visited more than once if it is concurrently deleted and // re-inserted during the iteration. sum := 0 m.RangeRelaxed(func(key string, value int) bool { sum += value return true // continue iteration }) fmt.Printf("sum: %d\n", sum) // Output: // sum: 60 } xsync-4.4.0/export_test.go000066400000000000000000000014571513523422100155770ustar00rootroot00000000000000package xsync const ( EntriesPerMapBucket = entriesPerMapBucket MapLoadFactor = mapLoadFactor DefaultMinMapTableLen = defaultMinMapTableLen DefaultMinMapTableCap = defaultMinMapTableLen * entriesPerMapBucket MaxMapCounterLen = maxMapCounterLen ) type ( BucketPadded = bucketPadded ) func EnableAssertions() { assertionsEnabled = true } func DisableAssertions() { assertionsEnabled = false } func Cheaprand() uint32 { return runtime_cheaprand() } func Broadcast(b uint8) uint64 { return broadcast(b) } func FirstMarkedByteIndex(w uint64) int { return firstMarkedByteIndex(w) } func MarkZeroBytes(w uint64) uint64 { return markZeroBytes(w) } func SetByte(w uint64, b uint8, idx int) uint64 { return setByte(w, b, idx) } func NextPowOf2(v uint32) uint32 { return nextPowOf2(v) } xsync-4.4.0/go.mod000066400000000000000000000000561513523422100137700ustar00rootroot00000000000000module github.com/puzpuzpuz/xsync/v4 go 1.24 xsync-4.4.0/map.go000066400000000000000000001215451513523422100137750ustar00rootroot00000000000000package xsync import ( "fmt" "hash/maphash" "iter" "math" "math/bits" "runtime" "strings" "sync" "sync/atomic" "unsafe" ) const ( // number of Map entries per bucket; 5 entries lead to size of 64B // (one cache 
line) on 64-bit machines entriesPerMapBucket = 5 // threshold fraction of table occupation to start a table shrinking // when deleting the last entry in a bucket chain mapShrinkFraction = 128 // map load factor to trigger a table resize during insertion; // a map holds up to mapLoadFactor*entriesPerMapBucket*mapTableLen // key-value pairs (this is a soft limit) mapLoadFactor = 0.75 // minimal table size, i.e. number of buckets; thus, minimal map // capacity can be calculated as entriesPerMapBucket*defaultMinMapTableLen defaultMinMapTableLen = 32 // minimum counter stripes to use minMapCounterLen = 8 // maximum counter stripes to use; stands for around 4KB of memory maxMapCounterLen = 32 metaMask = 0xffffffffff occupiedMeta = 0x8080808080808080 occupiedMetaMasked = occupiedMeta & metaMask // minimal number of buckets to transfer when participating in cooperative // resize; should be at least defaultMinMapTableLen minResizeTransferStride = 64 // upper limit for max number of additional goroutines that participate // in cooperative resize; must be changed simultaneously with resizeCtl // and the related code maxResizeHelpersLimit = (1 << 5) - 1 ) // max number of additional goroutines that participate in cooperative resize; // "resize owner" goroutine isn't counted var maxResizeHelpers = min(max(int32(parallelism()-1), 1), maxResizeHelpersLimit) type mapResizeHint int const ( mapGrowHint mapResizeHint = 0 mapShrinkHint mapResizeHint = 1 mapClearHint mapResizeHint = 2 ) type ComputeOp int const ( // CancelOp signals to Compute to not do anything as a result // of executing the lambda. If the entry was not present in // the map, nothing happens, and if it was present, the // returned value is ignored. CancelOp ComputeOp = iota // UpdateOp signals to Compute to update the entry to the // value returned by the lambda, creating it if necessary. UpdateOp // DeleteOp signals to Compute to always delete the entry // from the map. 
DeleteOp ) type loadOp int const ( noLoadOp loadOp = iota loadOrComputeOp loadAndDeleteOp ) type hashKind int const ( hashKindComparable hashKind = iota hashKindInt hashKindInt64 hashKindUint64 hashKindUintptr ) // Deprecated: use [Map] type MapOf[K comparable, V any] = Map[K, V] // Map is like a Go map[K]V but is safe for concurrent // use by multiple goroutines without additional locking or // coordination. It follows the interface of sync.Map with // a number of valuable extensions like Compute or Size. // // A Map must not be copied after first use. // // Map uses a modified version of Cache-Line Hash Table (CLHT) // data structure: https://github.com/LPD-EPFL/CLHT // // CLHT is built around idea to organize the hash table in // cache-line-sized buckets, so that on all modern CPUs update // operations complete with at most one cache-line transfer. // Also, Get operations involve no write to memory, as well as no // mutexes or any other sort of locks. Due to this design, in all // considered scenarios Map outperforms sync.Map. // // Map also borrows ideas from Java's j.u.c.ConcurrentHashMap // (immutable K/V pair structs instead of atomic snapshots) // and C++'s absl::flat_hash_map (meta memory and SWAR-based // lookups). 
type Map[K comparable, V any] struct { totalGrowths atomic.Int64 totalShrinks atomic.Int64 table atomic.Pointer[mapTable[K, V]] // table being transferred to nextTable atomic.Pointer[mapTable[K, V]] // resize control state: combines resize sequence number (upper 59 bits) and // the current number of resize helpers (lower 5 bits); // odd values of resize sequence mean in-progress resize resizeCtl atomic.Uint64 // only used along with resizeCond resizeMu sync.Mutex // used to wake up resize waiters (concurrent writes) resizeCond sync.Cond // transfer progress index for resize resizeIdx atomic.Int64 minTableLen int growOnly bool hashKind hashKind } type mapTable[K comparable, V any] struct { buckets []bucketPadded // striped counter for number of table entries; // used to determine if a table shrinking is needed // occupies min(buckets_memory/1024, 64KB) of memory size []counterStripe seed maphash.Seed // intSeed is derived from seed for fast integer hashing intSeed uint64 } type counterStripe struct { c int64 // Padding to prevent false sharing. _ [cacheLineSize - 8]byte } // bucketPadded is a CL-sized map bucket holding up to // entriesPerMapBucket entries. type bucketPadded struct { //lint:ignore U1000 ensure each bucket takes two cache lines on both 32 and 64-bit archs pad [cacheLineSize - unsafe.Sizeof(bucket{})]byte bucket } type bucket struct { meta uint64 entries [entriesPerMapBucket]unsafe.Pointer // *entry next unsafe.Pointer // *bucketPadded mu sync.Mutex } // entry is an immutable map entry. type entry[K comparable, V any] struct { key K value V } // MapConfig defines configurable Map options. type MapConfig struct { sizeHint int growOnly bool } // WithPresize configures new Map instance with capacity enough // to hold sizeHint entries. The capacity is treated as the minimal // capacity meaning that the underlying hash table will never shrink // to a smaller capacity. If sizeHint is zero or negative, the value // is ignored. 
func WithPresize(sizeHint int) func(*MapConfig) { return func(c *MapConfig) { c.sizeHint = sizeHint } } // WithGrowOnly configures new Map instance to be grow-only. // This means that the underlying hash table grows in capacity when // new keys are added, but does not shrink when keys are deleted. // The only exception to this rule is the Clear method which // shrinks the hash table back to the initial capacity. func WithGrowOnly() func(*MapConfig) { return func(c *MapConfig) { c.growOnly = true } } // Deprecated: map resizing now happens cooperatively, without starting // any additional goroutines. func WithSerialResize() func(*MapConfig) { return func(c *MapConfig) { } } // Deprecated: use [NewMap]. func NewMapOf[K comparable, V any](options ...func(*MapConfig)) *Map[K, V] { return NewMap[K, V](options...) } // NewMap creates a new Map instance configured with the given // options. func NewMap[K comparable, V any](options ...func(*MapConfig)) *Map[K, V] { c := &MapConfig{ sizeHint: defaultMinMapTableLen * entriesPerMapBucket, } for _, o := range options { o(c) } m := &Map[K, V]{} m.resizeCond = *sync.NewCond(&m.resizeMu) m.hashKind = detectHashKind[K]() var table *mapTable[K, V] if c.sizeHint <= defaultMinMapTableLen*entriesPerMapBucket { table = newMapTable[K, V](defaultMinMapTableLen, maphash.MakeSeed()) } else { tableLen := nextPowOf2(uint32((float64(c.sizeHint) / entriesPerMapBucket) / mapLoadFactor)) table = newMapTable[K, V](int(tableLen), maphash.MakeSeed()) } m.minTableLen = len(table.buckets) m.growOnly = c.growOnly m.table.Store(table) return m } // detectHashKind returns the appropriate hash kind for the key type. 
func detectHashKind[K comparable]() hashKind { var zero K switch any(zero).(type) { case int: return hashKindInt case int64: return hashKindInt64 case uint64: return hashKindUint64 case uintptr: return hashKindUintptr default: return hashKindComparable } } // hashUint64 computes a hash for integer keys using a 128-bit // multiply-xorshift mixer (wyhash-style). The constant is xxHash's // PRIME64_1 which provides excellent avalanche. This is significantly // faster than maphash.Comparable for integer types. func hashUint64(seed, v uint64) uint64 { hi, lo := bits.Mul64(v^seed, 0x9E3779B185EBCA87) return hi ^ lo } func hashKey[K comparable](k K, hashKind hashKind, seed maphash.Seed, intSeed uint64) uint64 { switch hashKind { case hashKindInt: return hashUint64(intSeed, uint64(any(k).(int))) case hashKindInt64: return hashUint64(intSeed, uint64(any(k).(int64))) case hashKindUint64: return hashUint64(intSeed, any(k).(uint64)) case hashKindUintptr: return hashUint64(intSeed, uint64(any(k).(uintptr))) default: return maphash.Comparable(seed, k) } } func newMapTable[K comparable, V any](minTableLen int, seed maphash.Seed) *mapTable[K, V] { buckets := make([]bucketPadded, minTableLen) counterLen := minTableLen >> 10 if counterLen < minMapCounterLen { counterLen = minMapCounterLen } else if counterLen > maxMapCounterLen { counterLen = maxMapCounterLen } counter := make([]counterStripe, counterLen) // Derive intSeed from maphash.Seed for fast integer hashing var h maphash.Hash h.SetSeed(seed) h.WriteByte(0) intSeed := h.Sum64() t := &mapTable[K, V]{ buckets: buckets, size: counter, seed: seed, intSeed: intSeed, } return t } // ToPlainMap returns a native map with a copy of xsync Map's // contents. The copied xsync Map should not be modified while // this call is made. If the copied Map is modified, the copying // behavior is the same as in the Range method. 
func ToPlainMap[K comparable, V any](m *Map[K, V]) map[K]V { pm := make(map[K]V) if m != nil { m.Range(func(key K, value V) bool { pm[key] = value return true }) } return pm } // Load returns the value stored in the map for a key, or zero value // of type V if no value is present. // The ok result indicates whether value was found in the map. func (m *Map[K, V]) Load(key K) (value V, ok bool) { table := m.table.Load() // This is hot path, hence hand-inlined hashKey(). var hash uint64 switch m.hashKind { case hashKindInt: hash = hashUint64(table.intSeed, uint64(any(key).(int))) case hashKindInt64: hash = hashUint64(table.intSeed, uint64(any(key).(int64))) case hashKindUint64: hash = hashUint64(table.intSeed, any(key).(uint64)) case hashKindUintptr: hash = hashUint64(table.intSeed, uint64(any(key).(uintptr))) default: hash = maphash.Comparable(table.seed, key) } h1 := h1(hash) h2w := broadcast(h2(hash)) bidx := uint64(len(table.buckets)-1) & h1 // Same as: b := &table.buckets[bidx] // Inline bounds check elimination via unsafe pointer arithmetic. // Safety: bidx is always < len(table.buckets) since it's masked with (len-1). b := (*bucketPadded)(unsafe.Add(unsafe.Pointer(&table.buckets[0]), uintptr(bidx)*unsafe.Sizeof(bucketPadded{}))) for { metaw := atomic.LoadUint64(&b.meta) markedw := markZeroBytes(metaw^h2w) & metaMask for markedw != 0 { idx := firstMarkedByteIndex(markedw) // Same as: eptr := atomic.LoadPointer(&b.entries[idx]) // Inline bounds check elimination via unsafe pointer arithmetic. // Safety: idx is always < entriesPerMapBucket (5) since it comes from // firstMarkedByteIndex which returns index of a marked byte in the // 5-byte metadata mask (metaMask). 
eptr := atomic.LoadPointer((*unsafe.Pointer)(unsafe.Add( unsafe.Pointer(&b.entries[0]), uintptr(idx)*unsafe.Sizeof(b.entries[0])))) if eptr != nil { e := (*entry[K, V])(eptr) if e.key == key { return e.value, true } } markedw &= markedw - 1 } bptr := atomic.LoadPointer(&b.next) if bptr == nil { return } b = (*bucketPadded)(bptr) } } // Store sets the value for a key. func (m *Map[K, V]) Store(key K, value V) { // Store is a popular operation, hence instead of using doCompute, // it uses a simplified and slightly faster version of it. for { store_attempt: var ( emptyb *bucketPadded emptyidx int ) table := m.table.Load() tableLen := len(table.buckets) // This is hot path, hence hand-inlined hashKey(). var hash uint64 switch m.hashKind { case hashKindInt: hash = hashUint64(table.intSeed, uint64(any(key).(int))) case hashKindInt64: hash = hashUint64(table.intSeed, uint64(any(key).(int64))) case hashKindUint64: hash = hashUint64(table.intSeed, any(key).(uint64)) case hashKindUintptr: hash = hashUint64(table.intSeed, uint64(any(key).(uintptr))) default: hash = maphash.Comparable(table.seed, key) } h1 := h1(hash) h2 := h2(hash) h2w := broadcast(h2) bidx := uint64(len(table.buckets)-1) & h1 rootb := &table.buckets[bidx] rootb.mu.Lock() // The following two checks must go in reverse to what's // in the resize method. if seq := resizeSeq(m.resizeCtl.Load()); seq&1 == 1 { // Resize is in progress. Help with the transfer, then go for another attempt. rootb.mu.Unlock() m.helpResize(seq) goto store_attempt } if m.newerTableExists(table) { // Someone resized the table. Go for another attempt. rootb.mu.Unlock() goto store_attempt } b := rootb for { metaw := b.meta markedw := markZeroBytes(metaw^h2w) & metaMask for markedw != 0 { idx := firstMarkedByteIndex(markedw) eptr := b.entries[idx] if eptr != nil { e := (*entry[K, V])(eptr) if e.key == key { // In-place update. 
newe := new(entry[K, V]) newe.key = key newe.value = value atomic.StorePointer(&b.entries[idx], unsafe.Pointer(newe)) rootb.mu.Unlock() return } } markedw &= markedw - 1 } if emptyb == nil { // Search for empty entries (up to 5 per bucket). emptyw := ^metaw & occupiedMetaMasked if emptyw != 0 { idx := firstMarkedByteIndex(emptyw) emptyb = b emptyidx = idx } } if b.next == nil { if emptyb != nil { // Insertion into an existing bucket. newe := new(entry[K, V]) newe.key = key newe.value = value // First we update meta, then the entry. atomic.StoreUint64(&emptyb.meta, setByte(emptyb.meta, h2, emptyidx)) atomic.StorePointer(&emptyb.entries[emptyidx], unsafe.Pointer(newe)) rootb.mu.Unlock() table.addSize(bidx, 1) return } growThreshold := float64(tableLen) * entriesPerMapBucket * mapLoadFactor if table.sumSize() > int64(growThreshold) { // Need to grow the table. Then go for another attempt. rootb.mu.Unlock() m.resize(table, mapGrowHint) goto store_attempt } // Insertion into a new bucket. // Create and append a bucket. newb := new(bucketPadded) newb.meta = setByte(0, h2, 0) newe := new(entry[K, V]) newe.key = key newe.value = value newb.entries[0] = unsafe.Pointer(newe) atomic.StorePointer(&b.next, unsafe.Pointer(newb)) rootb.mu.Unlock() table.addSize(bidx, 1) return } b = (*bucketPadded)(b.next) } } } // LoadOrStore returns the existing value for the key if present. // Otherwise, it stores and returns the given value. // The loaded result is true if the value was loaded, false if stored. func (m *Map[K, V]) LoadOrStore(key K, value V) (actual V, loaded bool) { return m.doCompute( key, func(oldValue V, loaded bool) (V, ComputeOp) { if loaded { return oldValue, CancelOp } return value, UpdateOp }, loadOrComputeOp, false, ) } // LoadAndStore returns the existing value for the key if present, // while setting the new value for the key. // It stores the new value and returns the existing one, if present. 
// The loaded result is true if the existing value was loaded, // false otherwise. func (m *Map[K, V]) LoadAndStore(key K, value V) (actual V, loaded bool) { return m.doCompute( key, func(V, bool) (V, ComputeOp) { return value, UpdateOp }, noLoadOp, false, ) } // LoadOrCompute returns the existing value for the key if // present. Otherwise, it tries to compute the value using the // provided function and, if successful, stores and returns // the computed value. The loaded result is true if the value was // loaded, or false if computed. If valueFn returns true as the // cancel value, the computation is cancelled and the zero value // for type V is returned. // // This call locks a hash table bucket while the compute function // is executed. It means that modifications on other entries in // the bucket will be blocked until the valueFn executes. Consider // this when the function includes long-running operations. func (m *Map[K, V]) LoadOrCompute( key K, valueFn func() (newValue V, cancel bool), ) (value V, loaded bool) { return m.doCompute( key, func(oldValue V, loaded bool) (V, ComputeOp) { if loaded { return oldValue, CancelOp } newValue, c := valueFn() if !c { return newValue, UpdateOp } return oldValue, CancelOp }, loadOrComputeOp, false, ) } // Compute either sets the computed new value for the key, // deletes the value for the key, or does nothing, based on // the returned [ComputeOp]. When the op returned by valueFn // is [UpdateOp], the value is updated to the new value. If // it is [DeleteOp], the entry is removed from the map // altogether. And finally, if the op is [CancelOp] then the // entry is left as-is. In other words, if it did not already // exist, it is not created, and if it did exist, it is not // updated. This is useful to synchronously execute some // operation on the value without incurring the cost of // updating the map every time. The ok result indicates // whether the entry is present in the map after the compute // operation. 
The actual result contains the value of the map // if a corresponding entry is present, or the zero value // otherwise. See the example for a few use cases. // // This call locks a hash table bucket while the compute function // is executed. It means that modifications on other entries in // the bucket will be blocked until the valueFn executes. Consider // this when the function includes long-running operations. func (m *Map[K, V]) Compute( key K, valueFn func(oldValue V, loaded bool) (newValue V, op ComputeOp), ) (actual V, ok bool) { return m.doCompute(key, valueFn, noLoadOp, true) } // LoadAndDelete deletes the value for a key, returning the previous // value if any. The loaded result reports whether the key was // present. func (m *Map[K, V]) LoadAndDelete(key K) (value V, loaded bool) { return m.doCompute( key, func(value V, loaded bool) (V, ComputeOp) { return value, DeleteOp }, loadAndDeleteOp, false, ) } // Delete deletes the value for a key. func (m *Map[K, V]) Delete(key K) { m.LoadAndDelete(key) } func (m *Map[K, V]) doCompute( key K, valueFn func(oldValue V, loaded bool) (V, ComputeOp), loadOp loadOp, computeOnly bool, ) (V, bool) { for { compute_attempt: var ( emptyb *bucketPadded emptyidx int ) table := m.table.Load() tableLen := len(table.buckets) // This is hot path, hence hand-inlined hashKey(). 
var hash uint64 switch m.hashKind { case hashKindInt: hash = hashUint64(table.intSeed, uint64(any(key).(int))) case hashKindInt64: hash = hashUint64(table.intSeed, uint64(any(key).(int64))) case hashKindUint64: hash = hashUint64(table.intSeed, any(key).(uint64)) case hashKindUintptr: hash = hashUint64(table.intSeed, uint64(any(key).(uintptr))) default: hash = maphash.Comparable(table.seed, key) } h1 := h1(hash) h2 := h2(hash) h2w := broadcast(h2) bidx := uint64(len(table.buckets)-1) & h1 rootb := &table.buckets[bidx] if loadOp != noLoadOp { b := rootb load: for { metaw := atomic.LoadUint64(&b.meta) markedw := markZeroBytes(metaw^h2w) & metaMask for markedw != 0 { idx := firstMarkedByteIndex(markedw) eptr := atomic.LoadPointer(&b.entries[idx]) if eptr != nil { e := (*entry[K, V])(eptr) if e.key == key { if loadOp == loadOrComputeOp { return e.value, true } break load } } markedw &= markedw - 1 } bptr := atomic.LoadPointer(&b.next) if bptr == nil { if loadOp == loadAndDeleteOp { return *new(V), false } break load } b = (*bucketPadded)(bptr) } } rootb.mu.Lock() // The following two checks must go in reverse to what's // in the resize method. if seq := resizeSeq(m.resizeCtl.Load()); seq&1 == 1 { // Resize is in progress. Help with the transfer, then go for another attempt. rootb.mu.Unlock() m.helpResize(seq) goto compute_attempt } if m.newerTableExists(table) { // Someone resized the table. Go for another attempt. rootb.mu.Unlock() goto compute_attempt } b := rootb for { metaw := b.meta markedw := markZeroBytes(metaw^h2w) & metaMask for markedw != 0 { idx := firstMarkedByteIndex(markedw) eptr := b.entries[idx] if eptr != nil { e := (*entry[K, V])(eptr) if e.key == key { // In-place update/delete. // We get a copy of the value via an interface{} on each call, // thus the live value pointers are unique. Otherwise atomic // snapshot won't be correct in case of multiple Store calls // using the same value. 
oldv := e.value newv, op := valueFn(oldv, true) switch op { case DeleteOp: // Deletion. // First we update the hash, then the entry. newmetaw := setByte(metaw, 0, idx) atomic.StoreUint64(&b.meta, newmetaw) atomic.StorePointer(&b.entries[idx], nil) rootb.mu.Unlock() table.addSize(bidx, -1) // Might need to shrink the table if we left bucket empty. if newmetaw == 0 { m.resize(table, mapShrinkHint) } return oldv, !computeOnly case UpdateOp: newe := new(entry[K, V]) newe.key = key newe.value = newv atomic.StorePointer(&b.entries[idx], unsafe.Pointer(newe)) case CancelOp: newv = oldv } rootb.mu.Unlock() if computeOnly { // Compute expects the new value to be returned. return newv, true } // LoadAndStore expects the old value to be returned. return oldv, true } } markedw &= markedw - 1 } if emptyb == nil { // Search for empty entries (up to 5 per bucket). emptyw := ^metaw & occupiedMetaMasked if emptyw != 0 { idx := firstMarkedByteIndex(emptyw) emptyb = b emptyidx = idx } } if b.next == nil { if emptyb != nil { // Insertion into an existing bucket. var zeroV V newValue, op := valueFn(zeroV, false) switch op { case DeleteOp, CancelOp: rootb.mu.Unlock() return zeroV, false default: newe := new(entry[K, V]) newe.key = key newe.value = newValue // First we update meta, then the entry. atomic.StoreUint64(&emptyb.meta, setByte(emptyb.meta, h2, emptyidx)) atomic.StorePointer(&emptyb.entries[emptyidx], unsafe.Pointer(newe)) rootb.mu.Unlock() table.addSize(bidx, 1) return newValue, computeOnly } } growThreshold := float64(tableLen) * entriesPerMapBucket * mapLoadFactor if table.sumSize() > int64(growThreshold) { // Need to grow the table. Then go for another attempt. rootb.mu.Unlock() m.resize(table, mapGrowHint) goto compute_attempt } // Insertion into a new bucket. var zeroV V newValue, op := valueFn(zeroV, false) switch op { case DeleteOp, CancelOp: rootb.mu.Unlock() return zeroV, false default: // Create and append a bucket. 
newb := new(bucketPadded) newb.meta = setByte(0, h2, 0) newe := new(entry[K, V]) newe.key = key newe.value = newValue newb.entries[0] = unsafe.Pointer(newe) atomic.StorePointer(&b.next, unsafe.Pointer(newb)) rootb.mu.Unlock() table.addSize(bidx, 1) return newValue, computeOnly } } b = (*bucketPadded)(b.next) } } } func (m *Map[K, V]) newerTableExists(table *mapTable[K, V]) bool { return table != m.table.Load() } func resizeSeq(ctl uint64) uint64 { return ctl >> 5 } func resizeHelpers(ctl uint64) uint64 { return ctl & maxResizeHelpersLimit } func resizeCtl(seq uint64, helpers uint64) uint64 { return (seq << 5) | (helpers & maxResizeHelpersLimit) } func (m *Map[K, V]) waitForResize() { m.resizeMu.Lock() for resizeSeq(m.resizeCtl.Load())&1 == 1 { m.resizeCond.Wait() } m.resizeMu.Unlock() } func (m *Map[K, V]) resize(knownTable *mapTable[K, V], hint mapResizeHint) { knownTableLen := len(knownTable.buckets) // Fast path for shrink attempts. if hint == mapShrinkHint { if m.growOnly || m.minTableLen == knownTableLen || knownTable.sumSize() > int64((knownTableLen*entriesPerMapBucket)/mapShrinkFraction) { return } } // Slow path. seq := resizeSeq(m.resizeCtl.Load()) if seq&1 == 1 || !m.resizeCtl.CompareAndSwap(resizeCtl(seq, 0), resizeCtl(seq+1, 0)) { m.helpResize(seq) return } var newTable *mapTable[K, V] table := m.table.Load() tableLen := len(table.buckets) switch hint { case mapGrowHint: // Grow the table with factor of 2. // We must keep the same table seed here to keep the same hash codes // allowing us to avoid locking destination buckets when resizing. m.totalGrowths.Add(1) newTable = newMapTable[K, V](tableLen<<1, table.seed) case mapShrinkHint: shrinkThreshold := int64((tableLen * entriesPerMapBucket) / mapShrinkFraction) if tableLen > m.minTableLen && table.sumSize() <= shrinkThreshold { // Shrink the table with factor of 2. // Analogous to growth, we must preserve the seed to ensure stable // hash mapping, enabling lock-free writes to destination buckets. 
m.totalShrinks.Add(1) newTable = newMapTable[K, V](tableLen>>1, table.seed) } else { // No need to shrink. Wake up all waiters and give up. m.resizeMu.Lock() m.resizeCtl.Store(resizeCtl(seq+2, 0)) m.resizeCond.Broadcast() m.resizeMu.Unlock() return } case mapClearHint: newTable = newMapTable[K, V](m.minTableLen, maphash.MakeSeed()) default: panic(fmt.Sprintf("unexpected resize hint: %d", hint)) } // Copy the data only if we're not clearing the map. if hint != mapClearHint { // Set up cooperative transfer state. // Next table must be published as the last step. m.resizeIdx.Store(0) m.nextTable.Store(newTable) // Copy the buckets. m.transfer(table, newTable) } // We're about to publish the new table, but before that // we must wait for all helpers to finish. for resizeHelpers(m.resizeCtl.Load()) != 0 { runtime.Gosched() } m.table.Store(newTable) m.nextTable.Store(nil) ctl := resizeCtl(seq+1, 0) newCtl := resizeCtl(seq+2, 0) // Increment the sequence number and wake up all waiters. m.resizeMu.Lock() // There may be slowpoke helpers who have just incremented // the helper counter. This CAS loop makes sure to wait // for them to back off. for !m.resizeCtl.CompareAndSwap(ctl, newCtl) { runtime.Gosched() } m.resizeCond.Broadcast() m.resizeMu.Unlock() } func (m *Map[K, V]) helpResize(seq uint64) { for { table := m.table.Load() nextTable := m.nextTable.Load() if resizeSeq(m.resizeCtl.Load()) == seq { if nextTable == nil || nextTable == table { // Carry on until the next table is set by the main // resize goroutine or until the resize finishes. runtime.Gosched() continue } // The resize is still in-progress, so let's try registering // as a helper. for { ctl := m.resizeCtl.Load() if resizeSeq(ctl) != seq || resizeHelpers(ctl) >= uint64(maxResizeHelpers) { // The resize has ended or there are too many helpers. break } if m.resizeCtl.CompareAndSwap(ctl, ctl+1) { // Yay, we're a resize helper! m.transfer(table, nextTable) // Don't forget to unregister as a helper. 
m.resizeCtl.Add(^uint64(0)) break } } m.waitForResize() } break } } func (m *Map[K, V]) transfer(table, newTable *mapTable[K, V]) { tableLen := len(table.buckets) newTableLen := len(newTable.buckets) // Determines the concurrent task range for destination buckets. // We iterate based on these properties to avoid locking destination // buckets: // - Grow (Pow2): baseLen == tableLen // Entries from source bucket i move to dest buckets i and i+baseLen // - Shrink (Pow2): baseLen == newTableLen // Entries from source buckets i and i+baseLen move to dest bucket i // By iterating 0..baseLen and processing all possible source buckets // (srcIdx += baseLen) in the inner loop, a single goroutine exclusively // owns the write operations for its assigned destination buckets. baseLen := min(tableLen, newTableLen) stride := max((baseLen>>3)/int(maxResizeHelpers), minResizeTransferStride) for { // Claim work by incrementing resizeIdx. nextIdx := m.resizeIdx.Add(int64(stride)) start := max(0, int(nextIdx)-stride) if start >= baseLen { break } end := min(int(nextIdx), baseLen) // Transfer buckets in this range. total := 0 for i := start; i < end; i++ { // Visit all source buckets that map to this destination bucket. // When growing, runs once. When shrinking, runs twice. for srcIdx := i; srcIdx < tableLen; srcIdx += baseLen { total += transferBucketUnsafe(&table.buckets[srcIdx], newTable, m.hashKind) } } // The exact counter stripe doesn't matter here, so pick up the one // that corresponds to the start value to avoid contention. newTable.addSize(uint64(start), total) } } // Doesn't acquire dest bucket lock. 
func transferBucketUnsafe[K comparable, V any]( b *bucketPadded, destTable *mapTable[K, V], hashKind hashKind, ) (copied int) { rootb := b rootb.mu.Lock() for { for i := range entriesPerMapBucket { if eptr := b.entries[i]; eptr != nil { e := (*entry[K, V])(eptr) hash := hashKey(e.key, hashKind, destTable.seed, destTable.intSeed) bidx := uint64(len(destTable.buckets)-1) & h1(hash) destb := &destTable.buckets[bidx] appendToBucket(h2(hash), e, destb) copied++ } } if b.next == nil { rootb.mu.Unlock() return } b = (*bucketPadded)(b.next) } } // Range calls f sequentially for each key and value present in the // map. If f returns false, range stops the iteration. // // Range does not necessarily correspond to any consistent snapshot // of the Map's contents: no key will be visited more than once, but // if the value for any key is stored or deleted concurrently, Range // may reflect any mapping for that key from any point during the // Range call. // // It is safe to modify the map while iterating it, including entry // creation, modification and deletion. However, the concurrent // modification rule apply, i.e. the changes may be not reflected // in the subsequently iterated entries. // // For a faster, lock-free alternative with relaxed consistency // guarantees, see [RangeRelaxed]. func (m *Map[K, V]) Range(f func(key K, value V) bool) { // Pre-allocate array big enough to fit entries for most hash tables. bentries := make([]*entry[K, V], 0, 16*entriesPerMapBucket) table := m.table.Load() for i := range table.buckets { rootb := &table.buckets[i] b := rootb // Prevent concurrent modifications and copy all entries into // the intermediate slice. rootb.mu.Lock() for { for i := range entriesPerMapBucket { if b.entries[i] != nil { bentries = append(bentries, (*entry[K, V])(b.entries[i])) } } if b.next == nil { rootb.mu.Unlock() break } b = (*bucketPadded)(b.next) } // Call the function for all copied entries. 
for j, e := range bentries { if !f(e.key, e.value) { return } // Remove the reference to avoid preventing the copied // entries from being GCed until this method finishes. bentries[j] = nil } bentries = bentries[:0] } } // All is similar to [Range], but returns an [iter.Seq2], so is compatible with // Go 1.23+ iterators. All of the same caveats and behaviour from [Range] apply // to All. // // For a faster, lock-free alternative with relaxed consistency // guarantees, see [AllRelaxed]. func (m *Map[K, V]) All() iter.Seq2[K, V] { return m.Range } // RangeRelaxed calls f sequentially for each key and value present // in the map. If f returns false, range stops the iteration. // // RangeRelaxed is a faster, lock-free alternative to [Range]. Unlike // Range, it does not acquire bucket locks and does not allocate memory // for entry snapshots. Instead, it reads entries directly using atomic // loads. // // RangeRelaxed does not necessarily correspond to any consistent // snapshot of the Map's contents: if the value for any key is stored // or deleted concurrently, RangeRelaxed may reflect any mapping for // that key from any point during the RangeRelaxed call. Unlike [Range], // the same key may be visited more than once if it is concurrently // deleted and re-inserted during the iteration. // // It is safe to modify the map while iterating it, including entry // creation, modification and deletion. However, the concurrent // modification rule apply, i.e. the changes may be not reflected // in the subsequently iterated entries. // // For stronger consistency guarantees where each key is visited at // most once, see [Range]. 
func (m *Map[K, V]) RangeRelaxed(f func(key K, value V) bool) { table := m.table.Load() for i := range table.buckets { b := &table.buckets[i] for { metaw := atomic.LoadUint64(&b.meta) markedw := metaw & occupiedMeta for markedw != 0 { idx := firstMarkedByteIndex(markedw) eptr := atomic.LoadPointer(&b.entries[idx]) if eptr != nil { e := (*entry[K, V])(eptr) if !f(e.key, e.value) { return } } markedw &= markedw - 1 } bptr := atomic.LoadPointer(&b.next) if bptr == nil { break } b = (*bucketPadded)(bptr) } } } // AllRelaxed is similar to [RangeRelaxed], but returns an [iter.Seq2], // so is compatible with Go 1.23+ iterators. All of the same caveats // and behaviour from [RangeRelaxed] apply to AllRelaxed. // // For stronger consistency guarantees where each key is visited at // most once, see [All]. func (m *Map[K, V]) AllRelaxed() iter.Seq2[K, V] { return m.RangeRelaxed } // DeleteMatching deletes all entries for which the delete return // value of f is true. If the stop return value is true, the // iteration stops immediately. The function returns the number // of deleted entries. // // DeleteMatching does not necessarily correspond to any consistent // snapshot of the Map's contents: if the value for any key is stored // or deleted concurrently (including by a concurrent DeleteMatching // call), DeleteMatching may reflect any mapping for that key from // any point during the call. In particular, if the map is resized // during the call (for example, due to concurrent modifications), // the iteration restarts internally with the new table, which may // result in calling f with the same key more than once. // // This call locks a hash table bucket for the duration of // evaluating f for all entries in the bucket and performing // deletions. It means that modifications on other entries in // the bucket will be blocked until f executes. Consider this when // the function includes long-running operations. 
func (m *Map[K, V]) DeleteMatching(f func(key K, value V) (delete, stop bool)) int { var totalDeleted int var anyBucketEmptied bool delete_loop_attempt: table := m.table.Load() for bidx := range table.buckets { rootb := &table.buckets[bidx] rootb.mu.Lock() // The following two checks must go in reverse to what's // in the resize method. if seq := resizeSeq(m.resizeCtl.Load()); seq&1 == 1 { // Resize is in progress. Help with the transfer, then go for another attempt. rootb.mu.Unlock() m.helpResize(seq) goto delete_loop_attempt } if m.newerTableExists(table) { // Someone resized the table. Go for another attempt. rootb.mu.Unlock() goto delete_loop_attempt } var bucketDeleted int b := rootb for { for i := range entriesPerMapBucket { eptr := b.entries[i] if eptr != nil { e := (*entry[K, V])(eptr) del, stop := f(e.key, e.value) if del { // Deletion. // First we update the meta, then the entry. newmetaw := setByte(b.meta, 0, i) atomic.StoreUint64(&b.meta, newmetaw) atomic.StorePointer(&b.entries[i], nil) bucketDeleted++ if newmetaw == 0 { anyBucketEmptied = true } } if stop { rootb.mu.Unlock() totalDeleted += bucketDeleted if bucketDeleted > 0 { table.addSize(uint64(bidx), -bucketDeleted) } if anyBucketEmptied { m.resize(table, mapShrinkHint) } return totalDeleted } } } if b.next == nil { break } b = (*bucketPadded)(b.next) } rootb.mu.Unlock() if bucketDeleted > 0 { totalDeleted += bucketDeleted table.addSize(uint64(bidx), -bucketDeleted) } } if anyBucketEmptied { m.resize(table, mapShrinkHint) } return totalDeleted } // Clear deletes all keys and values currently stored in the map. func (m *Map[K, V]) Clear() { m.resize(m.table.Load(), mapClearHint) } // Size returns current size of the map. func (m *Map[K, V]) Size() int { return int(m.table.Load().sumSize()) } // It is safe to use plain stores here because the destination bucket must be // either locked or exclusively written to by the helper during resize. 
func appendToBucket[K comparable, V any](h2 uint8, e *entry[K, V], b *bucketPadded) { for { for i := range entriesPerMapBucket { if b.entries[i] == nil { b.meta = setByte(b.meta, h2, i) b.entries[i] = unsafe.Pointer(e) return } } if b.next == nil { newb := new(bucketPadded) newb.meta = setByte(0, h2, 0) newb.entries[0] = unsafe.Pointer(e) b.next = unsafe.Pointer(newb) return } b = (*bucketPadded)(b.next) } } func (table *mapTable[K, V]) addSize(bucketIdx uint64, delta int) { cidx := bucketIdx & uint64(len(table.size)-1) atomic.AddInt64(&table.size[cidx].c, int64(delta)) } func (table *mapTable[K, V]) sumSize() int64 { sum := int64(0) for i := range table.size { sum += atomic.LoadInt64(&table.size[i].c) } return sum } func h1(h uint64) uint64 { return h >> 7 } func h2(h uint64) uint8 { return 0x80 | uint8(h&0x7f) } // MapStats is Map statistics. // // Warning: map statistics are intented to be used for diagnostic // purposes, not for production code. This means that breaking changes // may be introduced into this struct even between minor releases. type MapStats struct { // RootBuckets is the number of root buckets in the hash table. // Each bucket holds a few entries. RootBuckets int // TotalBuckets is the total number of buckets in the hash table, // including root and their chained buckets. Each bucket holds // a few entries. TotalBuckets int // EmptyBuckets is the number of buckets that hold no entries. EmptyBuckets int // Capacity is the Map capacity, i.e. the total number of // entries that all buckets can physically hold. This number // does not consider the load factor. Capacity int // Size is the exact number of entries stored in the map. Size int // Counter is the number of entries stored in the map according // to the internal atomic counter. In case of concurrent map // modifications this number may be different from Size. Counter int // CounterLen is the number of internal atomic counter stripes. 
// This number may grow with the map capacity to improve // multithreaded scalability. CounterLen int // MinEntries is the minimum number of entries per a chain of // buckets, i.e. a root bucket and its chained buckets. MinEntries int // MinEntries is the maximum number of entries per a chain of // buckets, i.e. a root bucket and its chained buckets. MaxEntries int // TotalGrowths is the number of times the hash table grew. TotalGrowths int64 // TotalGrowths is the number of times the hash table shrinked. TotalShrinks int64 } // ToString returns string representation of map stats. func (s *MapStats) ToString() string { var sb strings.Builder sb.WriteString("MapStats{\n") sb.WriteString(fmt.Sprintf("RootBuckets: %d\n", s.RootBuckets)) sb.WriteString(fmt.Sprintf("TotalBuckets: %d\n", s.TotalBuckets)) sb.WriteString(fmt.Sprintf("EmptyBuckets: %d\n", s.EmptyBuckets)) sb.WriteString(fmt.Sprintf("Capacity: %d\n", s.Capacity)) sb.WriteString(fmt.Sprintf("Size: %d\n", s.Size)) sb.WriteString(fmt.Sprintf("Counter: %d\n", s.Counter)) sb.WriteString(fmt.Sprintf("CounterLen: %d\n", s.CounterLen)) sb.WriteString(fmt.Sprintf("MinEntries: %d\n", s.MinEntries)) sb.WriteString(fmt.Sprintf("MaxEntries: %d\n", s.MaxEntries)) sb.WriteString(fmt.Sprintf("TotalGrowths: %d\n", s.TotalGrowths)) sb.WriteString(fmt.Sprintf("TotalShrinks: %d\n", s.TotalShrinks)) sb.WriteString("}\n") return sb.String() } // Stats returns statistics for the Map. Just like other map // methods, this one is thread-safe. Yet it's an O(N) operation, // so it should be used only for diagnostics or debugging purposes. 
func (m *Map[K, V]) Stats() MapStats { stats := MapStats{ TotalGrowths: m.totalGrowths.Load(), TotalShrinks: m.totalShrinks.Load(), MinEntries: math.MaxInt32, } table := m.table.Load() stats.RootBuckets = len(table.buckets) stats.Counter = int(table.sumSize()) stats.CounterLen = len(table.size) for i := range table.buckets { nentries := 0 b := &table.buckets[i] stats.TotalBuckets++ for { nentriesLocal := 0 stats.Capacity += entriesPerMapBucket for i := range entriesPerMapBucket { if atomic.LoadPointer(&b.entries[i]) != nil { stats.Size++ nentriesLocal++ } } nentries += nentriesLocal if nentriesLocal == 0 { stats.EmptyBuckets++ } if b.next == nil { break } b = (*bucketPadded)(atomic.LoadPointer(&b.next)) stats.TotalBuckets++ } if nentries < stats.MinEntries { stats.MinEntries = nentries } if nentries > stats.MaxEntries { stats.MaxEntries = nentries } } return stats } xsync-4.4.0/map_test.go000066400000000000000000001661121513523422100150330ustar00rootroot00000000000000package xsync_test import ( "math" "math/rand" "runtime" "strconv" "sync" "sync/atomic" "testing" "time" "unsafe" . 
"github.com/puzpuzpuz/xsync/v4" ) const ( // number of entries to use in benchmarks benchmarkNumEntries = 1_000 // key prefix used in benchmarks benchmarkKeyPrefix = "what_a_looooooooooooooooooooooong_key_prefix_" ) type point struct { x int32 y int32 } var benchmarkCases = []struct { name string readPercentage int }{ {"reads=100%", 100}, // 100% loads, 0% stores, 0% deletes {"reads=99%", 99}, // 99% loads, 0.5% stores, 0.5% deletes {"reads=90%", 90}, // 90% loads, 5% stores, 5% deletes {"reads=75%", 75}, // 75% loads, 12.5% stores, 12.5% deletes } var benchmarkKeys []string func init() { benchmarkKeys = make([]string, benchmarkNumEntries) for i := range benchmarkNumEntries { benchmarkKeys[i] = benchmarkKeyPrefix + strconv.Itoa(i) } } func runParallel(b *testing.B, benchFn func(pb *testing.PB)) { b.ResetTimer() start := time.Now() b.RunParallel(benchFn) opsPerSec := float64(b.N) / float64(time.Since(start).Seconds()) b.ReportMetric(opsPerSec, "ops/s") } func TestMap_BucketStructSize(t *testing.T) { size := unsafe.Sizeof(BucketPadded{}) if size != 64 { t.Fatalf("size of 64B (one cache line) is expected, got: %d", size) } size = unsafe.Sizeof(BucketPadded{}) if size != 64 { t.Fatalf("size of 64B (one cache line) is expected, got: %d", size) } } func TestMap_MissingEntry(t *testing.T) { m := NewMap[string, string]() v, ok := m.Load("foo") if ok { t.Fatalf("value was not expected: %v", v) } if deleted, loaded := m.LoadAndDelete("foo"); loaded { t.Fatalf("value was not expected %v", deleted) } if actual, loaded := m.LoadOrStore("foo", "bar"); loaded { t.Fatalf("value was not expected %v", actual) } } func TestMap_EmptyStringKey(t *testing.T) { m := NewMap[string, string]() m.Store("", "foobar") v, ok := m.Load("") if !ok { t.Fatal("value was expected") } if v != "foobar" { t.Fatalf("value does not match: %v", v) } } func TestMapStore_NilValue(t *testing.T) { m := NewMap[string, *struct{}]() m.Store("foo", nil) v, ok := m.Load("foo") if !ok { t.Fatal("nil value was 
expected") } if v != nil { t.Fatalf("value was not nil: %v", v) } } func TestMapLoadOrStore_NilValue(t *testing.T) { m := NewMap[string, *struct{}]() m.LoadOrStore("foo", nil) v, loaded := m.LoadOrStore("foo", nil) if !loaded { t.Fatal("nil value was expected") } if v != nil { t.Fatalf("value was not nil: %v", v) } } func TestMapLoadOrStore_NonNilValue(t *testing.T) { type foo struct{} m := NewMap[string, *foo]() newv := &foo{} v, loaded := m.LoadOrStore("foo", newv) if loaded { t.Fatal("no value was expected") } if v != newv { t.Fatalf("value does not match: %v", v) } newv2 := &foo{} v, loaded = m.LoadOrStore("foo", newv2) if !loaded { t.Fatal("value was expected") } if v != newv { t.Fatalf("value does not match: %v", v) } } func TestMapLoadAndStore_NilValue(t *testing.T) { m := NewMap[string, *struct{}]() m.LoadAndStore("foo", nil) v, loaded := m.LoadAndStore("foo", nil) if !loaded { t.Fatal("nil value was expected") } if v != nil { t.Fatalf("value was not nil: %v", v) } v, loaded = m.Load("foo") if !loaded { t.Fatal("nil value was expected") } if v != nil { t.Fatalf("value was not nil: %v", v) } } func TestMapLoadAndStore_NonNilValue(t *testing.T) { m := NewMap[string, int]() v1 := 1 v, loaded := m.LoadAndStore("foo", v1) if loaded { t.Fatal("no value was expected") } if v != v1 { t.Fatalf("value does not match: %v", v) } v2 := 2 v, loaded = m.LoadAndStore("foo", v2) if !loaded { t.Fatal("value was expected") } if v != v1 { t.Fatalf("value does not match: %v", v) } v, loaded = m.Load("foo") if !loaded { t.Fatal("value was expected") } if v != v2 { t.Fatalf("value does not match: %v", v) } } func TestMapAll(t *testing.T) { m := NewMap[string, int]() for range m.All() { t.Fatal("got an iteration on empty map") } for i := range 1000 { m.Store(strconv.Itoa(i), i) } iters := 0 met := make(map[string]int) for key, value := range m.All() { if key != strconv.Itoa(value) { t.Fatalf("got unexpected key/value for iteration %d: %v/%v", iters, key, value) break } met[key] += 
1 iters++ } if iters != 1000 { t.Fatalf("got unexpected number of iterations: %d", iters) } for i := range 1000 { if c := met[strconv.Itoa(i)]; c != 1 { t.Fatalf("range did not iterate correctly over %d: %d", i, c) } } } func TestMapAll_Break(t *testing.T) { m := NewMap[string, int]() for i := range 100 { m.Store(strconv.Itoa(i), i) } iters := 0 for range m.All() { iters++ if iters == 50 { break } } if iters != 50 { t.Fatalf("got unexpected number of iterations: %d", iters) } } func TestMapAll_NestedDelete(t *testing.T) { const numEntries = 256 m := NewMap[string, int]() for i := range numEntries { m.Store(strconv.Itoa(i), i) } for key := range m.All() { m.Delete(key) } for i := range numEntries { if _, ok := m.Load(strconv.Itoa(i)); ok { t.Fatalf("value found for %d", i) } } } func TestMapRange(t *testing.T) { const numEntries = 1000 m := NewMap[string, int]() for i := range numEntries { m.Store(strconv.Itoa(i), i) } iters := 0 met := make(map[string]int) m.Range(func(key string, value int) bool { if key != strconv.Itoa(value) { t.Fatalf("got unexpected key/value for iteration %d: %v/%v", iters, key, value) return false } met[key] += 1 iters++ return true }) if iters != numEntries { t.Fatalf("got unexpected number of iterations: %d", iters) } for i := range numEntries { if c := met[strconv.Itoa(i)]; c != 1 { t.Fatalf("range did not iterate correctly over %d: %d", i, c) } } } func TestMapRange_FalseReturned(t *testing.T) { m := NewMap[string, int]() for i := range 100 { m.Store(strconv.Itoa(i), i) } iters := 0 m.Range(func(key string, value int) bool { if key != strconv.Itoa(value) { t.Fatalf("got unexpected key/value for iteration %d: %v/%v", iters, key, value) } iters++ return iters != 13 }) if iters != 13 { t.Fatalf("got unexpected number of iterations: %d", iters) } } func TestMapRange_NestedDelete(t *testing.T) { const numEntries = 256 m := NewMap[string, int]() for i := range numEntries { m.Store(strconv.Itoa(i), i) } m.Range(func(key string, value int) bool 
{ if key != strconv.Itoa(value) { t.Fatalf("got unexpected key/value: %v/%v", key, value) } m.Delete(key) return true }) for i := range numEntries { if _, ok := m.Load(strconv.Itoa(i)); ok { t.Fatalf("value found for %d", i) } } } func TestMapRangeRelaxed(t *testing.T) { const numEntries = 1000 m := NewMap[string, int]() for i := range numEntries { m.Store(strconv.Itoa(i), i) } iters := 0 met := make(map[string]int) m.RangeRelaxed(func(key string, value int) bool { if key != strconv.Itoa(value) { t.Fatalf("got unexpected key/value for iteration %d: %v/%v", iters, key, value) return false } met[key] += 1 iters++ return true }) if iters != numEntries { t.Fatalf("got unexpected number of iterations: %d", iters) } for i := range numEntries { if c := met[strconv.Itoa(i)]; c != 1 { t.Fatalf("range did not iterate correctly over %d: %d", i, c) } } } func TestMapRangeRelaxed_FalseReturned(t *testing.T) { m := NewMap[string, int]() for i := range 100 { m.Store(strconv.Itoa(i), i) } iters := 0 m.RangeRelaxed(func(key string, value int) bool { if key != strconv.Itoa(value) { t.Fatalf("got unexpected key/value for iteration %d: %v/%v", iters, key, value) } iters++ return iters != 13 }) if iters != 13 { t.Fatalf("got unexpected number of iterations: %d", iters) } } func TestMapRangeRelaxed_NestedDelete(t *testing.T) { const numEntries = 256 m := NewMap[string, int]() for i := range numEntries { m.Store(strconv.Itoa(i), i) } m.RangeRelaxed(func(key string, value int) bool { if key != strconv.Itoa(value) { t.Fatalf("got unexpected key/value: %v/%v", key, value) } m.Delete(key) return true }) for i := range numEntries { if _, ok := m.Load(strconv.Itoa(i)); ok { t.Fatalf("value found for %d", i) } } } func TestMapAllRelaxed(t *testing.T) { const numEntries = 1000 m := NewMap[string, int]() for i := range numEntries { m.Store(strconv.Itoa(i), i) } iters := 0 met := make(map[string]int) for key, value := range m.AllRelaxed() { if key != strconv.Itoa(value) { t.Fatalf("got unexpected 
key/value for iteration %d: %v/%v", iters, key, value) } met[key] += 1 iters++ } if iters != numEntries { t.Fatalf("got unexpected number of iterations: %d", iters) } for i := range numEntries { if c := met[strconv.Itoa(i)]; c != 1 { t.Fatalf("all did not iterate correctly over %d: %d", i, c) } } } func TestMapDeleteMatching(t *testing.T) { const numEntries = 1000 m := NewMap[string, int]() for i := range numEntries { m.Store(strconv.Itoa(i), i) } // Delete even values. deleted := m.DeleteMatching(func(key string, value int) (del, stop bool) { return value%2 == 0, false }) if deleted != numEntries/2 { t.Fatalf("expected %d deleted, got %d", numEntries/2, deleted) } if m.Size() != numEntries/2 { t.Fatalf("expected size %d, got %d", numEntries/2, m.Size()) } // Verify only odd values remain. for i := range numEntries { _, ok := m.Load(strconv.Itoa(i)) if i%2 == 0 && ok { t.Fatalf("even value %d should have been deleted", i) } if i%2 != 0 && !ok { t.Fatalf("odd value %d should not have been deleted", i) } } } func TestMapDeleteMatching_Cancel(t *testing.T) { const numEntries = 100 m := NewMap[string, int]() for i := range numEntries { m.Store(strconv.Itoa(i), i) } // Delete entries and cancel after 10 deletions. 
callCount := 0 deleted := m.DeleteMatching(func(key string, value int) (del, stop bool) { callCount++ if callCount == 10 { return true, true // delete this one and cancel } return true, false }) if deleted != 10 { t.Fatalf("expected 10 deleted, got %d", deleted) } if callCount != 10 { t.Fatalf("expected f to be called 10 times, got %d", callCount) } if m.Size() != numEntries-10 { t.Fatalf("expected size %d, got %d", numEntries-10, m.Size()) } } func TestMapDeleteMatching_EmptyMap(t *testing.T) { m := NewMap[string, int]() callCount := 0 deleted := m.DeleteMatching(func(key string, value int) (del, stop bool) { callCount++ return false, false }) if deleted != 0 { t.Fatalf("expected 0 deleted on empty map, got %d", deleted) } if callCount != 0 { t.Fatalf("expected f to be called 0 times, got %d", callCount) } } func TestMapDeleteMatching_NoDeletions(t *testing.T) { const numEntries = 100 m := NewMap[string, int]() for i := range numEntries { m.Store(strconv.Itoa(i), i) } callCount := 0 deleted := m.DeleteMatching(func(key string, value int) (del, stop bool) { callCount++ return false, false // never delete }) if deleted != 0 { t.Fatalf("expected 0 deleted, got %d", deleted) } if callCount != numEntries { t.Fatalf("expected f to be called %d times, got %d", numEntries, callCount) } if m.Size() != numEntries { t.Fatalf("expected size %d, got %d", numEntries, m.Size()) } } func TestMapDeleteMatching_AllDeleted(t *testing.T) { const numEntries = 256 m := NewMap[string, int]() for i := range numEntries { m.Store(strconv.Itoa(i), i) } deleted := m.DeleteMatching(func(key string, value int) (del, stop bool) { return true, false // delete all }) if deleted != numEntries { t.Fatalf("expected %d deleted, got %d", numEntries, deleted) } if m.Size() != 0 { t.Fatalf("expected size 0, got %d", m.Size()) } } func testParallelRangeRelaxed(t *testing.T, numGoroutines int) { const numEntries = 10000 const numIterations = 50 m := NewMap[int, int]() for i := range numEntries { 
m.Store(i, i) } var wg sync.WaitGroup var totalIterations atomic.Int64 // Launch goroutines that iterate using RangeRelaxed. for range numGoroutines / 2 { wg.Add(1) go func() { defer wg.Done() for range numIterations { m.RangeRelaxed(func(key int, value int) bool { if key != value { t.Errorf("key %d != value %d", key, value) } totalIterations.Add(1) return true }) } }() } // Launch goroutines that modify the map. for range numGoroutines / 2 { wg.Add(1) go func() { defer wg.Done() for range numIterations { for i := range numEntries { m.Store(i, i) if i%10 == 0 { m.Delete(i) m.Store(i, i) } } } }() } wg.Wait() if totalIterations.Load() == 0 { t.Error("expected some iterations to occur") } } func TestMapParallelRangeRelaxed(t *testing.T) { testParallelRangeRelaxed(t, 2) testParallelRangeRelaxed(t, runtime.GOMAXPROCS(0)) testParallelRangeRelaxed(t, 100) } func testParallelDeleteMatching(t *testing.T, numGoroutines int) { const numEntries = 10000 const numIterations = 50 m := NewMap[int, int]() for i := range numEntries { m.Store(i, i) } var wg sync.WaitGroup var totalDeleted atomic.Int64 // Launch goroutines that delete even numbers. for range numGoroutines / 2 { wg.Add(1) go func() { defer wg.Done() for range numIterations { deleted := m.DeleteMatching(func(key int, value int) (del, stop bool) { return key%2 == 0, false }) totalDeleted.Add(int64(deleted)) } }() } // Launch goroutines that re-add entries. for range numGoroutines / 2 { wg.Add(1) go func() { defer wg.Done() for range numIterations { for i := range numEntries { m.Store(i, i) } } }() } wg.Wait() // Verify map is in consistent state. 
size := m.Size() rangeCount := 0 m.Range(func(key int, value int) bool { if key != value { t.Errorf("key %d != value %d", key, value) } rangeCount++ return true }) if size != rangeCount { t.Errorf("size %d != range count %d", size, rangeCount) } if totalDeleted.Load() == 0 { t.Error("expected some deletions to occur") } } func TestMapParallelDeleteMatching(t *testing.T) { testParallelDeleteMatching(t, 2) testParallelDeleteMatching(t, runtime.GOMAXPROCS(0)) testParallelDeleteMatching(t, 100) } func TestMapDeleteMatching_ConcurrentResize(t *testing.T) { // This test attempts to cover the resize paths in DeleteMatching: // 1. Resize in progress during DeleteMatching iteration // 2. Table changed (resize completed) during DeleteMatching iteration const numIterations = 1000 const numEntries = 100 for iter := range numIterations { // Start with minimal size to maximize resize frequency m := NewMap[int, int]() // Pre-fill just enough to have entries for i := range numEntries { m.Store(i, i) } var wg sync.WaitGroup start := make(chan struct{}) // Goroutines that trigger resize by adding many new entries for range 8 { wg.Add(1) go func() { defer wg.Done() <-start for i := numEntries; i < numEntries*10; i++ { m.Store(i, i) } }() } // Goroutines that call DeleteMatching repeatedly during resize for range 8 { wg.Add(1) go func() { defer wg.Done() <-start for range 10 { m.DeleteMatching(func(key int, value int) (del, stop bool) { return key%5 == 0, false }) } }() } // Start all goroutines simultaneously close(start) wg.Wait() // Verify map consistency size := m.Size() rangeCount := 0 m.Range(func(key int, value int) bool { rangeCount++ return true }) if size != rangeCount { t.Errorf("iteration %d: size %d != range count %d", iter, size, rangeCount) } } } func TestMapStringStore(t *testing.T) { const numEntries = 128 m := NewMap[string, int]() for i := range numEntries { m.Store(strconv.Itoa(i), i) } for i := range numEntries { v, ok := m.Load(strconv.Itoa(i)) if !ok { 
t.Fatalf("value not found for %d", i) } if v != i { t.Fatalf("values do not match for %d: %v", i, v) } } } func TestMapIntStore(t *testing.T) { const numEntries = 128 m := NewMap[int, int]() for i := range numEntries { m.Store(i, i) } for i := range numEntries { v, ok := m.Load(i) if !ok { t.Fatalf("value not found for %d", i) } if v != i { t.Fatalf("values do not match for %d: %v", i, v) } } } func TestMapStore_Int64Keys(t *testing.T) { const numEntries = 128 m := NewMap[int64, int64]() for i := range numEntries { m.Store(int64(i), int64(i)) } for i := range numEntries { v, ok := m.Load(int64(i)) if !ok { t.Fatalf("value not found for %d", i) } if v != int64(i) { t.Fatalf("values do not match for %d: %v", i, v) } } } func TestMapStore_Uint64Keys(t *testing.T) { const numEntries = 128 m := NewMap[uint64, uint64]() for i := range numEntries { m.Store(uint64(i), uint64(i)) } for i := range numEntries { v, ok := m.Load(uint64(i)) if !ok { t.Fatalf("value not found for %d", i) } if v != uint64(i) { t.Fatalf("values do not match for %d: %v", i, v) } } } func TestMapStore_UintptrKeys(t *testing.T) { const numEntries = 128 m := NewMap[uintptr, uintptr]() for i := range numEntries { m.Store(uintptr(i), uintptr(i)) } for i := range numEntries { v, ok := m.Load(uintptr(i)) if !ok { t.Fatalf("value not found for %d", i) } if v != uintptr(i) { t.Fatalf("values do not match for %d: %v", i, v) } } } func TestMapStore_StructKeys_IntValues(t *testing.T) { const numEntries = 128 m := NewMap[point, int]() for i := range numEntries { m.Store(point{int32(i), -int32(i)}, i) } for i := range numEntries { v, ok := m.Load(point{int32(i), -int32(i)}) if !ok { t.Fatalf("value not found for %d", i) } if v != i { t.Fatalf("values do not match for %d: %v", i, v) } } } func TestMapStore_StructKeys_StructValues(t *testing.T) { const numEntries = 128 m := NewMap[point, point]() for i := range numEntries { m.Store(point{int32(i), -int32(i)}, point{-int32(i), int32(i)}) } for i := range numEntries 
{
		v, ok := m.Load(point{int32(i), -int32(i)})
		if !ok {
			t.Fatalf("value not found for %d", i)
		}
		if v.x != -int32(i) {
			t.Fatalf("x value does not match for %d: %v", i, v)
		}
		if v.y != int32(i) {
			t.Fatalf("y value does not match for %d: %v", i, v)
		}
	}
}

// TestMapLoadOrStore verifies that LoadOrStore reports loaded=true and hands
// back the previously stored value for keys that are already in the map.
func TestMapLoadOrStore(t *testing.T) {
	const numEntries = 1000
	m := NewMap[string, int]()
	for i := range numEntries {
		m.Store(strconv.Itoa(i), i)
	}
	for i := range numEntries {
		v, loaded := m.LoadOrStore(strconv.Itoa(i), i)
		if !loaded {
			t.Fatalf("value not found for %d", i)
		}
		if v != i {
			t.Fatalf("values do not match for %d: %v", i, v)
		}
	}
}

// TestMapLoadOrCompute exercises LoadOrCompute in three passes over the same
// keys: a cancelled computation (nothing stored), a successful computation,
// and a load of the already computed values.
func TestMapLoadOrCompute(t *testing.T) {
	const numEntries = 1000
	m := NewMap[string, int]()
	// Pass 1: the value function cancels, so the zero value comes back and
	// the map stays empty.
	for i := range numEntries {
		v, loaded := m.LoadOrCompute(strconv.Itoa(i), func() (newValue int, cancel bool) {
			return i, true
		})
		if loaded {
			t.Fatalf("value not computed for %d", i)
		}
		if v != 0 {
			t.Fatalf("values do not match for %d: %v", i, v)
		}
	}
	if m.Size() != 0 {
		t.Fatalf("zero map size expected: %d", m.Size())
	}
	// Pass 2: the value function succeeds and its result is returned.
	for i := range numEntries {
		v, loaded := m.LoadOrCompute(strconv.Itoa(i), func() (newValue int, cancel bool) {
			return i, false
		})
		if loaded {
			t.Fatalf("value not computed for %d", i)
		}
		if v != i {
			t.Fatalf("values do not match for %d: %v", i, v)
		}
	}
	// Pass 3: the keys exist now, so the value function must not run.
	for i := range numEntries {
		v, loaded := m.LoadOrCompute(strconv.Itoa(i), func() (newValue int, cancel bool) {
			t.Fatal("value func invoked")
			return newValue, false
		})
		if !loaded {
			t.Fatalf("value not loaded for %d", i)
		}
		if v != i {
			t.Fatalf("values do not match for %d: %v", i, v)
		}
	}
}

// TestMapLoadOrCompute_FunctionCalledOnce checks that every stored value
// equals its key when the value function itself advances the key counter.
func TestMapLoadOrCompute_FunctionCalledOnce(t *testing.T) {
	m := NewMap[int, int]()
	// The loop has no post statement: i only advances inside the value
	// function, which returns the pre-increment i as the value for key i.
	for i := 0; i < 100; {
		m.LoadOrCompute(i, func() (newValue int, cancel bool) {
			newValue, i = i, i+1
			return newValue, false
		})
	}
	m.Range(func(k, v int) bool {
		if k != v {
			t.Fatalf("%dth key is not equal to value %d", k, v)
		}
		return true
	})
}

// TestMapLoadOrCompute_ExistingKey checks that LoadOrCompute returns the
// stored value for a present key without invoking the value function.
func TestMapLoadOrCompute_ExistingKey(t *testing.T) {
	m := NewMap[string, int]()
	m.Store("key", 42)
	v, loaded := m.LoadOrCompute("key", func() (int, bool) {
		t.Fatal("value func should not be called for existing key")
		return 100, false
	})
	if !loaded {
		t.Fatal("expected loaded to be true")
	}
	if v != 42 {
		t.Fatalf("expected value 42, got %d", v)
	}
}

// TestMapLoadOrCompute_ConcurrentExistingKey races a Store against a
// LoadOrCompute on the same key and accepts either outcome (42 or 100).
func TestMapLoadOrCompute_ConcurrentExistingKey(t *testing.T) {
	// This test attempts to cover the race condition where:
	// 1. LoadOrCompute's fast path doesn't find the key
	// 2. Another goroutine inserts the key
	// 3. LoadOrCompute acquires the lock and finds the key
	const numIters = 10000
	for range numIters {
		m := NewMap[int, int]()
		var wg sync.WaitGroup
		wg.Add(2)
		go func() {
			defer wg.Done()
			m.Store(1, 42)
		}()
		go func() {
			defer wg.Done()
			m.LoadOrCompute(1, func() (int, bool) {
				return 100, false
			})
		}()
		wg.Wait()
		v, ok := m.Load(1)
		if !ok {
			t.Fatal("key should exist")
		}
		if v != 42 && v != 100 {
			t.Fatalf("unexpected value: %d", v)
		}
	}
}

// TestMapOfCompute walks Compute through UpdateOp, CancelOp and DeleteOp on
// both an existing and a missing key, checking the value and ok flag returned
// at every step.
func TestMapOfCompute(t *testing.T) {
	m := NewMap[string, int]()
	// Store a new value.
	v, ok := m.Compute("foobar", func(oldValue int, loaded bool) (newValue int, op ComputeOp) {
		if oldValue != 0 {
			t.Fatalf("oldValue should be 0 when computing a new value: %d", oldValue)
		}
		if loaded {
			t.Fatal("loaded should be false when computing a new value")
		}
		newValue = 42
		op = UpdateOp
		return
	})
	if v != 42 {
		t.Fatalf("v should be 42 when computing a new value: %d", v)
	}
	if !ok {
		t.Fatal("ok should be true when computing a new value")
	}
	// Update an existing value.
	v, ok = m.Compute("foobar", func(oldValue int, loaded bool) (newValue int, op ComputeOp) {
		if oldValue != 42 {
			t.Fatalf("oldValue should be 42 when updating the value: %d", oldValue)
		}
		if !loaded {
			t.Fatal("loaded should be true when updating the value")
		}
		newValue = oldValue + 42
		op = UpdateOp
		return
	})
	if v != 84 {
		t.Fatalf("v should be 84 when updating the value: %d", v)
	}
	if !ok {
		t.Fatal("ok should be true when updating the value")
	}
	// Check that CancelOp doesn't update the value.
	v, ok = m.Compute("foobar", func(oldValue int, loaded bool) (newValue int, op ComputeOp) {
		return 0, CancelOp
	})
	if v != 84 {
		t.Fatalf("v should be 84 after using CancelOp: %d", v)
	}
	if !ok {
		t.Fatal("ok should be true after using CancelOp on an existing value")
	}
	// Delete an existing value.
	v, ok = m.Compute("foobar", func(oldValue int, loaded bool) (newValue int, op ComputeOp) {
		if oldValue != 84 {
			t.Fatalf("oldValue should be 84 when deleting the value: %d", oldValue)
		}
		if !loaded {
			t.Fatal("loaded should be true when deleting the value")
		}
		op = DeleteOp
		return
	})
	if v != 84 {
		t.Fatalf("v should be 84 when deleting the value: %d", v)
	}
	if ok {
		t.Fatal("ok should be false when deleting the value")
	}
	// Try to delete a non-existing value. Notice different key.
	v, ok = m.Compute("barbaz", func(oldValue int, loaded bool) (newValue int, op ComputeOp) {
		if oldValue != 0 {
			t.Fatalf("oldValue should be 0 when trying to delete a non-existing value: %d", oldValue)
		}
		if loaded {
			t.Fatal("loaded should be false when trying to delete a non-existing value")
		}
		// We're returning a non-zero value, but the map should ignore it.
		newValue = 42
		op = DeleteOp
		return
	})
	if v != 0 {
		t.Fatalf("v should be 0 when trying to delete a non-existing value: %d", v)
	}
	if ok {
		t.Fatal("ok should be false when trying to delete a non-existing value")
	}
	// Try CancelOp on a non-existing value.
	v, ok = m.Compute("barbaz", func(oldValue int, loaded bool) (newValue int, op ComputeOp) {
		if oldValue != 0 {
			t.Fatalf("oldValue should be 0 when using CancelOp on a non-existing value: %d", oldValue)
		}
		if loaded {
			t.Fatal("loaded should be false when using CancelOp on a non-existing value")
		}
		// We're returning a non-zero value, but the map should ignore it.
		newValue = 42
		op = CancelOp
		return
	})
	if v != 0 {
		t.Fatalf("v should be 0 when using CancelOp on a non-existing value: %d", v)
	}
	if ok {
		t.Fatal("ok should be false when using CancelOp on a non-existing value")
	}
}

// TestMapCompute_CancelOpOnOverflowBucket checks that Compute with CancelOp on
// a missing key returns the zero value and ok=false and leaves the key absent,
// across maps filled to varying levels.
func TestMapCompute_CancelOpOnOverflowBucket(t *testing.T) {
	// This test covers the CancelOp path when inserting into a new overflow bucket.
	// We need to fill buckets completely so that a new key requires creating
	// an overflow bucket, then return CancelOp to abort the insertion.
	const numAttempts = 1000
	const sentinel = 999
	for attempt := range numAttempts {
		m := NewMap[int, int]()
		// Fill the map to create conditions where some bucket chains are full.
		// Insert entries - some will hash to the same bucket creating overflow.
		numEntries := 100 + (attempt % 100) // Vary the fill level
		for i := range numEntries {
			m.Store(i, i)
		}
		// Try Compute with CancelOp for new keys.
		baseKey := 10000 + attempt*1000
		for i := range 50 {
			key := baseKey + i
			v, ok := m.Compute(key, func(oldValue int, loaded bool) (newValue int, op ComputeOp) {
				if loaded {
					t.Fatal("key should not exist")
				}
				return sentinel, CancelOp
			})
			if ok {
				t.Fatal("ok should be false when CancelOp is returned for new key")
			}
			// Both code paths should return zero value for CancelOp
			if v != 0 {
				t.Fatalf("expected zero value for CancelOp, got: %d", v)
			}
			// Verify the key was not inserted
			if _, exists := m.Load(key); exists {
				t.Fatalf("key %d should not exist after CancelOp", key)
			}
		}
	}
}

// TestMapStringStoreThenDelete stores string keys, deletes them one by one and
// checks that each key is gone right after its deletion.
func TestMapStringStoreThenDelete(t *testing.T) {
	const numEntries = 1000
	m := NewMap[string, int]()
	for i := range numEntries {
		m.Store(strconv.Itoa(i), i)
	}
	for i := range numEntries {
		m.Delete(strconv.Itoa(i))
		if _, ok := m.Load(strconv.Itoa(i)); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

// TestMapIntStoreThenDelete is the int32-key variant of the store/delete test.
func TestMapIntStoreThenDelete(t *testing.T) {
	const numEntries = 1000
	m := NewMap[int32, int32]()
	for i := range numEntries {
		m.Store(int32(i), int32(i))
	}
	for i := range numEntries {
		m.Delete(int32(i))
		if _, ok := m.Load(int32(i)); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

// TestMapStoreThenDelete_Int64Keys is the int64-key variant of the
// store/delete test.
func TestMapStoreThenDelete_Int64Keys(t *testing.T) {
	const numEntries = 1000
	m := NewMap[int64, int64]()
	for i := range numEntries {
		m.Store(int64(i), int64(i))
	}
	for i := range numEntries {
		m.Delete(int64(i))
		if _, ok := m.Load(int64(i)); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

// TestMapStoreThenDelete_Uint64Keys is the uint64-key variant of the
// store/delete test.
func TestMapStoreThenDelete_Uint64Keys(t *testing.T) {
	const numEntries = 1000
	m := NewMap[uint64, uint64]()
	for i := range numEntries {
		m.Store(uint64(i), uint64(i))
	}
	for i := range numEntries {
		m.Delete(uint64(i))
		if _, ok := m.Load(uint64(i)); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

// TestMapStoreThenDelete_UintptrKeys is the uintptr-key variant of the
// store/delete test.
func TestMapStoreThenDelete_UintptrKeys(t *testing.T) {
	const numEntries = 1000
	m := NewMap[uintptr, uintptr]()
	for i := range numEntries {
		m.Store(uintptr(i), uintptr(i))
	}
	for i := range numEntries {
		m.Delete(uintptr(i))
		if _, ok := m.Load(uintptr(i)); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

// TestMapStructStoreThenDelete is the struct-key (point) variant of the
// store/delete test.
func TestMapStructStoreThenDelete(t *testing.T) {
	const numEntries = 1000
	m := NewMap[point, string]()
	for i := range numEntries {
		m.Store(point{int32(i), 42}, strconv.Itoa(i))
	}
	for i := range numEntries {
		m.Delete(point{int32(i), 42})
		if _, ok := m.Load(point{int32(i), 42}); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

// TestMapStringStoreThenLoadAndDelete checks that LoadAndDelete returns the
// stored value for string keys and that the key is absent afterwards.
func TestMapStringStoreThenLoadAndDelete(t *testing.T) {
	const numEntries = 1000
	m := NewMap[string, int]()
	for i := range numEntries {
		m.Store(strconv.Itoa(i), i)
	}
	for i := range numEntries {
		if v, loaded := m.LoadAndDelete(strconv.Itoa(i)); !loaded || v != i {
			t.Fatalf("value was not found or different for %d: %v", i, v)
		}
		if _, ok := m.Load(strconv.Itoa(i)); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

// TestMapIntStoreThenLoadAndDelete is the int-key variant of the
// LoadAndDelete test; it also verifies the returned value.
func TestMapIntStoreThenLoadAndDelete(t *testing.T) {
	const numEntries = 1000
	m := NewMap[int, int]()
	for i := range numEntries {
		m.Store(i, i)
	}
	for i := range numEntries {
		if v, loaded := m.LoadAndDelete(i); !loaded || v != i {
			t.Fatalf("value was not found or different for %d: %v", i, v)
		}
		if _, ok := m.Load(i); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

// TestMapStoreThenLoadAndDelete_Int64Keys is the int64-key variant of the
// LoadAndDelete test; it also verifies the returned value.
func TestMapStoreThenLoadAndDelete_Int64Keys(t *testing.T) {
	const numEntries = 1000
	m := NewMap[int64, int64]()
	for i := range numEntries {
		m.Store(int64(i), int64(i))
	}
	for i := range numEntries {
		if v, loaded := m.LoadAndDelete(int64(i)); !loaded || v != int64(i) {
			t.Fatalf("value was not found or different for %d: %v", i, v)
		}
		if _, ok := m.Load(int64(i)); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

// TestMapStoreThenLoadAndDelete_Uint64Keys is the uint64-key variant of the
// LoadAndDelete test; it also verifies the returned value.
func TestMapStoreThenLoadAndDelete_Uint64Keys(t *testing.T) {
	const numEntries = 1000
	m := NewMap[uint64, uint64]()
	for i := range numEntries {
		m.Store(uint64(i), uint64(i))
	}
	for i := range numEntries {
		if v, loaded := m.LoadAndDelete(uint64(i)); !loaded || v != uint64(i) {
			t.Fatalf("value was not found or different for %d: %v", i, v)
		}
		if _, ok := m.Load(uint64(i)); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

func
TestMapStoreThenLoadAndDelete_UintptrKeys(t *testing.T) {
	// uintptr-key variant of the LoadAndDelete test; the returned value is
	// verified as well as the loaded flag.
	const numEntries = 1000
	m := NewMap[uintptr, uintptr]()
	for i := range numEntries {
		m.Store(uintptr(i), uintptr(i))
	}
	for i := range numEntries {
		if v, loaded := m.LoadAndDelete(uintptr(i)); !loaded || v != uintptr(i) {
			t.Fatalf("value was not found or different for %d: %v", i, v)
		}
		if _, ok := m.Load(uintptr(i)); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

// TestMapStructStoreThenLoadAndDelete is the struct-key (point) variant of the
// LoadAndDelete test; it also verifies the returned value.
func TestMapStructStoreThenLoadAndDelete(t *testing.T) {
	const numEntries = 1000
	m := NewMap[point, int]()
	for i := range numEntries {
		m.Store(point{42, int32(i)}, i)
	}
	for i := range numEntries {
		if v, loaded := m.LoadAndDelete(point{42, int32(i)}); !loaded || v != i {
			t.Fatalf("value was not found or different for %d: %v", i, v)
		}
		if _, ok := m.Load(point{42, int32(i)}); ok {
			t.Fatalf("value was not expected for %d", i)
		}
	}
}

// TestMapStoreThenParallelDelete_DoesNotShrinkBelowMinTableLen deletes every
// key from two goroutines at once and then expects the root bucket count to
// equal DefaultMinMapTableLen.
func TestMapStoreThenParallelDelete_DoesNotShrinkBelowMinTableLen(t *testing.T) {
	const numEntries = 1000
	m := NewMap[int, int]()
	for i := range numEntries {
		m.Store(i, i)
	}
	cdone := make(chan bool)
	go func() {
		for i := range numEntries {
			m.Delete(i)
		}
		cdone <- true
	}()
	go func() {
		for i := range numEntries {
			m.Delete(i)
		}
		cdone <- true
	}()
	// Wait for the goroutines to finish.
	<-cdone
	<-cdone
	stats := m.Stats()
	if stats.RootBuckets != DefaultMinMapTableLen {
		t.Fatalf("table length was different from the minimum: %d", stats.RootBuckets)
	}
}

// sizeBasedOnTypedRange counts the entries of m by iterating it with Range.
func sizeBasedOnTypedRange(m *Map[string, int]) int {
	size := 0
	m.Range(func(key string, value int) bool {
		size++
		return true
	})
	return size
}

// TestMapSize checks that Size tracks every Store and Delete and always agrees
// with the number of entries visited by Range.
func TestMapSize(t *testing.T) {
	const numEntries = 1000
	m := NewMap[string, int]()
	size := m.Size()
	if size != 0 {
		t.Fatalf("zero size expected: %d", size)
	}
	expectedSize := 0
	for i := range numEntries {
		m.Store(strconv.Itoa(i), i)
		expectedSize++
		size := m.Size()
		if size != expectedSize {
			t.Fatalf("size of %d was expected, got: %d", expectedSize, size)
		}
		rsize := sizeBasedOnTypedRange(m)
		if size != rsize {
			t.Fatalf("size does not match number of entries in Range: %v, %v", size, rsize)
		}
	}
	for i := range numEntries {
		m.Delete(strconv.Itoa(i))
		expectedSize--
		size := m.Size()
		if size != expectedSize {
			t.Fatalf("size of %d was expected, got: %d", expectedSize, size)
		}
		rsize := sizeBasedOnTypedRange(m)
		if size != rsize {
			t.Fatalf("size does not match number of entries in Range: %v, %v", size, rsize)
		}
	}
}

// TestMapClear checks that Clear leaves the map with zero Size and nothing to
// iterate in Range.
func TestMapClear(t *testing.T) {
	const numEntries = 1000
	m := NewMap[string, int]()
	for i := range numEntries {
		m.Store(strconv.Itoa(i), i)
	}
	size := m.Size()
	if size != numEntries {
		t.Fatalf("size of %d was expected, got: %d", numEntries, size)
	}
	m.Clear()
	size = m.Size()
	if size != 0 {
		t.Fatalf("zero size was expected, got: %d", size)
	}
	rsize := sizeBasedOnTypedRange(m)
	if rsize != 0 {
		t.Fatalf("zero number of entries in Range was expected, got: %d", rsize)
	}
}

// assertMapCapacity fails the test when the capacity reported by m.Stats()
// differs from expectedCap.
func assertMapCapacity[K comparable, V any](t *testing.T, m *Map[K, V], expectedCap int) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()
	stats := m.Stats()
	if stats.Capacity != expectedCap {
		t.Fatalf("capacity was different from %d: %d", expectedCap, stats.Capacity)
	}
}

// TestNewMapWithPresize checks the capacity produced by NewMap for the default
// case and for several WithPresize hints, including zero and negative ones.
func TestNewMapWithPresize(t *testing.T) {
	assertMapCapacity(t, NewMap[string, string](), DefaultMinMapTableCap)
	assertMapCapacity(t, NewMap[string, string](WithPresize(0)), DefaultMinMapTableCap)
	assertMapCapacity(t, NewMap[string, string](WithPresize(-100)), DefaultMinMapTableCap)
	assertMapCapacity(t, NewMap[string, string](WithPresize(500)), 1280)
	assertMapCapacity(t, NewMap[int, int](WithPresize(1_000_000)), 2621440)
	assertMapCapacity(t, NewMap[point, point](WithPresize(100)), 160)
}

// TestDeprecatedNewMapOf checks that a map created with NewMapOf supports a
// basic Store/Load round trip.
func TestDeprecatedNewMapOf(t *testing.T) {
	m := NewMapOf[string, int]()
	m.Store("foo", 42)
	v, ok := m.Load("foo")
	if !ok || v != 42 {
		t.Fatal("NewMapOf should work like NewMap")
	}
}

// TestDeprecatedWithSerialResize checks that a map created with the
// WithSerialResize option supports a basic Store/Load round trip.
func TestDeprecatedWithSerialResize(t *testing.T) {
	// WithSerialResize is a no-op, just verify it doesn't panic
	m := NewMap[string, int](WithSerialResize())
	m.Store("foo", 42)
	v, ok := m.Load("foo")
	if !ok || v != 42 {
		t.Fatal("WithSerialResize should be a no-op")
	}
}

// TestNewMapWithPresize_DoesNotShrinkBelowMinTableLen grows a presized map
// past its initial table, deletes everything, and expects the table to shrink
// back to exactly the presized length.
func TestNewMapWithPresize_DoesNotShrinkBelowMinTableLen(t *testing.T) {
	const minTableLen = 1024
	const numEntries = int(minTableLen * EntriesPerMapBucket * MapLoadFactor)
	m := NewMap[int, int](WithPresize(numEntries))
	for i := range 2 * numEntries {
		m.Store(i, i)
	}
	stats := m.Stats()
	if stats.RootBuckets <= minTableLen {
		t.Fatalf("table did not grow: %d", stats.RootBuckets)
	}
	for i := range 2 * numEntries {
		m.Delete(i)
	}
	stats = m.Stats()
	if stats.RootBuckets != minTableLen {
		t.Fatalf("table length was different from the minimum: %d", stats.RootBuckets)
	}
}

// TestNewMapGrowOnly_OnlyShrinksOnClear checks that a WithGrowOnly map keeps
// its grown table after deletions and returns to the initial table length
// only after Clear.
func TestNewMapGrowOnly_OnlyShrinksOnClear(t *testing.T) {
	const minTableLen = 128
	const numEntries = minTableLen * EntriesPerMapBucket
	m := NewMap[int, int](WithPresize(numEntries), WithGrowOnly())
	stats := m.Stats()
	initialTableLen := stats.RootBuckets
	for i := range 2 * numEntries {
		m.Store(i, i)
	}
	stats = m.Stats()
	maxTableLen := stats.RootBuckets
	if maxTableLen <= minTableLen {
		t.Fatalf("table did not grow: %d", maxTableLen)
	}
	for i := range numEntries {
		m.Delete(i)
	}
	stats = m.Stats()
	if stats.RootBuckets != maxTableLen {
		t.Fatalf("table length was different from the expected: %d", stats.RootBuckets)
	}
	m.Clear()
	stats = m.Stats()
	if stats.RootBuckets != initialTableLen {
		t.Fatalf("table length was different from the initial: %d", stats.RootBuckets)
	}
}

// TestMapResize fills the map with 100k entries and checks the growth
// statistics, then deletes everything and checks that the table shrank back
// to DefaultMinMapTableLen.
func TestMapResize(t *testing.T) {
	m := NewMap[string, int]()
	const numEntries = 100_000
	for i := range numEntries {
		m.Store(strconv.Itoa(i), i)
	}
	stats := m.Stats()
	if stats.Size != numEntries {
		t.Fatalf("size was too small: %d", stats.Size)
	}
	expectedCapacity := int(math.RoundToEven(MapLoadFactor+1)) * stats.RootBuckets * EntriesPerMapBucket
	if stats.Capacity > expectedCapacity {
		t.Fatalf("capacity was too large: %d, expected: %d", stats.Capacity, expectedCapacity)
	}
	if stats.RootBuckets <= DefaultMinMapTableLen {
		t.Fatalf("table was too small: %d", stats.RootBuckets)
	}
	if stats.TotalGrowths == 0 {
		t.Fatalf("non-zero total growths expected: %d", stats.TotalGrowths)
	}
	if stats.TotalShrinks > 0 {
		t.Fatalf("zero total shrinks expected: %d", stats.TotalShrinks)
	}
	// This is useful when debugging table resize and occupancy.
	// Use -v flag to see the output.
	t.Log(stats.ToString())
	for i := range numEntries {
		m.Delete(strconv.Itoa(i))
	}
	stats = m.Stats()
	if stats.Size > 0 {
		t.Fatalf("zero size was expected: %d", stats.Size)
	}
	expectedCapacity = stats.RootBuckets * EntriesPerMapBucket
	if stats.Capacity != expectedCapacity {
		t.Fatalf("unexpected capacity: %d, expected: %d", stats.Capacity, expectedCapacity)
	}
	if stats.RootBuckets != DefaultMinMapTableLen {
		t.Fatalf("table was too large: %d", stats.RootBuckets)
	}
	if stats.TotalShrinks == 0 {
		t.Fatalf("non-zero total shrinks expected: %d", stats.TotalShrinks)
	}
	t.Log(stats.ToString())
}

// TestMapResize_CounterLenLimit stores one million entries and expects the
// reported CounterLen to equal MaxMapCounterLen.
func TestMapResize_CounterLenLimit(t *testing.T) {
	const numEntries = 1_000_000
	m := NewMap[string, string]()
	for i := range numEntries {
		m.Store("foo"+strconv.Itoa(i), "bar"+strconv.Itoa(i))
	}
	stats := m.Stats()
	if stats.Size != numEntries {
		t.Fatalf("size was too small: %d", stats.Size)
	}
	if stats.CounterLen != MaxMapCounterLen {
		t.Fatalf("unexpected number of counter stripes: %d, expected: %d", stats.CounterLen, MaxMapCounterLen)
	}
}

// testParallelResize pre-fills a map and then lets numGoroutines goroutines
// store disjoint key ranges concurrently, verifying each stored value, the
// final size and that at least one growth happened.
func testParallelResize(t *testing.T, numGoroutines int) {
	m := NewMap[int, int]()
	// Fill the map to trigger resizing
	const initialEntries = 10000
	const newEntries = 5000
	for i := range initialEntries {
		m.Store(i, i*2)
	}
	// Start concurrent operations that should trigger helping behavior
	var wg sync.WaitGroup
	// Launch goroutines that will encounter resize operations
	for g := range numGoroutines {
		wg.Add(1)
		go func(goroutineID int) {
			defer wg.Done()
			// Perform many operations to trigger resize and helping
			for i := range newEntries {
				// Keys are disjoint per goroutine and from the initial fill.
				key := goroutineID*newEntries + i + initialEntries
				m.Store(key, key*2)
				// Verify the value
				if val, ok := m.Load(key); !ok || val != key*2 {
					t.Errorf("Failed to load key %d: got %v, %v", key, val, ok)
					return
				}
			}
		}(g)
	}
	wg.Wait()
	// Verify all entries are present
	finalSize := m.Size()
	expectedSize := initialEntries + numGoroutines*newEntries
	if finalSize != expectedSize {
		t.Errorf("Expected size %d, got %d", expectedSize, finalSize)
	}
	stats := m.Stats()
	if stats.TotalGrowths == 0 {
		t.Error("Expected at least one table growth due to concurrent operations")
	}
}

// TestMapParallelResize runs testParallelResize with 1, GOMAXPROCS and 100
// goroutines.
func TestMapParallelResize(t *testing.T) {
	testParallelResize(t, 1)
	testParallelResize(t, runtime.GOMAXPROCS(0))
	testParallelResize(t, 100)
}

// testParallelResizeWithSameKeys lets numGoroutines goroutines store the same
// key range concurrently and checks the final size and that the table grew.
func testParallelResizeWithSameKeys(t *testing.T, numGoroutines int) {
	m := NewMap[int, int]()
	// Pre-fill the map with even keys only.
	const entries = 1000
	for i := range entries {
		m.Store(2*i, 2*i)
	}
	// Start concurrent operations that should trigger helping behavior
	var wg sync.WaitGroup
	// Launch goroutines that all write the same, intersecting key range.
	for range numGoroutines {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := range 10 * entries {
				m.Store(i, i)
			}
		}()
	}
	wg.Wait()
	// Verify all entries are present
	finalSize := m.Size()
	expectedSize := 10 * entries
	if finalSize != expectedSize {
		t.Errorf("Expected size %d, got %d", expectedSize, finalSize)
	}
	stats := m.Stats()
	if stats.TotalGrowths == 0 {
		t.Error("Expected at least one table growth due to concurrent operations")
	}
}

// TestMapParallelResize_IntersectingKeys runs testParallelResizeWithSameKeys
// with 1, GOMAXPROCS and 100 goroutines.
func TestMapParallelResize_IntersectingKeys(t *testing.T) {
	testParallelResizeWithSameKeys(t, 1)
	testParallelResizeWithSameKeys(t, runtime.GOMAXPROCS(0))
	testParallelResizeWithSameKeys(t, 100)
}

// testParallelShrinking fills a map and then lets numGoroutines goroutines
// delete every key concurrently, checking that the map ends up empty and that
// at least one shrink happened.
func testParallelShrinking(t *testing.T, numGoroutines int) {
	m := NewMap[int, int]()
	// Fill the map so that the deletions below can trigger shrinking.
	const entries = 100000
	for i := range entries {
		m.Store(i, i)
	}
	// Start concurrent operations that should trigger helping behavior
	var wg sync.WaitGroup
	// Launch goroutines that all delete the same key range.
	for range numGoroutines {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := range entries {
				m.Delete(i)
			}
		}()
	}
	wg.Wait()
	// Verify all entries were deleted.
	finalSize := m.Size()
	if finalSize != 0 {
		t.Errorf("Expected size 0, got %d", finalSize)
	}
	stats := m.Stats()
	if stats.TotalShrinks == 0 {
		t.Error("Expected at least one table shrinking due to concurrent operations")
	}
}

// TestMapParallelShrinking runs testParallelShrinking with 1, GOMAXPROCS and
// 100 goroutines.
func TestMapParallelShrinking(t *testing.T) {
	testParallelShrinking(t, 1)
	testParallelShrinking(t, runtime.GOMAXPROCS(0))
	testParallelShrinking(t, 100)
}

// parallelSeqMapGrower stores numEntries sequential keys into m, either i or
// -i depending on positive, and signals completion on cdone.
func parallelSeqMapGrower(m *Map[int, int], numEntries int, positive bool, cdone chan bool) {
	for i := range numEntries {
		if positive {
			m.Store(i, i)
		} else {
			m.Store(-i, -i)
		}
	}
	cdone <- true
}

// TestMapParallelGrowth_GrowOnly grows the map from two goroutines, one with
// non-negative and one with non-positive keys, then verifies every entry.
func TestMapParallelGrowth_GrowOnly(t *testing.T) {
	const numEntries = 100_000
	m := NewMap[int, int]()
	cdone := make(chan bool)
	go parallelSeqMapGrower(m, numEntries, true, cdone)
	go parallelSeqMapGrower(m, numEntries, false, cdone)
	// Wait for the goroutines to finish.
	<-cdone
	<-cdone
	// Verify map contents.
	for i := -numEntries + 1; i < numEntries; i++ {
		v, ok := m.Load(i)
		if !ok {
			t.Fatalf("value not found for %d", i)
		}
		if v != i {
			t.Fatalf("values do not match for %d: %v", i, v)
		}
	}
	// Key 0 is written by both goroutines, hence the -1.
	if s := m.Size(); s != 2*numEntries-1 {
		t.Fatalf("unexpected size: %v", s)
	}
}

// parallelRandMapResizer performs numIters rounds; each round flips a coin and
// either stores or deletes all numEntries keys. It signals completion on
// cdone. The t parameter is currently unused.
func parallelRandMapResizer(t *testing.T, m *Map[string, int], numIters, numEntries int, cdone chan bool) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	for range numIters {
		coin := r.Int63n(2)
		for j := range numEntries {
			if coin == 1 {
				m.Store(strconv.Itoa(j), j)
			} else {
				m.Delete(strconv.Itoa(j))
			}
		}
	}
	cdone <- true
}

// TestMapParallelGrowth runs two parallelRandMapResizer goroutines and then
// checks that surviving entries are intact and that Size agrees with Range.
func TestMapParallelGrowth(t *testing.T) {
	const numIters = 1_000
	const numEntries = 2 * EntriesPerMapBucket * DefaultMinMapTableLen
	m := NewMap[string, int]()
	cdone := make(chan bool)
	go parallelRandMapResizer(t, m, numIters, numEntries, cdone)
	go parallelRandMapResizer(t, m, numIters, numEntries, cdone)
	// Wait for the goroutines to finish.
	<-cdone
	<-cdone
	// Verify map contents.
	for i := range numEntries {
		v, ok := m.Load(strconv.Itoa(i))
		if !ok {
			// The entry may be deleted and that's ok.
			continue
		}
		if v != i {
			t.Fatalf("values do not match for %d: %v", i, v)
		}
	}
	s := m.Size()
	if s > numEntries {
		t.Fatalf("unexpected size: %v", s)
	}
	rs := sizeBasedOnTypedRange(m)
	if s != rs {
		t.Fatalf("size does not match number of entries in Range: %v, %v", s, rs)
	}
}

// parallelRandMapClearer performs numIters rounds; each round flips a coin and
// then, numEntries times, either stores key j or clears the whole map. It
// signals completion on cdone. The t parameter is currently unused.
func parallelRandMapClearer(t *testing.T, m *Map[string, int], numIters, numEntries int, cdone chan bool) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	for range numIters {
		coin := r.Int63n(2)
		for j := range numEntries {
			if coin == 1 {
				m.Store(strconv.Itoa(j), j)
			} else {
				m.Clear()
			}
		}
	}
	cdone <- true
}

// TestMapParallelClear runs two parallelRandMapClearer goroutines and then
// checks that Size is bounded by numEntries and agrees with Range.
func TestMapParallelClear(t *testing.T) {
	const numIters = 100
	const numEntries = 1_000
	m := NewMap[string, int]()
	cdone := make(chan bool)
	go parallelRandMapClearer(t, m, numIters, numEntries, cdone)
	go parallelRandMapClearer(t, m, numIters, numEntries, cdone)
	// Wait for the goroutines to finish.
	<-cdone
	<-cdone
	// Verify map size.
	s := m.Size()
	if s > numEntries {
		t.Fatalf("unexpected size: %v", s)
	}
	rs := sizeBasedOnTypedRange(m)
	if s != rs {
		t.Fatalf("size does not match number of entries in Range: %v, %v", s, rs)
	}
}

// parallelSeqMapStorer repeatedly stores keys into m and immediately reads
// them back. With storeEach == 0 every key is stored; otherwise only keys
// divisible by storeEach. It signals completion on cdone.
func parallelSeqMapStorer(t *testing.T, m *Map[string, int], storeEach, numIters, numEntries int, cdone chan bool) {
	for range numIters {
		for j := range numEntries {
			if storeEach == 0 || j%storeEach == 0 {
				m.Store(strconv.Itoa(j), j)
				// Due to atomic snapshots we must see a "<j>"/j pair.
				v, ok := m.Load(strconv.Itoa(j))
				if !ok {
					t.Errorf("value was not found for %d", j)
					break
				}
				if v != j {
					t.Errorf("value was not expected for %d: %d", j, v)
					break
				}
			}
		}
	}
	cdone <- true
}

// TestMapParallelStores runs several parallelSeqMapStorer goroutines with
// different strides and then verifies that every key holds its own index.
func TestMapParallelStores(t *testing.T) {
	const numStorers = 4
	const numIters = 10_000
	const numEntries = 100
	m := NewMap[string, int]()
	cdone := make(chan bool)
	for i := range numStorers {
		go parallelSeqMapStorer(t, m, i, numIters, numEntries, cdone)
	}
	// Wait for the goroutines to finish.
	for range numStorers {
		<-cdone
	}
	// Verify map contents.
	for i := range numEntries {
		v, ok := m.Load(strconv.Itoa(i))
		if !ok {
			t.Fatalf("value not found for %d", i)
		}
		if v != i {
			t.Fatalf("values do not match for %d: %v", i, v)
		}
	}
}

// parallelRandMapStorer calls LoadOrStore on random keys numIters times and
// reports an error when an already present value differs from its key. It
// signals completion on cdone.
func parallelRandMapStorer(t *testing.T, m *Map[string, int], numIters, numEntries int, cdone chan bool) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	for range numIters {
		j := r.Intn(numEntries)
		if v, loaded := m.LoadOrStore(strconv.Itoa(j), j); loaded {
			if v != j {
				t.Errorf("value was not expected for %d: %d", j, v)
			}
		}
	}
	cdone <- true
}

// parallelRandMapDeleter calls LoadAndDelete on random keys numIters times and
// reports an error when a removed value differs from its key. It signals
// completion on cdone.
func parallelRandMapDeleter(t *testing.T, m *Map[string, int], numIters, numEntries int, cdone chan bool) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	for range numIters {
		j := r.Intn(numEntries)
		if v, loaded := m.LoadAndDelete(strconv.Itoa(j)); loaded {
			if v != j {
				t.Errorf("value was not expected for %d: %d", j, v)
			}
		}
	}
	cdone <- true
}

// parallelMapLoader loads every key numIters times and reports an error when
// a present value differs from its key. It signals completion on cdone.
func parallelMapLoader(t *testing.T, m *Map[string, int], numIters, numEntries int, cdone chan bool) {
	for range numIters {
		for j := range numEntries {
			// Due to atomic snapshots we must either see no entry, or a "<j>"/j pair.
			if v, ok := m.Load(strconv.Itoa(j)); ok {
				if v != j {
					t.Errorf("value was not expected for %d: %d", j, v)
				}
			}
		}
	}
	cdone <- true
}

// TestMapAtomicSnapshot runs a storer, a deleter and a loader concurrently on
// the same small key set; the helpers report any mismatched value.
func TestMapAtomicSnapshot(t *testing.T) {
	const numIters = 100_000
	const numEntries = 100
	m := NewMap[string, int]()
	cdone := make(chan bool)
	// Update or delete random entry in parallel with loads.
	go parallelRandMapStorer(t, m, numIters, numEntries, cdone)
	go parallelRandMapDeleter(t, m, numIters, numEntries, cdone)
	go parallelMapLoader(t, m, numIters, numEntries, cdone)
	// Wait for the goroutines to finish.
	for range 3 {
		<-cdone
	}
}

// TestMapParallelStoresAndDeletes runs numWorkers storer/deleter pairs
// concurrently; the helpers report any mismatched value.
func TestMapParallelStoresAndDeletes(t *testing.T) {
	const numWorkers = 2
	const numIters = 100_000
	const numEntries = 1000
	m := NewMap[string, int]()
	cdone := make(chan bool)
	// Update random entry in parallel with deletes.
	for range numWorkers {
		go parallelRandMapStorer(t, m, numIters, numEntries, cdone)
		go parallelRandMapDeleter(t, m, numIters, numEntries, cdone)
	}
	// Wait for the goroutines to finish.
	for range 2 * numWorkers {
		<-cdone
	}
}

// parallelMapComputer increments every one of the numEntries counters via
// Compute, numIters times, and signals completion on cdone.
func parallelMapComputer(m *Map[uint64, uint64], numIters, numEntries int, cdone chan bool) {
	for range numIters {
		for j := range numEntries {
			m.Compute(uint64(j), func(oldValue uint64, loaded bool) (newValue uint64, op ComputeOp) {
				return oldValue + 1, UpdateOp
			})
		}
	}
	cdone <- true
}

// TestMapParallelComputes increments shared counters from several goroutines
// and expects each counter to equal numWorkers*numIters, i.e. no increment
// was lost.
func TestMapParallelComputes(t *testing.T) {
	const numWorkers = 4 // Also stands for numEntries.
	const numIters = 10_000
	m := NewMap[uint64, uint64]()
	cdone := make(chan bool)
	for range numWorkers {
		go parallelMapComputer(m, numIters, numWorkers, cdone)
	}
	// Wait for the goroutines to finish.
	for range numWorkers {
		<-cdone
	}
	// Verify map contents.
	for i := range numWorkers {
		v, ok := m.Load(uint64(i))
		if !ok {
			t.Fatalf("value not found for %d", i)
		}
		if v != numWorkers*numIters {
			t.Fatalf("values do not match for %d: %v", i, v)
		}
	}
}

// parallelRangeMapStorer keeps storing all numEntries keys until *stopFlag
// becomes non-zero, then signals completion on cdone. The flag is only
// checked after a full pass.
func parallelRangeMapStorer(m *Map[int, int], numEntries int, stopFlag *int64, cdone chan bool) {
	for {
		for i := range numEntries {
			m.Store(i, i)
		}
		if atomic.LoadInt64(stopFlag) != 0 {
			break
		}
	}
	cdone <- true
}

// parallelRangeMapDeleter keeps deleting all numEntries keys until *stopFlag
// becomes non-zero, then signals completion on cdone. The flag is only
// checked after a full pass.
func parallelRangeMapDeleter(m *Map[int, int], numEntries int, stopFlag *int64, cdone chan bool) {
	for {
		for i := range numEntries {
			m.Delete(i)
		}
		if atomic.LoadInt64(stopFlag) != 0 {
			break
		}
	}
	cdone <- true
}

// TestMapParallelRange iterates the map with Range while other goroutines
// store and delete the same keys, and checks that no key is visited twice and
// that every visited value equals its key.
func TestMapParallelRange(t *testing.T) {
	const numEntries = 10_000
	m := NewMap[int, int](WithPresize(numEntries))
	for i := range numEntries {
		m.Store(i, i)
	}
	// Start goroutines that would be storing and deleting items in parallel.
	cdone := make(chan bool)
	stopFlag := int64(0)
	go parallelRangeMapStorer(m, numEntries, &stopFlag, cdone)
	go parallelRangeMapDeleter(m, numEntries, &stopFlag, cdone)
	// Iterate the map and verify that no duplicate keys were met.
	met := make(map[int]int)
	m.Range(func(key int, value int) bool {
		if key != value {
			// Fatalf does not return, so no explicit early exit is needed.
			t.Fatalf("got unexpected value for key %d: %d", key, value)
		}
		met[key]++
		return true
	})
	if len(met) == 0 {
		t.Fatal("no entries were met when iterating")
	}
	for k, c := range met {
		if c != 1 {
			t.Fatalf("met key %d multiple times: %d", k, c)
		}
	}
	// Make sure that both goroutines finish.
	atomic.StoreInt64(&stopFlag, 1)
	<-cdone
	<-cdone
}

// parallelMapShrinker repeatedly inserts and then deletes keys
// [0, numEntries), reporting an error if a key is already present on insert.
// When done it sets *stopFlag to 1 and signals cdone.
func parallelMapShrinker(t *testing.T, m *Map[uint64, *point], numIters, numEntries int, stopFlag *int64, cdone chan bool) {
	for range numIters {
		for j := range numEntries {
			if p, loaded := m.LoadOrStore(uint64(j), &point{int32(j), int32(j)}); loaded {
				t.Errorf("value was present for %d: %v", j, p)
			}
		}
		for j := range numEntries {
			m.Delete(uint64(j))
		}
	}
	atomic.StoreInt64(stopFlag, 1)
	cdone <- true
}

// parallelMapUpdater repeatedly inserts key idx, sleeps for a few
// microseconds, expects the key to still be present and deletes it, until
// *stopFlag becomes 1. It then signals cdone.
func parallelMapUpdater(t *testing.T, m *Map[uint64, *point], idx int, stopFlag *int64, cdone chan bool) {
	for atomic.LoadInt64(stopFlag) != 1 {
		sleepUs := int(Cheaprand() % 10)
		if p, loaded := m.LoadOrStore(uint64(idx), &point{int32(idx), int32(idx)}); loaded {
			t.Errorf("value was present for %d: %v", idx, p)
		}
		time.Sleep(time.Duration(sleepUs) * time.Microsecond)
		if _, ok := m.Load(uint64(idx)); !ok {
			t.Errorf("value was not found for %d", idx)
		}
		m.Delete(uint64(idx))
	}
	cdone <- true
}

// TestMapDoesNotLoseEntriesOnResize runs the shrinker on keys [0, numEntries)
// alongside an updater that owns key numEntries, so the updater's entry must
// never disappear while the other keys come and go.
func TestMapDoesNotLoseEntriesOnResize(t *testing.T) {
	const numIters = 10_000
	const numEntries = 128
	m := NewMap[uint64, *point]()
	cdone := make(chan bool)
	stopFlag := int64(0)
	go parallelMapShrinker(t, m, numIters, numEntries, &stopFlag, cdone)
	go parallelMapUpdater(t, m, numEntries, &stopFlag, cdone)
	// Wait for the goroutines to finish.
	<-cdone
	<-cdone
	// Verify map contents.
	if s := m.Size(); s != 0 {
		t.Fatalf("map is not empty: %d", s)
	}
}

// TestMapStats checks the values reported by Stats for an empty map and again
// after 200 entries have been stored.
func TestMapStats(t *testing.T) {
	m := NewMap[int, int]()
	stats := m.Stats()
	if stats.RootBuckets != DefaultMinMapTableLen {
		t.Fatalf("unexpected number of root buckets: %d", stats.RootBuckets)
	}
	if stats.TotalBuckets != stats.RootBuckets {
		t.Fatalf("unexpected number of total buckets: %d", stats.TotalBuckets)
	}
	if stats.EmptyBuckets != stats.RootBuckets {
		t.Fatalf("unexpected number of empty buckets: %d", stats.EmptyBuckets)
	}
	if stats.Capacity != EntriesPerMapBucket*DefaultMinMapTableLen {
		t.Fatalf("unexpected capacity: %d", stats.Capacity)
	}
	if stats.Size != 0 {
		t.Fatalf("unexpected size: %d", stats.Size)
	}
	if stats.Counter != 0 {
		t.Fatalf("unexpected counter: %d", stats.Counter)
	}
	if stats.CounterLen != 8 {
		t.Fatalf("unexpected counter length: %d", stats.CounterLen)
	}
	for i := range 200 {
		m.Store(i, i)
	}
	stats = m.Stats()
	if stats.RootBuckets != 2*DefaultMinMapTableLen {
		t.Fatalf("unexpected number of root buckets: %d", stats.RootBuckets)
	}
	if stats.TotalBuckets < stats.RootBuckets {
		t.Fatalf("unexpected number of total buckets: %d", stats.TotalBuckets)
	}
	if stats.EmptyBuckets >= stats.RootBuckets {
		t.Fatalf("unexpected number of empty buckets: %d", stats.EmptyBuckets)
	}
	if stats.Capacity < 2*EntriesPerMapBucket*DefaultMinMapTableLen {
		t.Fatalf("unexpected capacity: %d", stats.Capacity)
	}
	if stats.Size != 200 {
		t.Fatalf("unexpected size: %d", stats.Size)
	}
	if stats.Counter != 200 {
		t.Fatalf("unexpected counter: %d", stats.Counter)
	}
	if stats.CounterLen != 8 {
		t.Fatalf("unexpected counter length: %d", stats.CounterLen)
	}
}

// TestToPlainMap_NilPointer checks that ToPlainMap returns an empty map for a
// nil *Map.
func TestToPlainMap_NilPointer(t *testing.T) {
	pm := ToPlainMap[int, int](nil)
	if len(pm) != 0 {
		t.Fatalf("got unexpected size of nil map copy: %d", len(pm))
	}
}

// TestToPlainMap checks that ToPlainMap copies every entry of a populated map.
func TestToPlainMap(t *testing.T) {
	const numEntries = 1000
	m := NewMap[int, int]()
	for i := range numEntries {
		m.Store(i, i)
	}
	pm := ToPlainMap[int, int](m)
	if len(pm) != numEntries {
		t.Fatalf("got unexpected size of map copy: %d", len(pm))
	}
	for i := range numEntries {
		if v := pm[i]; v != i {
			t.Fatalf("unexpected value for key %d: %d", i, v)
		}
	}
}

// BenchmarkMap_NoWarmUp runs the string-key workload against an initially
// empty Map for every benchmark case except the 100% read one.
func BenchmarkMap_NoWarmUp(b *testing.B) {
	for _, bc := range benchmarkCases {
		if bc.readPercentage == 100 {
			// This benchmark doesn't make sense without a warm-up.
			continue
		}
		b.Run(bc.name, func(b *testing.B) {
			m := NewMap[string, int]()
			benchmarkMapStringKeys(b, func(k string) (int, bool) {
				return m.Load(k)
			}, func(k string, v int) {
				m.Store(k, v)
			}, func(k string) {
				m.Delete(k)
			}, bc.readPercentage)
		})
	}
}

// BenchmarkMap_WarmUp runs the string-key workload against a presized Map
// that is populated before the timer is reset.
func BenchmarkMap_WarmUp(b *testing.B) {
	for _, bc := range benchmarkCases {
		b.Run(bc.name, func(b *testing.B) {
			m := NewMap[string, int](WithPresize(benchmarkNumEntries))
			for i := range benchmarkNumEntries {
				m.Store(benchmarkKeyPrefix+strconv.Itoa(i), i)
			}
			b.ResetTimer()
			benchmarkMapStringKeys(b, func(k string) (int, bool) {
				return m.Load(k)
			}, func(k string, v int) {
				m.Store(k, v)
			}, func(k string) {
				m.Delete(k)
			}, bc.readPercentage)
		})
	}
}

// benchmarkMapStringKeys drives a mixed load/store/delete workload over
// benchmarkKeys through the given callbacks. readPercentage of the operations
// are loads; the remainder is split evenly between stores and deletes.
func benchmarkMapStringKeys(
	b *testing.B,
	loadFn func(k string) (int, bool),
	storeFn func(k string, v int),
	deleteFn func(k string),
	readPercentage int,
) {
	runParallel(b, func(pb *testing.PB) {
		// convert percent to permille to support 99% case
		storeThreshold := 10 * readPercentage
		deleteThreshold := 10*readPercentage + ((1000 - 10*readPercentage) / 2)
		for pb.Next() {
			op := int(Cheaprand() % 1000)
			i := int(Cheaprand() % benchmarkNumEntries)
			if op >= deleteThreshold {
				deleteFn(benchmarkKeys[i])
			} else if op >= storeThreshold {
				storeFn(benchmarkKeys[i], i)
			} else {
				loadFn(benchmarkKeys[i])
			}
		}
	})
}

// BenchmarkMapInt_NoWarmUp runs the int-key workload against an initially
// empty Map for every benchmark case except the 100% read one.
func BenchmarkMapInt_NoWarmUp(b *testing.B) {
	for _, bc := range benchmarkCases {
		if bc.readPercentage == 100 {
			// This benchmark doesn't make sense without a warm-up.
			continue
		}
		b.Run(bc.name, func(b *testing.B) {
			m := NewMap[int, int]()
			benchmarkMapIntKeys(b, func(k int) (int, bool) {
				return m.Load(k)
			}, func(k int, v int) {
				m.Store(k, v)
			}, func(k int) {
				m.Delete(k)
			}, bc.readPercentage)
		})
	}
}

// BenchmarkMapInt_WarmUp runs the int-key workload against a presized Map
// that is populated before the timer is reset.
func BenchmarkMapInt_WarmUp(b *testing.B) {
	for _, bc := range benchmarkCases {
		b.Run(bc.name, func(b *testing.B) {
			m := NewMap[int, int](WithPresize(benchmarkNumEntries))
			for i := range benchmarkNumEntries {
				m.Store(i, i)
			}
			b.ResetTimer()
			benchmarkMapIntKeys(b, func(k int) (int, bool) {
				return m.Load(k)
			}, func(k int, v int) {
				m.Store(k, v)
			}, func(k int) {
				m.Delete(k)
			}, bc.readPercentage)
		})
	}
}

// BenchmarkIntMapStandard_NoWarmUp runs the int-key workload against an
// initially empty sync.Map as a baseline.
func BenchmarkIntMapStandard_NoWarmUp(b *testing.B) {
	for _, bc := range benchmarkCases {
		if bc.readPercentage == 100 {
			// This benchmark doesn't make sense without a warm-up.
			continue
		}
		b.Run(bc.name, func(b *testing.B) {
			var m sync.Map
			benchmarkMapIntKeys(b, func(k int) (value int, ok bool) {
				v, ok := m.Load(k)
				if !ok {
					return 0, false
				}
				return v.(int), true
			}, func(k int, v int) {
				m.Store(k, v)
			}, func(k int) {
				m.Delete(k)
			}, bc.readPercentage)
		})
	}
}

// This is a nice scenario for sync.Map since a lot of updates
// will hit the readOnly part of the map.
func BenchmarkIntMapStandard_WarmUp(b *testing.B) { for _, bc := range benchmarkCases { b.Run(bc.name, func(b *testing.B) { var m sync.Map for i := range benchmarkNumEntries { m.Store(i, i) } b.ResetTimer() benchmarkMapIntKeys(b, func(k int) (value int, ok bool) { v, ok := m.Load(k) if ok { return v.(int), ok } else { return 0, false } }, func(k int, v int) { m.Store(k, v) }, func(k int) { m.Delete(k) }, bc.readPercentage) }) } } func benchmarkMapIntKeys( b *testing.B, loadFn func(k int) (int, bool), storeFn func(k int, v int), deleteFn func(k int), readPercentage int, ) { runParallel(b, func(pb *testing.PB) { // convert percent to permille to support 99% case storeThreshold := 10 * readPercentage deleteThreshold := 10*readPercentage + ((1000 - 10*readPercentage) / 2) for pb.Next() { op := int(Cheaprand() % 1000) i := int(Cheaprand() % benchmarkNumEntries) if op >= deleteThreshold { deleteFn(i) } else if op >= storeThreshold { storeFn(i, i) } else { loadFn(i) } } }) } func BenchmarkMapRange(b *testing.B) { m := NewMap[string, int](WithPresize(benchmarkNumEntries)) for i := range benchmarkNumEntries { m.Store(benchmarkKeys[i], i) } b.ResetTimer() runParallel(b, func(pb *testing.PB) { foo := 0 for pb.Next() { m.Range(func(key string, value int) bool { foo++ return true }) _ = foo } }) } func BenchmarkMapRangeRelaxed(b *testing.B) { m := NewMap[string, int](WithPresize(benchmarkNumEntries)) for i := range benchmarkNumEntries { m.Store(benchmarkKeys[i], i) } b.ResetTimer() runParallel(b, func(pb *testing.PB) { foo := 0 for pb.Next() { m.RangeRelaxed(func(key string, value int) bool { foo++ return true }) _ = foo } }) } // Benchmarks noop performance of Compute func BenchmarkMapCompute(b *testing.B) { tests := []struct { Name string Op ComputeOp }{ { Name: "UpdateOp", Op: UpdateOp, }, { Name: "CancelOp", Op: CancelOp, }, } for _, test := range tests { b.Run("op="+test.Name, func(b *testing.B) { m := NewMap[struct{}, bool]() m.Store(struct{}{}, true) for b.Loop() { 
m.Compute(struct{}{}, func(oldValue bool, loaded bool) (newValue bool, op ComputeOp) { return oldValue, test.Op }) } }) } } func BenchmarkMapParallelRehashing(b *testing.B) { tests := []struct { name string goroutines int numEntries int }{ {"1goroutine_10M", 1, 10_000_000}, {"4goroutines_10M", 4, 10_000_000}, {"8goroutines_10M", 8, 10_000_000}, } for _, test := range tests { b.Run(test.name, func(b *testing.B) { for b.Loop() { m := NewMap[int, int]() var wg sync.WaitGroup entriesPerGoroutine := test.numEntries / test.goroutines start := time.Now() for g := 0; g < test.goroutines; g++ { wg.Add(1) go func(goroutineID int) { defer wg.Done() base := goroutineID * entriesPerGoroutine for j := range entriesPerGoroutine { key := base + j m.Store(key, key) } }(g) } wg.Wait() duration := time.Since(start) b.ReportMetric(float64(test.numEntries)/duration.Seconds(), "entries/s") finalSize := m.Size() if finalSize != test.numEntries { b.Fatalf("Expected size %d, got %d", test.numEntries, finalSize) } stats := m.Stats() if stats.TotalGrowths == 0 { b.Error("Expected at least one table growth during rehashing") } } }) } } func BenchmarkMapDeleteMatching(b *testing.B) { tests := []struct { name string numEntries int deletePercent int }{ {"entries=1000_delete=10%", 1000, 10}, {"entries=1000_delete=50%", 1000, 50}, {"entries=1000_delete=100%", 1000, 100}, {"entries=100000_delete=10%", 100000, 10}, {"entries=100000_delete=50%", 100000, 50}, {"entries=100000_delete=100%", 100000, 100}, {"entries=1000000_delete=10%", 1000000, 10}, {"entries=1000000_delete=50%", 1000000, 50}, {"entries=1000000_delete=100%", 1000000, 100}, } for _, test := range tests { b.Run(test.name, func(b *testing.B) { for b.Loop() { m := NewMap[int, int](WithPresize(test.numEntries)) for i := range test.numEntries { m.Store(i, i) } threshold := test.numEntries * test.deletePercent / 100 m.DeleteMatching(func(key int, value int) (del, stop bool) { return key < threshold, false }) } }) } } func 
// A MPMCQueue is a bounded multi-producer multi-consumer concurrent
// queue.
//
// MPMCQueue instances must be created with NewMPMCQueue function.
// A MPMCQueue must not be copied after first use.
//
// Based on the data structure from the following C++ library:
// https://github.com/rigtorp/MPMCQueue
type MPMCQueue[I any] struct {
	// cap is the fixed number of slots; NewMPMCQueue sets it once.
	cap uint64
	// head is the enqueue position; TryEnqueue advances it with a CAS.
	head uint64
	// Padding to prevent false sharing.
	_ [cacheLineSize - 8]byte
	// tail is the dequeue position; TryDequeue advances it with a CAS.
	tail uint64
	// Padding to prevent false sharing.
	_ [cacheLineSize - 8]byte
	// slots is the ring of padded slots, indexed by position % cap.
	slots []slotPadded[I]
}
turn atomic.Uint64 item I } // Deprecated: use [NewMPMCQueue]. func NewMPMCQueueOf[I any](capacity int) *MPMCQueue[I] { return NewMPMCQueue[I](capacity) } // NewMPMCQueue creates a new MPMCQueue instance with the given // capacity. func NewMPMCQueue[I any](capacity int) *MPMCQueue[I] { if capacity < 1 { panic("capacity must be positive number") } return &MPMCQueue[I]{ cap: uint64(capacity), slots: make([]slotPadded[I], capacity), } } // TryEnqueue inserts the given item into the queue. Does not block // and returns immediately. The result indicates that the queue isn't // full and the item was inserted. func (q *MPMCQueue[I]) TryEnqueue(item I) bool { head := atomic.LoadUint64(&q.head) slot := &q.slots[q.idx(head)] turn := q.turn(head) * 2 if slot.turn.Load() == turn { if atomic.CompareAndSwapUint64(&q.head, head, head+1) { slot.item = item slot.turn.Store(turn + 1) return true } } return false } // TryDequeue retrieves and removes the item from the head of the // queue. Does not block and returns immediately. The ok result // indicates that the queue isn't empty and an item was retrieved. func (q *MPMCQueue[I]) TryDequeue() (item I, ok bool) { tail := atomic.LoadUint64(&q.tail) slot := &q.slots[q.idx(tail)] turn := q.turn(tail)*2 + 1 if slot.turn.Load() == turn { if atomic.CompareAndSwapUint64(&q.tail, tail, tail+1) { var zeroI I item = slot.item ok = true slot.item = zeroI slot.turn.Store(turn + 1) return } } return } func (q *MPMCQueue[I]) idx(i uint64) uint64 { return i % q.cap } func (q *MPMCQueue[I]) turn(i uint64) uint64 { return i / q.cap } xsync-4.4.0/mpmcqueue_test.go000066400000000000000000000123011513523422100162450ustar00rootroot00000000000000// Copyright notice. The following tests are partially based on // the following file from the Go Programming Language core repo: // https://github.com/golang/go/blob/831f9376d8d730b16fb33dfd775618dffe13ce7a/src/runtime/chan_test.go package xsync_test import ( "runtime" "strconv" "sync" "sync/atomic" "testing" . 
"github.com/puzpuzpuz/xsync/v4" ) func TestMPMCQueue_InvalidSize(t *testing.T) { defer func() { recover() }() NewMPMCQueue[int](0) t.Fatal("no panic detected") } func TestMPMCQueueEnqueueDequeueInt(t *testing.T) { q := NewMPMCQueue[int](10) for i := range 10 { if !q.TryEnqueue(i) { t.Fatalf("failed to enqueue for %d", i) } } for i := range 10 { if got, ok := q.TryDequeue(); !ok || got != i { t.Fatalf("%v got %v, want %d", ok, got, i) } } } func TestMPMCQueueEnqueueDequeueString(t *testing.T) { q := NewMPMCQueue[string](10) for i := range 10 { if !q.TryEnqueue(strconv.Itoa(i)) { t.Fatalf("failed to enqueue for %d", i) } } for i := range 10 { if got, ok := q.TryDequeue(); !ok || got != strconv.Itoa(i) { t.Fatalf("%v got %v, want %d", ok, got, i) } } } func TestMPMCQueueEnqueueDequeueStruct(t *testing.T) { type foo struct { bar int baz int } q := NewMPMCQueue[foo](10) for i := range 10 { if !q.TryEnqueue(foo{i, i}) { t.Fatalf("failed to enqueue for %d", i) } } for i := range 10 { if got, ok := q.TryDequeue(); !ok || got.bar != i || got.baz != i { t.Fatalf("%v got %v, want %d", ok, got, i) } } } func TestMPMCQueueEnqueueDequeueStructRef(t *testing.T) { type foo struct { bar int baz int } q := NewMPMCQueue[*foo](11) for i := range 10 { if !q.TryEnqueue(&foo{i, i}) { t.Fatalf("failed to enqueue for %d", i) } } if !q.TryEnqueue(nil) { t.Fatal("failed to enqueue for nil") } for i := range 10 { if got, ok := q.TryDequeue(); !ok || got.bar != i || got.baz != i { t.Fatalf("%v got %v, want %d", ok, got, i) } } if last, ok := q.TryDequeue(); !ok || last != nil { t.Fatalf("%v got %v, want nil", ok, last) } } func TestMPMCQueueTryEnqueueDequeue(t *testing.T) { q := NewMPMCQueue[int](10) for i := range 10 { if !q.TryEnqueue(i) { t.Fatalf("failed to enqueue for %d", i) } } for i := range 10 { if got, ok := q.TryDequeue(); !ok || got != i { t.Fatalf("got %v, want %d, for status %v", got, i, ok) } } } func TestMPMCQueueTryEnqueueOnFull(t *testing.T) { q := NewMPMCQueue[string](1) if 
!q.TryEnqueue("foo") { t.Error("failed to enqueue initial item") } if q.TryEnqueue("bar") { t.Error("got success for enqueue on full queue") } } func TestMPMCQueueTryDequeueOnEmpty(t *testing.T) { q := NewMPMCQueue[int](2) if _, ok := q.TryDequeue(); ok { t.Error("got success for enqueue on empty queue") } } func hammerMPMCQueueNonBlockingCalls(t *testing.T, gomaxprocs, numOps, numThreads int) { runtime.GOMAXPROCS(gomaxprocs) q := NewMPMCQueue[int](numThreads) startwg := sync.WaitGroup{} startwg.Add(1) csum := make(chan int, numThreads) // Start producers. for i := range numThreads { go func(n int) { startwg.Wait() for j := n; j < numOps; j += numThreads { for !q.TryEnqueue(j) { // busy spin until success } } }(i) } // Start consumers. for i := range numThreads { go func(n int) { startwg.Wait() sum := 0 for j := n; j < numOps; j += numThreads { var ( item int ok bool ) for { // busy spin until success if item, ok = q.TryDequeue(); ok { sum += item break } } } csum <- sum }(i) } startwg.Done() // Wait for all the sums from consumers. sum := 0 for range numThreads { s := <-csum sum += s } // Assert the total sum. 
// A RBMutex is a reader biased reader/writer mutual exclusion lock.
// The lock can be held by many readers or a single writer.
// The zero value for a RBMutex is an unlocked mutex.
//
// A RBMutex must not be copied after first use.
//
// RBMutex is based on a modified version of BRAVO
// (Biased Locking for Reader-Writer Locks) algorithm:
// https://arxiv.org/pdf/1810.01553.pdf
//
// RBMutex is a specialized mutex for scenarios, such as caches,
// where the vast majority of locks are acquired by readers and write
// lock acquire attempts are infrequent. In such scenarios, RBMutex
// performs better than sync.RWMutex on large multicore machines.
//
// RBMutex extends sync.RWMutex internally and uses it as the "reader
// bias disabled" fallback, so the same semantics apply. The only
// noticeable difference is in reader tokens returned from the RLock
// method and passed to the RUnlock method.
type RBMutex struct {
	// rslots holds the per-slot reader counters used by the fast read path.
	rslots []rslot
	// rmask maps a token's slot number onto an index into rslots;
	// NewRBMutex sets it to len(rslots)-1 (presumably a power of two
	// minus one, given the nextPowOf2 helper).
	rmask uint32
	// rbias is 1 while the reader-biased fast path is enabled and 0
	// otherwise. It is accessed with atomic operations.
	rbias int32
	// inhibitUntil is written by Lock; slow-path readers re-enable the
	// reader bias only after this point in time.
	inhibitUntil time.Time
	// rw is the underlying lock taken by writers and slow-path readers.
	rw sync.RWMutex
}
mu.rw.RLock() if atomic.LoadInt32(&mu.rbias) == 0 && time.Now().After(mu.inhibitUntil) { atomic.StoreInt32(&mu.rbias, 1) } return nil } func (mu *RBMutex) fastRlock() *RToken { if atomic.LoadInt32(&mu.rbias) == 1 { t, ok := rtokenPool.Get().(*RToken) if !ok { t = new(RToken) t.slot = runtime_cheaprand() } // Try all available slots to distribute reader threads to slots. for i := 0; i < len(mu.rslots); i++ { slot := t.slot + uint32(i) rslot := &mu.rslots[slot&mu.rmask] rslotmu := atomic.LoadInt32(&rslot.mu) if atomic.CompareAndSwapInt32(&rslot.mu, rslotmu, rslotmu+1) { if atomic.LoadInt32(&mu.rbias) == 1 { // Hot path succeeded. t.slot = slot return t } // The mutex is no longer reader biased. Roll back. atomic.AddInt32(&rslot.mu, -1) rtokenPool.Put(t) return nil } // Contention detected. Give a try with the next slot. } } return nil } // RUnlock undoes a single RLock call. A reader token obtained from // the RLock call must be provided. RUnlock does not affect other // simultaneous readers. A panic is raised if m is not locked for // reading on entry to RUnlock. func (mu *RBMutex) RUnlock(t *RToken) { if t == nil { mu.rw.RUnlock() return } if atomic.AddInt32(&mu.rslots[t.slot&mu.rmask].mu, -1) < 0 { panic("invalid reader state detected") } rtokenPool.Put(t) } // TryLock tries to lock m for writing without blocking. func (mu *RBMutex) TryLock() bool { if mu.rw.TryLock() { if atomic.LoadInt32(&mu.rbias) == 1 { atomic.StoreInt32(&mu.rbias, 0) for i := 0; i < len(mu.rslots); i++ { if atomic.LoadInt32(&mu.rslots[i].mu) > 0 { // There is a reader. Roll back. atomic.StoreInt32(&mu.rbias, 1) mu.rw.Unlock() return false } } } return true } return false } // Lock locks m for writing. If the lock is already locked for // reading or writing, Lock blocks until the lock is available. 
func (mu *RBMutex) Lock() { mu.rw.Lock() if atomic.LoadInt32(&mu.rbias) == 1 { atomic.StoreInt32(&mu.rbias, 0) start := time.Now() for i := 0; i < len(mu.rslots); i++ { for atomic.LoadInt32(&mu.rslots[i].mu) > 0 { runtime.Gosched() } } mu.inhibitUntil = time.Now().Add(time.Since(start) * nslowdown) } } // Unlock unlocks m for writing. A panic is raised if m is not locked // for writing on entry to Unlock. // // As with RWMutex, a locked RBMutex is not associated with a // particular goroutine. One goroutine may RLock (Lock) a RBMutex and // then arrange for another goroutine to RUnlock (Unlock) it. func (mu *RBMutex) Unlock() { mu.rw.Unlock() } xsync-4.4.0/rbmutex_test.go000066400000000000000000000201451513523422100157370ustar00rootroot00000000000000// Copyright notice. Initial version of the following tests was based on // the following file from the Go Programming Language core repo: // https://github.com/golang/go/blob/831f9376d8d730b16fb33dfd775618dffe13ce7a/src/sync/rwmutex_test.go package xsync_test import ( "fmt" "runtime" "sync" "sync/atomic" "testing" . 
"github.com/puzpuzpuz/xsync/v4" ) func TestRBMutexSerialReader(t *testing.T) { const numCalls = 10 mu := NewRBMutex() for range 3 { var rtokens [numCalls]*RToken for j := range numCalls { rtokens[j] = mu.RLock() } for j := range numCalls { mu.RUnlock(rtokens[j]) } } } func TestRBMutexSerialOptimisticReader(t *testing.T) { const numCalls = 10 mu := NewRBMutex() for range 3 { var rtokens [numCalls]*RToken for j := range numCalls { ok, rt := mu.TryRLock() if !ok { t.Fatalf("TryRLock failed for %d", j) } if rt == nil { t.Fatalf("nil reader token for %d", j) } rtokens[j] = rt } for j := range numCalls { mu.RUnlock(rtokens[j]) } } } func TestRBMutexSerialOptimisticWriter(t *testing.T) { mu := NewRBMutex() for range 3 { if !mu.TryLock() { t.Fatal("TryLock failed") } mu.Unlock() } } func parallelReader(mu *RBMutex, clocked, cunlock, cdone chan bool) { t := mu.RLock() clocked <- true <-cunlock mu.RUnlock(t) cdone <- true } func doTestParallelReaders(numReaders, gomaxprocs int) { runtime.GOMAXPROCS(gomaxprocs) mu := NewRBMutex() clocked := make(chan bool) cunlock := make(chan bool) cdone := make(chan bool) for range numReaders { go parallelReader(mu, clocked, cunlock, cdone) } // Wait for all parallel RLock()s to succeed. for range numReaders { <-clocked } for range numReaders { cunlock <- true } // Wait for the goroutines to finish. 
for range numReaders { <-cdone } } func TestRBMutexParallelReaders(t *testing.T) { defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(0)) doTestParallelReaders(1, 4) doTestParallelReaders(3, 4) doTestParallelReaders(4, 2) } func reader(mu *RBMutex, numIterations int, activity *int32, cdone chan bool) { for range numIterations { t := mu.RLock() n := atomic.AddInt32(activity, 1) if n < 1 || n >= 10000 { mu.RUnlock(t) panic(fmt.Sprintf("rlock(%d)\n", n)) } for range 100 { } atomic.AddInt32(activity, -1) mu.RUnlock(t) } cdone <- true } func writer(mu *RBMutex, numIterations int, activity *int32, cdone chan bool) { for range numIterations { mu.Lock() n := atomic.AddInt32(activity, 10000) if n != 10000 { mu.Unlock() panic(fmt.Sprintf("wlock(%d)\n", n)) } for range 100 { } atomic.AddInt32(activity, -10000) mu.Unlock() } cdone <- true } func hammerRBMutex(gomaxprocs, numReaders, numIterations int) { runtime.GOMAXPROCS(gomaxprocs) // Number of active readers + 10000 * number of active writers. var activity int32 mu := NewRBMutex() cdone := make(chan bool) go writer(mu, numIterations, &activity, cdone) var i int for i = 0; i < numReaders/2; i++ { go reader(mu, numIterations, &activity, cdone) } go writer(mu, numIterations, &activity, cdone) for ; i < numReaders; i++ { go reader(mu, numIterations, &activity, cdone) } // Wait for the 2 writers and all readers to finish. 
for i := 0; i < 2+numReaders; i++ { <-cdone } } func TestRBMutex(t *testing.T) { const n = 1000 defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(0)) hammerRBMutex(1, 1, n) hammerRBMutex(1, 3, n) hammerRBMutex(1, 10, n) hammerRBMutex(4, 1, n) hammerRBMutex(4, 3, n) hammerRBMutex(4, 10, n) hammerRBMutex(10, 1, n) hammerRBMutex(10, 3, n) hammerRBMutex(10, 10, n) hammerRBMutex(10, 5, n) } func optimisticReader(mu *RBMutex, numIterations int, activity *int32, cdone chan bool) { for range numIterations { if ok, t := mu.TryRLock(); ok { n := atomic.AddInt32(activity, 1) if n < 1 || n >= 10000 { mu.RUnlock(t) panic(fmt.Sprintf("rlock(%d)\n", n)) } for range 100 { } atomic.AddInt32(activity, -1) mu.RUnlock(t) } } cdone <- true } func optimisticWriter(mu *RBMutex, numIterations int, activity *int32, cdone chan bool) { for range numIterations { if mu.TryLock() { n := atomic.AddInt32(activity, 10000) if n != 10000 { mu.Unlock() panic(fmt.Sprintf("wlock(%d)\n", n)) } for range 100 { } atomic.AddInt32(activity, -10000) mu.Unlock() } } cdone <- true } func hammerOptimisticRBMutex(gomaxprocs, numReaders, numIterations int) { runtime.GOMAXPROCS(gomaxprocs) // Number of active readers + 10000 * number of active writers. var activity int32 mu := NewRBMutex() cdone := make(chan bool) go optimisticWriter(mu, numIterations, &activity, cdone) var i int for i = 0; i < numReaders/2; i++ { go optimisticReader(mu, numIterations, &activity, cdone) } go optimisticWriter(mu, numIterations, &activity, cdone) for ; i < numReaders; i++ { go optimisticReader(mu, numIterations, &activity, cdone) } // Wait for the 2 writers and all readers to finish. 
for i := 0; i < 2+numReaders; i++ { <-cdone } } func TestRBMutex_Optimistic(t *testing.T) { const n = 1000 defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(0)) hammerOptimisticRBMutex(1, 1, n) hammerOptimisticRBMutex(1, 3, n) hammerOptimisticRBMutex(1, 10, n) hammerOptimisticRBMutex(4, 1, n) hammerOptimisticRBMutex(4, 3, n) hammerOptimisticRBMutex(4, 10, n) hammerOptimisticRBMutex(10, 1, n) hammerOptimisticRBMutex(10, 3, n) hammerOptimisticRBMutex(10, 10, n) hammerOptimisticRBMutex(10, 5, n) } func hammerMixedRBMutex(gomaxprocs, numReaders, numIterations int) { runtime.GOMAXPROCS(gomaxprocs) // Number of active readers + 10000 * number of active writers. var activity int32 mu := NewRBMutex() cdone := make(chan bool) go writer(mu, numIterations, &activity, cdone) var i int for i = 0; i < numReaders/2; i++ { go reader(mu, numIterations, &activity, cdone) } go optimisticWriter(mu, numIterations, &activity, cdone) for ; i < numReaders; i++ { go optimisticReader(mu, numIterations, &activity, cdone) } // Wait for the 2 writers and all readers to finish. 
for i := 0; i < 2+numReaders; i++ { <-cdone } } func TestRBMutex_Mixed(t *testing.T) { const n = 1000 defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(0)) hammerMixedRBMutex(1, 1, n) hammerMixedRBMutex(1, 3, n) hammerMixedRBMutex(1, 10, n) hammerMixedRBMutex(4, 1, n) hammerMixedRBMutex(4, 3, n) hammerMixedRBMutex(4, 10, n) hammerMixedRBMutex(10, 1, n) hammerMixedRBMutex(10, 3, n) hammerMixedRBMutex(10, 10, n) hammerMixedRBMutex(10, 5, n) } func benchmarkRBMutex(b *testing.B, parallelism, localWork, writeRatio int) { mu := NewRBMutex() b.SetParallelism(parallelism) runParallel(b, func(pb *testing.PB) { foo := 0 for pb.Next() { foo++ if writeRatio > 0 && foo%writeRatio == 0 { mu.Lock() for i := 0; i != localWork; i += 1 { foo *= 2 foo /= 2 } mu.Unlock() } else { tk := mu.RLock() for i := 0; i != localWork; i += 1 { foo *= 2 foo /= 2 } mu.RUnlock(tk) } } _ = foo }) } func BenchmarkRBMutexWorkReadOnly_HighParallelism(b *testing.B) { benchmarkRBMutex(b, 1024, 100, -1) } func BenchmarkRBMutexWorkReadOnly(b *testing.B) { benchmarkRBMutex(b, -1, 100, -1) } func BenchmarkRBMutexWorkWrite100000(b *testing.B) { benchmarkRBMutex(b, -1, 100, 100000) } func BenchmarkRBMutexWorkWrite1000(b *testing.B) { benchmarkRBMutex(b, -1, 100, 1000) } func benchmarkRWMutex(b *testing.B, parallelism, localWork, writeRatio int) { var mu sync.RWMutex b.SetParallelism(parallelism) runParallel(b, func(pb *testing.PB) { foo := 0 for pb.Next() { foo++ if writeRatio > 0 && foo%writeRatio == 0 { mu.Lock() for i := 0; i != localWork; i += 1 { foo *= 2 foo /= 2 } mu.Unlock() } else { mu.RLock() for i := 0; i != localWork; i += 1 { foo *= 2 foo /= 2 } mu.RUnlock() } } _ = foo }) } func BenchmarkRWMutexWorkReadOnly_HighParallelism(b *testing.B) { benchmarkRWMutex(b, 1024, 100, -1) } func BenchmarkRWMutexWorkReadOnly(b *testing.B) { benchmarkRWMutex(b, -1, 100, -1) } func BenchmarkRWMutexWorkWrite100000(b *testing.B) { benchmarkRWMutex(b, -1, 100, 100000) } func BenchmarkRWMutexWorkWrite1000(b *testing.B) { 
benchmarkRWMutex(b, -1, 100, 1000) } xsync-4.4.0/spscqueue.go000066400000000000000000000050601513523422100152260ustar00rootroot00000000000000package xsync import ( "sync/atomic" ) // Deprecated: use [SPSCQueue]. type SPSCQueueOf[I any] = SPSCQueue[I] // A SPSCQueue is a bounded single-producer single-consumer concurrent // queue. This means that not more than a single goroutine must be // publishing items to the queue while not more than a single goroutine // must be consuming those items. // // SPSCQueue instances must be created with NewSPSCQueue function. // A SPSCQueue must not be copied after first use. // // Based on the data structure from the following article: // https://rigtorp.se/ringbuffer/ type SPSCQueue[I any] struct { cap uint64 pidx uint64 // Padding to prevent false sharing. _ [cacheLineSize - 8]byte pcachedIdx uint64 _ [cacheLineSize - 8]byte cidx uint64 _ [cacheLineSize - 8]byte ccachedIdx uint64 _ [cacheLineSize - 8]byte items []I } // Deprecated: use [NewSPSCQueue]. func NewSPSCQueueOf[I any](capacity int) *SPSCQueue[I] { return NewSPSCQueue[I](capacity) } // NewSPSCQueue creates a new SPSCQueue instance with the given // capacity. func NewSPSCQueue[I any](capacity int) *SPSCQueue[I] { if capacity < 1 { panic("capacity must be positive number") } return &SPSCQueue[I]{ cap: uint64(capacity + 1), items: make([]I, capacity+1), } } // TryEnqueue inserts the given item into the queue. Does not block // and returns immediately. The result indicates that the queue isn't // full and the item was inserted. 
func (q *SPSCQueue[I]) TryEnqueue(item I) bool { // relaxed memory order would be enough here idx := atomic.LoadUint64(&q.pidx) next_idx := idx + 1 if next_idx == q.cap { next_idx = 0 } cached_idx := q.ccachedIdx if next_idx == cached_idx { cached_idx = atomic.LoadUint64(&q.cidx) q.ccachedIdx = cached_idx if next_idx == cached_idx { return false } } q.items[idx] = item atomic.StoreUint64(&q.pidx, next_idx) return true } // TryDequeue retrieves and removes the item from the head of the // queue. Does not block and returns immediately. The ok result // indicates that the queue isn't empty and an item was retrieved. func (q *SPSCQueue[I]) TryDequeue() (item I, ok bool) { // relaxed memory order would be enough here idx := atomic.LoadUint64(&q.cidx) cached_idx := q.pcachedIdx if idx == cached_idx { cached_idx = atomic.LoadUint64(&q.pidx) q.pcachedIdx = cached_idx if idx == cached_idx { return } } var zeroI I item = q.items[idx] q.items[idx] = zeroI ok = true next_idx := idx + 1 if next_idx == q.cap { next_idx = 0 } atomic.StoreUint64(&q.cidx, next_idx) return } xsync-4.4.0/spscqueue_test.go000066400000000000000000000125201513523422100162640ustar00rootroot00000000000000// Copyright notice. The following tests are partially based on // the following file from the Go Programming Language core repo: // https://github.com/golang/go/blob/831f9376d8d730b16fb33dfd775618dffe13ce7a/src/runtime/chan_test.go package xsync_test import ( "runtime" "strconv" "sync" "sync/atomic" "testing" . 
"github.com/puzpuzpuz/xsync/v4" ) func TestSPSCQueueOf_InvalidSize(t *testing.T) { defer func() { recover() }() NewSPSCQueue[int](0) t.Fatal("no panic detected") } func TestSPSCQueueOfTryEnqueueDequeueInt(t *testing.T) { q := NewSPSCQueue[int](10) for i := range 10 { if !q.TryEnqueue(i) { t.Fatal("TryEnqueue failed") } } for i := range 10 { if got, ok := q.TryDequeue(); !ok || got != i { t.Fatalf("%v: got %v, want %d", ok, got, i) } } } func TestSPSCQueueOfTryEnqueueDequeueString(t *testing.T) { q := NewSPSCQueue[string](10) for i := range 10 { if !q.TryEnqueue(strconv.Itoa(i)) { t.Fatal("TryEnqueue failed") } } for i := range 10 { if got, ok := q.TryDequeue(); !ok || got != strconv.Itoa(i) { t.Fatalf("%v: got %v, want %d", ok, got, i) } } } func TestSPSCQueueOfTryEnqueueDequeueStruct(t *testing.T) { type foo struct { bar int baz int } q := NewSPSCQueue[foo](10) for i := range 10 { if !q.TryEnqueue(foo{i, i}) { t.Fatal("TryEnqueue failed") } } for i := range 10 { if got, ok := q.TryDequeue(); !ok || got.bar != i || got.baz != i { t.Fatalf("%v: got %v, want %d", ok, got, i) } } } func TestSPSCQueueOfTryEnqueueDequeueStructRef(t *testing.T) { type foo struct { bar int baz int } q := NewSPSCQueue[*foo](11) for i := range 10 { if !q.TryEnqueue(&foo{i, i}) { t.Fatal("TryEnqueue failed") } } if !q.TryEnqueue(nil) { t.Fatal("TryEnqueue with nil failed") } for i := range 10 { if got, ok := q.TryDequeue(); !ok || got.bar != i || got.baz != i { t.Fatalf("%v: got %v, want %d", ok, got, i) } } if last, ok := q.TryDequeue(); !ok || last != nil { t.Fatalf("%v: got %v, want nil", ok, last) } } func TestSPSCQueueOfTryEnqueueDequeue(t *testing.T) { q := NewSPSCQueue[int](10) for i := range 10 { if !q.TryEnqueue(i) { t.Fatalf("failed to enqueue for %d", i) } } for i := range 10 { if got, ok := q.TryDequeue(); !ok || got != i { t.Fatalf("got %v, want %d, for status %v", got, i, ok) } } } func TestSPSCQueueOfTryEnqueueOnFull(t *testing.T) { q := NewSPSCQueue[string](1) if 
!q.TryEnqueue("foo") { t.Error("failed to enqueue initial item") } if q.TryEnqueue("bar") { t.Error("got success for enqueue on full queue") } } func TestSPSCQueueOfTryDequeueOnEmpty(t *testing.T) { q := NewSPSCQueue[int](2) if _, ok := q.TryDequeue(); ok { t.Error("got success for enqueue on empty queue") } } func hammerSPSCQueueOfNonBlockingCalls(t *testing.T, cap, numOps int) { q := NewSPSCQueue[int](cap) startwg := sync.WaitGroup{} startwg.Add(1) csum := make(chan int, 2) // Start producer. go func() { startwg.Wait() for j := range numOps { for !q.TryEnqueue(j) { // busy spin until success } } }() // Start consumer. go func() { startwg.Wait() sum := 0 for range numOps { var ( item int ok bool ) for { // busy spin until success if item, ok = q.TryDequeue(); ok { sum += item break } } } csum <- sum }() startwg.Done() // Wait for all the sum from the producer. sum := <-csum // Assert the total sum. expectedSum := numOps * (numOps - 1) / 2 if sum != expectedSum { t.Fatalf("sums don't match for %d num ops: got %d, want %d", numOps, sum, expectedSum) } } func TestSPSCQueueOfNonBlockingCalls(t *testing.T) { n := 10 if testing.Short() { n = 1 } hammerSPSCQueueOfNonBlockingCalls(t, 1, n) hammerSPSCQueueOfNonBlockingCalls(t, 2, 2*n) hammerSPSCQueueOfNonBlockingCalls(t, 4, 4*n) } func benchmarkSPSCQueueOfProdCons(b *testing.B, queueSize, localWork int) { callsPerSched := queueSize N := int32(b.N / callsPerSched) c := make(chan bool, 2) q := NewSPSCQueue[int](queueSize) go func() { foo := 0 for atomic.AddInt32(&N, -1) >= 0 { for range callsPerSched { for range localWork { foo *= 2 foo /= 2 } if !q.TryEnqueue(1) { runtime.Gosched() } } } q.TryEnqueue(0) c <- foo == 42 }() go func() { foo := 0 for { v, ok := q.TryDequeue() if ok { if v == 0 { break } for range localWork { foo *= 2 foo /= 2 } } else { runtime.Gosched() } } c <- foo == 42 }() <-c <-c } func BenchmarkSPSCQueueOfProdCons(b *testing.B) { benchmarkSPSCQueueOfProdCons(b, 1000, 0) } func 
BenchmarkSPSCQueueOfProdConsWork100(b *testing.B) { benchmarkSPSCQueueOfProdCons(b, 1000, 100) } func benchmarkSPSCChan(b *testing.B, chanSize, localWork int) { callsPerSched := chanSize N := int32(b.N / callsPerSched) c := make(chan bool, 2) myc := make(chan int, chanSize) go func() { foo := 0 for atomic.AddInt32(&N, -1) >= 0 { for range callsPerSched { for range localWork { foo *= 2 foo /= 2 } myc <- 1 } } myc <- 0 c <- foo == 42 }() go func() { foo := 0 for { v := <-myc if v == 0 { break } for range localWork { foo *= 2 foo /= 2 } } c <- foo == 42 }() <-c <-c } func BenchmarkSPSCChan(b *testing.B) { benchmarkSPSCChan(b, 1000, 0) } func BenchmarkSPSCChanWork100(b *testing.B) { benchmarkSPSCChan(b, 1000, 100) } xsync-4.4.0/umpscqueue.go000066400000000000000000000120341513523422100154040ustar00rootroot00000000000000package xsync import ( "sync" "sync/atomic" "unsafe" ) // NewUMPSCQueue creates a new UMPSCQueue instance. func NewUMPSCQueue[T any]() *UMPSCQueue[T] { q := &UMPSCQueue[T]{} q.readHead = q.newSegment() q.writeHead.Store(q.readHead) return q } // A UMPSCQueue an unbounded multi-producer single-consumer concurrent queue. It is meant to serve // as a replacement for a channel. However, crucially, it has infinite capacity. This is a very bad // idea in many cases as it means that it never exhibits backpressure. In other words, if nothing // is consuming elements from the queue, it will eventually consume all available memory and crash // the process. However, there are also cases where this is desired behavior as it means the queue // will dynamically allocate more memory to store temporary bursts, allowing producers to never // block while the consumer catches up. // // Note however that because no locks are acquired, it is unsafe for multiple goroutines to consume // from the queue. Consumers must explicitly synchronize between themselves. type UMPSCQueue[T any] struct { // Represents the current head of the queue. 
This is updated by writers as they materialize the // segments of the queue. writeHead atomic.Pointer[queueSegment[T]] // Padding to prevent false sharing. _ [cacheLineSize - unsafe.Sizeof(atomic.Pointer[queueSegment[T]]{})]byte // Used to pool slices of queueValue to relieve pressure on the garbage collector. segmentPool sync.Pool readHead *queueSegment[T] readIdx int } // This value is chose arbitrarily, as increasing it gives diminishing returns. With some testing (on // 64-core machines), when the segment size is smaller than 2^10, the queue becomes slower as // parallelism increases, while there is no statistically significant difference beyond 2^12. const segmentSize = 1 << 12 // Holds the item and wait group. The reading goroutine should not attempt to read the value until // the ready [sync.WaitGroup] has been marked as done. type queueValue[T any] struct { item T ready sync.WaitGroup } // init initializes the [sync.WaitGroup] so that get blocks until set is called. func (hv *queueValue[T]) init() { hv.ready.Add(1) } // set sets the value and marks it as ready. func (hv *queueValue[T]) set(value T) { hv.item = value hv.ready.Done() } // get waits for the value to be ready, then reads it. func (hv *queueValue[T]) get() T { hv.ready.Wait() return hv.item } type queueSegment[T any] struct { // Incremented every time a writer wants to write to this segment, and prevents multiple writers from // attempting to write to the same index. If the index is greater than the size of the segment, // pending writers should try again in the next segment. idx atomic.Int64 // Padding to prevent false sharing. _ [cacheLineSize - unsafe.Sizeof(atomic.Uint64{})]byte // The set of values this segment. values []queueValue[T] // Synchronizes the creation of the next segment. nextOnce sync.Once next *queueSegment[T] } // newSegment creates a new queueSegment and pre-allocates the value slice by either reusing one // from the pool or creating a fresh one. 
func (q *UMPSCQueue[T]) newSegment() *queueSegment[T] { var values []queueValue[T] if v, ok := q.segmentPool.Get().(*[]queueValue[T]); ok { values = *v } else { values = make([]queueValue[T], segmentSize) } for i := range values { values[i].init() } s := &queueSegment[T]{ values: values, } // Storing -1 means the first call to Add(1) will return 0. s.idx.Store(-1) return s } func (q *UMPSCQueue[T]) loadNext(s *queueSegment[T]) *queueSegment[T] { s.nextOnce.Do(func() { s.next = q.newSegment() }) return s.next } // Dequeue returns the next value in the queue, blocking if it is empty. It is not safe to invoke Dequeue // from multiple goroutines. func (q *UMPSCQueue[T]) Dequeue() T { t := q.readHead.values[q.readIdx].get() q.readIdx++ if q.readIdx == segmentSize { q.readIdx = 0 // We're done reading a segment, so return the backing value slice to the pool. The actual // queueSegment itself cannot be reused as it contains the pointer to the next segment, which cannot // safely be updated as it cannot be determined whether all writers have released all references to // it. q.segmentPool.Put(&q.readHead.values) q.readHead = q.loadNext(q.readHead) } return t } // Enqueue writes the given value to the queue. It never blocks and is safe to be called by multiple // goroutines concurrently. func (q *UMPSCQueue[T]) Enqueue(value T) { var segment *queueSegment[T] for { segment = q.writeHead.Load() idx := segment.idx.Add(1) if idx < segmentSize { segment.values[idx].set(value) // Optimization: eagerly creating the next segment means less contention as it's unlikely that other // writers have already gotten to the end of the segment and are also invoking loadNext, which blocks // until the segment has been created. 
if idx == 0 { q.loadNext(segment) } return } else { var prev *queueSegment[T] prev, segment = segment, q.loadNext(segment) q.writeHead.CompareAndSwap(prev, segment) } } } xsync-4.4.0/umpscqueue_test.go000066400000000000000000000105211513523422100164420ustar00rootroot00000000000000package xsync import ( "fmt" "runtime" "strconv" "sync" "testing" "time" ) func TestUMPSCQueueEnqueueDequeueInt(t *testing.T) { q := NewUMPSCQueue[int]() for i := range 10000 { q.Enqueue(i) } for i := range 10000 { if got := q.Dequeue(); got != i { t.Fatalf("got %v, want %d", got, i) } } } func TestUMPSCQueueEnqueueDequeueString(t *testing.T) { q := NewUMPSCQueue[string]() for i := range 100 { q.Enqueue(strconv.Itoa(i)) } for i := range 100 { if got := q.Dequeue(); got != strconv.Itoa(i) { t.Fatalf("got %v, want %d", got, i) } } } func TestUMPSCQueueEnqueueDequeueStruct(t *testing.T) { type foo struct { bar int baz int } q := NewUMPSCQueue[foo]() for i := range 100 { q.Enqueue(foo{i, i}) } for i := range 100 { if got := q.Dequeue(); got.bar != i || got.baz != i { t.Fatalf("got %v, want %d", got, i) } } } func TestUMPSCQueueEnqueueDequeueStructRef(t *testing.T) { type foo struct { bar int baz int } q := NewUMPSCQueue[*foo]() for i := range 100 { q.Enqueue(&foo{i, i}) } q.Enqueue(nil) for i := range 100 { if got := q.Dequeue(); got.bar != i || got.baz != i { t.Fatalf("got %v, want %d", got, i) } } if last := q.Dequeue(); last != nil { t.Fatalf("got %v, want nil", last) } } func TestUMPSCQueue(t *testing.T) { for _, goroutines := range []int{1, 4, 16} { t.Run(fmt.Sprintf("goroutines=%d", goroutines), func(t *testing.T) { q := NewUMPSCQueue[int]() const count = 100 * segmentSize for mod := range goroutines { go func() { for i := range count { if i%goroutines == mod { q.Enqueue(i) } } }() } values := make(map[int]struct{}, count) for range count { actual := q.Dequeue() if _, ok := values[actual]; !ok { values[actual] = struct{}{} } else { t.Fatalf("got duplicate value: %q", actual) } } if 
len(values) != count { t.Fatalf("got %d values, expected %d", len(values), count) } }) } } func hammerUMPSCQueueBlockingCalls(t *testing.T, gomaxprocs, numOps, numThreads int) { runtime.GOMAXPROCS(gomaxprocs) q := NewUMPSCQueue[int]() startwg := sync.WaitGroup{} startwg.Add(1) csum := make(chan int, 1) // Start producers. for i := range numThreads { go func(n int) { startwg.Wait() for j := n; j < numOps; j += numThreads { q.Enqueue(j) } }(i) } // Start consumer. go func() { startwg.Wait() sum := 0 for range numOps { item := q.Dequeue() sum += item } csum <- sum }() startwg.Done() // Wait for the sum from the consumer. sum := <-csum // Assert the sum. expectedSum := numOps * (numOps - 1) / 2 if sum != expectedSum { t.Fatalf("sums don't match for %d num ops, %d num threads: got %d, want %d", numOps, numThreads, sum, expectedSum) } } func TestUMPSCQueueBlockingCalls(t *testing.T) { defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) n := 10 if testing.Short() { n = 1 } hammerUMPSCQueueBlockingCalls(t, 1, n, n) hammerUMPSCQueueBlockingCalls(t, 2, 10*n, 2*n) hammerUMPSCQueueBlockingCalls(t, 4, 100*n, 4*n) } // This benchmarks the performance of the [UMPSCQueue] vs using a normal channel. In the results, // channels should get slower as the parallelism goes up due to contention on the lock which is // acquired every time an element is added. By contrast, the queue actually should get faster. This // is expected, as [testing.B.RunParallel] executes N operations with G goroutines, so it should // take less time overall. The overall memory cost is negligible, especially since the allocation is not // per-operation, it's per-segment, meaning it is amortized by the size of the segment. Additionally, // segments are reused when possible, further decreasing the cost. 
func BenchmarkChanVsUMPSCQueue(b *testing.B) { b.Run("method=queue", func(b *testing.B) { q := NewUMPSCQueue[int]() done := make(chan struct{}) go func() { defer close(done) for b.Loop() { q.Dequeue() } }() b.RunParallel(func(pb *testing.PB) { for pb.Next() { q.Enqueue(0) } }) <-done }) b.Run("method=chan", func(b *testing.B) { ch := make(chan time.Duration, segmentSize) done := make(chan struct{}) go func() { defer close(done) var received int for range ch { received++ if received == b.N { break } } }() b.RunParallel(func(pb *testing.PB) { for pb.Next() { ch <- 0 } }) <-done }) } xsync-4.4.0/util.go000066400000000000000000000027731513523422100141760ustar00rootroot00000000000000package xsync import ( "math/bits" "runtime" _ "unsafe" ) // test-only assert()-like flag var assertionsEnabled = false const ( // cacheLineSize is used in paddings to prevent false sharing; // 64B are used instead of 128B as a compromise between // memory footprint and performance; 128B usage may give ~30% // improvement on NUMA machines. cacheLineSize = 64 ) // nextPowOf2 computes the next highest power of 2 of 32-bit v. // Source: https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 func nextPowOf2(v uint32) uint32 { if v == 0 { return 1 } v-- v |= v >> 1 v |= v >> 2 v |= v >> 4 v |= v >> 8 v |= v >> 16 v++ return v } func parallelism() uint32 { maxProcs := uint32(runtime.GOMAXPROCS(0)) numCores := uint32(runtime.NumCPU()) if maxProcs < numCores { return maxProcs } return numCores } //go:noescape //go:linkname runtime_cheaprand runtime.cheaprand func runtime_cheaprand() uint32 func broadcast(b uint8) uint64 { return 0x101010101010101 * uint64(b) } func firstMarkedByteIndex(w uint64) int { return bits.TrailingZeros64(w) >> 3 } // SWAR byte search: may produce false positives, e.g. for 0x0100, // so make sure to double-check bytes found by this function. 
func markZeroBytes(w uint64) uint64 { return ((w - 0x0101010101010101) & (^w) & 0x8080808080808080) } // Sets byte of the input word at the specified index to the given value. func setByte(w uint64, b uint8, idx int) uint64 { shift := idx << 3 return (w &^ (0xff << shift)) | (uint64(b) << shift) } xsync-4.4.0/util_test.go000066400000000000000000000112611513523422100152250ustar00rootroot00000000000000package xsync_test import ( "math/rand" "strconv" "testing" . "github.com/puzpuzpuz/xsync/v4" ) func TestNextPowOf2(t *testing.T) { if NextPowOf2(0) != 1 { t.Error("nextPowOf2 failed") } if NextPowOf2(1) != 1 { t.Error("nextPowOf2 failed") } if NextPowOf2(2) != 2 { t.Error("nextPowOf2 failed") } if NextPowOf2(3) != 4 { t.Error("nextPowOf2 failed") } } // This test is here to catch potential problems // with cheaprand-related changes. func TestCheaprand(t *testing.T) { count := 100 set := make(map[uint32]struct{}, count) for range count { num := Cheaprand() set[num] = struct{}{} } if len(set) != count { t.Error("duplicated rand num") } } func TestBroadcast(t *testing.T) { testCases := []struct { input uint8 expected uint64 }{ { input: 0, expected: 0, }, { input: 1, expected: 0x0101010101010101, }, { input: 2, expected: 0x0202020202020202, }, { input: 42, expected: 0x2a2a2a2a2a2a2a2a, }, { input: 127, expected: 0x7f7f7f7f7f7f7f7f, }, { input: 255, expected: 0xffffffffffffffff, }, } for _, tc := range testCases { t.Run(strconv.Itoa(int(tc.input)), func(t *testing.T) { if Broadcast(tc.input) != tc.expected { t.Errorf("unexpected result: %x", Broadcast(tc.input)) } }) } } func TestFirstMarkedByteIndex(t *testing.T) { testCases := []struct { input uint64 expected int }{ { input: 0, expected: 8, }, { input: 0x8080808080808080, expected: 0, }, { input: 0x0000000000000080, expected: 0, }, { input: 0x0000000000008000, expected: 1, }, { input: 0x0000000000800000, expected: 2, }, { input: 0x0000000080000000, expected: 3, }, { input: 0x0000008000000000, expected: 4, }, { input: 
0x0000800000000000, expected: 5, }, { input: 0x0080000000000000, expected: 6, }, { input: 0x8000000000000000, expected: 7, }, } for _, tc := range testCases { t.Run(strconv.Itoa(int(tc.input)), func(t *testing.T) { if FirstMarkedByteIndex(tc.input) != tc.expected { t.Errorf("unexpected result: %x", FirstMarkedByteIndex(tc.input)) } }) } } func TestMarkZeroBytes(t *testing.T) { testCases := []struct { input uint64 expected uint64 }{ { input: 0xffffffffffffffff, expected: 0, }, { input: 0, expected: 0x8080808080808080, }, { input: 1, expected: 0x8080808080808000, }, { input: 1 << 9, expected: 0x8080808080800080, }, { input: 1 << 17, expected: 0x8080808080008080, }, { input: 1 << 25, expected: 0x8080808000808080, }, { input: 1 << 33, expected: 0x8080800080808080, }, { input: 1 << 41, expected: 0x8080008080808080, }, { input: 1 << 49, expected: 0x8000808080808080, }, { input: 1 << 57, expected: 0x0080808080808080, }, // false positive { input: 0x0100, expected: 0x8080808080808080, }, } for _, tc := range testCases { t.Run(strconv.Itoa(int(tc.input)), func(t *testing.T) { if MarkZeroBytes(tc.input) != tc.expected { t.Errorf("unexpected result: %x", MarkZeroBytes(tc.input)) } }) } } func TestSetByte(t *testing.T) { testCases := []struct { word uint64 b uint8 idx int expected uint64 }{ { word: 0xffffffffffffffff, b: 0, idx: 0, expected: 0xffffffffffffff00, }, { word: 0xffffffffffffffff, b: 1, idx: 1, expected: 0xffffffffffff01ff, }, { word: 0xffffffffffffffff, b: 2, idx: 2, expected: 0xffffffffff02ffff, }, { word: 0xffffffffffffffff, b: 3, idx: 3, expected: 0xffffffff03ffffff, }, { word: 0xffffffffffffffff, b: 4, idx: 4, expected: 0xffffff04ffffffff, }, { word: 0xffffffffffffffff, b: 5, idx: 5, expected: 0xffff05ffffffffff, }, { word: 0xffffffffffffffff, b: 6, idx: 6, expected: 0xff06ffffffffffff, }, { word: 0xffffffffffffffff, b: 7, idx: 7, expected: 0x07ffffffffffffff, }, { word: 0, b: 0xff, idx: 7, expected: 0xff00000000000000, }, } for _, tc := range testCases { 
t.Run(strconv.Itoa(int(tc.word)), func(t *testing.T) { if SetByte(tc.word, tc.b, tc.idx) != tc.expected { t.Errorf("unexpected result: %x", SetByte(tc.word, tc.b, tc.idx)) } }) } } func BenchmarkCheaprand(b *testing.B) { for b.Loop() { _ = Cheaprand() } // <1.4 ns/op on x86-64 } func BenchmarkRand(b *testing.B) { for b.Loop() { _ = rand.Uint32() } // about 5 ns/op on x86-64 }