gohashtree-0.0.4-beta/.github/workflows/go.yml
name: Go

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ '*' ]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        go: [ '1.20', '1.21' ]
    name: Go ${{ matrix.go }} test
    steps:
    - name: Set up Go 1.x
      uses: actions/setup-go@v4
      with:
        go-version: ${{ matrix.go }}

    - name: Check out code into the Go module directory
      uses: actions/checkout@v3

    - name: Get dependencies
      run: |
        go get -v -t -d ./...

    - name: Build
      run: go build -v ./...

    - name: Test
      run: go test -v ./...

gohashtree-0.0.4-beta/LICENSE
MIT License

Copyright (c) 2022 Prysmatic Labs

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

gohashtree-0.0.4-beta/README.md
# Go Hashtree

GoHashtree is a SHA256 library highly optimized for Merkle tree computation. It is based on [Intel's implementation](https://github.com/intel/intel-ipsec-mb), with a few modifications such as hardcoding the scheduled words of the padding block. It is written in Go assembly, unlike its native-assembly counterpart [hashtree](https://github.com/prysmaticlabs/hashtree).

# Using the library

The library exposes a single function

```
func Hash(digests [][32]byte, chunks [][32]byte) error
```

This function hashes each consecutive pair of 32-byte blocks from `chunks` and writes the corresponding digest to `digests`. It performs runtime detection of supported CPU features. The function returns an error if `digests` is not allocated to hold at least `len(chunks)/2` digests, or if an odd number of chunks is given.

Most vectorized implementations exploit the fact that independent branches of a Merkle tree can be hashed in "parallel" within one CPU. To take advantage of this, Merkleization algorithms that loop over consecutive tree layers, hashing two blocks at a time, need to be updated to pass the entire layer, or all consecutive blocks, in a single call. A naive example of how to accomplish this can be found in [this document](https://hackmd.io/80mJ75A5QeeRcrNmqcuU-g?view).
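For a quick illustration, here is a minimal sketch (not part of the library; it assumes only the `Hash` function above) that reduces a bottom layer to its Merkle root with one call per layer:

```go
package main

import (
	"fmt"

	"github.com/prysmaticlabs/gohashtree"
)

// merkleRoot folds a bottom layer (a power-of-two number of 32-byte chunks)
// into its root, hashing every sibling pair of a layer in a single call.
func merkleRoot(layer [][32]byte) ([32]byte, error) {
	for len(layer) > 1 {
		digests := make([][32]byte, len(layer)/2)
		if err := gohashtree.Hash(digests, layer); err != nil {
			return [32]byte{}, err
		}
		layer = digests
	}
	return layer[0], nil
}

func main() {
	leaves := make([][32]byte, 8) // eight zero chunks
	root, err := merkleRoot(leaves)
	if err != nil {
		panic(err)
	}
	fmt.Printf("root: %x\n", root)
}
```

Passing the whole layer at once is what lets the vectorized backends fill all of their SIMD lanes with independent hashes.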
# Running tests and benchmarks

- Run the tests

```shell
$ cd gohashtree
$ go test .
ok  	github.com/prysmaticlabs/gohashtree	0.002s
```

- Some benchmarks on ARM with crypto extensions

```
$ cd gohashtree
$ go test . -bench=.
goos: darwin
goarch: arm64
pkg: github.com/prysmaticlabs/gohashtree
BenchmarkHash_1_minio-10        	 8472337	     122.9 ns/op
BenchmarkHash_1-10              	27011082	      42.99 ns/op
BenchmarkHash_4_minio-10        	 2419328	     500.1 ns/op
BenchmarkHash_4-10              	 6900236	     172.1 ns/op
BenchmarkHash_8_minio-10        	 1217845	     985.6 ns/op
BenchmarkHash_8-10              	 3471864	     344.0 ns/op
BenchmarkHash_16_minio-10       	  597896	    1974 ns/op
BenchmarkHash_16-10             	 1721486	     689.2 ns/op
BenchmarkHashLargeList_minio-10 	      38	28401697 ns/op
BenchmarkHashList-10            	     138	 8619502 ns/op
PASS
ok  	github.com/prysmaticlabs/gohashtree	16.854s
```

- Some benchmarks on a Raspberry Pi without crypto extensions

```
$ cd gohashtree
$ go test . -bench=.
goos: linux
goarch: arm64
pkg: github.com/prysmaticlabs/gohashtree
BenchmarkHash_1_minio-4        	  338904	     3668 ns/op
BenchmarkHash_1-4              	 1000000	     1087 ns/op
BenchmarkHash_4_minio-4        	   82258	    15537 ns/op
BenchmarkHash_4-4              	  380631	     3216 ns/op
BenchmarkHash_8_minio-4        	   41265	    34344 ns/op
BenchmarkHash_8-4              	  181153	     6569 ns/op
BenchmarkHash_16_minio-4       	   16635	    67142 ns/op
BenchmarkHash_16-4             	   75922	    13351 ns/op
BenchmarkHashLargeList_minio-4 	       2	826262074 ns/op
BenchmarkHashList-4            	       7	176396035 ns/op
PASS
```

- Some benchmarks on a Xeon with AVX-512

```
$ cd gohashtree
$ go test . -bench=.
goos: linux
goarch: amd64
pkg: github.com/prysmaticlabs/gohashtree
cpu: Intel(R) Xeon(R) CPU @ 2.80GHz
BenchmarkHash_1_minio-2        	 2462506	     473.1 ns/op
BenchmarkHash_1-2              	 3040208	     391.3 ns/op
BenchmarkHash_4_minio-2        	  577078	    1959 ns/op
BenchmarkHash_4-2              	 1954473	     604.9 ns/op
BenchmarkHash_8_minio-2        	  298208	    3896 ns/op
BenchmarkHash_8-2              	 1882191	     624.8 ns/op
BenchmarkHash_16_minio-2       	  147230	    7933 ns/op
BenchmarkHash_16-2             	  557485	    1988 ns/op
BenchmarkHashLargeList_minio-2 	      10	105404666 ns/op
BenchmarkHashList-2            	      45	 25368532 ns/op
PASS
ok  	github.com/prysmaticlabs/gohashtree	13.969s
```
gohashtree-0.0.4-beta/export_test.go
/*
MIT License

Copyright (c) 2021 Prysmatic Labs

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package gohashtree

// Export internal functions for testing.
var Sha256_1_generic = sha256_1_generic

gohashtree-0.0.4-beta/fuzzbuzz.yaml
gohashtree:
  language: go

gohashtree-0.0.4-beta/go.mod
module github.com/prysmaticlabs/gohashtree

go 1.20

require (
	github.com/klauspost/cpuid/v2 v2.0.9
	github.com/minio/sha256-simd v1.0.0
)

gohashtree-0.0.4-beta/go.sum
github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g=
github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM=

gohashtree-0.0.4-beta/hash.go
/*
MIT License

Copyright (c) 2021 Prysmatic Labs

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package gohashtree

import (
	"fmt"
	"unsafe"
)

func _hash(digests *byte, p [][32]byte, count uint32)

// Hash hashes the chunks two at a time and writes the digests to the first
// argument. It checks the lengths of the inputs.
func Hash(digests [][32]byte, chunks [][32]byte) error {
	if len(chunks) == 0 {
		return nil
	}

	if len(chunks)%2 == 1 {
		return fmt.Errorf("odd number of chunks")
	}
	if len(digests) < len(chunks)/2 {
		return fmt.Errorf("not enough digest length, need at least %v, got %v", len(chunks)/2, len(digests))
	}
	if supportedCPU {
		_hash(&digests[0][0], chunks, uint32(len(chunks)/2))
	} else {
		sha256_1_generic(digests, chunks)
	}
	return nil
}

// HashChunks is the same as Hash, but does not do error checking on the
// lengths of the slices.
func HashChunks(digests [][32]byte, chunks [][32]byte) {
	if supportedCPU {
		_hash(&digests[0][0], chunks, uint32(len(chunks)/2))
	} else {
		sha256_1_generic(digests, chunks)
	}
}

func HashByteSlice(digests []byte, chunks []byte) error {
	if len(chunks) == 0 {
		return nil
	}
	if len(chunks)%64 != 0 {
		return fmt.Errorf("chunks not multiple of 64 bytes")
	}
	if len(digests)%32 != 0 {
		return fmt.Errorf("digests not multiple of 32 bytes")
	}
	if len(digests) < len(chunks)/2 {
		return fmt.Errorf("not enough digest length, need at least %d, got %d", len(chunks)/2, len(digests))
	}
	// We use an unsafe pointer to cast []byte to [][32]byte. The length and
	// capacity of the slice need to be divided accordingly by 32.
	sizeChunks := (len(chunks) >> 5)
	chunkedChunks := unsafe.Slice((*[32]byte)(unsafe.Pointer(&chunks[0])), sizeChunks)
	sizeDigests := (len(digests) >> 5)
	chunkedDigest := unsafe.Slice((*[32]byte)(unsafe.Pointer(&digests[0])), sizeDigests)
	if supportedCPU {
		Hash(chunkedDigest, chunkedChunks)
	} else {
		sha256_1_generic(chunkedDigest, chunkedChunks)
	}
	return nil
}
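Usage note (a minimal sketch, not a file from the repository): `HashByteSlice` lets a caller keep a whole layer in one flat buffer and hash the sibling pairs without first reslicing into `[][32]byte`:

```go
package main

import (
	"fmt"

	"github.com/prysmaticlabs/gohashtree"
)

func main() {
	// Four 32-byte chunks (two sibling pairs) in one contiguous buffer.
	chunks := make([]byte, 4*32)
	// Room for the two 32-byte parent digests.
	digests := make([]byte, 2*32)
	if err := gohashtree.HashByteSlice(digests, chunks); err != nil {
		panic(err)
	}
	fmt.Printf("parents: %x %x\n", digests[:32], digests[32:])
}
```

The length rules above still apply: `chunks` must be a multiple of 64 bytes and `digests` must hold at least half as many bytes.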
gohashtree-0.0.4-beta/hash_amd64.go
//go:build amd64
// +build amd64

/*
MIT License

Copyright (c) 2021 Prysmatic Labs

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package gohashtree

import (
	"github.com/klauspost/cpuid/v2"
)

var hasAVX512 = cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512VL)
var hasAVX2 = cpuid.CPU.Supports(cpuid.AVX2, cpuid.BMI2)
var hasShani = cpuid.CPU.Supports(cpuid.SHA, cpuid.AVX)
var supportedCPU = hasAVX2 || hasShani || hasAVX512

gohashtree-0.0.4-beta/hash_amd64.s
/*
MIT License

Copyright (c) 2021 Prysmatic Labs

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
This code is based on Intel's implementation found in https://github.com/intel/intel-ipsec-mb Copied parts are Copyright (c) 2012-2021, Intel Corporation */ #include "textflag.h" // AVX x1 definitions #define OUTPUT_PTR DI #define DATA_PTR SI #define NUM_BLKS DX #define TBL CX #define RAL AX #define RBL BX #define RCL BP #define RDL R8 #define REL R9 #define RFL R10 #define RGL R11 #define RHL R12 #define XTMP0 X4 #define XTMP1 X5 #define XTMP2 X6 #define XTMP3 X7 #define XTMP4 X8 #define XTMP5 X11 #define XFER X9 #define y0 R13 #define y1 R14 #define y2 R15 #define _SHUF_00BA X10 #define _SHUF_DC00 X12 #define _BYTE_FLIP_MASK X13 #define COPY_XMM_AND_BSWAP(dst,src,msk) \ VMOVDQU src, dst; \ VPSHUFB msk, dst, dst #define FOUR_ROUNDS_AND_SCHEDA(a, b, c, d, e, f, g, h, X0_, X1_, X2_, X3_) \ RORXL $(25-11), e, y0; \ VPALIGNR $4, X2_, X3_, XTMP0; \ RORXL $(22-13), a, y1; \ XORL e, y0; \ MOVL f, y2; \ RORXL $(11-6), y0, y0; \ XORL a, y1; \ XORL g, y2; \ VPADDD X0_, XTMP0, XTMP0; \ XORL e, y0; \ ANDL e, y2; \ RORXL $(13-2), y1, y1; \ VPALIGNR $4, X0_, X1_, XTMP1; \ XORL a, y1; \ RORXL $6, y0, y0; \ XORL g, y2; \ RORXL $2, y1, y1; \ ADDL y0, y2; \ ADDL (0*4)(SP), y2; \ MOVL a, y0; \ ADDL y2, h; \ MOVL a, y2; \ VPSRLD $7, XTMP1, XTMP2; \ ORL c, y0; \ ADDL h, d; \ ANDL c, y2; \ VPSLLD $(32-7), XTMP1, XTMP3; \ ANDL b, y0; \ ADDL y1, h; \ VPOR XTMP2, XTMP3, XTMP3; \ ORL y2, y0; \ ADDL y0, h #define FOUR_ROUNDS_AND_SCHEDB(a, b, c, d, e, f, g, h, X0_, X1_, X2_, X3_) \ RORXL $(25-11), e, y0; \ RORXL $(22-13), a, y1; \ XORL e, y0; \ VPSRLD $18, XTMP1, XTMP2; \ MOVL f, y2; \ RORXL $(11-6), y0, y0; \ XORL a, y1; \ XORL g, y2; \ VPSRLD $3, XTMP1, XTMP4; \ XORL e, y0; \ ANDL e, y2; \ RORXL $(13-2), y1, y1; \ XORL a, y1; \ RORXL $6, y0, y0; \ VPSLLD $(32-18), XTMP1, XTMP1; \ XORL g, y2; \ RORXL $2, y1, y1; \ VPXOR XTMP1, XTMP3, XTMP3; \ ADDL y0, y2; \ ADDL (1*4)(SP), y2; \ MOVL a, y0; \ VPXOR XTMP2, XTMP3, XTMP3; \ ADDL y2, h; \ MOVL a, y2; \ ORL c, y0; \ VPXOR XTMP4, XTMP3, XTMP1; \ ADDL h, d; \ ANDL c, y2; \ VPSHUFD $0xFA, X3_, XTMP2; \ ANDL b, y0; \ ADDL y1, h; \ VPADDD XTMP1, XTMP0, XTMP0; \ ORL y2, y0; \ ADDL y0, h #define FOUR_ROUNDS_AND_SCHEDC(a, b, c, d, e, f, g, h, X0_, X1_, X2_, X3_) \ RORXL $(25-11), e, y0; \ RORXL $(22-13), a, y1; \ XORL e, y0; \ VPSRLD $10, XTMP2, XTMP4; \ MOVL f, y2; \ RORXL $(11-6), y0, y0; \ XORL a, y1; \ VPSRLQ $19, XTMP2, XTMP3; \ XORL g, y2; \ XORL e, y0; \ ANDL e, y2; \ VPSRLQ $17, XTMP2, XTMP2; \ RORXL $(13-2), y1, y1; \ XORL a, y1; \ RORXL $6, y0, y0; \ VPXOR XTMP3, XTMP2, XTMP2; \ XORL g, y2; \ RORXL $2, y1, y1; \ ADDL y0, y2; \ VPXOR XTMP2, XTMP4, XTMP4; \ ADDL (2*4)(SP), y2; \ MOVL a, y0; \ ADDL y2, h; \ VPSHUFB _SHUF_00BA, XTMP4, XTMP4; \ MOVL a, y2; \ ORL c, y0; \ ADDL h, d; \ VPADDD XTMP4, XTMP0, XTMP0; \ ANDL c, y2; \ ANDL b, y0; \ VPSHUFD $0x50, XTMP0, XTMP2; \ ADDL y1, h; \ ORL y2, y0; \ ADDL y0, h #define FOUR_ROUNDS_AND_SCHEDD(a, b, c, d, e, f, g, h, X0_, X1_, X2_, X3_) \ RORXL $(25-11), e, y0; \ RORXL $(22-13), a, y1; \ VPSRLD $10, XTMP2, XTMP5; \ XORL e, y0; \ MOVL f, y2; \ RORXL $(11-6), y0, y0; \ VPSRLQ $19, XTMP2, XTMP3; \ XORL a, y1; \ XORL g, y2; \ XORL e, y0; \ VPSRLQ $17, XTMP2, XTMP2; \ ANDL e, y2; \ RORXL $(13-2), y1, y1; \ XORL a, y1; \ VPXOR XTMP3, XTMP2, XTMP2; \ RORXL $6, y0, y0; \ XORL g, y2; \ RORXL $2, y1, y1; \ VPXOR XTMP2, XTMP5, XTMP5; \ ADDL y0, y2; \ ADDL (3*4)(SP), y2; \ MOVL a, y0; \ ADDL y2, h; \ MOVL a, y2; \ VPSHUFB _SHUF_DC00, XTMP5, XTMP5; \ ORL c, y0; \ ADDL h, d; \ ANDL c, y2; \ VPADDD XTMP0, XTMP5, X0_; \ ANDL b, y0; \ ADDL y1, 
h; \ ORL y2, y0; \ ADDL y0, h #define FOUR_ROUNDS_AND_SCHED(a, b, c, d, e, f, g, h, X0_, X1_, X2_, X3_) \ FOUR_ROUNDS_AND_SCHEDA(a, b, c, d, e, f, g, h, X0_, X1_, X2_, X3_); \ FOUR_ROUNDS_AND_SCHEDB(h, a, b, c, d, e, f, g, X0_, X1_, X2_, X3_); \ FOUR_ROUNDS_AND_SCHEDC(g, h, a, b, c, d, e, f, X0_, X1_, X2_, X3_); \ FOUR_ROUNDS_AND_SCHEDD(f, g, h, a, b, c, d, e, X0_, X1_, X2_, X3_) #define DO_ROUND(base, offset, a, b, c, d, e, f, g, h) \ RORXL $(25-11), e, y0; \ RORXL $(22-13), a, y1; \ XORL e, y0; \ MOVL f, y2; \ RORXL $(11-6), y0, y0; \ XORL a, y1; \ XORL g, y2; \ XORL e, y0; \ ANDL e, y2; \ RORXL $(13-2), y1, y1; \ XORL a, y1; \ RORXL $6, y0, y0; \ XORL g, y2; \ RORXL $2, y1, y1; \ ADDL y0, y2; \ ADDL (offset)(base), y2; \ MOVL a, y0; \ ADDL y2, h; \ MOVL a, y2; \ ORL c, y0; \ ADDL h, d; \ ANDL c, y2; \ ANDL b, y0; \ ADDL y1, h; \ ORL y2, y0; \ ADDL y0, h // AVX x4 definitions #define XA0 X8 #define XA1 X9 #define XA2 X10 #define XT0 X14 #define XT1 X13 #define XT2 X12 #define XT3 X11 #define XT4 X10 #define XT5 X9 #define TMP4 X15 #define TRANSPOSE_4_U32(r0, r1, r2, r3, t0, t1) \ VSHUFPS $0x44, r1, r0, t0; \ VSHUFPS $0xEE, r1, r0, r0; \ VSHUFPS $0x44, r3, r2, t1; \ VSHUFPS $0xEE, r3, r2, r2; \ VSHUFPS $0xDD, t1, t0, r1; \ VSHUFPS $0xDD, r2, r0, r3; \ VSHUFPS $0x88, r2, r0, r0; \ VSHUFPS $0x88, t1, t0, t0 #define PRORD4(src, imm) \ VPSLLD $(32 - imm), src, TMP4; \ VPSRLD $imm, src, src; \ VPOR TMP4, src, src #define PRORD4_nd(dst, src, amt) \ VPSLLD $(32 - amt), src, TMP4; \ VPSRLD $amt, src, dst; \ VPOR TMP4, dst, dst #define ROUND4_00_15_PADD(a, b, c, d, e, f, g, h, T1, i) \ PRORD4_nd(XA0, e, 5); \ VPXOR g, f, XA2; \ VPAND e, XA2, XA2; \ VPXOR g, XA2, XA2; \ PRORD4_nd(XA1, e, 25); \ VMOVDQU (64*i)(TBL), T1; \ VPXOR e, XA0, XA0; \ PRORD4(XA0, 6); \ VPADDD XA2, h, h; \ PRORD4_nd(XA2, a, 11); \ VPADDD T1, h, h; \ VPXOR XA1, XA0, XA0; \ PRORD4_nd(XA1, a, 22); \ VPXOR c, a, T1; \ VPAND b, T1, T1; \ VPADDD XA0, h, h; \ VPADDD h, d, d; \ VPXOR a, XA2, XA2; \ PRORD4(XA2, 2); \ VPXOR XA1, XA2, XA2; \ VPAND c, a, XA1; \ VPOR T1, XA1, XA1; \ VPADDD XA1, h, h; \ VPADDD XA2, h, h #define ROUND4_00_15(a, b, c, d, e, f, g, h, T1, i) \ PRORD4_nd(XA0, e, 5); \ VPXOR g, f, XA2; \ VPAND e, XA2, XA2; \ VPXOR g, XA2, XA2; \ PRORD4_nd(XA1, e, 25); \ VMOVDQU T1, (16*(i&0xf))(SP); \ VPADDD (64*i)(TBL), T1, T1; \ VPXOR e, XA0, XA0; \ PRORD4(XA0, 6); \ VPADDD XA2, h, h; \ PRORD4_nd(XA2, a, 11); \ VPADDD T1, h, h; \ VPXOR XA1, XA0, XA0; \ PRORD4_nd(XA1, a, 22); \ VPXOR c, a, T1; \ VPAND b, T1, T1; \ VPADDD XA0, h, h; \ VPADDD h, d, d; \ VPXOR a, XA2, XA2; \ PRORD4(XA2, 2); \ VPXOR XA1, XA2, XA2; \ VPAND c, a, XA1; \ VPOR T1, XA1, XA1; \ VPADDD XA1, h, h; \ VPADDD XA2, h, h #define ROUND4_16_XX(a, b, c, d, e, f, g, h, T1, i) \ VMOVDQU (16*((i-15)&0x0f))(SP), T1; \ VMOVDQU (16*((i-2)&0x0f))(SP), XA1; \ VMOVDQA T1, XA0; \ PRORD4(T1, 11); \ VMOVDQA XA1, XA2; \ PRORD4(XA1, 2); \ VPXOR XA0, T1, T1; \ PRORD4(T1, 7); \ VPXOR XA2, XA1, XA1; \ PRORD4(XA1, 17); \ VPSRLD $3, XA0, XA0; \ VPXOR XA0, T1, T1; \ VPSRLD $10, XA2, XA2; \ VPXOR XA2, XA1, XA1; \ VPADDD (16*((i-16)&0x0f))(SP), T1, T1; \ VPADDD (16*((i-7)&0x0f))(SP), XA1, XA1; \ VPADDD XA1, T1, T1; \ ROUND4_00_15(a, b, c, d, e, f, g, h, T1, i) // AVX2 x8 definitions #define a0 Y12 #define a1 Y13 #define a2 Y14 #define TMP Y15 #define TMP0 Y6 #define TMP1 Y7 #define TT0 Y8 #define TT1 Y9 #define TT2 Y10 #define TT3 Y11 #define TT4 Y12 #define TT5 Y13 #define TT6 Y14 #define TT7 Y15 #define _DIGEST 512 #define _YTMP 768 #define YTMP0 _YTMP + 0*32 #define YTMP1 _YTMP + 
1*32 #define YTMP2 _YTMP + 2*32 #define YTMP3 _YTMP + 3*32 #define TRANSPOSE8_U32_LOAD8(offset) \ VMOVUPS (offset + 0*64)(DATA_PTR), TT0; \ VMOVUPS (offset + 1*64)(DATA_PTR), TT1; \ VMOVUPS (offset + 2*64)(DATA_PTR), TT2; \ VMOVUPS (offset + 3*64)(DATA_PTR), TT3; \ VMOVUPS (offset + 0*64+16)(DATA_PTR), TT4; \ VMOVUPS (offset + 1*64+16)(DATA_PTR), TT5; \ VMOVUPS (offset + 2*64+16)(DATA_PTR), TT6; \ VMOVUPS (offset + 3*64+16)(DATA_PTR), TT7; \ VINSERTI128 $0x01, (offset + 4*64)(DATA_PTR), TT0, TT0; \ VINSERTI128 $0x01, (offset + 5*64)(DATA_PTR), TT1, TT1; \ VINSERTI128 $0x01, (offset + 6*64)(DATA_PTR), TT2, TT2; \ VINSERTI128 $0x01, (offset + 7*64)(DATA_PTR), TT3, TT3; \ VINSERTI128 $0x01, (offset + 4*64+16)(DATA_PTR), TT4, TT4; \ VINSERTI128 $0x01, (offset + 5*64+16)(DATA_PTR), TT5, TT5; \ VINSERTI128 $0x01, (offset + 6*64+16)(DATA_PTR), TT6, TT6; \ VINSERTI128 $0x01, (offset + 7*64+16)(DATA_PTR), TT7, TT7 #define TRANSPOSE8_U32_PRELOADED \ VSHUFPS $0x44, TT1, TT0, TMP0; \ VSHUFPS $0xEE, TT1, TT0, TT0; \ VSHUFPS $0x44, TT3, TT2, TMP1; \ VSHUFPS $0xEE, TT3, TT2, TT2; \ VSHUFPS $0xDD, TMP1, TMP0, TT1; \ VSHUFPS $0xDD, TT2, TT0, TT3; \ VSHUFPS $0x88, TT2, TT0, TT2; \ VSHUFPS $0x88, TMP1, TMP0, TT0; \ VSHUFPS $0x44, TT5, TT4, TMP0; \ VSHUFPS $0xEE, TT5, TT4, TT4; \ VSHUFPS $0x44, TT7, TT6, TMP1; \ VSHUFPS $0xEE, TT7, TT6, TT6; \ VSHUFPS $0xDD, TMP1, TMP0, TT5; \ VSHUFPS $0xDD, TT6, TT4, TT7; \ VSHUFPS $0x88, TT6, TT4, TT6; \ VSHUFPS $0x88, TMP1, TMP0, TT4 #define TRANSPOSE8_U32 \ VSHUFPS $0x44, Y1, Y0, TT0; \ VSHUFPS $0xEE, Y1, Y0, Y0; \ VSHUFPS $0x44, Y3, Y2, TT1; \ VSHUFPS $0xEE, Y3, Y2, Y2; \ VSHUFPS $0xDD, TT1, TT0, Y3; \ VSHUFPS $0x88, Y2, Y0, Y1; \ VSHUFPS $0xDD, Y2, Y0, Y0; \ VSHUFPS $0x88, TT1, TT0, TT0; \ VSHUFPS $0x44, Y5, Y4, Y2; \ VSHUFPS $0xEE, Y5, Y4, Y4; \ VSHUFPS $0x44, Y7, Y6, TT1; \ VSHUFPS $0xEE, Y7, Y6, Y6; \ VSHUFPS $0xDD, TT1, Y2, Y7; \ VSHUFPS $0x88, Y6, Y4, Y5; \ VSHUFPS $0xDD, Y6, Y4, Y4; \ VSHUFPS $0x88, TT1, Y2, TT1; \ VPERM2F128 $0x13, Y1, Y5, Y6; \ VPERM2F128 $0x02, Y1, Y5, Y2; \ VPERM2F128 $0x13, Y3, Y7, Y5; \ VPERM2F128 $0x02, Y3, Y7, Y1; \ VPERM2F128 $0x13, Y0, Y4, Y7; \ VPERM2F128 $0x02, Y0, Y4, Y3; \ VPERM2F128 $0x13, TT0, TT1, Y4; \ VPERM2F128 $0x02, TT0, TT1, Y0 #define PRORD(src, imm) \ VPSLLD $(32 - imm), src, TMP; \ VPSRLD $imm, src, src; \ VPOR TMP, src, src #define PRORD_nd(dst, src, amt) \ VPSLLD $(32 - amt), src, TMP; \ VPSRLD $amt, src, dst; \ VPOR TMP, dst, dst #define ROUND_00_15_PADD(a, b, c, d, e, f, g, h, T1, i) \ PRORD_nd(a0, e, 5); \ VPXOR g, f, a2; \ VPAND e, a2, a2; \ VPXOR g, a2, a2; \ PRORD_nd(a1, e, 25); \ VMOVDQU (64*i)(TBL), T1; \ VPXOR e, a0, a0; \ PRORD(a0, 6); \ VPADDD a2, h, h; \ PRORD_nd(a2, a, 11); \ VPADDD T1, h, h; \ VPXOR a1, a0, a0; \ PRORD_nd(a1, a, 22); \ VPXOR c, a, T1; \ VPAND b, T1, T1; \ VPADDD a0, h, h; \ VPADDD h, d, d; \ VPXOR a, a2, a2; \ PRORD(a2, 2); \ VPXOR a1, a2, a2; \ VPAND c, a, a1; \ VPOR T1, a1, a1; \ VPADDD a1, h, h; \ VPADDD a2, h, h #define ROUND_00_15(a, b, c, d, e, f, g, h, T1, i) \ PRORD_nd(a0, e, 5); \ VPXOR g, f, a2; \ VPAND e, a2, a2; \ VPXOR g, a2, a2; \ PRORD_nd(a1, e, 25); \ VMOVDQU T1, (32*(i&0xf))(SP); \ VPADDD (64*i)(TBL), T1, T1; \ VPXOR e, a0, a0; \ PRORD(a0, 6); \ VPADDD a2, h, h; \ PRORD_nd(a2, a, 11); \ VPADDD T1, h, h; \ VPXOR a1, a0, a0; \ PRORD_nd(a1, a, 22); \ VPXOR c, a, T1; \ VPAND b, T1, T1; \ VPADDD a0, h, h; \ VPADDD h, d, d; \ VPXOR a, a2, a2; \ PRORD(a2, 2); \ VPXOR a1, a2, a2; \ VPAND c, a, a1; \ VPOR T1, a1, a1; \ VPADDD a1, h, h; \ VPADDD a2, h, h #define ROUND_16_XX(a, b, c, 
d, e, f, g, h, T1, i) \ VMOVDQU (32*((i-15)&0x0f))(SP), T1; \ VMOVDQU (32*((i-2)&0x0f))(SP), a1; \ VMOVDQA T1, a0; \ PRORD(T1, 11); \ VMOVDQA a1, a2; \ PRORD(a1, 2); \ VPXOR a0, T1, T1; \ PRORD(T1, 7); \ VPXOR a2, a1, a1; \ PRORD(a1, 17); \ VPSRLD $3, a0, a0; \ VPXOR a0, T1, T1; \ VPSRLD $10, a2, a2; \ VPXOR a2, a1, a1; \ VPADDD (32*((i-16)&0x0f))(SP), T1, T1; \ VPADDD (32*((i-7)&0x0f))(SP), a1, a1; \ VPADDD a1, T1, T1; \ ROUND_00_15(a, b, c, d, e, f, g, h, T1, i) // AVX x16 definitions #define PADDINGAVX512 R8 #define DIGESTAVX512 R11 #define ZT1 Z8 #define ZTMP0 Z9 #define ZTMP1 Z10 #define ZTMP2 Z11 #define ZTMP3 Z12 #define ZTMP4 Z13 #define ZTMP5 Z14 #define ZTMP6 Z15 #define YW0 Y16 #define YW1 Y17 #define YW2 Y18 #define YW3 Y19 #define YW4 Y20 #define YW5 Y21 #define YW6 Y22 #define YW7 Y23 #define YW8 Y24 #define YW9 Y25 #define YW10 Y26 #define YW11 Y27 #define YW12 Y28 #define YW13 Y29 #define YW14 Y30 #define YW15 Y31 #define W0 Z16 #define W1 Z17 #define W2 Z18 #define W3 Z19 #define W4 Z20 #define W5 Z21 #define W6 Z22 #define W7 Z23 #define W8 Z24 #define W9 Z25 #define W10 Z26 #define W11 Z27 #define W12 Z28 #define W13 Z29 #define W14 Z30 #define W15 Z31 #define TRANSPOSE_8x16_U32 \ VMOVDQA32 ZTMP5, ZTMP0; \ VMOVDQA32 ZTMP5, ZTMP1; \ VPERMI2D Z4, Z0, ZTMP0; \ VPERMI2D Z5, Z1, ZTMP1; \ VMOVDQA32 ZTMP6, ZTMP2; \ VMOVDQA32 ZTMP6, ZTMP3; \ VPERMI2D Z4, Z0, ZTMP2; \ VPERMI2D Z5, Z1, ZTMP3; \ VMOVDQA32 ZTMP5, Z0; \ VMOVDQA32 ZTMP5, Z1; \ VPERMI2D Z6, Z2, Z0; \ VPERMI2D Z7, Z3, Z1; \ VMOVDQA32 ZTMP6, Z4; \ VMOVDQA32 ZTMP6, Z5; \ VPERMI2D Z6, Z2, Z4; \ VPERMI2D Z7, Z3, Z5; \ VSHUFPS $0x88, ZTMP1, ZTMP0, Z6; \ VSHUFPS $0xDD, ZTMP1, ZTMP0, Z7; \ VSHUFPS $0x88, ZTMP3, ZTMP2, ZTMP1; \ VSHUFPS $0xDD, ZTMP3, ZTMP2, ZTMP0; \ VSHUFPS $0x88, Z5, Z4, ZTMP2; \ VSHUFPS $0xDD, Z5, Z4, ZTMP3; \ VSHUFPS $0x88, Z1, Z0, Z4; \ VSHUFPS $0xDD, Z1, Z0, Z5; \ VMOVDQA32 ZTMP5, Z0; \ VMOVDQA32 ZTMP5, Z1; \ VPERMI2D Z4, Z6, Z0; \ VPERMI2D Z5, Z7, Z1; \ VMOVDQA32 ZTMP6, Z2; \ VMOVDQA32 ZTMP6, Z3; \ VPERMI2D Z4, Z6, Z2; \ VPERMI2D Z5, Z7, Z3; \ VMOVDQA32 ZTMP5, Z4; \ VMOVDQA32 ZTMP5, Z5; \ VPERMI2D ZTMP2, ZTMP1, Z4; \ VPERMI2D ZTMP3, ZTMP0, Z5; \ VMOVDQA32 ZTMP6, Z6; \ VMOVDQA32 ZTMP6, Z7; \ VPERMI2D ZTMP2, ZTMP1, Z6; \ VPERMI2D ZTMP3, ZTMP0, Z7 #define TRANSPOSE16_U32_PRELOADED \ VSHUFPS $0x44, W1, W0, ZTMP0; \ VSHUFPS $0xEE, W1, W0, W0; \ VSHUFPS $0x44, W3, W2, ZTMP1; \ VSHUFPS $0xEE, W3, W2, W2; \ VSHUFPS $0xDD, ZTMP1, ZTMP0, W3; \ VSHUFPS $0x88, W2, W0, W1; \ VSHUFPS $0xDD, W2, W0, W0; \ VSHUFPS $0x88, ZTMP1, ZTMP0, ZTMP0; \ VMOVDQU64 _PSHUFFLE_TRANSPOSE_MASK1<>(SB), ZTMP4; \ VMOVDQU64 _PSHUFFLE_TRANSPOSE_MASK2<>(SB), ZTMP5; \ VSHUFPS $0x44, W5, W4, W2; \ VSHUFPS $0xEE, W5, W4, W4; \ VSHUFPS $0x44, W7, W6, ZTMP1; \ VSHUFPS $0xEE, W7, W6, W6; \ VSHUFPS $0xDD, ZTMP1, W2, W7; \ VSHUFPS $0x88, W6, W4, W5; \ VSHUFPS $0xDD, W6, W4, W4; \ VSHUFPS $0x88, ZTMP1, W2, W2; \ VSHUFPS $0x44, W9, W8, W6; \ VSHUFPS $0xEE, W9, W8, W8; \ VSHUFPS $0x44, W11, W10, ZTMP1; \ VSHUFPS $0xEE, W11, W10, W10; \ VSHUFPS $0xDD, ZTMP1, W6, W11; \ VSHUFPS $0x88, W10, W8, W9; \ VSHUFPS $0xDD, W10, W8, W8; \ VSHUFPS $0x88, ZTMP1, W6, W6; \ VSHUFPS $0x44, W13, W12, W10; \ VSHUFPS $0xEE, W13, W12, W12; \ VSHUFPS $0x44, W15, W14, ZTMP1; \ VSHUFPS $0xEE, W15, W14, W14; \ VSHUFPS $0xDD, ZTMP1, W10, W15; \ VSHUFPS $0x88, W14, W12, W13; \ VSHUFPS $0xDD, W14, W12, W12; \ VSHUFPS $0x88, ZTMP1, W10, W10; \ VMOVDQU32 ZTMP4, ZTMP1; \ VPERMI2Q W13, W9, ZTMP1; \ VMOVDQU32 ZTMP5, W14; \ VPERMI2Q W13, W9, W14; \ VMOVDQU32 ZTMP4, W9; \ VPERMI2Q W15, 
W11, W9; \ VMOVDQU32 ZTMP5, W13; \ VPERMI2Q W15, W11, W13; \ VMOVDQU32 ZTMP4, W11; \ VPERMI2Q W12, W8, W11; \ VMOVDQU32 ZTMP5, W15; \ VPERMI2Q W12, W8, W15; \ VMOVDQU32 ZTMP4, W8; \ VPERMI2Q W10, W6, W8; \ VMOVDQU32 ZTMP5, W12; \ VPERMI2Q W10, W6, W12; \ VMOVDQU32 ZTMP1, W10; \ VMOVDQU32 ZTMP4, ZTMP1; \ VPERMI2Q W5, W1, ZTMP1; \ VMOVDQU32 ZTMP5, W6; \ VPERMI2Q W5, W1, W6; \ VMOVDQU32 ZTMP4, W1; \ VPERMI2Q W7, W3, W1; \ VMOVDQU32 ZTMP5, W5; \ VPERMI2Q W7, W3, W5; \ VMOVDQU32 ZTMP4, W3; \ VPERMI2Q W4, W0, W3; \ VMOVDQU32 ZTMP5, W7; \ VPERMI2Q W4, W0, W7; \ VMOVDQU32 ZTMP4, W0; \ VPERMI2Q W2, ZTMP0, W0; \ VMOVDQU32 ZTMP5, W4; \ VPERMI2Q W2, ZTMP0, W4; \ VMOVDQU32 ZTMP1, W2 #define PROCESS_LOOP_AVX512__(A, B, C, D, E, F, G, H, reg, WT) \ VMOVDQA32 E, ZTMP0; \ VPRORD $6, E, ZTMP1; \ VPRORD $11, E, ZTMP2; \ VPRORD $25, E, ZTMP3; \ VPTERNLOGD $0xCA, G, F, ZTMP0; \ VPADDD WT, reg, ZT1; \ VPTERNLOGD $0x96, ZTMP3, ZTMP2, ZTMP1; \ VPADDD ZTMP0, ZT1, ZT1; \ VPADDD ZTMP1, ZT1, ZT1; \ VPADDD ZT1, D, D; \ VPRORD $2, A, H; \ VPRORD $13, A, ZTMP2; \ VPRORD $22, A, ZTMP3; \ VMOVDQA32 A, ZTMP0; \ VPTERNLOGD $0xE8, C, B, ZTMP0; \ VPTERNLOGD $0x96, ZTMP3, ZTMP2, H; \ VPADDD ZTMP0, H, H; \ VPADDD ZT1, H, H #define PROCESS_LOOP_AVX512(A, B, C, D, E, F, G, H, WT) \ VPADDD ZTMP3, H, ZT1; \ PROCESS_LOOP_AVX512__(A, B, C, D, E, F, G, H, ZT1, WT) #define PROCESS_LOOP_PADDING_AVX512(A, B, C, D, E, F, G, H, WT) \ PROCESS_LOOP_AVX512__(A, B, C, D, E, F, G, H, H, WT) #define MSG_SCHED_ROUND_16_63_AVX512(WT, WTp1, WTp9, WTp14) \ VPRORD $17, WTp14, ZTMP4; \ VPRORD $19, WTp14, ZTMP5; \ VPSRLD $10, WTp14, ZTMP6; \ VPTERNLOGD $0x96, ZTMP6, ZTMP5, ZTMP4; \ VPADDD ZTMP4, WT, WT; \ VPADDD WTp9, WT, WT; \ VPRORD $7, WTp1, ZTMP4; \ VPRORD $18, WTp1, ZTMP5; \ VPSRLD $3, WTp1, ZTMP6; \ VPTERNLOGD $0x96, ZTMP6, ZTMP5, ZTMP4; \ VPADDD ZTMP4, WT, WT // Sha-ni definitions #define SAVE_SP R8 #define SHA256PADDING CX #define SHA256CONSTANTS AX #define MSG X0 #define STATE0 X1 #define STATE1 X2 #define MSGTMP0 X3 #define MSGTMP1 X4 #define MSGTMP2 X5 #define MSGTMP3 X6 #define MSGTMP4 X7 #define SHUF_MASK X8 #define ABEF_SAVE X9 #define CDGH_SAVE X10 #define STATE0b X9 #define STATE1b X10 #define MSGTMP0b X11 #define MSGTMP1b X12 #define MSGTMP2b X13 #define MSGTMP3b X14 #define MSGTMP4b X15 #define ROUNDS_16_XX_SHA(T0, T1, T3, T4, S0, S1, i) \ VMOVDQA T0, MSG; \ PADDD (i*16)(SHA256CONSTANTS), MSG; \ SHA256RNDS2 X0, S0, S1; \ VMOVDQA T0, T4; \ PALIGNR $4, T3, T4; \ PADDD T4, T1; \ SHA256MSG2 T0, T1; \ VPSHUFD $0x0E, MSG, MSG; \ SHA256RNDS2 X0, S1, S0; \ SHA256MSG1 T0, T3 #define ROUND_PADD_SHA_x1(i) \ VMOVDQU (i*16)(SHA256PADDING), MSG; \ SHA256RNDS2 MSG, STATE0, STATE1; \ PSHUFD $0x0E, MSG, MSG; \ SHA256RNDS2 MSG, STATE1, STATE0 #define ROUND_PADD_SHA(i) \ VMOVDQU (i*16)(SHA256PADDING), MSG; \ SHA256RNDS2 MSG, STATE0, STATE1; \ SHA256RNDS2 MSG, STATE0b, STATE1b; \ PSHUFD $0x0E, MSG, MSG; \ SHA256RNDS2 MSG, STATE1, STATE0; \ SHA256RNDS2 MSG, STATE1b, STATE0b TEXT ·_hash(SB), 0, $928-36 CMPB ·hasShani(SB), $1 JE shani CMPB ·hasAVX512(SB), $1 JE avx512 CMPB ·hasAVX2(SB), $1 JE avx2 MOVQ digests+0(FP), OUTPUT_PTR // digests *[][32]byte MOVQ p_base+8(FP), DATA_PTR // p [][32]byte MOVL count+32(FP), NUM_BLKS // NUM_BLKS uint32 avx1: CMPL NUM_BLKS, $4 JB avx1_x1 // Load pre-transposed digest MOVQ $_DIGEST_16<>(SB), TBL VMOVDQU (0*64)(TBL), X0 VMOVDQU (1*64)(TBL), X1 VMOVDQU (2*64)(TBL), X2 VMOVDQU (3*64)(TBL), X3 VMOVDQU (4*64)(TBL), X4 VMOVDQU (5*64)(TBL), X5 VMOVDQU (6*64)(TBL), X6 VMOVDQU (7*64)(TBL), X7 MOVQ $_K256_16<>(SB), TBL // First 16 
rounds VMOVDQU _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), TMP4 VMOVUPS (0*64 + 0*16)(DATA_PTR), XT2 VMOVUPS (1*64 + 0*16)(DATA_PTR), XT1 VMOVUPS (2*64 + 0*16)(DATA_PTR), XT4 VMOVUPS (3*64 + 0*16)(DATA_PTR), XT3 TRANSPOSE_4_U32(XT2, XT1, XT4, XT3, XT0, XT5) VPSHUFB TMP4, XT0, XT0 VPSHUFB TMP4, XT1, XT1 VPSHUFB TMP4, XT2, XT2 VPSHUFB TMP4, XT3, XT3 ROUND4_00_15(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x0) ROUND4_00_15(X7, X0, X1, X2, X3, X4, X5, X6, XT1, 0x1) ROUND4_00_15(X6, X7, X0, X1, X2, X3, X4, X5, XT2, 0x2) ROUND4_00_15(X5, X6, X7, X0, X1, X2, X3, X4, XT3, 0x3) VMOVDQU _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), TMP4 VMOVUPS (0*64 + 1*16)(DATA_PTR), XT2 VMOVUPS (1*64 + 1*16)(DATA_PTR), XT1 VMOVUPS (2*64 + 1*16)(DATA_PTR), XT4 VMOVUPS (3*64 + 1*16)(DATA_PTR), XT3 TRANSPOSE_4_U32(XT2, XT1, XT4, XT3, XT0, XT5) VPSHUFB TMP4, XT0, XT0 VPSHUFB TMP4, XT1, XT1 VPSHUFB TMP4, XT2, XT2 VPSHUFB TMP4, XT3, XT3 ROUND4_00_15(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x4) ROUND4_00_15(X3, X4, X5, X6, X7, X0, X1, X2, XT1, 0x5) ROUND4_00_15(X2, X3, X4, X5, X6, X7, X0, X1, XT2, 0x6) ROUND4_00_15(X1, X2, X3, X4, X5, X6, X7, X0, XT3, 0x7) VMOVDQU _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), TMP4 VMOVUPS (0*64 + 2*16)(DATA_PTR), XT2 VMOVUPS (1*64 + 2*16)(DATA_PTR), XT1 VMOVUPS (2*64 + 2*16)(DATA_PTR), XT4 VMOVUPS (3*64 + 2*16)(DATA_PTR), XT3 TRANSPOSE_4_U32(XT2, XT1, XT4, XT3, XT0, XT5) VPSHUFB TMP4, XT0, XT0 VPSHUFB TMP4, XT1, XT1 VPSHUFB TMP4, XT2, XT2 VPSHUFB TMP4, XT3, XT3 ROUND4_00_15(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x8) ROUND4_00_15(X7, X0, X1, X2, X3, X4, X5, X6, XT1, 0x9) ROUND4_00_15(X6, X7, X0, X1, X2, X3, X4, X5, XT2, 0xa) ROUND4_00_15(X5, X6, X7, X0, X1, X2, X3, X4, XT3, 0xb) VMOVDQU _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), TMP4 VMOVUPS (0*64 + 3*16)(DATA_PTR), XT2 VMOVUPS (1*64 + 3*16)(DATA_PTR), XT1 VMOVUPS (2*64 + 3*16)(DATA_PTR), XT4 VMOVUPS (3*64 + 3*16)(DATA_PTR), XT3 TRANSPOSE_4_U32(XT2, XT1, XT4, XT3, XT0, XT5) VPSHUFB TMP4, XT0, XT0 VPSHUFB TMP4, XT1, XT1 VPSHUFB TMP4, XT2, XT2 VPSHUFB TMP4, XT3, XT3 ROUND4_00_15(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0xc) ROUND4_00_15(X3, X4, X5, X6, X7, X0, X1, X2, XT1, 0xd) ROUND4_00_15(X2, X3, X4, X5, X6, X7, X0, X1, XT2, 0xe) ROUND4_00_15(X1, X2, X3, X4, X5, X6, X7, X0, XT3, 0xf) // Rounds 16-31 ROUND4_16_XX(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x10) ROUND4_16_XX(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x11) ROUND4_16_XX(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x12) ROUND4_16_XX(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x13) ROUND4_16_XX(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x14) ROUND4_16_XX(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x15) ROUND4_16_XX(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x16) ROUND4_16_XX(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x17) ROUND4_16_XX(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x18) ROUND4_16_XX(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x19) ROUND4_16_XX(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x1a) ROUND4_16_XX(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x1b) ROUND4_16_XX(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x1c) ROUND4_16_XX(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x1d) ROUND4_16_XX(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x1e) ROUND4_16_XX(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x1f) // Rounds 32--47 ROUND4_16_XX(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x20) ROUND4_16_XX(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x21) ROUND4_16_XX(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x22) ROUND4_16_XX(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x23) ROUND4_16_XX(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x24) ROUND4_16_XX(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x25) ROUND4_16_XX(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x26) 
ROUND4_16_XX(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x27) ROUND4_16_XX(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x28) ROUND4_16_XX(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x29) ROUND4_16_XX(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x2a) ROUND4_16_XX(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x2b) ROUND4_16_XX(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x2c) ROUND4_16_XX(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x2d) ROUND4_16_XX(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x2e) ROUND4_16_XX(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x2f) // Rounds 48--64 ROUND4_16_XX(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x30) ROUND4_16_XX(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x31) ROUND4_16_XX(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x32) ROUND4_16_XX(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x33) ROUND4_16_XX(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x34) ROUND4_16_XX(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x35) ROUND4_16_XX(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x36) ROUND4_16_XX(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x37) ROUND4_16_XX(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x38) ROUND4_16_XX(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x39) ROUND4_16_XX(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x3a) ROUND4_16_XX(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x3b) ROUND4_16_XX(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x3c) ROUND4_16_XX(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x3d) ROUND4_16_XX(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x3e) ROUND4_16_XX(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x3f) // add old digest MOVQ $_DIGEST_16<>(SB), TBL VPADDD (0*64)(TBL), X0, X0 VPADDD (1*64)(TBL), X1, X1 VPADDD (2*64)(TBL), X2, X2 VPADDD (3*64)(TBL), X3, X3 VPADDD (4*64)(TBL), X4, X4 VPADDD (5*64)(TBL), X5, X5 VPADDD (6*64)(TBL), X6, X6 VPADDD (7*64)(TBL), X7, X7 // rounds with padding // save old digest VMOVDQU X0, (_DIGEST + 0*16)(SP) VMOVDQU X1, (_DIGEST + 1*16)(SP) VMOVDQU X2, (_DIGEST + 2*16)(SP) VMOVDQU X3, (_DIGEST + 3*16)(SP) VMOVDQU X4, (_DIGEST + 4*16)(SP) VMOVDQU X5, (_DIGEST + 5*16)(SP) VMOVDQU X6, (_DIGEST + 6*16)(SP) VMOVDQU X7, (_DIGEST + 7*16)(SP) MOVQ $_PADDING_16<>(SB), TBL ROUND4_00_15_PADD(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x00) ROUND4_00_15_PADD(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x01) ROUND4_00_15_PADD(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x02) ROUND4_00_15_PADD(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x03) ROUND4_00_15_PADD(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x04) ROUND4_00_15_PADD(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x05) ROUND4_00_15_PADD(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x06) ROUND4_00_15_PADD(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x07) ROUND4_00_15_PADD(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x08) ROUND4_00_15_PADD(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x09) ROUND4_00_15_PADD(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x0a) ROUND4_00_15_PADD(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x0b) ROUND4_00_15_PADD(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x0c) ROUND4_00_15_PADD(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x0d) ROUND4_00_15_PADD(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x0e) ROUND4_00_15_PADD(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x0f) ROUND4_00_15_PADD(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x10) ROUND4_00_15_PADD(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x11) ROUND4_00_15_PADD(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x12) ROUND4_00_15_PADD(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x13) ROUND4_00_15_PADD(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x14) ROUND4_00_15_PADD(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x15) ROUND4_00_15_PADD(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x16) ROUND4_00_15_PADD(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x17) ROUND4_00_15_PADD(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x18) 
ROUND4_00_15_PADD(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x19) ROUND4_00_15_PADD(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x1a) ROUND4_00_15_PADD(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x1b) ROUND4_00_15_PADD(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x1c) ROUND4_00_15_PADD(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x1d) ROUND4_00_15_PADD(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x1e) ROUND4_00_15_PADD(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x1f) ROUND4_00_15_PADD(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x20) ROUND4_00_15_PADD(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x21) ROUND4_00_15_PADD(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x22) ROUND4_00_15_PADD(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x23) ROUND4_00_15_PADD(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x24) ROUND4_00_15_PADD(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x25) ROUND4_00_15_PADD(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x26) ROUND4_00_15_PADD(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x27) ROUND4_00_15_PADD(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x28) ROUND4_00_15_PADD(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x29) ROUND4_00_15_PADD(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x2a) ROUND4_00_15_PADD(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x2b) ROUND4_00_15_PADD(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x2c) ROUND4_00_15_PADD(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x2d) ROUND4_00_15_PADD(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x2e) ROUND4_00_15_PADD(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x2f) ROUND4_00_15_PADD(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x30) ROUND4_00_15_PADD(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x31) ROUND4_00_15_PADD(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x32) ROUND4_00_15_PADD(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x33) ROUND4_00_15_PADD(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x34) ROUND4_00_15_PADD(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x35) ROUND4_00_15_PADD(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x36) ROUND4_00_15_PADD(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x37) ROUND4_00_15_PADD(X0, X1, X2, X3, X4, X5, X6, X7, XT0, 0x38) ROUND4_00_15_PADD(X7, X0, X1, X2, X3, X4, X5, X6, XT0, 0x39) ROUND4_00_15_PADD(X6, X7, X0, X1, X2, X3, X4, X5, XT0, 0x3a) ROUND4_00_15_PADD(X5, X6, X7, X0, X1, X2, X3, X4, XT0, 0x3b) ROUND4_00_15_PADD(X4, X5, X6, X7, X0, X1, X2, X3, XT0, 0x3c) ROUND4_00_15_PADD(X3, X4, X5, X6, X7, X0, X1, X2, XT0, 0x3d) ROUND4_00_15_PADD(X2, X3, X4, X5, X6, X7, X0, X1, XT0, 0x3e) ROUND4_00_15_PADD(X1, X2, X3, X4, X5, X6, X7, X0, XT0, 0x3f) // add previous digest VPADDD (_DIGEST + 0*16)(SP), X0, X0 VPADDD (_DIGEST + 1*16)(SP), X1, X1 VPADDD (_DIGEST + 2*16)(SP), X2, X2 VPADDD (_DIGEST + 3*16)(SP), X3, X3 VPADDD (_DIGEST + 4*16)(SP), X4, X4 VPADDD (_DIGEST + 5*16)(SP), X5, X5 VPADDD (_DIGEST + 6*16)(SP), X6, X6 VPADDD (_DIGEST + 7*16)(SP), X7, X7 // transpose the digest and convert to little endian TRANSPOSE_4_U32(X0, X1, X2, X3, XT0, XT1) TRANSPOSE_4_U32(X4, X5, X6, X7, XT2, XT1) VMOVDQU _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), TMP4 VPSHUFB TMP4, XT0, XT0 VPSHUFB TMP4, XT2, XT2 VPSHUFB TMP4, X1, X1 VPSHUFB TMP4, X5, X5 VPSHUFB TMP4, X0, X0 VPSHUFB TMP4, X4, X4 VPSHUFB TMP4, X3, X3 VPSHUFB TMP4, X7, X7 // write to output VMOVDQU XT0, (0*16)(OUTPUT_PTR) VMOVDQU XT2, (1*16)(OUTPUT_PTR) VMOVDQU X1, (2*16)(OUTPUT_PTR) VMOVDQU X5, (3*16)(OUTPUT_PTR) VMOVDQU X0, (4*16)(OUTPUT_PTR) VMOVDQU X4, (5*16)(OUTPUT_PTR) VMOVDQU X3, (6*16)(OUTPUT_PTR) VMOVDQU X7, (7*16)(OUTPUT_PTR) // update pointers and loop ADDQ $256, DATA_PTR ADDQ $128, OUTPUT_PTR SUBL $4, NUM_BLKS JMP avx1 avx1_x1: SHLQ $5, NUM_BLKS ADDQ OUTPUT_PTR, NUM_BLKS VMOVDQU _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), _BYTE_FLIP_MASK VMOVDQU PSHUF_00BA<>(SB), 
_SHUF_00BA VMOVDQU PSHUF_DC00<>(SB), _SHUF_DC00 sha256_avx_1_loop: CMPQ OUTPUT_PTR, NUM_BLKS JEQ sha256_1_avx_epilog // load initial digest MOVL $0x6A09E667, RAL // a = H0 MOVL $0xBB67AE85, RBL // b = H1 MOVL $0x3C6EF372, RCL // c = H2 MOVL $0xA54FF53A, RDL // d = H3 MOVL $0x510E527F, REL // e = H4 MOVL $0x9B05688C, RFL // f = H5 MOVL $0x1F83D9AB, RGL // g = H6 MOVL $0x5BE0CD19, RHL // h = H7 MOVQ $K256<>(SB), TBL // byte swap first 16 dwords COPY_XMM_AND_BSWAP(X0, 0*16(DATA_PTR), _BYTE_FLIP_MASK) COPY_XMM_AND_BSWAP(X1, 1*16(DATA_PTR), _BYTE_FLIP_MASK) COPY_XMM_AND_BSWAP(X2, 2*16(DATA_PTR), _BYTE_FLIP_MASK) COPY_XMM_AND_BSWAP(X3, 3*16(DATA_PTR), _BYTE_FLIP_MASK) // schedule 48 input dwords, by doing 3 rounds of 16 each VPADDD 0*16(TBL), X0, XFER VMOVDQU XFER, (SP) FOUR_ROUNDS_AND_SCHED(RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL, X0, X1, X2, X3) VPADDD 1*16(TBL), X1, XFER VMOVDQU XFER, (SP) FOUR_ROUNDS_AND_SCHED(REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL, X1, X2, X3, X0) VPADDD 2*16(TBL), X2, XFER VMOVDQU XFER, (SP) FOUR_ROUNDS_AND_SCHED(RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL, X2, X3, X0, X1) VPADDD 3*16(TBL), X3, XFER VMOVDQU XFER, (SP) ADDQ $(4*16), TBL FOUR_ROUNDS_AND_SCHED(REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL, X3, X0, X1, X2) VPADDD 0*16(TBL), X0, XFER VMOVDQU XFER, (SP) FOUR_ROUNDS_AND_SCHED(RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL, X0, X1, X2, X3) VPADDD 1*16(TBL), X1, XFER VMOVDQU XFER, (SP) FOUR_ROUNDS_AND_SCHED(REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL, X1, X2, X3, X0) VPADDD 2*16(TBL), X2, XFER VMOVDQU XFER, (SP) FOUR_ROUNDS_AND_SCHED(RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL, X2, X3, X0, X1) VPADDD 3*16(TBL), X3, XFER VMOVDQU XFER, (SP) ADDQ $(4*16), TBL FOUR_ROUNDS_AND_SCHED(REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL, X3, X0, X1, X2) VPADDD 0*16(TBL), X0, XFER VMOVDQU XFER, (SP) FOUR_ROUNDS_AND_SCHED(RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL, X0, X1, X2, X3) VPADDD 1*16(TBL), X1, XFER VMOVDQU XFER, (SP) FOUR_ROUNDS_AND_SCHED(REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL, X1, X2, X3, X0) VPADDD 2*16(TBL), X2, XFER VMOVDQU XFER, (SP) FOUR_ROUNDS_AND_SCHED(RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL, X2, X3, X0, X1) VPADDD 3*16(TBL), X3, XFER VMOVDQU XFER, (SP) ADDQ $(4*16), TBL FOUR_ROUNDS_AND_SCHED(REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL, X3, X0, X1, X2) // Final 16 rounds VPADDD 0*16(TBL), X0, XFER VMOVDQU XFER, (SP) DO_ROUND(SP, 0, RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL) DO_ROUND(SP, 4, RHL, RAL, RBL, RCL, RDL, REL, RFL, RGL) DO_ROUND(SP, 8, RGL, RHL, RAL, RBL, RCL, RDL, REL, RFL) DO_ROUND(SP, 12, RFL, RGL, RHL, RAL, RBL, RCL, RDL, REL) VPADDD 1*16(TBL), X1, XFER VMOVDQU XFER, (SP) ADDQ $(2*16), TBL DO_ROUND(SP, 0, REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL) DO_ROUND(SP, 4, RDL, REL, RFL, RGL, RHL, RAL, RBL, RCL) DO_ROUND(SP, 8, RCL, RDL, REL, RFL, RGL, RHL, RAL, RBL) DO_ROUND(SP, 12, RBL, RCL, RDL, REL, RFL, RGL, RHL, RAL) VMOVDQA X2, X0 VMOVDQA X3, X1 VPADDD 0*16(TBL), X0, XFER VMOVDQU XFER, (SP) DO_ROUND(SP, 0*4, RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL) DO_ROUND(SP, 1*4, RHL, RAL, RBL, RCL, RDL, REL, RFL, RGL) DO_ROUND(SP, 2*4, RGL, RHL, RAL, RBL, RCL, RDL, REL, RFL) DO_ROUND(SP, 3*4, RFL, RGL, RHL, RAL, RBL, RCL, RDL, REL) VPADDD 1*16(TBL), X1, XFER VMOVDQU XFER, (SP) DO_ROUND(SP, 0, REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL) DO_ROUND(SP, 4, RDL, REL, RFL, RGL, RHL, RAL, RBL, RCL) DO_ROUND(SP, 8, RCL, RDL, REL, RFL, RGL, RHL, RAL, RBL) DO_ROUND(SP, 12, RBL, RCL, RDL, REL, RFL, RGL, RHL, RAL) // Add initial digest and save it ADDL $0x6A09E667, RAL // H0 = a + H0 ADDL $0xBB67AE85, RBL // H1 = b + H1 ADDL $0x3C6EF372, 
RCL // H2 = c + H2 ADDL $0xA54FF53A, RDL // H3 = d + H3 ADDL $0x510E527F, REL // H4 = e + H4 ADDL $0x9B05688C, RFL // H5 = f + H5 ADDL $0x1F83D9AB, RGL // H6 = g + H6 ADDL $0x5BE0CD19, RHL // H7 = h + H7 MOVL RAL, tmpdig-(0*4)(SP) MOVL RBL, tmpdig-(1*4)(SP) MOVL RCL, tmpdig-(2*4)(SP) MOVL RDL, tmpdig-(3*4)(SP) MOVL REL, tmpdig-(4*4)(SP) MOVL RFL, tmpdig-(5*4)(SP) MOVL RGL, tmpdig-(6*4)(SP) MOVL RHL, tmpdig-(7*4)(SP) MOVQ $PADDING<>(SB), TBL DO_ROUND(TBL, 0, RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL) DO_ROUND(TBL, 4, RHL, RAL, RBL, RCL, RDL, REL, RFL, RGL) DO_ROUND(TBL, 8, RGL, RHL, RAL, RBL, RCL, RDL, REL, RFL) DO_ROUND(TBL, 12, RFL, RGL, RHL, RAL, RBL, RCL, RDL, REL) DO_ROUND(TBL, 16, REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL) DO_ROUND(TBL, 20, RDL, REL, RFL, RGL, RHL, RAL, RBL, RCL) DO_ROUND(TBL, 24, RCL, RDL, REL, RFL, RGL, RHL, RAL, RBL) DO_ROUND(TBL, 28, RBL, RCL, RDL, REL, RFL, RGL, RHL, RAL) ADDQ $32, TBL DO_ROUND(TBL, 0, RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL) DO_ROUND(TBL, 4, RHL, RAL, RBL, RCL, RDL, REL, RFL, RGL) DO_ROUND(TBL, 8, RGL, RHL, RAL, RBL, RCL, RDL, REL, RFL) DO_ROUND(TBL, 12, RFL, RGL, RHL, RAL, RBL, RCL, RDL, REL) DO_ROUND(TBL, 16, REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL) DO_ROUND(TBL, 20, RDL, REL, RFL, RGL, RHL, RAL, RBL, RCL) DO_ROUND(TBL, 24, RCL, RDL, REL, RFL, RGL, RHL, RAL, RBL) DO_ROUND(TBL, 28, RBL, RCL, RDL, REL, RFL, RGL, RHL, RAL) ADDQ $32, TBL DO_ROUND(TBL, 0, RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL) DO_ROUND(TBL, 4, RHL, RAL, RBL, RCL, RDL, REL, RFL, RGL) DO_ROUND(TBL, 8, RGL, RHL, RAL, RBL, RCL, RDL, REL, RFL) DO_ROUND(TBL, 12, RFL, RGL, RHL, RAL, RBL, RCL, RDL, REL) DO_ROUND(TBL, 16, REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL) DO_ROUND(TBL, 20, RDL, REL, RFL, RGL, RHL, RAL, RBL, RCL) DO_ROUND(TBL, 24, RCL, RDL, REL, RFL, RGL, RHL, RAL, RBL) DO_ROUND(TBL, 28, RBL, RCL, RDL, REL, RFL, RGL, RHL, RAL) ADDQ $32, TBL DO_ROUND(TBL, 0, RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL) DO_ROUND(TBL, 4, RHL, RAL, RBL, RCL, RDL, REL, RFL, RGL) DO_ROUND(TBL, 8, RGL, RHL, RAL, RBL, RCL, RDL, REL, RFL) DO_ROUND(TBL, 12, RFL, RGL, RHL, RAL, RBL, RCL, RDL, REL) DO_ROUND(TBL, 16, REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL) DO_ROUND(TBL, 20, RDL, REL, RFL, RGL, RHL, RAL, RBL, RCL) DO_ROUND(TBL, 24, RCL, RDL, REL, RFL, RGL, RHL, RAL, RBL) DO_ROUND(TBL, 28, RBL, RCL, RDL, REL, RFL, RGL, RHL, RAL) ADDQ $32, TBL DO_ROUND(TBL, 0, RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL) DO_ROUND(TBL, 4, RHL, RAL, RBL, RCL, RDL, REL, RFL, RGL) DO_ROUND(TBL, 8, RGL, RHL, RAL, RBL, RCL, RDL, REL, RFL) DO_ROUND(TBL, 12, RFL, RGL, RHL, RAL, RBL, RCL, RDL, REL) DO_ROUND(TBL, 16, REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL) DO_ROUND(TBL, 20, RDL, REL, RFL, RGL, RHL, RAL, RBL, RCL) DO_ROUND(TBL, 24, RCL, RDL, REL, RFL, RGL, RHL, RAL, RBL) DO_ROUND(TBL, 28, RBL, RCL, RDL, REL, RFL, RGL, RHL, RAL) ADDQ $32, TBL DO_ROUND(TBL, 0, RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL) DO_ROUND(TBL, 4, RHL, RAL, RBL, RCL, RDL, REL, RFL, RGL) DO_ROUND(TBL, 8, RGL, RHL, RAL, RBL, RCL, RDL, REL, RFL) DO_ROUND(TBL, 12, RFL, RGL, RHL, RAL, RBL, RCL, RDL, REL) DO_ROUND(TBL, 16, REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL) DO_ROUND(TBL, 20, RDL, REL, RFL, RGL, RHL, RAL, RBL, RCL) DO_ROUND(TBL, 24, RCL, RDL, REL, RFL, RGL, RHL, RAL, RBL) DO_ROUND(TBL, 28, RBL, RCL, RDL, REL, RFL, RGL, RHL, RAL) ADDQ $32, TBL DO_ROUND(TBL, 0, RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL) DO_ROUND(TBL, 4, RHL, RAL, RBL, RCL, RDL, REL, RFL, RGL) DO_ROUND(TBL, 8, RGL, RHL, RAL, RBL, RCL, RDL, REL, RFL) DO_ROUND(TBL, 12, RFL, RGL, RHL, RAL, RBL, RCL, RDL, REL) DO_ROUND(TBL, 16, REL, RFL, 
RGL, RHL, RAL, RBL, RCL, RDL) DO_ROUND(TBL, 20, RDL, REL, RFL, RGL, RHL, RAL, RBL, RCL) DO_ROUND(TBL, 24, RCL, RDL, REL, RFL, RGL, RHL, RAL, RBL) DO_ROUND(TBL, 28, RBL, RCL, RDL, REL, RFL, RGL, RHL, RAL) ADDQ $32, TBL DO_ROUND(TBL, 0, RAL, RBL, RCL, RDL, REL, RFL, RGL, RHL) DO_ROUND(TBL, 4, RHL, RAL, RBL, RCL, RDL, REL, RFL, RGL) DO_ROUND(TBL, 8, RGL, RHL, RAL, RBL, RCL, RDL, REL, RFL) DO_ROUND(TBL, 12, RFL, RGL, RHL, RAL, RBL, RCL, RDL, REL) DO_ROUND(TBL, 16, REL, RFL, RGL, RHL, RAL, RBL, RCL, RDL) DO_ROUND(TBL, 20, RDL, REL, RFL, RGL, RHL, RAL, RBL, RCL) DO_ROUND(TBL, 24, RCL, RDL, REL, RFL, RGL, RHL, RAL, RBL) DO_ROUND(TBL, 28, RBL, RCL, RDL, REL, RFL, RGL, RHL, RAL) // add the previous digest ADDL tmpdig-(0*4)(SP), RAL ADDL tmpdig-(1*4)(SP), RBL ADDL tmpdig-(2*4)(SP), RCL ADDL tmpdig-(3*4)(SP), RDL ADDL tmpdig-(4*4)(SP), REL ADDL tmpdig-(5*4)(SP), RFL ADDL tmpdig-(6*4)(SP), RGL ADDL tmpdig-(7*4)(SP), RHL BSWAPL RAL BSWAPL RBL BSWAPL RCL BSWAPL RDL BSWAPL REL BSWAPL RFL BSWAPL RGL BSWAPL RHL MOVL RAL, (0*4)(OUTPUT_PTR) MOVL RBL, (1*4)(OUTPUT_PTR) MOVL RCL, (2*4)(OUTPUT_PTR) MOVL RDL, (3*4)(OUTPUT_PTR) MOVL REL, (4*4)(OUTPUT_PTR) MOVL RFL, (5*4)(OUTPUT_PTR) MOVL RGL, (6*4)(OUTPUT_PTR) MOVL RHL, (7*4)(OUTPUT_PTR) ADDQ $64, DATA_PTR ADDQ $32, OUTPUT_PTR JMP sha256_avx_1_loop sha256_1_avx_epilog: RET // 8 blocks at a time with AVX2 avx2: MOVL count+32(FP), NUM_BLKS // NUMBLKS uint32 MOVQ digests+0(FP), OUTPUT_PTR // digests *[][32]byte MOVQ p_base+8(FP), DATA_PTR // p [][32]byte sha256_8_avx2_loop: CMPL NUM_BLKS, $8 JB avx1 MOVQ $_DIGEST_16<>(SB), TBL VMOVDQU (0*64)(TBL), Y0 VMOVDQU (1*64)(TBL), Y1 VMOVDQU (2*64)(TBL), Y2 VMOVDQU (3*64)(TBL), Y3 VMOVDQU (4*64)(TBL), Y4 VMOVDQU (5*64)(TBL), Y5 VMOVDQU (6*64)(TBL), Y6 VMOVDQU (7*64)(TBL), Y7 MOVQ $_K256_16<>(SB), TBL // First 16 rounds TRANSPOSE8_U32_LOAD8(0) VMOVDQU Y6, (YTMP0)(SP) VMOVDQU Y7, (YTMP1)(SP) TRANSPOSE8_U32_PRELOADED VMOVDQU _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), TMP1 VMOVDQU (YTMP0)(SP), Y6 VPSHUFB TMP1, TT0, TT0 VPSHUFB TMP1, TT1, TT1 VPSHUFB TMP1, TT2, TT2 VPSHUFB TMP1, TT3, TT3 VPSHUFB TMP1, TT4, TT4 VPSHUFB TMP1, TT5, TT5 VPSHUFB TMP1, TT6, TT6 VPSHUFB TMP1, TT7, TT7 VMOVDQU (YTMP1)(SP), Y7 VMOVDQU TT4, (YTMP0)(SP) VMOVDQU TT5, (YTMP1)(SP) VMOVDQU TT6, (YTMP2)(SP) VMOVDQU TT7, (YTMP3)(SP) ROUND_00_15(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0) VMOVDQU (YTMP0)(SP), TT0 ROUND_00_15(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT1, 1) VMOVDQU (YTMP1)(SP), TT1 ROUND_00_15(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT2, 2) VMOVDQU (YTMP2)(SP), TT2 ROUND_00_15(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT3, 3) VMOVDQU (YTMP3)(SP), TT3 ROUND_00_15(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 4) ROUND_00_15(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT1, 5) ROUND_00_15(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT2, 6) ROUND_00_15(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT3, 7) TRANSPOSE8_U32_LOAD8(32) VMOVDQU Y6, (YTMP0)(SP) VMOVDQU Y7, (YTMP1)(SP) TRANSPOSE8_U32_PRELOADED VMOVDQU _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), TMP1 VMOVDQU (YTMP0)(SP), Y6 VPSHUFB TMP1, TT0, TT0 VPSHUFB TMP1, TT1, TT1 VPSHUFB TMP1, TT2, TT2 VPSHUFB TMP1, TT3, TT3 VPSHUFB TMP1, TT4, TT4 VPSHUFB TMP1, TT5, TT5 VPSHUFB TMP1, TT6, TT6 VPSHUFB TMP1, TT7, TT7 VMOVDQU (YTMP1)(SP), Y7 VMOVDQU TT4, (YTMP0)(SP) VMOVDQU TT5, (YTMP1)(SP) VMOVDQU TT6, (YTMP2)(SP) VMOVDQU TT7, (YTMP3)(SP) ROUND_00_15(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 8) VMOVDQU (YTMP0)(SP), TT0 ROUND_00_15(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT1, 9) VMOVDQU (YTMP1)(SP), TT1 ROUND_00_15(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT2, 10) VMOVDQU (YTMP2)(SP), TT2 ROUND_00_15(Y5, Y6, Y7, Y0, Y1, Y2, 
Y3, Y4, TT3, 11) VMOVDQU (YTMP3)(SP), TT3 ROUND_00_15(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 12) ROUND_00_15(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT1, 13) ROUND_00_15(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT2, 14) ROUND_00_15(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT3, 15) // Rounds 16-31 ROUND_16_XX(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x10) ROUND_16_XX(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x11) ROUND_16_XX(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x12) ROUND_16_XX(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x13) ROUND_16_XX(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x14) ROUND_16_XX(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x15) ROUND_16_XX(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x16) ROUND_16_XX(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x17) ROUND_16_XX(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x18) ROUND_16_XX(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x19) ROUND_16_XX(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x1a) ROUND_16_XX(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x1b) ROUND_16_XX(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x1c) ROUND_16_XX(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x1d) ROUND_16_XX(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x1e) ROUND_16_XX(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x1f) // Rounds 32--47 ROUND_16_XX(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x20) ROUND_16_XX(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x21) ROUND_16_XX(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x22) ROUND_16_XX(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x23) ROUND_16_XX(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x24) ROUND_16_XX(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x25) ROUND_16_XX(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x26) ROUND_16_XX(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x27) ROUND_16_XX(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x28) ROUND_16_XX(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x29) ROUND_16_XX(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x2a) ROUND_16_XX(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x2b) ROUND_16_XX(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x2c) ROUND_16_XX(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x2d) ROUND_16_XX(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x2e) ROUND_16_XX(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x2f) // Rounds 48--64 ROUND_16_XX(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x30) ROUND_16_XX(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x31) ROUND_16_XX(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x32) ROUND_16_XX(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x33) ROUND_16_XX(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x34) ROUND_16_XX(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x35) ROUND_16_XX(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x36) ROUND_16_XX(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x37) ROUND_16_XX(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x38) ROUND_16_XX(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x39) ROUND_16_XX(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x3a) ROUND_16_XX(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x3b) ROUND_16_XX(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x3c) ROUND_16_XX(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x3d) ROUND_16_XX(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x3e) ROUND_16_XX(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x3f) // add old digest MOVQ $_DIGEST_16<>(SB), TBL VPADDD (0*64)(TBL), Y0, Y0 VPADDD (1*64)(TBL), Y1, Y1 VPADDD (2*64)(TBL), Y2, Y2 VPADDD (3*64)(TBL), Y3, Y3 VPADDD (4*64)(TBL), Y4, Y4 VPADDD (5*64)(TBL), Y5, Y5 VPADDD (6*64)(TBL), Y6, Y6 VPADDD (7*64)(TBL), Y7, Y7 // rounds with padding // save old digest VMOVDQU Y0, (_DIGEST + 0*32)(SP) VMOVDQU Y1, (_DIGEST + 1*32)(SP) VMOVDQU Y2, (_DIGEST + 2*32)(SP) VMOVDQU Y3, (_DIGEST + 3*32)(SP) VMOVDQU Y4, (_DIGEST + 4*32)(SP) VMOVDQU Y5, (_DIGEST + 5*32)(SP) VMOVDQU Y6, (_DIGEST + 6*32)(SP) VMOVDQU Y7, (_DIGEST + 7*32)(SP) MOVQ $_PADDING_16<>(SB), TBL 
ROUND_00_15_PADD(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x00) ROUND_00_15_PADD(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x01) ROUND_00_15_PADD(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x02) ROUND_00_15_PADD(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x03) ROUND_00_15_PADD(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x04) ROUND_00_15_PADD(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x05) ROUND_00_15_PADD(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x06) ROUND_00_15_PADD(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x07) ROUND_00_15_PADD(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x08) ROUND_00_15_PADD(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x09) ROUND_00_15_PADD(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x0a) ROUND_00_15_PADD(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x0b) ROUND_00_15_PADD(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x0c) ROUND_00_15_PADD(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x0d) ROUND_00_15_PADD(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x0e) ROUND_00_15_PADD(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x0f) ROUND_00_15_PADD(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x10) ROUND_00_15_PADD(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x11) ROUND_00_15_PADD(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x12) ROUND_00_15_PADD(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x13) ROUND_00_15_PADD(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x14) ROUND_00_15_PADD(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x15) ROUND_00_15_PADD(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x16) ROUND_00_15_PADD(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x17) ROUND_00_15_PADD(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x18) ROUND_00_15_PADD(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x19) ROUND_00_15_PADD(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x1a) ROUND_00_15_PADD(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x1b) ROUND_00_15_PADD(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x1c) ROUND_00_15_PADD(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x1d) ROUND_00_15_PADD(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x1e) ROUND_00_15_PADD(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x1f) ROUND_00_15_PADD(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x20) ROUND_00_15_PADD(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x21) ROUND_00_15_PADD(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x22) ROUND_00_15_PADD(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x23) ROUND_00_15_PADD(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x24) ROUND_00_15_PADD(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x25) ROUND_00_15_PADD(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x26) ROUND_00_15_PADD(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x27) ROUND_00_15_PADD(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x28) ROUND_00_15_PADD(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x29) ROUND_00_15_PADD(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x2a) ROUND_00_15_PADD(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x2b) ROUND_00_15_PADD(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x2c) ROUND_00_15_PADD(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x2d) ROUND_00_15_PADD(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x2e) ROUND_00_15_PADD(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x2f) ROUND_00_15_PADD(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x30) ROUND_00_15_PADD(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x31) ROUND_00_15_PADD(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x32) ROUND_00_15_PADD(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x33) ROUND_00_15_PADD(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x34) ROUND_00_15_PADD(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x35) ROUND_00_15_PADD(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x36) ROUND_00_15_PADD(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x37) ROUND_00_15_PADD(Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, TT0, 0x38) ROUND_00_15_PADD(Y7, Y0, Y1, Y2, Y3, Y4, Y5, Y6, TT0, 0x39) ROUND_00_15_PADD(Y6, Y7, Y0, Y1, Y2, Y3, Y4, Y5, TT0, 0x3a) 
ROUND_00_15_PADD(Y5, Y6, Y7, Y0, Y1, Y2, Y3, Y4, TT0, 0x3b) ROUND_00_15_PADD(Y4, Y5, Y6, Y7, Y0, Y1, Y2, Y3, TT0, 0x3c) ROUND_00_15_PADD(Y3, Y4, Y5, Y6, Y7, Y0, Y1, Y2, TT0, 0x3d) ROUND_00_15_PADD(Y2, Y3, Y4, Y5, Y6, Y7, Y0, Y1, TT0, 0x3e) ROUND_00_15_PADD(Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y0, TT0, 0x3f) // add previous digest VPADDD (_DIGEST + 0*32)(SP), Y0, Y0 VPADDD (_DIGEST + 1*32)(SP), Y1, Y1 VPADDD (_DIGEST + 2*32)(SP), Y2, Y2 VPADDD (_DIGEST + 3*32)(SP), Y3, Y3 VPADDD (_DIGEST + 4*32)(SP), Y4, Y4 VPADDD (_DIGEST + 5*32)(SP), Y5, Y5 VPADDD (_DIGEST + 6*32)(SP), Y6, Y6 VPADDD (_DIGEST + 7*32)(SP), Y7, Y7 // transpose the digest and convert to little endian to get the registers correctly TRANSPOSE8_U32 VMOVDQU _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), TT0 VPSHUFB TT0, Y0, Y0 VPSHUFB TT0, Y1, Y1 VPSHUFB TT0, Y2, Y2 VPSHUFB TT0, Y3, Y3 VPSHUFB TT0, Y4, Y4 VPSHUFB TT0, Y5, Y5 VPSHUFB TT0, Y6, Y6 VPSHUFB TT0, Y7, Y7 // write to output VMOVDQU Y0, (0*32)(OUTPUT_PTR) VMOVDQU Y1, (1*32)(OUTPUT_PTR) VMOVDQU Y2, (2*32)(OUTPUT_PTR) VMOVDQU Y3, (3*32)(OUTPUT_PTR) VMOVDQU Y4, (4*32)(OUTPUT_PTR) VMOVDQU Y5, (5*32)(OUTPUT_PTR) VMOVDQU Y6, (6*32)(OUTPUT_PTR) VMOVDQU Y7, (7*32)(OUTPUT_PTR) // update pointers and loop ADDQ $512, DATA_PTR ADDQ $256, OUTPUT_PTR SUBL $8, NUM_BLKS JMP sha256_8_avx2_loop // AVX 512 section avx512: MOVQ digests+0(FP), OUTPUT_PTR MOVQ p_base+8(FP), DATA_PTR MOVL count+32(FP), NUM_BLKS MOVQ $_DIGEST_16<>(SB), DIGESTAVX512 MOVQ $_PADDING_16<>(SB), PADDINGAVX512 MOVQ $_K256_16<>(SB), TBL avx512_loop: CMPL NUM_BLKS, $16 JB sha256_8_avx2_loop // Load digest VMOVDQU32 (0*64)(DIGESTAVX512), Z0 VMOVDQU32 (1*64)(DIGESTAVX512), Z1 VMOVDQU32 (2*64)(DIGESTAVX512), Z2 VMOVDQU32 (3*64)(DIGESTAVX512), Z3 VMOVDQU32 (4*64)(DIGESTAVX512), Z4 VMOVDQU32 (5*64)(DIGESTAVX512), Z5 VMOVDQU32 (6*64)(DIGESTAVX512), Z6 VMOVDQU32 (7*64)(DIGESTAVX512), Z7 // Load incoming blocks 16 at a time VMOVUPS (0*64)(DATA_PTR), YW0 VMOVUPS (1*64)(DATA_PTR), YW1 VMOVUPS (2*64)(DATA_PTR), YW2 VMOVUPS (3*64)(DATA_PTR), YW3 VMOVUPS (4*64)(DATA_PTR), YW4 VMOVUPS (5*64)(DATA_PTR), YW5 VMOVUPS (6*64)(DATA_PTR), YW6 VMOVUPS (7*64)(DATA_PTR), YW7 VMOVUPS (0*64+32)(DATA_PTR), YW8 VMOVUPS (1*64+32)(DATA_PTR), YW9 VMOVUPS (2*64+32)(DATA_PTR), YW10 VMOVUPS (3*64+32)(DATA_PTR), YW11 VMOVUPS (4*64+32)(DATA_PTR), YW12 VMOVUPS (5*64+32)(DATA_PTR), YW13 VMOVUPS (6*64+32)(DATA_PTR), YW14 VMOVUPS (7*64+32)(DATA_PTR), YW15 VINSERTI64X4 $0x01, (8*64)(DATA_PTR), W0, W0 VINSERTI64X4 $0x01, (9*64)(DATA_PTR), W1, W1 VINSERTI64X4 $0x01, (10*64)(DATA_PTR), W2, W2 VINSERTI64X4 $0x01, (11*64)(DATA_PTR), W3, W3 VINSERTI64X4 $0x01, (12*64)(DATA_PTR), W4, W4 VINSERTI64X4 $0x01, (13*64)(DATA_PTR), W5, W5 VINSERTI64X4 $0x01, (14*64)(DATA_PTR), W6, W6 VINSERTI64X4 $0x01, (15*64)(DATA_PTR), W7, W7 VINSERTI64X4 $0x01, (8*64+32)(DATA_PTR), W8, W8 VINSERTI64X4 $0x01, (9*64+32)(DATA_PTR), W9, W9 VINSERTI64X4 $0x01, (10*64+32)(DATA_PTR), W10, W10 VINSERTI64X4 $0x01, (11*64+32)(DATA_PTR), W11, W11 VINSERTI64X4 $0x01, (12*64+32)(DATA_PTR), W12, W12 VINSERTI64X4 $0x01, (13*64+32)(DATA_PTR), W13, W13 VINSERTI64X4 $0x01, (14*64+32)(DATA_PTR), W14, W14 VINSERTI64X4 $0x01, (15*64+32)(DATA_PTR), W15, W15 VMOVDQU32 _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), ZTMP2 VMOVDQU32 (TBL), ZTMP3 TRANSPOSE16_U32_PRELOADED VPSHUFB ZTMP2, W0, W0 VPSHUFB ZTMP2, W1, W1 VPSHUFB ZTMP2, W2, W2 VPSHUFB ZTMP2, W3, W3 VPSHUFB ZTMP2, W4, W4 VPSHUFB ZTMP2, W5, W5 VPSHUFB ZTMP2, W6, W6 VPSHUFB ZTMP2, W7, W7 VPSHUFB ZTMP2, W8, W8 VPSHUFB ZTMP2, W9, W9 VPSHUFB ZTMP2, W10, W10 VPSHUFB ZTMP2, W11, W11 VPSHUFB 
ZTMP2, W12, W12 VPSHUFB ZTMP2, W13, W13 VPSHUFB ZTMP2, W14, W14 VPSHUFB ZTMP2, W15, W15 PROCESS_LOOP_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, W0) VMOVDQU32 (0x40*0x01)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W0, W1, W9, W14) PROCESS_LOOP_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, W1) VMOVDQU32 (0x40*0x02)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W1, W2, W10, W15) PROCESS_LOOP_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, W2) VMOVDQU32 (0x40*0x03)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W2, W3, W11, W0) PROCESS_LOOP_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, W3) VMOVDQU32 (0x40*0x04)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W3, W4, W12, W1) PROCESS_LOOP_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, W4) VMOVDQU32 (0x40*0x05)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W4, W5, W13, W2) PROCESS_LOOP_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, W5) VMOVDQU32 (0x40*0x06)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W5, W6, W14, W3) PROCESS_LOOP_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, W6) VMOVDQU32 (0x40*0x07)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W6, W7, W15, W4) PROCESS_LOOP_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, W7) VMOVDQU32 (0x40*0x08)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W7, W8, W0, W5) PROCESS_LOOP_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, W8) VMOVDQU32 (0x40*0x09)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W8, W9, W1, W6) PROCESS_LOOP_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, W9) VMOVDQU32 (0x40*0x0a)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W9, W10, W2, W7) PROCESS_LOOP_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, W10) VMOVDQU32 (0x40*0x0b)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W10, W11, W3, W8) PROCESS_LOOP_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, W11) VMOVDQU32 (0x40*0x0c)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W11, W12, W4, W9) PROCESS_LOOP_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, W12) VMOVDQU32 (0x40*0x0d)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W12, W13, W5, W10) PROCESS_LOOP_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, W13) VMOVDQU32 (0x40*0x0e)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W13, W14, W6, W11) PROCESS_LOOP_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, W14) VMOVDQU32 (0x40*0x0f)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W14, W15, W7, W12) PROCESS_LOOP_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, W15) VMOVDQU32 (0x40*0x10)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W15, W0, W8, W13) PROCESS_LOOP_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, W0) VMOVDQU32 (0x40*0x11)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W0, W1, W9, W14) PROCESS_LOOP_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, W1) VMOVDQU32 (0x40*0x12)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W1, W2, W10, W15) PROCESS_LOOP_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, W2) VMOVDQU32 (0x40*0x13)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W2, W3, W11, W0) PROCESS_LOOP_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, W3) VMOVDQU32 (0x40*0x14)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W3, W4, W12, W1) PROCESS_LOOP_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, W4) VMOVDQU32 (0x40*0x15)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W4, W5, W13, W2) PROCESS_LOOP_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, W5) VMOVDQU32 (0x40*0x16)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W5, W6, W14, W3) PROCESS_LOOP_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, W6) VMOVDQU32 (0x40*0x17)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W6, W7, W15, W4) PROCESS_LOOP_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, W7) VMOVDQU32 (0x40*0x18)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W7, W8, W0, W5) PROCESS_LOOP_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, W8) VMOVDQU32 (0x40*0x19)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W8, W9, W1, W6) PROCESS_LOOP_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, 
W9) VMOVDQU32 (0x40*0x1a)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W9, W10, W2, W7) PROCESS_LOOP_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, W10) VMOVDQU32 (0x40*0x1b)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W10, W11, W3, W8) PROCESS_LOOP_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, W11) VMOVDQU32 (0x40*0x1c)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W11, W12, W4, W9) PROCESS_LOOP_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, W12) VMOVDQU32 (0x40*0x1d)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W12, W13, W5, W10) PROCESS_LOOP_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, W13) VMOVDQU32 (0x40*0x1e)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W13, W14, W6, W11) PROCESS_LOOP_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, W14) VMOVDQU32 (0x40*0x1f)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W14, W15, W7, W12) PROCESS_LOOP_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, W15) VMOVDQU32 (0x40*0x20)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W15, W0, W8, W13) PROCESS_LOOP_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, W0) VMOVDQU32 (0x40*0x21)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W0, W1, W9, W14) PROCESS_LOOP_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, W1) VMOVDQU32 (0x40*0x22)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W1, W2, W10, W15) PROCESS_LOOP_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, W2) VMOVDQU32 (0x40*0x23)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W2, W3, W11, W0) PROCESS_LOOP_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, W3) VMOVDQU32 (0x40*0x24)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W3, W4, W12, W1) PROCESS_LOOP_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, W4) VMOVDQU32 (0x40*0x25)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W4, W5, W13, W2) PROCESS_LOOP_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, W5) VMOVDQU32 (0x40*0x26)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W5, W6, W14, W3) PROCESS_LOOP_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, W6) VMOVDQU32 (0x40*0x27)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W6, W7, W15, W4) PROCESS_LOOP_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, W7) VMOVDQU32 (0x40*0x28)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W7, W8, W0, W5) PROCESS_LOOP_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, W8) VMOVDQU32 (0x40*0x29)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W8, W9, W1, W6) PROCESS_LOOP_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, W9) VMOVDQU32 (0x40*0x2a)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W9, W10, W2, W7) PROCESS_LOOP_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, W10) VMOVDQU32 (0x40*0x2b)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W10, W11, W3, W8) PROCESS_LOOP_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, W11) VMOVDQU32 (0x40*0x2c)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W11, W12, W4, W9) PROCESS_LOOP_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, W12) VMOVDQU32 (0x40*0x2d)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W12, W13, W5, W10) PROCESS_LOOP_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, W13) VMOVDQU32 (0x40*0x2e)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W13, W14, W6, W11) PROCESS_LOOP_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, W14) VMOVDQU32 (0x40*0x2f)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W14, W15, W7, W12) PROCESS_LOOP_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, W15) VMOVDQU32 (0x40*0x30)(TBL), ZTMP3 MSG_SCHED_ROUND_16_63_AVX512(W15, W0, W8, W13) PROCESS_LOOP_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, W0) VMOVDQU32 (0x40*0x31)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, W1) VMOVDQU32 (0x40*0x32)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, W2) VMOVDQU32 (0x40*0x33)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, W3) VMOVDQU32 (0x40*0x34)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, W4) VMOVDQU32 (0x40*0x35)(TBL), ZTMP3 
PROCESS_LOOP_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, W5) VMOVDQU32 (0x40*0x36)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, W6) VMOVDQU32 (0x40*0x37)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, W7) VMOVDQU32 (0x40*0x38)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, W8) VMOVDQU32 (0x40*0x39)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, W9) VMOVDQU32 (0x40*0x3a)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, W10) VMOVDQU32 (0x40*0x3b)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, W11) VMOVDQU32 (0x40*0x3c)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, W12) VMOVDQU32 (0x40*0x3d)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, W13) VMOVDQU32 (0x40*0x3e)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, W14) VMOVDQU32 (0x40*0x3f)(TBL), ZTMP3 PROCESS_LOOP_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, W15) // add old digest VPADDD (0*64)(DIGESTAVX512), Z0, Z0 VPADDD (1*64)(DIGESTAVX512), Z1, Z1 VPADDD (2*64)(DIGESTAVX512), Z2, Z2 VPADDD (3*64)(DIGESTAVX512), Z3, Z3 VPADDD (4*64)(DIGESTAVX512), Z4, Z4 VPADDD (5*64)(DIGESTAVX512), Z5, Z5 VPADDD (6*64)(DIGESTAVX512), Z6, Z6 VPADDD (7*64)(DIGESTAVX512), Z7, Z7 // Save digest for later processing VMOVDQA32 Z0, W0 VMOVDQA32 Z1, W1 VMOVDQA32 Z2, W2 VMOVDQA32 Z3, W3 VMOVDQA32 Z4, W4 VMOVDQA32 Z5, W5 VMOVDQA32 Z6, W6 VMOVDQA32 Z7, W7 // Load transposing masks VMOVDQU32 _PSHUFFLE_TRANSPOSE_MASK3<>(SB), ZTMP5 VMOVDQU32 _PSHUFFLE_TRANSPOSE_MASK4<>(SB), ZTMP6 // Rounds with padding VMOVDQU32 (0x40*0x00)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, ZTMP4) VMOVDQU32 (0x40*0x01)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, ZTMP4) VMOVDQU32 (0x40*0x02)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, ZTMP4) VMOVDQU32 (0x40*0x03)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, ZTMP4) VMOVDQU32 (0x40*0x04)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, ZTMP4) VMOVDQU32 (0x40*0x05)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, ZTMP4) VMOVDQU32 (0x40*0x06)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, ZTMP4) VMOVDQU32 (0x40*0x07)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, ZTMP4) VMOVDQU32 (0x40*0x08)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, ZTMP4) VMOVDQU32 (0x40*0x09)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, ZTMP4) VMOVDQU32 (0x40*0x0a)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, ZTMP4) VMOVDQU32 (0x40*0x0b)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, ZTMP4) VMOVDQU32 (0x40*0x0c)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, ZTMP4) VMOVDQU32 (0x40*0x0d)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, ZTMP4) VMOVDQU32 (0x40*0x0e)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, ZTMP4) VMOVDQU32 (0x40*0x0f)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, ZTMP4) VMOVDQU32 (0x40*0x10)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, ZTMP4) VMOVDQU32 (0x40*0x11)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z7, Z0, 
Z1, Z2, Z3, Z4, Z5, Z6, ZTMP4) VMOVDQU32 (0x40*0x12)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, ZTMP4) VMOVDQU32 (0x40*0x13)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, ZTMP4) VMOVDQU32 (0x40*0x14)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, ZTMP4) VMOVDQU32 (0x40*0x15)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, ZTMP4) VMOVDQU32 (0x40*0x16)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, ZTMP4) VMOVDQU32 (0x40*0x17)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, ZTMP4) VMOVDQU32 (0x40*0x18)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, ZTMP4) VMOVDQU32 (0x40*0x19)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, ZTMP4) VMOVDQU32 (0x40*0x1a)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, ZTMP4) VMOVDQU32 (0x40*0x1b)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, ZTMP4) VMOVDQU32 (0x40*0x1c)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, ZTMP4) VMOVDQU32 (0x40*0x1d)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, ZTMP4) VMOVDQU32 (0x40*0x1e)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, ZTMP4) VMOVDQU32 (0x40*0x1f)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, ZTMP4) VMOVDQU32 (0x40*0x20)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, ZTMP4) VMOVDQU32 (0x40*0x21)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, ZTMP4) VMOVDQU32 (0x40*0x22)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, ZTMP4) VMOVDQU32 (0x40*0x23)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, ZTMP4) VMOVDQU32 (0x40*0x24)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, ZTMP4) VMOVDQU32 (0x40*0x25)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, ZTMP4) VMOVDQU32 (0x40*0x26)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, ZTMP4) VMOVDQU32 (0x40*0x27)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, ZTMP4) VMOVDQU32 (0x40*0x28)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, ZTMP4) VMOVDQU32 (0x40*0x29)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, ZTMP4) VMOVDQU32 (0x40*0x2a)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, ZTMP4) VMOVDQU32 (0x40*0x2b)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, ZTMP4) VMOVDQU32 (0x40*0x2c)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, ZTMP4) VMOVDQU32 (0x40*0x2d)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, ZTMP4) VMOVDQU32 (0x40*0x2e)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, ZTMP4) VMOVDQU32 (0x40*0x2f)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, ZTMP4) VMOVDQU32 (0x40*0x30)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, ZTMP4) VMOVDQU32 (0x40*0x31)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z7, Z0, 
Z1, Z2, Z3, Z4, Z5, Z6, ZTMP4) VMOVDQU32 (0x40*0x32)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, ZTMP4) VMOVDQU32 (0x40*0x33)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, ZTMP4) VMOVDQU32 (0x40*0x34)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, ZTMP4) VMOVDQU32 (0x40*0x35)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, ZTMP4) VMOVDQU32 (0x40*0x36)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, ZTMP4) VMOVDQU32 (0x40*0x37)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, ZTMP4) VMOVDQU32 (0x40*0x38)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7, ZTMP4) VMOVDQU32 (0x40*0x39)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z7, Z0, Z1, Z2, Z3, Z4, Z5, Z6, ZTMP4) VMOVDQU32 (0x40*0x3a)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z6, Z7, Z0, Z1, Z2, Z3, Z4, Z5, ZTMP4) VMOVDQU32 (0x40*0x3b)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z5, Z6, Z7, Z0, Z1, Z2, Z3, Z4, ZTMP4) VMOVDQU32 (0x40*0x3c)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z4, Z5, Z6, Z7, Z0, Z1, Z2, Z3, ZTMP4) VMOVDQU32 (0x40*0x3d)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z3, Z4, Z5, Z6, Z7, Z0, Z1, Z2, ZTMP4) VMOVDQU32 (0x40*0x3e)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z2, Z3, Z4, Z5, Z6, Z7, Z0, Z1, ZTMP4) VMOVDQU32 (0x40*0x3f)(PADDINGAVX512), ZTMP4 PROCESS_LOOP_PADDING_AVX512(Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z0, ZTMP4) VMOVDQU32 _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), W8 // add old digest VPADDD W0, Z0, Z0 VPADDD W1, Z1, Z1 VPADDD W2, Z2, Z2 VPADDD W3, Z3, Z3 VPADDD W4, Z4, Z4 VPADDD W5, Z5, Z5 VPADDD W6, Z6, Z6 VPADDD W7, Z7, Z7 TRANSPOSE_8x16_U32 VPSHUFB W8, Z0, Z0 VPSHUFB W8, Z1, Z1 VPSHUFB W8, Z2, Z2 VPSHUFB W8, Z3, Z3 VPSHUFB W8, Z4, Z4 VPSHUFB W8, Z5, Z5 VPSHUFB W8, Z6, Z6 VPSHUFB W8, Z7, Z7 VMOVDQU32 Z0, (0*64)(OUTPUT_PTR) VMOVDQU32 Z1, (1*64)(OUTPUT_PTR) VMOVDQU32 Z2, (2*64)(OUTPUT_PTR) VMOVDQU32 Z3, (3*64)(OUTPUT_PTR) VMOVDQU32 Z4, (4*64)(OUTPUT_PTR) VMOVDQU32 Z5, (5*64)(OUTPUT_PTR) VMOVDQU32 Z6, (6*64)(OUTPUT_PTR) VMOVDQU32 Z7, (7*64)(OUTPUT_PTR) // update pointers and loop ADDQ $1024, DATA_PTR ADDQ $512, OUTPUT_PTR SUBL $16, NUM_BLKS JMP avx512_loop // SHA-ni section shani: MOVQ digests+0(FP), OUTPUT_PTR // digests *[][32]byte MOVQ p_base+8(FP), DATA_PTR // p [][32]byte MOVL count+32(FP), NUM_BLKS // NUM_BLKS uint32 // Golang assembly does not guarantee stack aligned at 16 bytes MOVQ SP, SAVE_SP ANDQ $~0xf, SP VMOVDQU _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB), SHUF_MASK MOVQ $PADDING<>(SB), SHA256PADDING MOVQ $K256<>(SB), SHA256CONSTANTS shani_loop: CMPL NUM_BLKS, $2 JB shani_x1 VMOVDQU _DIGEST_1<>(SB), STATE0 VMOVDQU _DIGEST_1<>+0x10(SB), STATE1 VMOVDQU _DIGEST_1<>(SB), STATE0b VMOVDQU _DIGEST_1<>+0x10(SB), STATE1b // Rounds 0-3 VMOVDQU (16*0)(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP0 PADDD (0*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 VMOVDQU (16*(0+4))(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP0b PADDD (16*0)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0b, STATE1b VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1b, STATE0b // Rounds 4--7 VMOVDQU (1*16)(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP1 PADDD (1*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 SHA256MSG1 MSGTMP1, MSGTMP0 
VMOVDQU (5*16)(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP1b PADDD (1*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0b, STATE1b VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1b, STATE0b SHA256MSG1 MSGTMP1b, MSGTMP0b // Rounds 8--11 VMOVDQU (2*16)(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP2 PADDD (2*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 SHA256MSG1 MSGTMP2, MSGTMP1 VMOVDQU (6*16)(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP2b PADDD (2*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0b, STATE1b VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1b, STATE0b SHA256MSG1 MSGTMP2b, MSGTMP1b // Rounds 12 -- 15 VMOVDQU (3*16)(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP3 PADDD (3*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VMOVDQA MSGTMP3, MSGTMP4 PALIGNR $0x4, MSGTMP2, MSGTMP4 PADDD MSGTMP4, MSGTMP0 SHA256MSG2 MSGTMP3, MSGTMP0 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 SHA256MSG1 MSGTMP3, MSGTMP2 VMOVDQU (7*16)(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP3b PADDD (3*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0b, STATE1b VMOVDQA MSGTMP3b, MSGTMP4b PALIGNR $0x4, MSGTMP2b, MSGTMP4b PADDD MSGTMP4b, MSGTMP0b SHA256MSG2 MSGTMP3b, MSGTMP0b VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1b, STATE0b SHA256MSG1 MSGTMP3b, MSGTMP2b // Rounds 16-51 ROUNDS_16_XX_SHA(MSGTMP0, MSGTMP1, MSGTMP3, MSGTMP4, STATE0, STATE1, 4) ROUNDS_16_XX_SHA(MSGTMP0b, MSGTMP1b, MSGTMP3b, MSGTMP4b, STATE0b, STATE1b, 4) ROUNDS_16_XX_SHA(MSGTMP1, MSGTMP2, MSGTMP0, MSGTMP4, STATE0, STATE1, 5) ROUNDS_16_XX_SHA(MSGTMP1b, MSGTMP2b, MSGTMP0b, MSGTMP4b, STATE0b, STATE1b, 5) ROUNDS_16_XX_SHA(MSGTMP2, MSGTMP3, MSGTMP1, MSGTMP4, STATE0, STATE1, 6) ROUNDS_16_XX_SHA(MSGTMP2b, MSGTMP3b, MSGTMP1b, MSGTMP4b, STATE0b, STATE1b, 6) ROUNDS_16_XX_SHA(MSGTMP3, MSGTMP0, MSGTMP2, MSGTMP4, STATE0, STATE1, 7) ROUNDS_16_XX_SHA(MSGTMP3b, MSGTMP0b, MSGTMP2b, MSGTMP4b, STATE0b, STATE1b, 7) ROUNDS_16_XX_SHA(MSGTMP0, MSGTMP1, MSGTMP3, MSGTMP4, STATE0, STATE1, 8) ROUNDS_16_XX_SHA(MSGTMP0b, MSGTMP1b, MSGTMP3b, MSGTMP4b, STATE0b, STATE1b, 8) ROUNDS_16_XX_SHA(MSGTMP1, MSGTMP2, MSGTMP0, MSGTMP4, STATE0, STATE1, 9) ROUNDS_16_XX_SHA(MSGTMP1b, MSGTMP2b, MSGTMP0b, MSGTMP4b, STATE0b, STATE1b, 9) ROUNDS_16_XX_SHA(MSGTMP2, MSGTMP3, MSGTMP1, MSGTMP4, STATE0, STATE1, 10) ROUNDS_16_XX_SHA(MSGTMP2b, MSGTMP3b, MSGTMP1b, MSGTMP4b, STATE0b, STATE1b, 10) ROUNDS_16_XX_SHA(MSGTMP3, MSGTMP0, MSGTMP2, MSGTMP4, STATE0, STATE1, 11) ROUNDS_16_XX_SHA(MSGTMP3b, MSGTMP0b, MSGTMP2b, MSGTMP4b, STATE0b, STATE1b, 11) ROUNDS_16_XX_SHA(MSGTMP0, MSGTMP1, MSGTMP3, MSGTMP4, STATE0, STATE1, 12) ROUNDS_16_XX_SHA(MSGTMP0b, MSGTMP1b, MSGTMP3b, MSGTMP4b, STATE0b, STATE1b, 12) // Rounds 52--55 VMOVDQA MSGTMP1, MSG PADDD (13*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VMOVDQA MSGTMP1, MSGTMP4 PALIGNR $4, MSGTMP0, MSGTMP4 PADDD MSGTMP4, MSGTMP2 SHA256MSG2 MSGTMP1, MSGTMP2 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 VMOVDQA MSGTMP1b, MSG PADDD (13*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0b, STATE1b VMOVDQA MSGTMP1b, MSGTMP4b PALIGNR $4, MSGTMP0b, MSGTMP4b PADDD MSGTMP4b, MSGTMP2b SHA256MSG2 MSGTMP1b, MSGTMP2b VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1b, STATE0b // Rounds 56-59 VMOVDQA MSGTMP2, MSG PADDD (14*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VMOVDQA MSGTMP2, MSGTMP4 PALIGNR $4, MSGTMP1, MSGTMP4 PADDD MSGTMP4, MSGTMP3 SHA256MSG2 MSGTMP2, MSGTMP3 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 
VMOVDQA MSGTMP2b, MSG PADDD (14*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0b, STATE1b VMOVDQA MSGTMP2b, MSGTMP4b PALIGNR $4, MSGTMP1b, MSGTMP4b PADDD MSGTMP4b, MSGTMP3b SHA256MSG2 MSGTMP2b, MSGTMP3b VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1b, STATE0b // Rounds 60--63 VMOVDQA MSGTMP3, MSG PADDD (15*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 VMOVDQA MSGTMP3b, MSG PADDD (15*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0b, STATE1b VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1b, STATE0b // Add previous digests PADDD _DIGEST_1<>(SB), STATE0 PADDD _DIGEST_1<>+16(SB), STATE1 PADDD _DIGEST_1<>(SB), STATE0b PADDD _DIGEST_1<>+16(SB), STATE1b // Rounds with padding // Save previous digest VMOVDQU STATE0, ( _DIGEST + 0*16)(SP) VMOVDQU STATE1, ( _DIGEST + 1*16)(SP) VMOVDQU STATE0b, ( _DIGEST + 2*16)(SP) VMOVDQU STATE1b, ( _DIGEST + 3*16)(SP) ROUND_PADD_SHA(0x0) ROUND_PADD_SHA(0x1) ROUND_PADD_SHA(0x2) ROUND_PADD_SHA(0x3) ROUND_PADD_SHA(0x4) ROUND_PADD_SHA(0x5) ROUND_PADD_SHA(0x6) ROUND_PADD_SHA(0x7) ROUND_PADD_SHA(0x8) ROUND_PADD_SHA(0x9) ROUND_PADD_SHA(0xa) ROUND_PADD_SHA(0xb) ROUND_PADD_SHA(0xc) ROUND_PADD_SHA(0xd) ROUND_PADD_SHA(0xe) ROUND_PADD_SHA(0xf) PADDD ( _DIGEST + 0*16 )(SP), STATE0 PADDD ( _DIGEST + 1*16 )(SP), STATE1 PADDD ( _DIGEST + 2*16 )(SP), STATE0b PADDD ( _DIGEST + 3*16 )(SP), STATE1b // Write hash values back in the correct order PSHUFD $0x1B, STATE0, STATE0 PSHUFD $0xB1, STATE1, STATE1 PSHUFD $0x1B, STATE0b, STATE0b PSHUFD $0xB1, STATE1b, STATE1b VMOVDQA STATE0, MSGTMP4 VMOVDQA STATE0b, MSGTMP4b PBLENDW $0xF0, STATE1, STATE0 PBLENDW $0xF0, STATE1b, STATE0b PALIGNR $0x8, MSGTMP4, STATE1 PALIGNR $0x8, MSGTMP4b, STATE1b PSHUFB SHUF_MASK, STATE0 PSHUFB SHUF_MASK, STATE0b PSHUFB SHUF_MASK, STATE1 PSHUFB SHUF_MASK, STATE1b VMOVDQU STATE0, (0*16)(OUTPUT_PTR) VMOVDQU STATE1, (1*16)(OUTPUT_PTR) VMOVDQU STATE0b, (2*16)(OUTPUT_PTR) VMOVDQU STATE1b, (3*16)(OUTPUT_PTR) // Increment data pointer and loop if more to process ADDQ $128, DATA_PTR ADDQ $64, OUTPUT_PTR SUBL $2, NUM_BLKS JMP shani_loop shani_x1: TESTL NUM_BLKS, NUM_BLKS JZ shani_epilog VMOVDQU _DIGEST_1<>(SB), STATE0 VMOVDQU _DIGEST_1<>+0x10(SB), STATE1 // Save hash values for addition after rounds VMOVDQA STATE0, ABEF_SAVE VMOVDQA STATE1, CDGH_SAVE // Rounds 0-3 VMOVDQU (16*0)(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP0 PADDD (0*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 // Rounds 4--7 VMOVDQU (1*16)(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP1 PADDD (1*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 SHA256MSG1 MSGTMP1, MSGTMP0 // Rounds 8--11 VMOVDQU (2*16)(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP2 PADDD (2*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 SHA256MSG1 MSGTMP2, MSGTMP1 // Rounds 12 -- 15 VMOVDQU (3*16)(DATA_PTR), MSG PSHUFB SHUF_MASK, MSG VMOVDQA MSG, MSGTMP3 PADDD (3*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VMOVDQA MSGTMP3, MSGTMP4 PALIGNR $0x4, MSGTMP2, MSGTMP4 PADDD MSGTMP4, MSGTMP0 SHA256MSG2 MSGTMP3, MSGTMP0 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 SHA256MSG1 MSGTMP3, MSGTMP2 // Rounds 16-51 ROUNDS_16_XX_SHA(MSGTMP0, MSGTMP1, MSGTMP3, MSGTMP4, STATE0, STATE1, 4) ROUNDS_16_XX_SHA(MSGTMP1, MSGTMP2, MSGTMP0, MSGTMP4, STATE0, STATE1, 5) ROUNDS_16_XX_SHA(MSGTMP2, MSGTMP3, 
MSGTMP1, MSGTMP4, STATE0, STATE1, 6) ROUNDS_16_XX_SHA(MSGTMP3, MSGTMP0, MSGTMP2, MSGTMP4, STATE0, STATE1, 7) ROUNDS_16_XX_SHA(MSGTMP0, MSGTMP1, MSGTMP3, MSGTMP4, STATE0, STATE1, 8) ROUNDS_16_XX_SHA(MSGTMP1, MSGTMP2, MSGTMP0, MSGTMP4, STATE0, STATE1, 9) ROUNDS_16_XX_SHA(MSGTMP2, MSGTMP3, MSGTMP1, MSGTMP4, STATE0, STATE1, 10) ROUNDS_16_XX_SHA(MSGTMP3, MSGTMP0, MSGTMP2, MSGTMP4, STATE0, STATE1, 11) ROUNDS_16_XX_SHA(MSGTMP0, MSGTMP1, MSGTMP3, MSGTMP4, STATE0, STATE1, 12) // Rounds 52--55 VMOVDQA MSGTMP1, MSG PADDD (13*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VMOVDQA MSGTMP1, MSGTMP4 PALIGNR $4, MSGTMP0, MSGTMP4 PADDD MSGTMP4, MSGTMP2 SHA256MSG2 MSGTMP1, MSGTMP2 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 // Rounds 56-59 VMOVDQA MSGTMP2, MSG PADDD (14*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VMOVDQA MSGTMP2, MSGTMP4 PALIGNR $4, MSGTMP1, MSGTMP4 PADDD MSGTMP4, MSGTMP3 SHA256MSG2 MSGTMP2, MSGTMP3 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 // Rounds 60--63 VMOVDQA MSGTMP3, MSG PADDD (15*16)(SHA256CONSTANTS), MSG SHA256RNDS2 X0, STATE0, STATE1 VPSHUFD $0x0E, MSG, MSG SHA256RNDS2 X0, STATE1, STATE0 // Add current hash values with previously saved PADDD ABEF_SAVE, STATE0 PADDD CDGH_SAVE, STATE1 // Rounds with PADDING // Save hash values for addition after rounds VMOVDQA STATE0, ABEF_SAVE VMOVDQA STATE1, CDGH_SAVE ROUND_PADD_SHA_x1(0x0) ROUND_PADD_SHA_x1(0x1) ROUND_PADD_SHA_x1(0x2) ROUND_PADD_SHA_x1(0x3) ROUND_PADD_SHA_x1(0x4) ROUND_PADD_SHA_x1(0x5) ROUND_PADD_SHA_x1(0x6) ROUND_PADD_SHA_x1(0x7) ROUND_PADD_SHA_x1(0x8) ROUND_PADD_SHA_x1(0x9) ROUND_PADD_SHA_x1(0xa) ROUND_PADD_SHA_x1(0xb) ROUND_PADD_SHA_x1(0xc) ROUND_PADD_SHA_x1(0xd) ROUND_PADD_SHA_x1(0xe) ROUND_PADD_SHA_x1(0xf) // Add current hash values with previously saved PADDD ABEF_SAVE, STATE0 PADDD CDGH_SAVE, STATE1 // Write hash values back in the correct order PSHUFD $0x1B, STATE0, STATE0 PSHUFD $0xB1, STATE1, STATE1 VMOVDQA STATE0, MSGTMP4 PBLENDW $0xF0, STATE1, STATE0 PALIGNR $0x8, MSGTMP4, STATE1 PSHUFB SHUF_MASK, STATE0 PSHUFB SHUF_MASK, STATE1 VMOVDQU STATE0, (0*16)(OUTPUT_PTR) VMOVDQU STATE1, (1*16)(OUTPUT_PTR) shani_epilog: MOVQ SAVE_SP, SP RET // Data section DATA K256<>+0x00(SB)/4, $0x428a2f98 DATA K256<>+0x04(SB)/4, $0x71374491 DATA K256<>+0x08(SB)/4, $0xb5c0fbcf DATA K256<>+0x0c(SB)/4, $0xe9b5dba5 DATA K256<>+0x10(SB)/4, $0x3956c25b DATA K256<>+0x14(SB)/4, $0x59f111f1 DATA K256<>+0x18(SB)/4, $0x923f82a4 DATA K256<>+0x1c(SB)/4, $0xab1c5ed5 DATA K256<>+0x20(SB)/4, $0xd807aa98 DATA K256<>+0x24(SB)/4, $0x12835b01 DATA K256<>+0x28(SB)/4, $0x243185be DATA K256<>+0x2c(SB)/4, $0x550c7dc3 DATA K256<>+0x30(SB)/4, $0x72be5d74 DATA K256<>+0x34(SB)/4, $0x80deb1fe DATA K256<>+0x38(SB)/4, $0x9bdc06a7 DATA K256<>+0x3c(SB)/4, $0xc19bf174 DATA K256<>+0x40(SB)/4, $0xe49b69c1 DATA K256<>+0x44(SB)/4, $0xefbe4786 DATA K256<>+0x48(SB)/4, $0x0fc19dc6 DATA K256<>+0x4c(SB)/4, $0x240ca1cc DATA K256<>+0x50(SB)/4, $0x2de92c6f DATA K256<>+0x54(SB)/4, $0x4a7484aa DATA K256<>+0x58(SB)/4, $0x5cb0a9dc DATA K256<>+0x5c(SB)/4, $0x76f988da DATA K256<>+0x60(SB)/4, $0x983e5152 DATA K256<>+0x64(SB)/4, $0xa831c66d DATA K256<>+0x68(SB)/4, $0xb00327c8 DATA K256<>+0x6c(SB)/4, $0xbf597fc7 DATA K256<>+0x70(SB)/4, $0xc6e00bf3 DATA K256<>+0x74(SB)/4, $0xd5a79147 DATA K256<>+0x78(SB)/4, $0x06ca6351 DATA K256<>+0x7c(SB)/4, $0x14292967 DATA K256<>+0x80(SB)/4, $0x27b70a85 DATA K256<>+0x84(SB)/4, $0x2e1b2138 DATA K256<>+0x88(SB)/4, $0x4d2c6dfc DATA K256<>+0x8c(SB)/4, $0x53380d13 DATA K256<>+0x90(SB)/4, $0x650a7354 DATA 
K256<>+0x94(SB)/4, $0x766a0abb DATA K256<>+0x98(SB)/4, $0x81c2c92e DATA K256<>+0x9c(SB)/4, $0x92722c85 DATA K256<>+0xa0(SB)/4, $0xa2bfe8a1 DATA K256<>+0xa4(SB)/4, $0xa81a664b DATA K256<>+0xa8(SB)/4, $0xc24b8b70 DATA K256<>+0xac(SB)/4, $0xc76c51a3 DATA K256<>+0xb0(SB)/4, $0xd192e819 DATA K256<>+0xb4(SB)/4, $0xd6990624 DATA K256<>+0xb8(SB)/4, $0xf40e3585 DATA K256<>+0xbc(SB)/4, $0x106aa070 DATA K256<>+0xc0(SB)/4, $0x19a4c116 DATA K256<>+0xc4(SB)/4, $0x1e376c08 DATA K256<>+0xc8(SB)/4, $0x2748774c DATA K256<>+0xcc(SB)/4, $0x34b0bcb5 DATA K256<>+0xd0(SB)/4, $0x391c0cb3 DATA K256<>+0xd4(SB)/4, $0x4ed8aa4a DATA K256<>+0xd8(SB)/4, $0x5b9cca4f DATA K256<>+0xdc(SB)/4, $0x682e6ff3 DATA K256<>+0xe0(SB)/4, $0x748f82ee DATA K256<>+0xe4(SB)/4, $0x78a5636f DATA K256<>+0xe8(SB)/4, $0x84c87814 DATA K256<>+0xec(SB)/4, $0x8cc70208 DATA K256<>+0xf0(SB)/4, $0x90befffa DATA K256<>+0xf4(SB)/4, $0xa4506ceb DATA K256<>+0xf8(SB)/4, $0xbef9a3f7 DATA K256<>+0xfc(SB)/4, $0xc67178f2 GLOBL K256<>(SB),(NOPTR+RODATA),$256 DATA PADDING<>+0x00(SB)/4, $0xc28a2f98 DATA PADDING<>+0x04(SB)/4, $0x71374491 DATA PADDING<>+0x08(SB)/4, $0xb5c0fbcf DATA PADDING<>+0x0c(SB)/4, $0xe9b5dba5 DATA PADDING<>+0x10(SB)/4, $0x3956c25b DATA PADDING<>+0x14(SB)/4, $0x59f111f1 DATA PADDING<>+0x18(SB)/4, $0x923f82a4 DATA PADDING<>+0x1c(SB)/4, $0xab1c5ed5 DATA PADDING<>+0x20(SB)/4, $0xd807aa98 DATA PADDING<>+0x24(SB)/4, $0x12835b01 DATA PADDING<>+0x28(SB)/4, $0x243185be DATA PADDING<>+0x2c(SB)/4, $0x550c7dc3 DATA PADDING<>+0x30(SB)/4, $0x72be5d74 DATA PADDING<>+0x34(SB)/4, $0x80deb1fe DATA PADDING<>+0x38(SB)/4, $0x9bdc06a7 DATA PADDING<>+0x3c(SB)/4, $0xc19bf374 DATA PADDING<>+0x40(SB)/4, $0x649b69c1 DATA PADDING<>+0x44(SB)/4, $0xf0fe4786 DATA PADDING<>+0x48(SB)/4, $0x0fe1edc6 DATA PADDING<>+0x4c(SB)/4, $0x240cf254 DATA PADDING<>+0x50(SB)/4, $0x4fe9346f DATA PADDING<>+0x54(SB)/4, $0x6cc984be DATA PADDING<>+0x58(SB)/4, $0x61b9411e DATA PADDING<>+0x5c(SB)/4, $0x16f988fa DATA PADDING<>+0x60(SB)/4, $0xf2c65152 DATA PADDING<>+0x64(SB)/4, $0xa88e5a6d DATA PADDING<>+0x68(SB)/4, $0xb019fc65 DATA PADDING<>+0x6c(SB)/4, $0xb9d99ec7 DATA PADDING<>+0x70(SB)/4, $0x9a1231c3 DATA PADDING<>+0x74(SB)/4, $0xe70eeaa0 DATA PADDING<>+0x78(SB)/4, $0xfdb1232b DATA PADDING<>+0x7c(SB)/4, $0xc7353eb0 DATA PADDING<>+0x80(SB)/4, $0x3069bad5 DATA PADDING<>+0x84(SB)/4, $0xcb976d5f DATA PADDING<>+0x88(SB)/4, $0x5a0f118f DATA PADDING<>+0x8c(SB)/4, $0xdc1eeefd DATA PADDING<>+0x90(SB)/4, $0x0a35b689 DATA PADDING<>+0x94(SB)/4, $0xde0b7a04 DATA PADDING<>+0x98(SB)/4, $0x58f4ca9d DATA PADDING<>+0x9c(SB)/4, $0xe15d5b16 DATA PADDING<>+0xa0(SB)/4, $0x007f3e86 DATA PADDING<>+0xa4(SB)/4, $0x37088980 DATA PADDING<>+0xa8(SB)/4, $0xa507ea32 DATA PADDING<>+0xac(SB)/4, $0x6fab9537 DATA PADDING<>+0xb0(SB)/4, $0x17406110 DATA PADDING<>+0xb4(SB)/4, $0x0d8cd6f1 DATA PADDING<>+0xb8(SB)/4, $0xcdaa3b6d DATA PADDING<>+0xbc(SB)/4, $0xc0bbbe37 DATA PADDING<>+0xc0(SB)/4, $0x83613bda DATA PADDING<>+0xc4(SB)/4, $0xdb48a363 DATA PADDING<>+0xc8(SB)/4, $0x0b02e931 DATA PADDING<>+0xcc(SB)/4, $0x6fd15ca7 DATA PADDING<>+0xd0(SB)/4, $0x521afaca DATA PADDING<>+0xd4(SB)/4, $0x31338431 DATA PADDING<>+0xd8(SB)/4, $0x6ed41a95 DATA PADDING<>+0xdc(SB)/4, $0x6d437890 DATA PADDING<>+0xe0(SB)/4, $0xc39c91f2 DATA PADDING<>+0xe4(SB)/4, $0x9eccabbd DATA PADDING<>+0xe8(SB)/4, $0xb5c9a0e6 DATA PADDING<>+0xec(SB)/4, $0x532fb63c DATA PADDING<>+0xf0(SB)/4, $0xd2c741c6 DATA PADDING<>+0xf4(SB)/4, $0x07237ea3 DATA PADDING<>+0xf8(SB)/4, $0xa4954b68 DATA PADDING<>+0xfc(SB)/4, $0x4c191d76 GLOBL PADDING<>(SB),(NOPTR+RODATA),$256 DATA 
_DIGEST_1<>+0x00(SB)/8, $0x510e527f9b05688c DATA _DIGEST_1<>+0x08(SB)/8, $0x6a09e667bb67ae85 DATA _DIGEST_1<>+0x10(SB)/8, $0x1f83d9ab5be0cd19 DATA _DIGEST_1<>+0x18(SB)/8, $0x3c6ef372a54ff53a GLOBL _DIGEST_1<>(SB),(NOPTR+RODATA),$32 DATA PSHUF_00BA<>+0x00(SB)/8, $0x0b0a090803020100 DATA PSHUF_00BA<>+0x08(SB)/8, $0xFFFFFFFFFFFFFFFF GLOBL PSHUF_00BA<>(SB),(NOPTR+RODATA),$16 DATA PSHUF_DC00<>+0x00(SB)/8, $0xFFFFFFFFFFFFFFFF DATA PSHUF_DC00<>+0x08(SB)/8, $0x0b0a090803020100 GLOBL PSHUF_DC00<>(SB),(NOPTR+RODATA),$16 // Data section AVX2 x8 DATA _K256_16<>+0(SB)/8, $0x428a2f98428a2f98 DATA _K256_16<>+8(SB)/8, $0x428a2f98428a2f98 DATA _K256_16<>+16(SB)/8, $0x428a2f98428a2f98 DATA _K256_16<>+24(SB)/8, $0x428a2f98428a2f98 DATA _K256_16<>+32(SB)/8, $0x428a2f98428a2f98 DATA _K256_16<>+40(SB)/8, $0x428a2f98428a2f98 DATA _K256_16<>+48(SB)/8, $0x428a2f98428a2f98 DATA _K256_16<>+56(SB)/8, $0x428a2f98428a2f98 DATA _K256_16<>+64(SB)/8, $0x7137449171374491 DATA _K256_16<>+72(SB)/8, $0x7137449171374491 DATA _K256_16<>+80(SB)/8, $0x7137449171374491 DATA _K256_16<>+88(SB)/8, $0x7137449171374491 DATA _K256_16<>+96(SB)/8, $0x7137449171374491 DATA _K256_16<>+104(SB)/8, $0x7137449171374491 DATA _K256_16<>+112(SB)/8, $0x7137449171374491 DATA _K256_16<>+120(SB)/8, $0x7137449171374491 DATA _K256_16<>+128(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _K256_16<>+136(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _K256_16<>+144(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _K256_16<>+152(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _K256_16<>+160(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _K256_16<>+168(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _K256_16<>+176(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _K256_16<>+184(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _K256_16<>+192(SB)/8, $0xe9b5dba5e9b5dba5 DATA _K256_16<>+200(SB)/8, $0xe9b5dba5e9b5dba5 DATA _K256_16<>+208(SB)/8, $0xe9b5dba5e9b5dba5 DATA _K256_16<>+216(SB)/8, $0xe9b5dba5e9b5dba5 DATA _K256_16<>+224(SB)/8, $0xe9b5dba5e9b5dba5 DATA _K256_16<>+232(SB)/8, $0xe9b5dba5e9b5dba5 DATA _K256_16<>+240(SB)/8, $0xe9b5dba5e9b5dba5 DATA _K256_16<>+248(SB)/8, $0xe9b5dba5e9b5dba5 DATA _K256_16<>+256(SB)/8, $0x3956c25b3956c25b DATA _K256_16<>+264(SB)/8, $0x3956c25b3956c25b DATA _K256_16<>+272(SB)/8, $0x3956c25b3956c25b DATA _K256_16<>+280(SB)/8, $0x3956c25b3956c25b DATA _K256_16<>+288(SB)/8, $0x3956c25b3956c25b DATA _K256_16<>+296(SB)/8, $0x3956c25b3956c25b DATA _K256_16<>+304(SB)/8, $0x3956c25b3956c25b DATA _K256_16<>+312(SB)/8, $0x3956c25b3956c25b DATA _K256_16<>+320(SB)/8, $0x59f111f159f111f1 DATA _K256_16<>+328(SB)/8, $0x59f111f159f111f1 DATA _K256_16<>+336(SB)/8, $0x59f111f159f111f1 DATA _K256_16<>+344(SB)/8, $0x59f111f159f111f1 DATA _K256_16<>+352(SB)/8, $0x59f111f159f111f1 DATA _K256_16<>+360(SB)/8, $0x59f111f159f111f1 DATA _K256_16<>+368(SB)/8, $0x59f111f159f111f1 DATA _K256_16<>+376(SB)/8, $0x59f111f159f111f1 DATA _K256_16<>+384(SB)/8, $0x923f82a4923f82a4 DATA _K256_16<>+392(SB)/8, $0x923f82a4923f82a4 DATA _K256_16<>+400(SB)/8, $0x923f82a4923f82a4 DATA _K256_16<>+408(SB)/8, $0x923f82a4923f82a4 DATA _K256_16<>+416(SB)/8, $0x923f82a4923f82a4 DATA _K256_16<>+424(SB)/8, $0x923f82a4923f82a4 DATA _K256_16<>+432(SB)/8, $0x923f82a4923f82a4 DATA _K256_16<>+440(SB)/8, $0x923f82a4923f82a4 DATA _K256_16<>+448(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _K256_16<>+456(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _K256_16<>+464(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _K256_16<>+472(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _K256_16<>+480(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _K256_16<>+488(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _K256_16<>+496(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _K256_16<>+504(SB)/8, $0xab1c5ed5ab1c5ed5 DATA 
_K256_16<>+512(SB)/8, $0xd807aa98d807aa98 DATA _K256_16<>+520(SB)/8, $0xd807aa98d807aa98 DATA _K256_16<>+528(SB)/8, $0xd807aa98d807aa98 DATA _K256_16<>+536(SB)/8, $0xd807aa98d807aa98 DATA _K256_16<>+544(SB)/8, $0xd807aa98d807aa98 DATA _K256_16<>+552(SB)/8, $0xd807aa98d807aa98 DATA _K256_16<>+560(SB)/8, $0xd807aa98d807aa98 DATA _K256_16<>+568(SB)/8, $0xd807aa98d807aa98 DATA _K256_16<>+576(SB)/8, $0x12835b0112835b01 DATA _K256_16<>+584(SB)/8, $0x12835b0112835b01 DATA _K256_16<>+592(SB)/8, $0x12835b0112835b01 DATA _K256_16<>+600(SB)/8, $0x12835b0112835b01 DATA _K256_16<>+608(SB)/8, $0x12835b0112835b01 DATA _K256_16<>+616(SB)/8, $0x12835b0112835b01 DATA _K256_16<>+624(SB)/8, $0x12835b0112835b01 DATA _K256_16<>+632(SB)/8, $0x12835b0112835b01 DATA _K256_16<>+640(SB)/8, $0x243185be243185be DATA _K256_16<>+648(SB)/8, $0x243185be243185be DATA _K256_16<>+656(SB)/8, $0x243185be243185be DATA _K256_16<>+664(SB)/8, $0x243185be243185be DATA _K256_16<>+672(SB)/8, $0x243185be243185be DATA _K256_16<>+680(SB)/8, $0x243185be243185be DATA _K256_16<>+688(SB)/8, $0x243185be243185be DATA _K256_16<>+696(SB)/8, $0x243185be243185be DATA _K256_16<>+704(SB)/8, $0x550c7dc3550c7dc3 DATA _K256_16<>+712(SB)/8, $0x550c7dc3550c7dc3 DATA _K256_16<>+720(SB)/8, $0x550c7dc3550c7dc3 DATA _K256_16<>+728(SB)/8, $0x550c7dc3550c7dc3 DATA _K256_16<>+736(SB)/8, $0x550c7dc3550c7dc3 DATA _K256_16<>+744(SB)/8, $0x550c7dc3550c7dc3 DATA _K256_16<>+752(SB)/8, $0x550c7dc3550c7dc3 DATA _K256_16<>+760(SB)/8, $0x550c7dc3550c7dc3 DATA _K256_16<>+768(SB)/8, $0x72be5d7472be5d74 DATA _K256_16<>+776(SB)/8, $0x72be5d7472be5d74 DATA _K256_16<>+784(SB)/8, $0x72be5d7472be5d74 DATA _K256_16<>+792(SB)/8, $0x72be5d7472be5d74 DATA _K256_16<>+800(SB)/8, $0x72be5d7472be5d74 DATA _K256_16<>+808(SB)/8, $0x72be5d7472be5d74 DATA _K256_16<>+816(SB)/8, $0x72be5d7472be5d74 DATA _K256_16<>+824(SB)/8, $0x72be5d7472be5d74 DATA _K256_16<>+832(SB)/8, $0x80deb1fe80deb1fe DATA _K256_16<>+840(SB)/8, $0x80deb1fe80deb1fe DATA _K256_16<>+848(SB)/8, $0x80deb1fe80deb1fe DATA _K256_16<>+856(SB)/8, $0x80deb1fe80deb1fe DATA _K256_16<>+864(SB)/8, $0x80deb1fe80deb1fe DATA _K256_16<>+872(SB)/8, $0x80deb1fe80deb1fe DATA _K256_16<>+880(SB)/8, $0x80deb1fe80deb1fe DATA _K256_16<>+888(SB)/8, $0x80deb1fe80deb1fe DATA _K256_16<>+896(SB)/8, $0x9bdc06a79bdc06a7 DATA _K256_16<>+904(SB)/8, $0x9bdc06a79bdc06a7 DATA _K256_16<>+912(SB)/8, $0x9bdc06a79bdc06a7 DATA _K256_16<>+920(SB)/8, $0x9bdc06a79bdc06a7 DATA _K256_16<>+928(SB)/8, $0x9bdc06a79bdc06a7 DATA _K256_16<>+936(SB)/8, $0x9bdc06a79bdc06a7 DATA _K256_16<>+944(SB)/8, $0x9bdc06a79bdc06a7 DATA _K256_16<>+952(SB)/8, $0x9bdc06a79bdc06a7 DATA _K256_16<>+960(SB)/8, $0xc19bf174c19bf174 DATA _K256_16<>+968(SB)/8, $0xc19bf174c19bf174 DATA _K256_16<>+976(SB)/8, $0xc19bf174c19bf174 DATA _K256_16<>+984(SB)/8, $0xc19bf174c19bf174 DATA _K256_16<>+992(SB)/8, $0xc19bf174c19bf174 DATA _K256_16<>+1000(SB)/8, $0xc19bf174c19bf174 DATA _K256_16<>+1008(SB)/8, $0xc19bf174c19bf174 DATA _K256_16<>+1016(SB)/8, $0xc19bf174c19bf174 DATA _K256_16<>+1024(SB)/8, $0xe49b69c1e49b69c1 DATA _K256_16<>+1032(SB)/8, $0xe49b69c1e49b69c1 DATA _K256_16<>+1040(SB)/8, $0xe49b69c1e49b69c1 DATA _K256_16<>+1048(SB)/8, $0xe49b69c1e49b69c1 DATA _K256_16<>+1056(SB)/8, $0xe49b69c1e49b69c1 DATA _K256_16<>+1064(SB)/8, $0xe49b69c1e49b69c1 DATA _K256_16<>+1072(SB)/8, $0xe49b69c1e49b69c1 DATA _K256_16<>+1080(SB)/8, $0xe49b69c1e49b69c1 DATA _K256_16<>+1088(SB)/8, $0xefbe4786efbe4786 DATA _K256_16<>+1096(SB)/8, $0xefbe4786efbe4786 DATA _K256_16<>+1104(SB)/8, $0xefbe4786efbe4786 DATA 
_K256_16<>+1112(SB)/8, $0xefbe4786efbe4786 DATA _K256_16<>+1120(SB)/8, $0xefbe4786efbe4786 DATA _K256_16<>+1128(SB)/8, $0xefbe4786efbe4786 DATA _K256_16<>+1136(SB)/8, $0xefbe4786efbe4786 DATA _K256_16<>+1144(SB)/8, $0xefbe4786efbe4786 DATA _K256_16<>+1152(SB)/8, $0x0fc19dc60fc19dc6 DATA _K256_16<>+1160(SB)/8, $0x0fc19dc60fc19dc6 DATA _K256_16<>+1168(SB)/8, $0x0fc19dc60fc19dc6 DATA _K256_16<>+1176(SB)/8, $0x0fc19dc60fc19dc6 DATA _K256_16<>+1184(SB)/8, $0x0fc19dc60fc19dc6 DATA _K256_16<>+1192(SB)/8, $0x0fc19dc60fc19dc6 DATA _K256_16<>+1200(SB)/8, $0x0fc19dc60fc19dc6 DATA _K256_16<>+1208(SB)/8, $0x0fc19dc60fc19dc6 DATA _K256_16<>+1216(SB)/8, $0x240ca1cc240ca1cc DATA _K256_16<>+1224(SB)/8, $0x240ca1cc240ca1cc DATA _K256_16<>+1232(SB)/8, $0x240ca1cc240ca1cc DATA _K256_16<>+1240(SB)/8, $0x240ca1cc240ca1cc DATA _K256_16<>+1248(SB)/8, $0x240ca1cc240ca1cc DATA _K256_16<>+1256(SB)/8, $0x240ca1cc240ca1cc DATA _K256_16<>+1264(SB)/8, $0x240ca1cc240ca1cc DATA _K256_16<>+1272(SB)/8, $0x240ca1cc240ca1cc DATA _K256_16<>+1280(SB)/8, $0x2de92c6f2de92c6f DATA _K256_16<>+1288(SB)/8, $0x2de92c6f2de92c6f DATA _K256_16<>+1296(SB)/8, $0x2de92c6f2de92c6f DATA _K256_16<>+1304(SB)/8, $0x2de92c6f2de92c6f DATA _K256_16<>+1312(SB)/8, $0x2de92c6f2de92c6f DATA _K256_16<>+1320(SB)/8, $0x2de92c6f2de92c6f DATA _K256_16<>+1328(SB)/8, $0x2de92c6f2de92c6f DATA _K256_16<>+1336(SB)/8, $0x2de92c6f2de92c6f DATA _K256_16<>+1344(SB)/8, $0x4a7484aa4a7484aa DATA _K256_16<>+1352(SB)/8, $0x4a7484aa4a7484aa DATA _K256_16<>+1360(SB)/8, $0x4a7484aa4a7484aa DATA _K256_16<>+1368(SB)/8, $0x4a7484aa4a7484aa DATA _K256_16<>+1376(SB)/8, $0x4a7484aa4a7484aa DATA _K256_16<>+1384(SB)/8, $0x4a7484aa4a7484aa DATA _K256_16<>+1392(SB)/8, $0x4a7484aa4a7484aa DATA _K256_16<>+1400(SB)/8, $0x4a7484aa4a7484aa DATA _K256_16<>+1408(SB)/8, $0x5cb0a9dc5cb0a9dc DATA _K256_16<>+1416(SB)/8, $0x5cb0a9dc5cb0a9dc DATA _K256_16<>+1424(SB)/8, $0x5cb0a9dc5cb0a9dc DATA _K256_16<>+1432(SB)/8, $0x5cb0a9dc5cb0a9dc DATA _K256_16<>+1440(SB)/8, $0x5cb0a9dc5cb0a9dc DATA _K256_16<>+1448(SB)/8, $0x5cb0a9dc5cb0a9dc DATA _K256_16<>+1456(SB)/8, $0x5cb0a9dc5cb0a9dc DATA _K256_16<>+1464(SB)/8, $0x5cb0a9dc5cb0a9dc DATA _K256_16<>+1472(SB)/8, $0x76f988da76f988da DATA _K256_16<>+1480(SB)/8, $0x76f988da76f988da DATA _K256_16<>+1488(SB)/8, $0x76f988da76f988da DATA _K256_16<>+1496(SB)/8, $0x76f988da76f988da DATA _K256_16<>+1504(SB)/8, $0x76f988da76f988da DATA _K256_16<>+1512(SB)/8, $0x76f988da76f988da DATA _K256_16<>+1520(SB)/8, $0x76f988da76f988da DATA _K256_16<>+1528(SB)/8, $0x76f988da76f988da DATA _K256_16<>+1536(SB)/8, $0x983e5152983e5152 DATA _K256_16<>+1544(SB)/8, $0x983e5152983e5152 DATA _K256_16<>+1552(SB)/8, $0x983e5152983e5152 DATA _K256_16<>+1560(SB)/8, $0x983e5152983e5152 DATA _K256_16<>+1568(SB)/8, $0x983e5152983e5152 DATA _K256_16<>+1576(SB)/8, $0x983e5152983e5152 DATA _K256_16<>+1584(SB)/8, $0x983e5152983e5152 DATA _K256_16<>+1592(SB)/8, $0x983e5152983e5152 DATA _K256_16<>+1600(SB)/8, $0xa831c66da831c66d DATA _K256_16<>+1608(SB)/8, $0xa831c66da831c66d DATA _K256_16<>+1616(SB)/8, $0xa831c66da831c66d DATA _K256_16<>+1624(SB)/8, $0xa831c66da831c66d DATA _K256_16<>+1632(SB)/8, $0xa831c66da831c66d DATA _K256_16<>+1640(SB)/8, $0xa831c66da831c66d DATA _K256_16<>+1648(SB)/8, $0xa831c66da831c66d DATA _K256_16<>+1656(SB)/8, $0xa831c66da831c66d DATA _K256_16<>+1664(SB)/8, $0xb00327c8b00327c8 DATA _K256_16<>+1672(SB)/8, $0xb00327c8b00327c8 DATA _K256_16<>+1680(SB)/8, $0xb00327c8b00327c8 DATA _K256_16<>+1688(SB)/8, $0xb00327c8b00327c8 DATA _K256_16<>+1696(SB)/8, $0xb00327c8b00327c8 DATA 
_K256_16<>+1704(SB)/8, $0xb00327c8b00327c8 DATA _K256_16<>+1712(SB)/8, $0xb00327c8b00327c8 DATA _K256_16<>+1720(SB)/8, $0xb00327c8b00327c8 DATA _K256_16<>+1728(SB)/8, $0xbf597fc7bf597fc7 DATA _K256_16<>+1736(SB)/8, $0xbf597fc7bf597fc7 DATA _K256_16<>+1744(SB)/8, $0xbf597fc7bf597fc7 DATA _K256_16<>+1752(SB)/8, $0xbf597fc7bf597fc7 DATA _K256_16<>+1760(SB)/8, $0xbf597fc7bf597fc7 DATA _K256_16<>+1768(SB)/8, $0xbf597fc7bf597fc7 DATA _K256_16<>+1776(SB)/8, $0xbf597fc7bf597fc7 DATA _K256_16<>+1784(SB)/8, $0xbf597fc7bf597fc7 DATA _K256_16<>+1792(SB)/8, $0xc6e00bf3c6e00bf3 DATA _K256_16<>+1800(SB)/8, $0xc6e00bf3c6e00bf3 DATA _K256_16<>+1808(SB)/8, $0xc6e00bf3c6e00bf3 DATA _K256_16<>+1816(SB)/8, $0xc6e00bf3c6e00bf3 DATA _K256_16<>+1824(SB)/8, $0xc6e00bf3c6e00bf3 DATA _K256_16<>+1832(SB)/8, $0xc6e00bf3c6e00bf3 DATA _K256_16<>+1840(SB)/8, $0xc6e00bf3c6e00bf3 DATA _K256_16<>+1848(SB)/8, $0xc6e00bf3c6e00bf3 DATA _K256_16<>+1856(SB)/8, $0xd5a79147d5a79147 DATA _K256_16<>+1864(SB)/8, $0xd5a79147d5a79147 DATA _K256_16<>+1872(SB)/8, $0xd5a79147d5a79147 DATA _K256_16<>+1880(SB)/8, $0xd5a79147d5a79147 DATA _K256_16<>+1888(SB)/8, $0xd5a79147d5a79147 DATA _K256_16<>+1896(SB)/8, $0xd5a79147d5a79147 DATA _K256_16<>+1904(SB)/8, $0xd5a79147d5a79147 DATA _K256_16<>+1912(SB)/8, $0xd5a79147d5a79147 DATA _K256_16<>+1920(SB)/8, $0x06ca635106ca6351 DATA _K256_16<>+1928(SB)/8, $0x06ca635106ca6351 DATA _K256_16<>+1936(SB)/8, $0x06ca635106ca6351 DATA _K256_16<>+1944(SB)/8, $0x06ca635106ca6351 DATA _K256_16<>+1952(SB)/8, $0x06ca635106ca6351 DATA _K256_16<>+1960(SB)/8, $0x06ca635106ca6351 DATA _K256_16<>+1968(SB)/8, $0x06ca635106ca6351 DATA _K256_16<>+1976(SB)/8, $0x06ca635106ca6351 DATA _K256_16<>+1984(SB)/8, $0x1429296714292967 DATA _K256_16<>+1992(SB)/8, $0x1429296714292967 DATA _K256_16<>+2000(SB)/8, $0x1429296714292967 DATA _K256_16<>+2008(SB)/8, $0x1429296714292967 DATA _K256_16<>+2016(SB)/8, $0x1429296714292967 DATA _K256_16<>+2024(SB)/8, $0x1429296714292967 DATA _K256_16<>+2032(SB)/8, $0x1429296714292967 DATA _K256_16<>+2040(SB)/8, $0x1429296714292967 DATA _K256_16<>+2048(SB)/8, $0x27b70a8527b70a85 DATA _K256_16<>+2056(SB)/8, $0x27b70a8527b70a85 DATA _K256_16<>+2064(SB)/8, $0x27b70a8527b70a85 DATA _K256_16<>+2072(SB)/8, $0x27b70a8527b70a85 DATA _K256_16<>+2080(SB)/8, $0x27b70a8527b70a85 DATA _K256_16<>+2088(SB)/8, $0x27b70a8527b70a85 DATA _K256_16<>+2096(SB)/8, $0x27b70a8527b70a85 DATA _K256_16<>+2104(SB)/8, $0x27b70a8527b70a85 DATA _K256_16<>+2112(SB)/8, $0x2e1b21382e1b2138 DATA _K256_16<>+2120(SB)/8, $0x2e1b21382e1b2138 DATA _K256_16<>+2128(SB)/8, $0x2e1b21382e1b2138 DATA _K256_16<>+2136(SB)/8, $0x2e1b21382e1b2138 DATA _K256_16<>+2144(SB)/8, $0x2e1b21382e1b2138 DATA _K256_16<>+2152(SB)/8, $0x2e1b21382e1b2138 DATA _K256_16<>+2160(SB)/8, $0x2e1b21382e1b2138 DATA _K256_16<>+2168(SB)/8, $0x2e1b21382e1b2138 DATA _K256_16<>+2176(SB)/8, $0x4d2c6dfc4d2c6dfc DATA _K256_16<>+2184(SB)/8, $0x4d2c6dfc4d2c6dfc DATA _K256_16<>+2192(SB)/8, $0x4d2c6dfc4d2c6dfc DATA _K256_16<>+2200(SB)/8, $0x4d2c6dfc4d2c6dfc DATA _K256_16<>+2208(SB)/8, $0x4d2c6dfc4d2c6dfc DATA _K256_16<>+2216(SB)/8, $0x4d2c6dfc4d2c6dfc DATA _K256_16<>+2224(SB)/8, $0x4d2c6dfc4d2c6dfc DATA _K256_16<>+2232(SB)/8, $0x4d2c6dfc4d2c6dfc DATA _K256_16<>+2240(SB)/8, $0x53380d1353380d13 DATA _K256_16<>+2248(SB)/8, $0x53380d1353380d13 DATA _K256_16<>+2256(SB)/8, $0x53380d1353380d13 DATA _K256_16<>+2264(SB)/8, $0x53380d1353380d13 DATA _K256_16<>+2272(SB)/8, $0x53380d1353380d13 DATA _K256_16<>+2280(SB)/8, $0x53380d1353380d13 DATA _K256_16<>+2288(SB)/8, $0x53380d1353380d13 DATA 
_K256_16<>+2296(SB)/8, $0x53380d1353380d13 DATA _K256_16<>+2304(SB)/8, $0x650a7354650a7354 DATA _K256_16<>+2312(SB)/8, $0x650a7354650a7354 DATA _K256_16<>+2320(SB)/8, $0x650a7354650a7354 DATA _K256_16<>+2328(SB)/8, $0x650a7354650a7354 DATA _K256_16<>+2336(SB)/8, $0x650a7354650a7354 DATA _K256_16<>+2344(SB)/8, $0x650a7354650a7354 DATA _K256_16<>+2352(SB)/8, $0x650a7354650a7354 DATA _K256_16<>+2360(SB)/8, $0x650a7354650a7354 DATA _K256_16<>+2368(SB)/8, $0x766a0abb766a0abb DATA _K256_16<>+2376(SB)/8, $0x766a0abb766a0abb DATA _K256_16<>+2384(SB)/8, $0x766a0abb766a0abb DATA _K256_16<>+2392(SB)/8, $0x766a0abb766a0abb DATA _K256_16<>+2400(SB)/8, $0x766a0abb766a0abb DATA _K256_16<>+2408(SB)/8, $0x766a0abb766a0abb DATA _K256_16<>+2416(SB)/8, $0x766a0abb766a0abb DATA _K256_16<>+2424(SB)/8, $0x766a0abb766a0abb DATA _K256_16<>+2432(SB)/8, $0x81c2c92e81c2c92e DATA _K256_16<>+2440(SB)/8, $0x81c2c92e81c2c92e DATA _K256_16<>+2448(SB)/8, $0x81c2c92e81c2c92e DATA _K256_16<>+2456(SB)/8, $0x81c2c92e81c2c92e DATA _K256_16<>+2464(SB)/8, $0x81c2c92e81c2c92e DATA _K256_16<>+2472(SB)/8, $0x81c2c92e81c2c92e DATA _K256_16<>+2480(SB)/8, $0x81c2c92e81c2c92e DATA _K256_16<>+2488(SB)/8, $0x81c2c92e81c2c92e DATA _K256_16<>+2496(SB)/8, $0x92722c8592722c85 DATA _K256_16<>+2504(SB)/8, $0x92722c8592722c85 DATA _K256_16<>+2512(SB)/8, $0x92722c8592722c85 DATA _K256_16<>+2520(SB)/8, $0x92722c8592722c85 DATA _K256_16<>+2528(SB)/8, $0x92722c8592722c85 DATA _K256_16<>+2536(SB)/8, $0x92722c8592722c85 DATA _K256_16<>+2544(SB)/8, $0x92722c8592722c85 DATA _K256_16<>+2552(SB)/8, $0x92722c8592722c85 DATA _K256_16<>+2560(SB)/8, $0xa2bfe8a1a2bfe8a1 DATA _K256_16<>+2568(SB)/8, $0xa2bfe8a1a2bfe8a1 DATA _K256_16<>+2576(SB)/8, $0xa2bfe8a1a2bfe8a1 DATA _K256_16<>+2584(SB)/8, $0xa2bfe8a1a2bfe8a1 DATA _K256_16<>+2592(SB)/8, $0xa2bfe8a1a2bfe8a1 DATA _K256_16<>+2600(SB)/8, $0xa2bfe8a1a2bfe8a1 DATA _K256_16<>+2608(SB)/8, $0xa2bfe8a1a2bfe8a1 DATA _K256_16<>+2616(SB)/8, $0xa2bfe8a1a2bfe8a1 DATA _K256_16<>+2624(SB)/8, $0xa81a664ba81a664b DATA _K256_16<>+2632(SB)/8, $0xa81a664ba81a664b DATA _K256_16<>+2640(SB)/8, $0xa81a664ba81a664b DATA _K256_16<>+2648(SB)/8, $0xa81a664ba81a664b DATA _K256_16<>+2656(SB)/8, $0xa81a664ba81a664b DATA _K256_16<>+2664(SB)/8, $0xa81a664ba81a664b DATA _K256_16<>+2672(SB)/8, $0xa81a664ba81a664b DATA _K256_16<>+2680(SB)/8, $0xa81a664ba81a664b DATA _K256_16<>+2688(SB)/8, $0xc24b8b70c24b8b70 DATA _K256_16<>+2696(SB)/8, $0xc24b8b70c24b8b70 DATA _K256_16<>+2704(SB)/8, $0xc24b8b70c24b8b70 DATA _K256_16<>+2712(SB)/8, $0xc24b8b70c24b8b70 DATA _K256_16<>+2720(SB)/8, $0xc24b8b70c24b8b70 DATA _K256_16<>+2728(SB)/8, $0xc24b8b70c24b8b70 DATA _K256_16<>+2736(SB)/8, $0xc24b8b70c24b8b70 DATA _K256_16<>+2744(SB)/8, $0xc24b8b70c24b8b70 DATA _K256_16<>+2752(SB)/8, $0xc76c51a3c76c51a3 DATA _K256_16<>+2760(SB)/8, $0xc76c51a3c76c51a3 DATA _K256_16<>+2768(SB)/8, $0xc76c51a3c76c51a3 DATA _K256_16<>+2776(SB)/8, $0xc76c51a3c76c51a3 DATA _K256_16<>+2784(SB)/8, $0xc76c51a3c76c51a3 DATA _K256_16<>+2792(SB)/8, $0xc76c51a3c76c51a3 DATA _K256_16<>+2800(SB)/8, $0xc76c51a3c76c51a3 DATA _K256_16<>+2808(SB)/8, $0xc76c51a3c76c51a3 DATA _K256_16<>+2816(SB)/8, $0xd192e819d192e819 DATA _K256_16<>+2824(SB)/8, $0xd192e819d192e819 DATA _K256_16<>+2832(SB)/8, $0xd192e819d192e819 DATA _K256_16<>+2840(SB)/8, $0xd192e819d192e819 DATA _K256_16<>+2848(SB)/8, $0xd192e819d192e819 DATA _K256_16<>+2856(SB)/8, $0xd192e819d192e819 DATA _K256_16<>+2864(SB)/8, $0xd192e819d192e819 DATA _K256_16<>+2872(SB)/8, $0xd192e819d192e819 DATA _K256_16<>+2880(SB)/8, $0xd6990624d6990624 DATA 
_K256_16<>+2888(SB)/8, $0xd6990624d6990624 DATA _K256_16<>+2896(SB)/8, $0xd6990624d6990624 DATA _K256_16<>+2904(SB)/8, $0xd6990624d6990624 DATA _K256_16<>+2912(SB)/8, $0xd6990624d6990624 DATA _K256_16<>+2920(SB)/8, $0xd6990624d6990624 DATA _K256_16<>+2928(SB)/8, $0xd6990624d6990624 DATA _K256_16<>+2936(SB)/8, $0xd6990624d6990624 DATA _K256_16<>+2944(SB)/8, $0xf40e3585f40e3585 DATA _K256_16<>+2952(SB)/8, $0xf40e3585f40e3585 DATA _K256_16<>+2960(SB)/8, $0xf40e3585f40e3585 DATA _K256_16<>+2968(SB)/8, $0xf40e3585f40e3585 DATA _K256_16<>+2976(SB)/8, $0xf40e3585f40e3585 DATA _K256_16<>+2984(SB)/8, $0xf40e3585f40e3585 DATA _K256_16<>+2992(SB)/8, $0xf40e3585f40e3585 DATA _K256_16<>+3000(SB)/8, $0xf40e3585f40e3585 DATA _K256_16<>+3008(SB)/8, $0x106aa070106aa070 DATA _K256_16<>+3016(SB)/8, $0x106aa070106aa070 DATA _K256_16<>+3024(SB)/8, $0x106aa070106aa070 DATA _K256_16<>+3032(SB)/8, $0x106aa070106aa070 DATA _K256_16<>+3040(SB)/8, $0x106aa070106aa070 DATA _K256_16<>+3048(SB)/8, $0x106aa070106aa070 DATA _K256_16<>+3056(SB)/8, $0x106aa070106aa070 DATA _K256_16<>+3064(SB)/8, $0x106aa070106aa070 DATA _K256_16<>+3072(SB)/8, $0x19a4c11619a4c116 DATA _K256_16<>+3080(SB)/8, $0x19a4c11619a4c116 DATA _K256_16<>+3088(SB)/8, $0x19a4c11619a4c116 DATA _K256_16<>+3096(SB)/8, $0x19a4c11619a4c116 DATA _K256_16<>+3104(SB)/8, $0x19a4c11619a4c116 DATA _K256_16<>+3112(SB)/8, $0x19a4c11619a4c116 DATA _K256_16<>+3120(SB)/8, $0x19a4c11619a4c116 DATA _K256_16<>+3128(SB)/8, $0x19a4c11619a4c116 DATA _K256_16<>+3136(SB)/8, $0x1e376c081e376c08 DATA _K256_16<>+3144(SB)/8, $0x1e376c081e376c08 DATA _K256_16<>+3152(SB)/8, $0x1e376c081e376c08 DATA _K256_16<>+3160(SB)/8, $0x1e376c081e376c08 DATA _K256_16<>+3168(SB)/8, $0x1e376c081e376c08 DATA _K256_16<>+3176(SB)/8, $0x1e376c081e376c08 DATA _K256_16<>+3184(SB)/8, $0x1e376c081e376c08 DATA _K256_16<>+3192(SB)/8, $0x1e376c081e376c08 DATA _K256_16<>+3200(SB)/8, $0x2748774c2748774c DATA _K256_16<>+3208(SB)/8, $0x2748774c2748774c DATA _K256_16<>+3216(SB)/8, $0x2748774c2748774c DATA _K256_16<>+3224(SB)/8, $0x2748774c2748774c DATA _K256_16<>+3232(SB)/8, $0x2748774c2748774c DATA _K256_16<>+3240(SB)/8, $0x2748774c2748774c DATA _K256_16<>+3248(SB)/8, $0x2748774c2748774c DATA _K256_16<>+3256(SB)/8, $0x2748774c2748774c DATA _K256_16<>+3264(SB)/8, $0x34b0bcb534b0bcb5 DATA _K256_16<>+3272(SB)/8, $0x34b0bcb534b0bcb5 DATA _K256_16<>+3280(SB)/8, $0x34b0bcb534b0bcb5 DATA _K256_16<>+3288(SB)/8, $0x34b0bcb534b0bcb5 DATA _K256_16<>+3296(SB)/8, $0x34b0bcb534b0bcb5 DATA _K256_16<>+3304(SB)/8, $0x34b0bcb534b0bcb5 DATA _K256_16<>+3312(SB)/8, $0x34b0bcb534b0bcb5 DATA _K256_16<>+3320(SB)/8, $0x34b0bcb534b0bcb5 DATA _K256_16<>+3328(SB)/8, $0x391c0cb3391c0cb3 DATA _K256_16<>+3336(SB)/8, $0x391c0cb3391c0cb3 DATA _K256_16<>+3344(SB)/8, $0x391c0cb3391c0cb3 DATA _K256_16<>+3352(SB)/8, $0x391c0cb3391c0cb3 DATA _K256_16<>+3360(SB)/8, $0x391c0cb3391c0cb3 DATA _K256_16<>+3368(SB)/8, $0x391c0cb3391c0cb3 DATA _K256_16<>+3376(SB)/8, $0x391c0cb3391c0cb3 DATA _K256_16<>+3384(SB)/8, $0x391c0cb3391c0cb3 DATA _K256_16<>+3392(SB)/8, $0x4ed8aa4a4ed8aa4a DATA _K256_16<>+3400(SB)/8, $0x4ed8aa4a4ed8aa4a DATA _K256_16<>+3408(SB)/8, $0x4ed8aa4a4ed8aa4a DATA _K256_16<>+3416(SB)/8, $0x4ed8aa4a4ed8aa4a DATA _K256_16<>+3424(SB)/8, $0x4ed8aa4a4ed8aa4a DATA _K256_16<>+3432(SB)/8, $0x4ed8aa4a4ed8aa4a DATA _K256_16<>+3440(SB)/8, $0x4ed8aa4a4ed8aa4a DATA _K256_16<>+3448(SB)/8, $0x4ed8aa4a4ed8aa4a DATA _K256_16<>+3456(SB)/8, $0x5b9cca4f5b9cca4f DATA _K256_16<>+3464(SB)/8, $0x5b9cca4f5b9cca4f DATA _K256_16<>+3472(SB)/8, $0x5b9cca4f5b9cca4f DATA 
_K256_16<>+3480(SB)/8, $0x5b9cca4f5b9cca4f DATA _K256_16<>+3488(SB)/8, $0x5b9cca4f5b9cca4f DATA _K256_16<>+3496(SB)/8, $0x5b9cca4f5b9cca4f DATA _K256_16<>+3504(SB)/8, $0x5b9cca4f5b9cca4f DATA _K256_16<>+3512(SB)/8, $0x5b9cca4f5b9cca4f DATA _K256_16<>+3520(SB)/8, $0x682e6ff3682e6ff3 DATA _K256_16<>+3528(SB)/8, $0x682e6ff3682e6ff3 DATA _K256_16<>+3536(SB)/8, $0x682e6ff3682e6ff3 DATA _K256_16<>+3544(SB)/8, $0x682e6ff3682e6ff3 DATA _K256_16<>+3552(SB)/8, $0x682e6ff3682e6ff3 DATA _K256_16<>+3560(SB)/8, $0x682e6ff3682e6ff3 DATA _K256_16<>+3568(SB)/8, $0x682e6ff3682e6ff3 DATA _K256_16<>+3576(SB)/8, $0x682e6ff3682e6ff3 DATA _K256_16<>+3584(SB)/8, $0x748f82ee748f82ee DATA _K256_16<>+3592(SB)/8, $0x748f82ee748f82ee DATA _K256_16<>+3600(SB)/8, $0x748f82ee748f82ee DATA _K256_16<>+3608(SB)/8, $0x748f82ee748f82ee DATA _K256_16<>+3616(SB)/8, $0x748f82ee748f82ee DATA _K256_16<>+3624(SB)/8, $0x748f82ee748f82ee DATA _K256_16<>+3632(SB)/8, $0x748f82ee748f82ee DATA _K256_16<>+3640(SB)/8, $0x748f82ee748f82ee DATA _K256_16<>+3648(SB)/8, $0x78a5636f78a5636f DATA _K256_16<>+3656(SB)/8, $0x78a5636f78a5636f DATA _K256_16<>+3664(SB)/8, $0x78a5636f78a5636f DATA _K256_16<>+3672(SB)/8, $0x78a5636f78a5636f DATA _K256_16<>+3680(SB)/8, $0x78a5636f78a5636f DATA _K256_16<>+3688(SB)/8, $0x78a5636f78a5636f DATA _K256_16<>+3696(SB)/8, $0x78a5636f78a5636f DATA _K256_16<>+3704(SB)/8, $0x78a5636f78a5636f DATA _K256_16<>+3712(SB)/8, $0x84c8781484c87814 DATA _K256_16<>+3720(SB)/8, $0x84c8781484c87814 DATA _K256_16<>+3728(SB)/8, $0x84c8781484c87814 DATA _K256_16<>+3736(SB)/8, $0x84c8781484c87814 DATA _K256_16<>+3744(SB)/8, $0x84c8781484c87814 DATA _K256_16<>+3752(SB)/8, $0x84c8781484c87814 DATA _K256_16<>+3760(SB)/8, $0x84c8781484c87814 DATA _K256_16<>+3768(SB)/8, $0x84c8781484c87814 DATA _K256_16<>+3776(SB)/8, $0x8cc702088cc70208 DATA _K256_16<>+3784(SB)/8, $0x8cc702088cc70208 DATA _K256_16<>+3792(SB)/8, $0x8cc702088cc70208 DATA _K256_16<>+3800(SB)/8, $0x8cc702088cc70208 DATA _K256_16<>+3808(SB)/8, $0x8cc702088cc70208 DATA _K256_16<>+3816(SB)/8, $0x8cc702088cc70208 DATA _K256_16<>+3824(SB)/8, $0x8cc702088cc70208 DATA _K256_16<>+3832(SB)/8, $0x8cc702088cc70208 DATA _K256_16<>+3840(SB)/8, $0x90befffa90befffa DATA _K256_16<>+3848(SB)/8, $0x90befffa90befffa DATA _K256_16<>+3856(SB)/8, $0x90befffa90befffa DATA _K256_16<>+3864(SB)/8, $0x90befffa90befffa DATA _K256_16<>+3872(SB)/8, $0x90befffa90befffa DATA _K256_16<>+3880(SB)/8, $0x90befffa90befffa DATA _K256_16<>+3888(SB)/8, $0x90befffa90befffa DATA _K256_16<>+3896(SB)/8, $0x90befffa90befffa DATA _K256_16<>+3904(SB)/8, $0xa4506ceba4506ceb DATA _K256_16<>+3912(SB)/8, $0xa4506ceba4506ceb DATA _K256_16<>+3920(SB)/8, $0xa4506ceba4506ceb DATA _K256_16<>+3928(SB)/8, $0xa4506ceba4506ceb DATA _K256_16<>+3936(SB)/8, $0xa4506ceba4506ceb DATA _K256_16<>+3944(SB)/8, $0xa4506ceba4506ceb DATA _K256_16<>+3952(SB)/8, $0xa4506ceba4506ceb DATA _K256_16<>+3960(SB)/8, $0xa4506ceba4506ceb DATA _K256_16<>+3968(SB)/8, $0xbef9a3f7bef9a3f7 DATA _K256_16<>+3976(SB)/8, $0xbef9a3f7bef9a3f7 DATA _K256_16<>+3984(SB)/8, $0xbef9a3f7bef9a3f7 DATA _K256_16<>+3992(SB)/8, $0xbef9a3f7bef9a3f7 DATA _K256_16<>+4000(SB)/8, $0xbef9a3f7bef9a3f7 DATA _K256_16<>+4008(SB)/8, $0xbef9a3f7bef9a3f7 DATA _K256_16<>+4016(SB)/8, $0xbef9a3f7bef9a3f7 DATA _K256_16<>+4024(SB)/8, $0xbef9a3f7bef9a3f7 DATA _K256_16<>+4032(SB)/8, $0xc67178f2c67178f2 DATA _K256_16<>+4040(SB)/8, $0xc67178f2c67178f2 DATA _K256_16<>+4048(SB)/8, $0xc67178f2c67178f2 DATA _K256_16<>+4056(SB)/8, $0xc67178f2c67178f2 DATA _K256_16<>+4064(SB)/8, $0xc67178f2c67178f2 DATA 
_K256_16<>+4072(SB)/8, $0xc67178f2c67178f2 DATA _K256_16<>+4080(SB)/8, $0xc67178f2c67178f2 DATA _K256_16<>+4088(SB)/8, $0xc67178f2c67178f2 GLOBL _K256_16<>(SB),(NOPTR+RODATA),$4096 DATA _PSHUFFLE_BYTE_FLIP_MASK_16<>+0(SB)/8, $0x0405060700010203 DATA _PSHUFFLE_BYTE_FLIP_MASK_16<>+8(SB)/8, $0x0c0d0e0f08090a0b DATA _PSHUFFLE_BYTE_FLIP_MASK_16<>+16(SB)/8, $0x0405060700010203 DATA _PSHUFFLE_BYTE_FLIP_MASK_16<>+24(SB)/8, $0x0c0d0e0f08090a0b DATA _PSHUFFLE_BYTE_FLIP_MASK_16<>+32(SB)/8, $0x0405060700010203 DATA _PSHUFFLE_BYTE_FLIP_MASK_16<>+40(SB)/8, $0x0c0d0e0f08090a0b DATA _PSHUFFLE_BYTE_FLIP_MASK_16<>+48(SB)/8, $0x0405060700010203 DATA _PSHUFFLE_BYTE_FLIP_MASK_16<>+56(SB)/8, $0x0c0d0e0f08090a0b GLOBL _PSHUFFLE_BYTE_FLIP_MASK_16<>(SB),(NOPTR+RODATA),$64 DATA _PADDING_16<>+0(SB)/8, $0xc28a2f98c28a2f98 DATA _PADDING_16<>+8(SB)/8, $0xc28a2f98c28a2f98 DATA _PADDING_16<>+16(SB)/8, $0xc28a2f98c28a2f98 DATA _PADDING_16<>+24(SB)/8, $0xc28a2f98c28a2f98 DATA _PADDING_16<>+32(SB)/8, $0xc28a2f98c28a2f98 DATA _PADDING_16<>+40(SB)/8, $0xc28a2f98c28a2f98 DATA _PADDING_16<>+48(SB)/8, $0xc28a2f98c28a2f98 DATA _PADDING_16<>+56(SB)/8, $0xc28a2f98c28a2f98 DATA _PADDING_16<>+64(SB)/8, $0x7137449171374491 DATA _PADDING_16<>+72(SB)/8, $0x7137449171374491 DATA _PADDING_16<>+80(SB)/8, $0x7137449171374491 DATA _PADDING_16<>+88(SB)/8, $0x7137449171374491 DATA _PADDING_16<>+96(SB)/8, $0x7137449171374491 DATA _PADDING_16<>+104(SB)/8, $0x7137449171374491 DATA _PADDING_16<>+112(SB)/8, $0x7137449171374491 DATA _PADDING_16<>+120(SB)/8, $0x7137449171374491 DATA _PADDING_16<>+128(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _PADDING_16<>+136(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _PADDING_16<>+144(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _PADDING_16<>+152(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _PADDING_16<>+160(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _PADDING_16<>+168(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _PADDING_16<>+176(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _PADDING_16<>+184(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _PADDING_16<>+192(SB)/8, $0xe9b5dba5e9b5dba5 DATA _PADDING_16<>+200(SB)/8, $0xe9b5dba5e9b5dba5 DATA _PADDING_16<>+208(SB)/8, $0xe9b5dba5e9b5dba5 DATA _PADDING_16<>+216(SB)/8, $0xe9b5dba5e9b5dba5 DATA _PADDING_16<>+224(SB)/8, $0xe9b5dba5e9b5dba5 DATA _PADDING_16<>+232(SB)/8, $0xe9b5dba5e9b5dba5 DATA _PADDING_16<>+240(SB)/8, $0xe9b5dba5e9b5dba5 DATA _PADDING_16<>+248(SB)/8, $0xe9b5dba5e9b5dba5 DATA _PADDING_16<>+256(SB)/8, $0x3956c25b3956c25b DATA _PADDING_16<>+264(SB)/8, $0x3956c25b3956c25b DATA _PADDING_16<>+272(SB)/8, $0x3956c25b3956c25b DATA _PADDING_16<>+280(SB)/8, $0x3956c25b3956c25b DATA _PADDING_16<>+288(SB)/8, $0x3956c25b3956c25b DATA _PADDING_16<>+296(SB)/8, $0x3956c25b3956c25b DATA _PADDING_16<>+304(SB)/8, $0x3956c25b3956c25b DATA _PADDING_16<>+312(SB)/8, $0x3956c25b3956c25b DATA _PADDING_16<>+320(SB)/8, $0x59f111f159f111f1 DATA _PADDING_16<>+328(SB)/8, $0x59f111f159f111f1 DATA _PADDING_16<>+336(SB)/8, $0x59f111f159f111f1 DATA _PADDING_16<>+344(SB)/8, $0x59f111f159f111f1 DATA _PADDING_16<>+352(SB)/8, $0x59f111f159f111f1 DATA _PADDING_16<>+360(SB)/8, $0x59f111f159f111f1 DATA _PADDING_16<>+368(SB)/8, $0x59f111f159f111f1 DATA _PADDING_16<>+376(SB)/8, $0x59f111f159f111f1 DATA _PADDING_16<>+384(SB)/8, $0x923f82a4923f82a4 DATA _PADDING_16<>+392(SB)/8, $0x923f82a4923f82a4 DATA _PADDING_16<>+400(SB)/8, $0x923f82a4923f82a4 DATA _PADDING_16<>+408(SB)/8, $0x923f82a4923f82a4 DATA _PADDING_16<>+416(SB)/8, $0x923f82a4923f82a4 DATA _PADDING_16<>+424(SB)/8, $0x923f82a4923f82a4 DATA _PADDING_16<>+432(SB)/8, $0x923f82a4923f82a4 DATA _PADDING_16<>+440(SB)/8, $0x923f82a4923f82a4 DATA 
_PADDING_16<>+448(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _PADDING_16<>+456(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _PADDING_16<>+464(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _PADDING_16<>+472(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _PADDING_16<>+480(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _PADDING_16<>+488(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _PADDING_16<>+496(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _PADDING_16<>+504(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _PADDING_16<>+512(SB)/8, $0xd807aa98d807aa98 DATA _PADDING_16<>+520(SB)/8, $0xd807aa98d807aa98 DATA _PADDING_16<>+528(SB)/8, $0xd807aa98d807aa98 DATA _PADDING_16<>+536(SB)/8, $0xd807aa98d807aa98 DATA _PADDING_16<>+544(SB)/8, $0xd807aa98d807aa98 DATA _PADDING_16<>+552(SB)/8, $0xd807aa98d807aa98 DATA _PADDING_16<>+560(SB)/8, $0xd807aa98d807aa98 DATA _PADDING_16<>+568(SB)/8, $0xd807aa98d807aa98 DATA _PADDING_16<>+576(SB)/8, $0x12835b0112835b01 DATA _PADDING_16<>+584(SB)/8, $0x12835b0112835b01 DATA _PADDING_16<>+592(SB)/8, $0x12835b0112835b01 DATA _PADDING_16<>+600(SB)/8, $0x12835b0112835b01 DATA _PADDING_16<>+608(SB)/8, $0x12835b0112835b01 DATA _PADDING_16<>+616(SB)/8, $0x12835b0112835b01 DATA _PADDING_16<>+624(SB)/8, $0x12835b0112835b01 DATA _PADDING_16<>+632(SB)/8, $0x12835b0112835b01 DATA _PADDING_16<>+640(SB)/8, $0x243185be243185be DATA _PADDING_16<>+648(SB)/8, $0x243185be243185be DATA _PADDING_16<>+656(SB)/8, $0x243185be243185be DATA _PADDING_16<>+664(SB)/8, $0x243185be243185be DATA _PADDING_16<>+672(SB)/8, $0x243185be243185be DATA _PADDING_16<>+680(SB)/8, $0x243185be243185be DATA _PADDING_16<>+688(SB)/8, $0x243185be243185be DATA _PADDING_16<>+696(SB)/8, $0x243185be243185be DATA _PADDING_16<>+704(SB)/8, $0x550c7dc3550c7dc3 DATA _PADDING_16<>+712(SB)/8, $0x550c7dc3550c7dc3 DATA _PADDING_16<>+720(SB)/8, $0x550c7dc3550c7dc3 DATA _PADDING_16<>+728(SB)/8, $0x550c7dc3550c7dc3 DATA _PADDING_16<>+736(SB)/8, $0x550c7dc3550c7dc3 DATA _PADDING_16<>+744(SB)/8, $0x550c7dc3550c7dc3 DATA _PADDING_16<>+752(SB)/8, $0x550c7dc3550c7dc3 DATA _PADDING_16<>+760(SB)/8, $0x550c7dc3550c7dc3 DATA _PADDING_16<>+768(SB)/8, $0x72be5d7472be5d74 DATA _PADDING_16<>+776(SB)/8, $0x72be5d7472be5d74 DATA _PADDING_16<>+784(SB)/8, $0x72be5d7472be5d74 DATA _PADDING_16<>+792(SB)/8, $0x72be5d7472be5d74 DATA _PADDING_16<>+800(SB)/8, $0x72be5d7472be5d74 DATA _PADDING_16<>+808(SB)/8, $0x72be5d7472be5d74 DATA _PADDING_16<>+816(SB)/8, $0x72be5d7472be5d74 DATA _PADDING_16<>+824(SB)/8, $0x72be5d7472be5d74 DATA _PADDING_16<>+832(SB)/8, $0x80deb1fe80deb1fe DATA _PADDING_16<>+840(SB)/8, $0x80deb1fe80deb1fe DATA _PADDING_16<>+848(SB)/8, $0x80deb1fe80deb1fe DATA _PADDING_16<>+856(SB)/8, $0x80deb1fe80deb1fe DATA _PADDING_16<>+864(SB)/8, $0x80deb1fe80deb1fe DATA _PADDING_16<>+872(SB)/8, $0x80deb1fe80deb1fe DATA _PADDING_16<>+880(SB)/8, $0x80deb1fe80deb1fe DATA _PADDING_16<>+888(SB)/8, $0x80deb1fe80deb1fe DATA _PADDING_16<>+896(SB)/8, $0x9bdc06a79bdc06a7 DATA _PADDING_16<>+904(SB)/8, $0x9bdc06a79bdc06a7 DATA _PADDING_16<>+912(SB)/8, $0x9bdc06a79bdc06a7 DATA _PADDING_16<>+920(SB)/8, $0x9bdc06a79bdc06a7 DATA _PADDING_16<>+928(SB)/8, $0x9bdc06a79bdc06a7 DATA _PADDING_16<>+936(SB)/8, $0x9bdc06a79bdc06a7 DATA _PADDING_16<>+944(SB)/8, $0x9bdc06a79bdc06a7 DATA _PADDING_16<>+952(SB)/8, $0x9bdc06a79bdc06a7 DATA _PADDING_16<>+960(SB)/8, $0xc19bf374c19bf374 DATA _PADDING_16<>+968(SB)/8, $0xc19bf374c19bf374 DATA _PADDING_16<>+976(SB)/8, $0xc19bf374c19bf374 DATA _PADDING_16<>+984(SB)/8, $0xc19bf374c19bf374 DATA _PADDING_16<>+992(SB)/8, $0xc19bf374c19bf374 DATA _PADDING_16<>+1000(SB)/8, $0xc19bf374c19bf374 DATA _PADDING_16<>+1008(SB)/8, $0xc19bf374c19bf374 DATA 
_PADDING_16<>+1016(SB)/8, $0xc19bf374c19bf374 DATA _PADDING_16<>+1024(SB)/8, $0x649b69c1649b69c1 DATA _PADDING_16<>+1032(SB)/8, $0x649b69c1649b69c1 DATA _PADDING_16<>+1040(SB)/8, $0x649b69c1649b69c1 DATA _PADDING_16<>+1048(SB)/8, $0x649b69c1649b69c1 DATA _PADDING_16<>+1056(SB)/8, $0x649b69c1649b69c1 DATA _PADDING_16<>+1064(SB)/8, $0x649b69c1649b69c1 DATA _PADDING_16<>+1072(SB)/8, $0x649b69c1649b69c1 DATA _PADDING_16<>+1080(SB)/8, $0x649b69c1649b69c1 DATA _PADDING_16<>+1088(SB)/8, $0xf0fe4786f0fe4786 DATA _PADDING_16<>+1096(SB)/8, $0xf0fe4786f0fe4786 DATA _PADDING_16<>+1104(SB)/8, $0xf0fe4786f0fe4786 DATA _PADDING_16<>+1112(SB)/8, $0xf0fe4786f0fe4786 DATA _PADDING_16<>+1120(SB)/8, $0xf0fe4786f0fe4786 DATA _PADDING_16<>+1128(SB)/8, $0xf0fe4786f0fe4786 DATA _PADDING_16<>+1136(SB)/8, $0xf0fe4786f0fe4786 DATA _PADDING_16<>+1144(SB)/8, $0xf0fe4786f0fe4786 DATA _PADDING_16<>+1152(SB)/8, $0x0fe1edc60fe1edc6 DATA _PADDING_16<>+1160(SB)/8, $0x0fe1edc60fe1edc6 DATA _PADDING_16<>+1168(SB)/8, $0x0fe1edc60fe1edc6 DATA _PADDING_16<>+1176(SB)/8, $0x0fe1edc60fe1edc6 DATA _PADDING_16<>+1184(SB)/8, $0x0fe1edc60fe1edc6 DATA _PADDING_16<>+1192(SB)/8, $0x0fe1edc60fe1edc6 DATA _PADDING_16<>+1200(SB)/8, $0x0fe1edc60fe1edc6 DATA _PADDING_16<>+1208(SB)/8, $0x0fe1edc60fe1edc6 DATA _PADDING_16<>+1216(SB)/8, $0x240cf254240cf254 DATA _PADDING_16<>+1224(SB)/8, $0x240cf254240cf254 DATA _PADDING_16<>+1232(SB)/8, $0x240cf254240cf254 DATA _PADDING_16<>+1240(SB)/8, $0x240cf254240cf254 DATA _PADDING_16<>+1248(SB)/8, $0x240cf254240cf254 DATA _PADDING_16<>+1256(SB)/8, $0x240cf254240cf254 DATA _PADDING_16<>+1264(SB)/8, $0x240cf254240cf254 DATA _PADDING_16<>+1272(SB)/8, $0x240cf254240cf254 DATA _PADDING_16<>+1280(SB)/8, $0x4fe9346f4fe9346f DATA _PADDING_16<>+1288(SB)/8, $0x4fe9346f4fe9346f DATA _PADDING_16<>+1296(SB)/8, $0x4fe9346f4fe9346f DATA _PADDING_16<>+1304(SB)/8, $0x4fe9346f4fe9346f DATA _PADDING_16<>+1312(SB)/8, $0x4fe9346f4fe9346f DATA _PADDING_16<>+1320(SB)/8, $0x4fe9346f4fe9346f DATA _PADDING_16<>+1328(SB)/8, $0x4fe9346f4fe9346f DATA _PADDING_16<>+1336(SB)/8, $0x4fe9346f4fe9346f DATA _PADDING_16<>+1344(SB)/8, $0x6cc984be6cc984be DATA _PADDING_16<>+1352(SB)/8, $0x6cc984be6cc984be DATA _PADDING_16<>+1360(SB)/8, $0x6cc984be6cc984be DATA _PADDING_16<>+1368(SB)/8, $0x6cc984be6cc984be DATA _PADDING_16<>+1376(SB)/8, $0x6cc984be6cc984be DATA _PADDING_16<>+1384(SB)/8, $0x6cc984be6cc984be DATA _PADDING_16<>+1392(SB)/8, $0x6cc984be6cc984be DATA _PADDING_16<>+1400(SB)/8, $0x6cc984be6cc984be DATA _PADDING_16<>+1408(SB)/8, $0x61b9411e61b9411e DATA _PADDING_16<>+1416(SB)/8, $0x61b9411e61b9411e DATA _PADDING_16<>+1424(SB)/8, $0x61b9411e61b9411e DATA _PADDING_16<>+1432(SB)/8, $0x61b9411e61b9411e DATA _PADDING_16<>+1440(SB)/8, $0x61b9411e61b9411e DATA _PADDING_16<>+1448(SB)/8, $0x61b9411e61b9411e DATA _PADDING_16<>+1456(SB)/8, $0x61b9411e61b9411e DATA _PADDING_16<>+1464(SB)/8, $0x61b9411e61b9411e DATA _PADDING_16<>+1472(SB)/8, $0x16f988fa16f988fa DATA _PADDING_16<>+1480(SB)/8, $0x16f988fa16f988fa DATA _PADDING_16<>+1488(SB)/8, $0x16f988fa16f988fa DATA _PADDING_16<>+1496(SB)/8, $0x16f988fa16f988fa DATA _PADDING_16<>+1504(SB)/8, $0x16f988fa16f988fa DATA _PADDING_16<>+1512(SB)/8, $0x16f988fa16f988fa DATA _PADDING_16<>+1520(SB)/8, $0x16f988fa16f988fa DATA _PADDING_16<>+1528(SB)/8, $0x16f988fa16f988fa DATA _PADDING_16<>+1536(SB)/8, $0xf2c65152f2c65152 DATA _PADDING_16<>+1544(SB)/8, $0xf2c65152f2c65152 DATA _PADDING_16<>+1552(SB)/8, $0xf2c65152f2c65152 DATA _PADDING_16<>+1560(SB)/8, $0xf2c65152f2c65152 DATA _PADDING_16<>+1568(SB)/8, 
$0xf2c65152f2c65152 DATA _PADDING_16<>+1576(SB)/8, $0xf2c65152f2c65152 DATA _PADDING_16<>+1584(SB)/8, $0xf2c65152f2c65152 DATA _PADDING_16<>+1592(SB)/8, $0xf2c65152f2c65152 DATA _PADDING_16<>+1600(SB)/8, $0xa88e5a6da88e5a6d DATA _PADDING_16<>+1608(SB)/8, $0xa88e5a6da88e5a6d DATA _PADDING_16<>+1616(SB)/8, $0xa88e5a6da88e5a6d DATA _PADDING_16<>+1624(SB)/8, $0xa88e5a6da88e5a6d DATA _PADDING_16<>+1632(SB)/8, $0xa88e5a6da88e5a6d DATA _PADDING_16<>+1640(SB)/8, $0xa88e5a6da88e5a6d DATA _PADDING_16<>+1648(SB)/8, $0xa88e5a6da88e5a6d DATA _PADDING_16<>+1656(SB)/8, $0xa88e5a6da88e5a6d DATA _PADDING_16<>+1664(SB)/8, $0xb019fc65b019fc65 DATA _PADDING_16<>+1672(SB)/8, $0xb019fc65b019fc65 DATA _PADDING_16<>+1680(SB)/8, $0xb019fc65b019fc65 DATA _PADDING_16<>+1688(SB)/8, $0xb019fc65b019fc65 DATA _PADDING_16<>+1696(SB)/8, $0xb019fc65b019fc65 DATA _PADDING_16<>+1704(SB)/8, $0xb019fc65b019fc65 DATA _PADDING_16<>+1712(SB)/8, $0xb019fc65b019fc65 DATA _PADDING_16<>+1720(SB)/8, $0xb019fc65b019fc65 DATA _PADDING_16<>+1728(SB)/8, $0xb9d99ec7b9d99ec7 DATA _PADDING_16<>+1736(SB)/8, $0xb9d99ec7b9d99ec7 DATA _PADDING_16<>+1744(SB)/8, $0xb9d99ec7b9d99ec7 DATA _PADDING_16<>+1752(SB)/8, $0xb9d99ec7b9d99ec7 DATA _PADDING_16<>+1760(SB)/8, $0xb9d99ec7b9d99ec7 DATA _PADDING_16<>+1768(SB)/8, $0xb9d99ec7b9d99ec7 DATA _PADDING_16<>+1776(SB)/8, $0xb9d99ec7b9d99ec7 DATA _PADDING_16<>+1784(SB)/8, $0xb9d99ec7b9d99ec7 DATA _PADDING_16<>+1792(SB)/8, $0x9a1231c39a1231c3 DATA _PADDING_16<>+1800(SB)/8, $0x9a1231c39a1231c3 DATA _PADDING_16<>+1808(SB)/8, $0x9a1231c39a1231c3 DATA _PADDING_16<>+1816(SB)/8, $0x9a1231c39a1231c3 DATA _PADDING_16<>+1824(SB)/8, $0x9a1231c39a1231c3 DATA _PADDING_16<>+1832(SB)/8, $0x9a1231c39a1231c3 DATA _PADDING_16<>+1840(SB)/8, $0x9a1231c39a1231c3 DATA _PADDING_16<>+1848(SB)/8, $0x9a1231c39a1231c3 DATA _PADDING_16<>+1856(SB)/8, $0xe70eeaa0e70eeaa0 DATA _PADDING_16<>+1864(SB)/8, $0xe70eeaa0e70eeaa0 DATA _PADDING_16<>+1872(SB)/8, $0xe70eeaa0e70eeaa0 DATA _PADDING_16<>+1880(SB)/8, $0xe70eeaa0e70eeaa0 DATA _PADDING_16<>+1888(SB)/8, $0xe70eeaa0e70eeaa0 DATA _PADDING_16<>+1896(SB)/8, $0xe70eeaa0e70eeaa0 DATA _PADDING_16<>+1904(SB)/8, $0xe70eeaa0e70eeaa0 DATA _PADDING_16<>+1912(SB)/8, $0xe70eeaa0e70eeaa0 DATA _PADDING_16<>+1920(SB)/8, $0xfdb1232bfdb1232b DATA _PADDING_16<>+1928(SB)/8, $0xfdb1232bfdb1232b DATA _PADDING_16<>+1936(SB)/8, $0xfdb1232bfdb1232b DATA _PADDING_16<>+1944(SB)/8, $0xfdb1232bfdb1232b DATA _PADDING_16<>+1952(SB)/8, $0xfdb1232bfdb1232b DATA _PADDING_16<>+1960(SB)/8, $0xfdb1232bfdb1232b DATA _PADDING_16<>+1968(SB)/8, $0xfdb1232bfdb1232b DATA _PADDING_16<>+1976(SB)/8, $0xfdb1232bfdb1232b DATA _PADDING_16<>+1984(SB)/8, $0xc7353eb0c7353eb0 DATA _PADDING_16<>+1992(SB)/8, $0xc7353eb0c7353eb0 DATA _PADDING_16<>+2000(SB)/8, $0xc7353eb0c7353eb0 DATA _PADDING_16<>+2008(SB)/8, $0xc7353eb0c7353eb0 DATA _PADDING_16<>+2016(SB)/8, $0xc7353eb0c7353eb0 DATA _PADDING_16<>+2024(SB)/8, $0xc7353eb0c7353eb0 DATA _PADDING_16<>+2032(SB)/8, $0xc7353eb0c7353eb0 DATA _PADDING_16<>+2040(SB)/8, $0xc7353eb0c7353eb0 DATA _PADDING_16<>+2048(SB)/8, $0x3069bad53069bad5 DATA _PADDING_16<>+2056(SB)/8, $0x3069bad53069bad5 DATA _PADDING_16<>+2064(SB)/8, $0x3069bad53069bad5 DATA _PADDING_16<>+2072(SB)/8, $0x3069bad53069bad5 DATA _PADDING_16<>+2080(SB)/8, $0x3069bad53069bad5 DATA _PADDING_16<>+2088(SB)/8, $0x3069bad53069bad5 DATA _PADDING_16<>+2096(SB)/8, $0x3069bad53069bad5 DATA _PADDING_16<>+2104(SB)/8, $0x3069bad53069bad5 DATA _PADDING_16<>+2112(SB)/8, $0xcb976d5fcb976d5f DATA _PADDING_16<>+2120(SB)/8, $0xcb976d5fcb976d5f DATA 
_PADDING_16<>+2128(SB)/8, $0xcb976d5fcb976d5f DATA _PADDING_16<>+2136(SB)/8, $0xcb976d5fcb976d5f DATA _PADDING_16<>+2144(SB)/8, $0xcb976d5fcb976d5f DATA _PADDING_16<>+2152(SB)/8, $0xcb976d5fcb976d5f DATA _PADDING_16<>+2160(SB)/8, $0xcb976d5fcb976d5f DATA _PADDING_16<>+2168(SB)/8, $0xcb976d5fcb976d5f DATA _PADDING_16<>+2176(SB)/8, $0x5a0f118f5a0f118f DATA _PADDING_16<>+2184(SB)/8, $0x5a0f118f5a0f118f DATA _PADDING_16<>+2192(SB)/8, $0x5a0f118f5a0f118f DATA _PADDING_16<>+2200(SB)/8, $0x5a0f118f5a0f118f DATA _PADDING_16<>+2208(SB)/8, $0x5a0f118f5a0f118f DATA _PADDING_16<>+2216(SB)/8, $0x5a0f118f5a0f118f DATA _PADDING_16<>+2224(SB)/8, $0x5a0f118f5a0f118f DATA _PADDING_16<>+2232(SB)/8, $0x5a0f118f5a0f118f DATA _PADDING_16<>+2240(SB)/8, $0xdc1eeefddc1eeefd DATA _PADDING_16<>+2248(SB)/8, $0xdc1eeefddc1eeefd DATA _PADDING_16<>+2256(SB)/8, $0xdc1eeefddc1eeefd DATA _PADDING_16<>+2264(SB)/8, $0xdc1eeefddc1eeefd DATA _PADDING_16<>+2272(SB)/8, $0xdc1eeefddc1eeefd DATA _PADDING_16<>+2280(SB)/8, $0xdc1eeefddc1eeefd DATA _PADDING_16<>+2288(SB)/8, $0xdc1eeefddc1eeefd DATA _PADDING_16<>+2296(SB)/8, $0xdc1eeefddc1eeefd DATA _PADDING_16<>+2304(SB)/8, $0x0a35b6890a35b689 DATA _PADDING_16<>+2312(SB)/8, $0x0a35b6890a35b689 DATA _PADDING_16<>+2320(SB)/8, $0x0a35b6890a35b689 DATA _PADDING_16<>+2328(SB)/8, $0x0a35b6890a35b689 DATA _PADDING_16<>+2336(SB)/8, $0x0a35b6890a35b689 DATA _PADDING_16<>+2344(SB)/8, $0x0a35b6890a35b689 DATA _PADDING_16<>+2352(SB)/8, $0x0a35b6890a35b689 DATA _PADDING_16<>+2360(SB)/8, $0x0a35b6890a35b689 DATA _PADDING_16<>+2368(SB)/8, $0xde0b7a04de0b7a04 DATA _PADDING_16<>+2376(SB)/8, $0xde0b7a04de0b7a04 DATA _PADDING_16<>+2384(SB)/8, $0xde0b7a04de0b7a04 DATA _PADDING_16<>+2392(SB)/8, $0xde0b7a04de0b7a04 DATA _PADDING_16<>+2400(SB)/8, $0xde0b7a04de0b7a04 DATA _PADDING_16<>+2408(SB)/8, $0xde0b7a04de0b7a04 DATA _PADDING_16<>+2416(SB)/8, $0xde0b7a04de0b7a04 DATA _PADDING_16<>+2424(SB)/8, $0xde0b7a04de0b7a04 DATA _PADDING_16<>+2432(SB)/8, $0x58f4ca9d58f4ca9d DATA _PADDING_16<>+2440(SB)/8, $0x58f4ca9d58f4ca9d DATA _PADDING_16<>+2448(SB)/8, $0x58f4ca9d58f4ca9d DATA _PADDING_16<>+2456(SB)/8, $0x58f4ca9d58f4ca9d DATA _PADDING_16<>+2464(SB)/8, $0x58f4ca9d58f4ca9d DATA _PADDING_16<>+2472(SB)/8, $0x58f4ca9d58f4ca9d DATA _PADDING_16<>+2480(SB)/8, $0x58f4ca9d58f4ca9d DATA _PADDING_16<>+2488(SB)/8, $0x58f4ca9d58f4ca9d DATA _PADDING_16<>+2496(SB)/8, $0xe15d5b16e15d5b16 DATA _PADDING_16<>+2504(SB)/8, $0xe15d5b16e15d5b16 DATA _PADDING_16<>+2512(SB)/8, $0xe15d5b16e15d5b16 DATA _PADDING_16<>+2520(SB)/8, $0xe15d5b16e15d5b16 DATA _PADDING_16<>+2528(SB)/8, $0xe15d5b16e15d5b16 DATA _PADDING_16<>+2536(SB)/8, $0xe15d5b16e15d5b16 DATA _PADDING_16<>+2544(SB)/8, $0xe15d5b16e15d5b16 DATA _PADDING_16<>+2552(SB)/8, $0xe15d5b16e15d5b16 DATA _PADDING_16<>+2560(SB)/8, $0x007f3e86007f3e86 DATA _PADDING_16<>+2568(SB)/8, $0x007f3e86007f3e86 DATA _PADDING_16<>+2576(SB)/8, $0x007f3e86007f3e86 DATA _PADDING_16<>+2584(SB)/8, $0x007f3e86007f3e86 DATA _PADDING_16<>+2592(SB)/8, $0x007f3e86007f3e86 DATA _PADDING_16<>+2600(SB)/8, $0x007f3e86007f3e86 DATA _PADDING_16<>+2608(SB)/8, $0x007f3e86007f3e86 DATA _PADDING_16<>+2616(SB)/8, $0x007f3e86007f3e86 DATA _PADDING_16<>+2624(SB)/8, $0x3708898037088980 DATA _PADDING_16<>+2632(SB)/8, $0x3708898037088980 DATA _PADDING_16<>+2640(SB)/8, $0x3708898037088980 DATA _PADDING_16<>+2648(SB)/8, $0x3708898037088980 DATA _PADDING_16<>+2656(SB)/8, $0x3708898037088980 DATA _PADDING_16<>+2664(SB)/8, $0x3708898037088980 DATA _PADDING_16<>+2672(SB)/8, $0x3708898037088980 DATA _PADDING_16<>+2680(SB)/8, 
$0x3708898037088980 DATA _PADDING_16<>+2688(SB)/8, $0xa507ea32a507ea32 DATA _PADDING_16<>+2696(SB)/8, $0xa507ea32a507ea32 DATA _PADDING_16<>+2704(SB)/8, $0xa507ea32a507ea32 DATA _PADDING_16<>+2712(SB)/8, $0xa507ea32a507ea32 DATA _PADDING_16<>+2720(SB)/8, $0xa507ea32a507ea32 DATA _PADDING_16<>+2728(SB)/8, $0xa507ea32a507ea32 DATA _PADDING_16<>+2736(SB)/8, $0xa507ea32a507ea32 DATA _PADDING_16<>+2744(SB)/8, $0xa507ea32a507ea32 DATA _PADDING_16<>+2752(SB)/8, $0x6fab95376fab9537 DATA _PADDING_16<>+2760(SB)/8, $0x6fab95376fab9537 DATA _PADDING_16<>+2768(SB)/8, $0x6fab95376fab9537 DATA _PADDING_16<>+2776(SB)/8, $0x6fab95376fab9537 DATA _PADDING_16<>+2784(SB)/8, $0x6fab95376fab9537 DATA _PADDING_16<>+2792(SB)/8, $0x6fab95376fab9537 DATA _PADDING_16<>+2800(SB)/8, $0x6fab95376fab9537 DATA _PADDING_16<>+2808(SB)/8, $0x6fab95376fab9537 DATA _PADDING_16<>+2816(SB)/8, $0x1740611017406110 DATA _PADDING_16<>+2824(SB)/8, $0x1740611017406110 DATA _PADDING_16<>+2832(SB)/8, $0x1740611017406110 DATA _PADDING_16<>+2840(SB)/8, $0x1740611017406110 DATA _PADDING_16<>+2848(SB)/8, $0x1740611017406110 DATA _PADDING_16<>+2856(SB)/8, $0x1740611017406110 DATA _PADDING_16<>+2864(SB)/8, $0x1740611017406110 DATA _PADDING_16<>+2872(SB)/8, $0x1740611017406110 DATA _PADDING_16<>+2880(SB)/8, $0x0d8cd6f10d8cd6f1 DATA _PADDING_16<>+2888(SB)/8, $0x0d8cd6f10d8cd6f1 DATA _PADDING_16<>+2896(SB)/8, $0x0d8cd6f10d8cd6f1 DATA _PADDING_16<>+2904(SB)/8, $0x0d8cd6f10d8cd6f1 DATA _PADDING_16<>+2912(SB)/8, $0x0d8cd6f10d8cd6f1 DATA _PADDING_16<>+2920(SB)/8, $0x0d8cd6f10d8cd6f1 DATA _PADDING_16<>+2928(SB)/8, $0x0d8cd6f10d8cd6f1 DATA _PADDING_16<>+2936(SB)/8, $0x0d8cd6f10d8cd6f1 DATA _PADDING_16<>+2944(SB)/8, $0xcdaa3b6dcdaa3b6d DATA _PADDING_16<>+2952(SB)/8, $0xcdaa3b6dcdaa3b6d DATA _PADDING_16<>+2960(SB)/8, $0xcdaa3b6dcdaa3b6d DATA _PADDING_16<>+2968(SB)/8, $0xcdaa3b6dcdaa3b6d DATA _PADDING_16<>+2976(SB)/8, $0xcdaa3b6dcdaa3b6d DATA _PADDING_16<>+2984(SB)/8, $0xcdaa3b6dcdaa3b6d DATA _PADDING_16<>+2992(SB)/8, $0xcdaa3b6dcdaa3b6d DATA _PADDING_16<>+3000(SB)/8, $0xcdaa3b6dcdaa3b6d DATA _PADDING_16<>+3008(SB)/8, $0xc0bbbe37c0bbbe37 DATA _PADDING_16<>+3016(SB)/8, $0xc0bbbe37c0bbbe37 DATA _PADDING_16<>+3024(SB)/8, $0xc0bbbe37c0bbbe37 DATA _PADDING_16<>+3032(SB)/8, $0xc0bbbe37c0bbbe37 DATA _PADDING_16<>+3040(SB)/8, $0xc0bbbe37c0bbbe37 DATA _PADDING_16<>+3048(SB)/8, $0xc0bbbe37c0bbbe37 DATA _PADDING_16<>+3056(SB)/8, $0xc0bbbe37c0bbbe37 DATA _PADDING_16<>+3064(SB)/8, $0xc0bbbe37c0bbbe37 DATA _PADDING_16<>+3072(SB)/8, $0x83613bda83613bda DATA _PADDING_16<>+3080(SB)/8, $0x83613bda83613bda DATA _PADDING_16<>+3088(SB)/8, $0x83613bda83613bda DATA _PADDING_16<>+3096(SB)/8, $0x83613bda83613bda DATA _PADDING_16<>+3104(SB)/8, $0x83613bda83613bda DATA _PADDING_16<>+3112(SB)/8, $0x83613bda83613bda DATA _PADDING_16<>+3120(SB)/8, $0x83613bda83613bda DATA _PADDING_16<>+3128(SB)/8, $0x83613bda83613bda DATA _PADDING_16<>+3136(SB)/8, $0xdb48a363db48a363 DATA _PADDING_16<>+3144(SB)/8, $0xdb48a363db48a363 DATA _PADDING_16<>+3152(SB)/8, $0xdb48a363db48a363 DATA _PADDING_16<>+3160(SB)/8, $0xdb48a363db48a363 DATA _PADDING_16<>+3168(SB)/8, $0xdb48a363db48a363 DATA _PADDING_16<>+3176(SB)/8, $0xdb48a363db48a363 DATA _PADDING_16<>+3184(SB)/8, $0xdb48a363db48a363 DATA _PADDING_16<>+3192(SB)/8, $0xdb48a363db48a363 DATA _PADDING_16<>+3200(SB)/8, $0x0b02e9310b02e931 DATA _PADDING_16<>+3208(SB)/8, $0x0b02e9310b02e931 DATA _PADDING_16<>+3216(SB)/8, $0x0b02e9310b02e931 DATA _PADDING_16<>+3224(SB)/8, $0x0b02e9310b02e931 DATA _PADDING_16<>+3232(SB)/8, $0x0b02e9310b02e931 DATA 
_PADDING_16<>+3240(SB)/8, $0x0b02e9310b02e931 DATA _PADDING_16<>+3248(SB)/8, $0x0b02e9310b02e931 DATA _PADDING_16<>+3256(SB)/8, $0x0b02e9310b02e931 DATA _PADDING_16<>+3264(SB)/8, $0x6fd15ca76fd15ca7 DATA _PADDING_16<>+3272(SB)/8, $0x6fd15ca76fd15ca7 DATA _PADDING_16<>+3280(SB)/8, $0x6fd15ca76fd15ca7 DATA _PADDING_16<>+3288(SB)/8, $0x6fd15ca76fd15ca7 DATA _PADDING_16<>+3296(SB)/8, $0x6fd15ca76fd15ca7 DATA _PADDING_16<>+3304(SB)/8, $0x6fd15ca76fd15ca7 DATA _PADDING_16<>+3312(SB)/8, $0x6fd15ca76fd15ca7 DATA _PADDING_16<>+3320(SB)/8, $0x6fd15ca76fd15ca7 DATA _PADDING_16<>+3328(SB)/8, $0x521afaca521afaca DATA _PADDING_16<>+3336(SB)/8, $0x521afaca521afaca DATA _PADDING_16<>+3344(SB)/8, $0x521afaca521afaca DATA _PADDING_16<>+3352(SB)/8, $0x521afaca521afaca DATA _PADDING_16<>+3360(SB)/8, $0x521afaca521afaca DATA _PADDING_16<>+3368(SB)/8, $0x521afaca521afaca DATA _PADDING_16<>+3376(SB)/8, $0x521afaca521afaca DATA _PADDING_16<>+3384(SB)/8, $0x521afaca521afaca DATA _PADDING_16<>+3392(SB)/8, $0x3133843131338431 DATA _PADDING_16<>+3400(SB)/8, $0x3133843131338431 DATA _PADDING_16<>+3408(SB)/8, $0x3133843131338431 DATA _PADDING_16<>+3416(SB)/8, $0x3133843131338431 DATA _PADDING_16<>+3424(SB)/8, $0x3133843131338431 DATA _PADDING_16<>+3432(SB)/8, $0x3133843131338431 DATA _PADDING_16<>+3440(SB)/8, $0x3133843131338431 DATA _PADDING_16<>+3448(SB)/8, $0x3133843131338431 DATA _PADDING_16<>+3456(SB)/8, $0x6ed41a956ed41a95 DATA _PADDING_16<>+3464(SB)/8, $0x6ed41a956ed41a95 DATA _PADDING_16<>+3472(SB)/8, $0x6ed41a956ed41a95 DATA _PADDING_16<>+3480(SB)/8, $0x6ed41a956ed41a95 DATA _PADDING_16<>+3488(SB)/8, $0x6ed41a956ed41a95 DATA _PADDING_16<>+3496(SB)/8, $0x6ed41a956ed41a95 DATA _PADDING_16<>+3504(SB)/8, $0x6ed41a956ed41a95 DATA _PADDING_16<>+3512(SB)/8, $0x6ed41a956ed41a95 DATA _PADDING_16<>+3520(SB)/8, $0x6d4378906d437890 DATA _PADDING_16<>+3528(SB)/8, $0x6d4378906d437890 DATA _PADDING_16<>+3536(SB)/8, $0x6d4378906d437890 DATA _PADDING_16<>+3544(SB)/8, $0x6d4378906d437890 DATA _PADDING_16<>+3552(SB)/8, $0x6d4378906d437890 DATA _PADDING_16<>+3560(SB)/8, $0x6d4378906d437890 DATA _PADDING_16<>+3568(SB)/8, $0x6d4378906d437890 DATA _PADDING_16<>+3576(SB)/8, $0x6d4378906d437890 DATA _PADDING_16<>+3584(SB)/8, $0xc39c91f2c39c91f2 DATA _PADDING_16<>+3592(SB)/8, $0xc39c91f2c39c91f2 DATA _PADDING_16<>+3600(SB)/8, $0xc39c91f2c39c91f2 DATA _PADDING_16<>+3608(SB)/8, $0xc39c91f2c39c91f2 DATA _PADDING_16<>+3616(SB)/8, $0xc39c91f2c39c91f2 DATA _PADDING_16<>+3624(SB)/8, $0xc39c91f2c39c91f2 DATA _PADDING_16<>+3632(SB)/8, $0xc39c91f2c39c91f2 DATA _PADDING_16<>+3640(SB)/8, $0xc39c91f2c39c91f2 DATA _PADDING_16<>+3648(SB)/8, $0x9eccabbd9eccabbd DATA _PADDING_16<>+3656(SB)/8, $0x9eccabbd9eccabbd DATA _PADDING_16<>+3664(SB)/8, $0x9eccabbd9eccabbd DATA _PADDING_16<>+3672(SB)/8, $0x9eccabbd9eccabbd DATA _PADDING_16<>+3680(SB)/8, $0x9eccabbd9eccabbd DATA _PADDING_16<>+3688(SB)/8, $0x9eccabbd9eccabbd DATA _PADDING_16<>+3696(SB)/8, $0x9eccabbd9eccabbd DATA _PADDING_16<>+3704(SB)/8, $0x9eccabbd9eccabbd DATA _PADDING_16<>+3712(SB)/8, $0xb5c9a0e6b5c9a0e6 DATA _PADDING_16<>+3720(SB)/8, $0xb5c9a0e6b5c9a0e6 DATA _PADDING_16<>+3728(SB)/8, $0xb5c9a0e6b5c9a0e6 DATA _PADDING_16<>+3736(SB)/8, $0xb5c9a0e6b5c9a0e6 DATA _PADDING_16<>+3744(SB)/8, $0xb5c9a0e6b5c9a0e6 DATA _PADDING_16<>+3752(SB)/8, $0xb5c9a0e6b5c9a0e6 DATA _PADDING_16<>+3760(SB)/8, $0xb5c9a0e6b5c9a0e6 DATA _PADDING_16<>+3768(SB)/8, $0xb5c9a0e6b5c9a0e6 DATA _PADDING_16<>+3776(SB)/8, $0x532fb63c532fb63c DATA _PADDING_16<>+3784(SB)/8, $0x532fb63c532fb63c DATA _PADDING_16<>+3792(SB)/8, 
$0x532fb63c532fb63c DATA _PADDING_16<>+3800(SB)/8, $0x532fb63c532fb63c DATA _PADDING_16<>+3808(SB)/8, $0x532fb63c532fb63c DATA _PADDING_16<>+3816(SB)/8, $0x532fb63c532fb63c DATA _PADDING_16<>+3824(SB)/8, $0x532fb63c532fb63c DATA _PADDING_16<>+3832(SB)/8, $0x532fb63c532fb63c DATA _PADDING_16<>+3840(SB)/8, $0xd2c741c6d2c741c6 DATA _PADDING_16<>+3848(SB)/8, $0xd2c741c6d2c741c6 DATA _PADDING_16<>+3856(SB)/8, $0xd2c741c6d2c741c6 DATA _PADDING_16<>+3864(SB)/8, $0xd2c741c6d2c741c6 DATA _PADDING_16<>+3872(SB)/8, $0xd2c741c6d2c741c6 DATA _PADDING_16<>+3880(SB)/8, $0xd2c741c6d2c741c6 DATA _PADDING_16<>+3888(SB)/8, $0xd2c741c6d2c741c6 DATA _PADDING_16<>+3896(SB)/8, $0xd2c741c6d2c741c6 DATA _PADDING_16<>+3904(SB)/8, $0x07237ea307237ea3 DATA _PADDING_16<>+3912(SB)/8, $0x07237ea307237ea3 DATA _PADDING_16<>+3920(SB)/8, $0x07237ea307237ea3 DATA _PADDING_16<>+3928(SB)/8, $0x07237ea307237ea3 DATA _PADDING_16<>+3936(SB)/8, $0x07237ea307237ea3 DATA _PADDING_16<>+3944(SB)/8, $0x07237ea307237ea3 DATA _PADDING_16<>+3952(SB)/8, $0x07237ea307237ea3 DATA _PADDING_16<>+3960(SB)/8, $0x07237ea307237ea3 DATA _PADDING_16<>+3968(SB)/8, $0xa4954b68a4954b68 DATA _PADDING_16<>+3976(SB)/8, $0xa4954b68a4954b68 DATA _PADDING_16<>+3984(SB)/8, $0xa4954b68a4954b68 DATA _PADDING_16<>+3992(SB)/8, $0xa4954b68a4954b68 DATA _PADDING_16<>+4000(SB)/8, $0xa4954b68a4954b68 DATA _PADDING_16<>+4008(SB)/8, $0xa4954b68a4954b68 DATA _PADDING_16<>+4016(SB)/8, $0xa4954b68a4954b68 DATA _PADDING_16<>+4024(SB)/8, $0xa4954b68a4954b68 DATA _PADDING_16<>+4032(SB)/8, $0x4c191d764c191d76 DATA _PADDING_16<>+4040(SB)/8, $0x4c191d764c191d76 DATA _PADDING_16<>+4048(SB)/8, $0x4c191d764c191d76 DATA _PADDING_16<>+4056(SB)/8, $0x4c191d764c191d76 DATA _PADDING_16<>+4064(SB)/8, $0x4c191d764c191d76 DATA _PADDING_16<>+4072(SB)/8, $0x4c191d764c191d76 DATA _PADDING_16<>+4080(SB)/8, $0x4c191d764c191d76 DATA _PADDING_16<>+4088(SB)/8, $0x4c191d764c191d76 GLOBL _PADDING_16<>(SB),(NOPTR+RODATA),$4096 DATA _DIGEST_16<>+0(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+4(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+8(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+12(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+16(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+20(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+24(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+28(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+32(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+36(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+40(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+44(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+48(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+52(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+56(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+60(SB)/4, $0x6a09e667 DATA _DIGEST_16<>+64(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+68(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+72(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+76(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+80(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+84(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+88(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+92(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+96(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+100(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+104(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+108(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+112(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+116(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+120(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+124(SB)/4, $0xbb67ae85 DATA _DIGEST_16<>+128(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+132(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+136(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+140(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+144(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+148(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+152(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+156(SB)/4, 
$0x3c6ef372 DATA _DIGEST_16<>+160(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+164(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+168(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+172(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+176(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+180(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+184(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+188(SB)/4, $0x3c6ef372 DATA _DIGEST_16<>+192(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+196(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+200(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+204(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+208(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+212(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+216(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+220(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+224(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+228(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+232(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+236(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+240(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+244(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+248(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+252(SB)/4, $0xa54ff53a DATA _DIGEST_16<>+256(SB)/4, $0x510e527f DATA _DIGEST_16<>+260(SB)/4, $0x510e527f DATA _DIGEST_16<>+264(SB)/4, $0x510e527f DATA _DIGEST_16<>+268(SB)/4, $0x510e527f DATA _DIGEST_16<>+272(SB)/4, $0x510e527f DATA _DIGEST_16<>+276(SB)/4, $0x510e527f DATA _DIGEST_16<>+280(SB)/4, $0x510e527f DATA _DIGEST_16<>+284(SB)/4, $0x510e527f DATA _DIGEST_16<>+288(SB)/4, $0x510e527f DATA _DIGEST_16<>+292(SB)/4, $0x510e527f DATA _DIGEST_16<>+296(SB)/4, $0x510e527f DATA _DIGEST_16<>+300(SB)/4, $0x510e527f DATA _DIGEST_16<>+304(SB)/4, $0x510e527f DATA _DIGEST_16<>+308(SB)/4, $0x510e527f DATA _DIGEST_16<>+312(SB)/4, $0x510e527f DATA _DIGEST_16<>+316(SB)/4, $0x510e527f DATA _DIGEST_16<>+320(SB)/4, $0x9b05688c DATA _DIGEST_16<>+324(SB)/4, $0x9b05688c DATA _DIGEST_16<>+328(SB)/4, $0x9b05688c DATA _DIGEST_16<>+332(SB)/4, $0x9b05688c DATA _DIGEST_16<>+336(SB)/4, $0x9b05688c DATA _DIGEST_16<>+340(SB)/4, $0x9b05688c DATA _DIGEST_16<>+344(SB)/4, $0x9b05688c DATA _DIGEST_16<>+348(SB)/4, $0x9b05688c DATA _DIGEST_16<>+352(SB)/4, $0x9b05688c DATA _DIGEST_16<>+356(SB)/4, $0x9b05688c DATA _DIGEST_16<>+360(SB)/4, $0x9b05688c DATA _DIGEST_16<>+364(SB)/4, $0x9b05688c DATA _DIGEST_16<>+368(SB)/4, $0x9b05688c DATA _DIGEST_16<>+372(SB)/4, $0x9b05688c DATA _DIGEST_16<>+376(SB)/4, $0x9b05688c DATA _DIGEST_16<>+380(SB)/4, $0x9b05688c DATA _DIGEST_16<>+384(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+388(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+392(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+396(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+400(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+404(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+408(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+412(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+416(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+420(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+424(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+428(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+432(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+436(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+440(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+444(SB)/4, $0x1f83d9ab DATA _DIGEST_16<>+448(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+452(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+456(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+460(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+464(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+468(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+472(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+476(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+480(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+484(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+488(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+492(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+496(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+500(SB)/4, $0x5be0cd19 DATA 
_DIGEST_16<>+504(SB)/4, $0x5be0cd19 DATA _DIGEST_16<>+508(SB)/4, $0x5be0cd19 GLOBL _DIGEST_16<>(SB),(NOPTR+RODATA),$512 DATA _PSHUFFLE_TRANSPOSE_MASK1<>+0(SB)/8, $0x0000000000000000 DATA _PSHUFFLE_TRANSPOSE_MASK1<>+8(SB)/8, $0x0000000000000001 DATA _PSHUFFLE_TRANSPOSE_MASK1<>+16(SB)/8, $0x0000000000000008 DATA _PSHUFFLE_TRANSPOSE_MASK1<>+24(SB)/8, $0x0000000000000009 DATA _PSHUFFLE_TRANSPOSE_MASK1<>+32(SB)/8, $0x0000000000000004 DATA _PSHUFFLE_TRANSPOSE_MASK1<>+40(SB)/8, $0x0000000000000005 DATA _PSHUFFLE_TRANSPOSE_MASK1<>+48(SB)/8, $0x000000000000000C DATA _PSHUFFLE_TRANSPOSE_MASK1<>+56(SB)/8, $0x000000000000000D GLOBL _PSHUFFLE_TRANSPOSE_MASK1<>(SB),(NOPTR+RODATA),$64 DATA _PSHUFFLE_TRANSPOSE_MASK2<>+0(SB)/8, $0x0000000000000002 DATA _PSHUFFLE_TRANSPOSE_MASK2<>+8(SB)/8, $0x0000000000000003 DATA _PSHUFFLE_TRANSPOSE_MASK2<>+16(SB)/8, $0x000000000000000A DATA _PSHUFFLE_TRANSPOSE_MASK2<>+24(SB)/8, $0x000000000000000B DATA _PSHUFFLE_TRANSPOSE_MASK2<>+32(SB)/8, $0x0000000000000006 DATA _PSHUFFLE_TRANSPOSE_MASK2<>+40(SB)/8, $0x0000000000000007 DATA _PSHUFFLE_TRANSPOSE_MASK2<>+48(SB)/8, $0x000000000000000E DATA _PSHUFFLE_TRANSPOSE_MASK2<>+56(SB)/8, $0x000000000000000F GLOBL _PSHUFFLE_TRANSPOSE_MASK2<>(SB),(NOPTR+RODATA),$64 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+0(SB)/4, $0x00000000 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+4(SB)/4, $0x00000002 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+8(SB)/4, $0x00000010 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+12(SB)/4, $0x00000012 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+16(SB)/4, $0x00000001 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+20(SB)/4, $0x00000003 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+24(SB)/4, $0x00000011 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+28(SB)/4, $0x00000013 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+32(SB)/4, $0x00000004 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+36(SB)/4, $0x00000006 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+40(SB)/4, $0x00000014 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+44(SB)/4, $0x00000016 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+48(SB)/4, $0x00000005 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+52(SB)/4, $0x00000007 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+56(SB)/4, $0x00000015 DATA _PSHUFFLE_TRANSPOSE_MASK3<>+60(SB)/4, $0x00000017 GLOBL _PSHUFFLE_TRANSPOSE_MASK3<>(SB),(NOPTR+RODATA),$64 DATA _PSHUFFLE_TRANSPOSE_MASK4<>+0(SB)/4, $0x00000008 DATA _PSHUFFLE_TRANSPOSE_MASK4<>+4(SB)/4, $0x0000000a DATA _PSHUFFLE_TRANSPOSE_MASK4<>+8(SB)/4, $0x00000018 DATA _PSHUFFLE_TRANSPOSE_MASK4<>+12(SB)/4, $0x0000001a DATA _PSHUFFLE_TRANSPOSE_MASK4<>+16(SB)/4, $0x00000009 DATA _PSHUFFLE_TRANSPOSE_MASK4<>+20(SB)/4, $0x0000000b DATA _PSHUFFLE_TRANSPOSE_MASK4<>+24(SB)/4, $0x00000019 DATA _PSHUFFLE_TRANSPOSE_MASK4<>+28(SB)/4, $0x0000001b DATA _PSHUFFLE_TRANSPOSE_MASK4<>+32(SB)/4, $0x0000000c DATA _PSHUFFLE_TRANSPOSE_MASK4<>+36(SB)/4, $0x0000000e DATA _PSHUFFLE_TRANSPOSE_MASK4<>+40(SB)/4, $0x0000001c DATA _PSHUFFLE_TRANSPOSE_MASK4<>+44(SB)/4, $0x0000001e DATA _PSHUFFLE_TRANSPOSE_MASK4<>+48(SB)/4, $0x0000000d DATA _PSHUFFLE_TRANSPOSE_MASK4<>+52(SB)/4, $0x0000000f DATA _PSHUFFLE_TRANSPOSE_MASK4<>+56(SB)/4, $0x0000001d DATA _PSHUFFLE_TRANSPOSE_MASK4<>+60(SB)/4, $0x0000001f GLOBL _PSHUFFLE_TRANSPOSE_MASK4<>(SB),(NOPTR+RODATA),$64 gohashtree-0.0.4-beta/hash_arm64.go000066400000000000000000000023361455574744200170650ustar00rootroot00000000000000//go:build arm64 // +build arm64 /* MIT License Copyright (c) 2021 Prysmatic Labs Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package gohashtree import ( "github.com/klauspost/cpuid/v2" ) var hasShani = cpuid.CPU.Supports(cpuid.SHA2) var supportedCPU = true gohashtree-0.0.4-beta/hash_arm64.s000066400000000000000000001264621455574744200167310ustar00rootroot00000000000000/* MIT License Copyright (c) 2021 Prysmatic Labs Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
This code is based on Intel's implementation found in
https://github.com/intel/intel-ipsec-mb
Copied parts are Copyright (c) 2012-2021, Intel Corporation
*/

#include "textflag.h"

#define OUTPUT_PTR R0
#define DATA_PTR R1
#define NUM_BLKS R2
#define last R2
#define digest R19
#define k256 R20
#define padding R21

#define VR0 V0
#define VR1 V1
#define VR2 V2
#define VR3 V3
#define VTMP0 V4
#define VTMP1 V5
#define VTMP2 V6
#define VTMP3 V7
#define VTMP4 V17
#define VTMP5 V18
#define VTMP6 V19
#define KV0 V20
#define KV1 V21
#define KV2 V22
#define KV3 V23
#define KQ0 F20
#define KQ1 F21
#define KQ2 F22
#define KQ3 F23
#define VZ V16

#define A_ R3
#define B_ R4
#define C_ R5
#define D_ R6
#define E_ R7
#define F_ R9
#define G_ R10
#define H_ R11
#define T1 R12
#define T2 R13
#define T3 R14
#define T4 R15
#define T5 R22
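// Single-block hybrid rounds. Each round*_sched macro below interleaves one
// scalar SHA-256 round on the general registers A_..H_ with a slice of the
// ASIMD computation of four future message-schedule words, so most of the
// schedule cost hides behind the scalar ALU work. The scheduled words are
// staged through the scratch area at (RSP).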
#define round1_sched(A, B, C, D, E, F, G, H, VV0, VV1, VV2, VV3) \
	VEXT $4, VV3.B16, VV2.B16, VTMP0.B16; \
	RORW $6, E, T1; \
	MOVWU (RSP), T3; \
	RORW $2, A, T2; \
	RORW $13, A, T4; \
	VEXT $4, VV1.B16, VV0.B16, VTMP1.B16; \
	EORW T4, T2, T2; \
	ADDW T3, H, H; \
	RORW $11, E, T3; \
	VADD VV0.S4, VTMP0.S4, VTMP0.S4; \
	EORW T3, T1, T1; \
	RORW $25, E, T3; \
	RORW $22, A, T4; \
	VUSHR $7, VTMP1.S4, VTMP2.S4; \
	EORW T3, T1, T1; \
	EORW T4, T2, T2; \
	EORW G, F, T3; \
	VSHL $(32-7), VTMP1.S4, VTMP3.S4; \
	EORW C, A, T4; \
	ANDW E, T3, T3; \
	ANDW B, T4, T4; \
	EORW G, T3, T3; \
	VUSHR $18, VTMP1.S4, VTMP4.S4; \
	ADDW T3, T1, T1; \
	ANDW C, A, T3; \
	ADDW T1, H, H; \
	VORR VTMP2.B16, VTMP3.B16, VTMP3.B16; \
	EORW T3, T4, T4; \
	ADDW H, D, D; \
	ADDW T4, T2, T2; \
	VUSHR $3, VTMP1.S4, VTMP2.S4; \
	ADDW T2, H, H

#define round2_sched(A, B, C, D, E, F, G, H, VV3) \
	MOVWU 4(RSP), T3; \
	RORW $6, E, T1; \
	VSHL $(32-18), VTMP1.S4, VTMP1.S4; \
	RORW $2, A, T2; \
	RORW $13, A, T4; \
	ADDW T3, H, H; \
	VEOR VTMP2.B16, VTMP3.B16, VTMP3.B16; \
	RORW $11, E, T3; \
	EORW T4, T2, T2; \
	EORW T3, T1, T1; \
	VEOR VTMP1.B16, VTMP4.B16, VTMP1.B16; \
	RORW $25, E, T3; \
	RORW $22, A, T4; \
	EORW T3, T1, T1; \
	VZIP2 VV3.S4, VV3.S4, VTMP5.S4; \
	EORW T4, T2, T2; \
	EORW G, F, T3; \
	EORW C, A, T4; \
	VEOR VTMP1.B16, VTMP3.B16, VTMP1.B16; \
	ANDW E, T3, T3; \
	ANDW B, T4, T4; \
	EORW G, T3, T3; \
	VUSHR $10, VTMP5.S4, VTMP6.S4; \
	ADDW T3, T1, T1; \
	ANDW C, A, T3; \
	ADDW T1, H, H; \
	VUSHR $19, VTMP5.D2, VTMP3.D2; \
	EORW T3, T4, T4; \
	ADDW H, D, D; \
	ADDW T4, T2, T2; \
	VUSHR $17, VTMP5.D2, VTMP2.D2; \
	ADDW T2, H, H

#define round3_sched(A, B, C, D, E, F, G, H) \
	MOVWU 8(RSP), T3; \
	RORW $6, E, T1; \
	VEOR VTMP6.B16, VTMP3.B16, VTMP3.B16; \
	RORW $2, A, T2; \
	RORW $13, A, T4; \
	ADDW T3, H, H; \
	VADD VTMP1.S4, VTMP0.S4, VTMP0.S4; \
	RORW $11, E, T3; \
	EORW T4, T2, T2; \
	EORW T3, T1, T1; \
	VEOR VTMP2.B16, VTMP3.B16, VTMP1.B16; \
	RORW $25, E, T3; \
	RORW $22, A, T4; \
	EORW T3, T1, T1; \
	WORD $0xea128a5; \
	EORW T4, T2, T2; \
	EORW G, F, T3; \
	EORW C, A, T4; \
	VADD VTMP1.S4, VTMP0.S4, VTMP0.S4; \
	ANDW E, T3, T3; \
	ANDW B, T4, T4; \
	EORW G, T3, T3; \
	VZIP1 VTMP0.S4, VTMP0.S4, VTMP2.S4; \
	ADDW T3, T1, T1; \
	ANDW C, A, T3; \
	ADDW T1, H, H; \
	EORW T3, T4, T4; \
	ADDW H, D, D; \
	ADDW T4, T2, T2; \
	VUSHR $10, VTMP2.S4, VTMP1.S4; \
	ADDW T2, H, H

#define round4_sched(A, B, C, D, E, F, G, H, VV0) \
	MOVWU 12(RSP), T3; \
	RORW $6, E, T1; \
	RORW $2, A, T2; \
	VUSHR $19, VTMP2.D2, VTMP3.D2; \
	RORW $13, A, T4; \
	ADDW T3, H, H; \
	RORW $11, E, T3; \
	EORW T4, T2, T2; \
	VUSHR $17, VTMP2.D2, VTMP2.D2; \
	EORW T3, T1, T1; \
	RORW $25, E, T3; \
	RORW $22, A, T4; \
	EORW T3, T1, T1; \
	VEOR VTMP3.B16, VTMP1.B16, VTMP1.B16; \
	EORW T4, T2, T2; \
	EORW G, F, T3; \
	EORW C, A, T4; \
	VEOR VTMP2.B16, VTMP1.B16, VTMP1.B16; \
	ANDW E, T3, T3; \
	ANDW B, T4, T4; \
	EORW G, T3, T3; \
	VUZP1 VTMP1.S4, VZ.S4, VTMP1.S4; \
	ADDW T3, T1, T1; \
	ANDW C, A, T3; \
	ADDW T1, H, H; \
	EORW T3, T4, T4; \
	ADDW H, D, D; \
	ADDW T4, T2, T2; \
	VADD VTMP0.S4, VTMP1.S4, VV0.S4; \
	ADDW T2, H, H

#define four_rounds_sched(A, B, C, D, E, F, G, H, VV0, VV1, VV2, VV3) \
	round1_sched(A, B, C, D, E, F, G, H, VV0, VV1, VV2, VV3); \
	round2_sched(H, A, B, C, D, E, F, G, VV3); \
	round3_sched(G, H, A, B, C, D, E, F); \
	round4_sched(F, G, H, A, B, C, D, E, VV0)

#define one_round(A, B, C, D, E, F, G, H, ptr, offset) \
	MOVWU offset(ptr), T3; \
	RORW $6, E, T1; \
	RORW $2, A, T2; \
	RORW $13, A, T4; \
	ADDW T3, H, H; \
	RORW $11, E, T3; \
	EORW T4, T2, T2; \
	EORW T3, T1, T1; \
	RORW $25, E, T3; \
	RORW $22, A, T4; \
	EORW T3, T1, T1; \
	EORW T4, T2, T2; \
	EORW G, F, T3; \
	EORW C, A, T4; \
	ANDW E, T3, T3; \
	ANDW B, T4, T4; \
	EORW G, T3, T3; \
	ADDW T3, T1, T1; \
	ANDW C, A, T3; \
	ADDW T1, H, H; \
	EORW T3, T4, T4; \
	ADDW H, D, D; \
	ADDW T4, T2, T2; \
	ADDW T2, H, H

#define four_rounds(A, B, C, D, E, F, G, H, ptr, offset) \
	one_round(A, B, C, D, E, F, G, H, ptr, offset); \
	one_round(H, A, B, C, D, E, F, G, ptr, offset + 4); \
	one_round(G, H, A, B, C, D, E, F, ptr, offset + 8); \
	one_round(F, G, H, A, B, C, D, E, ptr, offset + 12)

// Definitions for ASIMD version
#define digest2 R6
#define post64 R7
#define postminus176 R9
#define post32 R10
#define postminus80 R11

#define M1 V16
#define M2 V17
#define M3 V18
#define M4 V19
#define MQ1 F16
#define MQ2 F17
#define MQ3 F18
#define MQ4 F19
#define NVR1 V24
#define NVR2 V25
#define NVR3 V26
#define NVR4 V27
#define QR2 F25
#define QR4 F27
#define TV1 V28
#define TV2 V29
#define TV3 V30
#define TV4 V31
#define TV5 V20
#define TV6 V21
#define TV7 V22
#define TV8 V23
#define TQ4 F31
#define TQ5 F20
#define TQ6 F21
#define TQ7 F22
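// Four-lane ASIMD version. From here on A..H are .S4 vector registers and
// each 32-bit lane carries the working state of a different input block, so
// one round macro advances four independent SHA-256 computations at once.
// The `bicword` argument is a raw instruction word; it appears to encode
// the vector BIC (AND NOT) into TV4 that the Ch() term of the round needs,
// with a different register assignment for each of the eight round
// positions in the eight_4_rounds* wrappers.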
#define round_4(A, B, C, D, E, F, G, H, MV, MQ, bicword, offset) \
	VUSHR $6, E.S4, TV1.S4; \
	VSHL $(32-6), E.S4, TV2.S4; \
	VUSHR $11, E.S4, NVR2.S4; \
	VSHL $(32-11), E.S4, NVR1.S4; \
	VAND F.B16, E.B16, TV3.B16; \
	WORD bicword; \
	VORR TV2.B16, TV1.B16, TV1.B16; \
	VUSHR $25, E.S4, TV2.S4; \
	FMOVQ offset(k256), QR4; \
	VSHL $(32-25), E.S4, NVR3.S4; \
	VORR NVR1.B16, NVR2.B16, NVR1.B16; \
	VEOR TV4.B16, TV3.B16, TV3.B16; \
	VORR NVR3.B16, TV2.B16, TV2.B16; \
	VEOR C.B16, A.B16, NVR3.B16; \
	VEOR NVR1.B16, TV1.B16, TV1.B16; \
	VADD NVR4.S4, MV.S4, TV4.S4; \
	VADD TV3.S4, H.S4, H.S4; \
	VUSHR $2, A.S4, TV3.S4; \
	VAND B.B16, NVR3.B16, NVR3.B16; \
	VSHL $(32-2), A.S4, NVR4.S4; \
	VEOR TV2.B16, TV1.B16, TV1.B16; \
	VUSHR $13, A.S4, TV2.S4; \
	VSHL $(32-13), A.S4, NVR1.S4; \
	VADD TV4.S4, H.S4, H.S4; \
	VORR NVR4.B16, TV3.B16, TV3.B16; \
	VAND C.B16, A.B16, NVR4.B16; \
	VUSHR $22, A.S4, TV4.S4; \
	VSHL $(32-22), A.S4, NVR2.S4; \
	VORR NVR1.B16, TV2.B16, TV2.B16; \
	VADD TV1.S4, H.S4, H.S4; \
	VEOR NVR4.B16, NVR3.B16, NVR3.B16; \
	VORR NVR2.B16, TV4.B16, TV4.B16; \
	VEOR TV3.B16, TV2.B16, TV2.B16; \
	VADD H.S4, D.S4, D.S4; \
	VADD NVR3.S4, H.S4, H.S4; \
	VEOR TV4.B16, TV2.B16, TV2.B16; \
	FMOVQ MQ, offset(RSP); \
	VADD TV2.S4, H.S4, H.S4

#define eight_4_roundsA(A, B, C, D, E, F, G, H, MV1, MV2, MV3, MV4, MQ1, MQ2, MQ3, MQ4, offset) \
	round_4(A, B, C, D, E, F, G, H, MV1, MQ1, $0x4e641cdf, offset); \
	round_4(H, A, B, C, D, E, F, G, MV2, MQ2, $0x4e631cbf, offset + 16); \
	round_4(G, H, A, B, C, D, E, F, MV3, MQ3, $0x4e621c9f, offset + 32); \
	round_4(F, G, H, A, B, C, D, E, MV4, MQ4, $0x4e611c7f, offset + 48)

#define eight_4_roundsB(A, B, C, D, E, F, G, H, MV1, MV2, MV3, MV4, MQ1, MQ2, MQ3, MQ4, offset) \
	round_4(A, B, C, D, E, F, G, H, MV1, MQ1, $0x4e601c5f, offset); \
	round_4(H, A, B, C, D, E, F, G, MV2, MQ2, $0x4e671c3f, offset + 16); \
	round_4(G, H, A, B, C, D, E, F, MV3, MQ3, $0x4e661c1f, offset + 32); \
	round_4(F, G, H, A, B, C, D, E, MV4, MQ4, $0x4e651cff, offset + 48)

#define round_4_and_sched(A, B, C, D, E, F, G, H, bicword, offset) \
	FLDPQ (offset-256)(RSP), (TQ6, TQ5); \
	VUSHR $6, E.S4, TV1.S4; \
	VSHL $(32-6), E.S4, TV2.S4; \
	VUSHR $11, E.S4, NVR2.S4; \
	VSHL $(32-11), E.S4, NVR1.S4; \
	VAND F.B16, E.B16, TV3.B16; \
	WORD bicword; \
	VUSHR $7, TV5.S4, M1.S4; \
	FMOVQ (offset-32)(RSP), TQ7; \
	VSHL $(32-7), TV5.S4, M2.S4; \
	VORR TV2.B16, TV1.B16, TV1.B16; \
	VUSHR $25, E.S4, TV2.S4; \
	VSHL $(32-25), E.S4, NVR3.S4; \
	VORR NVR1.B16, NVR2.B16, NVR1.B16; \
	VEOR TV4.B16, TV3.B16, TV3.B16; \
	FMOVQ offset(k256), QR4; \
	VORR M2.B16, M1.B16, M1.B16; \
	VUSHR $17, TV7.S4, M3.S4; \
	VSHL $(32-17), TV7.S4, M4.S4; \
	VUSHR $18, TV5.S4, M2.S4; \
	VSHL $(32-18), TV5.S4, TV8.S4; \
	VORR NVR3.B16, TV2.B16, TV2.B16; \
	VEOR C.B16, A.B16, NVR3.B16; \
	VORR M4.B16, M3.B16, M3.B16; \
	FMOVQ (offset-112)(RSP), TQ4; \
	VUSHR $19, TV7.S4, M4.S4; \
	VSHL $(32-19), TV7.S4, NVR2.S4; \
	VORR TV8.B16, M2.B16, M2.B16; \
	VUSHR $3, TV5.S4, TV8.S4; \
	VORR NVR2.B16, M4.B16, M4.B16; \
	VEOR NVR1.B16, TV1.B16, TV1.B16; \
	VEOR M2.B16, M1.B16, M1.B16; \
	VUSHR $10, TV7.S4, M2.S4; \
	VEOR M4.B16, M3.B16, M3.B16; \
	VADD TV3.S4, H.S4, H.S4; \
	VEOR TV8.B16, M1.B16, M1.B16; \
	VADD TV4.S4, TV6.S4, TV6.S4; \
	VEOR M2.B16, M3.B16, M3.B16; \
	VUSHR $2, A.S4, TV3.S4; \
	VAND B.B16, NVR3.B16, NVR3.B16; \
	VADD TV6.S4, M1.S4, M1.S4; \
	VSHL $(32-2), A.S4, TV6.S4; \
	VEOR TV2.B16, TV1.B16, TV1.B16; \
	VUSHR $13, A.S4, TV2.S4; \
	VADD M3.S4, M1.S4, M1.S4; \
	VADD TV1.S4, H.S4, H.S4; \
	VSHL $(32-13), A.S4, NVR1.S4; \
	VORR TV6.B16, TV3.B16, TV3.B16; \
	VADD NVR4.S4, M1.S4, TV5.S4; \
	FMOVQ MQ1, offset(RSP); \
	VAND C.B16, A.B16, NVR4.B16; \
	VUSHR $22, A.S4, TV4.S4; \
	VSHL $(32-22), A.S4, NVR2.S4; \
	VADD TV5.S4, H.S4, H.S4; \
	VORR NVR1.B16, TV2.B16, TV2.B16; \
	VEOR NVR4.B16, NVR3.B16, NVR3.B16; \
	VORR NVR2.B16, TV4.B16, TV4.B16; \
	VEOR TV3.B16, TV2.B16, TV2.B16; \
	VADD H.S4, D.S4, D.S4; \
	VADD NVR3.S4, H.S4, H.S4; \
	VEOR TV4.B16, TV2.B16, TV2.B16; \
	VADD TV2.S4, H.S4, H.S4

#define eight_4_rounds_and_sched(A, B, C, D, E, F, G, H, offset) \
	round_4_and_sched(A, B, C, D, E, F, G, H, $0x4e641cdf, offset + 0*16); \
	round_4_and_sched(H, A, B, C, D, E, F, G, $0x4e631cbf, offset + 1*16); \
	round_4_and_sched(G, H, A, B, C, D, E, F, $0x4e621c9f, offset + 2*16); \
	round_4_and_sched(F, G, H, A, B, C, D, E, $0x4e611c7f, offset + 3*16); \
	round_4_and_sched(E, F, G, H, A, B, C, D, $0x4e601c5f, offset + 4*16); \
	round_4_and_sched(D, E, F, G, H, A, B, C, $0x4e671c3f, offset + 5*16); \
	round_4_and_sched(C, D, E, F, G, H, A, B, $0x4e661c1f, offset + 6*16); \
	round_4_and_sched(B, C, D, E, F, G, H, A, $0x4e651cff, offset + 7*16)
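// Padding rounds. Hashing a pair of 32-byte chunks always ends with the
// same fixed 64-byte SHA-256 padding block, so that block's message
// schedule is constant. The tables addressed through `padding` store the
// precomputed sums K[t] + W[t] for it (for instance 0xc28a2f98 =
// 0x428a2f98 + 0x80000000, the first round constant plus the leading
// padding bit), which is why round_4_padding needs no schedule logic at
// all.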
#define round_4_padding(A, B, C, D, E, F, G, H, bicword, offset) \
	VUSHR $6, E.S4, TV1.S4; \
	VSHL $(32-6), E.S4, TV2.S4; \
	VUSHR $11, E.S4, NVR2.S4; \
	VSHL $(32-11), E.S4, NVR1.S4; \
	VAND F.B16, E.B16, TV3.B16; \
	WORD bicword; \
	VORR TV2.B16, TV1.B16, TV1.B16; \
	VUSHR $25, E.S4, TV2.S4; \
	VSHL $(32-25), E.S4, NVR3.S4; \
	VORR NVR1.B16, NVR2.B16, NVR1.B16; \
	VEOR TV4.B16, TV3.B16, TV3.B16; \
	VORR NVR3.B16, TV2.B16, TV2.B16; \
	VEOR C.B16, A.B16, NVR3.B16; \
	VEOR NVR1.B16, TV1.B16, TV1.B16; \
	VADD TV3.S4, H.S4, H.S4; \
	VUSHR $2, A.S4, TV3.S4; \
	FMOVQ offset(padding), QR2; \
	VAND B.B16, NVR3.B16, NVR3.B16; \
	VSHL $(32-2), A.S4, NVR4.S4; \
	VEOR TV2.B16, TV1.B16, TV1.B16; \
	VUSHR $13, A.S4, TV2.S4; \
	VSHL $(32-13), A.S4, NVR1.S4; \
	VADD NVR2.S4, H.S4, H.S4; \
	VORR NVR4.B16, TV3.B16, TV3.B16; \
	VAND C.B16, A.B16, NVR4.B16; \
	VUSHR $22, A.S4, TV4.S4; \
	VSHL $(32-22), A.S4, NVR2.S4; \
	VORR NVR1.B16, TV2.B16, TV2.B16; \
	VADD TV1.S4, H.S4, H.S4; \
	VEOR NVR4.B16, NVR3.B16, NVR3.B16; \
	VORR NVR2.B16, TV4.B16, TV4.B16; \
	VEOR TV3.B16, TV2.B16, TV2.B16; \
	VADD H.S4, D.S4, D.S4; \
	VADD NVR3.S4, H.S4, H.S4; \
	VEOR TV4.B16, TV2.B16, TV2.B16; \
	VADD TV2.S4, H.S4, H.S4

#define eight_4_rounds_padding(A, B, C, D, E, F, G, H, offset) \
	round_4_padding(A, B, C, D, E, F, G, H, $0x4e641cdf, offset + 0*16); \
	round_4_padding(H, A, B, C, D, E, F, G, $0x4e631cbf, offset + 1*16); \
	round_4_padding(G, H, A, B, C, D, E, F, $0x4e621c9f, offset + 2*16); \
	round_4_padding(F, G, H, A, B, C, D, E, $0x4e611c7f, offset + 3*16); \
	round_4_padding(E, F, G, H, A, B, C, D, $0x4e601c5f, offset + 4*16); \
	round_4_padding(D, E, F, G, H, A, B, C, $0x4e671c3f, offset + 5*16); \
	round_4_padding(C, D, E, F, G, H, A, B, $0x4e661c1f, offset + 6*16); \
	round_4_padding(B, C, D, E, F, G, H, A, $0x4e651cff, offset + 7*16)

// Definitions for the SHA2 crypto-extensions version
#define check_shani R19

#define HASHUPDATE(word) \
	SHA256H word, V3, V2; \
	SHA256H2 word, V8, V3; \
	VMOV V2.B16, V8.B16
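// _hash is the assembly backend behind the exported Hash function. Judging
// from the frame layout (digests+0, p_base+8, count+32 and a 36-byte
// argument area), it receives a destination pointer, the input slice base
// and a block count; the exact Go declaration lives in the package's Go
// sources. It selects a path at runtime: the SHA2 crypto extensions when
// hasShani is set, otherwise four blocks at a time in arm_x4 with a
// single-block fallback in arm_x1.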
TEXT ·_hash(SB), 0, $1024-36
	MOVD digests+0(FP), OUTPUT_PTR
	MOVD p_base+8(FP), DATA_PTR
	MOVWU count+32(FP), NUM_BLKS
	MOVBU ·hasShani(SB), check_shani
	CBNZ check_shani, shani

arm_x4:
	CMPW $4, NUM_BLKS
	BLO arm_x1
	MOVD $_PADDING_4<>(SB), padding
	MOVD $_K256_4<>(SB), k256
	MOVD $_DIGEST_4<>(SB), digest
	ADD $64, digest, digest2
	MOVD $64, post64
	MOVD $32, post32
	MOVD $-80, postminus80
	MOVD $-176, postminus176

arm_x4_loop:
	CMPW $4, NUM_BLKS
	BLO arm_x1
	VLD1 (digest), [V0.S4, V1.S4, V2.S4, V3.S4]
	VLD1 (digest2), [V4.S4, V5.S4, V6.S4, V7.S4]

	// First 16 rounds
	WORD $0xde7a030
	WORD $0xde7b030
	WORD $0x4de7a030
	WORD $0x4de9b030
	VREV32 M1.B16, M1.B16
	VREV32 M2.B16, M2.B16
	VREV32 M3.B16, M3.B16
	VREV32 M4.B16, M4.B16
	eight_4_roundsA(V0, V1, V2, V3, V4, V5, V6, V7, M1, M2, M3, M4, MQ1, MQ2, MQ3, MQ4, 0x00)
	WORD $0xde7a030
	WORD $0xde7b030
	WORD $0x4de7a030
	WORD $0x4de9b030
	VREV32 M1.B16, M1.B16
	VREV32 M2.B16, M2.B16
	VREV32 M3.B16, M3.B16
	VREV32 M4.B16, M4.B16
	eight_4_roundsB(V4, V5, V6, V7, V0, V1, V2, V3, M1, M2, M3, M4, MQ1, MQ2, MQ3, MQ4, 0x40)
	WORD $0xde7a030
	WORD $0xde7b030
	WORD $0x4de7a030
	WORD $0x4de9b030
	VREV32 M1.B16, M1.B16
	VREV32 M2.B16, M2.B16
	VREV32 M3.B16, M3.B16
	VREV32 M4.B16, M4.B16
	eight_4_roundsA(V0, V1, V2, V3, V4, V5, V6, V7, M1, M2, M3, M4, MQ1, MQ2, MQ3, MQ4, 0x80)
	WORD $0xde7a030
	WORD $0xde7b030
	WORD $0x4de7a030
	WORD $0x4de9b030
	VREV32 M1.B16, M1.B16
	VREV32 M2.B16, M2.B16
	VREV32 M3.B16, M3.B16
	VREV32 M4.B16, M4.B16
	eight_4_roundsB(V4, V5, V6, V7, V0, V1, V2, V3, M1, M2, M3, M4, MQ1, MQ2, MQ3, MQ4, 0xc0)

	eight_4_rounds_and_sched(V0, V1, V2, V3, V4, V5, V6, V7, 0x100)
	eight_4_rounds_and_sched(V0, V1, V2, V3, V4, V5, V6, V7, 0x180)
	eight_4_rounds_and_sched(V0, V1, V2, V3, V4, V5, V6, V7, 0x200)
	eight_4_rounds_and_sched(V0, V1, V2, V3, V4, V5, V6, V7, 0x280)
	eight_4_rounds_and_sched(V0, V1, V2, V3, V4, V5, V6, V7, 0x300)
	eight_4_rounds_and_sched(V0, V1, V2, V3, V4, V5, V6, V7, 0x380)

	// add previous digest
	VLD1 (digest), [M1.S4, M2.S4, M3.S4, M4.S4]
	VLD1 (digest2), [TV5.S4, TV6.S4, TV7.S4, TV8.S4]
	VADD M1.S4, V0.S4, V0.S4
	VADD M2.S4, V1.S4, V1.S4
	VADD M3.S4, V2.S4, V2.S4
	VADD M4.S4, V3.S4, V3.S4
	VADD TV5.S4, V4.S4, V4.S4
	VADD TV6.S4, V5.S4, V5.S4
	VADD TV7.S4, V6.S4, V6.S4
	VADD TV8.S4, V7.S4, V7.S4

	// save state
	VMOV V0.B16, M1.B16
	VMOV V1.B16, M2.B16
	VMOV V2.B16, M3.B16
	VMOV V3.B16, M4.B16
	VMOV V4.B16, TV5.B16
	VMOV V5.B16, TV6.B16
	VMOV V6.B16, TV7.B16
	VMOV V7.B16, TV8.B16

	// rounds with padding
	eight_4_rounds_padding(V0, V1, V2, V3, V4, V5, V6, V7, 0x000)
	eight_4_rounds_padding(V0, V1, V2, V3, V4, V5, V6, V7, 0x080)
	eight_4_rounds_padding(V0, V1, V2, V3, V4, V5, V6, V7, 0x100)
	eight_4_rounds_padding(V0, V1, V2, V3, V4, V5, V6, V7, 0x180)
	eight_4_rounds_padding(V0, V1, V2, V3, V4, V5, V6, V7, 0x200)
	eight_4_rounds_padding(V0, V1, V2, V3, V4, V5, V6, V7, 0x280)
	eight_4_rounds_padding(V0, V1, V2, V3, V4, V5, V6, V7, 0x300)
	eight_4_rounds_padding(V0, V1, V2, V3, V4, V5, V6, V7, 0x380)

	// add previous digest
	VADD M1.S4, V0.S4, V0.S4
	VADD M2.S4, V1.S4, V1.S4
	VADD M3.S4, V2.S4, V2.S4
	VADD M4.S4, V3.S4, V3.S4
	VADD TV5.S4, V4.S4, V4.S4
	VADD TV6.S4, V5.S4, V5.S4
	VADD TV7.S4, V6.S4, V6.S4
	VADD TV8.S4, V7.S4, V7.S4

	// change endianness, transpose and store
	VREV32 V0.B16, V0.B16
	VREV32 V1.B16, V1.B16
	VREV32 V2.B16, V2.B16
	VREV32 V3.B16, V3.B16
	VREV32 V4.B16, V4.B16
	VREV32 V5.B16, V5.B16
	VREV32 V6.B16, V6.B16
	VREV32 V7.B16, V7.B16
	WORD $0xdaaa000
	WORD $0xdaab000
	WORD $0x4daaa000
	WORD $0x4dabb000
	WORD $0xdaaa004
	WORD $0xdaab004
	WORD $0x4daaa004
	WORD $0x4dbfb004
	ADD $192, DATA_PTR, DATA_PTR
	SUBW $4, NUM_BLKS, NUM_BLKS
	JMP arm_x4_loop
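// Single-block fallback: consumes one 64-byte block (two input chunks) and
// emits one 32-byte digest per iteration, used when fewer than four blocks
// remain.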
arm_x1:
	VMOV ZR, VZ.S4 // Golang guarantees this is zero
	MOVD $_DIGEST_1<>(SB), digest
	MOVD $_PADDING_1<>(SB), padding
	ADD NUM_BLKS<<5, OUTPUT_PTR, last

arm_x1_loop:
	CMP OUTPUT_PTR, last
	BEQ epilog

	// Load one block
	VLD1.P 64(DATA_PTR), [VR0.S4, VR1.S4, VR2.S4, VR3.S4]
	MOVD $_K256_1<>(SB), k256

	// change endianness
	VREV32 VR0.B16, VR0.B16
	VREV32 VR1.B16, VR1.B16
	VREV32 VR2.B16, VR2.B16
	VREV32 VR3.B16, VR3.B16

	// load initial digest
	LDPW (digest), (A_, B_)
	LDPW 8(digest), (C_, D_)
	LDPW 16(digest), (E_, F_)
	LDPW 24(digest), (G_, H_)

	// First 48 rounds
	VLD1.P 64(k256), [KV0.S4, KV1.S4, KV2.S4, KV3.S4]
	VADD VR0.S4, KV0.S4, KV0.S4
	FMOVQ KQ0, (RSP)
	four_rounds_sched(A_, B_, C_, D_, E_, F_, G_, H_, VR0, VR1, VR2, VR3)
	VADD VR1.S4, KV1.S4, KV1.S4
	FMOVQ KQ1, (RSP)
	four_rounds_sched(E_, F_, G_, H_, A_, B_, C_, D_, VR1, VR2, VR3, VR0)
	VADD VR2.S4, KV2.S4, KV2.S4
	FMOVQ KQ2, (RSP)
	four_rounds_sched(A_, B_, C_, D_, E_, F_, G_, H_, VR2, VR3, VR0, VR1)
	VADD VR3.S4, KV3.S4, KV3.S4
	FMOVQ KQ3, (RSP)
	four_rounds_sched(E_, F_, G_, H_, A_, B_, C_, D_, VR3, VR0, VR1, VR2)
	VLD1.P 64(k256), [KV0.S4, KV1.S4, KV2.S4, KV3.S4]
	VADD VR0.S4, KV0.S4, KV0.S4
	FMOVQ KQ0, (RSP)
	four_rounds_sched(A_, B_, C_, D_, E_, F_, G_, H_, VR0, VR1, VR2, VR3)
	VADD VR1.S4, KV1.S4, KV1.S4
	FMOVQ KQ1, (RSP)
	four_rounds_sched(E_, F_, G_, H_, A_, B_, C_, D_, VR1, VR2, VR3, VR0)
	VADD VR2.S4, KV2.S4, KV2.S4
	FMOVQ KQ2, (RSP)
	four_rounds_sched(A_, B_, C_, D_, E_, F_, G_, H_, VR2, VR3, VR0, VR1)
	VADD VR3.S4, KV3.S4, KV3.S4
	FMOVQ KQ3, (RSP)
	four_rounds_sched(E_, F_, G_, H_, A_, B_, C_, D_, VR3, VR0, VR1, VR2)
	VLD1.P 64(k256), [KV0.S4, KV1.S4, KV2.S4, KV3.S4]
	VADD VR0.S4, KV0.S4, KV0.S4
	FMOVQ KQ0, (RSP)
	four_rounds_sched(A_, B_, C_, D_, E_, F_, G_, H_, VR0, VR1, VR2, VR3)
	VADD VR1.S4, KV1.S4, KV1.S4
	FMOVQ KQ1, (RSP)
	four_rounds_sched(E_, F_, G_, H_, A_, B_, C_, D_, VR1, VR2, VR3, VR0)
	VADD VR2.S4, KV2.S4, KV2.S4
	FMOVQ KQ2, (RSP)
	four_rounds_sched(A_, B_, C_, D_, E_, F_, G_, H_, VR2, VR3, VR0, VR1)
	VADD VR3.S4, KV3.S4, KV3.S4
	FMOVQ KQ3, (RSP)
	four_rounds_sched(E_, F_, G_, H_, A_, B_, C_, D_, VR3, VR0, VR1, VR2)

	// last 16 rounds
	VLD1.P 64(k256), [KV0.S4, KV1.S4, KV2.S4, KV3.S4]
	VADD VR0.S4, KV0.S4, KV0.S4
	FMOVQ KQ0, (RSP)
	four_rounds(A_, B_, C_, D_, E_, F_, G_, H_, RSP, 0)
	VADD VR1.S4, KV1.S4, KV1.S4
	FMOVQ KQ1, (RSP)
	four_rounds(E_, F_, G_, H_, A_, B_, C_, D_, RSP, 0)
	VADD VR2.S4, KV2.S4, KV2.S4
	FMOVQ KQ2, (RSP)
	four_rounds(A_, B_, C_, D_, E_, F_, G_, H_, RSP, 0)
	VADD VR3.S4, KV3.S4, KV3.S4
	FMOVQ KQ3, (RSP)
	four_rounds(E_, F_, G_, H_, A_, B_, C_, D_, RSP, 0)

	// rounds with padding
	LDPW (digest), (T1, T2)
	LDPW 8(digest), (T3, T4)
	ADDW T1, A_, A_
	ADDW T2, B_, B_
	ADDW T3, C_, C_
	ADDW T4, D_, D_
	LDPW 16(digest), (T1, T2)
	STPW (A_, B_), (RSP)
	STPW (C_, D_), 8(RSP)
	LDPW 24(digest), (T3, T4)
	ADDW T1, E_, E_
	ADDW T2, F_, F_
	ADDW T3, G_, G_
	STPW (E_, F_), 16(RSP)
	ADDW T4, H_, H_
	STPW (G_, H_), 24(RSP)
	four_rounds(A_, B_, C_, D_, E_, F_, G_, H_, padding, 0x00)
	four_rounds(E_, F_, G_, H_, A_, B_, C_, D_, padding, 0x10)
	four_rounds(A_, B_, C_, D_, E_, F_, G_, H_, padding, 0x20)
	four_rounds(E_, F_, G_, H_, A_, B_, C_, D_, padding, 0x30)
	four_rounds(A_, B_, C_, D_, E_, F_, G_, H_, padding, 0x40)
	four_rounds(E_, F_, G_, H_, A_, B_, C_, D_, padding, 0x50)
	four_rounds(A_, B_, C_, D_, E_, F_, G_, H_, padding, 0x60)
	four_rounds(E_, F_, G_, H_, A_, B_, C_, D_, padding, 0x70)
	four_rounds(A_, B_, C_, D_, E_, F_, G_, H_, padding, 0x80)
	four_rounds(E_, F_, G_, H_, A_, B_, C_, D_, padding, 0x90)
	four_rounds(A_, B_, C_, D_, E_, F_, G_, H_, padding, 0xa0)
	four_rounds(E_, F_, G_, H_, A_, B_, C_, D_, padding, 0xb0)
	four_rounds(A_, B_, C_, D_, E_, F_, G_, H_, padding, 0xc0)
	four_rounds(E_, F_, G_, H_, A_, B_, C_, D_, padding, 0xd0)
	four_rounds(A_, B_, C_, D_, E_, F_, G_, H_, padding, 0xe0)
	four_rounds(E_, F_, G_, H_, A_, B_, C_, D_, padding, 0xf0)

	LDPW (RSP), (T1, T2)
	LDPW 8(RSP), (T3, T4)
	ADDW T1, A_, A_
	ADDW T2, B_, B_
	REV32 A_, A_
	REV32 B_, B_
	ADDW T3, C_, C_
	ADDW T4, D_, D_
	STPW.P (A_, B_), 8(OUTPUT_PTR)
	LDPW 16(RSP), (T1, T2)
	REV32 C_, C_
	REV32 D_, D_
	STPW.P (C_, D_), 8(OUTPUT_PTR)
	LDPW 24(RSP), (T3, T4)
	ADDW T1, E_, E_
	ADDW T2, F_, F_
	REV32 E_, E_
	REV32 F_, F_
	ADDW T3, G_, G_
	ADDW T4, H_, H_
	REV32 G_, G_
	REV32 H_, H_
	STPW.P (E_, F_), 8(OUTPUT_PTR)
	STPW.P (G_, H_), 8(OUTPUT_PTR)
	JMP arm_x1_loop
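// SHA2 crypto-extensions path: one block per iteration driven by the
// SHA256H/SHA256H2 hash-update and SHA256SU0/SHA256SU1 schedule-update
// instructions; the HASHUPDATE macro applies one four-round state update.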
HASHUPDATE(V9.S4) VADD V25.S4, V5.S4, V9.S4 SHA256SU0 V6.S4, V5.S4 SHA256SU1 V7.S4, V6.S4, V4.S4 HASHUPDATE(V9.S4) VADD V26.S4, V6.S4, V9.S4 SHA256SU0 V7.S4, V6.S4 SHA256SU1 V4.S4, V7.S4, V5.S4 HASHUPDATE(V9.S4) VADD V27.S4, V7.S4, V9.S4 SHA256SU0 V4.S4, V7.S4 SHA256SU1 V5.S4, V4.S4, V6.S4 HASHUPDATE(V9.S4) VADD V28.S4, V4.S4, V9.S4 HASHUPDATE(V9.S4) SHA256SU1 V6.S4, V5.S4, V7.S4 VADD V29.S4, V5.S4, V9.S4 HASHUPDATE(V9.S4) VADD V30.S4, V6.S4, V9.S4 HASHUPDATE(V9.S4) VADD V31.S4, V7.S4, V9.S4 HASHUPDATE(V9.S4) // Add initial digest VADD V2.S4, V0.S4, V2.S4 VADD V3.S4, V1.S4, V3.S4 // Back it up VMOV V2.B16, V10.B16 VMOV V3.B16, V11.B16 // Rounds with padding // load prescheduled constants VLD1.P 64(padding), [V16.S4, V17.S4, V18.S4, V19.S4] VLD1.P 64(padding), [V20.S4, V21.S4, V22.S4, V23.S4] VMOV V2.B16, V8.B16 VLD1.P 64(padding), [V24.S4, V25.S4, V26.S4, V27.S4] VLD1 (padding), [V28.S4, V29.S4, V30.S4, V31.S4] SUB $192, padding, padding HASHUPDATE(V16.S4) HASHUPDATE(V17.S4) HASHUPDATE(V18.S4) HASHUPDATE(V19.S4) HASHUPDATE(V20.S4) HASHUPDATE(V21.S4) HASHUPDATE(V22.S4) HASHUPDATE(V23.S4) HASHUPDATE(V24.S4) HASHUPDATE(V25.S4) HASHUPDATE(V26.S4) HASHUPDATE(V27.S4) HASHUPDATE(V28.S4) HASHUPDATE(V29.S4) HASHUPDATE(V30.S4) HASHUPDATE(V31.S4) // add backed up digest VADD V2.S4, V10.S4, V2.S4 VADD V3.S4, V11.S4, V3.S4 VREV32 V2.B16, V2.B16 VREV32 V3.B16, V3.B16 VST1.P [V2.S4, V3.S4], 32(OUTPUT_PTR) JMP shani_loop epilog: RET // Data section DATA _K256_1<>+0x00(SB)/4, $0x428a2f98 DATA _K256_1<>+0x04(SB)/4, $0x71374491 DATA _K256_1<>+0x08(SB)/4, $0xb5c0fbcf DATA _K256_1<>+0x0c(SB)/4, $0xe9b5dba5 DATA _K256_1<>+0x10(SB)/4, $0x3956c25b DATA _K256_1<>+0x14(SB)/4, $0x59f111f1 DATA _K256_1<>+0x18(SB)/4, $0x923f82a4 DATA _K256_1<>+0x1c(SB)/4, $0xab1c5ed5 DATA _K256_1<>+0x20(SB)/4, $0xd807aa98 DATA _K256_1<>+0x24(SB)/4, $0x12835b01 DATA _K256_1<>+0x28(SB)/4, $0x243185be DATA _K256_1<>+0x2c(SB)/4, $0x550c7dc3 DATA _K256_1<>+0x30(SB)/4, $0x72be5d74 DATA _K256_1<>+0x34(SB)/4, $0x80deb1fe DATA _K256_1<>+0x38(SB)/4, $0x9bdc06a7 DATA _K256_1<>+0x3c(SB)/4, $0xc19bf174 DATA _K256_1<>+0x40(SB)/4, $0xe49b69c1 DATA _K256_1<>+0x44(SB)/4, $0xefbe4786 DATA _K256_1<>+0x48(SB)/4, $0x0fc19dc6 DATA _K256_1<>+0x4c(SB)/4, $0x240ca1cc DATA _K256_1<>+0x50(SB)/4, $0x2de92c6f DATA _K256_1<>+0x54(SB)/4, $0x4a7484aa DATA _K256_1<>+0x58(SB)/4, $0x5cb0a9dc DATA _K256_1<>+0x5c(SB)/4, $0x76f988da DATA _K256_1<>+0x60(SB)/4, $0x983e5152 DATA _K256_1<>+0x64(SB)/4, $0xa831c66d DATA _K256_1<>+0x68(SB)/4, $0xb00327c8 DATA _K256_1<>+0x6c(SB)/4, $0xbf597fc7 DATA _K256_1<>+0x70(SB)/4, $0xc6e00bf3 DATA _K256_1<>+0x74(SB)/4, $0xd5a79147 DATA _K256_1<>+0x78(SB)/4, $0x06ca6351 DATA _K256_1<>+0x7c(SB)/4, $0x14292967 DATA _K256_1<>+0x80(SB)/4, $0x27b70a85 DATA _K256_1<>+0x84(SB)/4, $0x2e1b2138 DATA _K256_1<>+0x88(SB)/4, $0x4d2c6dfc DATA _K256_1<>+0x8c(SB)/4, $0x53380d13 DATA _K256_1<>+0x90(SB)/4, $0x650a7354 DATA _K256_1<>+0x94(SB)/4, $0x766a0abb DATA _K256_1<>+0x98(SB)/4, $0x81c2c92e DATA _K256_1<>+0x9c(SB)/4, $0x92722c85 DATA _K256_1<>+0xa0(SB)/4, $0xa2bfe8a1 DATA _K256_1<>+0xa4(SB)/4, $0xa81a664b DATA _K256_1<>+0xa8(SB)/4, $0xc24b8b70 DATA _K256_1<>+0xac(SB)/4, $0xc76c51a3 DATA _K256_1<>+0xb0(SB)/4, $0xd192e819 DATA _K256_1<>+0xb4(SB)/4, $0xd6990624 DATA _K256_1<>+0xb8(SB)/4, $0xf40e3585 DATA _K256_1<>+0xbc(SB)/4, $0x106aa070 DATA _K256_1<>+0xc0(SB)/4, $0x19a4c116 DATA _K256_1<>+0xc4(SB)/4, $0x1e376c08 DATA _K256_1<>+0xc8(SB)/4, $0x2748774c DATA _K256_1<>+0xcc(SB)/4, $0x34b0bcb5 DATA _K256_1<>+0xd0(SB)/4, $0x391c0cb3 DATA _K256_1<>+0xd4(SB)/4, 
$0x4ed8aa4a DATA _K256_1<>+0xd8(SB)/4, $0x5b9cca4f DATA _K256_1<>+0xdc(SB)/4, $0x682e6ff3 DATA _K256_1<>+0xe0(SB)/4, $0x748f82ee DATA _K256_1<>+0xe4(SB)/4, $0x78a5636f DATA _K256_1<>+0xe8(SB)/4, $0x84c87814 DATA _K256_1<>+0xec(SB)/4, $0x8cc70208 DATA _K256_1<>+0xf0(SB)/4, $0x90befffa DATA _K256_1<>+0xf4(SB)/4, $0xa4506ceb DATA _K256_1<>+0xf8(SB)/4, $0xbef9a3f7 DATA _K256_1<>+0xfc(SB)/4, $0xc67178f2 GLOBL _K256_1<>(SB),(NOPTR+RODATA),$256 DATA _PADDING_1<>+0x00(SB)/4, $0xc28a2f98 DATA _PADDING_1<>+0x04(SB)/4, $0x71374491 DATA _PADDING_1<>+0x08(SB)/4, $0xb5c0fbcf DATA _PADDING_1<>+0x0c(SB)/4, $0xe9b5dba5 DATA _PADDING_1<>+0x10(SB)/4, $0x3956c25b DATA _PADDING_1<>+0x14(SB)/4, $0x59f111f1 DATA _PADDING_1<>+0x18(SB)/4, $0x923f82a4 DATA _PADDING_1<>+0x1c(SB)/4, $0xab1c5ed5 DATA _PADDING_1<>+0x20(SB)/4, $0xd807aa98 DATA _PADDING_1<>+0x24(SB)/4, $0x12835b01 DATA _PADDING_1<>+0x28(SB)/4, $0x243185be DATA _PADDING_1<>+0x2c(SB)/4, $0x550c7dc3 DATA _PADDING_1<>+0x30(SB)/4, $0x72be5d74 DATA _PADDING_1<>+0x34(SB)/4, $0x80deb1fe DATA _PADDING_1<>+0x38(SB)/4, $0x9bdc06a7 DATA _PADDING_1<>+0x3c(SB)/4, $0xc19bf374 DATA _PADDING_1<>+0x40(SB)/4, $0x649b69c1 DATA _PADDING_1<>+0x44(SB)/4, $0xf0fe4786 DATA _PADDING_1<>+0x48(SB)/4, $0x0fe1edc6 DATA _PADDING_1<>+0x4c(SB)/4, $0x240cf254 DATA _PADDING_1<>+0x50(SB)/4, $0x4fe9346f DATA _PADDING_1<>+0x54(SB)/4, $0x6cc984be DATA _PADDING_1<>+0x58(SB)/4, $0x61b9411e DATA _PADDING_1<>+0x5c(SB)/4, $0x16f988fa DATA _PADDING_1<>+0x60(SB)/4, $0xf2c65152 DATA _PADDING_1<>+0x64(SB)/4, $0xa88e5a6d DATA _PADDING_1<>+0x68(SB)/4, $0xb019fc65 DATA _PADDING_1<>+0x6c(SB)/4, $0xb9d99ec7 DATA _PADDING_1<>+0x70(SB)/4, $0x9a1231c3 DATA _PADDING_1<>+0x74(SB)/4, $0xe70eeaa0 DATA _PADDING_1<>+0x78(SB)/4, $0xfdb1232b DATA _PADDING_1<>+0x7c(SB)/4, $0xc7353eb0 DATA _PADDING_1<>+0x80(SB)/4, $0x3069bad5 DATA _PADDING_1<>+0x84(SB)/4, $0xcb976d5f DATA _PADDING_1<>+0x88(SB)/4, $0x5a0f118f DATA _PADDING_1<>+0x8c(SB)/4, $0xdc1eeefd DATA _PADDING_1<>+0x90(SB)/4, $0x0a35b689 DATA _PADDING_1<>+0x94(SB)/4, $0xde0b7a04 DATA _PADDING_1<>+0x98(SB)/4, $0x58f4ca9d DATA _PADDING_1<>+0x9c(SB)/4, $0xe15d5b16 DATA _PADDING_1<>+0xa0(SB)/4, $0x007f3e86 DATA _PADDING_1<>+0xa4(SB)/4, $0x37088980 DATA _PADDING_1<>+0xa8(SB)/4, $0xa507ea32 DATA _PADDING_1<>+0xac(SB)/4, $0x6fab9537 DATA _PADDING_1<>+0xb0(SB)/4, $0x17406110 DATA _PADDING_1<>+0xb4(SB)/4, $0x0d8cd6f1 DATA _PADDING_1<>+0xb8(SB)/4, $0xcdaa3b6d DATA _PADDING_1<>+0xbc(SB)/4, $0xc0bbbe37 DATA _PADDING_1<>+0xc0(SB)/4, $0x83613bda DATA _PADDING_1<>+0xc4(SB)/4, $0xdb48a363 DATA _PADDING_1<>+0xc8(SB)/4, $0x0b02e931 DATA _PADDING_1<>+0xcc(SB)/4, $0x6fd15ca7 DATA _PADDING_1<>+0xd0(SB)/4, $0x521afaca DATA _PADDING_1<>+0xd4(SB)/4, $0x31338431 DATA _PADDING_1<>+0xd8(SB)/4, $0x6ed41a95 DATA _PADDING_1<>+0xdc(SB)/4, $0x6d437890 DATA _PADDING_1<>+0xe0(SB)/4, $0xc39c91f2 DATA _PADDING_1<>+0xe4(SB)/4, $0x9eccabbd DATA _PADDING_1<>+0xe8(SB)/4, $0xb5c9a0e6 DATA _PADDING_1<>+0xec(SB)/4, $0x532fb63c DATA _PADDING_1<>+0xf0(SB)/4, $0xd2c741c6 DATA _PADDING_1<>+0xf4(SB)/4, $0x07237ea3 DATA _PADDING_1<>+0xf8(SB)/4, $0xa4954b68 DATA _PADDING_1<>+0xfc(SB)/4, $0x4c191d76 GLOBL _PADDING_1<>(SB),(NOPTR+RODATA),$256 DATA _DIGEST_1<>+0(SB)/4, $0x6a09e667 DATA _DIGEST_1<>+4(SB)/4, $0xbb67ae85 DATA _DIGEST_1<>+8(SB)/4, $0x3c6ef372 DATA _DIGEST_1<>+12(SB)/4, $0xa54ff53a DATA _DIGEST_1<>+16(SB)/4, $0x510e527f DATA _DIGEST_1<>+20(SB)/4, $0x9b05688c DATA _DIGEST_1<>+24(SB)/4, $0x1f83d9ab DATA _DIGEST_1<>+28(SB)/4, $0x5be0cd19 GLOBL _DIGEST_1<>(SB),(NOPTR+RODATA),$32 DATA _DIGEST_4<>+0(SB)/8, 
$0x6a09e6676a09e667 DATA _DIGEST_4<>+8(SB)/8, $0x6a09e6676a09e667 DATA _DIGEST_4<>+16(SB)/8, $0xbb67ae85bb67ae85 DATA _DIGEST_4<>+24(SB)/8, $0xbb67ae85bb67ae85 DATA _DIGEST_4<>+32(SB)/8, $0x3c6ef3723c6ef372 DATA _DIGEST_4<>+40(SB)/8, $0x3c6ef3723c6ef372 DATA _DIGEST_4<>+48(SB)/8, $0xa54ff53aa54ff53a DATA _DIGEST_4<>+56(SB)/8, $0xa54ff53aa54ff53a DATA _DIGEST_4<>+64(SB)/8, $0x510e527f510e527f DATA _DIGEST_4<>+72(SB)/8, $0x510e527f510e527f DATA _DIGEST_4<>+80(SB)/8, $0x9b05688c9b05688c DATA _DIGEST_4<>+88(SB)/8, $0x9b05688c9b05688c DATA _DIGEST_4<>+96(SB)/8, $0x1f83d9ab1f83d9ab DATA _DIGEST_4<>+104(SB)/8, $0x1f83d9ab1f83d9ab DATA _DIGEST_4<>+112(SB)/8, $0x5be0cd195be0cd19 DATA _DIGEST_4<>+120(SB)/8, $0x5be0cd195be0cd19 GLOBL _DIGEST_4<>(SB),(NOPTR+RODATA),$128 DATA _PADDING_4<>+0(SB)/8, $0xc28a2f98c28a2f98 DATA _PADDING_4<>+8(SB)/8, $0xc28a2f98c28a2f98 DATA _PADDING_4<>+16(SB)/8, $0x7137449171374491 DATA _PADDING_4<>+24(SB)/8, $0x7137449171374491 DATA _PADDING_4<>+32(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _PADDING_4<>+40(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _PADDING_4<>+48(SB)/8, $0xe9b5dba5e9b5dba5 DATA _PADDING_4<>+56(SB)/8, $0xe9b5dba5e9b5dba5 DATA _PADDING_4<>+64(SB)/8, $0x3956c25b3956c25b DATA _PADDING_4<>+72(SB)/8, $0x3956c25b3956c25b DATA _PADDING_4<>+80(SB)/8, $0x59f111f159f111f1 DATA _PADDING_4<>+88(SB)/8, $0x59f111f159f111f1 DATA _PADDING_4<>+96(SB)/8, $0x923f82a4923f82a4 DATA _PADDING_4<>+104(SB)/8, $0x923f82a4923f82a4 DATA _PADDING_4<>+112(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _PADDING_4<>+120(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _PADDING_4<>+128(SB)/8, $0xd807aa98d807aa98 DATA _PADDING_4<>+136(SB)/8, $0xd807aa98d807aa98 DATA _PADDING_4<>+144(SB)/8, $0x12835b0112835b01 DATA _PADDING_4<>+152(SB)/8, $0x12835b0112835b01 DATA _PADDING_4<>+160(SB)/8, $0x243185be243185be DATA _PADDING_4<>+168(SB)/8, $0x243185be243185be DATA _PADDING_4<>+176(SB)/8, $0x550c7dc3550c7dc3 DATA _PADDING_4<>+184(SB)/8, $0x550c7dc3550c7dc3 DATA _PADDING_4<>+192(SB)/8, $0x72be5d7472be5d74 DATA _PADDING_4<>+200(SB)/8, $0x72be5d7472be5d74 DATA _PADDING_4<>+208(SB)/8, $0x80deb1fe80deb1fe DATA _PADDING_4<>+216(SB)/8, $0x80deb1fe80deb1fe DATA _PADDING_4<>+224(SB)/8, $0x9bdc06a79bdc06a7 DATA _PADDING_4<>+232(SB)/8, $0x9bdc06a79bdc06a7 DATA _PADDING_4<>+240(SB)/8, $0xc19bf374c19bf374 DATA _PADDING_4<>+248(SB)/8, $0xc19bf374c19bf374 DATA _PADDING_4<>+256(SB)/8, $0x649b69c1649b69c1 DATA _PADDING_4<>+264(SB)/8, $0x649b69c1649b69c1 DATA _PADDING_4<>+272(SB)/8, $0xf0fe4786f0fe4786 DATA _PADDING_4<>+280(SB)/8, $0xf0fe4786f0fe4786 DATA _PADDING_4<>+288(SB)/8, $0x0fe1edc60fe1edc6 DATA _PADDING_4<>+296(SB)/8, $0x0fe1edc60fe1edc6 DATA _PADDING_4<>+304(SB)/8, $0x240cf254240cf254 DATA _PADDING_4<>+312(SB)/8, $0x240cf254240cf254 DATA _PADDING_4<>+320(SB)/8, $0x4fe9346f4fe9346f DATA _PADDING_4<>+328(SB)/8, $0x4fe9346f4fe9346f DATA _PADDING_4<>+336(SB)/8, $0x6cc984be6cc984be DATA _PADDING_4<>+344(SB)/8, $0x6cc984be6cc984be DATA _PADDING_4<>+352(SB)/8, $0x61b9411e61b9411e DATA _PADDING_4<>+360(SB)/8, $0x61b9411e61b9411e DATA _PADDING_4<>+368(SB)/8, $0x16f988fa16f988fa DATA _PADDING_4<>+376(SB)/8, $0x16f988fa16f988fa DATA _PADDING_4<>+384(SB)/8, $0xf2c65152f2c65152 DATA _PADDING_4<>+392(SB)/8, $0xf2c65152f2c65152 DATA _PADDING_4<>+400(SB)/8, $0xa88e5a6da88e5a6d DATA _PADDING_4<>+408(SB)/8, $0xa88e5a6da88e5a6d DATA _PADDING_4<>+416(SB)/8, $0xb019fc65b019fc65 DATA _PADDING_4<>+424(SB)/8, $0xb019fc65b019fc65 DATA _PADDING_4<>+432(SB)/8, $0xb9d99ec7b9d99ec7 DATA _PADDING_4<>+440(SB)/8, $0xb9d99ec7b9d99ec7 DATA _PADDING_4<>+448(SB)/8, $0x9a1231c39a1231c3 DATA 
_PADDING_4<>+456(SB)/8, $0x9a1231c39a1231c3 DATA _PADDING_4<>+464(SB)/8, $0xe70eeaa0e70eeaa0 DATA _PADDING_4<>+472(SB)/8, $0xe70eeaa0e70eeaa0 DATA _PADDING_4<>+480(SB)/8, $0xfdb1232bfdb1232b DATA _PADDING_4<>+488(SB)/8, $0xfdb1232bfdb1232b DATA _PADDING_4<>+496(SB)/8, $0xc7353eb0c7353eb0 DATA _PADDING_4<>+504(SB)/8, $0xc7353eb0c7353eb0 DATA _PADDING_4<>+512(SB)/8, $0x3069bad53069bad5 DATA _PADDING_4<>+520(SB)/8, $0x3069bad53069bad5 DATA _PADDING_4<>+528(SB)/8, $0xcb976d5fcb976d5f DATA _PADDING_4<>+536(SB)/8, $0xcb976d5fcb976d5f DATA _PADDING_4<>+544(SB)/8, $0x5a0f118f5a0f118f DATA _PADDING_4<>+552(SB)/8, $0x5a0f118f5a0f118f DATA _PADDING_4<>+560(SB)/8, $0xdc1eeefddc1eeefd DATA _PADDING_4<>+568(SB)/8, $0xdc1eeefddc1eeefd DATA _PADDING_4<>+576(SB)/8, $0x0a35b6890a35b689 DATA _PADDING_4<>+584(SB)/8, $0x0a35b6890a35b689 DATA _PADDING_4<>+592(SB)/8, $0xde0b7a04de0b7a04 DATA _PADDING_4<>+600(SB)/8, $0xde0b7a04de0b7a04 DATA _PADDING_4<>+608(SB)/8, $0x58f4ca9d58f4ca9d DATA _PADDING_4<>+616(SB)/8, $0x58f4ca9d58f4ca9d DATA _PADDING_4<>+624(SB)/8, $0xe15d5b16e15d5b16 DATA _PADDING_4<>+632(SB)/8, $0xe15d5b16e15d5b16 DATA _PADDING_4<>+640(SB)/8, $0x007f3e86007f3e86 DATA _PADDING_4<>+648(SB)/8, $0x007f3e86007f3e86 DATA _PADDING_4<>+656(SB)/8, $0x3708898037088980 DATA _PADDING_4<>+664(SB)/8, $0x3708898037088980 DATA _PADDING_4<>+672(SB)/8, $0xa507ea32a507ea32 DATA _PADDING_4<>+680(SB)/8, $0xa507ea32a507ea32 DATA _PADDING_4<>+688(SB)/8, $0x6fab95376fab9537 DATA _PADDING_4<>+696(SB)/8, $0x6fab95376fab9537 DATA _PADDING_4<>+704(SB)/8, $0x1740611017406110 DATA _PADDING_4<>+712(SB)/8, $0x1740611017406110 DATA _PADDING_4<>+720(SB)/8, $0x0d8cd6f10d8cd6f1 DATA _PADDING_4<>+728(SB)/8, $0x0d8cd6f10d8cd6f1 DATA _PADDING_4<>+736(SB)/8, $0xcdaa3b6dcdaa3b6d DATA _PADDING_4<>+744(SB)/8, $0xcdaa3b6dcdaa3b6d DATA _PADDING_4<>+752(SB)/8, $0xc0bbbe37c0bbbe37 DATA _PADDING_4<>+760(SB)/8, $0xc0bbbe37c0bbbe37 DATA _PADDING_4<>+768(SB)/8, $0x83613bda83613bda DATA _PADDING_4<>+776(SB)/8, $0x83613bda83613bda DATA _PADDING_4<>+784(SB)/8, $0xdb48a363db48a363 DATA _PADDING_4<>+792(SB)/8, $0xdb48a363db48a363 DATA _PADDING_4<>+800(SB)/8, $0x0b02e9310b02e931 DATA _PADDING_4<>+808(SB)/8, $0x0b02e9310b02e931 DATA _PADDING_4<>+816(SB)/8, $0x6fd15ca76fd15ca7 DATA _PADDING_4<>+824(SB)/8, $0x6fd15ca76fd15ca7 DATA _PADDING_4<>+832(SB)/8, $0x521afaca521afaca DATA _PADDING_4<>+840(SB)/8, $0x521afaca521afaca DATA _PADDING_4<>+848(SB)/8, $0x3133843131338431 DATA _PADDING_4<>+856(SB)/8, $0x3133843131338431 DATA _PADDING_4<>+864(SB)/8, $0x6ed41a956ed41a95 DATA _PADDING_4<>+872(SB)/8, $0x6ed41a956ed41a95 DATA _PADDING_4<>+880(SB)/8, $0x6d4378906d437890 DATA _PADDING_4<>+888(SB)/8, $0x6d4378906d437890 DATA _PADDING_4<>+896(SB)/8, $0xc39c91f2c39c91f2 DATA _PADDING_4<>+904(SB)/8, $0xc39c91f2c39c91f2 DATA _PADDING_4<>+912(SB)/8, $0x9eccabbd9eccabbd DATA _PADDING_4<>+920(SB)/8, $0x9eccabbd9eccabbd DATA _PADDING_4<>+928(SB)/8, $0xb5c9a0e6b5c9a0e6 DATA _PADDING_4<>+936(SB)/8, $0xb5c9a0e6b5c9a0e6 DATA _PADDING_4<>+944(SB)/8, $0x532fb63c532fb63c DATA _PADDING_4<>+952(SB)/8, $0x532fb63c532fb63c DATA _PADDING_4<>+960(SB)/8, $0xd2c741c6d2c741c6 DATA _PADDING_4<>+968(SB)/8, $0xd2c741c6d2c741c6 DATA _PADDING_4<>+976(SB)/8, $0x07237ea307237ea3 DATA _PADDING_4<>+984(SB)/8, $0x07237ea307237ea3 DATA _PADDING_4<>+992(SB)/8, $0xa4954b68a4954b68 DATA _PADDING_4<>+1000(SB)/8, $0xa4954b68a4954b68 DATA _PADDING_4<>+1008(SB)/8, $0x4c191d764c191d76 DATA _PADDING_4<>+1016(SB)/8, $0x4c191d764c191d76 GLOBL _PADDING_4<>(SB),(NOPTR+RODATA),$1024 DATA _K256_4<>+0(SB)/8, 
$0x428a2f98428a2f98 DATA _K256_4<>+8(SB)/8, $0x428a2f98428a2f98 DATA _K256_4<>+16(SB)/8, $0x7137449171374491 DATA _K256_4<>+24(SB)/8, $0x7137449171374491 DATA _K256_4<>+32(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _K256_4<>+40(SB)/8, $0xb5c0fbcfb5c0fbcf DATA _K256_4<>+48(SB)/8, $0xe9b5dba5e9b5dba5 DATA _K256_4<>+56(SB)/8, $0xe9b5dba5e9b5dba5 DATA _K256_4<>+64(SB)/8, $0x3956c25b3956c25b DATA _K256_4<>+72(SB)/8, $0x3956c25b3956c25b DATA _K256_4<>+80(SB)/8, $0x59f111f159f111f1 DATA _K256_4<>+88(SB)/8, $0x59f111f159f111f1 DATA _K256_4<>+96(SB)/8, $0x923f82a4923f82a4 DATA _K256_4<>+104(SB)/8, $0x923f82a4923f82a4 DATA _K256_4<>+112(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _K256_4<>+120(SB)/8, $0xab1c5ed5ab1c5ed5 DATA _K256_4<>+128(SB)/8, $0xd807aa98d807aa98 DATA _K256_4<>+136(SB)/8, $0xd807aa98d807aa98 DATA _K256_4<>+144(SB)/8, $0x12835b0112835b01 DATA _K256_4<>+152(SB)/8, $0x12835b0112835b01 DATA _K256_4<>+160(SB)/8, $0x243185be243185be DATA _K256_4<>+168(SB)/8, $0x243185be243185be DATA _K256_4<>+176(SB)/8, $0x550c7dc3550c7dc3 DATA _K256_4<>+184(SB)/8, $0x550c7dc3550c7dc3 DATA _K256_4<>+192(SB)/8, $0x72be5d7472be5d74 DATA _K256_4<>+200(SB)/8, $0x72be5d7472be5d74 DATA _K256_4<>+208(SB)/8, $0x80deb1fe80deb1fe DATA _K256_4<>+216(SB)/8, $0x80deb1fe80deb1fe DATA _K256_4<>+224(SB)/8, $0x9bdc06a79bdc06a7 DATA _K256_4<>+232(SB)/8, $0x9bdc06a79bdc06a7 DATA _K256_4<>+240(SB)/8, $0xc19bf174c19bf174 DATA _K256_4<>+248(SB)/8, $0xc19bf174c19bf174 DATA _K256_4<>+256(SB)/8, $0xe49b69c1e49b69c1 DATA _K256_4<>+264(SB)/8, $0xe49b69c1e49b69c1 DATA _K256_4<>+272(SB)/8, $0xefbe4786efbe4786 DATA _K256_4<>+280(SB)/8, $0xefbe4786efbe4786 DATA _K256_4<>+288(SB)/8, $0x0fc19dc60fc19dc6 DATA _K256_4<>+296(SB)/8, $0x0fc19dc60fc19dc6 DATA _K256_4<>+304(SB)/8, $0x240ca1cc240ca1cc DATA _K256_4<>+312(SB)/8, $0x240ca1cc240ca1cc DATA _K256_4<>+320(SB)/8, $0x2de92c6f2de92c6f DATA _K256_4<>+328(SB)/8, $0x2de92c6f2de92c6f DATA _K256_4<>+336(SB)/8, $0x4a7484aa4a7484aa DATA _K256_4<>+344(SB)/8, $0x4a7484aa4a7484aa DATA _K256_4<>+352(SB)/8, $0x5cb0a9dc5cb0a9dc DATA _K256_4<>+360(SB)/8, $0x5cb0a9dc5cb0a9dc DATA _K256_4<>+368(SB)/8, $0x76f988da76f988da DATA _K256_4<>+376(SB)/8, $0x76f988da76f988da DATA _K256_4<>+384(SB)/8, $0x983e5152983e5152 DATA _K256_4<>+392(SB)/8, $0x983e5152983e5152 DATA _K256_4<>+400(SB)/8, $0xa831c66da831c66d DATA _K256_4<>+408(SB)/8, $0xa831c66da831c66d DATA _K256_4<>+416(SB)/8, $0xb00327c8b00327c8 DATA _K256_4<>+424(SB)/8, $0xb00327c8b00327c8 DATA _K256_4<>+432(SB)/8, $0xbf597fc7bf597fc7 DATA _K256_4<>+440(SB)/8, $0xbf597fc7bf597fc7 DATA _K256_4<>+448(SB)/8, $0xc6e00bf3c6e00bf3 DATA _K256_4<>+456(SB)/8, $0xc6e00bf3c6e00bf3 DATA _K256_4<>+464(SB)/8, $0xd5a79147d5a79147 DATA _K256_4<>+472(SB)/8, $0xd5a79147d5a79147 DATA _K256_4<>+480(SB)/8, $0x06ca635106ca6351 DATA _K256_4<>+488(SB)/8, $0x06ca635106ca6351 DATA _K256_4<>+496(SB)/8, $0x1429296714292967 DATA _K256_4<>+504(SB)/8, $0x1429296714292967 DATA _K256_4<>+512(SB)/8, $0x27b70a8527b70a85 DATA _K256_4<>+520(SB)/8, $0x27b70a8527b70a85 DATA _K256_4<>+528(SB)/8, $0x2e1b21382e1b2138 DATA _K256_4<>+536(SB)/8, $0x2e1b21382e1b2138 DATA _K256_4<>+544(SB)/8, $0x4d2c6dfc4d2c6dfc DATA _K256_4<>+552(SB)/8, $0x4d2c6dfc4d2c6dfc DATA _K256_4<>+560(SB)/8, $0x53380d1353380d13 DATA _K256_4<>+568(SB)/8, $0x53380d1353380d13 DATA _K256_4<>+576(SB)/8, $0x650a7354650a7354 DATA _K256_4<>+584(SB)/8, $0x650a7354650a7354 DATA _K256_4<>+592(SB)/8, $0x766a0abb766a0abb DATA _K256_4<>+600(SB)/8, $0x766a0abb766a0abb DATA _K256_4<>+608(SB)/8, $0x81c2c92e81c2c92e DATA _K256_4<>+616(SB)/8, $0x81c2c92e81c2c92e DATA 
_K256_4<>+624(SB)/8, $0x92722c8592722c85 DATA _K256_4<>+632(SB)/8, $0x92722c8592722c85 DATA _K256_4<>+640(SB)/8, $0xa2bfe8a1a2bfe8a1 DATA _K256_4<>+648(SB)/8, $0xa2bfe8a1a2bfe8a1 DATA _K256_4<>+656(SB)/8, $0xa81a664ba81a664b DATA _K256_4<>+664(SB)/8, $0xa81a664ba81a664b DATA _K256_4<>+672(SB)/8, $0xc24b8b70c24b8b70 DATA _K256_4<>+680(SB)/8, $0xc24b8b70c24b8b70 DATA _K256_4<>+688(SB)/8, $0xc76c51a3c76c51a3 DATA _K256_4<>+696(SB)/8, $0xc76c51a3c76c51a3 DATA _K256_4<>+704(SB)/8, $0xd192e819d192e819 DATA _K256_4<>+712(SB)/8, $0xd192e819d192e819 DATA _K256_4<>+720(SB)/8, $0xd6990624d6990624 DATA _K256_4<>+728(SB)/8, $0xd6990624d6990624 DATA _K256_4<>+736(SB)/8, $0xf40e3585f40e3585 DATA _K256_4<>+744(SB)/8, $0xf40e3585f40e3585 DATA _K256_4<>+752(SB)/8, $0x106aa070106aa070 DATA _K256_4<>+760(SB)/8, $0x106aa070106aa070 DATA _K256_4<>+768(SB)/8, $0x19a4c11619a4c116 DATA _K256_4<>+776(SB)/8, $0x19a4c11619a4c116 DATA _K256_4<>+784(SB)/8, $0x1e376c081e376c08 DATA _K256_4<>+792(SB)/8, $0x1e376c081e376c08 DATA _K256_4<>+800(SB)/8, $0x2748774c2748774c DATA _K256_4<>+808(SB)/8, $0x2748774c2748774c DATA _K256_4<>+816(SB)/8, $0x34b0bcb534b0bcb5 DATA _K256_4<>+824(SB)/8, $0x34b0bcb534b0bcb5 DATA _K256_4<>+832(SB)/8, $0x391c0cb3391c0cb3 DATA _K256_4<>+840(SB)/8, $0x391c0cb3391c0cb3 DATA _K256_4<>+848(SB)/8, $0x4ed8aa4a4ed8aa4a DATA _K256_4<>+856(SB)/8, $0x4ed8aa4a4ed8aa4a DATA _K256_4<>+864(SB)/8, $0x5b9cca4f5b9cca4f DATA _K256_4<>+872(SB)/8, $0x5b9cca4f5b9cca4f DATA _K256_4<>+880(SB)/8, $0x682e6ff3682e6ff3 DATA _K256_4<>+888(SB)/8, $0x682e6ff3682e6ff3 DATA _K256_4<>+896(SB)/8, $0x748f82ee748f82ee DATA _K256_4<>+904(SB)/8, $0x748f82ee748f82ee DATA _K256_4<>+912(SB)/8, $0x78a5636f78a5636f DATA _K256_4<>+920(SB)/8, $0x78a5636f78a5636f DATA _K256_4<>+928(SB)/8, $0x84c8781484c87814 DATA _K256_4<>+936(SB)/8, $0x84c8781484c87814 DATA _K256_4<>+944(SB)/8, $0x8cc702088cc70208 DATA _K256_4<>+952(SB)/8, $0x8cc702088cc70208 DATA _K256_4<>+960(SB)/8, $0x90befffa90befffa DATA _K256_4<>+968(SB)/8, $0x90befffa90befffa DATA _K256_4<>+976(SB)/8, $0xa4506ceba4506ceb DATA _K256_4<>+984(SB)/8, $0xa4506ceba4506ceb DATA _K256_4<>+992(SB)/8, $0xbef9a3f7bef9a3f7 DATA _K256_4<>+1000(SB)/8, $0xbef9a3f7bef9a3f7 DATA _K256_4<>+1008(SB)/8, $0xc67178f2c67178f2 DATA _K256_4<>+1016(SB)/8, $0xc67178f2c67178f2 GLOBL _K256_4<>(SB),(NOPTR+RODATA),$1024 gohashtree-0.0.4-beta/hash_fuzz_test.go000066400000000000000000000026351455574744200201730ustar00rootroot00000000000000//go:build go1.18 // +build go1.18 package gohashtree_test import ( "testing" "github.com/prysmaticlabs/gohashtree" ) func convertRawChunks(raw []byte) [][32]byte { var chunks [][32]byte for i := 32; i <= len(raw); i += 32 { var c [32]byte copy(c[:], raw[i-32:i]) chunks = append(chunks, c) } return chunks } func FuzzHash(f *testing.F) { for i := 1; i <= 10; i++ { f.Add(make([]byte, 64*i)) } f.Fuzz(func(t *testing.T, chunksRaw []byte) { if len(chunksRaw) < 64 || len(chunksRaw)%64 != 0 { return // No chunks and odd number of chunks are invalid } chunks := convertRawChunks(chunksRaw) digests := make([][32]byte, len(chunks)/2) if err := gohashtree.Hash(digests, chunks); err != nil { t.Fatal(err) } }) } func FuzzHash_Differential_Minio(f *testing.F) { for i := uint(0); i < 128; i++ { d := make([]byte, 64) for j := 0; j < 64; j++ { d[j] = byte(i) } f.Add(d) } f.Fuzz(func(t *testing.T, chunksRaw []byte) { if len(chunksRaw) < 64 || len(chunksRaw)%64 != 0 { return // No chunks and odd number of chunks are invalid } chunks := convertRawChunks(chunksRaw) digests := make([][32]byte, 
len(chunks)/2) if err := gohashtree.Hash(digests, chunks); err != nil { t.Fatal(err) } for i := 64; i <= len(chunksRaw); i += 64 { a := OldHash(chunksRaw[i-64 : i]) b := digests[(i/64)-1] if a != b { t.Error("minio.Hash() != gohashtree.Hash()") } } }) } gohashtree-0.0.4-beta/hash_test.go000066400000000000000000000620351455574744200171150ustar00rootroot00000000000000/* MIT License # Copyright (c) 2021 Prysmatic Labs Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package gohashtree_test import ( "reflect" "testing" "github.com/minio/sha256-simd" "github.com/prysmaticlabs/gohashtree" ) var _test_32_block = [][32]byte{ {0x7a, 0xee, 0xd5, 0xc9, 0x66, 0x17, 0x59, 0x7f, 0x89, 0xd6, 0xd9, 0xe8, 0xa8, 0xa7, 0x01, 0x47, 0x60, 0xc6, 0x88, 0xfd, 0x2a, 0x7a, 0xf6, 0x1d, 0x10, 0x20, 0x62, 0x7e, 0x7c, 0xd0, 0x1a, 0x0b}, {0xd4, 0x1f, 0xa7, 0x89, 0x8c, 0xf9, 0x05, 0xfc, 0x1e, 0xb0, 0x04, 0xd7, 0xaa, 0x56, 0x35, 0xec, 0x36, 0xf5, 0x0d, 0x41, 0x75, 0x64, 0x34, 0x71, 0xf0, 0x3b, 0x5b, 0xb2, 0xcc, 0xfa, 0x8c, 0xca}, {0xf8, 0xd9, 0x9e, 0xa7, 0x9c, 0xa1, 0xe0, 0x3a, 0x19, 0x4f, 0xd3, 0x2d, 0xbd, 0x40, 0x3a, 0xa3, 0x28, 0xe8, 0xa4, 0x27, 0x58, 0x44, 0x12, 0xf7, 0x69, 0x01, 0x66, 0xfa, 0xf1, 0x97, 0x30, 0xfe}, {0x99, 0x7c, 0x24, 0x0e, 0xed, 0x31, 0x0a, 0xda, 0x12, 0x16, 0x0e, 0x06, 0x44, 0xb8, 0x3f, 0xa2, 0x40, 0x52, 0xbc, 0x2d, 0xaf, 0x97, 0x00, 0x01, 0x5d, 0xbb, 0x0d, 0x06, 0x66, 0xb1, 0x59, 0xf2}, {0x99, 0x43, 0x52, 0x77, 0x28, 0x39, 0x6b, 0xeb, 0x03, 0x51, 0xc4, 0x5f, 0x7d, 0xd3, 0xe1, 0x41, 0x17, 0x66, 0x7b, 0x0e, 0xc9, 0x51, 0x01, 0xa7, 0x39, 0xf3, 0xc8, 0x63, 0x95, 0xa5, 0x92, 0x6b}, {0xce, 0x6e, 0xab, 0xd2, 0xe8, 0xad, 0x90, 0xad, 0xbe, 0xe5, 0x94, 0x96, 0xa9, 0x98, 0xe7, 0x83, 0x07, 0xa4, 0x0f, 0x8e, 0xe5, 0xb3, 0x5a, 0x05, 0xcd, 0xfd, 0xae, 0x9c, 0x07, 0xad, 0x26, 0xaa}, {0xf5, 0xee, 0x66, 0x87, 0x00, 0xed, 0xeb, 0x8b, 0xc2, 0x7d, 0x97, 0x52, 0x2d, 0xfc, 0x0a, 0x2a, 0x32, 0x0e, 0x92, 0xd2, 0x91, 0xd1, 0x69, 0x29, 0x9d, 0xb1, 0x3a, 0x65, 0x9f, 0x8e, 0x7e, 0x2a}, {0x88, 0x4a, 0xc8, 0x81, 0xdb, 0xa6, 0x79, 0x36, 0x54, 0xe9, 0x15, 0x5c, 0xff, 0x06, 0x35, 0x8b, 0x6e, 0x0d, 0xaa, 0x3e, 0x7a, 0x82, 0x7c, 0x4a, 0xfe, 0x8a, 0x91, 0xb4, 0x34, 0xed, 0xe3, 0x17}, {0xe7, 0x92, 0xa4, 0x91, 0xdc, 0x1d, 0x83, 0xc8, 0x72, 0x5a, 0xd1, 0x27, 0x17, 0x78, 0x2b, 0xc7, 0x67, 0xe9, 0x56, 0xf2, 0xb4, 0x37, 0x51, 0xa1, 0x6b, 0x23, 0x8c, 0xc9, 0x03, 0x3d, 0x90, 0x1e}, {0xc4, 0x1f, 0xcc, 0x5e, 0xcb, 0x5e, 0x7d, 0x02, 0x12, 0x3f, 0x15, 0x9f, 0x35, 0xf4, 0x49, 0x55, 0xba, 0xc6, 0x47, 0xd2, 0x85, 0x85, 0x61, 0x69, 0xa5, 0x60, 0x7a, 0x32, 0x7f, 0x8e, 0x09, 0x5f}, {0x60, 0xb6, 
0xab, 0xb5, 0x6b, 0x4d, 0xce, 0x6f, 0x1d, 0x77, 0x2e, 0x9b, 0x0d, 0x60, 0x76, 0xe3, 0xcb, 0x79, 0xbc, 0x40, 0x2d, 0x16, 0xf6, 0xa3, 0x06, 0x12, 0x36, 0x71, 0xda, 0xfd, 0x28, 0x89}, {0x67, 0xdd, 0x7f, 0x26, 0x6d, 0x2e, 0xf3, 0xef, 0x13, 0xb6, 0x09, 0x73, 0x82, 0xbc, 0x73, 0x25, 0x83, 0xc0, 0x34, 0x90, 0xe8, 0xad, 0xf0, 0x17, 0x8d, 0xed, 0xad, 0x29, 0xf7, 0x78, 0x9c, 0x28}, {0x00, 0xb0, 0xd5, 0xd0, 0x8e, 0x9b, 0xe5, 0xf0, 0x46, 0x8e, 0x60, 0x25, 0x95, 0xe5, 0x3a, 0x46, 0xb1, 0x07, 0x74, 0x97, 0xed, 0x0a, 0x2f, 0x9a, 0x3f, 0xf3, 0x94, 0x2f, 0xb3, 0x12, 0xa1, 0x91}, {0x8d, 0x36, 0x16, 0xc6, 0x00, 0x88, 0xd6, 0x69, 0xb4, 0x5a, 0x71, 0x18, 0x41, 0xe5, 0x4d, 0xb2, 0xd9, 0x00, 0x7a, 0x17, 0x63, 0x6a, 0x9b, 0x2e, 0x22, 0x12, 0x5b, 0xa3, 0x74, 0x7c, 0x95, 0xc9}, {0x4e, 0xfc, 0x5c, 0x18, 0xd1, 0x8a, 0x5b, 0x57, 0x7c, 0x86, 0x3e, 0xe2, 0x75, 0x91, 0xf2, 0xb3, 0x5f, 0xd0, 0x92, 0xbc, 0x77, 0xbe, 0x1b, 0xef, 0x1a, 0x7c, 0xe2, 0xd8, 0x8d, 0x7b, 0xef, 0xf7}, {0xb7, 0x80, 0xc2, 0x31, 0xe6, 0x75, 0x0c, 0xad, 0x0f, 0xe8, 0xed, 0x59, 0x34, 0xdb, 0xfb, 0x41, 0xd4, 0x38, 0x73, 0x7a, 0x47, 0x01, 0xb8, 0xea, 0xea, 0x2e, 0x01, 0x8e, 0x4f, 0x09, 0x64, 0x82}, {0x99, 0x43, 0x52, 0x77, 0x28, 0x39, 0x6b, 0xeb, 0x03, 0x51, 0xc4, 0x5f, 0x7d, 0xd3, 0xe1, 0x41, 0x17, 0x66, 0x7b, 0x0e, 0xc9, 0x51, 0x01, 0xa7, 0x39, 0xf3, 0xc8, 0x63, 0x95, 0xa5, 0x92, 0x6b}, {0xce, 0x6e, 0xab, 0xd2, 0xe8, 0xad, 0x90, 0xad, 0xbe, 0xe5, 0x94, 0x96, 0xa9, 0x98, 0xe7, 0x83, 0x07, 0xa4, 0x0f, 0x8e, 0xe5, 0xb3, 0x5a, 0x05, 0xcd, 0xfd, 0xae, 0x9c, 0x07, 0xad, 0x26, 0xaa}, {0xf5, 0xee, 0x66, 0x87, 0x00, 0xed, 0xeb, 0x8b, 0xc2, 0x7d, 0x97, 0x52, 0x2d, 0xfc, 0x0a, 0x2a, 0x32, 0x0e, 0x92, 0xd2, 0x91, 0xd1, 0x69, 0x29, 0x9d, 0xb1, 0x3a, 0x65, 0x9f, 0x8e, 0x7e, 0x2a}, {0x88, 0x4a, 0xc8, 0x81, 0xdb, 0xa6, 0x79, 0x36, 0x54, 0xe9, 0x15, 0x5c, 0xff, 0x06, 0x35, 0x8b, 0x6e, 0x0d, 0xaa, 0x3e, 0x7a, 0x82, 0x7c, 0x4a, 0xfe, 0x8a, 0x91, 0xb4, 0x34, 0xed, 0xe3, 0x17}, {0xe7, 0x92, 0xa4, 0x91, 0xdc, 0x1d, 0x83, 0xc8, 0x72, 0x5a, 0xd1, 0x27, 0x17, 0x78, 0x2b, 0xc7, 0x67, 0xe9, 0x56, 0xf2, 0xb4, 0x37, 0x51, 0xa1, 0x6b, 0x23, 0x8c, 0xc9, 0x03, 0x3d, 0x90, 0x1e}, {0xc4, 0x1f, 0xcc, 0x5e, 0xcb, 0x5e, 0x7d, 0x02, 0x12, 0x3f, 0x15, 0x9f, 0x35, 0xf4, 0x49, 0x55, 0xba, 0xc6, 0x47, 0xd2, 0x85, 0x85, 0x61, 0x69, 0xa5, 0x60, 0x7a, 0x32, 0x7f, 0x8e, 0x09, 0x5f}, {0x60, 0xb6, 0xab, 0xb5, 0x6b, 0x4d, 0xce, 0x6f, 0x1d, 0x77, 0x2e, 0x9b, 0x0d, 0x60, 0x76, 0xe3, 0xcb, 0x79, 0xbc, 0x40, 0x2d, 0x16, 0xf6, 0xa3, 0x06, 0x12, 0x36, 0x71, 0xda, 0xfd, 0x28, 0x89}, {0x67, 0xdd, 0x7f, 0x26, 0x6d, 0x2e, 0xf3, 0xef, 0x13, 0xb6, 0x09, 0x73, 0x82, 0xbc, 0x73, 0x25, 0x83, 0xc0, 0x34, 0x90, 0xe8, 0xad, 0xf0, 0x17, 0x8d, 0xed, 0xad, 0x29, 0xf7, 0x78, 0x9c, 0x28}, {0x00, 0xb0, 0xd5, 0xd0, 0x8e, 0x9b, 0xe5, 0xf0, 0x46, 0x8e, 0x60, 0x25, 0x95, 0xe5, 0x3a, 0x46, 0xb1, 0x07, 0x74, 0x97, 0xed, 0x0a, 0x2f, 0x9a, 0x3f, 0xf3, 0x94, 0x2f, 0xb3, 0x12, 0xa1, 0x91}, {0x8d, 0x36, 0x16, 0xc6, 0x00, 0x88, 0xd6, 0x69, 0xb4, 0x5a, 0x71, 0x18, 0x41, 0xe5, 0x4d, 0xb2, 0xd9, 0x00, 0x7a, 0x17, 0x63, 0x6a, 0x9b, 0x2e, 0x22, 0x12, 0x5b, 0xa3, 0x74, 0x7c, 0x95, 0xc9}, {0x4e, 0xfc, 0x5c, 0x18, 0xd1, 0x8a, 0x5b, 0x57, 0x7c, 0x86, 0x3e, 0xe2, 0x75, 0x91, 0xf2, 0xb3, 0x5f, 0xd0, 0x92, 0xbc, 0x77, 0xbe, 0x1b, 0xef, 0x1a, 0x7c, 0xe2, 0xd8, 0x8d, 0x7b, 0xef, 0xf7}, {0xcd, 0x78, 0x15, 0x64, 0x2c, 0x78, 0x57, 0x74, 0x2b, 0xb7, 0xdb, 0x74, 0xe2, 0xab, 0x82, 0xbb, 0x61, 0x32, 0x3e, 0xe4, 0xb1, 0x00, 0xde, 0xb2, 0x35, 0x1e, 0x3e, 0x1c, 0x91, 0x9d, 0x87, 0xde}, {0x17, 0xcc, 0x52, 0x5c, 0x60, 0x9e, 0xd8, 0xd4, 0xf4, 0x56, 0x28, 0x16, 
0xde, 0xde, 0x73, 0xfe, 0xd9, 0x92, 0xb7, 0x99, 0x15, 0x24, 0x1b, 0x40, 0xb0, 0xda, 0x9a, 0xf8, 0x24, 0x38, 0x13, 0xbd}, {0xd0, 0x45, 0x9b, 0xe3, 0x9a, 0xae, 0x78, 0x41, 0xcd, 0x12, 0x9a, 0x6b, 0x91, 0x58, 0x29, 0x75, 0xae, 0x21, 0xd3, 0xf2, 0x5e, 0x98, 0xab, 0x09, 0xb0, 0xaa, 0x62, 0x96, 0x35, 0x64, 0x18, 0x48}, {0xd2, 0x5b, 0x10, 0xf1, 0x35, 0xaa, 0x04, 0x49, 0x4e, 0x51, 0x30, 0x0d, 0xb6, 0xbf, 0xa0, 0x9b, 0xa0, 0xf5, 0x66, 0x5f, 0x28, 0xc7, 0x8d, 0xa8, 0x3e, 0x0f, 0xe4, 0xa7, 0xc9, 0xd4, 0x0f, 0x7d}, {0xb7, 0x80, 0xc2, 0x31, 0xe6, 0x75, 0x0c, 0xad, 0x0f, 0xe8, 0xed, 0x59, 0x34, 0xdb, 0xfb, 0x41, 0xd4, 0x38, 0x73, 0x7a, 0x47, 0x01, 0xb8, 0xea, 0xea, 0x2e, 0x01, 0x8e, 0x4f, 0x09, 0x64, 0x82}, {0xe4, 0x8b, 0x12, 0xd3, 0xd0, 0x78, 0xb5, 0x5f, 0x3e, 0x9d, 0x94, 0x7f, 0x93, 0x84, 0x77, 0x77, 0xdb, 0x78, 0x41, 0xe8, 0x91, 0xfb, 0x6d, 0x0d, 0xef, 0x00, 0x30, 0x8e, 0x0a, 0xe4, 0x7b, 0xec}, {0xe7, 0xb2, 0x76, 0xe7, 0x6c, 0xba, 0x8f, 0x8c, 0x0b, 0xf2, 0xa3, 0xad, 0xc2, 0x2d, 0x92, 0xb4, 0xd5, 0xf2, 0x83, 0x42, 0x65, 0x02, 0xd6, 0x67, 0x9a, 0x78, 0x6a, 0xc1, 0xca, 0x91, 0x87, 0x7c}, {0x16, 0x99, 0x13, 0xf8, 0xa9, 0x20, 0x62, 0x2e, 0xc1, 0x84, 0xc0, 0x25, 0xdc, 0x35, 0x1f, 0xe6, 0x32, 0x49, 0x37, 0x79, 0x78, 0xfb, 0xf5, 0xf7, 0x34, 0xf4, 0xa5, 0x49, 0x9f, 0xc8, 0xfa, 0x8e}, {0x28, 0x9b, 0x27, 0xae, 0x21, 0x12, 0x14, 0x57, 0x56, 0xf6, 0x9d, 0x7f, 0x0d, 0x28, 0x03, 0xbd, 0x05, 0xd0, 0x11, 0x9e, 0xf1, 0x98, 0x8e, 0x1c, 0xbe, 0xc1, 0x83, 0xdb, 0x1a, 0x65, 0x08, 0x0d}, {0xef, 0x42, 0x3a, 0x0b, 0x2f, 0xea, 0xdf, 0xfe, 0xeb, 0xd9, 0x72, 0x9a, 0xcf, 0x5a, 0xac, 0x19, 0x09, 0x75, 0x25, 0x64, 0x61, 0x19, 0xf5, 0xcd, 0xdb, 0x9d, 0xcf, 0x4a, 0xa9, 0xf5, 0x48, 0x2c}, {0x47, 0x69, 0xaa, 0x80, 0x3f, 0xd3, 0x02, 0x67, 0xe9, 0x8b, 0x82, 0xa8, 0x02, 0xe8, 0xcf, 0x60, 0x66, 0xaa, 0xcf, 0x05, 0x0a, 0x85, 0xeb, 0x3d, 0x87, 0x21, 0xcc, 0xe2, 0xdd, 0x6c, 0x42, 0x54}, {0xd8, 0xb4, 0x39, 0x4f, 0x78, 0xce, 0xd8, 0xad, 0x57, 0xbe, 0xda, 0x18, 0x8f, 0x4a, 0x9b, 0x41, 0xfe, 0x58, 0x9d, 0xa1, 0xd4, 0x71, 0x6e, 0x2f, 0x04, 0xaf, 0x37, 0xa0, 0x29, 0x60, 0x6f, 0x9d}, {0x84, 0x4a, 0x39, 0x0a, 0x5e, 0x24, 0x81, 0x2e, 0x63, 0xc9, 0xb6, 0xde, 0xc3, 0xf1, 0x82, 0x7b, 0x82, 0x14, 0x07, 0xde, 0x46, 0x03, 0x25, 0x27, 0x4d, 0x09, 0x6b, 0x7e, 0xb9, 0x82, 0x98, 0x41}, {0x68, 0xf8, 0x98, 0x04, 0xb2, 0x61, 0x78, 0xbf, 0x8a, 0x69, 0x4d, 0xc7, 0x83, 0x4a, 0xe7, 0x77, 0xf7, 0x4b, 0x00, 0x28, 0x34, 0xe6, 0x36, 0xca, 0xa2, 0x58, 0x37, 0x61, 0x60, 0x95, 0x0d, 0xa6}, {0x20, 0x00, 0x7e, 0x29, 0xa8, 0x6e, 0xca, 0xb8, 0x1b, 0xbc, 0x94, 0x29, 0x2b, 0x18, 0xaa, 0x56, 0x0f, 0x4c, 0x38, 0x1a, 0x7a, 0x16, 0xe8, 0xbb, 0x51, 0xb7, 0xb3, 0xe3, 0x22, 0x8e, 0x9c, 0x05}, {0xa8, 0x0f, 0x08, 0x4d, 0xf1, 0xd1, 0xd8, 0x2c, 0xac, 0xe8, 0x73, 0x43, 0xcc, 0x73, 0x6b, 0x03, 0x40, 0x21, 0x85, 0x9b, 0x9d, 0x63, 0xa8, 0x44, 0x6a, 0x6c, 0x23, 0xe3, 0x4e, 0x76, 0xb1, 0x51}, {0x90, 0x61, 0x31, 0xfe, 0xf7, 0x4a, 0x8f, 0x06, 0x9e, 0x75, 0x6a, 0x5a, 0x66, 0xdd, 0xa2, 0xe4, 0x9b, 0x8f, 0x98, 0xbb, 0x18, 0x9a, 0x96, 0x84, 0xfa, 0xe4, 0x3c, 0xd2, 0x2c, 0x96, 0x61, 0xd8}, {0x96, 0xb4, 0x84, 0xa8, 0x8b, 0x6f, 0xeb, 0xc5, 0x3e, 0xa3, 0x48, 0xd5, 0x00, 0x95, 0x47, 0xda, 0xc1, 0x2d, 0x95, 0x68, 0x49, 0x29, 0x15, 0xb9, 0x36, 0x59, 0x4c, 0x0b, 0x77, 0xdc, 0x01, 0x06}, {0x58, 0x37, 0xa7, 0x03, 0x40, 0x70, 0x91, 0xee, 0x29, 0x75, 0x10, 0xd4, 0xec, 0x01, 0x87, 0x5f, 0x2e, 0xb5, 0x56, 0xc6, 0x2d, 0xe9, 0x2b, 0xb4, 0xab, 0x95, 0x82, 0x1f, 0x11, 0xf2, 0xb8, 0xc9}, {0x81, 0xbf, 0xb0, 0x58, 0xcc, 0xdd, 0x0e, 0xf1, 0x9c, 0x17, 0x6b, 0xa0, 0xe6, 0x42, 0x8c, 0x1a, 0x3c, 0x9c, 0x20, 0x18, 0x0b, 0x52, 
0x66, 0x5a, 0xc1, 0xe5, 0xc5, 0x66, 0x35, 0xe5, 0x26, 0x4f}, {0xca, 0x73, 0xe0, 0x95, 0x2c, 0xc7, 0xa9, 0x22, 0x58, 0x68, 0x49, 0xb3, 0x68, 0xdc, 0x34, 0xe1, 0x3b, 0x17, 0x67, 0xaa, 0x82, 0xa1, 0xb6, 0xbd, 0x69, 0x9b, 0xf6, 0x00, 0x71, 0x51, 0x08, 0xca}, {0xce, 0x06, 0x68, 0x95, 0x13, 0x37, 0x8b, 0x32, 0xc9, 0x62, 0x38, 0xc9, 0x78, 0x90, 0x89, 0x0e, 0x3a, 0x5d, 0x85, 0x50, 0x1c, 0x4c, 0xd6, 0x80, 0xcc, 0x5f, 0x63, 0xf0, 0xc9, 0xfe, 0x7a, 0xb5}, {0x79, 0x78, 0x8d, 0x38, 0x13, 0xdf, 0xb7, 0x37, 0x18, 0x78, 0xbd, 0x2f, 0x3e, 0xc7, 0x2c, 0x46, 0xd2, 0x74, 0x01, 0xe9, 0xa1, 0x3f, 0xfe, 0x46, 0x11, 0xb0, 0x85, 0x2f, 0x6d, 0x4b, 0x4b, 0x8e}, {0x11, 0xce, 0x55, 0xe4, 0xba, 0xf7, 0x11, 0xcd, 0xe8, 0xa8, 0x04, 0x33, 0xbd, 0x19, 0xe8, 0xbe, 0xa1, 0x00, 0xd3, 0x28, 0xca, 0x78, 0x56, 0x6d, 0xde, 0xe5, 0x71, 0x13, 0xc2, 0xbd, 0xd8, 0xc2}, {0x04, 0x64, 0xdb, 0xdb, 0x8b, 0x4f, 0x73, 0x0e, 0x0a, 0x9e, 0xfe, 0xd0, 0x5d, 0x92, 0x3e, 0xf8, 0xf4, 0x8b, 0xef, 0xb6, 0x6f, 0x42, 0xc9, 0xea, 0x73, 0xfb, 0xb6, 0x8e, 0x37, 0x74, 0xae, 0x39}, {0x91, 0x1e, 0x40, 0x74, 0x23, 0xa7, 0xa8, 0x00, 0xfc, 0xa1, 0x16, 0xed, 0xcf, 0xff, 0xce, 0xea, 0x3f, 0x31, 0x54, 0xad, 0x19, 0x98, 0xcb, 0x5d, 0xfd, 0x82, 0xe2, 0x48, 0xbf, 0xc3, 0x74, 0x71}, {0x5f, 0x45, 0x5f, 0xba, 0x82, 0x5d, 0xc4, 0x20, 0x12, 0x67, 0x65, 0x0d, 0x8b, 0x14, 0x45, 0x20, 0xd3, 0xbc, 0xb4, 0x23, 0x26, 0x98, 0xfc, 0x05, 0x8f, 0xa5, 0x99, 0xe2, 0x78, 0x74, 0x72, 0x71}, {0xda, 0xa5, 0x2a, 0xc1, 0x13, 0xa4, 0x3b, 0xeb, 0x41, 0x51, 0x1b, 0x96, 0xa3, 0xa0, 0x5b, 0xd8, 0xed, 0x5e, 0x69, 0x67, 0xfb, 0xc5, 0x27, 0x66, 0x56, 0x8a, 0xb2, 0x1e, 0x93, 0xbf, 0xb0, 0x36}, {0x54, 0xb8, 0x17, 0xb6, 0xd2, 0x26, 0x22, 0x93, 0xdc, 0xb5, 0xd5, 0x32, 0x1b, 0x76, 0x3c, 0xfa, 0x24, 0x04, 0xcb, 0xa0, 0x1b, 0xcb, 0xa3, 0x12, 0x20, 0x60, 0x3b, 0x59, 0xe5, 0xdf, 0xf7, 0xbf}, {0x41, 0x42, 0x6c, 0xbf, 0xfa, 0x23, 0xcc, 0xee, 0x3e, 0xf6, 0xf3, 0xbf, 0xa1, 0x39, 0x9b, 0x6e, 0x7f, 0xfb, 0x2c, 0x7f, 0x4e, 0xf5, 0x35, 0x78, 0xb5, 0x5e, 0x77, 0x02, 0x40, 0x2a, 0xbc, 0x77}, {0x9b, 0xc5, 0x2f, 0xb6, 0xa1, 0x3d, 0x5a, 0xc0, 0x9a, 0x23, 0xce, 0xbf, 0x9b, 0x94, 0xad, 0xd4, 0xe4, 0x6f, 0x0f, 0x0a, 0x64, 0x55, 0x22, 0x26, 0xbc, 0x8b, 0xba, 0xdf, 0xb9, 0x04, 0x3a, 0x5b}, {0x7b, 0x66, 0x20, 0xcf, 0x63, 0xeb, 0x29, 0xb9, 0x11, 0xc5, 0x5e, 0x18, 0x98, 0x15, 0x2f, 0x69, 0x60, 0xa7, 0xf1, 0x0c, 0xc1, 0x6b, 0x6f, 0xba, 0xd3, 0x2c, 0x83, 0x7d, 0x9d, 0x8e, 0x2b, 0x74}, {0x7b, 0x9b, 0xcd, 0x1a, 0xe3, 0xfd, 0xd9, 0xd4, 0x74, 0x2e, 0x0d, 0xbc, 0xe1, 0x3c, 0x54, 0x2c, 0xc1, 0x81, 0xb5, 0x0b, 0xa0, 0xf9, 0xd5, 0xe1, 0xca, 0x18, 0x00, 0xf9, 0xb5, 0x84, 0x85, 0xca}, {0xe7, 0xc9, 0xe2, 0xc8, 0x33, 0x41, 0x31, 0x15, 0xb3, 0x84, 0x3f, 0x79, 0x18, 0xe9, 0x98, 0x5a, 0x51, 0x60, 0xf0, 0x5a, 0x5b, 0xf8, 0x7f, 0x5f, 0xdd, 0x70, 0x27, 0xe3, 0x8f, 0xe3, 0x39, 0xf4}, {0x36, 0x0d, 0x5b, 0xa8, 0x0e, 0x59, 0xe2, 0x82, 0xa2, 0x39, 0xdf, 0x28, 0x34, 0x4d, 0x4f, 0x74, 0xee, 0xd8, 0x6b, 0xa0, 0xd8, 0x9d, 0xe7, 0x88, 0x05, 0x4e, 0xba, 0x6b, 0x50, 0x03, 0x89, 0xa2}, {0x89, 0xd6, 0x81, 0x5f, 0x68, 0x39, 0x36, 0x6c, 0x25, 0xad, 0xb6, 0x43, 0xff, 0x6b, 0x5e, 0x19, 0x63, 0xd3, 0xff, 0xd0, 0xce, 0x1a, 0xa7, 0x8c, 0x7f, 0xeb, 0x5a, 0x6e, 0x99, 0xf1, 0xb4, 0xdb}, {0x1f, 0x36, 0x6f, 0x27, 0xc8, 0x2f, 0x23, 0x81, 0xfc, 0x02, 0x80, 0x4f, 0x8b, 0x8d, 0xa8, 0x2f, 0x3d, 0x35, 0x91, 0xe3, 0x60, 0x90, 0x7c, 0x57, 0x03, 0xc3, 0xa9, 0xed, 0xb1, 0x72, 0x3e, 0x3e}, } var _test_32_digests = [][32]byte{ {0x22, 0xd8, 0x35, 0x89, 0xe6, 0x42, 0xe1, 0xb1, 0x40, 0xed, 0x1b, 0x48, 0x48, 0x5b, 0x44, 0xc7, 0x07, 0x9d, 0xf3, 0xb2, 0x04, 0xbe, 0x48, 0x69, 0x42, 0x1d, 
0x45, 0x49, 0xf3, 0x9e, 0x2c, 0xc7}, {0xac, 0xfe, 0x28, 0x1d, 0x11, 0x77, 0x7c, 0x1e, 0x22, 0xe0, 0xb7, 0x16, 0x0f, 0x01, 0x66, 0x92, 0xa7, 0xb3, 0xb5, 0x69, 0xed, 0x12, 0x8d, 0x93, 0xcf, 0xce, 0x27, 0x49, 0xfd, 0x1c, 0x85, 0x01}, {0xbc, 0xb2, 0xa2, 0x0b, 0x95, 0x58, 0x91, 0x64, 0x1f, 0x3a, 0x5d, 0x80, 0xaa, 0x11, 0x49, 0xa5, 0x1b, 0xac, 0xb7, 0x1e, 0x06, 0x62, 0x45, 0x34, 0xa5, 0x66, 0xd1, 0xc7, 0x5a, 0xa9, 0x68, 0xc9}, {0x4d, 0xe2, 0xaa, 0x4b, 0xc4, 0x6c, 0x1c, 0x3d, 0x42, 0x65, 0x34, 0x8a, 0x2c, 0x7a, 0x64, 0xa8, 0xd9, 0x8a, 0x82, 0xe4, 0x8b, 0x9c, 0xc9, 0x3c, 0x3c, 0xcd, 0x34, 0x4d, 0x71, 0x76, 0xda, 0x69}, {0x1e, 0x00, 0xd3, 0xc6, 0x59, 0x37, 0x27, 0x6a, 0x6a, 0xae, 0xa7, 0xd8, 0x37, 0x51, 0xac, 0x74, 0x2d, 0xe0, 0xb6, 0x7e, 0xc5, 0xa8, 0xa7, 0x56, 0x5b, 0x0f, 0x10, 0xba, 0x8a, 0x40, 0xe2, 0x1c}, {0x30, 0x96, 0xdb, 0x9d, 0xcf, 0xa9, 0x5c, 0xf4, 0xa4, 0xc4, 0xc9, 0xd5, 0xa0, 0x1e, 0xd4, 0x30, 0xe5, 0xe8, 0xad, 0x9d, 0xaa, 0x8e, 0x79, 0x1c, 0x5d, 0x6c, 0xac, 0x1a, 0xb3, 0x65, 0xb5, 0x14}, {0x7a, 0xee, 0xd5, 0xc9, 0x66, 0x17, 0x59, 0x7f, 0x89, 0xd6, 0xd9, 0xe8, 0xa8, 0xa7, 0x01, 0x47, 0x60, 0xc6, 0x88, 0xfd, 0x2a, 0x7a, 0xf6, 0x1d, 0x10, 0x20, 0x62, 0x7e, 0x7c, 0xd0, 0x1a, 0x0b}, {0xce, 0x0c, 0x94, 0xa7, 0x41, 0x25, 0xa5, 0xe3, 0x96, 0x77, 0xd6, 0xbd, 0x91, 0xca, 0xe6, 0x06, 0xf3, 0x90, 0xe0, 0x37, 0xcc, 0xc1, 0x2c, 0x7d, 0x97, 0x97, 0xf3, 0x56, 0xf0, 0xbd, 0x66, 0x43}, {0xbc, 0xb2, 0xa2, 0x0b, 0x95, 0x58, 0x91, 0x64, 0x1f, 0x3a, 0x5d, 0x80, 0xaa, 0x11, 0x49, 0xa5, 0x1b, 0xac, 0xb7, 0x1e, 0x06, 0x62, 0x45, 0x34, 0xa5, 0x66, 0xd1, 0xc7, 0x5a, 0xa9, 0x68, 0xc9}, {0x4d, 0xe2, 0xaa, 0x4b, 0xc4, 0x6c, 0x1c, 0x3d, 0x42, 0x65, 0x34, 0x8a, 0x2c, 0x7a, 0x64, 0xa8, 0xd9, 0x8a, 0x82, 0xe4, 0x8b, 0x9c, 0xc9, 0x3c, 0x3c, 0xcd, 0x34, 0x4d, 0x71, 0x76, 0xda, 0x69}, {0x1e, 0x00, 0xd3, 0xc6, 0x59, 0x37, 0x27, 0x6a, 0x6a, 0xae, 0xa7, 0xd8, 0x37, 0x51, 0xac, 0x74, 0x2d, 0xe0, 0xb6, 0x7e, 0xc5, 0xa8, 0xa7, 0x56, 0x5b, 0x0f, 0x10, 0xba, 0x8a, 0x40, 0xe2, 0x1c}, {0x30, 0x96, 0xdb, 0x9d, 0xcf, 0xa9, 0x5c, 0xf4, 0xa4, 0xc4, 0xc9, 0xd5, 0xa0, 0x1e, 0xd4, 0x30, 0xe5, 0xe8, 0xad, 0x9d, 0xaa, 0x8e, 0x79, 0x1c, 0x5d, 0x6c, 0xac, 0x1a, 0xb3, 0x65, 0xb5, 0x14}, {0x7a, 0xee, 0xd5, 0xc9, 0x66, 0x17, 0x59, 0x7f, 0x89, 0xd6, 0xd9, 0xe8, 0xa8, 0xa7, 0x01, 0x47, 0x60, 0xc6, 0x88, 0xfd, 0x2a, 0x7a, 0xf6, 0x1d, 0x10, 0x20, 0x62, 0x7e, 0x7c, 0xd0, 0x1a, 0x0b}, {0xd4, 0x1f, 0xa7, 0x89, 0x8c, 0xf9, 0x05, 0xfc, 0x1e, 0xb0, 0x04, 0xd7, 0xaa, 0x56, 0x35, 0xec, 0x36, 0xf5, 0x0d, 0x41, 0x75, 0x64, 0x34, 0x71, 0xf0, 0x3b, 0x5b, 0xb2, 0xcc, 0xfa, 0x8c, 0xca}, {0xf8, 0xd9, 0x9e, 0xa7, 0x9c, 0xa1, 0xe0, 0x3a, 0x19, 0x4f, 0xd3, 0x2d, 0xbd, 0x40, 0x3a, 0xa3, 0x28, 0xe8, 0xa4, 0x27, 0x58, 0x44, 0x12, 0xf7, 0x69, 0x01, 0x66, 0xfa, 0xf1, 0x97, 0x30, 0xfe}, {0x99, 0x7c, 0x24, 0x0e, 0xed, 0x31, 0x0a, 0xda, 0x12, 0x16, 0x0e, 0x06, 0x44, 0xb8, 0x3f, 0xa2, 0x40, 0x52, 0xbc, 0x2d, 0xaf, 0x97, 0x00, 0x01, 0x5d, 0xbb, 0x0d, 0x06, 0x66, 0xb1, 0x59, 0xf2}, {0x99, 0x43, 0x52, 0x77, 0x28, 0x39, 0x6b, 0xeb, 0x03, 0x51, 0xc4, 0x5f, 0x7d, 0xd3, 0xe1, 0x41, 0x17, 0x66, 0x7b, 0x0e, 0xc9, 0x51, 0x01, 0xa7, 0x39, 0xf3, 0xc8, 0x63, 0x95, 0xa5, 0x92, 0x6b}, {0xce, 0x6e, 0xab, 0xd2, 0xe8, 0xad, 0x90, 0xad, 0xbe, 0xe5, 0x94, 0x96, 0xa9, 0x98, 0xe7, 0x83, 0x07, 0xa4, 0x0f, 0x8e, 0xe5, 0xb3, 0x5a, 0x05, 0xcd, 0xfd, 0xae, 0x9c, 0x07, 0xad, 0x26, 0xaa}, {0xf5, 0xee, 0x66, 0x87, 0x00, 0xed, 0xeb, 0x8b, 0xc2, 0x7d, 0x97, 0x52, 0x2d, 0xfc, 0x0a, 0x2a, 0x32, 0x0e, 0x92, 0xd2, 0x91, 0xd1, 0x69, 0x29, 0x9d, 0xb1, 0x3a, 0x65, 0x9f, 0x8e, 0x7e, 0x2a}, {0x88, 0x4a, 0xc8, 0x81, 
0xdb, 0xa6, 0x79, 0x36, 0x54, 0xe9, 0x15, 0x5c, 0xff, 0x06, 0x35, 0x8b, 0x6e, 0x0d, 0xaa, 0x3e, 0x7a, 0x82, 0x7c, 0x4a, 0xfe, 0x8a, 0x91, 0xb4, 0x34, 0xed, 0xe3, 0x17}, {0xe7, 0x92, 0xa4, 0x91, 0xdc, 0x1d, 0x83, 0xc8, 0x72, 0x5a, 0xd1, 0x27, 0x17, 0x78, 0x2b, 0xc7, 0x67, 0xe9, 0x56, 0xf2, 0xb4, 0x37, 0x51, 0xa1, 0x6b, 0x23, 0x8c, 0xc9, 0x03, 0x3d, 0x90, 0x1e}, {0xc4, 0x1f, 0xcc, 0x5e, 0xcb, 0x5e, 0x7d, 0x02, 0x12, 0x3f, 0x15, 0x9f, 0x35, 0xf4, 0x49, 0x55, 0xba, 0xc6, 0x47, 0xd2, 0x85, 0x85, 0x61, 0x69, 0xa5, 0x60, 0x7a, 0x32, 0x7f, 0x8e, 0x09, 0x5f}, {0x60, 0xb6, 0xab, 0xb5, 0x6b, 0x4d, 0xce, 0x6f, 0x1d, 0x77, 0x2e, 0x9b, 0x0d, 0x60, 0x76, 0xe3, 0xcb, 0x79, 0xbc, 0x40, 0x2d, 0x16, 0xf6, 0xa3, 0x06, 0x12, 0x36, 0x71, 0xda, 0xfd, 0x28, 0x89}, {0x67, 0xdd, 0x7f, 0x26, 0x6d, 0x2e, 0xf3, 0xef, 0x13, 0xb6, 0x09, 0x73, 0x82, 0xbc, 0x73, 0x25, 0x83, 0xc0, 0x34, 0x90, 0xe8, 0xad, 0xf0, 0x17, 0x8d, 0xed, 0xad, 0x29, 0xf7, 0x78, 0x9c, 0x28}, {0x00, 0xb0, 0xd5, 0xd0, 0x8e, 0x9b, 0xe5, 0xf0, 0x46, 0x8e, 0x60, 0x25, 0x95, 0xe5, 0x3a, 0x46, 0xb1, 0x07, 0x74, 0x97, 0xed, 0x0a, 0x2f, 0x9a, 0x3f, 0xf3, 0x94, 0x2f, 0xb3, 0x12, 0xa1, 0x91}, {0x8d, 0x36, 0x16, 0xc6, 0x00, 0x88, 0xd6, 0x69, 0xb4, 0x5a, 0x71, 0x18, 0x41, 0xe5, 0x4d, 0xb2, 0xd9, 0x00, 0x7a, 0x17, 0x63, 0x6a, 0x9b, 0x2e, 0x22, 0x12, 0x5b, 0xa3, 0x74, 0x7c, 0x95, 0xc9}, {0x4e, 0xfc, 0x5c, 0x18, 0xd1, 0x8a, 0x5b, 0x57, 0x7c, 0x86, 0x3e, 0xe2, 0x75, 0x91, 0xf2, 0xb3, 0x5f, 0xd0, 0x92, 0xbc, 0x77, 0xbe, 0x1b, 0xef, 0x1a, 0x7c, 0xe2, 0xd8, 0x8d, 0x7b, 0xef, 0xf7}, {0xcd, 0x78, 0x15, 0x64, 0x2c, 0x78, 0x57, 0x74, 0x2b, 0xb7, 0xdb, 0x74, 0xe2, 0xab, 0x82, 0xbb, 0x61, 0x32, 0x3e, 0xe4, 0xb1, 0x00, 0xde, 0xb2, 0x35, 0x1e, 0x3e, 0x1c, 0x91, 0x9d, 0x87, 0xde}, {0x17, 0xcc, 0x52, 0x5c, 0x60, 0x9e, 0xd8, 0xd4, 0xf4, 0x56, 0x28, 0x16, 0xde, 0xde, 0x73, 0xfe, 0xd9, 0x92, 0xb7, 0x99, 0x15, 0x24, 0x1b, 0x40, 0xb0, 0xda, 0x9a, 0xf8, 0x24, 0x38, 0x13, 0xbd}, {0xd0, 0x45, 0x9b, 0xe3, 0x9a, 0xae, 0x78, 0x41, 0xcd, 0x12, 0x9a, 0x6b, 0x91, 0x58, 0x29, 0x75, 0xae, 0x21, 0xd3, 0xf2, 0x5e, 0x98, 0xab, 0x09, 0xb0, 0xaa, 0x62, 0x96, 0x35, 0x64, 0x18, 0x48}, {0xd2, 0x5b, 0x10, 0xf1, 0x35, 0xaa, 0x04, 0x49, 0x4e, 0x51, 0x30, 0x0d, 0xb6, 0xbf, 0xa0, 0x9b, 0xa0, 0xf5, 0x66, 0x5f, 0x28, 0xc7, 0x8d, 0xa8, 0x3e, 0x0f, 0xe4, 0xa7, 0xc9, 0xd4, 0x0f, 0x7d}, {0xb7, 0x80, 0xc2, 0x31, 0xe6, 0x75, 0x0c, 0xad, 0x0f, 0xe8, 0xed, 0x59, 0x34, 0xdb, 0xfb, 0x41, 0xd4, 0x38, 0x73, 0x7a, 0x47, 0x01, 0xb8, 0xea, 0xea, 0x2e, 0x01, 0x8e, 0x4f, 0x09, 0x64, 0x82}, } func TestHash(t *testing.T) { tests := []struct { name string count uint32 }{ { name: "hash 1 block", count: 1, }, { name: "hash 4 blocks", count: 4, }, { name: "hash 8 blocks", count: 8, }, { name: "hash 16 blocks", count: 16, }, { name: "hash 18 blocks", count: 18, }, { name: "hash 24 blocks", count: 24, }, { name: "hash 32 blocks", count: 32, }, { name: "hash 31 blocks", count: 31, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { digests := make([][32]byte, tt.count) err := gohashtree.Hash(digests, _test_32_block[:2*tt.count]) if err != nil { t.Log(err) t.Fail() } if !reflect.DeepEqual(digests, _test_32_digests[:tt.count]) { t.Logf("Digests are different\n Expected: %x\n Produced: %x\n", _test_32_digests[:tt.count], digests) t.Fail() } digests2 := make([][32]byte, tt.count) gohashtree.Sha256_1_generic(digests2, _test_32_block[:2*tt.count]) if err != nil { t.Log(err) t.Fail() } if !reflect.DeepEqual(digests2, _test_32_digests[:tt.count]) { t.Logf("Digests are different\n Expected: %x\n Produced: %x\n", 
_test_32_digests[:tt.count], digests) t.Fail() } }) } } func TestHashByteSlice(t *testing.T) { tests := []struct { name string count uint32 }{ { name: "hash 1 block", count: 1, }, { name: "hash 4 blocks", count: 4, }, { name: "hash 8 blocks", count: 8, }, { name: "hash 16 blocks", count: 16, }, { name: "hash 18 blocks", count: 18, }, { name: "hash 24 blocks", count: 24, }, { name: "hash 32 blocks", count: 32, }, { name: "hash 31 blocks", count: 31, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { digests := make([]byte, 32*tt.count) chunks := make([]byte, 64*tt.count) for i := 0; i < int(2*tt.count); i += 2 { if n := copy(chunks[32*i:32*i+32], _test_32_block[i][:]); n != 32 { t.Logf("copied wrong number of bytes") t.Fail() } if n := copy(chunks[32*i+32:32*i+64], _test_32_block[i+1][:]); n != 32 { t.Logf("copied wrong number of bytes") t.Fail() } } err := gohashtree.HashByteSlice(digests, chunks) if err != nil { t.Log(err) t.Fail() } for i := 0; i < int(tt.count); i++ { if !reflect.DeepEqual(digests[32*i:32*i+32], _test_32_digests[i][:]) { t.Logf("Digests are different\n Expected: %x\n Produced: %x\n", _test_32_digests[i][:], digests[32*i:32*i+32]) t.Fail() } } }) } } func TestOddChunks(t *testing.T) { digests := make([][32]byte, 1) chunks := make([][32]byte, 1) err := gohashtree.Hash(digests, chunks) if err.Error() != "odd number of chunks" { t.Logf("expected error: \"odd number of chunks\", got: \"%s\"", err) t.Fail() } } func TestNotAllocatedDigest(t *testing.T) { digests := make([][32]byte, 1) chunks := make([][32]byte, 4) err := gohashtree.Hash(digests, chunks) expected := "not enough digest length, need at least 2, got 1" if err.Error() != expected { t.Logf("expected error: \"%s\", got: \"%s\"", expected, err) t.Fail() } } func OldHash(data []byte) [32]byte { h := sha256.New() h.Reset() var b [32]byte h.Write(data) h.Sum(b[:0]) return b } func BenchmarkHash_1_minio(b *testing.B) { chunks := [64]byte{'A'} digests := make([][32]byte, 1) b.ResetTimer() for i := 0; i < b.N; i++ { digests[0] = OldHash(chunks[:]) } } func BenchmarkHash_1(b *testing.B) { chunks := make([][32]byte, 2) digests := make([][32]byte, 1) b.ResetTimer() for i := 0; i < b.N; i++ { gohashtree.Hash(digests, chunks) } } func BenchmarkHash_4_minio(b *testing.B) { chunks := [64 * 4]byte{'A'} digests := make([][32]byte, 4) b.ResetTimer() for i := 0; i < b.N; i++ { for j := 0; j < 4; j++ { digests[j] = OldHash(chunks[j*64 : j*64+64]) } } } func BenchmarkHash_4(b *testing.B) { chunks := make([][32]byte, 8) digests := make([][32]byte, 4) b.ResetTimer() for i := 0; i < b.N; i++ { gohashtree.Hash(digests, chunks) } } func BenchmarkHash_8_minio(b *testing.B) { chunks := [64 * 8]byte{'A'} digests := make([][32]byte, 8) b.ResetTimer() for i := 0; i < b.N; i++ { for j := 0; j < 8; j++ { digests[j] = OldHash(chunks[j*64 : j*64+64]) } } } func BenchmarkHash_8(b *testing.B) { chunks := make([][32]byte, 16) digests := make([][32]byte, 8) b.ResetTimer() for i := 0; i < b.N; i++ { gohashtree.Hash(digests, chunks) } } func BenchmarkHash_16_minio(b *testing.B) { chunks := [64 * 16]byte{'A'} digests := make([][32]byte, 16) b.ResetTimer() for i := 0; i < b.N; i++ { for j := 0; j < 16; j++ { digests[j] = OldHash(chunks[j*64 : j*64+64]) } } } func BenchmarkHash_16(b *testing.B) { chunks := make([][32]byte, 32) digests := make([][32]byte, 16) b.ResetTimer() for i := 0; i < b.N; i++ { gohashtree.Hash(digests, chunks) } } func BenchmarkHashLargeList_minio(b *testing.B) { balances := make([][32]byte, 400000) for i := 0; i < 
len(balances); i++ { balances[i] = [32]byte{'A'} } digests := make([][32]byte, 200000) b.ResetTimer() for i := 0; i < b.N; i++ { for j := 1; j < 200000; j++ { batchedRT := append(balances[2*j][:], balances[2*j+1][:]...) digests[j] = OldHash(batchedRT) } } } func BenchmarkHashList(b *testing.B) { balances := make([][32]byte, 400000) for i := 0; i < len(balances); i++ { balances[i] = [32]byte{'A'} } digests := make([][32]byte, 200000) b.ResetTimer() for i := 0; i < b.N; i++ { gohashtree.Hash(digests, balances) } } gohashtree-0.0.4-beta/sha256_1_generic.go000066400000000000000000000135641455574744200200620ustar00rootroot00000000000000/* MIT License Copyright (c) 2021-2022 Prysmatic Labs Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package gohashtree import ( "encoding/binary" "math/bits" ) const ( init0 = uint32(0x6A09E667) init1 = uint32(0xBB67AE85) init2 = uint32(0x3C6EF372) init3 = uint32(0xA54FF53A) init4 = uint32(0x510E527F) init5 = uint32(0x9B05688C) init6 = uint32(0x1F83D9AB) init7 = uint32(0x5BE0CD19) ) var _P = []uint32{ 0xc28a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf374, 0x649b69c1, 0xf0fe4786, 0x0fe1edc6, 0x240cf254, 0x4fe9346f, 0x6cc984be, 0x61b9411e, 0x16f988fa, 0xf2c65152, 0xa88e5a6d, 0xb019fc65, 0xb9d99ec7, 0x9a1231c3, 0xe70eeaa0, 0xfdb1232b, 0xc7353eb0, 0x3069bad5, 0xcb976d5f, 0x5a0f118f, 0xdc1eeefd, 0x0a35b689, 0xde0b7a04, 0x58f4ca9d, 0xe15d5b16, 0x007f3e86, 0x37088980, 0xa507ea32, 0x6fab9537, 0x17406110, 0x0d8cd6f1, 0xcdaa3b6d, 0xc0bbbe37, 0x83613bda, 0xdb48a363, 0x0b02e931, 0x6fd15ca7, 0x521afaca, 0x31338431, 0x6ed41a95, 0x6d437890, 0xc39c91f2, 0x9eccabbd, 0xb5c9a0e6, 0x532fb63c, 0xd2c741c6, 0x07237ea3, 0xa4954b68, 0x4c191d76, } var _K = []uint32{ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 
0xbef9a3f7, 0xc67178f2,
}

func sha256_1_generic(digests [][32]byte, p [][32]byte) {
	var w [16]uint32
	for k := 0; k < len(p)/2; k++ {
		// First 16 rounds
		a, b, c, d, e, f, g, h := init0, init1, init2, init3, init4, init5, init6, init7
		for i := 0; i < 8; i++ {
			j := i * 4
			w[i] = uint32(p[2*k][j])<<24 | uint32(p[2*k][j+1])<<16 | uint32(p[2*k][j+2])<<8 | uint32(p[2*k][j+3])
			t1 := h + ((bits.RotateLeft32(e, -6)) ^ (bits.RotateLeft32(e, -11)) ^ (bits.RotateLeft32(e, -25))) + ((e & f) ^ (^e & g)) + _K[i] + w[i]
			t2 := ((bits.RotateLeft32(a, -2)) ^ (bits.RotateLeft32(a, -13)) ^ (bits.RotateLeft32(a, -22))) + ((a & b) ^ (a & c) ^ (b & c))
			h = g
			g = f
			f = e
			e = d + t1
			d = c
			c = b
			b = a
			a = t1 + t2
		}
		for i := 8; i < 16; i++ {
			j := (i - 8) * 4
			w[i] = uint32(p[2*k+1][j])<<24 | uint32(p[2*k+1][j+1])<<16 | uint32(p[2*k+1][j+2])<<8 | uint32(p[2*k+1][j+3])
			t1 := h + ((bits.RotateLeft32(e, -6)) ^ (bits.RotateLeft32(e, -11)) ^ (bits.RotateLeft32(e, -25))) + ((e & f) ^ (^e & g)) + _K[i] + w[i]
			t2 := ((bits.RotateLeft32(a, -2)) ^ (bits.RotateLeft32(a, -13)) ^ (bits.RotateLeft32(a, -22))) + ((a & b) ^ (a & c) ^ (b & c))
			h = g
			g = f
			f = e
			e = d + t1
			d = c
			c = b
			b = a
			a = t1 + t2
		}
		// Last 48 rounds
		for i := 16; i < 64; i++ {
			v1 := w[(i-2)%16]
			t1 := (bits.RotateLeft32(v1, -17)) ^ (bits.RotateLeft32(v1, -19)) ^ (v1 >> 10)
			v2 := w[(i-15)%16]
			t2 := (bits.RotateLeft32(v2, -7)) ^ (bits.RotateLeft32(v2, -18)) ^ (v2 >> 3)
			w[i%16] += t1 + w[(i-7)%16] + t2
			t1 = h + ((bits.RotateLeft32(e, -6)) ^ (bits.RotateLeft32(e, -11)) ^ (bits.RotateLeft32(e, -25))) + ((e & f) ^ (^e & g)) + _K[i] + w[i%16]
			t2 = ((bits.RotateLeft32(a, -2)) ^ (bits.RotateLeft32(a, -13)) ^ (bits.RotateLeft32(a, -22))) + ((a & b) ^ (a & c) ^ (b & c))
			h = g
			g = f
			f = e
			e = d + t1
			d = c
			c = b
			b = a
			a = t1 + t2
		}
		// Add original digest
		a += init0
		b += init1
		c += init2
		d += init3
		e += init4
		f += init5
		g += init6
		h += init7
		h0, h1, h2, h3, h4, h5, h6, h7 := a, b, c, d, e, f, g, h
		// Rounds with padding
		for i := 0; i < 64; i++ {
			t1 := h + ((bits.RotateLeft32(e, -6)) ^ (bits.RotateLeft32(e, -11)) ^ (bits.RotateLeft32(e, -25))) + ((e & f) ^ (^e & g)) + _P[i]
			t2 := ((bits.RotateLeft32(a, -2)) ^ (bits.RotateLeft32(a, -13)) ^ (bits.RotateLeft32(a, -22))) + ((a & b) ^ (a & c) ^ (b & c))
			h = g
			g = f
			f = e
			e = d + t1
			d = c
			c = b
			b = a
			a = t1 + t2
		}
		h0 += a
		h1 += b
		h2 += c
		h3 += d
		h4 += e
		h5 += f
		h6 += g
		h7 += h
		var dig [32]byte
		binary.BigEndian.PutUint32(dig[0:4], h0)
		binary.BigEndian.PutUint32(dig[4:8], h1)
		binary.BigEndian.PutUint32(dig[8:12], h2)
		binary.BigEndian.PutUint32(dig[12:16], h3)
		binary.BigEndian.PutUint32(dig[16:20], h4)
		binary.BigEndian.PutUint32(dig[20:24], h5)
		binary.BigEndian.PutUint32(dig[24:28], h6)
		binary.BigEndian.PutUint32(dig[28:32], h7)
		(digests)[k] = dig
	}
}
gohashtree-0.0.4-beta/sha256_1_sse.go-bak000066400000000000000000000022671455574744200177700ustar00rootroot00000000000000
//go:build amd64
// +build amd64

/*
MIT License

Copyright (c) 2021 Prysmatic Labs

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package gohashtree

//go:noescape
func sha256_1_sse(digests *byte, p [][32]byte, count uint32)
gohashtree-0.0.4-beta/sha256_1_sse.s-bak000066400000000000000000000324411455574744200176230ustar00rootroot00000000000000
/*
MIT License

Copyright (c) 2021 Prysmatic Labs

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#define MSGSCHEDULE0(index) \
	MOVL (index*4)(SI), AX; \
	BSWAPL AX; \
	MOVL AX, (index*4)(BP)

// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
// SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
// SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
#define MSGSCHEDULE1(index) \
	MOVL ((index-2)*4)(BP), AX; \
	MOVL AX, DI; \
	RORL $17, AX; \
	MOVL DI, DX; \
	RORL $19, DI; \
	SHRL $10, DX; \
	MOVL ((index-15)*4)(BP), BX; \
	XORL DI, AX; \
	MOVL BX, DI; \
	XORL DX, AX; \
	RORL $7, BX; \
	MOVL DI, DX; \
	SHRL $3, DX; \
	RORL $18, DI; \
	ADDL ((index-7)*4)(BP), AX; \
	XORL DI, BX; \
	XORL DX, BX; \
	ADDL ((index-16)*4)(BP), BX; \
	ADDL BX, AX; \
	MOVL AX, ((index)*4)(BP)

// Calculate T1 and T2, then e = d + T1 and a = T1 + T2. Wt+Kt is passed in AX.
// The values for e and a are stored in d and h, ready for rotation.
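// In FIPS 180-4 terms, each round computes
//   T1 = h + Sigma1(e) + Ch(e,f,g) + Wt + Kt
//   T2 = Sigma0(a) + Maj(a,b,c)
// The macro below evaluates Sigma1(e) with chained rotations,
// ROTR6(ROTR5(ROTR14(e) XOR e) XOR e) = ROTR6(e) XOR ROTR11(e) XOR ROTR25(e),
// and Sigma0(a) as ROTR2(ROTR11(ROTR9(a) XOR a) XOR a). Ch is formed as
// ((f XOR g) AND e) XOR g and Maj as ((a OR c) AND b) OR (a AND c), saving
// one instruction each over the textbook formulas.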
#define SHA256ROUND(a, b, c, d, e, f, g, h) \
	MOVL e, BX; \
	RORL $14, BX; \
	MOVL a, DX; \
	RORL $9, DX; \
	XORL e, BX; \
	MOVL f, DI; \
	RORL $5, BX; \
	XORL a, DX; \
	XORL g, DI; \
	XORL e, BX; \
	ANDL e, DI; \
	RORL $11, DX; \
	XORL a, DX; \
	RORL $6, BX; \
	XORL g, DI; \
	RORL $2, DX; \
	ADDL BX, DI; \
	ADDL AX, DI; \
	MOVL a, BX; \
	ADDL DI, h; \
	MOVL a, DI; \
	ORL c, BX; \
	ADDL h, d; \
	ANDL c, DI; \
	ANDL b, BX; \
	ADDL DX, h; \
	ORL DI, BX; \
	ADDL BX, h

#define SHA256ROUND0(index, const, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE0(index); \
	ADDL $const, AX; \
	SHA256ROUND(a, b, c, d, e, f, g, h)

#define SHA256ROUND1(index, const, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE1(index); \
	ADDL $const, AX; \
	SHA256ROUND(a, b, c, d, e, f, g, h)

#define PADDSHA256ROUND(const, a, b, c, d, e, f, g, h) \
	MOVL e, BX; \
	RORL $14, BX; \
	MOVL a, DX; \
	RORL $9, DX; \
	XORL e, BX; \
	MOVL f, DI; \
	RORL $5, BX; \
	XORL a, DX; \
	XORL g, DI; \
	XORL e, BX; \
	ANDL e, DI; \
	RORL $11, DX; \
	XORL a, DX; \
	RORL $6, BX; \
	XORL g, DI; \
	RORL $2, DX; \
	ADDL BX, DI; \
	ADDL $const, DI; \
	MOVL a, BX; \
	ADDL DI, h; \
	MOVL a, DI; \
	ORL c, BX; \
	ADDL h, d; \
	ANDL c, DI; \
	ANDL b, BX; \
	ADDL DX, h; \
	ORL DI, BX; \
	ADDL BX, h

TEXT ·sha256_1_sse(SB), 0, $296-36
	MOVQ digests+0(FP), CX // digests *[][32]byte
	MOVQ p_base+8(FP), SI  // p [][32]byte
	MOVL count+32(FP), DX  // count uint32
	SHLQ $6, DX
	LEAQ (SI)(DX*1), DI
	MOVQ DI, 256(SP)
	CMPQ SI, DI
	JEQ  end
	MOVQ SP, BP

loop:
	MOVL $0x6A09E667, R8  // a = H0
	MOVL $0xBB67AE85, R9  // b = H1
	MOVL $0x3C6EF372, R10 // c = H2
	MOVL $0xA54FF53A, R11 // d = H3
	MOVL $0x510E527F, R12 // e = H4
	MOVL $0x9B05688C, R13 // f = H5
	MOVL $0x1F83D9AB, R14 // g = H6
	MOVL $0x5BE0CD19, R15 // h = H7

	SHA256ROUND0(0, 0x428a2f98, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND0(1, 0x71374491, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND0(2, 0xb5c0fbcf, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND0(3, 0xe9b5dba5, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND0(4, 0x3956c25b, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND0(5, 0x59f111f1, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND0(6, 0x923f82a4, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND0(7, 0xab1c5ed5, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND0(8, 0xd807aa98, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND0(9, 0x12835b01, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND0(10, 0x243185be, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND0(11, 0x550c7dc3, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND0(12, 0x72be5d74, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND0(13, 0x80deb1fe, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND0(14, 0x9bdc06a7, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND0(15, 0xc19bf174, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND1(16, 0xe49b69c1, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(17, 0xefbe4786, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(18, 0x0fc19dc6, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(19, 0x240ca1cc, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(20, 0x2de92c6f, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(21, 0x4a7484aa, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(22, 0x5cb0a9dc, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(23, 0x76f988da, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND1(24, 0x983e5152, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(25, 0xa831c66d, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(26, 0xb00327c8, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(27, 0xbf597fc7, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(28, 0xc6e00bf3, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(29, 0xd5a79147, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(30, 0x06ca6351, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(31, 0x14292967, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND1(32, 0x27b70a85, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(33, 0x2e1b2138, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(34, 0x4d2c6dfc, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(35, 0x53380d13, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(36, 0x650a7354, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(37, 0x766a0abb, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(38, 0x81c2c92e, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(39, 0x92722c85, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND1(40, 0xa2bfe8a1, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(41, 0xa81a664b, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(42, 0xc24b8b70, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(43, 0xc76c51a3, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(44, 0xd192e819, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(45, 0xd6990624, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(46, 0xf40e3585, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(47, 0x106aa070, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND1(48, 0x19a4c116, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(49, 0x1e376c08, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(50, 0x2748774c, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(51, 0x34b0bcb5, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(52, 0x391c0cb3, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(53, 0x4ed8aa4a, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(54, 0x5b9cca4f, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(55, 0x682e6ff3, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND1(56, 0x748f82ee, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(57, 0x78a5636f, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(58, 0x84c87814, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(59, 0x8cc70208, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(60, 0x90befffa, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(61, 0xa4506ceb, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(62, 0xbef9a3f7, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(63, 0xc67178f2, R9, R10, R11, R12, R13, R14, R15, R8)

	// Add initial digest and save it
	ADDL	$0x6A09E667, R8		// H0 = a + H0
	MOVL	R8, (0*4)(CX)
	ADDL	$0xBB67AE85, R9		// H1 = b + H1
	MOVL	R9, (1*4)(CX)
	ADDL	$0x3C6EF372, R10	// H2 = c + H2
	MOVL	R10, (2*4)(CX)
	ADDL	$0xA54FF53A, R11	// H3 = d + H3
	MOVL	R11, (3*4)(CX)
	ADDL	$0x510E527F, R12	// H4 = e + H4
	MOVL	R12, (4*4)(CX)
	ADDL	$0x9B05688C, R13	// H5 = f + H5
	MOVL	R13, (5*4)(CX)
	ADDL	$0x1F83D9AB, R14	// H6 = g + H6
	MOVL	R14, (6*4)(CX)
	ADDL	$0x5BE0CD19, R15	// H7 = h + H7
	MOVL	R15, (7*4)(CX)

	// Rounds with padding: the second 64-byte block of each message is
	// always the same SHA-256 padding block (0x80000000 followed by zeros,
	// with the 512-bit message length 0x00000200 in the last word), so its
	// message schedule is known at assembly time and each constant below is
	// the precomputed Wt + Kt (e.g. round 0: 0x428a2f98 + 0x80000000 =
	// 0xc28a2f98; round 15: 0xc19bf174 + 0x00000200 = 0xc19bf374).

	// Rounds 0 - 15
	PADDSHA256ROUND(0xc28a2f98, R8, R9, R10, R11, R12, R13, R14, R15)
	PADDSHA256ROUND(0x71374491, R15, R8, R9, R10, R11, R12, R13, R14)
	PADDSHA256ROUND(0xb5c0fbcf, R14, R15, R8, R9, R10, R11, R12, R13)
	PADDSHA256ROUND(0xe9b5dba5, R13, R14, R15, R8, R9, R10, R11, R12)
	PADDSHA256ROUND(0x3956c25b, R12, R13, R14, R15, R8, R9, R10, R11)
	PADDSHA256ROUND(0x59f111f1, R11, R12, R13, R14, R15, R8, R9, R10)
	PADDSHA256ROUND(0x923f82a4, R10, R11, R12, R13, R14, R15, R8, R9)
	PADDSHA256ROUND(0xab1c5ed5, R9, R10, R11, R12, R13, R14, R15, R8)
	PADDSHA256ROUND(0xd807aa98, R8, R9, R10, R11, R12, R13, R14, R15)
	PADDSHA256ROUND(0x12835b01, R15, R8, R9, R10, R11, R12, R13, R14)
	PADDSHA256ROUND(0x243185be, R14, R15, R8, R9, R10, R11, R12, R13)
	PADDSHA256ROUND(0x550c7dc3, R13, R14, R15, R8, R9, R10, R11, R12)
	PADDSHA256ROUND(0x72be5d74, R12, R13, R14, R15, R8, R9, R10, R11)
	PADDSHA256ROUND(0x80deb1fe, R11, R12, R13, R14, R15, R8, R9, R10)
	PADDSHA256ROUND(0x9bdc06a7, R10, R11, R12, R13, R14, R15, R8, R9)
	PADDSHA256ROUND(0xc19bf374, R9, R10, R11, R12, R13, R14, R15, R8)

	// Rounds 16 - 31
	PADDSHA256ROUND(0x649b69c1, R8, R9, R10, R11, R12, R13, R14, R15)
	PADDSHA256ROUND(0xf0fe4786, R15, R8, R9, R10, R11, R12, R13, R14)
	PADDSHA256ROUND(0x0fe1edc6, R14, R15, R8, R9, R10, R11, R12, R13)
	PADDSHA256ROUND(0x240cf254, R13, R14, R15, R8, R9, R10, R11, R12)
	PADDSHA256ROUND(0x4fe9346f, R12, R13, R14, R15, R8, R9, R10, R11)
	PADDSHA256ROUND(0x6cc984be, R11, R12, R13, R14, R15, R8, R9, R10)
	PADDSHA256ROUND(0x61b9411e, R10, R11, R12, R13, R14, R15, R8, R9)
	PADDSHA256ROUND(0x16f988fa, R9, R10, R11, R12, R13, R14, R15, R8)
	PADDSHA256ROUND(0xf2c65152, R8, R9, R10, R11, R12, R13, R14, R15)
	PADDSHA256ROUND(0xa88e5a6d, R15, R8, R9, R10, R11, R12, R13, R14)
	PADDSHA256ROUND(0xb019fc65, R14, R15, R8, R9, R10, R11, R12, R13)
	PADDSHA256ROUND(0xb9d99ec7, R13, R14, R15, R8, R9, R10, R11, R12)
	PADDSHA256ROUND(0x9a1231c3, R12, R13, R14, R15, R8, R9, R10, R11)
	PADDSHA256ROUND(0xe70eeaa0, R11, R12, R13, R14, R15, R8, R9, R10)
	PADDSHA256ROUND(0xfdb1232b, R10, R11, R12, R13, R14, R15, R8, R9)
	PADDSHA256ROUND(0xc7353eb0, R9, R10, R11, R12, R13, R14, R15, R8)

	// Rounds 32 - 47
	PADDSHA256ROUND(0x3069bad5, R8, R9, R10, R11, R12, R13, R14, R15)
	PADDSHA256ROUND(0xcb976d5f, R15, R8, R9, R10, R11, R12, R13, R14)
	PADDSHA256ROUND(0x5a0f118f, R14, R15, R8, R9, R10, R11, R12, R13)
	PADDSHA256ROUND(0xdc1eeefd, R13, R14, R15, R8, R9, R10, R11, R12)
	PADDSHA256ROUND(0x0a35b689, R12, R13, R14, R15, R8, R9, R10, R11)
	PADDSHA256ROUND(0xde0b7a04, R11, R12, R13, R14, R15, R8, R9, R10)
	PADDSHA256ROUND(0x58f4ca9d, R10, R11, R12, R13, R14, R15, R8, R9)
	PADDSHA256ROUND(0xe15d5b16, R9, R10, R11, R12, R13, R14, R15, R8)
	PADDSHA256ROUND(0x007f3e86, R8, R9, R10, R11, R12, R13, R14, R15)
	PADDSHA256ROUND(0x37088980, R15, R8, R9, R10, R11, R12, R13, R14)
	PADDSHA256ROUND(0xa507ea32, R14, R15, R8, R9, R10, R11, R12, R13)
	PADDSHA256ROUND(0x6fab9537, R13, R14, R15, R8, R9, R10, R11, R12)
	PADDSHA256ROUND(0x17406110, R12, R13, R14, R15, R8, R9, R10, R11)
	PADDSHA256ROUND(0x0d8cd6f1, R11, R12, R13, R14, R15, R8, R9, R10)
	PADDSHA256ROUND(0xcdaa3b6d, R10, R11, R12, R13, R14, R15, R8, R9)
	PADDSHA256ROUND(0xc0bbbe37, R9, R10, R11, R12, R13, R14, R15, R8)

	// Rounds 48 - 63
	PADDSHA256ROUND(0x83613bda, R8, R9, R10, R11, R12, R13, R14, R15)
	PADDSHA256ROUND(0xdb48a363, R15, R8, R9, R10, R11, R12, R13, R14)
	PADDSHA256ROUND(0x0b02e931, R14, R15, R8, R9, R10, R11, R12, R13)
	PADDSHA256ROUND(0x6fd15ca7, R13, R14, R15, R8, R9, R10, R11, R12)
	PADDSHA256ROUND(0x521afaca, R12, R13, R14, R15, R8, R9, R10, R11)
	PADDSHA256ROUND(0x31338431, R11, R12, R13, R14, R15, R8, R9, R10)
	PADDSHA256ROUND(0x6ed41a95, R10, R11, R12, R13, R14, R15, R8, R9)
	PADDSHA256ROUND(0x6d437890, R9, R10, R11, R12, R13, R14, R15, R8)
	PADDSHA256ROUND(0xc39c91f2, R8, R9, R10, R11, R12, R13, R14, R15)
	PADDSHA256ROUND(0x9eccabbd, R15, R8, R9, R10, R11, R12, R13, R14)
	PADDSHA256ROUND(0xb5c9a0e6, R14, R15, R8, R9, R10, R11, R12, R13)
	PADDSHA256ROUND(0x532fb63c, R13, R14, R15, R8, R9, R10, R11, R12)
	PADDSHA256ROUND(0xd2c741c6, R12, R13, R14, R15, R8, R9, R10, R11)
	PADDSHA256ROUND(0x07237ea3, R11, R12, R13, R14, R15, R8, R9, R10)
	PADDSHA256ROUND(0xa4954b68, R10, R11, R12, R13, R14, R15, R8, R9)
	PADDSHA256ROUND(0x4c191d76, R9, R10, R11, R12, R13, R14, R15, R8)

	// Add previous digest and save it, byte-swapping each word so the
	// final digest is stored in big-endian order.
	ADDL	(0*4)(CX), R8	// H0 = a + H0
	BSWAPL	R8
	MOVL	R8, (0*4)(CX)
	ADDL	(1*4)(CX), R9	// H1 = b + H1
	BSWAPL	R9
	MOVL	R9, (1*4)(CX)
	ADDL	(2*4)(CX), R10	// H2 = c + H2
	BSWAPL	R10
	MOVL	R10, (2*4)(CX)
	ADDL	(3*4)(CX), R11	// H3 = d + H3
	BSWAPL	R11
	MOVL	R11, (3*4)(CX)
	ADDL	(4*4)(CX), R12	// H4 = e + H4
	BSWAPL	R12
	MOVL	R12, (4*4)(CX)
	ADDL	(5*4)(CX), R13	// H5 = f + H5
	BSWAPL	R13
	MOVL	R13, (5*4)(CX)
	ADDL	(6*4)(CX), R14	// H6 = g + H6
	BSWAPL	R14
	MOVL	R14, (6*4)(CX)
	ADDL	(7*4)(CX), R15	// H7 = h + H7
	BSWAPL	R15
	MOVL	R15, (7*4)(CX)

	ADDQ	$64, SI		// advance past the two 32-byte chunks just hashed
	ADDQ	$32, CX		// advance to the next 32-byte digest slot
	CMPQ	SI, 256(SP)
	JB	loop

end:
	RET
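For readers who want to sanity-check the assembly, here is a minimal pure-Go
model of what `sha256_1_sse` computes. The helper name `hashPairs` and the
standalone `main` are illustrative only (they are not part of the library),
and it leans on the standard `crypto/sha256` instead of the hand-scheduled
rounds above:

```go
package main

import (
	"crypto/sha256"
	"fmt"
)

// hashPairs mirrors the assembly loop: each iteration consumes two
// consecutive 32-byte chunks (one 64-byte SHA-256 block, followed
// internally by the fixed padding block) and writes one 32-byte digest.
func hashPairs(digests [][32]byte, chunks [][32]byte) {
	for i := 0; i+1 < len(chunks); i += 2 {
		var block [64]byte
		copy(block[:32], chunks[i][:])
		copy(block[32:], chunks[i+1][:])
		digests[i/2] = sha256.Sum256(block[:])
	}
}

func main() {
	chunks := make([][32]byte, 2) // two zero chunks -> one digest
	digests := make([][32]byte, 1)
	hashPairs(digests, chunks)
	fmt.Printf("%x\n", digests[0])
}
```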