pax_global_header00006660000000000000000000000064151752326760014530gustar00rootroot0000000000000052 comment=f0e5753ab568a8fb1998685a959b1d6bbb69a97a golang-github-kelindar-bitmap-1.5.5/000077500000000000000000000000001517523267600173305ustar00rootroot00000000000000golang-github-kelindar-bitmap-1.5.5/.github/000077500000000000000000000000001517523267600206705ustar00rootroot00000000000000golang-github-kelindar-bitmap-1.5.5/.github/FUNDING.yml000066400000000000000000000000241517523267600225010ustar00rootroot00000000000000github: [kelindar] golang-github-kelindar-bitmap-1.5.5/.github/bitmap1.png000066400000000000000000001037441517523267600227440ustar00rootroot00000000000000PNG  IHDRv`sRGBgAMA a pHYs(JyIDATx^`چɺ%WZ uw[_wJ޺+uhqw}uҐMv{;FPB * *TP"P)V *TP*ŪPB @X*TPBB *T(bUPB ER *TPTUB *  dhR )Xi.yغd6 즞č`D'ִmZR)VEۨKplG/|Kp0&c_Ih&F6iRșDO#4=ǕgD՜a9K{(cr)$?,4hpLLb4 #DhZ;v3[/1ǽ{|Awg:ֵDz0w3Q]TUmRڵڥ ҈DpDaK0 Em66UKg a}I'Ñh 1h5Iօ^c1h4dxR$tYr<“I_(<k iSd4čX4&œ0j0$%44 S-ۦR1ʹ]nP~AsBs=^$5uj`-b'^Z44mIUnj[ՔzQU+lkQ/%)FhiuS'ӪSxL>nnw-'i`fàKPCۿnb)=zO7nT@0ԑmND/Tpk+ښbA*v(JoASA8bB+iDbUFJ nvMtYXrj*vCBڽd.i#VbBM6UTTD")i kx9P]X~ws5 4w&*v}W>s'%7o\VVKIn&QXҼ Nԅ Θ1CC]]ݳ>o644HIW^]ɛp&*0dٽdҿbNO??qD(vDVX1{lj0$vO?}1-%"VAXv1${D9tjEPY*w}w.,XfI@}}Y(d2IIH4ZpXךͳ֕(9uU3וv e%uMp /IA8T똳ok*jӴ8o /oDMlZX$+G}$>ysΞ)I/DYC8f-][Y m|B+ʪf M:Wۻ%3ז,-jmA8Eq_}QVYJ:5Ţ#bH1-Zh}mu#/ua @K6WMn]QZ&7+ejrg"(پhS%_ܾ`/.wH$pS㰱{ߙ,Y0_ڦ޼ U6܁`ӳjƚM _^BY9шD u fӏOx;/=WYV*m8 6o/{oj?i['˖ ^vS\jE(( -\9k]YY}lp{oflu%8}E񊲚`Sy 2qqy(YY^;c&L=L_U"y|Y V-={mEu.oq#ɇ?y)?|)#`l!Y Ju}rYY8xN(O<'oxL&3] ͉ço'_ FVSLS.kpL6OQaF(i ؕ%PxmUeg^}TBA(PʴCȕ/ú M۵O^f(]_M:*6|>?xKK 4(K9s4447ա` rrrF/' )(Tv@uUTTL81##CJM4IG$hp2ծYiTK,I_Jk^Я C6gN:kpu˗%q{+'W?ק1X=h_~fa Oڭ umU#KhrϞ{N1!V}n6B!?jRMTJ(l!(6u˥jZ&b2nЗch>ϝŪqO zly /-<)ɀSν.7hfkuP'j ?AQGtʅW_?hH{fpGɎ/8=nz[ = 2 :atdL30(Ԍ̡S,s-4^Eb SMJLbŠMD.h^LLf'rϣOa9sy)*b)5?裏7x狋l6R}>{>È-ioB ./ah ڛ9sO?ٳg0 33ՙb)O>)<.&hwޙ4i /wt:y $¦T=7pÝw4R_}r331Vih@PضAI`8X]#0SO=<_~4hPzz:Md>N>dxNJ6l(,,/⋃>l6ŋ|u]uUs۵kW1]dɴik/D'FԭX&o)ae4N1=#gE~?_!l:gΜ;ÐT &L0tPsι;N?tj" 
2#barLRT(?ܥoQ|{Bj2ޔ%TV[a1쎗oˈ<o1c!"_|Ŵ4!Ev<ꨣzɎ$ĩjaÆq_|1u`S @LL&+dw2aw$HYIII>}R[7nϬV+ pL2 O>O<e#%)V/rt(>l=N:$.55U$*o+~zx ##߆Q~x-B_p^^{!HQJ{5Gϝ;- Q?vLyi!M7FEhA&2LJ٨X`#M@$t@@B&+%ntFkuJqS\\ 5Qł|7К]wݡ~pp'<[7^>:u*7#af "x># #@>htsp {ws)`0|'/Havsss'N(9Nթf"[O؎lF٠sN"H=A"ldbO:>O Db2]=$*v* T 2hD+L7&s|M=7<N7nQM3Y<21Si,$+Q,}X|={l>+DJcbAt]w}ݨC9DQXsyGǏ?f(pرnU#<@TgMݻ1bє Xۡ+;{ElϿo'xJ=[ Pɓ!C"pgL4 f/hmr$,`JS:MqMCI}syWW1'3m=YV>'V:=nAke隑ʺr64WUԄ"t˜H\L@jVMM5#.ڪ:]͜fN+€դϳ|Њ7,oZ[Y욕@u2/j5MgǮjʰ4;-:ņBH rSɕ$)r '%K1Lg+I`=!Q> S6( e=8C4qN9jԨnݺ D/&L+ c=V<C&>-9i֮v_(xSu%s֗nksz[n1U7/Vx@חn-Hi,̴t[׿`c9Me3ȶzFǰ*bjRnC}ś*cI+'(CV9@tZm PR8[n } 2Ϟ*eJ9,J׊𷮺^PK"Ѥƍ=g+>x`Pي_)fX9l*l޷}NYPBEif94<5j(5W}̴= ҭ&9Haf6HDII2{h7-KljǨKIm!VT2b`qR4 McEcuWnVAΨ]˱2ltuշA]n(i¥X񠩱1 4$HJj p굙N=D邊̬AGebtM)%ɃzڳSJm M~G-fnxZ흛9[.qcõ`Ig7Lg1xeK3v뗟%Bq7 RMrӳY6Kn-ngFOT!hicY8ZmcOrF,Ҍ'}4hhhԈ*yUu; %&'9=^V#~ǚ:w?0JIB$jImK'e9|).qY4 ζ;[ 1dZPKkܛkc 6 u3K :͐T$?_Yf7O7I༵*LCFO*ud[VPýG(r8pAFF*Vx׀bn> Ľvqֱ<D|XѮv5=lvbW0>7/Q?qwlOFkBzmBEB,$Wb{HQX*vw$(W"yPAX*v{~a^գTU!%ɀz`3PݩbPrP)V\$Pl&?w1$hӱJ*!*0= oWW4HC_?dJlk;[/hZ'#[^j+ہKz$H:O`y{{:wRD"uuus?SVV^veOU&ԉv\gXs~guu]wg^tEśInݺ~#rsso2 ]p8t: (R*U)Z0 vkRd|g^z/U$CˢӑlgXtժU'NkNy;++l6KY`!CyAÆ #hrF@YZTVT69yWU^n*A{FWi}#jty!G"O:=Pk҆&ZWp9E._B$'$`HQB`}mƵk|>0Ĩjt.ѓ.@1Gv G"M^y:''0hdLb pr_Lcu]0ZjQJw}s-KzM 7|'tRzzhll5k=D!m^v_2e Be2]vYϞ=Y!|>3!O?ph4J9rw*J5y[QVÜ -M‘2?ݨ0\` Vˆ aOϐ6:o g[ t/RpT/s8ֱz:&V:MJEx}Cᒺeŵ2-x :m۞]˱l]ƺtiT֭e5ջjF%e>*P$JӚ*qE{YIjn`lL)%F2ͲI$v]%6̞ǟ?|lҤmx]SMrj]k*kx1B79,i鵚{O{j0"!hgkr< a /^|￯ACgIۂ/ ft.KN>W^ d\pQb+%J6,Μ9^?蠃nV^:-Y~1b n:T!Enܸq=ZR@o;DǷ50=ux~YLZǦZ!e1d``?7^ WO"jڪ&W0kஹv޹.{jbQR{ni#z`殫g/`(αQ(-/'BWF?.4뚊:((zeK춸Οz%c'L솛;V3V44eZF.ꛗ//s^v AFD/KX=7V7~#{N-wH \ I-8y^~edfJryoΚǐ{\zݍt>A:655͞=;//oҥK_y1cL4 2!B4l? 
O=Ի{lhW^=wAխXK+WBϔ~m[м`C QaÆ744,_9^XX(n---~GZ*5kA[IBaϏosҬ n5Lo0 atfaX9锻ZXe;AYViy4_g 䡸Μg_|5z f͘GcDi$>faxС:]A2֥KŔj62B>ܾ7d.36?ݦh>9qX\ٻ w{}|9j|hA<շvGdfe%Jt"eYׯ_mm'|rm뮃Ds= .ˡ11ҽۧLҿko?!Ο.C~X`+P֭{o,1tg_tE0:M"֧!]>׭[WSSX,(rvyw}wuC=S/hAS,aU80A18g=څ02`[?R"G"šեj&M?Yh~*Bpś=i PpV(d/7z!TLL#A+$>''ĵi}0PQr)VOCBzB a0amiAt 5/vJ\>?]c5D:a3T--OJ>~VNQ v lDZwȈ/qD0P}щ'xM77|s ×z< J@<ٳgAAȑ#N?_~ bbjxntJVVV>쳿 70uThrƌ|T ۧO8G10~Mo-R]] R駟~!=|P^ѵ}aIIamE 6P(p't›RXP0RR"pD duu1 ēOyhát 1qizr p!p)I@p ok3@$NZA$>o}‚gcLث7?R[O r7dZ]%XjCqNz߳[9(dߝPCA!Bg}脁 6 6mJP vM6!y>`ɄE1~7n\NNα;vXHW^tFEbQpg6l; "4V@~̙38@dbʤ7 l6uѣȃFR=jKavo,:t(%,42Eӭx)聶ѱ.FܠXZjQ!|guj"דڨ e^qDA4&k駰1[ck D`\D"5ƤXR$OnK@g!f`QvMt,XP^^~UWԐ!C 389 B]YY|w.gySO0alٲ;4(k裏k)2`fPonCVTT[! C-ϛ_|Eii_~d;Y [Ϟ=$RWoV+<#j/j믿^ӧO>wqk:roRbŲRNB輹sFS3"6,a#+8aGyz3@v/<a#l s#jQb KJJ? |1 3"OxQCv&\r}衇N?tL766"[훟ݮXE’d nxG_c9e+JwňoZ7ydB6P;].:OvNBv$r, 9nݺII[.DAvAwyśgНGuԏ?x;`ߚ$Sb ?~7@;^veLgggr)/ҠAх  uYhn*]vذa$Ԗ g{履~zѢEً/#ʗ.]:r|?mu0j;ФhRo[EbX[\%#(v#]Z6%&r&JbgZ/0(\[9)J#N6\nѨvˁD!ibQdX) VROZǑ a$tōM'/떣 o` 9cb5<|\jT:zh\~=3|p.SXWQϰIzSO=UYY) f㓜$b: h=>F}u;_lMIVP,),^PP@+yZ4o4%㇦8N'6D+H;FQ$ )u;;e+aĮ<2dIՂe؇u278!0.nh1`P[i;NAhv:tP))pş|xj5O>,sԩ'O?32GA~6lͅ IGS=?ظ'[B{~xܸq#%S=z p{1T-_X8 [hQQSaxzӦMD뮻g!>֭KKK;CQՒ1a6-&# @X $1Đf6P&p E=Ͳ6 bbB`5q?r|QeSWOIXZn҉tŨgbF|f!b-=j/䩱a|4H$hЧ89DxrA n.؈n-L $2|vr A Zv-#&¯=N8j2!K/2;w.Kvms̹;R855 ._~9[=Ţ̢ES~zGz!|PzҤI3gN^*&ݩg^^7߼}egqƝwiDca_8.; $fF/n78T?YI*TUl6j\bj]&3V'z6fAו75KF(j@R>Yhá,i&#FU:\\[pj[M2-$ =WsyQ`"`tdG*,4tnoM qz$:]$^(0H Wqw NbT:t˳/H Jׄ_EfGl.ገum _w}?@[W5g|P&|a?Kc [n-Bb֢t333!/QW^yOl4^~cXD(cN'4X;Ŕf j+++{aa!+[H]ü*:oLa:Q}0luzX+S&7 {椷Ż9=L-q6;¡-<ܽg/)-sW{lٖ#`uEz^nKI$Ai3U;'^Z44m )uKK' 't S֦se9|M?2PW7cY$̵[夙.aQU/mM31Xq<Қ&O~zj(ΩCLI<iZ'#AyM.[^[hoi[^ZM0D'u&[xơ}@~xwIYڦX(7Wջ=3t.|BȲyȽX^fn7H4JVG#QFsy{,h~.F{*]$vf*Pi_L}s/36]0eljj>rK}ACKm\#+mSiUٽ{wq}̙3!?اO) '!(Iٳ'@rʹ'B<&L۷o:+RԚI!|-QH,%qiǠ +rJϏ.!%헟-3M/2inkղfe}M޴4V7vMހA靓7?SQxߴTM3/'!x4Ġ<;KF כvX^mF# +㧙iX:t7P#!b;D=XyarIoiAꥋ{YmRj[M;3OB[)`6Uvh;UJA?3,y]3\]כv\&CM7(GR*74Hd5#G;p5 FM;W,PfVNbAcnwCC0 v9 0/Lfk\-rG9^Ĩ(t~aQhKŊ~N}E:>YnWF/FObQy;?**#:_ L2@ 
Dd|v˼ēPH'9qXm !vqҶ _Ŋ`l\J _tZWTL8dR Ou{:9%hMʬ >IUl(y]c{*VV]wQ͆*"5m, !.La0սg/> ,"%]dUUA) P*7P)V ԐAŮbUE4iwuɒ՟*vu|(u_aAkyǿmGOҍݠ'{s,GOtGOu))=AaX V6 r(J$ Wv.σV,kwK.]J|tXGOt`Z|eE_C8-sz:mVhsbUU6666x(TX"iG enX$,L5oHx > Gd[x"QܧGtxfJsp Ci(b#暕nS Z٨ʛe+bD>Ik͠,3zƩm~24PC“1fNŗdK'$|&iɓIaғ V%qLMKR0 +R *TP˝TPB ER *TP*F8^ҥH@Q! oI))ɹjLnbUɿ+P~D2)GF3T{ɨpNŸ$‘hrb#֬`8bk#7&eFp?a W+pmv%'L'rDPxwTUX:Ը7$CfŦ&ݠtⅎ'=ˬ7+1HХdY I!vzز~%{Q,+%)X|^xo *wf0{xQ$ 7 y$^4-%Zt l$p4ZuLs28st~jŪ @]lIyrBs+WamiͲ$ U;:͠,[nv8elD⢱UKXe0jhmxb/Drm{TPq^ɮHbR”6AZww[ZW'dw*hJrGI7Px%;Tʐ nptWh?b%O0%͖?q.$i-\MJ\[bwm#g"c$[bVme柉Omxbe͞l'kq8&N{;K˿D&n*ŪPc;?ző$3g9bUG懺&;$.OZI<23!jcWVV:D"i*&sg\ $u](iԩ'pGqG{챏?ҥKᰔC~+}]{N:$x_{3f̠4iFp_obfkuu/$ K,~w eHbH޲F*TlS,<̙3gٙC IMM6m= Y_|x4 _|;C_}ksO~>}(2mdܗ_~M|/XRR"%*yfܻn:)n/~gy)I.tno@L,Nuؔb <mf0Po/ذa@Y޽oڴf"^;0`kFi;#Gl}dM[rM͛ѣ^466~$ŀ >#DP|?p1N3͡Q|A#u!J]HRekI(M+9&D2Hh]l0#mоx 533 ;=n7SJRрp$8_)SZZlٲZyyyqrʪb+`6lXZ..k̙;vl\ˎh={N8l6L>}q555HbvZ|DJdoP3իWKxQ RR@Y}gddH h`_ѳ>x-OBHEkaq՛JPY雯TBY.){4ۙr]rPWho?U:%fオ||)UBHɍfl͕u.OT$ 3gLYN$-mH4 n/c~溒髄-,(o 3H`nL}iXV k|?!6 ]sk=tbiC~zӧO<@GLTKc#t.:\~=ÊHa/X#ڵMq .7]2+ˤm -+*oi5vŨ'TU5˷=O_}+/|?eEyi(n'V~wş;/={ok3j?fKK!Lf5^~:몫a愂#[Wluf6eĝXKK>}{&_׿<K=lϷp엟~twkSn({>~A'H;vR,D HÇDB/裏޽3 2 ȃ*}wHլE9bJ 8蠃`5i VZx1ڸqwtݳfJKK߿1^d ֭uuuzG:  m1]v>RR5gǣu{ d=+Buo 37cTaL g}OL#:BPpquCm0# 7gaFqNd}ӏ[ݳ_ڮɊ!+W|W,YdZ{P8 Y8kޘޅEuNLxTV|ޛ5Eҳo?Ѡ ie&wNuT= eMu_O !k?̬pCFY1G i_N}˖6!1$^;)JCx%tRKx^{^z|h8[0ʕ+YcIJJJ{V8|ȻJJ!_~eʔ)>< N/Ϟ={Zr|Ac[R˵|rXYҖϝ;C۷ovDʏ;Y7ӈcs >&=ܡCx`wfgTo~~==vޔk;ƐigzCrcj5x梬H?_ 3lfNۭ|n 5At=n2 ׹,Ei&MCvJC9^_[A;u:}C>匳.tAڥ A֒KL.<る 3pkWU=ˎuZdZMJ KI8b^ǝ3.d>M&ABahOO[Mdb@d:Jf6V7|Q\{==.I@]f?ow,q|h5]{8+x .w~H:vu=ܳiƌwqO?}뭷ޘE|e]^zo&_qW\qŝwyCb:Ā }ǯZ(n[b0 U?l>:>Q]vu:^xP 4%@TjD'TpXH+P>!hhi͚5 M;Ө_3g-[A0?BTP/J l%3??ƌOv6 UDG}t%|{/Ρ/\k6 kz7}ʅ"z{{!yxr叛i?tH{zF1ОD?s_PP@H|{}fff~7싐zW!+\#3/;<իW\I|_>D;c9qIgXT ᐰ0 ˩\!(f&‘a@?x _p4!1_~>@<_^^eGl=kyG[.ާO,JCR 4999W]uE][#a `aC_}UK .U1SK!oQ]s 7 6L<?doz t|vGQ ZmJ( y;O(4&hMc45ZX;eTk"sq_E"QJDkIÉd!kH? 
sp8޲}6h$qvH;jҨ 8<sdƤ0Y5tX= `Ay(9ohcҤIw'A-g˖-CK 2|8w߅&O|aA0 .pQGr)( 4W"Lcn(#GFuWE82@?gBsC!B=^O[B5,X.GQ<7nLMMp3IZʫ]lhzʎ|ٳ!X!n‘rAĎ8 _(L0{]w͛7!`ik߿?DG}dȑtT 'tNV>uƒ%ȿc'\Fiq))=OyHS ~ X@8AX,DC rV/WiBXer9HK(꧟~BVTT X'h4G͢OȆ6?dxwv 2z3fSLgp8+:*)!m:I):@­V(D"C0%aqwףi&/****yN=TvaG g|ok{XNq M*!Ct.R$"qb{NF k駟FX x衇^tE7p?`8@ t;@KR?T 6DvM1,,{ꩧJKKh A{'֯_ϬҐF !$BK9!VHL9юN 3<!̭9H#nK~i `hbVO?̞=x L5L޽Iiu? N0ﯾ*bW4Tbpx~ڦ>MOy_UUUp?%W>N]sCgs =j5)ξ%-㟦m p$Y.q鄋ppopDtZp+SZ!NaHlmp5[Tv\|6{N~Gx99X?䓗^zM|"H ` do_" j!`zxofXSQC+QA) 4惧1"HQLhD3E;0IX(m{O[$rУz+$ 1 ^H^ҡ|7>gqYD1X^1к |PiIZ@\>R7 yIi]< 54& ) AgFaY zE5jNAt!D4h@ cvi?ӳ>sCi3f|~: 8g+fXz߁(>}`EB|7n\nnRDTx %Ob~ʔ)hm$4O5n*֩$Rn8GI3!fq+vKJJ[㎽[F Ē!]"ҩ4DY b!"DtMÞ`s9B_^XX(mjj;~FwMfm޸~<|ĘHm!V:MJQs&X[K‘hECӺwnFnMB0c5j uKKMO4|9v TDf kV_ja=Rۂ/i6. Qp*AXG+$kXd$@jiUe-/PA&#bly6AQ5NO ;juPG0#M8ٰCxZO0ͤK1Z"[iiGwpEƒTF"V &!x3뵌6'KKx=YS]uaG~ #HJ3eZPc͍W`S-b:zcH>X[/txQG{0AKo d'qwv3fmv'"*3dȐV1xdCA/#U|![M b1#Vs ~'n) 5!xfB|)ߝV5#WvwUEFN/?37׼ f_ii78o,_]^=e'2Bq+(3o~ayU4ځ鶖S2Pz23fAOdbyi5N)_[H<ٵgd{ef_3Rj ,QNxuRK;ɏݻ7\}#Ȓ%K>C=M]zAY?GU5V԰=FӅgHvT,iVSՒa5Z S{d6chFQx1I9Kx4lJTK^ i-3ͨ5ڧbV3pPp1Tl$^ &sQϞ} JrMĥbd`d9 [tp OސWP'rr5$b:f2fp0HY]FYn ge{|ZSvX`2{;|QY[vXV9xrlO WI_wkc7<ӌnI ֢]|1)){vOU)VEM,iI)K{$ ^El J"9Qx4 1^#<%JCRhx}9:㉸ w&" څ3Ӗ:Z65>cDLhD azE Ő"E}l!!1/7-Dz`EX*TPB(~_ *TP)V *TP*ŪPB @=mN( ȀpMP49ob*j5-PHh": EqcX.(NJQJy2$$ђɤ1BtI$8Ͽ?R&&ä3n}J*$ν֣K]4LBOPX5 IpP"=oHLX)}R"c1sq5IkIlyQVD">xoFFxRC 'AJRΐ-Fb2[eb0]4۾*$hZK$ސag5*ŪhL5A6)$`\J_ q7x6A{ȚzAiI£'<5~lQJR+k\pojPU.Of $ȯr;<7զ|ŠFN/G*>H꽁 BPSYO¯o7'M?z",rft}2R`L2N̳vœRbUb Rh W4H3LW`MfT|WIz%{ۿ9$ۖg3&&8<8ۖWCKrVO>nn$ԛu9Iz%Zw伒=][r'3K9VT)II4BxP/gNΣ0H(Nq*ŪPc[גp 3Ar5(6B!"g㏈MlMTP U#TIXhv{<իWϘ1_]h)>Nxg7mڔz k֬Yre8dwuŋ?^PLP7KWXQSS#yvK3>IU E:.f͚Cw}^{^p%+e'|rʔ)}ݩSnܸp8>o},%8|_Jڐ Ш? 
#6̼y[r}g E4*W"t=8644\r;O?>'O~Z˩ 6L4ghoVO>AI _,N'6*}rssO:UVќ Dhd㟼<)ڵk"~*%/6t+]R,+?3|^q~:t6 YtijjG٫W@MMM̙31b^/Z|?B~wހhd4YVV&Ċ+᭓ޝ9sf{)Mutȑ#\x=% W&6&pUEŋ\N$pU+~+VliXWUj;PtÚU~OJR4TH+ohdk]tg A(,ٸa|lt8-ZX]Y)%F\ #'^P8eK7mI!Ɇz)PbP8k9ofV hܸqPdzu֍=)믿 6h 94I+ϟtA&L`&I.P C=矋ui/;v׊ax .R[ ï2;fRwVzooyxdt|>7kyEˎDLmbo޸/>&_q_)@(ne%իkV+.[QV Ļ^h0Y8{;/>SWb<DJs7/\IV,(\T\J9v8#6F7^}G䃦FMʙƳOS >/nF_I6dFiloXQVviIe5t qL7F`Iqy՗qg +pH8P_trE?CՕҶ@ajժ}ĉ|p;,I/3-[_B_G%/\IҤΆM}Fjvb77WJjp{)LwkVX8w{\q-G^c{ G%0dHEzUEo?|׽W I32 53m2g^p.哅P˹bH4餓FAΚ5?~\./^vA ~w`[PWEEgΜ K1R) GJI :ꨂ(mT?;vqAhB //?M͠!-z7k6l0%%%{.n|SD`tʕnl[ׯ_L8qb~~> s=[n8F5ǟ?nygf;s3 '*3{{徇nS>A2k|`ׯiYԤ4 z=PAHiO9ˮ> l4 1SiVMhRSL FHYx8Rw^s'^ۥKFVi﬋/?{+b2?񁧞?2x41'ͪRREi:7 kU}ocO9OL7:G>̋*)BhiӦiZG=7xe]C͛7_ʺ} ̆_|_oVIT)5~tǩz=tA9}kf^|Ggeel%x*ݻ7~4O|G8 6$a[̦!SÑ(R`e_| d,i‘у6E#e P8VH'e#i?Dt5m}V41)Ikm%̮"[UU5}t$u]ww}*2r5j\ ڃP?QATPP?s=|9餓 0J%hيPC"1{dI;8F %\łE .r/_~gO4 j5 RX` &SSSf3)E/-..DPPP@sϥ^:|p2@VU\4K,xCqL+ Kѣ/E61tl L %C{ICQbA‘LgQվ}bdEP+&;0 `J 8r̙HIdz 9KXTTDNLݡ#<'\>v &/5/@Tx :D.](/TO_=m4H_;fSO=M&\C}p'L/>VvA _~9E<`[;ۙ2ӹvGz&TZ(<}x…>~ذa\pBsυQ KvCT{XuuuF ?f>S")5V+\+$T'ʂC{P ļUkPU S"[2=Qe #1 /4᠃"jpp3<O<6·zKnݺr2Wjn(%@CT[mPC^3K(Ln5>C0_;ʘuh4ꜫmUc*vrN"+F?gz|H;ΆBZݠ@|w=|q0>R 6'`_`Z#JNjb}p*k> &X{GM<5jwނPE+"UUU߳5Aڰ%HWlB_ŽfBCQZ~a믇=P,R+ڈQSi/ޣJ*'p5\V'*~mGq%RL@AC0o{=kN$ND`h[\^ -I ;aUFci#.-o` ?S F=Ԓ<:;o0r@P$jC̈́J<h&i%4E}P2=WPPj%O[3fH?"χ~U((H *B^x{'Bq-_|Gq&52>gΜy}QŋEC"`\5k>s) fޥ't;쳡4Rh_ЯӧOw}APܨgN^44.>Az/J& NT,I$h \pB4K )+&n$]c(ʬeYnjFe@ҭRҎ!$1VL%-+5q T4駟~׿ƍY/R(6@"$㏩S&Mm"Pxq,*OQ,P,BFi˰PON'4IuPisf4hu[ȿ=^i6- f[8=~my4`TSMk IUyE v7xDE+4yM!GP ZrGUM8$a&hn_bf5 e5}}3S*dWnE&T*HdU>r^ uT`46mE8'^g5zDvܛ/4`ՐfSq뮻/'Xwu׋/Ǝ9SPx_|C=Oy? 
fʒ s1'N2["Z?O8B<vؔ)Sjd( G?6!u֒ziQF]|x!ZOi6)'-&6q!H4Juq&[KU*V7+oR>':/(3-jvmXVVxSZdi :ltXjiIՒʵu.)]%m Z7菝ہZ2zduU+V/,tx|itOX P,BXO;GAp%V@E]Ѻ\ݻO7??'?_fA-n !j#E-T9\n n۳gA]||QސȞYl *PŪ]FoD,hCG/ ɔkKM]',0l/ez CY6p8CH= lF{:.%bv'xffgr<)ͧyvB~Y<)mPDD]׸ qr932,-hfn`0P_h~Zw?"/-ߚxjmT, V; Z͠lR iN1ʾЩ_^m˳!8-5U>.lÃmirW[Jǖ F{z6CKr̆>ָF$MK~*TnsgMgl]~D}>h!/_ԛu9xN^a4:.WvNԸ+j=v CajpWOzzmShnXݍ tQGf:r2p8hgh? ;#s]SM-SfOXJ*v0R#@b[Ţ,&A6fN[' U(b.еqHXZCɵ'%)LgdTl3M|8{j׎VAvOMKTl~TR맊`'>RN(v*$ћTUӡ7h3iTa EzNX*vw$mYK^lr]*ŪPc;*(N(Nq*Ū.nbR =vuMU MSbgdqky'˜ah'nҥIXHʵeިO )],=}=OxY7"jP$m1l!Tq8CEiF}Rn,w)S6 _H$v.[~tGzHF+~NtD0CmU)VE`|9+< Y)Ia`L$h${|EC$TIs@EQ]R<)6 /&a:. nДM1O*hhP$)IIJAB%qtzBA?b7nѪm0+ L-z&`aK\]ZK!1sDpЖZ CO`. XvxJ,'YCYkgmh%ݢWpML Nױ0"@{yu"sdjLQti_ ɤF"PGDiGM:aF亳Q)Vš-kBEDcZbUP*ษ:LDu#.655ڵnK[@=hs b|.r;D)];~d qϞ=J?uM]Ê-bsؚO ~嗇~xͭzVEpkJVD.bQc"Br7nDŞ|kINsf9:wq=3+V@@K&8"wB8":ŋ͘1d2I ***v9mڴVMx<5k a= 6{GYv[Zp*mCA]EVzA$8+-*gW^Z"!TWjtW4zrCAYMïi~ە+V{vII8KoY&cH][J iZE(Ni)ݞ-U8]˘nʹU6+vm/tipy+jW)a S\\x/p:-\'u#"ڵkժUfiIxLe̙1,(--EO:uԨQzGsձxG֢&lWT׸4hCQ'Թ!nogY5WFu6Gny5 !Q̴n+o?|O3u;bmlɃ:,ڑO.ӵ~]y XO?}ە6t8`m7>u]r:m=PWuwt՗|2|F-}' JE8yiu΂U6GYPSn+djHTXAq5 L:skIO[R"|`xe7CVrOH]^pDU_WfŲ=|)G~%^><(zj>ǎ%kMMMk׮D%,شiSUU8st{T:ܞ3OA-n®jv&zxvA980= 5>TMTԞ2 پy{ۍ#:qP/_g.S7(Gx7^[xaL|C|x"W.{>|%&Y~D6Z۠ޣrh[&% n{t20orF1F}RihV jta*njiLU^ F^˯~/o灸xi|t6p|[ݻKHHȒްai&L#gBgR46c?&޽{ԩVgV{QBC Pt琠"rAAF4GRWW׼)`ݷ:]n7lKhzQΉN'S.lk$6y.ѿ0ɩi:N-{>VOLihk5I1(uO+>,puI ^pџo>f\||gL涻~VN~Fkwʤ+c\rц$Q=.ODk1HtDEo82uZ \jAO{]1bMƨ$ɨDsx8d=o7O333$A޿o?ڵk!31o}˖-#7Ht$&[6 .\wa>|:|pK/t]w45kn4 `9dYSS#V;xGsss(ᅦY^{gݳgOxCz&o ,QQn chb}L| %E[שI(ƺnв֓Q^=@ BUKNBEq_0d(WaԺD"d)H*=$PpxVnT\T(QmT>Ÿh 8DIM»+߯=70uLaN7B%BGgƪqv۶mӷ~C_yqƉKIhѢ^x!++ .`;wj-A <~w} UUU?rmAO=T޽an$G&wt:z~/&+t9_J?j*d|[/v5ׯ\/駟~Wh9(@k^}_[ÔiJ:h[ICE0Ȭu*W՝'t ;8!̓KroS' ef7?XGg >E yqsq;(6??wW:ք,xYgg+?LMM<Ȅx FDҥK0j[z衱cs>wذa_O>RHfEB7xcNṄ۷/E% х y/g;( _\|wqu͚5 8ꨣL&kswo3ft:eW>d^{,sII JzqIdb Q;o<$8MP)TdgwܹsJ*DDC"+(r|lg͚E4+E@/ڔ AZ2D}G5rHʢtv9 BuA't[2Tb}EjHȎ#֝ !DTQQRLA8a?![SL_QC[R)LJ8ꫯ>#ݲe 0BƟ~ҥK07ْrݻvtEƅW6޽DkBޯfC숼VL8QةG}wtM0.Wᤩ_VhZ~{͗\u6yUW\#md$F 
hXDK.fagI ~T%Ni7!_R}pzTYð xVk+cѱQu\g?T(A6o |$ܹsg\\_kرPgG!"a2:I vP_N%\=>^X`nzMn>[XXx饗¦n0`cƌ D *~!zxW[C"[=R!cFӐ!Pd 0h6atx 4bLBeGkhzӠ2ڢū: KhT *z b!?`<$8pP@t2n4D-o$~R*e-\;{_2BA xoߞቼ "~MHH'TW?ٳoC^x!D(ޙ%UV,;vxg-[vgx!<|?~ʕ+/뮻>0͓'OM&MBϛ7ێ?xbK\s_.ƈEXi/";zp̙aFU{`ř Ჹ3CjL# B8<$gԳ"5JR:5ɏt#y5"zH0iAZ"(ܼ#C@ay#9L7cmwvWgdl y &#$Wd .w ]dbzp p3x0LAG"P؆()J@Dii)4h y~䭷 Pc=ZpI=:F.9%nѵw[ny' UMqw?ߏ:Pغ:!CӢ-v`)p.D;uԋ.ku}UӧO?TiCx6{wq$+rȑNOAQ(.jACBqqqO,̼ nE+|'Ph? 5ѨԯSbwƂrpTJ&gx׷r╻wKM`Jd b"%"DTԬS|O֒*Ϗ+!Vq`!$G4}SPuyu'[*%[jKJcꟖ63W)!E,J&R0AI'V.+][xgLHt4?Rj̙R+JOOSrh08 OhOdfCRm>\6l'g3ޢy Ө:y[U.?b0xL{`'[#^G3oFjE} ۬$!ѿ*/6}F\Ǚ6.OY<1h%#/qJ5)8+uyOzգ9:C4|DJZd &cd$<O]Murj*ʼneXFN6kiQ9!NJMt٠ IrsLQcsOr`ujI:{c>.L1N圭gTؘmLd 'P|,?&1CPPQe$$W6Zܤ4NEJ!ꝞF7͢7 bINAYvbCG}颣rXڙ]׃. ED<8Os'Zz=$x0q7kwTj,dbguy7VZSLAPbYf!B yu1ם=6T40GŲHVyF({s9')pz6CZmwM7&h iUY}^;<%5TCVjuڇ&[-!}P'Tg In ~(9[?-Y=>}tpݾVη!@&yAV4^#CQ+jp{QQ6MclEtq^6'#p|owzk=^o^' -6w'a/V*T4Cؐ^t]dOwx'^^o+ٻ . +ٻmUVVω MKka (mrB}%{׀+ٻː^ɞd ]JZ!Jv*TPyT($J*]DNǤLvH?CE< DU PѳphQسgϒ%K֯__WW'Y8\e˖=c^{|E*T8ܡB۷_pgϾ?qZmm"h֯/KahK/ܹSJ?^SSC\hAY .*:Cb$Z^^~G>owp("aǧ~[o-,,_矿[W]uh,**j!lrʅ m6;%WUUIUdEEP`šB.kp%\r1nj=}Ռ۲e=o~鯾_޻wo۽|NZɤ\f=.^Rg-m,Znw@ӟMVk]e%ŒUx:Ql}!8e˜oUV(P584Jvϧܰy|$U^om;ktK - 'K=YZ'8'ƭsZ-*q:a뛘n:IMpHƮjgK۔mevnw:U18{ {Mسgϻ;a„: VW^@&M%0vھ}{JJ^Qܸq#^\\ r-YYYMe,Y+̜9j۸1}ŋ_p#G_~E 6Lkv{I/FU˽zGYMQMCi581&nןf1`Nhuկ?}/>P7;N>miLAtyUJ*Yˊ^QQ:m[1 m%U;kX:E2ӱf-orIO / m)ȫgԂnh6 2t܉F]&'mNʺUeMbj̆v_8 d.z捋nwhu~FAb6y%WX@OT _RKl0ŠɨmWN"<)KƷU^ZJij"11 [u#=ykI&SIu2VN *jt0,Vuwn/{V8lXisg`5iI3DH媮XrWp8L"?=Nm&h@p[QQѴi, ZZZ'dggC2{MXwy1*⩲#[+&>2Sʺ;ٱu/;Zӂi64=Sݸv&mX~aTeeK/|衇d@bͶm۶͛7E4>7l؀=Q=8/bҥW\qŜ9sL&W/{K}AO?ݻw\`)Ѩӎ>,+u@Z~ىqV!p릍_|sלq ҆!g]^o^eЬq}3$9,;5ZZ)]QYOI}S@zғgYe C;Dxz_j~52;mtN:q\tK‚𔟼z*8wρIhš@ia\g;g9)syE NwpF>SN6I<"CʏI60-q|&GIm.L`˯3c& 0,+o1u0mjj6[;ygܤɌMmM\x7ݒ*m~|{Onz 7="3!(%Yv' /RxĿ [srrv܉2nW+466СCV:FHn ֭xqnjY{üz1Ͻkzjr fѨJ1 k%Y1FxAM5Eja:])Bs0-O p{,}r_$gMTtj)^zy_}igfՓAr :^iW)!{՝wM.^t7ˠդ YoGHI|&'ʈi>`6ƛ ėO|Dl>x{1?%[dqO_x#njtf'pS,l?Bh>?_vm'|rSv?oxݿkVbGh"駟>3_~eYwGt =I3k^'Gi̙qt0`MIO믿~ٲeP.Q!"v,' Wۍ$uΤX^Ϝd} Ꙟ5naYيF"V" 
N1Y:l@A\nb}c ~ 5 5&,QI'< ^44-|GV!C>Mz] <}~}7|M3111if*bŊgy&##;묳~ײ26фiӦLZ@=fX=ru׉sm޼|:&c))){N;yAbYN%t^(*eUy!STMgl`LC^&G)[OЋ&~JL0L4N2&jՓj4ӈNf*OPt{!I*w+J ou͗0ьix{R }zI,gauI5'g>@6h>/F}_}իOOp䐛 =X,>/Pqn~j't7''g|N4iΜ9#F4hP޽#dH6'JT@-ZFYHgJkөő J3ɈX?prޱc?p)qHբ"S=wIP-OZ,0l ຸ/¸RGµ*O@D)A, J Vנo 'jJ!'{$6 u'xԾz NPDYb-QrPnȑ3gμ{!ZV%gS, O?_~G~>g+8e߳>m۶JF.Y}az2꫑UUU˗/_ի(J_-[F)`Tpʕo~/wi*Tt}HB[6!;byΝ;'NE퉉cǎ](m7pÃ>a$o" ~P3l w"v[Ѕ.] ˢ/a8^Of*'@lf98*](/z5jt:f BҜ+**6oLPPP@_%^Ep֩SL@-+yPsyZF9C~6ѽCA RVv`Xݎ] V!$ \hκ;^}B8z蜜X H^nΙ/$F".?_ٳt:x'x'y框!O?袋TB7l17`0 dovd17ހPĿuV~B`"zy}([ 'Pp$LP>,2W< &}an$lZZű(lٲ^;vJKKg͚ރ,8pU;Vr1xި:Nn쩙!N'.z[pҟ1trXSAOd$t=60uaT,DZB[n'T'I3jJI M\N1I4ҫ m6 CgD$'1lryof.Sx^cXA8 $N7kvC4X֩ &C/őG}4gHz@wq{mԫc̬V+?SN9_CQ20`ꫯ>CpްaMm ȷz[~=\?~-InpGbF޽;䓑%;;O't7,F C p`ڕ`6]ޒZªt 'Jb/1Vc"L͇YoH>z! !낯wrY\/<+*++xB;ş]4w\Vo)2c ۫Z4 KV[Qq: gs銫.Z.JkI7(ûhm,(c-K$XDdЎIO)i;(hpۇĤ[~uK@<;˪wWY ZmtyEcd~{E{^bM2Lͱj7R,Ax>鉝.5JksNxfAuö*(-]ۄ8,9iP}lV;3,2# (w?x}=ϋŵqzXdjAA,6^duLI %F|kWxrui0 <@nͤklLRbboxܸqN; *d(/XO+C҈DNNNFJ &M8;N=ԖJz`T0+Jм vު]@HoaWxnىCR;%!қv@7ݽk̄G!7@ j>ii񲖌޴NX0%"#$H:w3=7uZ緹"UuOK]L]oK !X4Kբ,lgDDH*Vi:QObA4!1)6>AbEN"\愜4MNBR"nWEGEZ߅bE0d8 >)sĻb^O5AEq iQBbeetCfdd 8ϖ:5fH($),Tt#a#kKRMK0!0FA bq.RWD`x[ 8IIơ_Y=wШ`O CbPt :PVދa%McD'ɯ݁V;``Ake<|DzfL~CbUPG?xS)V *h( U(NFNX*QTtaVBPWPBVB 0@EgJ*]JQDU P@Eg!q_.:FărxCp p$S$:iET£'N\}ؽ>zRzJrxƈyקnoV^b0i5X΀I?`P5$vE#)6"$s_>?%ZgjD+./&1%DXLb Ó(TU.GD;Ce ,)"s>=ɦѕY9eEIy$tvpMX*TPBD⠄ *TPqBX*TPBU ?G=4QrKkԒ/}'*Ūh U 6~kw4HpA6&.eޔ.Y ^ɤ$,7.AY8]W[a7ĥN)?p+KVA#UGuci}I0I1-.uF'HO&D knDrĜ&FgJ}R_U<ŘcbPas٫j3̉K$.nJKjU1ޚhmLAH&t&6V`Iei/dbJVPm̴ۻA"+^}erj<OV Fe'@E/ӭpU?:Zv>7j#4WUWlŞ=RX!x5(ɤ$j+7njSz-%2ht9YsMvh؜5c%I1 %EU G5R*WTSkƏ85#uhj%mXsdR .wnƚ=F#gH`IdW hs9l.VS?u :m&ox=w,vL~%.#**p0Q|`FFG POl[֭MAj(lHi':J8mNx7b_2|Ze:b5a1mmlf#۪ E σ3{Ma 'BX*"t2U QE+@NbU򩮟*ԘD.RZv`A u^o$S bP]XD*8RUPիoN8SO=oNcx<.Kѫ[~?_SS#|V\yWygu\RN$A?/(ZtΝ;wnݺ7xfI  GEKDnb+ A*>tb DހJJJywyNJq˗//)˚5kP)p,Z}srr&L(**Q~)J(,,|Wϟ/_ʋ/_{soo#u=P ^"|12jdł{Qx(E'|?!B{fWUU믿E& ]v͚5멧z|I$ҥKoh[J>vرe˖#8"&&Fnܸ177O>hqweɒ%#G?Ð@m۶ V^@@Sigvrn .LC'o+i4.+ 
Nv|93o]&kch!Iw'б(Z8|K'm%A[alNw>nO]IÄ\aժUfyΜ9XBB!CN?txvQz´ ۍ6Pc=6))IL&{nd: F#-mD%w,%?SFFƌ3t֯1GBǏGP|m΄I|1ᒵKD >n8ɤ=^Q~޺U˷l\ϸH &{gYKv[joqQMˣ =МbڵvFCyUuJ,uqWy ˨ct)a!-ػ{M5ՕU$[7_iC]A@zb󺵒U$0kiqц5֭^IP+mP olh۳{ew /F]5a„7r.mD@pQG)b>n||̙35n@f]re\\ܴiӚw 2DE`fݻd 숪޹sIK c?v+V6lXsWwԍQ[nGM/Iְ":7[ゅ ~X|YUEFisawwW쩬E&ǚLFó*i+% v_~Z-,.,P.DfUm/n;,8-!p{ee[7۴fMK[^^lɂ?xޘ7O *'ѯ;˪7ǛkJ15:JaPD.gA %[֯'wn\UQ~qm%UWIcyu PYVťK^Su_oZmNb5ܾu?.\eR*VWVlXj~JHÁ'|"<@iYfEbǵ܍6lXzrssS%8 5RCCCڵknjLE"?cǎ&ٳg  AP>z4333_:y)SJ.A,>}u vڕ7]dL+lŵX~l}2M; V748::0 rwlۼlªʊ̌T\B&l͙_U52'mlߌ14uEUV[x/̔m7VWM8MI}Mup@FU 2>BgIjz|U$+f!..۹uSC}]L\l @ίs|#S N8ZV~C?æN)n֬XV_W/mX[S7mQ,\:W&^\jըQF-_u /9Rz`={`Vd @)zyy?qF9A45!7|oO< 駟}FٴiSMM 䄨˗H FشH/R_~ O:IÇCJUïwrYL@im۶=@Uɓ;ip8h{?AvX24*-=sGw1L&srJ>hoR,1fVc-!?GԑOHL;qґLrxb(!3`zՅ~M1h56YhuMWF9O&7>1)Y3Zg٧<ғ2.ϗgI : I7"/#;X~#b n3cYbBIK͛.k4d%ƚtZZtyM=q0[bΘ{O/iњ~9^o4Fu85:]"lDVh1FGEYnONJCY]qÆh ӏ}s'T墁 #( Zm/0; EFV΀!ÒRRF%m=XP@ h@Ơ&m9'BAA3f:%0D7hǤ^ `Tq M8xh\|cYr%K#̓O> I7RjN>hǎ͕'[K۷o0aMZE_}07x<$Ъb W `L>bԺ{O1O‹s}G!_K @7EG( *ڥZ0=t"z2JX@B_,BOvQGh@k"Ey_Fy=݀ 9jrbYL~`,\+rƏ N8?{c=] A>NJ/JObMM @ݎ7}IzQ7550T zGyFPp# ܃`cDIz*@<{qQBzqbѭuA`†ܴB=s/bB5Qy(vH/Fdзo_4y[H!}UW] 3Lؗ.]:{&O ׮]  a/De-4@B2ԛ|BҠ:&.$5 -Brm޵Bi f͚u뭷r-> r̘1ԟ(RH D+y0:JSO+[S%B X|v=b1&SOz V$xL2x(7dwMQ;' лwo~Q; -Mꫯoz8(?&aZQq@Y# URGr \ϵ?x1첎/QT,8Voa'mBfff. >DA$klld߲e A{!y~'x"ȍף>:w\,//)3f4JhD!O1۷oG }L3/s4rm%(< [S="SN#V`?x)O0HT ΣhA1`_ 8$&˗C$Y ]D1J8{RSSf-~g-1 MFQb@W##m!B@ĦܣTG %K@d[N`ԅr{uPVĭJֹG;1EPʕ+њcǎ=QEo6Y{`݇ j|BW˖-8—0VNN?o޼?dlBn tտxE<1hJ, /Eڣh 9!t^D?SO`PH}%ӾG) #EޢtZ1ytg^x! 
* PmӮZakp/b;l[oF22۶mcCȾt)#AރT{…>JL ӪUyQFчfLF Qmp:~4N2C4Y@ 9}pE֨zp9h w.z$k2&F|2Tt=S2駟J{y >Al6^~eh[zoikڝw ˢI %)Qo(#^B?$R(a;F<7O13u)@ۈk G7 f#1@d }|(Jb_{K//P%l-r<54iBVi6=b "8hO_Q0AEPCXRO뇈^yb/4VFXcbhA\@]nVF%+9ߩymsyoM.7$kv,F=]ߜo ^aYN"jn1Hnw{xhTйAp vaгÎ^{-RG?d0vg#Ԗ,YD&&&~Ç{1xaJV,Bx뭷 ⍘K0 NRcu8~{r5]ZDƚp)g3IbzNp;k?^oU NvuHwcyᇥ|Y/r 0(gذaeee} ?>3yӧC*0"܀&0a”)ST)HlEc <̙%4PQXT?gp ZDÈcPOvD_FȮP{wytH)P5!j(3f *I5g͇J;PYfP D؅M|G s> U:ꨣ}1d8{l eDaJiY%5-ߞ/uþwW.7pz[vbGzThcukv{B[U`wfMT깼޲R>;?AdSoj64V`)+juo2 Ln~@{ h(kl[S$ hVWTXR3侜4m5;3SZR$ حMW` 4m{}Y0LgBuN[-a;KpiDcRQPSud <=YVTv2s?r*6+mA^MiVp$WY_Vg4 HFKZp(chuO@lݸaǜpRA#q:-mvnEхvZ%`ml۽+)%%wVko{e, #ou{X3deBMo'cagMgAG~k: Q;zhOdH%}^8 {uer-h ?a;ܢ477wo E@=a| 7ĭ EK3^s BTmPO4WAh%3e$YhPXI) p@ 4-#>scaDixvjAF49a=(&vXVJry@z+=2HE/Ej2CMĜkkq%M+k3 -J=MzvbYa#G,QPG{iC@T\N$S;hhh;w.L&Y[@P V貊dfe'$&u3| z]R)lᛓ'5y#(qU,#n03RRӴC"Tll ,&hĚ$`sWU,t񉉉)zيk*V9q W~p)_ 1'I0-FzwrgO)Q蚊eq V7UǛ405֒7%!5"(qU,f42vxbilJNM_^Ml\\fvNlGLG6QC')v Hg6BSR{+BQ-O7y{/BJH'{'ӱ2WrGGEgf1Hꗚhg_xINDACt!B a9:#+k!o̫G|wG}Θ1cjVi A( @ RnDr@Aσb=ϒ%K[n~;w^۝*TtJ}`BM;*,Z>|YguW>çvp׊#u=PSL zLwc9c*-*i.aF°N׫=;s BABfDjQWtAK*TP0#b1=;gK_Uh>{ђIIWm''U'lnzOG re4:v-yF_ ?ȭ,+_4$U1|_ |RJrۯ#.X.`MWɣfǘ>˰={KV>d[û?WowzC-r0&׺ ':Am]{ FAy-.9/3j.NXmuedHO ("~6#|Fv֯b3df{m ނ;Ϳ<ˬv[|%p8=5K2S LFwny|?kP]2.Wr#M&ů|U;+rc3xBN8}If  >VQbiK|Le~4mRQY_="hflh[O6T(8/dCyMaÏjbh%ۜK&%Qg]s~~*V5oiEf\^׏Z{v,68K Qso/>}i1>ɇZ֠@Oz49+{N߃ݟ^4M wAݱ鴟9bU ( ;fHQII@, g P\{yEʷOu٠ [:3]&Dvo|aT^#̎iv2xmǟ#K{8_~_TqԄ($ĩmwؠXɴ"rEBO*" UP)V *` bUPB EU6*JXYQīfFcSݢ5~rf~ނM v>Ɲmqߴ{dM;nqDEbQPV7`a#my2} JvГq ߴ|jpү lU5e !;k o*z TF3Ԙ!bnwSSTQQvE-[S y=J=444[V->oΝO<Ļ[]]-Y a٤ΡwAJ}Q___PP@h"V"6 X1ˁ^BޓO>׿k/;cʕ0@ mݺu_ [K) ޾}ok.$UUU|믿//Y$׈^/R/fodjzW_}D2h:}B LqSb^/[o}?38l6_xp-2eFelݻSRR?O?}G{RtV~~bb^J S?x]81O?uz vϧ &In_hц TBš E(C²z+2o޼+8묳f̘XYY޽{g͚ua%M&\TTnݺaÆeffJ.yyy~ hl4o k֬5X+2=wyy9=bN7YƆʊr+n߹mKޮ^p8KKNde-]Q[Xrp]Uv[xNǴ_дW6ؓ]ە[Rz7vatE$yy;nv+yb^kt=uV#dVu 6b闭["=ؿ/3gׯ 9z9[x1\rI'ő!wedd[J% /_^__OУCTWWT .HJJbR7'ohiƍɓ'w?Fb:mӦMl`@d-*[}֍<(9w ueKm@(4'ZVr_}z2{]Kn/QZ|Rh/BuLqޱm[7㻴MXŕkʶWҴMkJ])EXU㙷^|nǖM M.R2= 
TWQʚKohkJeUU<ۿ>p]_}2"m 7hZAu% wUo+\W]m_ M0FP@iqъ_kV,s5dam _7{wO?=s1QRRlٲ#G2WZZzgR.^r% oܸqr*Fjժ~O3f #6w4*!!%aÆ#FGJpbZ1uTddm oqQ4[ #lMFMklm7R_[]i.#KD]b2qĉ"A;IɐtGKh`/>ct:7}rn|QGG%k̾9sL&ƃL30k'\;B2ܱcČ3:%oDΝ;&MjSI#I@ץz:(n ߳>jĩ'$&)S|K t 7v#ҖUW8x#{wuM&{0,;%').+1vxvjJjw.dCu<2)5mȈQQ;1noIm&Ќi C2S$':7ztTWVn\2)%Μ:( m WuU>-3k񉉊nŸ..oQ'qFƈÐ& ޕ҅ 6hfwz`mȣ<|cc'N2҆!3*>ìsB$m۶!jy#k}466.Z(;;CACuR NM~ݻwҢ9ش%/\-/G_u3X Q^ثϟooٲU2&--u!ji;IL-w;Qݛ? mCP޽{?y$)8aԣ;~”Y99:}H(9vݏ=yC1vH9PNp`unOѐg0 Z K6_i+3.?]uQ'ݷO !x`v~&>|؜ ONIy zɆ.{ >_MCf :<(3lt=v@)eX1f1sN8i}0]Gy%$&Nyc5n|ZFFC3G4֯tÇ+?-A{eee,Rv@5k~O>sAՈVg!>즛nqi`g}o 0H&g۷o>}:#O!fɒ%ǏzR6551y, R'?( +beeoSO}?<|\=Q?iȮ] *<{II/|뭷I>b2&pȑBOxT,Bՙ$F?3.ƫJ\hAprh;d%!)9e! II]&Wr!4Ot:}MC4_z]tT- aK륣]m`En 4괺2 'Mu{|nwܛcb`9,W!f%6.^I=$hMq Ynj撴^`4amVVU OLNX8O~ 2`G$,T}ojGq4 ݞx𓔺 a={l޼>bwt(lUTT$SN9PD; DE]twnܸ$G^{4bO?E%ő 7piKW 2#g6Avӟc.>M=cχG'L@IcgqxeS@HXIVTTD(TJkIZMdei4w?I#M^c$P^(Z| 0"=ŅbY!$k?pҍ78{aÆLcT %CC#a^x xfggϛ7o;.\< (0o4iR./P&\NY|h "pؠAHI-DT#E㏇&HÍF#$F[##ga>T5”/)/N: ^DRP!tNS ïdtRܧO ,&O\`bnj9ҭ浡쐀JTxB "Q[%soV"lkp!r3DC q BB[޽?믿.>}zဖBhW_}5/=܃K;8X9'' 窫b+4.1d6wXA?Pĉtg{Pi`G R\mR\3 @y[*Ɨb~zaCc:t(:PQ /NAԊgff_|952e ߏ/➠!55 4B*PI>6<}I3 m"ݭ"VOuJWX0$#q Ϛ+!ksA0yaA~: VZ%3ݥGSRRī3Z_Y ,//ot BQgdd,11]KI:".. '+RUZmcD'PO>,^dc(yΝ/U=HCs؋_~?tQG w T+ T^>@@vR{jPqжk@tTDQQ>ŒiN燆3naskx^%{֣MW^7߄)1L,D[bkEHեKxe/ye}$)<*J4xͷpG}O8)L2m4رkXm۶mcGToF[IE @ru]wcGaM*TFDpS1 Kpm$CÅ(ZGR_~܇z |ᇯzaa!=޽ p 'NO(҅ɍjD '-V-=XЭi(D#kA'Uq WI-k5Z%\DE?4HFVjVM$SBOY"F0fK.g.3=cjX'Ljժ/E%,Y䭷znC^B&zٗh8L< A|_gA0̄8F\zȾgy'O-  YF |2F,¸XН'%X,hM(V$&+H&5.[ ּ[N?th:|Eaqza޼y44:6¸Q(裏 K{=[[[X,1&nMNU5z$WTQDoO֤QM.!~e=b#=by|~e3Vo~Xgnjvz9b O>kE"%!'\HJHx0JGCAd9{lUc>IqZx/njC'7ޞ={Oa@f͚'\hEv9NP IGIY0*N;sAA12ʤپ}bYI9sN9lr1ǐMe!~xaޙ:4"=PMPkVos]yQX h'%!=.S_hԆ [3tC큞/:d 2Cx=YVTRcn)mq_Prf 3EY5U?]JZ#2e_@f!x#GFǓ`6i5D E5 fNc#[P1DB 5 :h-:N1q])ٽt߲|}Ʒw)4OKw @7ps=K/]~( A555_駟뮻W!I2&!ڠqwVIO3AEvEy-/b؎:@lMT/F8\ʢ'OF$QUʢao#%-$Jݰ[" `I&aoY4dOS7X[1>c[FҺ@k$"vl*Z\ff%&Z~M)(#z&')ΨWo((i +*XKfb_EF&~[c-OHH@fx7|#Q`. 
$W]uլY Ў?{[n>|xKƂ{` !9jɯl06_Gbx 4 cKcit#h&~?@]FT( >wljAXLl\t(8_&:Zq&rI="+uxvAub >0HYfZNwEOI =@X.wiOJ=e`-3[:aaqJJI ':oư,HXECS͑d1M;)^Ŏm}![F7ºp WJ_{ W{=8ɑ1 k>QI|>Ąϲ`QMM_vJUpZ__ Y;Cӹvj;Grėy}>%FZkwl]:bŒ3$>VZeqmIQM4p^»ؼ^Aoϲv2wWFUAR ^=tZ'7fVmǦC sƆ8svor֮1vNEs@OQ&Nt,PkҨbB8ϨUWUd[!pMBFQ)p:YNx!bqSǰ:7i6Їv-@Yity,zVsxͦMF,zm_hwΙ~Y{'/Bqj811Q<*YU `Y!dVD`("&..O_DEE-@D %5bMpy.e0 S"XMzmQOb>1!ٜ%_[ t5+Hya݁`青&_ F$IxhZ!.XFL&sxUlOX*"yTȄ1RC:}B LqS)V še*MEH*ԑ;0(VwX:0P PP"P!Pc=fz}*zJkKZqBl,<Zi]꼬sg@* SRusjR&ZF>oMmQoI10I2b!5QWoNlW~‚?p=R@|$]d69pIfwy\ 7tO:>CJbX B$>7XNmi{._kuAgEWyWDb;]!!<[5RDWxϛ tze0 #T i-Dmsy@ nkq$1`qDbɈL$:IcdF?`tǍDahI4?!fk$r+EK⃢""lqԖ6kvWѷ8QkKXdQ(-Ğ̩7DA2LsZRP5P^*䥇"7WŞ1)1*icpĐ uzO FJeLGajNRe)AJbLynruWI00ĩ* ٥ho؆Ǫad&˩-z{@t =bϮPE),d]\=U*JbLZx!B.J Q0}>Xg՘P5hwF&w=kLD '8] tpZ.b%k¤n*[h͙h-u&7JB'p;TE,g,xrhjז'c)c|#oF\,0v8q8rꄺЦP穸ǩt:f3S'[_^75'Zr.jĖ;7ov8κ/r444E z\5'N ΢۷o߹s'`m,Xj*J%B{p*S.ceN8^/bz333#{?wȑ,}f?/l6*B" ?~^=+;*)WgTU5_-ZռUc%6d4EdG1%do^> *vX(,,2eJLL hܺukJJȑ#% aRT `nE^is'kV(-N{+ 0[AxYͱa`6`y9T%n@K:ZiɲZݗ*)vD3';vj3򶤩]5T*mzA9~]UCKZ-2w 0eDZ2 FOn:5nէ+ fc1rp;hV;}bNgC}ݱ۷uZ*úHk06nܘؿ;jjjrrrM># œo>5d{μ#U'*jM6'5zU5F ҚZMI6pqfMtD"59'sz`rOXoF>yq`fAK=TGCP ;7~SG0! NW.?\^WSM--p޿gݛ7 Zhfd__07Zhٜ3uGjJjNr 3j |՚Uj0t9ne=Q+NEuZTLe=(@ ?//Q]Yo8~{^O-.F :uj۶mƍml<|pLF%Bׯ5jT׮]HN%ܪP/1zLA)! 
dAA{^>Fo ROO-93&Fa*Y0Yw咏4vH-!Jr{r k4 KO nbt߿^Zӊ Vv~&r:OZmzd4PBm~QeyL-9ڸC(hrss D^G"&[i٥baR>#rds_׺=n0u穥R2Q, ;>%YAC|'9v\phϟ_y;ь&].)6S :5#g<-4bddt4 DbQag>9w 7uWRQ Bzc%ݒ9dvywC#x|UJfh+$@.D!CfC h#fpR|d@5~mՆX!\)oD%[Xs*z^}ߙ޵@bD,PD7o`$/|7K,1=z 1Ʊ\`/oڴs*;v`CV zbl 6geea={=zY,0|{nѢEOt[}'w_eh1ڝ;w4,\|逸z饗֭[PSNZ A6l؀Z$]̌ޏ~+| ;*H׶7nH BvqէP*Cb ϿsDV 2*(Mc5.t/jT}|z; !\&qR^_jI1Q/  ( ?9/wBW  UTn]t@;ĉ?k֬Y~ʕ+333ȳ0/>p-ҫW/mڵkKΝ;߷op{ޓO>9owQ> wyFQWXYc5\?C W\q7jAVA#߿?su]G$y$<ޥT 7bW_麈=.tslR'@؄  "oojsn`/@m' c '1֑;JB,"ƯGpV$$?mn-c[7E؆ sĉ@{oBϿ{###HzM``;ymlkiiiDD0 ւpGGW_}5k2MNNBP}vpk9-\/7##R%&&O?pXСCe .{Ǯ*T*@,;688{wuH?i$\NfpYlCi F*X2JwL3+?o.Y;L綤?Q) B'4%Ԁa|Scls`{j}Ď^uCb]\LWRRRPP0bĈAmpWBBB||<|{kƏ_VVx\2`{n"nVͩX>LMM6ͽ{J$BA0;v(//2d?ˈbL/bX_A 6,Y۷/#ԩo{v>l8k"kX )+.[ N0!))p=#^Q#G 1 ;cƌ"#JF-~\nhrot/'^nʻn Z[ r(`-¡"FYE0s[hݵ=4lJ]\;qDrr2f{rƢ>}xъɓ'~x5kiqq1yiRee% zMRQWWT]v=yʕ+KKKKZhx˰'6 b>_~̘1çVn駟b0> @){=1O6 {#BuFEn%Di<'="BG,>QKi<ޯ-DE<#hu{LSt- ŷ}s*tZtH,^eTDaI.Ot!*- =4}c͗~اxY_0z+XvN %|'5kH`, hϟ;{;sĉ'_K^|W7,2wڵxbrʔ)pƍV,xپ};OstWp6Z}_ѣoFa[n6ܯ_F9ukjjZaxeN\iIDռS>btJ"E{bJE1QN&ّ+ EBZW0RYIu6M.'ۍRb$QJBLTVnMn!:ˍa(Eg~踰'gpkBt' 0@+A(Ftޘ# V:K  5^(Ad2D(3_#G GBP6<<kvx#+r`0+dffl6sƌoAP zuR ˑC 4 ˗| R -r6lX`܆\"9]z I&r o2[\T"6`rYlT=P;jZ*͎7:t(' ( @ĦP_Ak=إ^Jzܔvy޼yxŮ|<̐!C\rw 0[,[n%(FFB  %jOc4 ЍP)tR{eFRa"Jg,|~Ryi#*D@DyU p4-fYNpmIbplNgRRHpíNkqsX1yAJ_zd3IZ /^ڠ՛-v'-՚m9 zuln]D]DD%X@1=Pz>`MKKƺwEFF6cXLL̬Y^}n | 0a„rfʔ)sh+H~CXGW\q^jW&6mڴ馛๱ٵkWo )<8&XUo4/(?XXyA 9"8DV-D1O(B] SLjdHZQ֠dUV~yLd^h PuJ^g0eV`,љİ , Iŧ+ݡ ꔈO꽱n?PXZ J3z ( [z"/hj*u]G$t6BBB@F yih,oa? P@o)$i!FD‚$F(duXLN;! 
33qqA!,%cHht\TS|AL|bDTORF+]TZuD>?ʔȐ^j9H ; QMtf]!%{Ft$ba؈rUs{<1Ad +D1؄ĸfN#5~mv1lc \8T;aTogWzEROvrW$9#3:s< ^AK52!{J^ZTvޔ&hζ~i0cr6r+ ByI|GTh͙h0NԙNW(5 -R$K ̨*ux*ZBB@׉񵳋ҧv8+"rt..r:?ՁךSg#b9u>]x_^wNKc~C,Χ L>Q!zq8ˉSbit'NX"_g9r|`ĉ(n6ktvw( Q8V W0~t`w)bƟ;Oe`9c`<-ZhRM;Y:OԛX\=|dfVv`.T"JSpNV"n'igNvZ%e,|Te՚lc-޴!S[R-3l<% SL*Ks]nOT􍪐XR bt6gbV`)l.@B`mNP5@ X8iqu6 Uڈ_XhI zCV'Aͺx<#bqTRqԖ* .Yn#l̞( e4 dTQ$"'xv$`XF<]3)\̰0PeH8JbL.â;fS1`r= +!Pi.>3,"VC$T*1!ˋ"Np{?ع7 Ee-,XNd5^mq3FqkحeGYg* {#ₕTc\Vhh(yJM2# |66*Wo2UUJIMO%1&Yf(%R1}3chCL2"4JeL5=TTgW*IDpJV˝,39R!KXla;GpH#")ʘtf낔1Ob NiMD%1&˝Sv$kHCԱT*3€.hN퉓;zcmP6JbL5fZcZ<^%cgHHIKC2&۝SQXR{% ѽTt$.9H$;'N-ȓ-pj[:v^l6Fc۩Ϝ8\<'N:KIt:w1o޼ÇSI<B vYl}yޗ$18(')(\ eŊ_}Ձ@={l߾ozH59}w}Wumm-%\.4^#MgGb-#N8u#fK.-))iDhڲ2BO&D~xΝkK,)//O(p8P?| .ܻwoEEaZl}QQt}!Jsĉ'B܉b﷭?K/`?kWJ0)))99LlC m KM{SΙ3k/L&"111U-ZwСz1XނTy˖-oL"pĉSgE---?qj%$piMDDؚ@_~n4X 쮻oرSL5jԁPj%ErXn3ϼ'N 3gDGG-0nݺU.GEEw“vQ7mL`Tj ZCd9|X~ C@:d2RN)ͱZ-g&Qiη'& I}A>9`:Ih9L *Hf[ ӡ%MFĤP dG}fFh5^3Ui NoS"v5[NR3vwXbƍ'O$ WpP]DILLT(y`ꫯ͛gϞ{3{I,O=z4liÉG~L&K5f#G 8{ב#G"{Ԯm[ڮje08SY`aš )356vͦ⢽;ܺZВŅa~^OZȮ;6o,ϥsN?jv sW _{ ZNV3@GJ9R\9NY%EY{wܺ`-_Vʼ>XXU^N-c@nhП8r>~9Zk|ӧO_AŁIatE:IrDpH`>ZZ5Q(|MVo۸衃%&e,=ݷ< ^c劓G0M[63>7.ٸ*jҚ-'kj0{lDRx`Ne]QAw œ.wY>jw&wW)MSĢ+H q #vD[vĹ" 3&,#:,%r E0iVQe:u_&O>k~`2ΆKOLڽ\0 ݾp]"䊫qe@jW'fۮq'I#AYaSM洌$R Y[6mZ:$4tʍh][]e zR1-6A]1QP5?H& !`k-%jǏ|aFGDEӌ/M!&_s-yo󮰈HF+ɇ3᪫/tlbJXLߠyEУ>$ma4m2q\JVH8Xb 6XI ;Mؕъ}ڂd!Ә`fjt f{uD,> yy3+>?a(Ba|Rr߁#Lc:UdI^r1&O@?$\Tx<;H,NշPH Riib>tٜN$HؘX(XJt8NG*4>ǓE-Jr^"6kg@c||X,V(  ++ T1b &۷obbb:1cƼO> 6mڔ_]- D,XErĉnJ򲪪 ZvY^y &(--}ꩧ9P;Uk9)L 7t6Z+gǎƁHHH@`?">6?vAT7{ィS91 TC>Ճwφ ;s\7(D՘]UXʎ]S3Np[ %)'1~=@,3DMg+6& xN㝄mK;w0@R sl^7UT=z;Zz@cjjF:hXQ 9\C 9nݺ^X =c#87j<ޝ7S"l#S#G A#Y`%2AS5X4A_lqu=Ç!9Qw:Cm{~ 4iO)[(B+Yr(7 {?~<'H` nf )M+viX~+V^ b։ >#PBc:t護ނꪫ +x5)) MG\ܲe _E37l&h%A3>sE-@e2-ãupsN_!8Ewm׃jnjT0v`Cqq1 :`pZy8]ऴ433T ƴݻB7PlHnoȑ  *<$GsI`$غvMwڵx+Vl۶ F9IBnr٨VTTX2PZB1u,pٳq[W^?`d EگmɺsW/ptaU¨}:g~HI^VWW2EDDb\OZADX1w;FG(  ,Q~=x曟y9s\z(AQ/͛7\<( \r%0, = $Stzׯ_M7]s5FCC0e˖D}!qƑlfzᤑ>Ll40C ڴiӇ~n5 GQ E,dW.週27X[5 
PG#@:JȓO<#"룴(2B>yFuF?#vfSͰQ#ŀ?G"\(цkeEd^u-vTtiBDjɉDƯna;JE:Áf?ƮB{XjK0{C,w 8} Œ ` 6@Ąj*X7;ƚϟgIP5hC(11J e8u֭[G zaWX!xb:^{cv$$&!\rj>e@!w2|'[lAXpu'[Gqo;YϜ9vF[]HBh)00,vG^nnwyܘXEy J*,iB'P Վ># ;>{rbB@*9].@.JBOFܦG =%"[%)F'L8=oab7wްeط~ \~0W ̙o Z. &ݻテu0YXX ZvÆ *l W}f[HHwb/ҽ ]\\ c'΁뮻R F.jQ͛WXMH/]tAaa; # T$"/r:q<wIa^KT^lskbULDzN.W1[0 u[AVc!R \[g5Gƨ^FCʪdsz<:".L c׮fNr$/:Zu2Ӂi%E1q?4KCCBϠhU;oO^q'Б5Ce"CLܜR75,fhwhj,3&-Jc67GEQI4٬:w^jQ0ONW!;ɴw&`CcuJɿOx#TKHwd*=Qd`ƌ0$X`p-LJJM7b+JM&SJz3o 6Ξr8 hsԛ F O `=5غښښĤv~iQ~#pn۸vt W~#֠W^Qu:gpEt,_b5 0Ƚv2h:QAʮlZM}ueeL\<0k1[g7 O'Faۆ )cOړOBo0[K`BBnE|XtptdQa0tNJX>}-c#~t:wglٲ]aFSQQc(ad:6df 3GE><GYlpӑl|]2f@4E#T Qg#Sښ}}Ć'QId2Z$˩1Sv$kHC>w\y. :€.hN퉓;z.tZKTQ~}}^ޖuln$Bժ*5 Ii d1-T},XI}F&}}F:]99~ u?'?:X7b߈C~ #bUjSduuuu-aF0N}3v֟ b@Cl>b+ɝ{/\J`W_9q Q:-XN8qĉE1~v8rbkxtaWqGS\,'N NU\Զ8ˉ.v8q0Y06(twK*bTxF$29rGau>kr߈г_̈́nOqUnYkh*a@כ#e!,.?\/_HltaOgw]n/5sğkV L7qƁ3:fqugtQCYsz:45:d7]ҙYfF'19FF𧷻+v)b9*YH'c -ď?f7KY1;;11y+_]_o??f|:){'"M㣂m]poB>}?VnĄ:u˩9_XyP'&YXNPl n<_Rp`k-YxV]]g}v]w/_`Z֦ڕ+W fH%%%>%UUUVl+.ԦZOmX!77wſ+c43k, kjj)®vڅ GDDDEM>XDԊgYfҥMa0-[/RI ;Aӧ$e7xӚ8$y 0y?'kNGǮL& f;vmN?>[BoVjEwFdddg5/\YYw^`[TR 8|X,# mܸ ty>gc*yswpx'ONIIQTSN8q"V]]SalֵْkW,ٿ>} @GO(6իåR\.6lu]wȑsi삂 8J#ۙ3g>ƍC! x=wB^X2dH\\~p_~ZRi lZWVZt+ݹ]}fœ:I-f@8hET*8bۗ_ՇϬ=]\iٙ3Ui6US_wj3r:u5ՇZܓ'TD%ВӮ3kAK?wjI8߈Z'dNiL쒪u>(Ih'>}W,rGT*cթܿkr`Ϯv -ߚ{,gr~ePUQN-A`K/C@`X"~\ML2(--4i(p„ >۾ -yfX|s"|ѣG{]mEv'OĆFH:$}O]6P|JgDKVLR(T!s'kTYQ֧v)&`AHDMCVd} gNz$111eEIayQMTVV?::l_6={`) F=uV?-#^\6$5nPJ쀔 \ћga#[QVa_1"&HT*AkFK3<#>:HY7mDxɨ7=uNT<r|m׺\x{5F8yU VkL)2 :IFOԦ|j놺YrN۬*'3wэ*,xiqqjVZ\NeY?xhHߓ<ٱy㜇f~"=`蓾$uZ헟,~akJzfrZ#;p>oaW]u9YcK,u馛r"ƬT[[[\\l4Q†N7E~8']rKcTqP,|ذaBEGa1\r%/br - +Ro~~Ǐi¶PM9RAT!\֣wk6=/Gb QBD$R+1Ÿ f9zMIK| N55#C C X Q/H9 4-*Wh .w?)>޳K]֭;CO.W dԃoE V,##.\xAl6m4k֬3fHw\vw}srr=zg̺:@z>E`%T!䅥@QHHs=7o޼z@QÇ7|\Gʱc~mOaew0+WLJJ3f X}v&@^UUE# l#KQ6`ة"F %9n۶'@ q-[8E-fm~G.k׮9s&*<7TpAǯjC :4)0c] 7vD5hxDw"e}Tr#nPEہd Ef^IR}]5n=$R)ZYytR PDz! 
H&tqmt;&.A(]8xfoOpE"doīJ"6 KqdTMp>l72`{~!R %跼RrԩߑJZϷ0U N6m߾}`3<tҗ^zgϞ,o!b:XF?$w ~G?ӧOa[oqFl^^^e`~aXg[o9`СCݽ6o\__2#;G1Z3(]BTJOšϙ3'22gΜA GT[aHa GѧOD{FjlE #ann.Z駟F>(Y644mӱp Ӧ\|駈WPB4;#Ր++++ի7ߌɸ!)l{BX secHghW/݂c>Bi+Èbw"ASeQ;[=:j"#c6GhI6kId8tQTR򩩉:%!:PCQ&jpd7V ? fBԭC}8f"<+5oM.9rr`aܸq8!1ydl@R)~w"ceJW D>뮉'ρX$ Xqt/Pw'pXJ =ԩSXsѤ kl:6l+++Cܼ60,66;^~e (c=LT8{@&z 88(!Ñ]\0` }( =XbϞ=< aÆaP`(P4"?/9PxdcW_ukEQ Othot_ teo񽿐%0Y.Ƙa#tTMUQŒ$@K$nFF8 oVG۽Ct?K>LgNNΎ;Go_0HMMmD<_@89=h8KƆprرc'LM0c89=VÐ y. W<` sn fðXaΝl4&\VÇ+ }d!ʀ0]`y_r%X{@s޽($&&u衇ApҰ |-2***BX0M:5,, -;Gvh.y} = pgϞ uVZ#B^> 0ÿ"Glub)D ފ2wpt@|*>=6jqE"łw3]Ԋg,g&X Xpbݘu< N+߂1Ǐ>P+6'Ñ#GHiiiwT[YtSO=eـmAҊ@/? J# 9AsGa(A\B'Fo߾('vȽMB¤"kW^/WcT#/XRxG C@q @'N`}mdo%&&0dN=B ҂C+Zaqݺuׯ/.AAAX'** >š> ^f:tw^z饵˖-CPѳqDh,-HۣtiԺ:;p> +a%1n6 o|{ὰ`@|>^,""+o8J"0C.X `~8 w=z(! Fb+8'l*v{ʔ)H : TxÆ 7"PU.))A^{w#@9,Y|ͨ*ރ.e)[8A}!VC ![ ??Uf͚ berMpc0LjBP bs0a—?w܁@FD5QGw;ۣג9ٜ,r.h(`=y ˚ KBO%M'.t:1GK;X+W]D޽{16 7o I~:?k׮ov{^{yI2 XsByo"yN)`'&%:_wG8qXꪫ.2 9B.(Gyy~a<%ޠ/ɲ{gEP~dݶmn>|#E9s&OٳСC:hPHPo<{ǡ K/GAD5XW{cvI@kQSw~qUhLlGvdb =VCz||</{nmРAARRR~.]cBz'O ^qF&@B߾}; A 0VXX?lDA' y3g ;믿bV=HGL$222P**l>୷G;~pXL/E뮻/'Y|h=܃f{T\f믿L2E݁#h P_~?9nGJ>rpڴi D3p8.- TuoD D$Ԛ:L95Vˉ5퉵iȇIg9kt&BG .tX'@a&S7NWBN'DPnRqLj;f2@j fծ慨Xձo"Z.\& bi|!IӇzpٲe .Z 0 &o&@BL>裟}yiRO?4腽-Zhڴi$I!#ꝗϘ1cm1j#5!,F2E`@y3B[\J*11'@ L†O@EPYTԁ%FIP}|$eff_~%LXE{IaC6z֬Y?={6P6=`>$l~x.SNEyȥ(XKDRo5DИ mخȱ!mCwh+2+:^U\=,lFThThޛWtq~M|I0fd\㥄DlCEO.tܙaAbhq"?3:,6D1Y В'jIzGDhE<ԉbD $pqSEŕv;)<8)<:;p^zzۺ sذax# F DGG7.mqCb館$z=Aݻw]7[J (o>5`V*/G%}}*/KPpHtl|XN:XeTƆ2¢}5Tj,Q}LTF)t (/ںx8uL,FѡE^Jki"wdh<){ O$+dzX, E *:x*}FDEu~DG&lZWE){Bʌ Q7}bCҘM A!A>?yT"GrэNbt!AC?SB2TBtEZ$Iѩ5ڔXa2^KRBef]Zsf"Z%u[_>Qg;]}TR2"̮&ɒ|_G,۪ᱪhE `m Le4m"$1cF07\Kx{O3's/2NeVdGߪ#rGsv~6 ]vB:ug\Nٳ333vkGS޴é}qܷEljj*~K,ڵks9u^7:XCRa;oY äƏxC88-NGq~@H~b3lur !g,Vz[\D|DT*9ykD .2VI\n1SP;?"V!ob9*հ{0j3dC5&:SR4!H4b]n =Iv_'G&D3~I0E4ɲ'k4F?}Bgu%K|Iv?TUcT-"܉S'_C,N(nbԖ;Ɲ;_YXNPl n=?l`|Ug9r#8Neu:]MMMUU^06)))ٸq7|_YVjYp8|*_reP &v(=q4 ^g n27y&7b/4b/D5hg9r"l6ر[hѼy>}7Zwnm|ŋgΜo uk׾>g}jJU\\?"C={̞=رc&mЌTy$—M 
,bA;ˑcXp</߿?55u֬Yo 7 s#8=>K,Ǐ4g0'Of)Оچ"*I.^o1 ;.da p$fdwJ-'k&3d6(?-ONRRThX)CBB`='N ۊ`y#|ѣ LΝ;̎`ǩnTj&ʺꪫz! zu]'9w\xMW|,..2dHPPR+Ql5b;*++m#.aegb1_BEiɚ_V~{ηуx{*1N UCpۊ/>{//O&qzWn3T.q 1l{/>;':#b}SeyK?y>hQ]mGl#x޽jcbbJdm2Je.ISjfgg#4hPl f\t:>;VPPޝm֟9}R;*ڟO=p=7]E;6oku tMƣ1ǯ j*:h%ue5.kLqBk<S^\g]ۋs뜌<uzj򄐠 "aVh\f#jY8gêN=l65?Ԯ))k.7S- ygʷ^zaJ 鶋a=UQ[\34=>:XY1.BϤV ˗-U{1rsNPRnS`=r ُq}kUUԲ@VUU^LkZmTS{葒 sҥwygxx8ž`Iӫɰ(ɓQ#h4t`_믏?LQv5Z RHiiih=*-9ycQ11#F_ >QW]|Ң+ǞWe C;r`\?dXϾOOr8Eu:􌏌 QMN 4W\ xF`@d!Ayƈ$IJSgƄw yZF9Y 2D^|c{8gF^6N$ЌԶt&+B,i*mJvo߲/ccu7&'1XݴioL LtxDQ.CFW_}=#M|b@T/hv;vm {e/2Om,l{ԩ uܸqpH{lX=)׭[s>aC i43ڍ̎L!m;8iJ5WYYLpңGo4#C.=z\uK/صG/Y t#$,g_X7ynаr_rQ1\;nU  MIv&b@ V$4y/յWp}N* "UrAz`JJvj?ivb\q-?41`%Ue1rKһPj'3"T6|ߚ8аpAY6+D! O[D^"!}Kf_J CG~uGSo#o趼J㾙~;Nt*3z[ȿ{…#Gw{Ԫ ѬΗ_~O<@~j%/Nv裏'_=@؈1N0$ڱgXR8ANmS[v-pXEWÞ/4Jjժ:jm/QArjÆ w}ٳyr?jvQG냂'I.E.]wOݒ4ɡgΜ`Q KdT ðaQe˖7;=S_|L `GDJ$Ҏv\ֳwBRT&CZaEDEet/A*K"y|tZs=$\;xPV$ 5BP&:]nyDۑX"IIόOI*F2<$4\&WLH,NNMO]V1! G.]R,UGp8#VAFIDE$Rizn) $nAGo˖-=}o| ͧ~:gΜ(jKw?./8ݗ^z b,o۾}ĉӧOcCh{YX,c= ) @E V=Zk!oرcd 9ro>;;{oV׮]~ΝD-m]ƒQիW/Y)&i ;l=h?!vR^_]]=p@3믿3f裏&N$Mļ:O/[%p= @mZ:}q۱J;"YUCuҴ}@rb6`DCg.W{}m$[oGy@~ 6nܸq Mm(xMSHE ux5ax㍤{۷/` s嗯Xu͚5BK.~l^VVvs}'[YXVcW;v -s=ZhT(<Q1bʕW_}uiiFA݋UBs^ziqq322Zөɧv^@} %!:^$~ܴJa>$Z’",FD,h~L:LG;v, 4;f0~租~z֬Yx`X~Çꪫ L&>}СCacx֒n d: ӦM+vuذa111 +5'O2={Q X@ElGbr} o`Q`_`3L"XQy-tXXXb"X*ku.ŒptMXn퉥dP/qxED"|D$al6h" +/6ac?uLMuTl19kь*(k Ғ5XLP\ff&`URCpp0 a]v<xHW@,Ӹ-~Q xO=" ; E@6ގ9R__O`[݁Fd׳gO'N_qXQfހi+bof ܴi[oRAZ x(?*¢~;0ڵkQƋ[c|9s&B3ЀhϺSNy8ϙ3gO<|mm3ĞmXF(O)]lg)S t:A2Dby(xȽK h1j(@c"""f{@ ~t`p1)) ?B>= ꫯM"S )6lhzR3@8H-I&Tu)I6p~ :rH߷o… ccc͛Mb(d\\444т6ύj~pϨuJJ 2†(ZG5?|# y^> 8ZF4dbkxt Kg_Y%,鷘P5a5;'~lv ב;s;*q/l \YH{y #̙h\f?HA>Zt@pRS`Q^^? 1ˁKrz+6` ʀ}w0s`2n1o߾ƍq2FQe_~MAPt QlTmvl43g J%/t}aF?۷т(</EFX;iD`'vx- ^ڵ 4iu)S"@P0Pж(0rܸq#y[/G'+llEOT vaˠ/arA%U9Y5bUrb2 DD ٿ)Hoz{{bбGV ~oʕLUUUNNf̘ /368[رcǰ9`S:'y#r~{s4hP -ZhժU^{/ {P;Q \X,! dR^F:lAI_|q߅DI/4MԺG``7>Ok" TH-ڡc@":t(`o;7|jΟ?֬Yw0pc9U$ǏN8qWL8X{ɛ;wn޽-Y:!}Hu!8?D!#%UYY }"6,U-nԩ` >䧟~DEE~AG. 
F:2 YaX" ʀ0?~'@;Yp!asU*5 \̙3+Ē'pr沲{v6”DX( QPjDkfŦуO!u, =A2\,ҚmF ,C*;#m+s.f|xGXCyo)S. ȬZ`%Kర'O 56P >fA{ 6f^xD oc: $''dF" SIȠ)68p`+Pl+bd:"։G#)^iVՄMPǦ@eoT {#SIzQPdy2:&n !JXYΜ;sJ j4riZdhc:%@Qcˏ L V~v#w RHÃm%5O뭶 E\ϏeEok@ 8_|GH$tl BӦM+--T>,\CrO7Xi>>KMI[n|ڹ;J333~3Vki,9DGJ*eߤ0`(_2~o)1!*q^ AH3dnu/*5x"账f_ߴIT=ώ~)=iusNf sc.lN3<"zstV{`w+}O gjm+>/oK:oITRŒC<8 NL>nifWc3%Id4%Nՠj$4BOWWo@=ܔ0w9!q5'Nts* KJwOMGJH].A xTh͙ah%a:r{_?GG-$4,$4߉uRrʇS`Wh"k YXNO}yM ,bA;ˑ˩+ړ;r:ˑ˩邛|iS:_n߉s82: J..TqԎ`Oz7s0GPo^T]6? %2GZ'!?+?qvFCܶlQ"BԋNz_, XC&0p(ZZyRLILM#Jj5{b"ՊP*19ݞZ]N[~BA=؋!aD7]cSY8pZ4JءDYeAҦ4|FHt{k6E*a!g= =Z}FS;24;5j:HX%><^2#G6u+.[IBdw3h9XurԪL63{jy<> X!M0)Ґsب* Ez5srV}p03ZC *Sf+-5ln8Cyygc'0I1Qr6b):]gP[XN8qĉq;qĉ'NC,'N8qĈ8rĉ'NC,'N8qĈ8rĉ'NC,'N8qĈ8rĉ'N KMONIENDB`golang-github-kelindar-bitmap-1.5.5/.github/bitmap4.png000066400000000000000000000347061517523267600227500ustar00rootroot00000000000000PNG  IHDRv}nDsRGBgAMA a pHYsod9[IDATx^|S&! I(!@ =Q}{ VP@T*(%&%JH(޳lvg^y lv|?|̝;ޙΙX,AA8/   I,AApI,AApI,AApI,AApI,AApI,AApI,AApI,AApI,AApI,AApI{^%4 h !ujjj^VR6o޼f͚R pHb݀>СC&-0 O;w%K.]ĖA.IرcΝuhdK=<6nܨP(LbAJLA4$. RՓ'OnڴI{{{3ÇWVV/((H.3555]ؑ)! X)kז,Y-y<f3J}Eypppdd$S?33s͚5:Adĺ.\8p)@f)W(;wD暘XPPem׮% SBA4$. Rլ, JIIp2k4O8q#F m`>裎;zyY, b2juç+`ׯ_G`}7w$EIKKJݻwjBBPVVk.dɅ<m۶Hsm;y|䯌*ʌ a6"@Go޼xߡӦM8qs"B233/_V, u:ѣGsssQ. oݺ%#""ǠZ-1۷-Z}v*Zhr`YYYׯ?p@]][jlc`w縄A는Bd2YTTsrr~甔=zT d.\X9sÇϙ3"v@Dӑf͚lHsݳgn۶gϞ͛7J.Q>>Q??ZDPÇ o6mb/,>wް"!BZ${8p`RR[nAG#""` ;wn4b ?nھ}{F `uUVVEGG|{TDWX'ᾐĺZѣC ) ,#gAB#7oD"\\\,b/+WP[nBhQgΜ9ƍCRĎLGA8;j*66A ݻ7n܀pkHb]ׯ۷K.K:A. r˖-٣F`(--EHl]]ŋ3g`'N/TհLA(|С#FDEEoߞ_FsΚaÆjJծ];LeXi۶m}]AAA( 2 1X0tʥ u!`-0BL 6}EСC{V`7y&&&2BX_~eX2s 9qdddNN A8;}a{ܹ3Z g9s . 
=[nmܸyԩS_u^^ݬrHb] _"4"1evg1B݊ 'cB0))Gd[h1cƌW^y> {& 8),BT,5k-oݺtQx5ktt4++WDiO8q_~__j<%I`0d2٣>pʔ)>>eqֿeRO>&kPPʀ^dc g "nӦ fLjzz1|VZ)ׯn cOؽ{7=rHXJJG ONN޹s'G}wg!KG]XWL_2 `*qqq i!Ctԩޖ2 p5 iiiPP@M6Ak{ط .LD^O?an֭'$$0/L֡C~ݻw5kV^>pHb]*_~L /sp \eVV֞={RSS۵k-((@*cǎec}6$6** 2Y[[e h>pQQѶmې?~!s] X QW=d" l߾[n;vL&-r(+ҥK`˗/[?[jjj,~D丐m!% HѣGw1rπK,A.;j(ƕhC}!H|+**JKKy睌iӦA_ -ZtС?<-- 9.V˖-;}~( % W$cnT*EyՆ^ gn O]jdx̙ ^m۶}饗D"ѩS&LϷlr-ZOV^M!w)<AG?~>=zرc۵kSSS///:ujdd+rfꟺ0~5 ɱZdlpÇ_x񩧞gdhrHb 8ɓG"dKm2 W_}5f̘{{j7%X 0 GR~XRR"bR!% N A $A $A $A $A $A &l(5 ax>],|/g|l6f9(]-/P&l܂3sʙġoOO/JLHLuC:LµjuJQ3'=:`2Mf fnF3:L69.$JrH^SSSv911{ݼtf% [ha~y2O>uXM:dMAޭ=~Yg֯i56T\hkmǠJazDZ_-K*#D* ۴o,쭩(Ί[_m߸ʅ͎Z{.Yêvl+.,@!@ΝHaK|>>loDVϻy&#:yގ\b)aGEyzr݅W/]|Bf\BH0J`2%P4H& KFuFkC(-I#N$O*~Ng]:{f_ˢc8@S>;-Akv P{^18`4V"'5 iu:}QቬZp+B(&.^&ki,мUZ wh2<(12$!VqPЪmҀ!Ծ#J*{䯾b! LJMMֱ)?\,_&rY,XXn.!'O>|8GvB'D+p۷o3d }vt:ܸq…  EYYs W\T*8d%^NM,06ϟ_`_~14ɀ ڵk)))aaalAײ- E˩38txl|@ h4SNPB?O7V:_p"qL(>9h3\"hh=o4YruCwXElуKHl^^^fff۵k0,fggGFFnu޼y.]BM߽{7S W-3gΩSVXqYbիWoݺH-Z4{rlBϝ;=z̘1V>:MHp5 >|BxRM,,__G/Ha!={ryӖsqzF14 QtsU-ycWW =+Aq/ 8$^,( GoX!XϤCBh4p` @q~>CCwf5}e gX=w1~b$; >9h.]+**{H[oEx*mƍǏ4i1??Wh4"EyHH.U3LVcv?Connʕ+5Z@axРAؑQ_,|7W 4_$j1v83F u20f9'aaVM 0p?U ;ƱBm!9Zv_Y%9ǧM6 2rGC &Ϝ9s#F@=zXf]zhRRRPP߿G oFf0@f KpbW}`}5t]Xۭ[ l zBj8E0y;vG2<=H%!b111N>} ׄvh 1DZ?>>^Vgggc<2, eeeZ䬈ƍ1`xv޽O>b1&ddi b1aߘ#p&J4B -Z@R[jo߾AAϠW\pBJU `\ɤP( SRRG"D^&''圜d2P-//%otbŊ3f3b! PP$oiӐFFF"ʂ2޽mBo$n]N\%]sUVsRw ?;$)`1-!wbm\Y0z[o/GX=svҸojzE~i6mѩT[N:4 !G>}#ڹsgl*//߰a̙3Bb$ɹs.\xMPDsff&2O?O?!Q^tȑ#! r|*CkBu͗/_FVqc0rD^7W@g0MfgB@.*C1tLwh =qJPAjXȺZ(4ںW$tZ\?GY^}v""Xf [Cp/OpHPP]*(šUFsdL$`9hJSQ[g57eRWï>4tgAFٛ2e t bCr\\ mk.T'&&Imd`/AAAE_|-/H;[AM َ;vs(,` hm„ C-P$/p\"572 G VkJjp><%"o̡ǀxB:e׹S 4'BQC>aʥPق*[5">eX`jl= X3) #Ceb$ ʮT ?R1N[P+4Z 1IuP{̙b$SRR J$M%pE @o7B^^/?iw{@.$Ѝ`2^/j4[R;KA! 
]g4ݐBb" [%wes=fK(1F,[m!Yn;o5²+~>!s K/}%BjUP)+j||OĨ蘄6~%*"^`/hWײu8ݿXg0O/o>?$<"ubpx8,Gu:#kFjn̖Z$B`V$>4'&/LǺn4K*y%Hl3EF|w** QAQVS  yE~ -b8aWh4Fs\,vR]5( YhlQV_ I iqbQ]OIxx.W 'Ne4I$OrR%|ETUsw֫Tr |$Ƥ8aW]˽u0*4'{k)50ytLp4[&61Ze9 pMf,/-y5u Q.=bԕ;<ΤLE,sp-ƽx^1YНY`9gKb2[o7\5т־{lԔ'Ioد+'tgД*=-F/Og }k}{AĘ-ڪ*uӮzIE2vK &{wLfd:. O"}k`} -)h$ml $Y:4 *`:>>wH^DaRR@ `KF*Q~~~\tHb +O^v /)jx{/_޲eKgEEE3gBBBbRB͛׷o_v+Hb V_ϝ;O?}'x<7mT*￿eP޽{CuȒϟ?_[[/ɐ"x&M4n8.[ۭ %٬鼽︉)]QQȖ7諠܆`@ s/ ,0LHUMVXXp#F0!~iii+VJyyy˖-k߾%lHae7n?g}֧Ozw/b]8^= b͚5\#b8 DykfϞ=q{KOOcF *]zuffH$BȨ͛_SSSryTTT&sG^iӦΝ;wЁ{w$pK`!Pٚ:ujͰO`4 MHHȫ_.YdsEM0ycتVjVVw܉K.EEE~~~IlCK~}Yt$[Y^t Z0…e" ; eLܜQ\Dһw#G۷SNm۶MIIAI6mU }]vmƍH=zpP:|!C4Miii~ :9yW\8q"X4Zذ;P( AKz>==EFFBJr!auݺu"BU*6lX|yuu5Sހa4-}K#uuuHURCda;(mݺ5<< ̓)!l/?#܆"-)èVVVlٲg>aa(۷oʕĞi ?O?a*&>q̟$oRiLL #0˗/߿f$qqq"ԩSpp0¯ gرK^#G|bnݚ!Z)edd<Hm2 %l ))S.\nݺO?… aTm555X|Vji/^8k,o=zh&iqƿsb+V@̴|~ GN*㠌F}{ 8 in0]f-G ¨|||nݺ `̘1رc_擾FIKK ޽;* -[`J;vŕ q1{a+jMU؄}mP7o=QW_8p !D>^zY\\ ޻w/)52પ*&C}}}!&Mc,I,N03>}bccQAčTuРAHmH8zܳaG*;v<䓐X1 fzz:={#T_nذ lXVH5⃆jy䑠< &^߼ysii)RX???vH<7n[C Ak/b+L ܼy%KG}7 @3fB+F9 ɓ{X:eAQ)]va?C^-Z4sdV/^<}t {0`P$p';wf’al(PA𐹦`ZjU®낃@<!^rݻတ%e0>u{Y>… H%؄$7n|O=J׸Q@D!H5AV˗/Ú"sC1iϟ?}vr޽Lغu/AQy֭!'Oh!92{l&TOWۇ}:¨uذa²`&Lx7ڵkax04l1bAp~ I %˙OF Ep KD-fвeK8J3U4I> _ԟ m۶!?a\v ggg ?s '||2Ef_Ct1B ލI S` 6L{ ] h!"T.JFs r\-.0:;H,,XVh`JCA&a h_ˆ(l զNzc B, Ә4ig#E9,1 '55zP}Bn^zqL |Hb  #|;Ir`EW\ -&&y 8Snu`TR!R ٲe V#TLE`nC0i0i^]]駟”f̘|ٳR9`DY]~ZZ… ~G0ồ]t @={ޙ[_~]6Tet6)) ǂ?H$#j3!c&%RO$y}A +jܹH_V#}e@ 3!*[IOOAn ͆&<F2b-wGG0Hŋ{nGɓh yV:w0w@v]XX8l0pƍ1E9 .VLx _@kwGt]v!k|/Ѹ[;7t_;v"cTÔBn޼ݩS aL8_5p&$e>Ȅ 4ymLAd"S5jTǎq3`/2_ *Ě5k/_P(~'ֆ==ztڴig͚2 8p1+V@J`Pn|@@<D4,,y` EEEppGf #`MPu <#3'0 XaS0.Liv߾}{6m0FH;egg#Eo'{E(4ȀF{#E< ~_t ']`?N:>@!ntm2? ϟ߷ow̙3EpU`00HU!uC9wÆ HQ6VkkkVP7neV$J ɐߔ7DNZp!<є)S`w<42#3B={a Sa_t=|p,w'xeX0131i0! &@e!@C0(1D6bĈf_v]ve_d Caן!~0=4eׯ_QA<s@zYA#aG2:G?C0ITMv$!1c 8\#Iyal:Չ'"mE$T*ay,&>3!~YO>`ɓ(ߡ@բGtBq/C}饗ֶPC8. 
/!5Eğ 9h|TBI {DX ݅X(ҥK^0dh!&f;IC[ѝvoi@?fG:0e˖ر"7dLyy9sPwꫯ>2x+WP5^W_!r|B;0޳@:tҥPeH/#x!F8z2\.e0i7\B@f͂Kb#8( 5; $`.RTTp0Y0`;'%\ӧOG$[v=rݺumGU/y#L:1oܸrL D!V*a1Z9s͛!Psss3* ׀h:>>g4Mzz!={LHH w؊@8nx %\ׯCJHtP]VPz(/#G (m˨w;A$,oUa2~dd=֐B땒d Y,Aw@KA@ߋ% N % N % N % N % N % N % N % N % ?a~ʏIENDB`golang-github-kelindar-bitmap-1.5.5/.github/logo.png000066400000000000000000000355741517523267600223540ustar00rootroot00000000000000PNG  IHDRX4 pHYs.#.#x?v OiCCPPhotoshop ICC profilexڝSgTS=BKKoR RB&*! J!QEEȠQ, !{kּ> H3Q5 B.@ $pd!s#~<<+"x M0B\t8K@zB@F&S`cbP-`'{[! eDh;VEX0fK9-0IWfH  0Q){`##xFW<+*x<$9E[-qWW.(I+6aa@.y24x6_-"bbϫp@t~,/;m%h^ uf@Wp~<5j>{-]cK'Xto(hw?G%fIq^D$.Tʳ?D*A, `6B$BB dr`)B(Ͱ*`/@4Qhp.U=pa( Aa!ڈbX#!H$ ɈQ"K5H1RT UH=r9\F;2G1Q= C7F dt1r=6Ыhڏ>C03l0.B8, c˱" VcϱwE 6wB aAHXLXNH $4 7 Q'"K&b21XH,#/{C7$C2'ITFnR#,4H#dk9, +ȅ3![ b@qS(RjJ4e2AURݨT5ZBRQ4u9̓IKhhitݕNWGw Ljg(gwLӋT071oUX**| J&*/Tު UUT^S}FU3S ԖUPSSg;goT?~YYLOCQ_ cx,!k u5&|v*=9C3J3WRf?qtN (~))4L1e\kXHQG6EYAJ'\'GgSSݧ M=:.kDwn^Loy}/TmG X $ <5qo</QC]@Caaᄑ.ȽJtq]zۯ6iܟ4)Y3sCQ? 0k߬~OCOg#/c/Wװwa>>r><72Y_7ȷOo_C#dz%gA[z|!?:eAAA!h쐭!ΑiP~aa~ 'W?pX15wCsDDDޛg1O9-J5*>.j<74?.fYXXIlK9.*6nl {/]py.,:@LN8A*%w% yg"/6шC\*NH*Mz쑼5y$3,幄'L Lݛ:v m2=:1qB!Mggfvˬen/kY- BTZ(*geWf͉9+̳ې7ᒶKW-X潬j9(xoʿܔĹdff-[n ڴ VE/(ۻCɾUUMfeI?m]Nmq#׹=TR+Gw- 6 U#pDy  :v{vg/jBFS[b[O>zG499?rCd&ˮ/~јѡ򗓿m|x31^VwwO| (hSЧc3- cHRMz%u0`:o_F0IDATxyչ{ffe_gw1޸0BYs{֩sB!<B!B!B!B!P`B!P`B!P`B! ,B! ,B! ,B!BE!BE!BE!BE!B(!B(!B(!B!B!BH潗!Teb" !5Id躌l/xnB-8)~p $VB( !gKB"Y!PMU;yBHJ Pf'( |3X$6 t' !$ͮS奅!cQ-ΔPJP"K0zELQ_KBAm^E B͜.Uʳ>qB(* T@(#WLU/Y^H3ʵ4! fEP'>rEE"dg$K@<_HSVwk%B<=n 1B& *:q=Hٲ_P CPq!^`A(}Q*5H\QX- >qEHB08Z+_\sE{6@(2F!bD Eׂ$iPB@mDfX$eB)H8+!!{~Q H?HU!{B!XB!XB!XB!$jz\.$B!aB!B!B!B!P`B!X">SrHa#b#"B"B"B"B!c訜 I$B`B!B!B!$h` dG!F0E!b2 2u⚳s8J{BְlWgcxC. 
5]hj]mrԝٸwUڃX{ȹE 78dd;YSI !Xi@N֦=WZ b;z;Zᏼ Pũe,WXA=RbjyR{"tnvB!.vTL(Y[^ 6=KE yy]~3&(vA-R]=Kc3A W(!R[mԫӮvduq cg-gB*>nj!?PtB(dՓox&/羘]uu<ⴗ]R.uG0<-BF S 9Aqf=[G~@mxő9 BYIX{Nj޵{o=Y] ūvfUBQ(˸afmuWexeeO,X Wsl'wK{o;ss-^Yn~1-R6] @Lr \9yH 3ꝅ#sO(ˣRʵy߆g3FϢQ 3B"_u3cnT=_WRYubfL\(|xU}mјe NOG-| (ZQ;'ri+(P}fαN GkgȯhgcWտ!~c7hEF0LHO8_1=@P#?^Jz=+hEcPfGūV@iQ]]u( , ?L12}['ش1sw~|heݓR((4ocQ U [cޑGfÌ2Q \w!oeȘb]Ub ~× GkrRE]Z V3ς#Y}hּBuI!n4w,v3>EkW`V]>0cGZ`qlŎ}_uuF6lqi>%N=Rh[]6!Iq Q2{7\1ÓzLyؒR (A2iVWM]̵׭yP7/ҵϨy[mQ&OѪMbY|lëw^iF퉽~(TG? vpf}f5N vQ`s"Em|yȜ5dj%EΨoK B]:lyQwF8 @HMIBoۀs4)a%S2z#K̜Ykz YoH)߮[ZkV{g 1nMbY:}Ng|V~fi#nW^T좾pkU̚?}-. oiT`%I5,csҍmhpľ}G8ٯDN_~y^dE޾eb$D 1z?EV0}TZ ,_ Goص[ kWr¦axC?6͵SH'h#4|JwCez0b4rY/| ]0**C~6%ͭ7 Yt> 6Zi?/*zg*Ax㺡9%Bxx(5*pFAl;3z7/"phYeb蒝ׂvA*>獣8(˹{bsֺ,kxZNpLd 9@熟27rqๆq FlOg2._]k|Hg=7h?Mst5z0kW2{+c.qgL{b"?f"XUu\S~1L 2q3><>}R4$2W7OwQqCXjܴ2.5o$"w.r&Ԗ.hf 9{2z#?zEe}ez-. K+[ ڃ 1z݄%(t,h? 7>ly5W}Z%Z%qG}x塽XqILWޙ7aG]#`%~xš%^;z$t i>yUT7jn-?,LkmMKOn}֗n?@ J^ \%3{}#o3Q0pG.|&K`Ei~p Ј_-cK8=xԈ$ e3v=RػH(566ۧ:::*Y*Y9x-^;xYt1 vu`HIMRn<{|sR Yǫ$/Q緯"RnI)#B _3`{Ԝ~Qs?hɸ-IJJ ^p p *7"EN )pbp~54| M^^ך1s1_h#q%Ng7HzK=s]wg 9QʝkrL ZV/YP"H˯n+䗷ۀn} iP™S3@ko/Qau h ;Rnb՗B**}PKX%:5oh* o_1&yQhK/9ok:M`P-AKkn52 [4Q""Y+Ջ=RBiM?5yC<ˀ9u3f (}<*A +PwP:'~37^(Ak8hLDM 74>úFvR!p"5IWp|T_ˣoOM'\ )a)jb>*PxB^r{7ҺNRnA;g5Wn4rhΦluY¤cA,Hw&1oG#sE߅nY`q:"$]0Sh'h8wK3RxgfV,U)8sE%u 2;>TXf!N{{YR/-0T !}G>D.7t鳤A 1n3u^G \Ng%<+MkOG83dv(+~ i i0H:zÎH7ZZ&ӌ2)s+Dc *2{J"Ē#S)3LJk4&I%|Vq#Aeex p\c"Ycp fMB5NSI'ny7Pf=yK =m5W57gSrƜ|`GAB"'oMU ce-IH{>Fn\-/km'yO|O?nUHxsnl^sQIX zLg Jkkږ> y)ų}cL4^!N"ogWH(0}wϫf0jQnT}y VHNN $N NO(+&n^wUۃКU oQ]J4ͫwxbt(]-3˜'ɜ~we;hg쉽"I)}ݶ/<+~܈^ EyvʌUܼGQ\J15XkD 11wr\ݼ9\f Rmj94"wQi7`w:ڢD_#war3DZɇU-}'´eriy|]l>}{W!rfl}Ʉ(2TW(cbP3]TRn^eb ],z!{\ӽR'rʿCߍܪʟ3 V:9B xI:”tg1NxriW!r΂LX$$}w;(x54WGAKvoi7y)i[("^WC Iއtzݱey+bJK΂)9 koc&,B+(ol ,b?߉kEnr8#wc ^=qIRs<]E5PY(V* ,OHM:_ q?bSH+sk mk.GK`e82E}ZĠgonWhlZ] 9f,_+ƅ#r:nLq6&y?vzm_"f]C+j]^=$IBU҂i"T-!J pk\-O+ x[KḚ&N5xq3V.EY||cd.zgFL}K)%y#? 
,~kMoǣ‘(IE^|wmu =Huf]yE(\ 0 ωI 1i\!^шYoJ^*1 ޙ p(]Pc&4d{;n|U4df/ FO^yCRX uZ}`;i cbiNaee xquQ~wQ&Fbq֓)Wis1.I#yƎm/wX^;. @vQxuzv)mמlۙ:3ƬQ{JE(b vL,ʲu6|v2!E!e~zzw[L n_rмe#[\"gh4ۊ6*. z`smw~ Rߤ!.]̜_'qU=wzZ`muVh4k~l_8(ɛy=PW'Dڅ7Mh"uQqJe"<+o)>';ϼ!|((T%!mHt4,|K\~ܱdx/;݋݇\aض|-^CMnjZ;%:jG7ʖc3_%QB8. ̚ouX=HE<^~ zFα:w|T*,%–2M_ca{<-u}l]w[OyoouEi1c_Wv̗PkZOLzͻ\+<Ǖ9VFjkWOd:W1^z\'s V2mngn:|b\ ^)-k̳wo^;Yz;^Jb+F 6 E[_9ޙ |[$5nM3~Uas#*K$$:?>S"R6j ۱jܣ=!sU [p->JwV ǥeF2V<2ֵ[VIWb-]/v^>#%g_'ۚ>޼WO/vyS$Q`hWu-9z#qQv$kk辻ߋ|#෇\aZi6ͶV/Lxp/[wLRk>+)hvdϽ"Ar3J׋?3xYxɽ梗tK2Fglo@ʻN͊u guQoZKV 9(|4. }g' Quާ)c$:?++}[|?.TDtN+HŽ9T 1D(b("?S3(=mq6Z(@DHꯏ*51VEV8Uc?PSG1ԧ [`;n"9vY{Gq3oeB[Q!tmګw3$WVj}=# ׻na:9U٣N<㳯{g*8+k.$CF`uq|Q?][珲F 1c!^?B⿼E+ٞ'i H/cCD45]9TxePhnƛ7!o%}Os^3v(|оRFDHweྪ $@"e҂Mɿ'dziE빜j{шXwa-PowW%l(XcI]$G!kJ !D %B,WgCW4]iA(U|VaRlW>(V4D'PiZa?BrDkٙ Ol$bGCAe!A # mn-Z >*u}-XRqc%?isLرATnE4TfV,!eܿ!~iqiEzп> 7r`O,3,s%z.nST]}`;dPErO2xdf{[]}*ⵡt^+uaiR|#X&'KeHB'՝hH@N[ԹLe% V?o4V"D21FQ`BNyBH|Mҏ%^`%Jʄu,`v~|t"b?ӵv/G!W0ӍgßDO//]+>5BH|M|ҏʼnF>?a6Y|BIA%e ^rHVimۮ|HZ̸i}Bqp{C .=gΌ:ʡknqixch^>bm!v5c!܍-pd.FnK5$Z>B!P`ۚ DgFѥwڶ|BI1e2nɰ~Te8݊*|g`t[\n ۖ/͗`E?!bYeOmj4t?Yc¿R\oΰ>1ۗX)y+!szRЧ-FL}3,f&U'vl<Ⲣ,|ܰ8Ư}!y'~PKJ^}7 |1 r닳 Eu+69g8~쁔)(KG+@b[|?w- +i~,.(ìP`MӞ=`h)3ՃxvO{ʔ2-"đ;ˣYXIX&vO;n~b_E]ĭw"G!$- D4i ,}ҋ݇?{ťa b/V/!%hW\j+ bκg}iѢ׷7w'S|B5ׅ^W ‘Qy~khSN=I< uȝv#B+bٛ%ï@75IgfO:1}'۽Kۼx,Z^kY!BE!4B!B!ȝ;H!F0E!BE!BE!BE!BE+; !F!B(!B(!B(!B!BeQ9Hf b"B"B"B"B!樜\]L!bp44!TX_B,dĩH!CRd+& b%"I"\GcB&8IYBwŸHmzE@I’SKuBl1{I*}cR`IJ[rLB]0Ѩ{ E~Kfv@!V1#N56g$u9R` @ ]4a-bRRA6mkiB!)-@-:BU )W$FfH4YmŹXBHz,B!3<B!B!B!B!P`B!P`B!P`B! ,B! ,B! 
,B!BE!BE!BE!BE!B`@I[IENDB`golang-github-kelindar-bitmap-1.5.5/.github/workflows/000077500000000000000000000000001517523267600227255ustar00rootroot00000000000000golang-github-kelindar-bitmap-1.5.5/.github/workflows/test.yml000066400000000000000000000012441517523267600244300ustar00rootroot00000000000000name: Test on: [push] env: GITHUB_TOKEN: ${{ secrets.COVERALLS_TOKEN }} GO111MODULE: "on" jobs: test: name: Test with Coverage runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - uses: actions/setup-go@v6 with: go-version: "1.25" - name: Install dependencies run: | go mod download - name: Run Unit Tests run: | go test -tags noasm -race -covermode atomic -coverprofile=profile.cov ./... go test -race ./... - name: Upload Coverage uses: shogo82148/actions-goveralls@v1 with: path-to-profile: profile.cov golang-github-kelindar-bitmap-1.5.5/.gitignore000066400000000000000000000000071517523267600213150ustar00rootroot00000000000000*.exe golang-github-kelindar-bitmap-1.5.5/LICENSE000066400000000000000000000021071517523267600203350ustar00rootroot00000000000000MIT License Copyright (c) 2021 Roman Atachiants Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. golang-github-kelindar-bitmap-1.5.5/README.md000066400000000000000000000235061517523267600206150ustar00rootroot00000000000000

kelindar/bitmap
Go Version PkgGoDev Go Report Card License Coverage

## SIMD-Vectorized Bitmap (Bitset) in Go This package contains a bitmap implementation, backed by a slice of `[]uint64` and designed for *dense* small or medium collections. This implementation focuses on high performance by avoiding heap allocations, unrolling loops and implementing SIMD vectorization in assembly. ## Features * Optimized for **zero heap allocation** for all of the important methods of the bitmap. * Optimized by **vectorized instructions (SIMD)** used for certain operations such as boolean algebra. * Support for **boolean algebra** that makes it perfect to implement [bitmap indexes](https://en.wikipedia.org/wiki/Bitmap_index). * Support for **bit counting** with operations such as `Min()`, `Max()`, `Count()` and more. * Support for **fast iteration** over bits set to one by using an unrolled loop. * Support for **in-place filtering** based on a user-defined predicate. * Support for **binary encoding**: a bitmap can be read/written and has a no-copy slice conversion. * Support for **reusability** by providing `Clone()` and `Clear()` operations. ## Documentation The general idea of this package is to have a dead simple way of creating bitmaps (bitsets) that provide maximum performance on modern hardware by using vectorized single-instruction multiple data ([SIMD](https://en.wikipedia.org/wiki/SIMD)) operations. As opposed to something such as [roaring bitmaps](https://github.com/RoaringBitmap/roaring) which are excellent for sparse data, this implementation is designed to be used for small or medium dense bit sets. I've used this package to build a columnar in-memory store, so if you want to see how it can be used for indexing, have a look at [kelindar/column](https://github.com/kelindar/column). I'd like to specifically point out the indexing part and how bitmaps can be used as a good alternative to B*Trees and Hash Maps. 
- [Boolean Algebra](#boolean-algebra) - [Single Bit Operations](#single-bit-operations) - [Bit Count and Search](#bit-count-and-search) - [Iterate and Filter](#iterate-and-filter) - [Example Usage](#example-usage) - [Benchmarks](#benchmarks) - [Contributing](#contributing) First, here's what you need to do in order to import this package. ```go import "github.com/kelindar/bitmap" ``` ## Boolean Algebra Perhaps one of the most useful features of this package is the vectorized implementation of boolean operations allowing us to perform boolean algebra on multiple bitmaps. For example, let's imagine that we have a dataset containing books, and four bitmaps defining one of the four properties of each book. In the figure below, you can imagine that our books can be on "columns" and each bit in a bitmap defines whether this attribute exists on a book or not.

kelindar/bitmap

Now, if we want to find all books that were recently published and have an ebook available, we can use an `And()` method on our two bitmaps in order to combine them. In the example below we retrieve 3 hypothetical bitmaps and combine them to answer our query by calling the `And()` method twice to mutate the `books` bitmap. ```go books := bitmapFor("books") // bitmap.Bitmap recent := bitmapFor("books_recent") // bitmap.Bitmap ebooks := bitmapFor("books_has_ebook") // bitmap.Bitmap // And operation actually mutates our "books" bitmap books.And(recent) books.And(ebooks) ```

kelindar/bitmap

Now, what if we want to find recently published books which have an e-book available but are *not* best-sellers? In that case, we could use the binary `AndNot()` operation that the hardware exposes. In the example below we combine the two `And()` calls with an `AndNot()` call that removes the best-sellers from the result. ```go books.And(recent) books.And(ebooks) books.AndNot(bestsellers) ```

kelindar/bitmap

## Single Bit Operations When dealing with single elements, this package supports simple single-bit operations. They include `Set()` and `Remove()` to set a bit to one and to zero respectively, as well as `Contains()` to check for the presence (value set to one) of a certain bit. These methods are simple to use and setting a bit which is out of range would automatically resize the bitmap. In the example below we're creating a bitmap, setting one bit to one, checking its presence and setting it back to zero after. ```go var books bitmap.Bitmap books.Set(3) // Set the 3rd bit to '1' hasBook := books.Contains(3) // Returns 'true' books.Remove(3) // Set the 3rd bit to '0' ``` ## Bit Count and Search When using a bitmap for indexing or free-list purposes, you will often find yourself in need of counting how many bits are set in a bitmap. This operation actually has a specialized hardware instruction `POPCNT` and an efficient implementation is included in this library. The example below shows how you can simply count the number of bits set in a bitmap by calling the `Count()` method. ```go // Counts number of bits set to '1' numBooks := books.Count() ``` On the other hand, you might want to find a specific bit either set to one or to zero. The methods `Min()` and `Max()` allow you to find the first or last bit set to one, while `MinZero()` and `MaxZero()` allow you to find the first or last bit set to zero. The figure below demonstrates an example of that.

kelindar/bitmap

## Iterate and Filter The bits in the bitmap can also be iterated over using the `Range` method. It is a simple loop which iterates over the bits set to one and calls a callback for each of them. If the callback returns false, then the iteration is halted (similar to `sync.Map`). ```go // Iterate over the bits in the bitmap bitmap.Range(func(x uint32) bool { println(x) return true }) ``` Another way of iterating is using the `Filter` method. It iterates similarly to `Range` but the callback returns a boolean value, and if it returns `false` then the current bit will be cleared in the underlying bitmap. You could accomplish the same using `Range` and `Remove` but `Filter` is significantly faster. ```go // Filter iterates over the bits and applies a callback bitmap.Filter(func(x uint32) bool { return x % 2 == 0 }) ``` ## Example Usage In its simplest form, you can use the bitmap as a bitset, set and remove bits. This is quite useful as an index (free/fill-list) for an array of data. ```go import "github.com/kelindar/bitmap" ``` ```go var books bitmap.Bitmap books.Set(300) // sets 300-th bit books.Set(400) // sets 400-th bit books.Set(600) // sets 600-th bit (auto-resized) books.Contains(300) // returns true books.Contains(301) // returns false books.Remove(400) // clears 400-th bit // Min, Max, Count min, ok := books.Min() // returns 300 max, ok := books.Max() // returns 600 count := books.Count() // returns 2 // Boolean algebra var other bitmap.Bitmap other.Set(300) books.And(other) // Intersection count = books.Count() // Now returns 1 ``` ## Benchmarks Benchmarks below were run on a pre-allocated bitmap of **100,000** elements with around 50% of the bits set to one. 
``` cpu: Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz BenchmarkBitmap/set-8 552331321 4.319 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/remove-8 1000000000 1.621 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/contains-8 1000000000 1.309 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/clear-8 26083383 90.45 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/ones-8 6751939 347.9 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/min-8 757831477 3.137 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/max-8 1000000000 1.960 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/min-zero-8 776620110 3.081 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/max-zero-8 1000000000 1.536 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/count-8 6071037 382.5 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/count-to-8 82777459 28.85 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/clone-8 20654008 111.5 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/and-8 16813963 143.6 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/andnot-8 16961106 141.9 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/or-8 16999562 141.7 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/xor-8 16954036 144.7 ns/op 0 B/op 0 allocs/op BenchmarkRange/range-8 18225 131908 ns/op 0 B/op 0 allocs/op BenchmarkRange/filter-8 25636 93630 ns/op 0 B/op 0 allocs/op ``` ## Contributing We are open to contributions, feel free to submit a pull request and we'll review it as quickly as we can. This library is maintained by [Roman Atachiants](https://www.linkedin.com/in/atachiants/). ## License This library is licensed under the [MIT License](LICENSE). 
golang-github-kelindar-bitmap-1.5.5/bench/000077500000000000000000000000001517523267600204075ustar00rootroot00000000000000golang-github-kelindar-bitmap-1.5.5/bench/main.go000066400000000000000000000012541517523267600216640ustar00rootroot00000000000000package main import ( "fmt" "time" "github.com/kelindar/bitmap" ) func main() { const size = 10000000 const iter = 500 const inner = 500 a := createBitmap(size) b := createBitmap(size) for i := 0; i < iter; i++ { start := time.Now() for j := 0; j < inner; j++ { a.And(b, b, b, b) //a.And(b) //a.And(b) //a.And(b) //a.And(b) } fmt.Printf("iteration %v took %v...\n", i*inner, time.Now().Sub(start)) } } func createBitmap(size int) bitmap.Bitmap { index := make(bitmap.Bitmap, size/64) index.Grow(uint32(size - 1)) for i := 0; i < len(index); i++ { index[i] = 0xf0f0f0f0f0f0f0f0 } return index } golang-github-kelindar-bitmap-1.5.5/bitmap.go000066400000000000000000000137131517523267600211400ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. package bitmap import ( "math/bits" "unsafe" "github.com/klauspost/cpuid/v2" ) const ( isUnsupported = iota isAccelerated isAVX512 ) // Hardware contains the resolved acceleration level var hardware = levelOf(cpuid.CPU) // levelOf returns the hardware acceleration level func levelOf(cpu cpuid.CPUInfo) int { switch { case cpu.Supports(cpuid.AVX512F) && cpu.Supports(cpuid.AVX512DQ) && cpu.Supports(cpuid.AVX512BW): return isAVX512 case cpu.Supports(cpuid.AVX2) && cpu.Supports(cpuid.FMA3): return isAccelerated case cpu.Supports(cpuid.ASIMD): return isAccelerated default: return isUnsupported } } // Bitmap represents a scalar-backed bitmap index type Bitmap []uint64 // Set sets the bit x in the bitmap and grows it if necessary. 
func (dst *Bitmap) Set(x uint32) { blkAt := int(x >> 6) bitAt := int(x % 64) if size := len(*dst); blkAt >= size { dst.grow(blkAt) } (*dst)[blkAt] |= (1 << bitAt) } // Remove removes the bit x from the bitmap, but does not shrink it. func (dst *Bitmap) Remove(x uint32) { if blkAt := int(x >> 6); blkAt < len(*dst) { bitAt := int(x % 64) (*dst)[blkAt] &^= (1 << bitAt) } } // Contains checks whether a value is contained in the bitmap or not. func (dst Bitmap) Contains(x uint32) bool { blkAt := int(x >> 6) if size := len(dst); blkAt >= size { return false } bitAt := int(x % 64) return (dst[blkAt] & (1 << bitAt)) > 0 } // Ones sets the entire bitmap to one. func (dst Bitmap) Ones() { for i := 0; i < len(dst); i++ { dst[i] = 0xffffffffffffffff } } // Min get the smallest value stored in this bitmap, assuming the bitmap is not empty. func (dst Bitmap) Min() (uint32, bool) { for blkAt, blk := range dst { if blk != 0x0 { return uint32(blkAt<<6 + bits.TrailingZeros64(blk)), true } } return 0, false } // Max get the largest value stored in this bitmap, assuming the bitmap is not empty. func (dst Bitmap) Max() (uint32, bool) { var blk uint64 for blkAt := len(dst) - 1; blkAt >= 0; blkAt-- { if blk = dst[blkAt]; blk != 0x0 { return uint32(blkAt<<6 + (63 - bits.LeadingZeros64(blk))), true } } return 0, false } // MinZero finds the first zero bit and returns its index, assuming the bitmap is not empty. 
func (dst Bitmap) MinZero() (uint32, bool) { for blkAt, blk := range dst { if blk != 0xffffffffffffffff { return uint32(blkAt<<6 + bits.TrailingZeros64(^blk)), true } } return 0, false } // MaxZero get the last zero bit and return its index, assuming bitmap is not empty func (dst Bitmap) MaxZero() (uint32, bool) { var blk uint64 for blkAt := len(dst) - 1; blkAt >= 0; blkAt-- { if blk = dst[blkAt]; blk != 0xffffffffffffffff { return uint32(blkAt<<6 + (63 - bits.LeadingZeros64(^blk))), true } } return 0, false } // CountTo counts the number of elements in the bitmap up until the specified index. If until // is math.MaxUint32, it will return the count. The count is non-inclusive of the index. func (dst Bitmap) CountTo(until uint32) int { if len(dst) == 0 { return 0 } if maxUntil := uint32(len(dst) << 6); until > maxUntil { until = maxUntil } // Figure out the index of the last block blkUntil := until >> 6 bitUntil := until % 64 // Count the bits right before the last block sum := dst[:blkUntil].Count() // Count the bits at the end if bitUntil > 0 { sum += bits.OnesCount64(dst[blkUntil] << (64 - uint64(bitUntil))) } return sum } // Grow grows the bitmap size until we reach the desired bit. func (dst *Bitmap) Grow(desiredBit uint32) { dst.grow(int(desiredBit >> 6)) } // grow grows the size of the bitmap until we reach the desired block offset func (dst *Bitmap) grow(blkAt int) { if len(*dst) > blkAt { return } // If there's space, resize the slice without copying. 
if cap(*dst) > blkAt { *dst = (*dst)[:blkAt+1] return } old := *dst *dst = make(Bitmap, blkAt+1, resize(cap(old), blkAt+1)) copy(*dst, old) } // shrink shrinks the size of the bitmap and resets to zero func (dst *Bitmap) shrink(length int) { until := len(*dst) for i := length; i < until; i++ { (*dst)[i] = 0 } // Trim without reallocating *dst = (*dst)[:length] } // minlen calculates the minimum length of all of the bitmaps func minlen(a, b Bitmap, extra []Bitmap) int { size := minint(len(a), len(b)) for _, v := range extra { if m := minint(len(a), len(v)); m < size { size = m } } return size } // maxlen calculates the maximum length of all of the bitmaps func maxlen(a, b Bitmap, extra []Bitmap) int { size := maxint(len(a), len(b)) for _, v := range extra { if m := maxint(len(a), len(v)); m > size { size = m } } return size } // maxint returns a maximum of two integers without branches. func maxint(v1, v2 int) int { return v1 - ((v1 - v2) & ((v1 - v2) >> 31)) } // minint returns a minimum of two integers without branches. 
func minint(v1, v2 int) int { return v2 + ((v1 - v2) & ((v1 - v2) >> 31)) } // resize calculates the new required capacity and a new index func resize(capacity, v int) int { const threshold = 256 if v < threshold { v |= v >> 1 v |= v >> 2 v |= v >> 4 v |= v >> 8 v |= v >> 16 v++ return int(v) } if capacity < threshold { capacity = threshold } for 0 < capacity && capacity < (v+1) { capacity += (capacity + 3*threshold) / 4 } return capacity } // dimensionsOf returns a uint64 containing the packed dimensions func dimensionsOf(n, m int) uint64 { return uint64(n) | (uint64(m) << 32) } // pointersOf returns a pointer to an array containing pointers to the // first element of each bitmap and the maximum length of all bitmaps func pointersOf(other Bitmap, extra []Bitmap) (unsafe.Pointer, int) { out := make([]unsafe.Pointer, len(extra)+1) out[0] = unsafe.Pointer(&other[0]) max := 0 for i := range extra { out[i+1] = unsafe.Pointer(&extra[i][0]) if len(extra[i]) > max { max = len(extra[i]) } } return unsafe.Pointer(&out[0]), max } golang-github-kelindar-bitmap-1.5.5/bitmap_amd64.go000066400000000000000000000075051517523267600221350ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. 
//go:build !noasm && amd64 package bitmap import "unsafe" // And computes the intersection between two bitmaps and stores the result in the current bitmap func (dst *Bitmap) And(other Bitmap, extra ...Bitmap) { max := minlen(*dst, other, extra) dst.shrink(max) if max == 0 { return } switch hardware { case isAccelerated: switch len(extra) { case 0: _and(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max)) default: vx, _ := pointersOf(other, extra) _and_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } case isAVX512: switch len(extra) { case 0: _and_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max)) default: vx, _ := pointersOf(other, extra) _and_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } default: and(*dst, max, other, extra) return } } // AndNot computes the difference between two bitmaps and stores the result in the current bitmap. // Operation works as set subtract: dst - b func (dst *Bitmap) AndNot(other Bitmap, extra ...Bitmap) { max := minlen(*dst, other, extra) if max == 0 { return } switch hardware { case isAccelerated: switch len(extra) { case 0: _andn(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max)) default: vx, _ := pointersOf(other, extra) _andn_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } case isAVX512: switch len(extra) { case 0: _andn_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max)) default: vx, _ := pointersOf(other, extra) _andn_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } default: andn(*dst, max, other, extra) return } } // Or computes the union between two bitmaps and stores the result in the current bitmap func (dst *Bitmap) Or(other Bitmap, extra ...Bitmap) { max := maxlen(*dst, other, extra) if max == 0 { return } dst.grow(max - 1) switch hardware { case isAccelerated: switch len(extra) { case 0: _or(unsafe.Pointer(&(*dst)[0]), 
unsafe.Pointer(&other[0]), uint64(len(other))) default: vx, max := pointersOf(other, extra) _or_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } case isAVX512: switch len(extra) { case 0: _or_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(len(other))) default: vx, max := pointersOf(other, extra) _or_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } default: or(*dst, other, extra) } } // Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap func (dst *Bitmap) Xor(other Bitmap, extra ...Bitmap) { max := maxlen(*dst, other, extra) if max == 0 { return } dst.grow(max - 1) switch hardware { case isAccelerated: switch len(extra) { case 0: _xor(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(len(other))) default: vx, max := pointersOf(other, extra) _xor_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } case isAVX512: switch len(extra) { case 0: _xor_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(len(other))) default: vx, max := pointersOf(other, extra) _xor_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } default: xor(*dst, other, extra) } } // Count returns the number of elements in this bitmap func (dst Bitmap) Count() int { if len(dst) == 0 { return 0 } switch hardware { case isAccelerated: var res uint64 _count(unsafe.Pointer(&dst[0]), uint64(len(dst)), unsafe.Pointer(&res)) return int(res) default: return count(dst) } } golang-github-kelindar-bitmap-1.5.5/bitmap_arm64.go000066400000000000000000000054031517523267600221460ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. 
//go:build !noasm && arm64 package bitmap import "unsafe" // And computes the intersection between two bitmaps and stores the result in the current bitmap func (dst *Bitmap) And(other Bitmap, extra ...Bitmap) { max := minlen(*dst, other, extra) dst.shrink(max) if max == 0 { return } switch hardware { case isAccelerated: switch len(extra) { case 0: _and(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max)) default: vx, _ := pointersOf(other, extra) _and_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } default: and(*dst, max, other, extra) return } } // AndNot computes the difference between two bitmaps and stores the result in the current bitmap. // Operation works as set subtract: dst - b func (dst *Bitmap) AndNot(other Bitmap, extra ...Bitmap) { max := minlen(*dst, other, extra) if max == 0 { return } switch hardware { case isAccelerated: switch len(extra) { case 0: _andn(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max)) default: vx, _ := pointersOf(other, extra) _andn_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } default: andn(*dst, max, other, extra) return } } // Or computes the union between two bitmaps and stores the result in the current bitmap func (dst *Bitmap) Or(other Bitmap, extra ...Bitmap) { max := maxlen(*dst, other, extra) if max == 0 { return } dst.grow(max - 1) switch hardware { case isAccelerated: switch len(extra) { case 0: _or(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(len(other))) default: vx, max := pointersOf(other, extra) _or_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } default: or(*dst, other, extra) } } // Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap func (dst *Bitmap) Xor(other Bitmap, extra ...Bitmap) { max := maxlen(*dst, other, extra) if max == 0 { return } dst.grow(max - 1) switch hardware { case isAccelerated: switch len(extra) { case 0: 
_xor(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(len(other))) default: vx, max := pointersOf(other, extra) _xor_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) } default: xor(*dst, other, extra) } } // Count returns the number of elements in this bitmap func (dst Bitmap) Count() int { if len(dst) == 0 { return 0 } switch hardware { case isAccelerated: var res uint64 _count(unsafe.Pointer(&dst[0]), uint64(len(dst)), unsafe.Pointer(&res)) return int(res) default: return count(dst) } } golang-github-kelindar-bitmap-1.5.5/bitmap_generic.go000066400000000000000000000024151517523267600226310ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. //go:build noasm || (!amd64 && !arm64) package bitmap // And computes the intersection between two bitmaps and stores the result in the current bitmap func (dst *Bitmap) And(other Bitmap, extra ...Bitmap) { max := minlen(*dst, other, extra) dst.shrink(max) and(*dst, max, other, extra) } // AndNot computes the difference between two bitmaps and stores the result in the current bitmap func (dst *Bitmap) AndNot(other Bitmap, extra ...Bitmap) { max := minlen(*dst, other, extra) andn(*dst, max, other, extra) } // Or computes the union between two bitmaps and stores the result in the current bitmap func (dst *Bitmap) Or(other Bitmap, extra ...Bitmap) { max := maxlen(*dst, other, extra) dst.grow(max - 1) or(*dst, other, extra) } // Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap func (dst *Bitmap) Xor(other Bitmap, extra ...Bitmap) { max := maxlen(*dst, other, extra) dst.grow(max - 1) xor(*dst, other, extra) } // Count returns the number of elements in this bitmap func (dst Bitmap) Count() int { return count(dst) } 
golang-github-kelindar-bitmap-1.5.5/bitmap_test.go000066400000000000000000000333501517523267600221760ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. package bitmap import ( "fmt" "math" "strconv" "testing" "github.com/stretchr/testify/assert" ) /* cpu: 13th Gen Intel(R) Core(TM) i7-13700K BenchmarkBitmap/set-24 655739137 1.803 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/remove-24 1000000000 1.107 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/contains-24 1000000000 0.8975 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/clear-24 827574 1487 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/ones-24 571444 2088 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/min-24 979591036 1.252 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/max-24 944884120 1.229 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/min-zero-24 991736356 1.258 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/max-zero-24 1000000000 1.157 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/count-24 393440 3086 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/count-to-24 58537441 20.20 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/clone-24 648651 1875 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/and-24 685710 1733 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/andnot-24 705882 1709 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/or-24 705894 1702 ns/op 0 B/op 0 allocs/op BenchmarkBitmap/xor-24 705919 1721 ns/op 0 B/op 0 allocs/op */ func BenchmarkBitmap(b *testing.B) { other := make(Bitmap, 1000000/64) other.Set(1000000) run(b, "set", func(index Bitmap) { index.Set(5000) }) run(b, "remove", func(index Bitmap) { index.Remove(5000) }) run(b, "contains", func(index Bitmap) { index.Contains(5000) }) run(b, "clear", func(index Bitmap) { index.Clear() }) run(b, "ones", func(index Bitmap) { index.Ones() }) run(b, "min", func(index Bitmap) { index.Min() }) run(b, "max", func(index Bitmap) { index.Max() }) run(b, "min-zero", func(index Bitmap) { index.MinZero() }) run(b, 
"max-zero", func(index Bitmap) { index.MaxZero() }) run(b, "count", func(index Bitmap) { index.Count() }) run(b, "count-to", func(index Bitmap) { index.CountTo(5001) }) into := make(Bitmap, len(other)) run(b, "clone", func(index Bitmap) { index.Clone(&into) }) run(b, "and", func(index Bitmap) { index.And(other) }) run(b, "andnot", func(index Bitmap) { index.AndNot(other) }) run(b, "or", func(index Bitmap) { index.AndNot(other) }) run(b, "xor", func(index Bitmap) { index.AndNot(other) }) } /* cpu: 13th Gen Intel(R) Core(TM) i7-13700K BenchmarkMany/and4-noasm-24 66297 18139 ns/op 0 B/op 0 allocs/op BenchmarkMany/and4-naive-24 179106 6803 ns/op 0 B/op 0 allocs/op BenchmarkMany/and4-batch-24 258091 4679 ns/op 32 B/op 1 allocs/op */ func BenchmarkMany(b *testing.B) { other := make(Bitmap, 1000000/64) other.Set(1000000) run(b, "and4-noasm", func(index Bitmap) { max := minlen(index, other, nil) index.shrink(max) and(index, max, other, nil) and(index, max, other, nil) and(index, max, other, nil) and(index, max, other, nil) }) run(b, "and4-naive", func(index Bitmap) { index.And(other) index.And(other) index.And(other) index.And(other) }) run(b, "and4-batch", func(index Bitmap) { index.And(other, other, other, other) }) } func TestSetRemove(t *testing.T) { index := Bitmap{} for i := uint32(100); i < 200; i++ { index.Set(i) assert.True(t, index.Contains(i)) } for i := uint32(150); i < 180; i++ { index.Remove(i) assert.False(t, index.Contains(i)) } } func TestClear(t *testing.T) { index := Bitmap{} for i := uint32(0); i < 500; i++ { index.Set(i) assert.True(t, index.Contains(i)) } index.Clear() index.Set(500) for i := uint32(0); i < 500; i++ { assert.False(t, index.Contains(i), i) } assert.True(t, index.Contains(500)) } func TestAnd(t *testing.T) { a, b := Bitmap{}, Bitmap{} for i := uint32(0); i < 100; i += 2 { a.Set(i) b.Set(i) } a.And(b) assert.False(t, a.Contains(1)) for i := uint32(0); i < 100; i += 2 { assert.True(t, a.Contains(i)) } } func TestAndNot(t *testing.T) { a, 
b := Bitmap{}, Bitmap{} for i := uint32(0); i < 100; i += 2 { a.Set(i) b.Set(i) } a.AndNot(b) assert.False(t, a.Contains(1)) for i := uint32(0); i < 100; i += 2 { assert.False(t, a.Contains(i)) } } func TestAndNot_TheSameBitmap(t *testing.T) { var a Bitmap for i := uint32(0); i < 100; i += 2 { a.Set(i) } a.AndNot(a) for i := uint32(0); i < 100; i++ { assert.Equal(t, false, a.Contains(i), "for "+strconv.Itoa(int(i))) } assert.Equal(t, 0, a.Count()) } func TestAndNot_DifferentBitmapSizes(t *testing.T) { var a, b, c, d Bitmap for i := uint32(0); i < 100; i += 2 { a.Set(i) c.Set(i) } for i := uint32(0); i < 200; i += 2 { b.Set(i) d.Set(i) } a.AndNot(b) d.AndNot(c) for i := uint32(0); i < 100; i++ { assert.Equal(t, false, a.Contains(i), "for "+strconv.Itoa(int(i))) assert.Equal(t, false, d.Contains(i), "for "+strconv.Itoa(int(i))) } for i := uint32(100); i < 200; i++ { assert.Equal(t, b.Contains(i), d.Contains(i), "for "+strconv.Itoa(int(i))) } assert.Equal(t, 0, a.Count()) assert.Equal(t, 50, d.Count()) } func TestOr(t *testing.T) { a, b := Bitmap{}, Bitmap{} for i := uint32(0); i < 100; i += 2 { b.Set(i) } a.Or(b) assert.False(t, a.Contains(1)) for i := uint32(0); i < 100; i += 2 { assert.True(t, a.Contains(i)) } } func TestOr_DifferentBitmapSizes(t *testing.T) { var a, b, c, d Bitmap for i := uint32(0); i < 100; i += 2 { a.Set(i) c.Set(i) } for i := uint32(0); i < 200; i += 2 { b.Set(i) d.Set(i) } a.Or(b) d.Or(c) for i := uint32(0); i < 200; i++ { assert.Equal(t, d.Contains(i), a.Contains(i), "for "+strconv.Itoa(int(i))) } assert.Equal(t, 100, a.Count()) assert.Equal(t, 100, d.Count()) } func TestXor(t *testing.T) { a, b := Bitmap{}, Bitmap{} for i := uint32(0); i < 100; i += 2 { b.Set(i) } a.Xor(b) assert.False(t, a.Contains(1)) for i := uint32(0); i < 100; i += 2 { assert.True(t, a.Contains(i)) } } func TestXOr_DifferentBitmapSizes(t *testing.T) { var a, b, c, d Bitmap for i := uint32(0); i < 100; i += 2 { a.Set(i) c.Set(i) } for i := uint32(0); i < 200; i += 2 { 
b.Set(i) d.Set(i) } a.Xor(b) d.Xor(c) for i := uint32(0); i < 200; i++ { assert.Equal(t, d.Contains(i), a.Contains(i), "for "+strconv.Itoa(int(i))) } assert.Equal(t, 50, a.Count()) assert.Equal(t, 50, d.Count()) } func TestMin(t *testing.T) { { a := Bitmap{0x0, 0x0, 0xffffffffffffff00} v, ok := a.Min() assert.True(t, ok) assert.Equal(t, 64+64+8, int(v)) assert.False(t, a.Contains(v-1)) assert.True(t, a.Contains(v)) } { a := Bitmap{0x0, 0x0} v, ok := a.Min() assert.False(t, ok) assert.Equal(t, 0, int(v)) } } func TestMax(t *testing.T) { { a := Bitmap{0x0, 0x0, 0x00000000000000f0} v, ok := a.Max() assert.True(t, ok) assert.Equal(t, 64+64+7, int(v)) assert.False(t, a.Contains(v-4)) assert.True(t, a.Contains(v-3)) assert.True(t, a.Contains(v-2)) assert.True(t, a.Contains(v-1)) assert.True(t, a.Contains(v)) assert.False(t, a.Contains(v+1)) assert.False(t, a.Contains(v+2)) } { a := Bitmap{0x0, 0x0} v, ok := a.Max() assert.False(t, ok) assert.Equal(t, 0, int(v)) } } func TestMinZero(t *testing.T) { { a := Bitmap{0xffffffffffffffff, 0xffffffffffffffff, 0xf0ffffffffffff0f} v, ok := a.MinZero() assert.True(t, ok) assert.Equal(t, 64+64+4, int(v)) assert.False(t, a.Contains(v)) } { a := Bitmap{0xffffffffffffffff, 0xffffffffffffffff} v, ok := a.MinZero() assert.False(t, ok) assert.Equal(t, 0, int(v)) } } func TestMaxZero(t *testing.T) { { a := Bitmap{0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffff0f} v, ok := a.MaxZero() assert.True(t, ok) assert.Equal(t, 64+64+7, int(v)) assert.False(t, a.Contains(v)) } { a := Bitmap{0xffffffffffffffff, 0xffffffffffffffff} v, ok := a.MaxZero() assert.False(t, ok) assert.Equal(t, 0, int(v)) } } func TestCount(t *testing.T) { a := Bitmap{} assert.Equal(t, 0, a.Count()) assert.Equal(t, 0, a.CountTo(math.MaxUint32)) b := Bitmap{} b.Set(1) b.Set(2) b.Set(5) b.Set(6) b.Set(101) b.Set(102) b.Set(105) b.Set(106) assert.Equal(t, 8, b.Count()) assert.Equal(t, 1, b.CountTo(2)) assert.Equal(t, 2, b.CountTo(4)) assert.Equal(t, 4, b.CountTo(100)) 
assert.Equal(t, 4, b.CountTo(101)) assert.Equal(t, 5, b.CountTo(102)) assert.Equal(t, 8, b.CountTo(128)) assert.Equal(t, 8, b.CountTo(math.MaxUint32)) b.Set(127) assert.Equal(t, 9, b.CountTo(128)) } func TestGrow(t *testing.T) { bitmap := make(Bitmap, 1, 5) bitmap[0] = 42 assert.Equal(t, 1, len(bitmap)) assert.Equal(t, 5, cap(bitmap)) assert.Equal(t, Bitmap{42}, bitmap) bitmap.grow(0) assert.Equal(t, 1, len(bitmap)) assert.Equal(t, 5, cap(bitmap)) assert.Equal(t, Bitmap{42}, bitmap) bitmap.grow(4) assert.Equal(t, 5, len(bitmap)) assert.Equal(t, 5, cap(bitmap)) assert.Equal(t, Bitmap{42, 0, 0, 0, 0}, bitmap) bitmap.grow(5) assert.Equal(t, 6, len(bitmap)) assert.Equal(t, Bitmap{42, 0, 0, 0, 0, 0}, bitmap) bitmap.Grow(6) } func TestAnd_DifferentBitmapSizes(t *testing.T) { var a, b, c, d Bitmap for i := uint32(0); i < 100; i += 2 { a.Set(i) c.Set(i) } for i := uint32(0); i < 200; i += 2 { b.Set(i) d.Set(i) } a.And(b) d.And(c) for i := uint32(0); i < 200; i++ { assert.Equal(t, a.Contains(i), d.Contains(i), "for "+strconv.Itoa(int(i))) } assert.Equal(t, 50, a.Count()) assert.Equal(t, 50, d.Count()) } func TestAnd_ConsecutiveAnd_DifferentBitmapSizes(t *testing.T) { var a, b, c Bitmap for i := uint32(0); i < 200; i += 2 { a.Set(i) c.Set(i) } for i := uint32(0); i < 100; i += 2 { b.Set(i) } a.And(b) a.And(c) for i := uint32(0); i < 200; i++ { assert.Equal(t, a.Contains(i), b.Contains(i), "for "+strconv.Itoa(int(i))) } assert.Equal(t, 50, a.Count()) } func TestResizeBitmap(t *testing.T) { assert.Equal(t, 1, resize(100, 0)) assert.Equal(t, 2, resize(100, 1)) assert.Equal(t, 4, resize(100, 2)) assert.Equal(t, 16, resize(100, 11)) assert.Equal(t, 256, resize(100, 255)) assert.Equal(t, 1232, resize(100, 1000)) assert.Equal(t, 1232, resize(200, 1000)) assert.Equal(t, 1232, resize(512, 1000)) assert.Equal(t, 1213, resize(500, 1000)) // Inconsistent assert.Equal(t, 22504, resize(512, 20000)) assert.Equal(t, 28322, resize(22504, 22600)) } func TestMinInteger(t *testing.T) { tests := 
[][3]int{ {10, 20, 10}, {20, 10, 10}, {0, 10, 0}, {10, 0, 0}, {10, 10, 10}, {10, -10, -10}, {-10, 10, -10}, {-10, 0, -10}, {-10, -10, -10}, } for _, tc := range tests { assert.Equal(t, tc[2], minint(tc[0], tc[1]), fmt.Sprintf("min(%v, %v)", tc[0], tc[1])) } } func TestMaxInteger(t *testing.T) { tests := [][3]int{ {10, 20, 20}, {20, 10, 20}, {0, 10, 10}, {10, 0, 10}, {10, 10, 10}, {10, -10, 10}, {-10, 10, 10}, {-10, 0, 0}, {-10, -10, -10}, } for _, tc := range tests { assert.Equal(t, tc[2], maxint(tc[0], tc[1]), fmt.Sprintf("max(%v, %v)", tc[0], tc[1])) } } func TestBatched(t *testing.T) { const bits = 0b0011 // Functions to test tests := []func(Bitmap) func(Bitmap, ...Bitmap){ func(b Bitmap) func(Bitmap, ...Bitmap) { return b.And }, func(b Bitmap) func(Bitmap, ...Bitmap) { return b.AndNot }, func(b Bitmap) func(Bitmap, ...Bitmap) { return b.Or }, func(b Bitmap) func(Bitmap, ...Bitmap) { return b.Xor }, } for _, withHw := range []int{isAccelerated, isUnsupported} { for i, tc := range tests { t.Run(fmt.Sprintf("%v,avx=%v", i, withHw), func(t *testing.T) { hardware = withHw naive := func(n int) Bitmap { input := Bitmap{bits} tc(input)(Bitmap{bits}) for i := 0; i < n; i++ { tc(input)(Bitmap{bits}) } return input } for n := 0; n < 5; n++ { input := Bitmap{bits} other := Bitmap{bits} extra := make([]Bitmap, 0, n) for i := 0; i < n; i++ { extra = append(extra, Bitmap{bits}) } tc(input)(other, extra...) 
assert.Equal(t, naive(n), input) } }) } } } func TestEmptyAnd(t *testing.T) { var a, b Bitmap a.And(b) assert.Equal(t, 0, a.Count()) } func TestEmptyAndNot(t *testing.T) { var a, b Bitmap a.AndNot(b) assert.Equal(t, 0, a.Count()) } func TestEmptyOr(t *testing.T) { var a, b Bitmap a.Or(b) assert.Equal(t, 0, a.Count()) } func TestEmptyXor(t *testing.T) { var a, b Bitmap a.Xor(b) assert.Equal(t, 0, a.Count()) } func TestTruthTables_NoSIMD(t *testing.T) { hardware = isUnsupported testTruthTables(t) } func TestTruthTables_SIMD(t *testing.T) { hardware = isAccelerated testTruthTables(t) } func testTruthTables(t *testing.T) { { // AND a := Bitmap{0b0011, 0b1011, 0b1100, 0b0000, 0b0011, 0b1011, 0b1100, 0b0000, 0b0011} a.And(Bitmap{0b0101, 0b1101, 0b1010, 0b1111, 0b0101, 0b1101, 0b1010, 0b1111, 0b0101}) assert.Equal(t, 0b0001, int(a[0])) assert.Equal(t, 0b1001, int(a[1])) assert.Equal(t, 0b1000, int(a[2])) assert.Equal(t, 0b0000, int(a[3])) assert.Equal(t, 0b0001, int(a[4])) assert.Equal(t, 0b1001, int(a[5])) assert.Equal(t, 0b1000, int(a[6])) assert.Equal(t, 0b0000, int(a[7])) assert.Equal(t, 0b0001, int(a[8])) } { // AND NOT a := Bitmap{0b0011, 0, 0, 0} a.AndNot(Bitmap{0b0101}) assert.Equal(t, 0b0010, int(a[0])) } { // OR a := Bitmap{0b0011, 0, 0, 0} a.Or(Bitmap{0b0101}) assert.Equal(t, 0b0111, int(a[0])) } { // XOR a := Bitmap{0b0011, 0, 0, 0} a.Xor(Bitmap{0b0101}) assert.Equal(t, 0b0110, int(a[0])) } } golang-github-kelindar-bitmap-1.5.5/codec.go000066400000000000000000000110631517523267600207350ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. package bitmap import ( "encoding/binary" "encoding/hex" "encoding/json" "fmt" "io" "reflect" "strconv" "strings" "unsafe" ) // FromBytes reads a bitmap from a byte buffer without copying the buffer. 
func FromBytes(buffer []byte) (out Bitmap) { switch { case len(buffer) == 0: return nil case len(buffer)%8 != 0: panic(fmt.Sprintf("bitmap: buffer length expected to be multiple of 8, was %d", len(buffer))) } hdr := (*reflect.SliceHeader)(unsafe.Pointer(&out)) hdr.Len = len(buffer) >> 3 hdr.Cap = hdr.Len hdr.Data = uintptr(unsafe.Pointer(&(buffer)[0])) return out } // ToBytes converts the bitmap to binary representation without copying the underlying // data. The output buffer should not be modified, since it would also change the bitmap. func (dst *Bitmap) ToBytes() (out []byte) { if len(*dst) == 0 { return nil } hdr := (*reflect.SliceHeader)(unsafe.Pointer(&out)) hdr.Len = len(*dst) * 8 hdr.Cap = hdr.Len hdr.Data = uintptr(unsafe.Pointer(&(*dst)[0])) return out } // ReadFrom reads the bitmap from the reader. func ReadFrom(r io.Reader) (Bitmap, error) { var output Bitmap _, err := output.ReadFrom(r) return output, err } // WriteTo writes the bitmap to a specified writer. func (dst *Bitmap) WriteTo(w io.Writer) (int64, error) { buffer := dst.ToBytes() // Write the header into the stream var header [4]byte binary.BigEndian.PutUint32(header[:4], uint32(len(buffer))) n1, err := w.Write(header[:4]) if err != nil { return int64(n1), err } // Write the buffer into the stream n2, err := w.Write(buffer) if err != nil { return int64(n2), err } return int64(n1 + n2), err } // ReadFrom reads data from r until EOF or error. The return value n is the number of // bytes read. Any error except EOF encountered during the read is also returned. 
func (dst *Bitmap) ReadFrom(r io.Reader) (int64, error) { var header [4]byte if n, err := io.ReadFull(r, header[:]); err != nil { return int64(n), err } // If bitmap is too small, create one of the required size if size := int(binary.BigEndian.Uint32(header[:4])) / 8; size > len(*dst) { *dst = make(Bitmap, size) } // Read into the buffer buffer := dst.ToBytes() n, err := io.ReadFull(r, buffer) return int64(n + 4), err } // Clone clones the bitmap. If a destination bitmap is provided, the bitmap will be // cloned inside, otherwise a new Bitmap will be allocated and returned func (dst Bitmap) Clone(into *Bitmap) Bitmap { if into == nil { newm := make(Bitmap, len(dst)) into = &newm } max := maxlen(*into, dst, nil) into.grow(max - 1) copy(*into, dst) return (*into)[:len(dst)] } // Clear clears the bitmap and resizes it to zero. func (dst *Bitmap) Clear() { for i := range *dst { (*dst)[i] = 0 } *dst = (*dst)[:0] } // MarshalJSON returns encoded string representation for the bitmap func (dst Bitmap) MarshalJSON() ([]byte, error) { var sb strings.Builder for i := len(dst) - 1; i >= 0; i-- { // convert each uint64 into 16 * 4-bit hexadecimal character writeHexdecimal(&sb, dst[i], true) } return json.Marshal(sb.String()) } // writeHexdecimal write the hexdecimal representation for given value in buffer func writeHexdecimal(sb *strings.Builder, value uint64, pad bool) { maxLen := 16 // 64 bits / 4 hexadecimal := strings.ToUpper(strconv.FormatUint(value, 16)) hexaLen := len(hexadecimal) if !pad || hexaLen == maxLen { sb.WriteString(hexadecimal) return } // Add padding for i := hexaLen; i < maxLen; i++ { sb.WriteString("0") } sb.WriteString(hexadecimal) } // UnmarshalJSON decodes the received bytes and loads it to bitmap object func (dst *Bitmap) UnmarshalJSON(data []byte) (err error) { var str string if data == nil { *dst = make(Bitmap, 0) return } if err := json.Unmarshal(data, &str); err != nil { return err } mp, err := fromHex(str) if err != nil { return err } *dst = mp 
return nil } // fromHex reads a hexadecimal string and converts it to bitmap, character at index 0 is the most significant func fromHex(hexString string) (Bitmap, error) { bytes, err := hex.DecodeString(hexString) switch { case err != nil: return nil, err case len(bytes) == 0: return nil, nil } // reverse bytes to maintain bytes significance order (least significant = hexString tail = list head) for l, r := 0, len(bytes)-1; l < r; l, r = l+1, r-1 { bytes[l], bytes[r] = bytes[r], bytes[l] } for len(bytes)%8 != 0 { bytes = append(bytes, 0) } return FromBytes(bytes), nil } golang-github-kelindar-bitmap-1.5.5/codec_test.go000066400000000000000000000105051517523267600217740ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. package bitmap import ( "bytes" "encoding/json" "math" "math/rand" "strings" "testing" "github.com/klauspost/cpuid/v2" "github.com/stretchr/testify/assert" ) func BenchmarkCodec(b *testing.B) { tmp := bytes.NewBuffer(nil) run(b, "write-to", func(index Bitmap) { tmp.Reset() index.WriteTo(tmp) }) run(b, "read-from", func(index Bitmap) { ReadFrom(tmp) }) } func TestSaveLoad(t *testing.T) { m := Bitmap{} for i := 0; i <= 5000; i += 10 { m.Set(uint32(i)) } // Save the map enc := new(bytes.Buffer) cloned := m.Clone(nil) n, err := cloned.WriteTo(enc) assert.NoError(t, err) assert.Equal(t, int64(636), n) // Load the map back out, err := ReadFrom(enc) assert.NoError(t, err) assert.Equal(t, len(m), len(out)) assert.Equal(t, m, out) } func TestFromBytes(t *testing.T) { m := Bitmap{} for i := 0; i <= 5000; i += 10 { m.Set(uint32(i)) } out := FromBytes(m.ToBytes()) assert.Equal(t, m, out) } func TestFromBytesNil(t *testing.T) { out := FromBytes(nil) assert.Nil(t, out) } func TestFromBytesInvalid(t *testing.T) { m := make([]byte, 10) for i := 1; i < 8; i++ { assert.Panics(t, func() { FromBytes(m[:i]) }) } } func TestToBytesNil(t 
*testing.T) { var m Bitmap out := m.ToBytes() assert.Nil(t, out) } func TestJSON(t *testing.T) { mp := Bitmap{} for i := 0; i < 1000; i++ { mp.Set(uint32(rand.Intn(10000))) } data, err := json.Marshal(mp) assert.NoError(t, err) newMp := Bitmap{} assert.NoError(t, json.Unmarshal(data, &newMp)) assert.Equal(t, mp, newMp) assert.NoError(t, mp.UnmarshalJSON(nil)) assert.Empty(t, mp) assert.Error(t, mp.UnmarshalJSON([]byte("\"notvalid"))) assert.Error(t, mp.UnmarshalJSON([]byte("\"Z\""))) } func TestToHexadecimal(t *testing.T) { type Case struct { Input uint64 Pad bool Output string } tests := []Case{{ Input: 0, Pad: false, Output: "0", }, { Input: 42, Pad: false, Output: "2A", }, { Input: math.MaxUint64, Pad: false, Output: "FFFFFFFFFFFFFFFF", }, { Input: 15, Pad: true, Output: "000000000000000F", }, } for _, tc := range tests { sb := strings.Builder{} writeHexdecimal(&sb, tc.Input, tc.Pad) assert.Equal(t, tc.Output, sb.String()) } } func TestFromHex(t *testing.T) { bm, err := fromHex("FFA001") assert.NoError(t, err) assert.Equal(t, Bitmap{0xFFA001}, bm) bm, err = fromHex("000000000000000000000000000000000001") assert.NoError(t, err) assert.Equal(t, Bitmap{1, 0, 0}, bm) _, err = fromHex("Not Valid") assert.Error(t, err) bm, err = fromHex("") assert.NoError(t, err) assert.Nil(t, bm) } func TestDimensionsOf(t *testing.T) { testCases := []struct { n int m int expected uint64 }{ {0, 0, 0}, {10, 11, 0xb0000000a}, } for _, tc := range testCases { d := dimensionsOf(tc.n, tc.m) assert.Equal(t, tc.expected, d) } } func TestPointersOf(t *testing.T) { testCases := []struct { inputOther Bitmap inputExtra []Bitmap }{ {Bitmap{1}, []Bitmap{{2}}}, {Bitmap{1}, []Bitmap{{2}, {3}}}, } for _, tc := range testCases { ptr, max := pointersOf(tc.inputOther, tc.inputExtra) assert.NotNil(t, ptr) assert.NotZero(t, uintptr(ptr)) assert.NotZero(t, max) } } func TestLevelOfWithEnabledFeatures(t *testing.T) { testCases := []struct { name string featureIDs []cpuid.FeatureID expected int }{ { name: 
"AVX-512F, AVX-512BW, and AVX-512DQ support", featureIDs: []cpuid.FeatureID{cpuid.AVX512F, cpuid.AVX512BW, cpuid.AVX512DQ}, expected: isAVX512, }, { name: "AVX2 and FMA3 support", featureIDs: []cpuid.FeatureID{cpuid.AVX2, cpuid.FMA3}, expected: isAccelerated, }, { name: "NEON support on ARM64", featureIDs: []cpuid.FeatureID{cpuid.ASIMD}, expected: isAccelerated, }, { name: "Unsupported feature combination", featureIDs: []cpuid.FeatureID{cpuid.SHA3, cpuid.AESARM}, expected: isUnsupported, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { cpu := cpuid.CPUInfo{} for _, feature := range tc.featureIDs { cpu.Enable(feature) } level := levelOf(cpu) assert.Equal(t, tc.expected, level, "expected to return %d, but got %d", tc.expected, level) }) } } golang-github-kelindar-bitmap-1.5.5/codegen/000077500000000000000000000000001517523267600207345ustar00rootroot00000000000000golang-github-kelindar-bitmap-1.5.5/codegen/generate.sh000066400000000000000000000005101517523267600230560ustar00rootroot00000000000000#!/bin/bash # requires gocc: go install github.com/kelindar/gocc/cmd/gocc@latest gocc simd_avx.c --arch avx2 -O1 --package bitmap -o ../ gocc simd_avx512.c --arch avx512 -O3 --package bitmap -o ../ gocc simd_neon.c --arch neon -O3 --package bitmap -o ../ gocc simd_apple.c --arch apple -O3 --package bitmap -o ../golang-github-kelindar-bitmap-1.5.5/codegen/simd_apple.c000066400000000000000000000066431517523267600232260ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. 
// _and_many ANDs m input arrays into a, in place: a[i] &= b[j][i] for every
// j in [0, m) and i in [0, n).
//
//   a    - destination array, read and written for indices [0, n)
//   b    - array of m pointers; each b[j] is read for indices [0, n)
//   dims - packed dimensions: n in the low 32 bits, m in the high 32 bits
void _and_many(uint64_t* a, uint64_t** b, uint64_t dims) {
    int64_t n = (dims & 0xffffffff); // words per array
    int64_t m = (dims >> 32);        // number of input arrays

    const int64_t chunk_size = 512;

    // Walk a in chunks of up to chunk_size words; every input is applied to
    // the current chunk before moving on to the next one.
    // NOTE(review): presumably done so the chunk of a stays cache-resident
    // across the m passes - confirm.
    for (int64_t chunk = 0; chunk < n; chunk += chunk_size) {
        int64_t chunk_end = chunk + chunk_size;
        if (chunk_end > n) {
            chunk_end = n; // last chunk may be shorter
        }

        for (int64_t j = 0; j < m; ++j) {
            #pragma clang loop vectorize(enable) interleave(enable)
            for (int64_t i = chunk; i < chunk_end; ++i) {
                a[i] &= b[j][i];
            }
        }
    }
}
< chunk_end; ++i) { a[i] |= b[j][i]; } } } } void _xor_many(uint64_t* a, uint64_t** b, uint64_t dims) { int64_t n = (dims & 0xffffffff); int64_t m = (dims >> 32); const int64_t chunk_size = 512; // Loop over chunks of b for (int64_t chunk = 0; chunk < n; chunk += chunk_size) { int64_t chunk_end = chunk + chunk_size; if (chunk_end > n) { chunk_end = n; } for (int64_t j = 0; j < m; ++j) { #pragma clang loop vectorize(enable) interleave(enable) for (int64_t i = chunk; i < chunk_end; ++i) { a[i] ^= b[j][i]; } } } } void _count(uint64_t *a, uint64_t size, uint64_t *result) { uint64_t count = 0; for (int i = 0; i < size; i++) { count += __builtin_popcountll(a[i]); } *result = count; } golang-github-kelindar-bitmap-1.5.5/codegen/simd_avx.c000066400000000000000000000066431517523267600227230ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. #include void _and(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) for (uint64_t i = 0; i < n; ++i) { a[i] &= b[i]; } } void _andn(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) interleave(enable) for (uint64_t i = 0; i < n; ++i) { a[i] &= ~b[i]; } } void _or(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) interleave(enable) for (uint64_t i = 0; i < n; ++i) { a[i] |= b[i]; } } void _xor(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) interleave(enable) for (uint64_t i = 0; i < n; ++i) { a[i] ^= b[i]; } } void _and_many(uint64_t* a, uint64_t** b, uint64_t dims) { int64_t n = (dims & 0xffffffff); int64_t m = (dims >> 32); const int64_t chunk_size = 512; // Loop over chunks of b for (int64_t chunk = 0; chunk < n; chunk += chunk_size) { int64_t chunk_end = chunk + chunk_size; if (chunk_end > n) { chunk_end = n; } for (int64_t j = 0; j < m; ++j) { #pragma clang loop vectorize(enable) 
// _count stores in *result the total number of set bits across the first
// `size` 64-bit words of a.
//
//   a      - input words, read for indices [0, size)
//   size   - number of words to examine; 0 yields a result of 0
//   result - receives the population count; always written
//
// The loop index has the same type as `size`. The previous `int` index was
// compared against an unsigned 64-bit bound and would overflow for sizes above
// INT_MAX.
void _count(uint64_t *a, uint64_t size, uint64_t *result) {
    uint64_t count = 0;
    for (uint64_t i = 0; i < size; ++i) {
        count += (uint64_t)__builtin_popcountll(a[i]);
    }
    *result = count;
}
#include void _and_avx512(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) for (uint64_t i = 0; i < n; ++i) { a[i] &= b[i]; } } void _andn_avx512(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) interleave(enable) for (uint64_t i = 0; i < n; ++i) { a[i] &= ~b[i]; } } void _or_avx512(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) interleave(enable) for (uint64_t i = 0; i < n; ++i) { a[i] |= b[i]; } } void _xor_avx512(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) interleave(enable) for (uint64_t i = 0; i < n; ++i) { a[i] ^= b[i]; } } void _and_many_avx512(uint64_t* a, uint64_t** b, uint64_t dims) { int64_t n = (dims & 0xffffffff); int64_t m = (dims >> 32); const int64_t chunk_size = 512; // Loop over chunks of b for (int64_t chunk = 0; chunk < n; chunk += chunk_size) { int64_t chunk_end = chunk + chunk_size; if (chunk_end > n) { chunk_end = n; } for (int64_t j = 0; j < m; ++j) { #pragma clang loop vectorize(enable) interleave(enable) for (int64_t i = chunk; i < chunk_end; ++i) { a[i] &= b[j][i]; } } } } void _andn_many_avx512(uint64_t* a, uint64_t** b, uint64_t dims) { int64_t n = (dims & 0xffffffff); int64_t m = (dims >> 32); const int64_t chunk_size = 512; // Loop over chunks of b for (int64_t chunk = 0; chunk < n; chunk += chunk_size) { int64_t chunk_end = chunk + chunk_size; if (chunk_end > n) { chunk_end = n; } for (int64_t j = 0; j < m; ++j) { #pragma clang loop vectorize(enable) interleave(enable) for (int64_t i = chunk; i < chunk_end; ++i) { a[i] &= ~b[j][i]; } } } } void _or_many_avx512(uint64_t* a, uint64_t** b, uint64_t dims) { int64_t n = (dims & 0xffffffff); int64_t m = (dims >> 32); const int64_t chunk_size = 512; // Loop over chunks of b for (int64_t chunk = 0; chunk < n; chunk += chunk_size) { int64_t chunk_end = chunk + chunk_size; if (chunk_end > n) { chunk_end = n; } for (int64_t j = 0; j < m; ++j) { #pragma clang loop 
vectorize(enable) interleave(enable) for (int64_t i = chunk; i < chunk_end; ++i) { a[i] |= b[j][i]; } } } } void _xor_many_avx512(uint64_t* a, uint64_t** b, uint64_t dims) { int64_t n = (dims & 0xffffffff); int64_t m = (dims >> 32); const int64_t chunk_size = 512; // Loop over chunks of b for (int64_t chunk = 0; chunk < n; chunk += chunk_size) { int64_t chunk_end = chunk + chunk_size; if (chunk_end > n) { chunk_end = n; } for (int64_t j = 0; j < m; ++j) { #pragma clang loop vectorize(enable) interleave(enable) for (int64_t i = chunk; i < chunk_end; ++i) { a[i] ^= b[j][i]; } } } } golang-github-kelindar-bitmap-1.5.5/codegen/simd_neon.c000066400000000000000000000066431517523267600230640ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. #include void _and(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) for (uint64_t i = 0; i < n; ++i) { a[i] &= b[i]; } } void _andn(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) interleave(enable) for (uint64_t i = 0; i < n; ++i) { a[i] &= ~b[i]; } } void _or(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) interleave(enable) for (uint64_t i = 0; i < n; ++i) { a[i] |= b[i]; } } void _xor(uint64_t* a, uint64_t* b, uint64_t n) { #pragma clang loop vectorize(enable) interleave(enable) for (uint64_t i = 0; i < n; ++i) { a[i] ^= b[i]; } } void _and_many(uint64_t* a, uint64_t** b, uint64_t dims) { int64_t n = (dims & 0xffffffff); int64_t m = (dims >> 32); const int64_t chunk_size = 512; // Loop over chunks of b for (int64_t chunk = 0; chunk < n; chunk += chunk_size) { int64_t chunk_end = chunk + chunk_size; if (chunk_end > n) { chunk_end = n; } for (int64_t j = 0; j < m; ++j) { #pragma clang loop vectorize(enable) interleave(enable) for (int64_t i = chunk; i < chunk_end; ++i) { a[i] &= b[j][i]; } } } } void 
_andn_many(uint64_t* a, uint64_t** b, uint64_t dims) { int64_t n = (dims & 0xffffffff); int64_t m = (dims >> 32); const int64_t chunk_size = 512; // Loop over chunks of b for (int64_t chunk = 0; chunk < n; chunk += chunk_size) { int64_t chunk_end = chunk + chunk_size; if (chunk_end > n) { chunk_end = n; } for (int64_t j = 0; j < m; ++j) { #pragma clang loop vectorize(enable) interleave(enable) for (int64_t i = chunk; i < chunk_end; ++i) { a[i] &= ~b[j][i]; } } } } void _or_many(uint64_t* a, uint64_t** b, uint64_t dims) { int64_t n = (dims & 0xffffffff); int64_t m = (dims >> 32); const int64_t chunk_size = 512; // Loop over chunks of b for (int64_t chunk = 0; chunk < n; chunk += chunk_size) { int64_t chunk_end = chunk + chunk_size; if (chunk_end > n) { chunk_end = n; } for (int64_t j = 0; j < m; ++j) { #pragma clang loop vectorize(enable) interleave(enable) for (int64_t i = chunk; i < chunk_end; ++i) { a[i] |= b[j][i]; } } } } void _xor_many(uint64_t* a, uint64_t** b, uint64_t dims) { int64_t n = (dims & 0xffffffff); int64_t m = (dims >> 32); const int64_t chunk_size = 512; // Loop over chunks of b for (int64_t chunk = 0; chunk < n; chunk += chunk_size) { int64_t chunk_end = chunk + chunk_size; if (chunk_end > n) { chunk_end = n; } for (int64_t j = 0; j < m; ++j) { #pragma clang loop vectorize(enable) interleave(enable) for (int64_t i = chunk; i < chunk_end; ++i) { a[i] ^= b[j][i]; } } } } void _count(uint64_t *a, uint64_t size, uint64_t *result) { uint64_t count = 0; for (int i = 0; i < size; i++) { count += __builtin_popcountll(a[i]); } *result = count; } golang-github-kelindar-bitmap-1.5.5/go.mod000066400000000000000000000005371517523267600204430ustar00rootroot00000000000000module github.com/kelindar/bitmap go 1.24.0 require ( github.com/kelindar/simd v1.2.0 github.com/klauspost/cpuid/v2 v2.3.0 github.com/stretchr/testify v1.11.1 ) require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect golang.org/x/sys v0.41.0 // 
indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) golang-github-kelindar-bitmap-1.5.5/go.sum000066400000000000000000000025441517523267600204700ustar00rootroot00000000000000github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/kelindar/simd v1.2.0 h1:1nSnINZRchuZwjnfqM01gV04RkJg0zz62ZC4hZQRYis= github.com/kelindar/simd v1.2.0/go.mod h1:inq4DFudC7W8L5fhxoeZflLRNpWSs0GNx6MlWFvuvr0= github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= golang-github-kelindar-bitmap-1.5.5/range.go000066400000000000000000000200241517523267600207510ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. package bitmap import ( "unsafe" "github.com/kelindar/simd" ) const full = 0xffffffffffffffff // Range iterates over all of the bits set to one in this bitmap. 
func (dst Bitmap) Range(fn func(x uint32)) { for blkAt := 0; blkAt < len(dst); blkAt++ { blk := (dst)[blkAt] if blk == 0x0 { continue // Skip the empty page } // Iterate in a 4-bit chunks so we can reduce the number of function calls and skip // the bits for which we should not call our range function. offset := uint32(blkAt << 6) for ; blk > 0; blk = blk >> 4 { switch blk & 0b1111 { case 0b0001: fn(offset + 0) case 0b0010: fn(offset + 1) case 0b0011: fn(offset + 0) fn(offset + 1) case 0b0100: fn(offset + 2) case 0b0101: fn(offset + 0) fn(offset + 2) case 0b0110: fn(offset + 1) fn(offset + 2) case 0b0111: fn(offset + 0) fn(offset + 1) fn(offset + 2) case 0b1000: fn(offset + 3) case 0b1001: fn(offset + 0) fn(offset + 3) case 0b1010: fn(offset + 1) fn(offset + 3) case 0b1011: fn(offset + 0) fn(offset + 1) fn(offset + 3) case 0b1100: fn(offset + 2) fn(offset + 3) case 0b1101: fn(offset + 0) fn(offset + 2) fn(offset + 3) case 0b1110: fn(offset + 1) fn(offset + 2) fn(offset + 3) case 0b1111: fn(offset + 0) fn(offset + 1) fn(offset + 2) fn(offset + 3) } offset += 4 } } } // Filter predicate type predicate = func(x uint32) byte // Filter iterates over the bitmap elements and calls a predicate provided for each // containing element. If the predicate returns false, the bitmap at the element's // position is set to zero. func (dst *Bitmap) Filter(f func(x uint32) bool) { fn := *(*predicate)(unsafe.Pointer(&f)) for blkAt := 0; blkAt < len(*dst); blkAt++ { blk := (*dst)[blkAt] if blk == 0x0 { continue // Skip the empty page } offset := uint32(blkAt << 6) var mask uint64 var i uint32 // Iterate in a 4-bit chunks so we can reduce the number of function calls and skip // the bits for which we should not call our filter function. 
for ; blk > 0; blk = blk >> 4 { switch blk & 0b1111 { case 0b0001: mask |= uint64(fn(offset)) << i case 0b0010: mask |= uint64(fn(offset+1)<<1) << i case 0b0011: mask |= uint64(fn(offset)|(fn(offset+1)<<1)) << i case 0b0100: mask |= uint64(fn(offset+2)<<2) << i case 0b0101: mask |= uint64(fn(offset)|fn(offset+2)<<2) << i case 0b0110: mask |= uint64((fn(offset+1)<<1)|(fn(offset+2)<<2)) << i case 0b0111: mask |= uint64(fn(offset)|(fn(offset+1)<<1)|(fn(offset+2)<<2)) << i case 0b1000: mask |= uint64(fn(offset+3)<<3) << i case 0b1001: mask |= uint64(fn(offset)|(fn(offset+3)<<3)) << i case 0b1010: mask |= uint64((fn(offset+1)<<1)|(fn(offset+3)<<3)) << i case 0b1011: mask |= uint64(fn(offset)|(fn(offset+1)<<1)|(fn(offset+3)<<3)) << i case 0b1100: mask |= uint64((fn(offset+2)<<2)|(fn(offset+3)<<3)) << i case 0b1101: mask |= uint64(fn(offset)|(fn(offset+2)<<2)|(fn(offset+3)<<3)) << i case 0b1110: mask |= uint64((fn(offset+1)<<1)|(fn(offset+2)<<2)|(fn(offset+3)<<3)) << i case 0b1111: mask |= uint64(fn(offset)|(fn(offset+1)<<1)|(fn(offset+2)<<2)|(fn(offset+3)<<3)) << i } i += 4 offset += 4 } // Apply the mask (*dst)[blkAt] &= mask } } // Sum computes a horizontal sum of a slice, filtered by the provided bitmap func Sum[T simd.Number](src []T, filter Bitmap) (sum T) { tail := minint(len(src)/64, len(filter)) << 6 // End of 64-byte blocks last := minint(len(src), len(filter)*64) // End of slice or mask var frame [64]T var i0, i1 int for i1 = 0; i1 < tail; i1 += 64 { switch filter[i1>>6] { case full: continue // Continue buffering case 0: default: sum += simd.Sum(leftPack(&frame, src[i1:i1+64], filter[i1>>6])) } // Flush the current buffer if (i1 - i0) > 0 { sum += simd.Sum(src[i0:i1]) } i0 = i1 + 64 } // Flush the accumulated buffer so far if (i1 - i0) > 0 { sum += simd.Sum(src[i0:i1]) } // Process the tail for i := tail; i < last; i++ { if filter.Contains(uint32(i)) { sum += src[i] } } return sum } // Min finds the smallest value in a slice, filtered by the provided bitmap 
func Min[T simd.Number](src []T, filter Bitmap) (min T, hit bool) { tail := minint(len(src)/64, len(filter)) << 6 // End of 64-byte blocks last := minint(len(src), len(filter)*64) // End of slice or mask var frame [64]T var i0, i1 int for i1 = 0; i1 < tail; i1 += 64 { switch filter[i1>>6] { case full: continue // Continue buffering case 0: default: if m := simd.Min(leftPack(&frame, src[i1:i1+64], filter[i1>>6])); m < min || !hit { hit = true min = m } } // Flush the current buffer if (i1 - i0) > 0 { if m := simd.Min(src[i0:i1]); m < min || !hit { hit = true min = m } } i0 = i1 + 64 } // Flush the accumulated buffer so far if (i1 - i0) > 0 { if m := simd.Min(src[i0:i1]); m < min || !hit { hit = true min = m } } // Process the tail for i := tail; i < last; i++ { if filter.Contains(uint32(i)) && (src[i] < min || !hit) { hit = true min = src[i] } } return } // Max finds the largest value in a slice, filtered by the provided bitmap func Max[T simd.Number](src []T, filter Bitmap) (max T, hit bool) { tail := minint(len(src)/64, len(filter)) << 6 // End of 64-byte blocks last := minint(len(src), len(filter)*64) // End of slice or mask var frame [64]T var i0, i1 int for i1 = 0; i1 < tail; i1 += 64 { switch filter[i1>>6] { case full: continue // Continue buffering case 0: default: if m := simd.Max(leftPack(&frame, src[i1:i1+64], filter[i1>>6])); m > max || !hit { hit = true max = m } } // Flush the current buffer if (i1 - i0) > 0 { if m := simd.Max(src[i0:i1]); m > max || !hit { hit = true max = m } } i0 = i1 + 64 } // Flush the accumulated buffer so far if (i1 - i0) > 0 { if m := simd.Max(src[i0:i1]); m > max || !hit { hit = true max = m } } // Process the tail for i := tail; i < last; i++ { if filter.Contains(uint32(i)) && (src[i] > max || !hit) { hit = true max = src[i] } } return } // leftPack left-packs a src slice into a dst for a single block blk func leftPack[T any](dst *[64]T, src []T, blk uint64) []T { offset := 0 cursor := 0 for ; blk > 0; blk = blk >> 4 { switch 
blk & 0b1111 { case 0b0001: dst[cursor] = src[offset+0] cursor += 1 case 0b0010: dst[cursor] = src[offset+1] cursor += 1 case 0b0011: dst[cursor] = src[offset+0] dst[cursor+1] = src[offset+1] cursor += 2 case 0b0100: dst[cursor] = src[offset+2] cursor += 1 case 0b0101: dst[cursor] = src[offset+0] dst[cursor+1] = src[offset+2] cursor += 2 case 0b0110: dst[cursor] = src[offset+1] dst[cursor+1] = src[offset+2] cursor += 2 case 0b0111: dst[cursor] = src[offset+0] dst[cursor+1] = src[offset+1] dst[cursor+2] = src[offset+2] cursor += 3 case 0b1000: dst[cursor] = src[offset+3] cursor += 1 case 0b1001: dst[cursor] = src[offset+0] dst[cursor+1] = src[offset+3] cursor += 2 case 0b1010: dst[cursor] = src[offset+1] dst[cursor+1] = src[offset+3] cursor += 2 case 0b1011: dst[cursor] = src[offset+0] dst[cursor+1] = src[offset+1] dst[cursor+2] = src[offset+3] cursor += 3 case 0b1100: dst[cursor] = src[offset+2] dst[cursor+1] = src[offset+3] cursor += 2 case 0b1101: dst[cursor] = src[offset+0] dst[cursor+1] = src[offset+2] dst[cursor+2] = src[offset+3] cursor += 3 case 0b1110: dst[cursor] = src[offset+1] dst[cursor+1] = src[offset+2] dst[cursor+2] = src[offset+3] cursor += 3 case 0b1111: dst[cursor] = src[offset+0] dst[cursor+1] = src[offset+1] dst[cursor+2] = src[offset+2] dst[cursor+3] = src[offset+3] cursor += 4 } offset += 4 } return (*dst)[:cursor] } golang-github-kelindar-bitmap-1.5.5/range_test.go000066400000000000000000000175521517523267600220240ustar00rootroot00000000000000package bitmap import ( "testing" "github.com/kelindar/simd" "github.com/stretchr/testify/assert" ) /* cpu: Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz BenchmarkRange/range-8 1891 674656 ns/op 0 B/op 0 allocs/op BenchmarkRange/filter-8 2222 535359 ns/op 0 B/op 0 allocs/op */ func BenchmarkRange(b *testing.B) { var i uint32 run(b, "range", func(index Bitmap) { index.Range(func(x uint32) { i = x }) }) run(b, "filter", func(index Bitmap) { index.Filter(func(x uint32) bool { return x%2 == 0 }) }) _ = i } /* 
cpu: Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz BenchmarkAggregate/sum-8 1849 627004 ns/op 0 B/op 0 allocs/op BenchmarkAggregate/sum-full-8 16939 68971 ns/op 0 B/op 0 allocs/op BenchmarkAggregate/min-8 1474 868868 ns/op 0 B/op 0 allocs/op BenchmarkAggregate/min-full-8 17082 68719 ns/op 0 B/op 0 allocs/op BenchmarkAggregate/max-8 1322 864578 ns/op 0 B/op 0 allocs/op BenchmarkAggregate/max-full-8 17354 69015 ns/op 0 B/op 0 allocs/op */ func BenchmarkAggregate(b *testing.B) { target := make([]float32, 1000000) run(b, "sum", func(index Bitmap) { Sum(target, index) }) runFull(b, "sum-full", func(index Bitmap) { Sum(target, index) }) run(b, "min", func(index Bitmap) { Min(target, index) }) runFull(b, "min-full", func(index Bitmap) { Min(target, index) }) run(b, "max", func(index Bitmap) { Max(target, index) }) runFull(b, "max-full", func(index Bitmap) { Max(target, index) }) } func TestFilter(t *testing.T) { a := make(Bitmap, 4) a.Ones() assert.Equal(t, 256, a.Count()) // Filter out odd a.Filter(func(x uint32) bool { return x%2 == 0 }) assert.Equal(t, 128, a.Count()) // Filter out even a.Filter(func(x uint32) bool { assert.Equal(t, 0, int(x%2)) // Must be odd return x%2 == 1 }) assert.Equal(t, 0, a.Count()) // Filter cases for i := 0; i < 512; i++ { b := Bitmap{uint64(i)} c1 := b.Count() c2 := 0 b.Filter(func(x uint32) bool { c2++ return true }) // We must have the minimum number of function calls assert.Equal(t, c1, c2) assert.Equal(t, uint64(i), b[0]) } } func TestRangeCases(t *testing.T) { for i := 0; i < 512; i++ { b := Bitmap{uint64(i)} c1 := b.Count() c2 := 0 b.Range(func(x uint32) { c2++ return }) // We must have the minimum number of function calls assert.Equal(t, c1, c2) assert.Equal(t, uint64(i), b[0]) } } func TestRangeIndex(t *testing.T) { a := make(Bitmap, 2) a.Ones() triangular := 0 a.Range(func(x uint32) { triangular += int(x) return }) assert.Equal(t, 8128, triangular) } // ----------------------------- Aggregation ----------------------------- func 
TestAggSum(t *testing.T) { { // Empty Bitmap arr, index := makeAggregateInput(0x0, 0x0) assert.Equal(t, sumNaive(arr, index), Sum(arr, index)) } { // Partial Bitmap arr, index := makeAggregateInput(0xffffffffffffffff, 0x0123456789abcdef) assert.Equal(t, sumNaive(arr, index), Sum(arr, index)) } { // Full Bitmap arr, index := makeAggregateInput(0xffffffffffffffff, 0xffffffffffffffff) assert.Equal(t, sumNaive(arr, index), Sum(arr, index)) } { // Nil Bitmap arr, _ := makeAggregateInput(0x0, 0x0) assert.Equal(t, sumNaive(arr, nil), Sum(arr, nil)) } { // Nil Array _, index := makeAggregateInput(0x0, 0x0) assert.Equal(t, sumNaive([]int{}, index), Sum([]int{}, index)) } } func TestAggMin(t *testing.T) { { // Empty Bitmap arr, index := makeAggregateInput(0x0, 0x0) expect, ok1 := minNaive(arr, index) result, ok2 := Min(arr, index) assert.Equal(t, expect, result) assert.Equal(t, ok1, ok2) } { // Partial Bitmap arr, index := makeAggregateInput(0xffffffffffffffff, 0x0123456789abcdef) expect, ok1 := minNaive(arr, index) result, ok2 := Min(arr, index) assert.Equal(t, expect, result) assert.Equal(t, ok1, ok2) } { // Full Bitmap arr, index := makeAggregateInput(0xffffffffffffffff, 0xffffffffffffffff) expect, ok1 := minNaive(arr, index) result, ok2 := Min(arr, index) assert.Equal(t, expect, result) assert.Equal(t, ok1, ok2) } { // Nil Bitmap arr, _ := makeAggregateInput(0x0, 0x0) expect, ok1 := minNaive(arr, nil) result, ok2 := Min(arr, nil) assert.Equal(t, expect, result) assert.Equal(t, ok1, ok2) } { // Nil Array _, index := makeAggregateInput(0x0, 0x0) expect, ok1 := minNaive([]int{}, index) result, ok2 := Min([]int{}, index) assert.Equal(t, expect, result) assert.Equal(t, ok1, ok2) } } func TestAggMax(t *testing.T) { { // Empty Bitmap arr, index := makeAggregateInput(0x0, 0x0) expect, ok1 := maxNaive(arr, index) result, ok2 := Max(arr, index) assert.Equal(t, expect, result) assert.Equal(t, ok1, ok2) } { // Partial Bitmap arr, index := makeAggregateInput(0xffffffffffffffff, 
0x0123456789abcdef) expect, ok1 := maxNaive(arr, index) result, ok2 := Max(arr, index) assert.Equal(t, expect, result) assert.Equal(t, ok1, ok2) } { // Full Bitmap arr, index := makeAggregateInput(0xffffffffffffffff, 0xffffffffffffffff) expect, ok1 := maxNaive(arr, index) result, ok2 := Max(arr, index) assert.Equal(t, expect, result) assert.Equal(t, ok1, ok2) } { // Nil Bitmap arr, _ := makeAggregateInput(0x0, 0x0) expect, ok1 := maxNaive(arr, nil) result, ok2 := Max(arr, nil) assert.Equal(t, expect, result) assert.Equal(t, ok1, ok2) } { // Nil Array _, index := makeAggregateInput(0x0, 0x0) expect, ok1 := maxNaive([]int{}, index) result, ok2 := Max([]int{}, index) assert.Equal(t, expect, result) assert.Equal(t, ok1, ok2) } } func TestLeftPack(t *testing.T) { src, index := makeAggregateInput(0x0123456789abcdef, 0x0123456789abcdef) dst := leftPack(&[64]int{}, src, index[0]) assert.Equal(t, 32, len(dst)) } // ----------------------------- Naive Aggregation Funcs ----------------------------- func sumNaive[T simd.Number](src []T, index Bitmap) (out T) { size := minint(len(src), len(index)*64) for i := 0; i < size; i++ { if index.Contains(uint32(i)) { out += src[i] } } return } func minNaive[T simd.Number](src []T, index Bitmap) (T, bool) { if len(src) == 0 || index.Count() == 0 { return 0, false } size := minint(len(src), len(index)*64) out := src[0] for i := 0; i < size; i++ { if index.Contains(uint32(i)) && src[i] < out { out = src[i] } } return out, true } func maxNaive[T simd.Number](src []T, index Bitmap) (T, bool) { if len(src) == 0 || index.Count() == 0 { return 0, false } size := minint(len(src), len(index)*64) out := src[0] for i := 0; i < size; i++ { if index.Contains(uint32(i)) && src[i] > out { out = src[i] } } return out, true } func makeAggregateInput(filter1, filter2 uint64) ([]int, Bitmap) { index := make(Bitmap, 0, 80) for i := 0; i < 80; i += 2 { index = append(index, filter1, filter2) } var arr []int for i := 0; i < 5000; i++ { arr = append(arr, 
100+i) } arr[102] = 50 arr[101] = 5000 arr[152] = 40 arr[151] = 6000 arr[4999] = 30 arr[4998] = 20000 return arr, index } // ----------------------------- Benchmark ----------------------------- // run runs a benchmark func run(b *testing.B, name string, f func(index Bitmap)) { count := 1000064 b.Run(name, func(b *testing.B) { index := make(Bitmap, count/64) index.Grow(uint32(count)) for i := 0; i < len(index); i++ { index[i] = 0xf0f0f0f0f0f0f0f0 } b.ReportAllocs() b.ResetTimer() for n := 0; n < b.N; n++ { f(index) } }) } // run runs a benchmark on a full bitmap func runFull(b *testing.B, name string, f func(index Bitmap)) { count := 1000000 b.Run(name, func(b *testing.B) { index := make(Bitmap, count/64) index.Grow(uint32(count - 1)) for i := 0; i < len(index); i++ { index[i] = 0xffffffffffffffff } b.ReportAllocs() b.ResetTimer() for n := 0; n < b.N; n++ { f(index) } }) } golang-github-kelindar-bitmap-1.5.5/simd_apple.go000066400000000000000000000016151517523267600217770ustar00rootroot00000000000000//go:build !noasm && darwin && arm64 // AUTO-GENERATED BY GOCC -- DO NOT EDIT package bitmap import "unsafe" //go:nosplit //go:noescape func _and(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _andn(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _or(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _xor(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _and_many(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _andn_many(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _or_many(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _xor_many(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _count(a unsafe.Pointer, size uint64, result unsafe.Pointer) 
golang-github-kelindar-bitmap-1.5.5/simd_apple.s000066400000000000000000000564621517523267600216460ustar00rootroot00000000000000//go:build !noasm && darwin && arm64 // AUTO-GENERATED BY GOCC -- DO NOT EDIT TEXT ·_and(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD n+16(FP), R2 WORD $0xb40002a2 // cbz x2, LBB0_7 WORD $0xf100105f // cmp x2, #4 WORD $0x54000103 // b.lo LBB0_4 WORD $0xd37df048 // lsl x8, x2, #3 WORD $0x8b080029 // add x9, x1, x8 WORD $0xeb00013f // cmp x9, x0 WORD $0x54000209 // b.ls LBB0_8 WORD $0x8b080008 // add x8, x0, x8 WORD $0xeb01011f // cmp x8, x1 WORD $0x540001a9 // b.ls LBB0_8 BB0_4: WORD $0xd2800008 // mov x8, #0 BB0_5: WORD $0xcb080049 // sub x9, x2, x8 WORD $0xd37df10a // lsl x10, x8, #3 WORD $0x8b0a0008 // add x8, x0, x10 WORD $0x8b0a002a // add x10, x1, x10 BB0_6: WORD $0xf840854b // ldr x11, [x10], #8 WORD $0xf940010c // ldr x12, [x8] WORD $0x8a0b018b // and x11, x12, x11 WORD $0xf800850b // str x11, [x8], #8 WORD $0xf1000529 // subs x9, x9, #1 WORD $0x54ffff61 // b.ne LBB0_6 BB0_7: WORD $0xd65f03c0 // ret BB0_8: WORD $0x927ef448 // and x8, x2, #0xfffffffffffffffc WORD $0x91004029 // add x9, x1, #16 WORD $0x9100400a // add x10, x0, #16 WORD $0xaa0803eb // mov x11, x8 BB0_9: WORD $0xad7f8520 // ldp q0, q1, [x9, #-16] WORD $0xad7f8d42 // ldp q2, q3, [x10, #-16] WORD $0x4e201c40 // and.16b v0, v2, v0 WORD $0x4e211c61 // and.16b v1, v3, v1 WORD $0xad3f8540 // stp q0, q1, [x10, #-16] WORD $0x91008129 // add x9, x9, #32 WORD $0x9100814a // add x10, x10, #32 WORD $0xf100116b // subs x11, x11, #4 WORD $0x54ffff01 // b.ne LBB0_9 WORD $0xeb02011f // cmp x8, x2 WORD $0x54fffe20 // b.eq LBB0_7 WORD $0x17ffffe6 // b LBB0_5 TEXT ·_andn(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD n+16(FP), R2 WORD $0xb40002a2 // cbz x2, LBB1_7 WORD $0xf100105f // cmp x2, #4 WORD $0x54000103 // b.lo LBB1_4 WORD $0xd37df048 // lsl x8, x2, #3 WORD $0x8b080029 // add x9, x1, x8 WORD $0xeb00013f // cmp x9, x0 WORD $0x54000209 // b.ls LBB1_8 WORD $0x8b080008 // add 
x8, x0, x8 WORD $0xeb01011f // cmp x8, x1 WORD $0x540001a9 // b.ls LBB1_8 BB1_4: WORD $0xd2800008 // mov x8, #0 BB1_5: WORD $0xcb080049 // sub x9, x2, x8 WORD $0xd37df10a // lsl x10, x8, #3 WORD $0x8b0a0008 // add x8, x0, x10 WORD $0x8b0a002a // add x10, x1, x10 BB1_6: WORD $0xf840854b // ldr x11, [x10], #8 WORD $0xf940010c // ldr x12, [x8] WORD $0x8a2b018b // bic x11, x12, x11 WORD $0xf800850b // str x11, [x8], #8 WORD $0xf1000529 // subs x9, x9, #1 WORD $0x54ffff61 // b.ne LBB1_6 BB1_7: WORD $0xd65f03c0 // ret BB1_8: WORD $0x927ef448 // and x8, x2, #0xfffffffffffffffc WORD $0x91004029 // add x9, x1, #16 WORD $0x9100400a // add x10, x0, #16 WORD $0xaa0803eb // mov x11, x8 BB1_9: WORD $0xad7f8520 // ldp q0, q1, [x9, #-16] WORD $0xad7f8d42 // ldp q2, q3, [x10, #-16] WORD $0x4e601c40 // bic.16b v0, v2, v0 WORD $0x4e611c61 // bic.16b v1, v3, v1 WORD $0xad3f8540 // stp q0, q1, [x10, #-16] WORD $0x91008129 // add x9, x9, #32 WORD $0x9100814a // add x10, x10, #32 WORD $0xf100116b // subs x11, x11, #4 WORD $0x54ffff01 // b.ne LBB1_9 WORD $0xeb02011f // cmp x8, x2 WORD $0x54fffe20 // b.eq LBB1_7 WORD $0x17ffffe6 // b LBB1_5 TEXT ·_or(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD n+16(FP), R2 WORD $0xb40002a2 // cbz x2, LBB2_7 WORD $0xf100105f // cmp x2, #4 WORD $0x54000103 // b.lo LBB2_4 WORD $0xd37df048 // lsl x8, x2, #3 WORD $0x8b080029 // add x9, x1, x8 WORD $0xeb00013f // cmp x9, x0 WORD $0x54000209 // b.ls LBB2_8 WORD $0x8b080008 // add x8, x0, x8 WORD $0xeb01011f // cmp x8, x1 WORD $0x540001a9 // b.ls LBB2_8 BB2_4: WORD $0xd2800008 // mov x8, #0 BB2_5: WORD $0xcb080049 // sub x9, x2, x8 WORD $0xd37df10a // lsl x10, x8, #3 WORD $0x8b0a0008 // add x8, x0, x10 WORD $0x8b0a002a // add x10, x1, x10 BB2_6: WORD $0xf840854b // ldr x11, [x10], #8 WORD $0xf940010c // ldr x12, [x8] WORD $0xaa0b018b // orr x11, x12, x11 WORD $0xf800850b // str x11, [x8], #8 WORD $0xf1000529 // subs x9, x9, #1 WORD $0x54ffff61 // b.ne LBB2_6 BB2_7: WORD $0xd65f03c0 // ret BB2_8: WORD 
$0x927ef448 // and x8, x2, #0xfffffffffffffffc WORD $0x91004029 // add x9, x1, #16 WORD $0x9100400a // add x10, x0, #16 WORD $0xaa0803eb // mov x11, x8 BB2_9: WORD $0xad7f8520 // ldp q0, q1, [x9, #-16] WORD $0xad7f8d42 // ldp q2, q3, [x10, #-16] WORD $0x4ea01c40 // orr.16b v0, v2, v0 WORD $0x4ea11c61 // orr.16b v1, v3, v1 WORD $0xad3f8540 // stp q0, q1, [x10, #-16] WORD $0x91008129 // add x9, x9, #32 WORD $0x9100814a // add x10, x10, #32 WORD $0xf100116b // subs x11, x11, #4 WORD $0x54ffff01 // b.ne LBB2_9 WORD $0xeb02011f // cmp x8, x2 WORD $0x54fffe20 // b.eq LBB2_7 WORD $0x17ffffe6 // b LBB2_5 TEXT ·_xor(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD n+16(FP), R2 WORD $0xb40002a2 // cbz x2, LBB3_7 WORD $0xf100105f // cmp x2, #4 WORD $0x54000103 // b.lo LBB3_4 WORD $0xd37df048 // lsl x8, x2, #3 WORD $0x8b080029 // add x9, x1, x8 WORD $0xeb00013f // cmp x9, x0 WORD $0x54000209 // b.ls LBB3_8 WORD $0x8b080008 // add x8, x0, x8 WORD $0xeb01011f // cmp x8, x1 WORD $0x540001a9 // b.ls LBB3_8 BB3_4: WORD $0xd2800008 // mov x8, #0 BB3_5: WORD $0xcb080049 // sub x9, x2, x8 WORD $0xd37df10a // lsl x10, x8, #3 WORD $0x8b0a0008 // add x8, x0, x10 WORD $0x8b0a002a // add x10, x1, x10 BB3_6: WORD $0xf840854b // ldr x11, [x10], #8 WORD $0xf940010c // ldr x12, [x8] WORD $0xca0b018b // eor x11, x12, x11 WORD $0xf800850b // str x11, [x8], #8 WORD $0xf1000529 // subs x9, x9, #1 WORD $0x54ffff61 // b.ne LBB3_6 BB3_7: WORD $0xd65f03c0 // ret BB3_8: WORD $0x927ef448 // and x8, x2, #0xfffffffffffffffc WORD $0x91004029 // add x9, x1, #16 WORD $0x9100400a // add x10, x0, #16 WORD $0xaa0803eb // mov x11, x8 BB3_9: WORD $0xad7f8520 // ldp q0, q1, [x9, #-16] WORD $0xad7f8d42 // ldp q2, q3, [x10, #-16] WORD $0x6e201c40 // eor.16b v0, v2, v0 WORD $0x6e211c61 // eor.16b v1, v3, v1 WORD $0xad3f8540 // stp q0, q1, [x10, #-16] WORD $0x91008129 // add x9, x9, #32 WORD $0x9100814a // add x10, x10, #32 WORD $0xf100116b // subs x11, x11, #4 WORD $0x54ffff01 // b.ne LBB3_9 WORD $0xeb02011f // cmp 
x8, x2 WORD $0x54fffe20 // b.eq LBB3_7 WORD $0x17ffffe6 // b LBB3_5 TEXT ·_and_many(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD dims+16(FP), R2 WORD $0xa9bc67fa // stp x26, x25, [sp, #-64]! ; 16-byte Folded Spill WORD $0xa9015ff8 // stp x24, x23, [sp, #16] ; 16-byte Folded Spill WORD $0xa90257f6 // stp x22, x21, [sp, #32] ; 16-byte Folded Spill WORD $0xa9034ff4 // stp x20, x19, [sp, #48] ; 16-byte Folded Spill WORD $0xf2407c48 // ands x8, x2, #0xffffffff WORD $0x54000a20 // b.eq LBB4_14 WORD $0xd360fc4b // lsr x11, x2, #32 WORD $0xb40009eb // cbz x11, LBB4_14 WORD $0xd2800009 // mov x9, #0 WORD $0xd280000a // mov x10, #0 WORD $0xd280000f // mov x15, #0 WORD $0xf100057f // cmp x11, #1 WORD $0x9a9f856b // csinc x11, x11, xzr, hi WORD $0x9100400c // add x12, x0, #16 WORD $0x5280400d // mov w13, #512 WORD $0x5280020e // mov w14, #16 WORD $0x14000009 // b LBB4_4 BB4_3: WORD $0x910801ad // add x13, x13, #512 WORD $0x9100054a // add x10, x10, #1 WORD $0xd1080129 // sub x9, x9, #512 WORD $0x914005ce // add x14, x14, #1, lsl #12 ; =4096 WORD $0x9140058c // add x12, x12, #1, lsl #12 ; =4096 WORD $0xaa1003ef // mov x15, x16 WORD $0xeb08021f // cmp x16, x8 WORD $0x540007c2 // b.hs LBB4_14 BB4_4: WORD $0xeb0801bf // cmp x13, x8 WORD $0x9a8831b1 // csel x17, x13, x8, lo WORD $0x910801f0 // add x16, x15, #512 WORD $0xeb08021f // cmp x16, x8 WORD $0x9a883202 // csel x2, x16, x8, lo WORD $0xeb0201ff // cmp x15, x2 WORD $0x54fffe42 // b.hs LBB4_3 WORD $0xd2800002 // mov x2, #0 WORD $0x8b090223 // add x3, x17, x9 WORD $0x927ef463 // and x3, x3, #0xfffffffffffffffc WORD $0xcb0a2624 // sub x4, x17, x10, lsl #9 WORD $0xd374cd45 // lsl x5, x10, #12 WORD $0xd37df086 // lsl x6, x4, #3 WORD $0xd10020d5 // sub x21, x6, #8 WORD $0x8b050006 // add x6, x0, x5 WORD $0x8b1500c7 // add x7, x6, x21 WORD $0x910020e7 // add x7, x7, #8 WORD $0x927ef493 // and x19, x4, #0xfffffffffffffffc WORD $0x8b1301f4 // add x20, x15, x19 WORD $0x8b1500b5 // add x21, x5, x21 WORD $0x14000004 // b LBB4_7 
BB4_6: WORD $0x91000442 // add x2, x2, #1 WORD $0xeb0b005f // cmp x2, x11 WORD $0x54fffc20 // b.eq LBB4_3 BB4_7: WORD $0xf8627836 // ldr x22, [x1, x2, lsl #3] WORD $0xaa0f03f8 // mov x24, x15 WORD $0xf100109f // cmp x4, #4 WORD $0x540002e3 // b.lo LBB4_12 WORD $0x8b0502d7 // add x23, x22, x5 WORD $0x8b1502d8 // add x24, x22, x21 WORD $0x91002318 // add x24, x24, #8 WORD $0xeb1800df // cmp x6, x24 WORD $0xfa4732e2 // ccmp x23, x7, #2, lo WORD $0xaa0f03f8 // mov x24, x15 WORD $0x54000203 // b.lo LBB4_12 WORD $0x8b0e02d7 // add x23, x22, x14 WORD $0xaa0c03f8 // mov x24, x12 WORD $0xaa0303f9 // mov x25, x3 BB4_10: WORD $0xad7f86e0 // ldp q0, q1, [x23, #-16] WORD $0xad7f8f02 // ldp q2, q3, [x24, #-16] WORD $0x4e201c40 // and.16b v0, v2, v0 WORD $0x4e211c61 // and.16b v1, v3, v1 WORD $0xad3f8700 // stp q0, q1, [x24, #-16] WORD $0x910082f7 // add x23, x23, #32 WORD $0x91008318 // add x24, x24, #32 WORD $0xf1001339 // subs x25, x25, #4 WORD $0x54ffff01 // b.ne LBB4_10 WORD $0xaa1403f8 // mov x24, x20 WORD $0xeb13009f // cmp x4, x19 WORD $0x54fffc80 // b.eq LBB4_6 BB4_12: WORD $0xcb180237 // sub x23, x17, x24 WORD $0xd37df319 // lsl x25, x24, #3 WORD $0x8b190018 // add x24, x0, x25 WORD $0x8b1902d6 // add x22, x22, x25 BB4_13: WORD $0xf84086d9 // ldr x25, [x22], #8 WORD $0xf940031a // ldr x26, [x24] WORD $0x8a190359 // and x25, x26, x25 WORD $0xf8008719 // str x25, [x24], #8 WORD $0xf10006f7 // subs x23, x23, #1 WORD $0x54ffff61 // b.ne LBB4_13 WORD $0x17ffffd9 // b LBB4_6 BB4_14: WORD $0xa9434ff4 // ldp x20, x19, [sp, #48] ; 16-byte Folded Reload WORD $0xa94257f6 // ldp x22, x21, [sp, #32] ; 16-byte Folded Reload WORD $0xa9415ff8 // ldp x24, x23, [sp, #16] ; 16-byte Folded Reload WORD $0xa8c467fa // ldp x26, x25, [sp], #64 ; 16-byte Folded Reload WORD $0xd65f03c0 // ret TEXT ·_andn_many(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD dims+16(FP), R2 WORD $0xa9bc67fa // stp x26, x25, [sp, #-64]! 
; 16-byte Folded Spill WORD $0xa9015ff8 // stp x24, x23, [sp, #16] ; 16-byte Folded Spill WORD $0xa90257f6 // stp x22, x21, [sp, #32] ; 16-byte Folded Spill WORD $0xa9034ff4 // stp x20, x19, [sp, #48] ; 16-byte Folded Spill WORD $0xf2407c48 // ands x8, x2, #0xffffffff WORD $0x54000a20 // b.eq LBB5_14 WORD $0xd360fc4b // lsr x11, x2, #32 WORD $0xb40009eb // cbz x11, LBB5_14 WORD $0xd2800009 // mov x9, #0 WORD $0xd280000a // mov x10, #0 WORD $0xd280000f // mov x15, #0 WORD $0xf100057f // cmp x11, #1 WORD $0x9a9f856b // csinc x11, x11, xzr, hi WORD $0x9100400c // add x12, x0, #16 WORD $0x5280400d // mov w13, #512 WORD $0x5280020e // mov w14, #16 WORD $0x14000009 // b LBB5_4 BB5_3: WORD $0x910801ad // add x13, x13, #512 WORD $0x9100054a // add x10, x10, #1 WORD $0xd1080129 // sub x9, x9, #512 WORD $0x914005ce // add x14, x14, #1, lsl #12 ; =4096 WORD $0x9140058c // add x12, x12, #1, lsl #12 ; =4096 WORD $0xaa1003ef // mov x15, x16 WORD $0xeb08021f // cmp x16, x8 WORD $0x540007c2 // b.hs LBB5_14 BB5_4: WORD $0xeb0801bf // cmp x13, x8 WORD $0x9a8831b1 // csel x17, x13, x8, lo WORD $0x910801f0 // add x16, x15, #512 WORD $0xeb08021f // cmp x16, x8 WORD $0x9a883202 // csel x2, x16, x8, lo WORD $0xeb0201ff // cmp x15, x2 WORD $0x54fffe42 // b.hs LBB5_3 WORD $0xd2800002 // mov x2, #0 WORD $0x8b090223 // add x3, x17, x9 WORD $0x927ef463 // and x3, x3, #0xfffffffffffffffc WORD $0xcb0a2624 // sub x4, x17, x10, lsl #9 WORD $0xd374cd45 // lsl x5, x10, #12 WORD $0xd37df086 // lsl x6, x4, #3 WORD $0xd10020d5 // sub x21, x6, #8 WORD $0x8b050006 // add x6, x0, x5 WORD $0x8b1500c7 // add x7, x6, x21 WORD $0x910020e7 // add x7, x7, #8 WORD $0x927ef493 // and x19, x4, #0xfffffffffffffffc WORD $0x8b1301f4 // add x20, x15, x19 WORD $0x8b1500b5 // add x21, x5, x21 WORD $0x14000004 // b LBB5_7 BB5_6: WORD $0x91000442 // add x2, x2, #1 WORD $0xeb0b005f // cmp x2, x11 WORD $0x54fffc20 // b.eq LBB5_3 BB5_7: WORD $0xf8627836 // ldr x22, [x1, x2, lsl #3] WORD $0xaa0f03f8 // mov x24, x15 WORD 
$0xf100109f // cmp x4, #4 WORD $0x540002e3 // b.lo LBB5_12 WORD $0x8b0502d7 // add x23, x22, x5 WORD $0x8b1502d8 // add x24, x22, x21 WORD $0x91002318 // add x24, x24, #8 WORD $0xeb1800df // cmp x6, x24 WORD $0xfa4732e2 // ccmp x23, x7, #2, lo WORD $0xaa0f03f8 // mov x24, x15 WORD $0x54000203 // b.lo LBB5_12 WORD $0x8b0e02d7 // add x23, x22, x14 WORD $0xaa0c03f8 // mov x24, x12 WORD $0xaa0303f9 // mov x25, x3 BB5_10: WORD $0xad7f86e0 // ldp q0, q1, [x23, #-16] WORD $0xad7f8f02 // ldp q2, q3, [x24, #-16] WORD $0x4e601c40 // bic.16b v0, v2, v0 WORD $0x4e611c61 // bic.16b v1, v3, v1 WORD $0xad3f8700 // stp q0, q1, [x24, #-16] WORD $0x910082f7 // add x23, x23, #32 WORD $0x91008318 // add x24, x24, #32 WORD $0xf1001339 // subs x25, x25, #4 WORD $0x54ffff01 // b.ne LBB5_10 WORD $0xaa1403f8 // mov x24, x20 WORD $0xeb13009f // cmp x4, x19 WORD $0x54fffc80 // b.eq LBB5_6 BB5_12: WORD $0xcb180237 // sub x23, x17, x24 WORD $0xd37df319 // lsl x25, x24, #3 WORD $0x8b190018 // add x24, x0, x25 WORD $0x8b1902d6 // add x22, x22, x25 BB5_13: WORD $0xf84086d9 // ldr x25, [x22], #8 WORD $0xf940031a // ldr x26, [x24] WORD $0x8a390359 // bic x25, x26, x25 WORD $0xf8008719 // str x25, [x24], #8 WORD $0xf10006f7 // subs x23, x23, #1 WORD $0x54ffff61 // b.ne LBB5_13 WORD $0x17ffffd9 // b LBB5_6 BB5_14: WORD $0xa9434ff4 // ldp x20, x19, [sp, #48] ; 16-byte Folded Reload WORD $0xa94257f6 // ldp x22, x21, [sp, #32] ; 16-byte Folded Reload WORD $0xa9415ff8 // ldp x24, x23, [sp, #16] ; 16-byte Folded Reload WORD $0xa8c467fa // ldp x26, x25, [sp], #64 ; 16-byte Folded Reload WORD $0xd65f03c0 // ret TEXT ·_or_many(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD dims+16(FP), R2 WORD $0xa9bc67fa // stp x26, x25, [sp, #-64]! 
; 16-byte Folded Spill WORD $0xa9015ff8 // stp x24, x23, [sp, #16] ; 16-byte Folded Spill WORD $0xa90257f6 // stp x22, x21, [sp, #32] ; 16-byte Folded Spill WORD $0xa9034ff4 // stp x20, x19, [sp, #48] ; 16-byte Folded Spill WORD $0xf2407c48 // ands x8, x2, #0xffffffff WORD $0x54000a20 // b.eq LBB6_14 WORD $0xd360fc4b // lsr x11, x2, #32 WORD $0xb40009eb // cbz x11, LBB6_14 WORD $0xd2800009 // mov x9, #0 WORD $0xd280000a // mov x10, #0 WORD $0xd280000f // mov x15, #0 WORD $0xf100057f // cmp x11, #1 WORD $0x9a9f856b // csinc x11, x11, xzr, hi WORD $0x9100400c // add x12, x0, #16 WORD $0x5280400d // mov w13, #512 WORD $0x5280020e // mov w14, #16 WORD $0x14000009 // b LBB6_4 BB6_3: WORD $0x910801ad // add x13, x13, #512 WORD $0x9100054a // add x10, x10, #1 WORD $0xd1080129 // sub x9, x9, #512 WORD $0x914005ce // add x14, x14, #1, lsl #12 ; =4096 WORD $0x9140058c // add x12, x12, #1, lsl #12 ; =4096 WORD $0xaa1003ef // mov x15, x16 WORD $0xeb08021f // cmp x16, x8 WORD $0x540007c2 // b.hs LBB6_14 BB6_4: WORD $0xeb0801bf // cmp x13, x8 WORD $0x9a8831b1 // csel x17, x13, x8, lo WORD $0x910801f0 // add x16, x15, #512 WORD $0xeb08021f // cmp x16, x8 WORD $0x9a883202 // csel x2, x16, x8, lo WORD $0xeb0201ff // cmp x15, x2 WORD $0x54fffe42 // b.hs LBB6_3 WORD $0xd2800002 // mov x2, #0 WORD $0x8b090223 // add x3, x17, x9 WORD $0x927ef463 // and x3, x3, #0xfffffffffffffffc WORD $0xcb0a2624 // sub x4, x17, x10, lsl #9 WORD $0xd374cd45 // lsl x5, x10, #12 WORD $0xd37df086 // lsl x6, x4, #3 WORD $0xd10020d5 // sub x21, x6, #8 WORD $0x8b050006 // add x6, x0, x5 WORD $0x8b1500c7 // add x7, x6, x21 WORD $0x910020e7 // add x7, x7, #8 WORD $0x927ef493 // and x19, x4, #0xfffffffffffffffc WORD $0x8b1301f4 // add x20, x15, x19 WORD $0x8b1500b5 // add x21, x5, x21 WORD $0x14000004 // b LBB6_7 BB6_6: WORD $0x91000442 // add x2, x2, #1 WORD $0xeb0b005f // cmp x2, x11 WORD $0x54fffc20 // b.eq LBB6_3 BB6_7: WORD $0xf8627836 // ldr x22, [x1, x2, lsl #3] WORD $0xaa0f03f8 // mov x24, x15 WORD 
$0xf100109f // cmp x4, #4 WORD $0x540002e3 // b.lo LBB6_12 WORD $0x8b0502d7 // add x23, x22, x5 WORD $0x8b1502d8 // add x24, x22, x21 WORD $0x91002318 // add x24, x24, #8 WORD $0xeb1800df // cmp x6, x24 WORD $0xfa4732e2 // ccmp x23, x7, #2, lo WORD $0xaa0f03f8 // mov x24, x15 WORD $0x54000203 // b.lo LBB6_12 WORD $0x8b0e02d7 // add x23, x22, x14 WORD $0xaa0c03f8 // mov x24, x12 WORD $0xaa0303f9 // mov x25, x3 BB6_10: WORD $0xad7f86e0 // ldp q0, q1, [x23, #-16] WORD $0xad7f8f02 // ldp q2, q3, [x24, #-16] WORD $0x4ea01c40 // orr.16b v0, v2, v0 WORD $0x4ea11c61 // orr.16b v1, v3, v1 WORD $0xad3f8700 // stp q0, q1, [x24, #-16] WORD $0x910082f7 // add x23, x23, #32 WORD $0x91008318 // add x24, x24, #32 WORD $0xf1001339 // subs x25, x25, #4 WORD $0x54ffff01 // b.ne LBB6_10 WORD $0xaa1403f8 // mov x24, x20 WORD $0xeb13009f // cmp x4, x19 WORD $0x54fffc80 // b.eq LBB6_6 BB6_12: WORD $0xcb180237 // sub x23, x17, x24 WORD $0xd37df319 // lsl x25, x24, #3 WORD $0x8b190018 // add x24, x0, x25 WORD $0x8b1902d6 // add x22, x22, x25 BB6_13: WORD $0xf84086d9 // ldr x25, [x22], #8 WORD $0xf940031a // ldr x26, [x24] WORD $0xaa190359 // orr x25, x26, x25 WORD $0xf8008719 // str x25, [x24], #8 WORD $0xf10006f7 // subs x23, x23, #1 WORD $0x54ffff61 // b.ne LBB6_13 WORD $0x17ffffd9 // b LBB6_6 BB6_14: WORD $0xa9434ff4 // ldp x20, x19, [sp, #48] ; 16-byte Folded Reload WORD $0xa94257f6 // ldp x22, x21, [sp, #32] ; 16-byte Folded Reload WORD $0xa9415ff8 // ldp x24, x23, [sp, #16] ; 16-byte Folded Reload WORD $0xa8c467fa // ldp x26, x25, [sp], #64 ; 16-byte Folded Reload WORD $0xd65f03c0 // ret TEXT ·_xor_many(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD dims+16(FP), R2 WORD $0xa9bc67fa // stp x26, x25, [sp, #-64]! 
; 16-byte Folded Spill WORD $0xa9015ff8 // stp x24, x23, [sp, #16] ; 16-byte Folded Spill WORD $0xa90257f6 // stp x22, x21, [sp, #32] ; 16-byte Folded Spill WORD $0xa9034ff4 // stp x20, x19, [sp, #48] ; 16-byte Folded Spill WORD $0xf2407c48 // ands x8, x2, #0xffffffff WORD $0x54000a20 // b.eq LBB7_14 WORD $0xd360fc4b // lsr x11, x2, #32 WORD $0xb40009eb // cbz x11, LBB7_14 WORD $0xd2800009 // mov x9, #0 WORD $0xd280000a // mov x10, #0 WORD $0xd280000f // mov x15, #0 WORD $0xf100057f // cmp x11, #1 WORD $0x9a9f856b // csinc x11, x11, xzr, hi WORD $0x9100400c // add x12, x0, #16 WORD $0x5280400d // mov w13, #512 WORD $0x5280020e // mov w14, #16 WORD $0x14000009 // b LBB7_4 BB7_3: WORD $0x910801ad // add x13, x13, #512 WORD $0x9100054a // add x10, x10, #1 WORD $0xd1080129 // sub x9, x9, #512 WORD $0x914005ce // add x14, x14, #1, lsl #12 ; =4096 WORD $0x9140058c // add x12, x12, #1, lsl #12 ; =4096 WORD $0xaa1003ef // mov x15, x16 WORD $0xeb08021f // cmp x16, x8 WORD $0x540007c2 // b.hs LBB7_14 BB7_4: WORD $0xeb0801bf // cmp x13, x8 WORD $0x9a8831b1 // csel x17, x13, x8, lo WORD $0x910801f0 // add x16, x15, #512 WORD $0xeb08021f // cmp x16, x8 WORD $0x9a883202 // csel x2, x16, x8, lo WORD $0xeb0201ff // cmp x15, x2 WORD $0x54fffe42 // b.hs LBB7_3 WORD $0xd2800002 // mov x2, #0 WORD $0x8b090223 // add x3, x17, x9 WORD $0x927ef463 // and x3, x3, #0xfffffffffffffffc WORD $0xcb0a2624 // sub x4, x17, x10, lsl #9 WORD $0xd374cd45 // lsl x5, x10, #12 WORD $0xd37df086 // lsl x6, x4, #3 WORD $0xd10020d5 // sub x21, x6, #8 WORD $0x8b050006 // add x6, x0, x5 WORD $0x8b1500c7 // add x7, x6, x21 WORD $0x910020e7 // add x7, x7, #8 WORD $0x927ef493 // and x19, x4, #0xfffffffffffffffc WORD $0x8b1301f4 // add x20, x15, x19 WORD $0x8b1500b5 // add x21, x5, x21 WORD $0x14000004 // b LBB7_7 BB7_6: WORD $0x91000442 // add x2, x2, #1 WORD $0xeb0b005f // cmp x2, x11 WORD $0x54fffc20 // b.eq LBB7_3 BB7_7: WORD $0xf8627836 // ldr x22, [x1, x2, lsl #3] WORD $0xaa0f03f8 // mov x24, x15 WORD 
$0xf100109f // cmp x4, #4 WORD $0x540002e3 // b.lo LBB7_12 WORD $0x8b0502d7 // add x23, x22, x5 WORD $0x8b1502d8 // add x24, x22, x21 WORD $0x91002318 // add x24, x24, #8 WORD $0xeb1800df // cmp x6, x24 WORD $0xfa4732e2 // ccmp x23, x7, #2, lo WORD $0xaa0f03f8 // mov x24, x15 WORD $0x54000203 // b.lo LBB7_12 WORD $0x8b0e02d7 // add x23, x22, x14 WORD $0xaa0c03f8 // mov x24, x12 WORD $0xaa0303f9 // mov x25, x3 BB7_10: WORD $0xad7f86e0 // ldp q0, q1, [x23, #-16] WORD $0xad7f8f02 // ldp q2, q3, [x24, #-16] WORD $0x6e201c40 // eor.16b v0, v2, v0 WORD $0x6e211c61 // eor.16b v1, v3, v1 WORD $0xad3f8700 // stp q0, q1, [x24, #-16] WORD $0x910082f7 // add x23, x23, #32 WORD $0x91008318 // add x24, x24, #32 WORD $0xf1001339 // subs x25, x25, #4 WORD $0x54ffff01 // b.ne LBB7_10 WORD $0xaa1403f8 // mov x24, x20 WORD $0xeb13009f // cmp x4, x19 WORD $0x54fffc80 // b.eq LBB7_6 BB7_12: WORD $0xcb180237 // sub x23, x17, x24 WORD $0xd37df319 // lsl x25, x24, #3 WORD $0x8b190018 // add x24, x0, x25 WORD $0x8b1902d6 // add x22, x22, x25 BB7_13: WORD $0xf84086d9 // ldr x25, [x22], #8 WORD $0xf940031a // ldr x26, [x24] WORD $0xca190359 // eor x25, x26, x25 WORD $0xf8008719 // str x25, [x24], #8 WORD $0xf10006f7 // subs x23, x23, #1 WORD $0x54ffff61 // b.ne LBB7_13 WORD $0x17ffffd9 // b LBB7_6 BB7_14: WORD $0xa9434ff4 // ldp x20, x19, [sp, #48] ; 16-byte Folded Reload WORD $0xa94257f6 // ldp x22, x21, [sp, #32] ; 16-byte Folded Reload WORD $0xa9415ff8 // ldp x24, x23, [sp, #16] ; 16-byte Folded Reload WORD $0xa8c467fa // ldp x26, x25, [sp], #64 ; 16-byte Folded Reload WORD $0xd65f03c0 // ret TEXT ·_count(SB), $0-32 MOVD a+0(FP), R0 MOVD size+8(FP), R1 MOVD result+16(FP), R2 WORD $0xb40000c1 // cbz x1, LBB8_3 WORD $0xf100103f // cmp x1, #4 WORD $0x540000c2 // b.hs LBB8_4 WORD $0xd2800008 // mov x8, #0 WORD $0xd2800009 // mov x9, #0 WORD $0x14000019 // b LBB8_7 BB8_3: WORD $0xf900005f // str xzr, [x2] WORD $0xd65f03c0 // ret BB8_4: WORD $0x927ef428 // and x8, x1, #0xfffffffffffffffc WORD 
$0x91004009 // add x9, x0, #16 WORD $0x6f00e400 // movi.2d v0, #0000000000000000 WORD $0xaa0803ea // mov x10, x8 WORD $0x6f00e401 // movi.2d v1, #0000000000000000 BB8_5: WORD $0xad7f8d22 // ldp q2, q3, [x9, #-16] WORD $0x4e205842 // cnt.16b v2, v2 WORD $0x6e202842 // uaddlp.8h v2, v2 WORD $0x6e602842 // uaddlp.4s v2, v2 WORD $0x4e205863 // cnt.16b v3, v3 WORD $0x6e202863 // uaddlp.8h v3, v3 WORD $0x6e602863 // uaddlp.4s v3, v3 WORD $0x6ea06840 // uadalp.2d v0, v2 WORD $0x6ea06861 // uadalp.2d v1, v3 WORD $0x91008129 // add x9, x9, #32 WORD $0xf100114a // subs x10, x10, #4 WORD $0x54fffea1 // b.ne LBB8_5 WORD $0x4ee08420 // add.2d v0, v1, v0 WORD $0x5ef1b800 // addp.2d d0, v0 WORD $0x9e660009 // fmov x9, d0 WORD $0xeb01011f // cmp x8, x1 WORD $0x54000140 // b.eq LBB8_9 BB8_7: WORD $0x8b080c0a // add x10, x0, x8, lsl #3 WORD $0xcb080028 // sub x8, x1, x8 BB8_8: WORD $0xfc408540 // ldr d0, [x10], #8 WORD $0x0e205800 // cnt.8b v0, v0 WORD $0x2e303800 // uaddlv.8b h0, v0 WORD $0x1e26000b // fmov w11, s0 WORD $0x8b090169 // add x9, x11, x9 WORD $0xf1000508 // subs x8, x8, #1 WORD $0x54ffff41 // b.ne LBB8_8 BB8_9: WORD $0xf9000049 // str x9, [x2] WORD $0xd65f03c0 // ret golang-github-kelindar-bitmap-1.5.5/simd_avx.go000066400000000000000000000016031517523267600214710ustar00rootroot00000000000000//go:build !noasm && amd64 // AUTO-GENERATED BY GOCC -- DO NOT EDIT package bitmap import "unsafe" //go:nosplit //go:noescape func _and(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _andn(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _or(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _xor(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _and_many(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _andn_many(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _or_many(a unsafe.Pointer, b unsafe.Pointer, 
dims uint64) //go:nosplit //go:noescape func _xor_many(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _count(a unsafe.Pointer, size uint64, result unsafe.Pointer) golang-github-kelindar-bitmap-1.5.5/simd_avx.s000066400000000000000000000770731517523267600213440ustar00rootroot00000000000000//go:build !noasm && amd64 // AUTO-GENERATED BY GOCC -- DO NOT EDIT TEXT ·_and(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ n+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp LONG $0xf8e48348 // and rsp, -8 WORD $0x8548; BYTE $0xd2 // test rdx, rdx JE LBB0_10 LONG $0x04fa8348 // cmp rdx, 4 JAE LBB0_3 WORD $0xc031 // xor eax, eax JMP LBB0_9 LBB0_3: LONG $0xd6048d48 // lea rax, [rsi + 8*rdx] WORD $0x3948; BYTE $0xf8 // cmp rax, rdi JBE LBB0_6 LONG $0xd7048d48 // lea rax, [rdi + 8*rdx] WORD $0x3948; BYTE $0xf0 // cmp rax, rsi JBE LBB0_6 WORD $0xc031 // xor eax, eax JMP LBB0_9 LBB0_6: WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0xfce08348 // and rax, -4 WORD $0xc931 // xor ecx, ecx LBB0_7: LONG $0x0410fcc5; BYTE $0xcf // vmovups ymm0, ymmword ptr [rdi + 8*rcx] LONG $0x0454fcc5; BYTE $0xce // vandps ymm0, ymm0, ymmword ptr [rsi + 8*rcx] LONG $0x0411fcc5; BYTE $0xcf // vmovups ymmword ptr [rdi + 8*rcx], ymm0 LONG $0x04c18348 // add rcx, 4 WORD $0x3948; BYTE $0xc8 // cmp rax, rcx JNE LBB0_7 WORD $0x3948; BYTE $0xd0 // cmp rax, rdx JE LBB0_10 LBB0_9: LONG $0xc60c8b48 // mov rcx, qword ptr [rsi + 8*rax] LONG $0xc70c2148 // and qword ptr [rdi + 8*rax], rcx WORD $0xff48; BYTE $0xc0 // inc rax WORD $0x3948; BYTE $0xc2 // cmp rdx, rax JNE LBB0_9 LBB0_10: WORD $0x8948; BYTE $0xec // mov rsp, rbp BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_andn(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ n+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp LONG $0xf8e48348 // and rsp, -8 WORD $0x8548; BYTE $0xd2 // test rdx, rdx JE LBB1_10 LONG $0x04fa8348 // cmp rdx, 4 JAE 
LBB1_3 WORD $0xc031 // xor eax, eax JMP LBB1_9 LBB1_3: LONG $0xd6048d48 // lea rax, [rsi + 8*rdx] WORD $0x3948; BYTE $0xf8 // cmp rax, rdi JBE LBB1_6 LONG $0xd7048d48 // lea rax, [rdi + 8*rdx] WORD $0x3948; BYTE $0xf0 // cmp rax, rsi JBE LBB1_6 WORD $0xc031 // xor eax, eax JMP LBB1_9 LBB1_6: WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0xfce08348 // and rax, -4 WORD $0xc931 // xor ecx, ecx LBB1_7: LONG $0x0410fcc5; BYTE $0xce // vmovups ymm0, ymmword ptr [rsi + 8*rcx] LONG $0x0455fcc5; BYTE $0xcf // vandnps ymm0, ymm0, ymmword ptr [rdi + 8*rcx] LONG $0x0411fcc5; BYTE $0xcf // vmovups ymmword ptr [rdi + 8*rcx], ymm0 LONG $0x04c18348 // add rcx, 4 WORD $0x3948; BYTE $0xc8 // cmp rax, rcx JNE LBB1_7 WORD $0x3948; BYTE $0xd0 // cmp rax, rdx JE LBB1_10 LBB1_9: LONG $0xc60c8b48 // mov rcx, qword ptr [rsi + 8*rax] WORD $0xf748; BYTE $0xd1 // not rcx LONG $0xc70c2148 // and qword ptr [rdi + 8*rax], rcx WORD $0xff48; BYTE $0xc0 // inc rax WORD $0x3948; BYTE $0xc2 // cmp rdx, rax JNE LBB1_9 LBB1_10: WORD $0x8948; BYTE $0xec // mov rsp, rbp BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_or(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ n+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp LONG $0xf8e48348 // and rsp, -8 WORD $0x8548; BYTE $0xd2 // test rdx, rdx JE LBB2_10 LONG $0x04fa8348 // cmp rdx, 4 JAE LBB2_3 WORD $0xc031 // xor eax, eax JMP LBB2_9 LBB2_3: LONG $0xd6048d48 // lea rax, [rsi + 8*rdx] WORD $0x3948; BYTE $0xf8 // cmp rax, rdi JBE LBB2_6 LONG $0xd7048d48 // lea rax, [rdi + 8*rdx] WORD $0x3948; BYTE $0xf0 // cmp rax, rsi JBE LBB2_6 WORD $0xc031 // xor eax, eax JMP LBB2_9 LBB2_6: WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0xfce08348 // and rax, -4 WORD $0xc931 // xor ecx, ecx LBB2_7: LONG $0x0410fcc5; BYTE $0xcf // vmovups ymm0, ymmword ptr [rdi + 8*rcx] LONG $0x0456fcc5; BYTE $0xce // vorps ymm0, ymm0, ymmword ptr [rsi + 8*rcx] LONG $0x0411fcc5; BYTE $0xcf // vmovups ymmword ptr [rdi + 
8*rcx], ymm0 LONG $0x04c18348 // add rcx, 4 WORD $0x3948; BYTE $0xc8 // cmp rax, rcx JNE LBB2_7 WORD $0x3948; BYTE $0xd0 // cmp rax, rdx JE LBB2_10 LBB2_9: LONG $0xc60c8b48 // mov rcx, qword ptr [rsi + 8*rax] LONG $0xc70c0948 // or qword ptr [rdi + 8*rax], rcx WORD $0xff48; BYTE $0xc0 // inc rax WORD $0x3948; BYTE $0xc2 // cmp rdx, rax JNE LBB2_9 LBB2_10: WORD $0x8948; BYTE $0xec // mov rsp, rbp BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_xor(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ n+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp LONG $0xf8e48348 // and rsp, -8 WORD $0x8548; BYTE $0xd2 // test rdx, rdx JE LBB3_10 LONG $0x04fa8348 // cmp rdx, 4 JAE LBB3_3 WORD $0xc031 // xor eax, eax JMP LBB3_9 LBB3_3: LONG $0xd6048d48 // lea rax, [rsi + 8*rdx] WORD $0x3948; BYTE $0xf8 // cmp rax, rdi JBE LBB3_6 LONG $0xd7048d48 // lea rax, [rdi + 8*rdx] WORD $0x3948; BYTE $0xf0 // cmp rax, rsi JBE LBB3_6 WORD $0xc031 // xor eax, eax JMP LBB3_9 LBB3_6: WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0xfce08348 // and rax, -4 WORD $0xc931 // xor ecx, ecx LBB3_7: LONG $0x0410fcc5; BYTE $0xcf // vmovups ymm0, ymmword ptr [rdi + 8*rcx] LONG $0x0457fcc5; BYTE $0xce // vxorps ymm0, ymm0, ymmword ptr [rsi + 8*rcx] LONG $0x0411fcc5; BYTE $0xcf // vmovups ymmword ptr [rdi + 8*rcx], ymm0 LONG $0x04c18348 // add rcx, 4 WORD $0x3948; BYTE $0xc8 // cmp rax, rcx JNE LBB3_7 WORD $0x3948; BYTE $0xd0 // cmp rax, rdx JE LBB3_10 LBB3_9: LONG $0xc60c8b48 // mov rcx, qword ptr [rsi + 8*rax] LONG $0xc70c3148 // xor qword ptr [rdi + 8*rax], rcx WORD $0xff48; BYTE $0xc0 // inc rax WORD $0x3948; BYTE $0xc2 // cmp rdx, rax JNE LBB3_9 LBB3_10: WORD $0x8948; BYTE $0xec // mov rsp, rbp BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_and_many(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ dims+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp WORD $0x5741 // push r15 
WORD $0x5641 // push r14 WORD $0x5541 // push r13 WORD $0x5441 // push r12 BYTE $0x53 // push rbx LONG $0xf8e48348 // and rsp, -8 LONG $0x68ec8348 // sub rsp, 104 LONG $0xffffffbb; BYTE $0xff // mov ebx, 4294967295 WORD $0x2148; BYTE $0xd3 // and rbx, rdx JE LBB4_14 WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0x20e8c148 // shr rax, 32 LONG $0x02f88348 // cmp rax, 2 LONG $0x0001bc41; WORD $0x0000 // mov r12d, 1 LONG $0x24448948; BYTE $0x28 // mov qword ptr [rsp + 40], rax LONG $0xe0430f4c // cmovae r12, rax LONG $0x000200b9; BYTE $0x00 // mov ecx, 512 WORD $0xc031 // xor eax, eax LONG $0x24448948; BYTE $0x18 // mov qword ptr [rsp + 24], rax WORD $0x8948; BYTE $0xf8 // mov rax, rdi WORD $0xd231 // xor edx, edx LONG $0x24548948; BYTE $0x10 // mov qword ptr [rsp + 16], rdx WORD $0xd231 // xor edx, edx LONG $0x24548948; BYTE $0x08 // mov qword ptr [rsp + 8], rdx WORD $0x3145; BYTE $0xc9 // xor r9d, r9d LONG $0x245c8948; BYTE $0x20 // mov qword ptr [rsp + 32], rbx JMP LBB4_3 LBB4_2: LONG $0x2444ff48; BYTE $0x08 // inc qword ptr [rsp + 8] LONG $0x244c8b48; BYTE $0x38 // mov rcx, qword ptr [rsp + 56] LONG $0x00c18148; WORD $0x0002; BYTE $0x00 // add rcx, 512 QUAD $0xfffe001024448148; BYTE $0xff // add qword ptr [rsp + 16], -512 QUAD $0x0010001824448148; BYTE $0x00 // add qword ptr [rsp + 24], 4096 LONG $0x10000548; WORD $0x0000 // add rax, 4096 LONG $0x245c8b48; BYTE $0x20 // mov rbx, qword ptr [rsp + 32] LONG $0x244c8b4c; BYTE $0x30 // mov r9, qword ptr [rsp + 48] WORD $0x3949; BYTE $0xd9 // cmp r9, rbx JAE LBB4_14 LBB4_3: WORD $0x3948; BYTE $0xd9 // cmp rcx, rbx WORD $0x8949; BYTE $0xda // mov r10, rbx LONG $0x244c8948; BYTE $0x38 // mov qword ptr [rsp + 56], rcx LONG $0xd1420f4c // cmovb r10, rcx LONG $0x00898d49; WORD $0x0002; BYTE $0x00 // lea rcx, [r9 + 512] WORD $0x3948; BYTE $0xd9 // cmp rcx, rbx LONG $0x244c8948; BYTE $0x30 // mov qword ptr [rsp + 48], rcx LONG $0xd9420f48 // cmovb rbx, rcx LONG $0x247c8348; WORD $0x0028 // cmp qword ptr [rsp + 40], 0 JE LBB4_2 
LONG $0x2454034c; BYTE $0x10 // add r10, qword ptr [rsp + 16] LONG $0xfce28349 // and r10, -4 LONG $0x247c8b4c; BYTE $0x08 // mov r15, qword ptr [rsp + 8] WORD $0x894c; BYTE $0xf9 // mov rcx, r15 LONG $0x09e1c148 // shl rcx, 9 WORD $0x8949; BYTE $0xd8 // mov r8, rbx WORD $0x2949; BYTE $0xc8 // sub r8, rcx LONG $0x0ce7c149 // shl r15, 12 LONG $0x3f1c8d4e // lea r11, [rdi + r15] LONG $0x247c894c; BYTE $0x60 // mov qword ptr [rsp + 96], r15 LONG $0xc73c8d4f // lea r15, [r15 + 8*r8] LONG $0x3f0c8d49 // lea rcx, [r15 + rdi] LONG $0x244c8948; BYTE $0x58 // mov qword ptr [rsp + 88], rcx WORD $0x894c; BYTE $0xc1 // mov rcx, r8 LONG $0xfce18348 // and rcx, -4 LONG $0x244c8948; BYTE $0x48 // mov qword ptr [rsp + 72], rcx WORD $0x014c; BYTE $0xc9 // add rcx, r9 LONG $0x244c8948; BYTE $0x40 // mov qword ptr [rsp + 64], rcx WORD $0x3145; BYTE $0xf6 // xor r14d, r14d LONG $0x247c894c; BYTE $0x50 // mov qword ptr [rsp + 80], r15 JMP LBB4_5 LBB4_12: WORD $0xff49; BYTE $0xc6 // inc r14 WORD $0x394d; BYTE $0xe6 // cmp r14, r12 JE LBB4_2 LBB4_5: WORD $0x3949; BYTE $0xd9 // cmp r9, rbx JAE LBB4_12 LONG $0xf6148b4a // mov rdx, qword ptr [rsi + 8*r14] WORD $0x894d; BYTE $0xcd // mov r13, r9 LONG $0x04f88349 // cmp r8, 4 JB LBB4_13 LONG $0x3a0c8d4a // lea rcx, [rdx + r15] WORD $0x3949; BYTE $0xcb // cmp r11, rcx JAE LBB4_9 LONG $0x244c8b48; BYTE $0x60 // mov rcx, qword ptr [rsp + 96] WORD $0x0148; BYTE $0xd1 // add rcx, rdx WORD $0x894d; BYTE $0xcd // mov r13, r9 LONG $0x244c3b48; BYTE $0x58 // cmp rcx, qword ptr [rsp + 88] JB LBB4_13 LBB4_9: WORD $0x894d; BYTE $0xdf // mov r15, r11 WORD $0x8949; BYTE $0xf3 // mov r11, rsi LONG $0x244c8b48; BYTE $0x18 // mov rcx, qword ptr [rsp + 24] LONG $0x0a348d48 // lea rsi, [rdx + rcx] WORD $0xc931 // xor ecx, ecx LBB4_10: LONG $0x0410fcc5; BYTE $0xc8 // vmovups ymm0, ymmword ptr [rax + 8*rcx] LONG $0x0454fcc5; BYTE $0xce // vandps ymm0, ymm0, ymmword ptr [rsi + 8*rcx] LONG $0x0411fcc5; BYTE $0xc8 // vmovups ymmword ptr [rax + 8*rcx], ymm0 LONG 
$0x04c18348 // add rcx, 4 WORD $0x3949; BYTE $0xca // cmp r10, rcx JNE LBB4_10 LONG $0x246c8b4c; BYTE $0x40 // mov r13, qword ptr [rsp + 64] LONG $0x24443b4c; BYTE $0x48 // cmp r8, qword ptr [rsp + 72] WORD $0x894c; BYTE $0xde // mov rsi, r11 WORD $0x894d; BYTE $0xfb // mov r11, r15 LONG $0x247c8b4c; BYTE $0x50 // mov r15, qword ptr [rsp + 80] JE LBB4_12 LBB4_13: LONG $0xea0c8b4a // mov rcx, qword ptr [rdx + 8*r13] LONG $0xef0c214a // and qword ptr [rdi + 8*r13], rcx WORD $0xff49; BYTE $0xc5 // inc r13 WORD $0x3949; BYTE $0xdd // cmp r13, rbx JB LBB4_13 JMP LBB4_12 LBB4_14: LONG $0xd8658d48 // lea rsp, [rbp - 40] BYTE $0x5b // pop rbx WORD $0x5c41 // pop r12 WORD $0x5d41 // pop r13 WORD $0x5e41 // pop r14 WORD $0x5f41 // pop r15 BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_andn_many(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ dims+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp WORD $0x5741 // push r15 WORD $0x5641 // push r14 WORD $0x5541 // push r13 WORD $0x5441 // push r12 BYTE $0x53 // push rbx LONG $0xf8e48348 // and rsp, -8 LONG $0x68ec8348 // sub rsp, 104 LONG $0xffffffbb; BYTE $0xff // mov ebx, 4294967295 WORD $0x2148; BYTE $0xd3 // and rbx, rdx JE LBB5_14 WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0x20e8c148 // shr rax, 32 LONG $0x02f88348 // cmp rax, 2 LONG $0x0001bc41; WORD $0x0000 // mov r12d, 1 LONG $0x24448948; BYTE $0x28 // mov qword ptr [rsp + 40], rax LONG $0xe0430f4c // cmovae r12, rax LONG $0x000200b9; BYTE $0x00 // mov ecx, 512 WORD $0xc031 // xor eax, eax LONG $0x24448948; BYTE $0x18 // mov qword ptr [rsp + 24], rax WORD $0x8948; BYTE $0xf8 // mov rax, rdi WORD $0xd231 // xor edx, edx LONG $0x24548948; BYTE $0x10 // mov qword ptr [rsp + 16], rdx WORD $0xd231 // xor edx, edx LONG $0x24548948; BYTE $0x08 // mov qword ptr [rsp + 8], rdx WORD $0x3145; BYTE $0xc9 // xor r9d, r9d LONG $0x245c8948; BYTE $0x20 // mov qword ptr [rsp + 32], rbx JMP LBB5_3 LBB5_2: LONG 
$0x2444ff48; BYTE $0x08 // inc qword ptr [rsp + 8] LONG $0x244c8b48; BYTE $0x38 // mov rcx, qword ptr [rsp + 56] LONG $0x00c18148; WORD $0x0002; BYTE $0x00 // add rcx, 512 QUAD $0xfffe001024448148; BYTE $0xff // add qword ptr [rsp + 16], -512 QUAD $0x0010001824448148; BYTE $0x00 // add qword ptr [rsp + 24], 4096 LONG $0x10000548; WORD $0x0000 // add rax, 4096 LONG $0x245c8b48; BYTE $0x20 // mov rbx, qword ptr [rsp + 32] LONG $0x244c8b4c; BYTE $0x30 // mov r9, qword ptr [rsp + 48] WORD $0x3949; BYTE $0xd9 // cmp r9, rbx JAE LBB5_14 LBB5_3: WORD $0x3948; BYTE $0xd9 // cmp rcx, rbx WORD $0x8949; BYTE $0xdb // mov r11, rbx LONG $0x244c8948; BYTE $0x38 // mov qword ptr [rsp + 56], rcx LONG $0xd9420f4c // cmovb r11, rcx LONG $0x00898d49; WORD $0x0002; BYTE $0x00 // lea rcx, [r9 + 512] WORD $0x3948; BYTE $0xd9 // cmp rcx, rbx LONG $0x244c8948; BYTE $0x30 // mov qword ptr [rsp + 48], rcx LONG $0xd9420f48 // cmovb rbx, rcx LONG $0x247c8348; WORD $0x0028 // cmp qword ptr [rsp + 40], 0 JE LBB5_2 LONG $0x245c034c; BYTE $0x10 // add r11, qword ptr [rsp + 16] LONG $0xfce38349 // and r11, -4 LONG $0x247c8b4c; BYTE $0x08 // mov r15, qword ptr [rsp + 8] WORD $0x894c; BYTE $0xf9 // mov rcx, r15 LONG $0x09e1c148 // shl rcx, 9 WORD $0x8949; BYTE $0xd8 // mov r8, rbx WORD $0x2949; BYTE $0xc8 // sub r8, rcx LONG $0x0ce7c149 // shl r15, 12 LONG $0x3f148d4e // lea r10, [rdi + r15] LONG $0x247c894c; BYTE $0x60 // mov qword ptr [rsp + 96], r15 LONG $0xc73c8d4f // lea r15, [r15 + 8*r8] LONG $0x3f0c8d49 // lea rcx, [r15 + rdi] LONG $0x244c8948; BYTE $0x58 // mov qword ptr [rsp + 88], rcx WORD $0x894c; BYTE $0xc1 // mov rcx, r8 LONG $0xfce18348 // and rcx, -4 LONG $0x244c8948; BYTE $0x48 // mov qword ptr [rsp + 72], rcx WORD $0x014c; BYTE $0xc9 // add rcx, r9 LONG $0x244c8948; BYTE $0x40 // mov qword ptr [rsp + 64], rcx WORD $0x3145; BYTE $0xf6 // xor r14d, r14d LONG $0x247c894c; BYTE $0x50 // mov qword ptr [rsp + 80], r15 JMP LBB5_5 LBB5_12: WORD $0xff49; BYTE $0xc6 // inc r14 WORD $0x394d; 
BYTE $0xe6 // cmp r14, r12 JE LBB5_2 LBB5_5: WORD $0x3949; BYTE $0xd9 // cmp r9, rbx JAE LBB5_12 LONG $0xf6148b4a // mov rdx, qword ptr [rsi + 8*r14] WORD $0x894d; BYTE $0xcd // mov r13, r9 LONG $0x04f88349 // cmp r8, 4 JB LBB5_13 LONG $0x3a0c8d4a // lea rcx, [rdx + r15] WORD $0x3949; BYTE $0xca // cmp r10, rcx JAE LBB5_9 LONG $0x244c8b48; BYTE $0x60 // mov rcx, qword ptr [rsp + 96] WORD $0x0148; BYTE $0xd1 // add rcx, rdx WORD $0x894d; BYTE $0xcd // mov r13, r9 LONG $0x244c3b48; BYTE $0x58 // cmp rcx, qword ptr [rsp + 88] JB LBB5_13 LBB5_9: WORD $0x894d; BYTE $0xd7 // mov r15, r10 WORD $0x8949; BYTE $0xf2 // mov r10, rsi LONG $0x244c8b48; BYTE $0x18 // mov rcx, qword ptr [rsp + 24] LONG $0x0a348d48 // lea rsi, [rdx + rcx] WORD $0xc931 // xor ecx, ecx LBB5_10: LONG $0x0410fcc5; BYTE $0xce // vmovups ymm0, ymmword ptr [rsi + 8*rcx] LONG $0x0455fcc5; BYTE $0xc8 // vandnps ymm0, ymm0, ymmword ptr [rax + 8*rcx] LONG $0x0411fcc5; BYTE $0xc8 // vmovups ymmword ptr [rax + 8*rcx], ymm0 LONG $0x04c18348 // add rcx, 4 WORD $0x3949; BYTE $0xcb // cmp r11, rcx JNE LBB5_10 LONG $0x246c8b4c; BYTE $0x40 // mov r13, qword ptr [rsp + 64] LONG $0x24443b4c; BYTE $0x48 // cmp r8, qword ptr [rsp + 72] WORD $0x894c; BYTE $0xd6 // mov rsi, r10 WORD $0x894d; BYTE $0xfa // mov r10, r15 LONG $0x247c8b4c; BYTE $0x50 // mov r15, qword ptr [rsp + 80] JE LBB5_12 LBB5_13: LONG $0xea0c8b4a // mov rcx, qword ptr [rdx + 8*r13] WORD $0xf748; BYTE $0xd1 // not rcx LONG $0xef0c214a // and qword ptr [rdi + 8*r13], rcx WORD $0xff49; BYTE $0xc5 // inc r13 WORD $0x3949; BYTE $0xdd // cmp r13, rbx JB LBB5_13 JMP LBB5_12 LBB5_14: LONG $0xd8658d48 // lea rsp, [rbp - 40] BYTE $0x5b // pop rbx WORD $0x5c41 // pop r12 WORD $0x5d41 // pop r13 WORD $0x5e41 // pop r14 WORD $0x5f41 // pop r15 BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_or_many(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ dims+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp 
WORD $0x5741 // push r15 WORD $0x5641 // push r14 WORD $0x5541 // push r13 WORD $0x5441 // push r12 BYTE $0x53 // push rbx LONG $0xf8e48348 // and rsp, -8 LONG $0x68ec8348 // sub rsp, 104 LONG $0xffffffbb; BYTE $0xff // mov ebx, 4294967295 WORD $0x2148; BYTE $0xd3 // and rbx, rdx JE LBB6_14 WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0x20e8c148 // shr rax, 32 LONG $0x02f88348 // cmp rax, 2 LONG $0x0001bc41; WORD $0x0000 // mov r12d, 1 LONG $0x24448948; BYTE $0x28 // mov qword ptr [rsp + 40], rax LONG $0xe0430f4c // cmovae r12, rax LONG $0x000200b9; BYTE $0x00 // mov ecx, 512 WORD $0xc031 // xor eax, eax LONG $0x24448948; BYTE $0x18 // mov qword ptr [rsp + 24], rax WORD $0x8948; BYTE $0xf8 // mov rax, rdi WORD $0xd231 // xor edx, edx LONG $0x24548948; BYTE $0x10 // mov qword ptr [rsp + 16], rdx WORD $0xd231 // xor edx, edx LONG $0x24548948; BYTE $0x08 // mov qword ptr [rsp + 8], rdx WORD $0x3145; BYTE $0xc9 // xor r9d, r9d LONG $0x245c8948; BYTE $0x20 // mov qword ptr [rsp + 32], rbx JMP LBB6_3 LBB6_2: LONG $0x2444ff48; BYTE $0x08 // inc qword ptr [rsp + 8] LONG $0x244c8b48; BYTE $0x38 // mov rcx, qword ptr [rsp + 56] LONG $0x00c18148; WORD $0x0002; BYTE $0x00 // add rcx, 512 QUAD $0xfffe001024448148; BYTE $0xff // add qword ptr [rsp + 16], -512 QUAD $0x0010001824448148; BYTE $0x00 // add qword ptr [rsp + 24], 4096 LONG $0x10000548; WORD $0x0000 // add rax, 4096 LONG $0x245c8b48; BYTE $0x20 // mov rbx, qword ptr [rsp + 32] LONG $0x244c8b4c; BYTE $0x30 // mov r9, qword ptr [rsp + 48] WORD $0x3949; BYTE $0xd9 // cmp r9, rbx JAE LBB6_14 LBB6_3: WORD $0x3948; BYTE $0xd9 // cmp rcx, rbx WORD $0x8949; BYTE $0xda // mov r10, rbx LONG $0x244c8948; BYTE $0x38 // mov qword ptr [rsp + 56], rcx LONG $0xd1420f4c // cmovb r10, rcx LONG $0x00898d49; WORD $0x0002; BYTE $0x00 // lea rcx, [r9 + 512] WORD $0x3948; BYTE $0xd9 // cmp rcx, rbx LONG $0x244c8948; BYTE $0x30 // mov qword ptr [rsp + 48], rcx LONG $0xd9420f48 // cmovb rbx, rcx LONG $0x247c8348; WORD $0x0028 // cmp qword 
ptr [rsp + 40], 0 JE LBB6_2 LONG $0x2454034c; BYTE $0x10 // add r10, qword ptr [rsp + 16] LONG $0xfce28349 // and r10, -4 LONG $0x247c8b4c; BYTE $0x08 // mov r15, qword ptr [rsp + 8] WORD $0x894c; BYTE $0xf9 // mov rcx, r15 LONG $0x09e1c148 // shl rcx, 9 WORD $0x8949; BYTE $0xd8 // mov r8, rbx WORD $0x2949; BYTE $0xc8 // sub r8, rcx LONG $0x0ce7c149 // shl r15, 12 LONG $0x3f1c8d4e // lea r11, [rdi + r15] LONG $0x247c894c; BYTE $0x60 // mov qword ptr [rsp + 96], r15 LONG $0xc73c8d4f // lea r15, [r15 + 8*r8] LONG $0x3f0c8d49 // lea rcx, [r15 + rdi] LONG $0x244c8948; BYTE $0x58 // mov qword ptr [rsp + 88], rcx WORD $0x894c; BYTE $0xc1 // mov rcx, r8 LONG $0xfce18348 // and rcx, -4 LONG $0x244c8948; BYTE $0x48 // mov qword ptr [rsp + 72], rcx WORD $0x014c; BYTE $0xc9 // add rcx, r9 LONG $0x244c8948; BYTE $0x40 // mov qword ptr [rsp + 64], rcx WORD $0x3145; BYTE $0xf6 // xor r14d, r14d LONG $0x247c894c; BYTE $0x50 // mov qword ptr [rsp + 80], r15 JMP LBB6_5 LBB6_12: WORD $0xff49; BYTE $0xc6 // inc r14 WORD $0x394d; BYTE $0xe6 // cmp r14, r12 JE LBB6_2 LBB6_5: WORD $0x3949; BYTE $0xd9 // cmp r9, rbx JAE LBB6_12 LONG $0xf6148b4a // mov rdx, qword ptr [rsi + 8*r14] WORD $0x894d; BYTE $0xcd // mov r13, r9 LONG $0x04f88349 // cmp r8, 4 JB LBB6_13 LONG $0x3a0c8d4a // lea rcx, [rdx + r15] WORD $0x3949; BYTE $0xcb // cmp r11, rcx JAE LBB6_9 LONG $0x244c8b48; BYTE $0x60 // mov rcx, qword ptr [rsp + 96] WORD $0x0148; BYTE $0xd1 // add rcx, rdx WORD $0x894d; BYTE $0xcd // mov r13, r9 LONG $0x244c3b48; BYTE $0x58 // cmp rcx, qword ptr [rsp + 88] JB LBB6_13 LBB6_9: WORD $0x894d; BYTE $0xdf // mov r15, r11 WORD $0x8949; BYTE $0xf3 // mov r11, rsi LONG $0x244c8b48; BYTE $0x18 // mov rcx, qword ptr [rsp + 24] LONG $0x0a348d48 // lea rsi, [rdx + rcx] WORD $0xc931 // xor ecx, ecx LBB6_10: LONG $0x0410fcc5; BYTE $0xc8 // vmovups ymm0, ymmword ptr [rax + 8*rcx] LONG $0x0456fcc5; BYTE $0xce // vorps ymm0, ymm0, ymmword ptr [rsi + 8*rcx] LONG $0x0411fcc5; BYTE $0xc8 // vmovups ymmword ptr 
[rax + 8*rcx], ymm0 LONG $0x04c18348 // add rcx, 4 WORD $0x3949; BYTE $0xca // cmp r10, rcx JNE LBB6_10 LONG $0x246c8b4c; BYTE $0x40 // mov r13, qword ptr [rsp + 64] LONG $0x24443b4c; BYTE $0x48 // cmp r8, qword ptr [rsp + 72] WORD $0x894c; BYTE $0xde // mov rsi, r11 WORD $0x894d; BYTE $0xfb // mov r11, r15 LONG $0x247c8b4c; BYTE $0x50 // mov r15, qword ptr [rsp + 80] JE LBB6_12 LBB6_13: LONG $0xea0c8b4a // mov rcx, qword ptr [rdx + 8*r13] LONG $0xef0c094a // or qword ptr [rdi + 8*r13], rcx WORD $0xff49; BYTE $0xc5 // inc r13 WORD $0x3949; BYTE $0xdd // cmp r13, rbx JB LBB6_13 JMP LBB6_12 LBB6_14: LONG $0xd8658d48 // lea rsp, [rbp - 40] BYTE $0x5b // pop rbx WORD $0x5c41 // pop r12 WORD $0x5d41 // pop r13 WORD $0x5e41 // pop r14 WORD $0x5f41 // pop r15 BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_xor_many(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ dims+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp WORD $0x5741 // push r15 WORD $0x5641 // push r14 WORD $0x5541 // push r13 WORD $0x5441 // push r12 BYTE $0x53 // push rbx LONG $0xf8e48348 // and rsp, -8 LONG $0x68ec8348 // sub rsp, 104 LONG $0xffffffbb; BYTE $0xff // mov ebx, 4294967295 WORD $0x2148; BYTE $0xd3 // and rbx, rdx JE LBB7_14 WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0x20e8c148 // shr rax, 32 LONG $0x02f88348 // cmp rax, 2 LONG $0x0001bc41; WORD $0x0000 // mov r12d, 1 LONG $0x24448948; BYTE $0x28 // mov qword ptr [rsp + 40], rax LONG $0xe0430f4c // cmovae r12, rax LONG $0x000200b9; BYTE $0x00 // mov ecx, 512 WORD $0xc031 // xor eax, eax LONG $0x24448948; BYTE $0x18 // mov qword ptr [rsp + 24], rax WORD $0x8948; BYTE $0xf8 // mov rax, rdi WORD $0xd231 // xor edx, edx LONG $0x24548948; BYTE $0x10 // mov qword ptr [rsp + 16], rdx WORD $0xd231 // xor edx, edx LONG $0x24548948; BYTE $0x08 // mov qword ptr [rsp + 8], rdx WORD $0x3145; BYTE $0xc9 // xor r9d, r9d LONG $0x245c8948; BYTE $0x20 // mov qword ptr [rsp + 32], rbx JMP LBB7_3 
LBB7_2: LONG $0x2444ff48; BYTE $0x08 // inc qword ptr [rsp + 8] LONG $0x244c8b48; BYTE $0x38 // mov rcx, qword ptr [rsp + 56] LONG $0x00c18148; WORD $0x0002; BYTE $0x00 // add rcx, 512 QUAD $0xfffe001024448148; BYTE $0xff // add qword ptr [rsp + 16], -512 QUAD $0x0010001824448148; BYTE $0x00 // add qword ptr [rsp + 24], 4096 LONG $0x10000548; WORD $0x0000 // add rax, 4096 LONG $0x245c8b48; BYTE $0x20 // mov rbx, qword ptr [rsp + 32] LONG $0x244c8b4c; BYTE $0x30 // mov r9, qword ptr [rsp + 48] WORD $0x3949; BYTE $0xd9 // cmp r9, rbx JAE LBB7_14 LBB7_3: WORD $0x3948; BYTE $0xd9 // cmp rcx, rbx WORD $0x8949; BYTE $0xda // mov r10, rbx LONG $0x244c8948; BYTE $0x38 // mov qword ptr [rsp + 56], rcx LONG $0xd1420f4c // cmovb r10, rcx LONG $0x00898d49; WORD $0x0002; BYTE $0x00 // lea rcx, [r9 + 512] WORD $0x3948; BYTE $0xd9 // cmp rcx, rbx LONG $0x244c8948; BYTE $0x30 // mov qword ptr [rsp + 48], rcx LONG $0xd9420f48 // cmovb rbx, rcx LONG $0x247c8348; WORD $0x0028 // cmp qword ptr [rsp + 40], 0 JE LBB7_2 LONG $0x2454034c; BYTE $0x10 // add r10, qword ptr [rsp + 16] LONG $0xfce28349 // and r10, -4 LONG $0x247c8b4c; BYTE $0x08 // mov r15, qword ptr [rsp + 8] WORD $0x894c; BYTE $0xf9 // mov rcx, r15 LONG $0x09e1c148 // shl rcx, 9 WORD $0x8949; BYTE $0xd8 // mov r8, rbx WORD $0x2949; BYTE $0xc8 // sub r8, rcx LONG $0x0ce7c149 // shl r15, 12 LONG $0x3f1c8d4e // lea r11, [rdi + r15] LONG $0x247c894c; BYTE $0x60 // mov qword ptr [rsp + 96], r15 LONG $0xc73c8d4f // lea r15, [r15 + 8*r8] LONG $0x3f0c8d49 // lea rcx, [r15 + rdi] LONG $0x244c8948; BYTE $0x58 // mov qword ptr [rsp + 88], rcx WORD $0x894c; BYTE $0xc1 // mov rcx, r8 LONG $0xfce18348 // and rcx, -4 LONG $0x244c8948; BYTE $0x48 // mov qword ptr [rsp + 72], rcx WORD $0x014c; BYTE $0xc9 // add rcx, r9 LONG $0x244c8948; BYTE $0x40 // mov qword ptr [rsp + 64], rcx WORD $0x3145; BYTE $0xf6 // xor r14d, r14d LONG $0x247c894c; BYTE $0x50 // mov qword ptr [rsp + 80], r15 JMP LBB7_5 LBB7_12: WORD $0xff49; BYTE $0xc6 // inc r14 
WORD $0x394d; BYTE $0xe6 // cmp r14, r12 JE LBB7_2 LBB7_5: WORD $0x3949; BYTE $0xd9 // cmp r9, rbx JAE LBB7_12 LONG $0xf6148b4a // mov rdx, qword ptr [rsi + 8*r14] WORD $0x894d; BYTE $0xcd // mov r13, r9 LONG $0x04f88349 // cmp r8, 4 JB LBB7_13 LONG $0x3a0c8d4a // lea rcx, [rdx + r15] WORD $0x3949; BYTE $0xcb // cmp r11, rcx JAE LBB7_9 LONG $0x244c8b48; BYTE $0x60 // mov rcx, qword ptr [rsp + 96] WORD $0x0148; BYTE $0xd1 // add rcx, rdx WORD $0x894d; BYTE $0xcd // mov r13, r9 LONG $0x244c3b48; BYTE $0x58 // cmp rcx, qword ptr [rsp + 88] JB LBB7_13 LBB7_9: WORD $0x894d; BYTE $0xdf // mov r15, r11 WORD $0x8949; BYTE $0xf3 // mov r11, rsi LONG $0x244c8b48; BYTE $0x18 // mov rcx, qword ptr [rsp + 24] LONG $0x0a348d48 // lea rsi, [rdx + rcx] WORD $0xc931 // xor ecx, ecx LBB7_10: LONG $0x0410fcc5; BYTE $0xc8 // vmovups ymm0, ymmword ptr [rax + 8*rcx] LONG $0x0457fcc5; BYTE $0xce // vxorps ymm0, ymm0, ymmword ptr [rsi + 8*rcx] LONG $0x0411fcc5; BYTE $0xc8 // vmovups ymmword ptr [rax + 8*rcx], ymm0 LONG $0x04c18348 // add rcx, 4 WORD $0x3949; BYTE $0xca // cmp r10, rcx JNE LBB7_10 LONG $0x246c8b4c; BYTE $0x40 // mov r13, qword ptr [rsp + 64] LONG $0x24443b4c; BYTE $0x48 // cmp r8, qword ptr [rsp + 72] WORD $0x894c; BYTE $0xde // mov rsi, r11 WORD $0x894d; BYTE $0xfb // mov r11, r15 LONG $0x247c8b4c; BYTE $0x50 // mov r15, qword ptr [rsp + 80] JE LBB7_12 LBB7_13: LONG $0xea0c8b4a // mov rcx, qword ptr [rdx + 8*r13] LONG $0xef0c314a // xor qword ptr [rdi + 8*r13], rcx WORD $0xff49; BYTE $0xc5 // inc r13 WORD $0x3949; BYTE $0xdd // cmp r13, rbx JB LBB7_13 JMP LBB7_12 LBB7_14: LONG $0xd8658d48 // lea rsp, [rbp - 40] BYTE $0x5b // pop rbx WORD $0x5c41 // pop r12 WORD $0x5d41 // pop r13 WORD $0x5e41 // pop r14 WORD $0x5f41 // pop r15 BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_count(SB), $0-32 MOVQ a+0(FP), DI MOVQ size+8(FP), SI MOVQ result+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp LONG $0xf8e48348 // 
and rsp, -8 WORD $0x8548; BYTE $0xf6 // test rsi, rsi JE LBB8_1 WORD $0xc931 // xor ecx, ecx WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LBB8_4: LONG $0xb80f48f3; WORD $0xcf04 // popcnt rax, qword ptr [rdi + 8*rcx] WORD $0x0149; BYTE $0xc0 // add r8, rax WORD $0xff48; BYTE $0xc1 // inc rcx WORD $0x3948; BYTE $0xce // cmp rsi, rcx JNE LBB8_4 JMP LBB8_2 LBB8_1: WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LBB8_2: WORD $0x894c; BYTE $0x02 // mov qword ptr [rdx], r8 WORD $0x8948; BYTE $0xec // mov rsp, rbp BYTE $0x5d // pop rbp BYTE $0xc3 // ret golang-github-kelindar-bitmap-1.5.5/simd_avx512.go000066400000000000000000000015351517523267600217250ustar00rootroot00000000000000//go:build !noasm && amd64 // AUTO-GENERATED BY GOCC -- DO NOT EDIT package bitmap import "unsafe" //go:nosplit //go:noescape func _and_avx512(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _andn_avx512(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _or_avx512(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _xor_avx512(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _and_many_avx512(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _andn_many_avx512(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _or_many_avx512(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _xor_many_avx512(a unsafe.Pointer, b unsafe.Pointer, dims uint64) golang-github-kelindar-bitmap-1.5.5/simd_avx512.s000066400000000000000000001340061517523267600215620ustar00rootroot00000000000000//go:build !noasm && amd64 // AUTO-GENERATED BY GOCC -- DO NOT EDIT TEXT ·_and_avx512(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ n+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp LONG $0xf8e48348 // and rsp, -8 WORD $0x8548; BYTE $0xd2 // test rdx, rdx JE LBB0_13 LONG $0x20fa8348 // cmp rdx, 32 JB LBB0_2 LONG 
$0xd6048d48 // lea rax, [rsi + 8*rdx] WORD $0x3948; BYTE $0xf8 // cmp rax, rdi JBE LBB0_6 LONG $0xd7048d48 // lea rax, [rdi + 8*rdx] WORD $0x3948; BYTE $0xf0 // cmp rax, rsi JBE LBB0_6 LBB0_2: WORD $0xc031 // xor eax, eax LBB0_9: WORD $0x8949; BYTE $0xc0 // mov r8, rax WORD $0xf749; BYTE $0xd0 // not r8 WORD $0x0149; BYTE $0xd0 // add r8, rdx WORD $0x8949; BYTE $0xd1 // mov r9, rdx LONG $0x03e18349 // and r9, 3 JE LBB0_11 LBB0_10: LONG $0xc60c8b48 // mov rcx, qword ptr [rsi + 8*rax] LONG $0xc70c2148 // and qword ptr [rdi + 8*rax], rcx WORD $0xff48; BYTE $0xc0 // inc rax WORD $0xff49; BYTE $0xc9 // dec r9 JNE LBB0_10 LBB0_11: LONG $0x03f88349 // cmp r8, 3 JB LBB0_13 LBB0_12: LONG $0xc60c8b48 // mov rcx, qword ptr [rsi + 8*rax] LONG $0xc70c2148 // and qword ptr [rdi + 8*rax], rcx LONG $0xc64c8b48; BYTE $0x08 // mov rcx, qword ptr [rsi + 8*rax + 8] LONG $0xc74c2148; BYTE $0x08 // and qword ptr [rdi + 8*rax + 8], rcx LONG $0xc64c8b48; BYTE $0x10 // mov rcx, qword ptr [rsi + 8*rax + 16] LONG $0xc74c2148; BYTE $0x10 // and qword ptr [rdi + 8*rax + 16], rcx LONG $0xc64c8b48; BYTE $0x18 // mov rcx, qword ptr [rsi + 8*rax + 24] LONG $0xc74c2148; BYTE $0x18 // and qword ptr [rdi + 8*rax + 24], rcx LONG $0x04c08348 // add rax, 4 WORD $0x3948; BYTE $0xc2 // cmp rdx, rax JNE LBB0_12 JMP LBB0_13 LBB0_6: WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0xe0e08348 // and rax, -32 WORD $0xc931 // xor ecx, ecx LBB0_7: LONG $0x487cf162; WORD $0x0410; BYTE $0xcf // vmovups zmm0, zmmword ptr [rdi + 8*rcx] QUAD $0x01cf4c10487cf162 // vmovups zmm1, zmmword ptr [rdi + 8*rcx + 64] QUAD $0x02cf5410487cf162 // vmovups zmm2, zmmword ptr [rdi + 8*rcx + 128] QUAD $0x03cf5c10487cf162 // vmovups zmm3, zmmword ptr [rdi + 8*rcx + 192] LONG $0x487cf162; WORD $0x0454; BYTE $0xce // vandps zmm0, zmm0, zmmword ptr [rsi + 8*rcx] QUAD $0x01ce4c544874f162 // vandps zmm1, zmm1, zmmword ptr [rsi + 8*rcx + 64] QUAD $0x02ce5454486cf162 // vandps zmm2, zmm2, zmmword ptr [rsi + 8*rcx + 128] QUAD 
$0x03ce5c544864f162 // vandps zmm3, zmm3, zmmword ptr [rsi + 8*rcx + 192] LONG $0x487cf162; WORD $0x0411; BYTE $0xcf // vmovups zmmword ptr [rdi + 8*rcx], zmm0 QUAD $0x01cf4c11487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 64], zmm1 QUAD $0x02cf5411487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 128], zmm2 QUAD $0x03cf5c11487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 192], zmm3 LONG $0x20c18348 // add rcx, 32 WORD $0x3948; BYTE $0xc8 // cmp rax, rcx JNE LBB0_7 WORD $0x3948; BYTE $0xd0 // cmp rax, rdx JNE LBB0_9 LBB0_13: WORD $0x8948; BYTE $0xec // mov rsp, rbp BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_andn_avx512(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ n+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp LONG $0xf8e48348 // and rsp, -8 WORD $0x8548; BYTE $0xd2 // test rdx, rdx JE LBB1_13 LONG $0x20fa8348 // cmp rdx, 32 JB LBB1_2 LONG $0xd6048d48 // lea rax, [rsi + 8*rdx] WORD $0x3948; BYTE $0xf8 // cmp rax, rdi JBE LBB1_6 LONG $0xd7048d48 // lea rax, [rdi + 8*rdx] WORD $0x3948; BYTE $0xf0 // cmp rax, rsi JBE LBB1_6 LBB1_2: WORD $0xc031 // xor eax, eax LBB1_9: WORD $0x8949; BYTE $0xc0 // mov r8, rax WORD $0xf749; BYTE $0xd0 // not r8 WORD $0xc2f6; BYTE $0x01 // test dl, 1 JE LBB1_11 LONG $0xc60c8b48 // mov rcx, qword ptr [rsi + 8*rax] WORD $0xf748; BYTE $0xd1 // not rcx LONG $0xc70c2148 // and qword ptr [rdi + 8*rax], rcx LONG $0x01c88348 // or rax, 1 LBB1_11: WORD $0x0149; BYTE $0xd0 // add r8, rdx JE LBB1_13 LBB1_12: LONG $0xc60c8b48 // mov rcx, qword ptr [rsi + 8*rax] WORD $0xf748; BYTE $0xd1 // not rcx LONG $0xc70c2148 // and qword ptr [rdi + 8*rax], rcx LONG $0xc64c8b48; BYTE $0x08 // mov rcx, qword ptr [rsi + 8*rax + 8] WORD $0xf748; BYTE $0xd1 // not rcx LONG $0xc74c2148; BYTE $0x08 // and qword ptr [rdi + 8*rax + 8], rcx LONG $0x02c08348 // add rax, 2 WORD $0x3948; BYTE $0xc2 // cmp rdx, rax JNE LBB1_12 JMP LBB1_13 LBB1_6: WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0xe0e08348 
// and rax, -32 WORD $0xc931 // xor ecx, ecx LBB1_7: LONG $0x487cf162; WORD $0x0410; BYTE $0xce // vmovups zmm0, zmmword ptr [rsi + 8*rcx] QUAD $0x01ce4c10487cf162 // vmovups zmm1, zmmword ptr [rsi + 8*rcx + 64] QUAD $0x02ce5410487cf162 // vmovups zmm2, zmmword ptr [rsi + 8*rcx + 128] QUAD $0x03ce5c10487cf162 // vmovups zmm3, zmmword ptr [rsi + 8*rcx + 192] LONG $0x487cf162; WORD $0x0455; BYTE $0xcf // vandnps zmm0, zmm0, zmmword ptr [rdi + 8*rcx] QUAD $0x01cf4c554874f162 // vandnps zmm1, zmm1, zmmword ptr [rdi + 8*rcx + 64] QUAD $0x02cf5455486cf162 // vandnps zmm2, zmm2, zmmword ptr [rdi + 8*rcx + 128] QUAD $0x03cf5c554864f162 // vandnps zmm3, zmm3, zmmword ptr [rdi + 8*rcx + 192] LONG $0x487cf162; WORD $0x0411; BYTE $0xcf // vmovups zmmword ptr [rdi + 8*rcx], zmm0 QUAD $0x01cf4c11487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 64], zmm1 QUAD $0x02cf5411487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 128], zmm2 QUAD $0x03cf5c11487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 192], zmm3 LONG $0x20c18348 // add rcx, 32 WORD $0x3948; BYTE $0xc8 // cmp rax, rcx JNE LBB1_7 WORD $0x3948; BYTE $0xd0 // cmp rax, rdx JNE LBB1_9 LBB1_13: WORD $0x8948; BYTE $0xec // mov rsp, rbp BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_or_avx512(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ n+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp LONG $0xf8e48348 // and rsp, -8 WORD $0x8548; BYTE $0xd2 // test rdx, rdx JE LBB2_13 LONG $0x20fa8348 // cmp rdx, 32 JB LBB2_2 LONG $0xd6048d48 // lea rax, [rsi + 8*rdx] WORD $0x3948; BYTE $0xf8 // cmp rax, rdi JBE LBB2_6 LONG $0xd7048d48 // lea rax, [rdi + 8*rdx] WORD $0x3948; BYTE $0xf0 // cmp rax, rsi JBE LBB2_6 LBB2_2: WORD $0xc031 // xor eax, eax LBB2_9: WORD $0x8949; BYTE $0xc0 // mov r8, rax WORD $0xf749; BYTE $0xd0 // not r8 WORD $0x0149; BYTE $0xd0 // add r8, rdx WORD $0x8949; BYTE $0xd1 // mov r9, rdx LONG $0x03e18349 // and r9, 3 JE LBB2_11 LBB2_10: LONG $0xc60c8b48 // mov 
rcx, qword ptr [rsi + 8*rax] LONG $0xc70c0948 // or qword ptr [rdi + 8*rax], rcx WORD $0xff48; BYTE $0xc0 // inc rax WORD $0xff49; BYTE $0xc9 // dec r9 JNE LBB2_10 LBB2_11: LONG $0x03f88349 // cmp r8, 3 JB LBB2_13 LBB2_12: LONG $0xc60c8b48 // mov rcx, qword ptr [rsi + 8*rax] LONG $0xc70c0948 // or qword ptr [rdi + 8*rax], rcx LONG $0xc64c8b48; BYTE $0x08 // mov rcx, qword ptr [rsi + 8*rax + 8] LONG $0xc74c0948; BYTE $0x08 // or qword ptr [rdi + 8*rax + 8], rcx LONG $0xc64c8b48; BYTE $0x10 // mov rcx, qword ptr [rsi + 8*rax + 16] LONG $0xc74c0948; BYTE $0x10 // or qword ptr [rdi + 8*rax + 16], rcx LONG $0xc64c8b48; BYTE $0x18 // mov rcx, qword ptr [rsi + 8*rax + 24] LONG $0xc74c0948; BYTE $0x18 // or qword ptr [rdi + 8*rax + 24], rcx LONG $0x04c08348 // add rax, 4 WORD $0x3948; BYTE $0xc2 // cmp rdx, rax JNE LBB2_12 JMP LBB2_13 LBB2_6: WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0xe0e08348 // and rax, -32 WORD $0xc931 // xor ecx, ecx LBB2_7: LONG $0x487cf162; WORD $0x0410; BYTE $0xcf // vmovups zmm0, zmmword ptr [rdi + 8*rcx] QUAD $0x01cf4c10487cf162 // vmovups zmm1, zmmword ptr [rdi + 8*rcx + 64] QUAD $0x02cf5410487cf162 // vmovups zmm2, zmmword ptr [rdi + 8*rcx + 128] QUAD $0x03cf5c10487cf162 // vmovups zmm3, zmmword ptr [rdi + 8*rcx + 192] LONG $0x487cf162; WORD $0x0456; BYTE $0xce // vorps zmm0, zmm0, zmmword ptr [rsi + 8*rcx] QUAD $0x01ce4c564874f162 // vorps zmm1, zmm1, zmmword ptr [rsi + 8*rcx + 64] QUAD $0x02ce5456486cf162 // vorps zmm2, zmm2, zmmword ptr [rsi + 8*rcx + 128] QUAD $0x03ce5c564864f162 // vorps zmm3, zmm3, zmmword ptr [rsi + 8*rcx + 192] LONG $0x487cf162; WORD $0x0411; BYTE $0xcf // vmovups zmmword ptr [rdi + 8*rcx], zmm0 QUAD $0x01cf4c11487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 64], zmm1 QUAD $0x02cf5411487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 128], zmm2 QUAD $0x03cf5c11487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 192], zmm3 LONG $0x20c18348 // add rcx, 32 WORD $0x3948; BYTE $0xc8 // cmp rax, rcx JNE LBB2_7 WORD $0x3948; BYTE 
$0xd0 // cmp rax, rdx JNE LBB2_9 LBB2_13: WORD $0x8948; BYTE $0xec // mov rsp, rbp BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_xor_avx512(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ n+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp LONG $0xf8e48348 // and rsp, -8 WORD $0x8548; BYTE $0xd2 // test rdx, rdx JE LBB3_13 LONG $0x20fa8348 // cmp rdx, 32 JB LBB3_2 LONG $0xd6048d48 // lea rax, [rsi + 8*rdx] WORD $0x3948; BYTE $0xf8 // cmp rax, rdi JBE LBB3_6 LONG $0xd7048d48 // lea rax, [rdi + 8*rdx] WORD $0x3948; BYTE $0xf0 // cmp rax, rsi JBE LBB3_6 LBB3_2: WORD $0xc031 // xor eax, eax LBB3_9: WORD $0x8949; BYTE $0xc0 // mov r8, rax WORD $0xf749; BYTE $0xd0 // not r8 WORD $0x0149; BYTE $0xd0 // add r8, rdx WORD $0x8949; BYTE $0xd1 // mov r9, rdx LONG $0x03e18349 // and r9, 3 JE LBB3_11 LBB3_10: LONG $0xc60c8b48 // mov rcx, qword ptr [rsi + 8*rax] LONG $0xc70c3148 // xor qword ptr [rdi + 8*rax], rcx WORD $0xff48; BYTE $0xc0 // inc rax WORD $0xff49; BYTE $0xc9 // dec r9 JNE LBB3_10 LBB3_11: LONG $0x03f88349 // cmp r8, 3 JB LBB3_13 LBB3_12: LONG $0xc60c8b48 // mov rcx, qword ptr [rsi + 8*rax] LONG $0xc70c3148 // xor qword ptr [rdi + 8*rax], rcx LONG $0xc64c8b48; BYTE $0x08 // mov rcx, qword ptr [rsi + 8*rax + 8] LONG $0xc74c3148; BYTE $0x08 // xor qword ptr [rdi + 8*rax + 8], rcx LONG $0xc64c8b48; BYTE $0x10 // mov rcx, qword ptr [rsi + 8*rax + 16] LONG $0xc74c3148; BYTE $0x10 // xor qword ptr [rdi + 8*rax + 16], rcx LONG $0xc64c8b48; BYTE $0x18 // mov rcx, qword ptr [rsi + 8*rax + 24] LONG $0xc74c3148; BYTE $0x18 // xor qword ptr [rdi + 8*rax + 24], rcx LONG $0x04c08348 // add rax, 4 WORD $0x3948; BYTE $0xc2 // cmp rdx, rax JNE LBB3_12 JMP LBB3_13 LBB3_6: WORD $0x8948; BYTE $0xd0 // mov rax, rdx LONG $0xe0e08348 // and rax, -32 WORD $0xc931 // xor ecx, ecx LBB3_7: LONG $0x487cf162; WORD $0x0410; BYTE $0xcf // vmovups zmm0, zmmword ptr [rdi + 8*rcx] QUAD $0x01cf4c10487cf162 // vmovups zmm1, zmmword 
ptr [rdi + 8*rcx + 64] QUAD $0x02cf5410487cf162 // vmovups zmm2, zmmword ptr [rdi + 8*rcx + 128] QUAD $0x03cf5c10487cf162 // vmovups zmm3, zmmword ptr [rdi + 8*rcx + 192] LONG $0x487cf162; WORD $0x0457; BYTE $0xce // vxorps zmm0, zmm0, zmmword ptr [rsi + 8*rcx] QUAD $0x01ce4c574874f162 // vxorps zmm1, zmm1, zmmword ptr [rsi + 8*rcx + 64] QUAD $0x02ce5457486cf162 // vxorps zmm2, zmm2, zmmword ptr [rsi + 8*rcx + 128] QUAD $0x03ce5c574864f162 // vxorps zmm3, zmm3, zmmword ptr [rsi + 8*rcx + 192] LONG $0x487cf162; WORD $0x0411; BYTE $0xcf // vmovups zmmword ptr [rdi + 8*rcx], zmm0 QUAD $0x01cf4c11487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 64], zmm1 QUAD $0x02cf5411487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 128], zmm2 QUAD $0x03cf5c11487cf162 // vmovups zmmword ptr [rdi + 8*rcx + 192], zmm3 LONG $0x20c18348 // add rcx, 32 WORD $0x3948; BYTE $0xc8 // cmp rax, rcx JNE LBB3_7 WORD $0x3948; BYTE $0xd0 // cmp rax, rdx JNE LBB3_9 LBB3_13: WORD $0x8948; BYTE $0xec // mov rsp, rbp BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_and_many_avx512(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ dims+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp WORD $0x5741 // push r15 WORD $0x5641 // push r14 WORD $0x5541 // push r13 WORD $0x5441 // push r12 BYTE $0x53 // push rbx LONG $0xf8e48348 // and rsp, -8 LONG $0x68ec8348 // sub rsp, 104 LONG $0xffffffb9; BYTE $0xff // mov ecx, 4294967295 WORD $0x2148; BYTE $0xd1 // and rcx, rdx JE LBB4_18 LONG $0x20eac148 // shr rdx, 32 JE LBB4_18 LONG $0x02fa8348 // cmp rdx, 2 LONG $0x0001bc41; WORD $0x0000 // mov r12d, 1 LONG $0xe2430f4c // cmovae r12, rdx LONG $0xc0978d48; WORD $0x0000; BYTE $0x00 // lea rdx, [rdi + 192] LONG $0x000200bb; BYTE $0x00 // mov ebx, 512 LONG $0x0000c0b8; BYTE $0x00 // mov eax, 192 LONG $0x24448948; BYTE $0x18 // mov qword ptr [rsp + 24], rax WORD $0xc031 // xor eax, eax LONG $0x24448948; BYTE $0x10 // mov qword ptr [rsp + 16], rax WORD $0xc031 // 
xor eax, eax LONG $0x24448948; BYTE $0x08 // mov qword ptr [rsp + 8], rax WORD $0x3145; BYTE $0xff // xor r15d, r15d LONG $0x244c8948; BYTE $0x20 // mov qword ptr [rsp + 32], rcx JMP LBB4_3 LBB4_17: LONG $0x245c8b48; BYTE $0x30 // mov rbx, qword ptr [rsp + 48] LONG $0x00c38148; WORD $0x0002; BYTE $0x00 // add rbx, 512 LONG $0x2444ff48; BYTE $0x08 // inc qword ptr [rsp + 8] QUAD $0xfffe001024448148; BYTE $0xff // add qword ptr [rsp + 16], -512 QUAD $0x0010001824448148; BYTE $0x00 // add qword ptr [rsp + 24], 4096 LONG $0x00c28148; WORD $0x0010; BYTE $0x00 // add rdx, 4096 LONG $0x24448b48; BYTE $0x28 // mov rax, qword ptr [rsp + 40] WORD $0x8949; BYTE $0xc7 // mov r15, rax LONG $0x244c8b48; BYTE $0x20 // mov rcx, qword ptr [rsp + 32] WORD $0x3948; BYTE $0xc8 // cmp rax, rcx JAE LBB4_18 LBB4_3: WORD $0x3948; BYTE $0xcb // cmp rbx, rcx WORD $0x8948; BYTE $0xc8 // mov rax, rcx LONG $0x245c8948; BYTE $0x30 // mov qword ptr [rsp + 48], rbx LONG $0xc3420f48 // cmovb rax, rbx LONG $0x009f8d49; WORD $0x0002; BYTE $0x00 // lea rbx, [r15 + 512] WORD $0x3948; BYTE $0xcb // cmp rbx, rcx LONG $0x245c8948; BYTE $0x28 // mov qword ptr [rsp + 40], rbx LONG $0xcb420f48 // cmovb rcx, rbx WORD $0x3949; BYTE $0xcf // cmp r15, rcx JAE LBB4_17 LONG $0x244c8b48; BYTE $0x10 // mov rcx, qword ptr [rsp + 16] LONG $0x08148d4c // lea r10, [rax + rcx] LONG $0xe0e28349 // and r10, -32 LONG $0x245c8b48; BYTE $0x08 // mov rbx, qword ptr [rsp + 8] WORD $0x8948; BYTE $0xd9 // mov rcx, rbx LONG $0x09e1c148 // shl rcx, 9 WORD $0x8949; BYTE $0xc5 // mov r13, rax WORD $0x2949; BYTE $0xcd // sub r13, rcx LONG $0x0ce3c148 // shl rbx, 12 LONG $0x1f0c8d48 // lea rcx, [rdi + rbx] LONG $0x244c8948; BYTE $0x60 // mov qword ptr [rsp + 96], rcx LONG $0x245c8948; BYTE $0x50 // mov qword ptr [rsp + 80], rbx LONG $0xeb1c8d4a // lea rbx, [rbx + 8*r13] WORD $0x8948; BYTE $0xd9 // mov rcx, rbx LONG $0x245c8948; BYTE $0x58 // mov qword ptr [rsp + 88], rbx LONG $0x3b0c8d48 // lea rcx, [rbx + rdi] LONG $0x244c8948; BYTE 
$0x48 // mov qword ptr [rsp + 72], rcx WORD $0x894c; BYTE $0xe9 // mov rcx, r13 LONG $0xe0e18348 // and rcx, -32 LONG $0x244c8948; BYTE $0x40 // mov qword ptr [rsp + 64], rcx WORD $0x014c; BYTE $0xf9 // add rcx, r15 LONG $0x244c8948; BYTE $0x38 // mov qword ptr [rsp + 56], rcx WORD $0x3145; BYTE $0xc9 // xor r9d, r9d JMP LBB4_5 LBB4_16: WORD $0xff49; BYTE $0xc1 // inc r9 WORD $0x394d; BYTE $0xe1 // cmp r9, r12 JE LBB4_17 LBB4_5: LONG $0xce048b4e // mov r8, qword ptr [rsi + 8*r9] WORD $0x894c; BYTE $0xfb // mov rbx, r15 LONG $0x20fd8349 // cmp r13, 32 JB LBB4_11 LONG $0x244c8b48; BYTE $0x58 // mov rcx, qword ptr [rsp + 88] WORD $0x014c; BYTE $0xc1 // add rcx, r8 LONG $0x244c3948; BYTE $0x60 // cmp qword ptr [rsp + 96], rcx JAE LBB4_8 LONG $0x244c8b48; BYTE $0x50 // mov rcx, qword ptr [rsp + 80] WORD $0x014c; BYTE $0xc1 // add rcx, r8 WORD $0x894c; BYTE $0xfb // mov rbx, r15 LONG $0x244c3b48; BYTE $0x48 // cmp rcx, qword ptr [rsp + 72] JB LBB4_11 LBB4_8: LONG $0x244c8b48; BYTE $0x18 // mov rcx, qword ptr [rsp + 24] LONG $0x081c8d49 // lea rbx, [r8 + rcx] WORD $0xc931 // xor ecx, ecx LBB4_9: QUAD $0xfdca4410487cf162 // vmovups zmm0, zmmword ptr [rdx + 8*rcx - 192] QUAD $0xfeca4c10487cf162 // vmovups zmm1, zmmword ptr [rdx + 8*rcx - 128] QUAD $0xffca5410487cf162 // vmovups zmm2, zmmword ptr [rdx + 8*rcx - 64] LONG $0x487cf162; WORD $0x1c10; BYTE $0xca // vmovups zmm3, zmmword ptr [rdx + 8*rcx] QUAD $0xfdcb4454487cf162 // vandps zmm0, zmm0, zmmword ptr [rbx + 8*rcx - 192] QUAD $0xfecb4c544874f162 // vandps zmm1, zmm1, zmmword ptr [rbx + 8*rcx - 128] QUAD $0xffcb5454486cf162 // vandps zmm2, zmm2, zmmword ptr [rbx + 8*rcx - 64] LONG $0x4864f162; WORD $0x1c54; BYTE $0xcb // vandps zmm3, zmm3, zmmword ptr [rbx + 8*rcx] QUAD $0xfdca4411487cf162 // vmovups zmmword ptr [rdx + 8*rcx - 192], zmm0 QUAD $0xfeca4c11487cf162 // vmovups zmmword ptr [rdx + 8*rcx - 128], zmm1 QUAD $0xffca5411487cf162 // vmovups zmmword ptr [rdx + 8*rcx - 64], zmm2 LONG $0x487cf162; WORD $0x1c11; BYTE 
$0xca // vmovups zmmword ptr [rdx + 8*rcx], zmm3 LONG $0x20c18348 // add rcx, 32 WORD $0x3949; BYTE $0xca // cmp r10, rcx JNE LBB4_9 LONG $0x245c8b48; BYTE $0x38 // mov rbx, qword ptr [rsp + 56] LONG $0x246c3b4c; BYTE $0x40 // cmp r13, qword ptr [rsp + 64] JE LBB4_16 LBB4_11: WORD $0xc189 // mov ecx, eax WORD $0xd929 // sub ecx, ebx WORD $0x8949; BYTE $0xdb // mov r11, rbx WORD $0xf749; BYTE $0xd3 // not r11 WORD $0x0149; BYTE $0xc3 // add r11, rax WORD $0xc1f6; BYTE $0x03 // test cl, 3 JE LBB4_14 WORD $0xc189 // mov ecx, eax WORD $0xd928 // sub cl, bl LONG $0xf1b60f44 // movzx r14d, cl LONG $0x03e68341 // and r14d, 3 LBB4_13: LONG $0xd80c8b49 // mov rcx, qword ptr [r8 + 8*rbx] LONG $0xdf0c2148 // and qword ptr [rdi + 8*rbx], rcx WORD $0xff48; BYTE $0xc3 // inc rbx WORD $0xff49; BYTE $0xce // dec r14 JNE LBB4_13 LBB4_14: LONG $0x03fb8349 // cmp r11, 3 JB LBB4_16 LBB4_15: LONG $0xd80c8b49 // mov rcx, qword ptr [r8 + 8*rbx] LONG $0xdf0c2148 // and qword ptr [rdi + 8*rbx], rcx LONG $0xd84c8b49; BYTE $0x08 // mov rcx, qword ptr [r8 + 8*rbx + 8] LONG $0xdf4c2148; BYTE $0x08 // and qword ptr [rdi + 8*rbx + 8], rcx LONG $0xd84c8b49; BYTE $0x10 // mov rcx, qword ptr [r8 + 8*rbx + 16] LONG $0xdf4c2148; BYTE $0x10 // and qword ptr [rdi + 8*rbx + 16], rcx LONG $0xd84c8b49; BYTE $0x18 // mov rcx, qword ptr [r8 + 8*rbx + 24] LONG $0xdf4c2148; BYTE $0x18 // and qword ptr [rdi + 8*rbx + 24], rcx LONG $0x04c38348 // add rbx, 4 WORD $0x3948; BYTE $0xd8 // cmp rax, rbx JNE LBB4_15 JMP LBB4_16 LBB4_18: LONG $0xd8658d48 // lea rsp, [rbp - 40] BYTE $0x5b // pop rbx WORD $0x5c41 // pop r12 WORD $0x5d41 // pop r13 WORD $0x5e41 // pop r14 WORD $0x5f41 // pop r15 BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_andn_many_avx512(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ dims+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp WORD $0x5741 // push r15 WORD $0x5641 // push r14 WORD $0x5541 // push r13 WORD $0x5441 // push r12 
BYTE $0x53 // push rbx LONG $0xf8e48348 // and rsp, -8 LONG $0x58ec8348 // sub rsp, 88 LONG $0xffffffb9; BYTE $0xff // mov ecx, 4294967295 WORD $0x2148; BYTE $0xd1 // and rcx, rdx JE LBB5_17 LONG $0x20eac148 // shr rdx, 32 JE LBB5_17 LONG $0x02fa8348 // cmp rdx, 2 LONG $0x0001bc41; WORD $0x0000 // mov r12d, 1 LONG $0xe2430f4c // cmovae r12, rdx LONG $0xc0978d48; WORD $0x0000; BYTE $0x00 // lea rdx, [rdi + 192] LONG $0x000200b8; BYTE $0x00 // mov eax, 512 LONG $0x0000c0bb; BYTE $0x00 // mov ebx, 192 LONG $0x245c8948; BYTE $0x10 // mov qword ptr [rsp + 16], rbx WORD $0xdb31 // xor ebx, ebx LONG $0x245c8948; BYTE $0x08 // mov qword ptr [rsp + 8], rbx WORD $0xdb31 // xor ebx, ebx LONG $0x241c8948 // mov qword ptr [rsp], rbx WORD $0x3145; BYTE $0xff // xor r15d, r15d LONG $0x244c8948; BYTE $0x18 // mov qword ptr [rsp + 24], rcx JMP LBB5_3 LBB5_16: LONG $0x24448b48; BYTE $0x28 // mov rax, qword ptr [rsp + 40] LONG $0x02000548; WORD $0x0000 // add rax, 512 LONG $0x2404ff48 // inc qword ptr [rsp] QUAD $0xfffe000824448148; BYTE $0xff // add qword ptr [rsp + 8], -512 QUAD $0x0010001024448148; BYTE $0x00 // add qword ptr [rsp + 16], 4096 LONG $0x00c28148; WORD $0x0010; BYTE $0x00 // add rdx, 4096 LONG $0x245c8b48; BYTE $0x20 // mov rbx, qword ptr [rsp + 32] WORD $0x8949; BYTE $0xdf // mov r15, rbx LONG $0x244c8b48; BYTE $0x18 // mov rcx, qword ptr [rsp + 24] WORD $0x3948; BYTE $0xcb // cmp rbx, rcx JAE LBB5_17 LBB5_3: WORD $0x3948; BYTE $0xc8 // cmp rax, rcx WORD $0x8949; BYTE $0xcd // mov r13, rcx LONG $0x24448948; BYTE $0x28 // mov qword ptr [rsp + 40], rax LONG $0xe8420f4c // cmovb r13, rax LONG $0x00878d49; WORD $0x0002; BYTE $0x00 // lea rax, [r15 + 512] WORD $0x3948; BYTE $0xc8 // cmp rax, rcx LONG $0x24448948; BYTE $0x20 // mov qword ptr [rsp + 32], rax LONG $0xc8420f48 // cmovb rcx, rax WORD $0x3949; BYTE $0xcf // cmp r15, rcx JAE LBB5_16 LONG $0x24448b48; BYTE $0x08 // mov rax, qword ptr [rsp + 8] LONG $0x280c8d4a // lea rcx, [rax + r13] LONG $0xe0e18348 // and rcx, 
-32 LONG $0x24048b48 // mov rax, qword ptr [rsp] WORD $0x8948; BYTE $0xc3 // mov rbx, rax LONG $0x09e3c148 // shl rbx, 9 WORD $0x894d; BYTE $0xee // mov r14, r13 WORD $0x2949; BYTE $0xde // sub r14, rbx LONG $0x0ce0c148 // shl rax, 12 LONG $0x071c8d48 // lea rbx, [rdi + rax] LONG $0x245c8948; BYTE $0x50 // mov qword ptr [rsp + 80], rbx LONG $0x24448948; BYTE $0x48 // mov qword ptr [rsp + 72], rax LONG $0xf0148d4e // lea r10, [rax + 8*r14] LONG $0x3a048d49 // lea rax, [r10 + rdi] LONG $0x24448948; BYTE $0x40 // mov qword ptr [rsp + 64], rax WORD $0x894c; BYTE $0xf0 // mov rax, r14 LONG $0xe0e08348 // and rax, -32 LONG $0x24448948; BYTE $0x38 // mov qword ptr [rsp + 56], rax WORD $0x014c; BYTE $0xf8 // add rax, r15 LONG $0x24448948; BYTE $0x30 // mov qword ptr [rsp + 48], rax WORD $0x3145; BYTE $0xc9 // xor r9d, r9d JMP LBB5_5 LBB5_15: WORD $0xff49; BYTE $0xc1 // inc r9 WORD $0x394d; BYTE $0xe1 // cmp r9, r12 JE LBB5_16 LBB5_5: LONG $0xce048b4e // mov r8, qword ptr [rsi + 8*r9] WORD $0x894c; BYTE $0xfb // mov rbx, r15 LONG $0x20fe8349 // cmp r14, 32 JB LBB5_11 LONG $0x101c8d4b // lea rbx, [r8 + r10] LONG $0x245c3948; BYTE $0x50 // cmp qword ptr [rsp + 80], rbx JAE LBB5_8 LONG $0x24448b48; BYTE $0x48 // mov rax, qword ptr [rsp + 72] LONG $0x001c8d4d // lea r11, [r8 + rax] WORD $0x894c; BYTE $0xfb // mov rbx, r15 LONG $0x245c3b4c; BYTE $0x40 // cmp r11, qword ptr [rsp + 64] JB LBB5_11 LBB5_8: LONG $0x24448b48; BYTE $0x10 // mov rax, qword ptr [rsp + 16] LONG $0x001c8d49 // lea rbx, [r8 + rax] WORD $0x3145; BYTE $0xdb // xor r11d, r11d LBB5_9: QUAD $0xfddb4410487cb162 // vmovups zmm0, zmmword ptr [rbx + 8*r11 - 192] QUAD $0xfedb4c10487cb162 // vmovups zmm1, zmmword ptr [rbx + 8*r11 - 128] QUAD $0xffdb5410487cb162 // vmovups zmm2, zmmword ptr [rbx + 8*r11 - 64] LONG $0x487cb162; WORD $0x1c10; BYTE $0xdb // vmovups zmm3, zmmword ptr [rbx + 8*r11] QUAD $0xfdda4455487cb162 // vandnps zmm0, zmm0, zmmword ptr [rdx + 8*r11 - 192] QUAD $0xfeda4c554874b162 // vandnps zmm1, zmm1, 
zmmword ptr [rdx + 8*r11 - 128] QUAD $0xffda5455486cb162 // vandnps zmm2, zmm2, zmmword ptr [rdx + 8*r11 - 64] LONG $0x4864b162; WORD $0x1c55; BYTE $0xda // vandnps zmm3, zmm3, zmmword ptr [rdx + 8*r11] QUAD $0xfdda4411487cb162 // vmovups zmmword ptr [rdx + 8*r11 - 192], zmm0 QUAD $0xfeda4c11487cb162 // vmovups zmmword ptr [rdx + 8*r11 - 128], zmm1 QUAD $0xffda5411487cb162 // vmovups zmmword ptr [rdx + 8*r11 - 64], zmm2 LONG $0x487cb162; WORD $0x1c11; BYTE $0xda // vmovups zmmword ptr [rdx + 8*r11], zmm3 LONG $0x20c38349 // add r11, 32 WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 JNE LBB5_9 LONG $0x245c8b48; BYTE $0x30 // mov rbx, qword ptr [rsp + 48] LONG $0x24743b4c; BYTE $0x38 // cmp r14, qword ptr [rsp + 56] JE LBB5_15 LBB5_11: WORD $0x8944; BYTE $0xe8 // mov eax, r13d WORD $0xd829 // sub eax, ebx LONG $0x015b8d4c // lea r11, [rbx + 1] WORD $0x01a8 // test al, 1 JE LBB5_13 LONG $0xd8048b49 // mov rax, qword ptr [r8 + 8*rbx] WORD $0xf748; BYTE $0xd0 // not rax LONG $0xdf042148 // and qword ptr [rdi + 8*rbx], rax WORD $0x894c; BYTE $0xdb // mov rbx, r11 LBB5_13: WORD $0x394d; BYTE $0xdd // cmp r13, r11 JE LBB5_15 LBB5_14: LONG $0xd8048b49 // mov rax, qword ptr [r8 + 8*rbx] WORD $0xf748; BYTE $0xd0 // not rax LONG $0xdf042148 // and qword ptr [rdi + 8*rbx], rax LONG $0xd8448b49; BYTE $0x08 // mov rax, qword ptr [r8 + 8*rbx + 8] WORD $0xf748; BYTE $0xd0 // not rax LONG $0xdf442148; BYTE $0x08 // and qword ptr [rdi + 8*rbx + 8], rax LONG $0x02c38348 // add rbx, 2 WORD $0x3949; BYTE $0xdd // cmp r13, rbx JNE LBB5_14 JMP LBB5_15 LBB5_17: LONG $0xd8658d48 // lea rsp, [rbp - 40] BYTE $0x5b // pop rbx WORD $0x5c41 // pop r12 WORD $0x5d41 // pop r13 WORD $0x5e41 // pop r14 WORD $0x5f41 // pop r15 BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_or_many_avx512(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ dims+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp WORD $0x5741 // push r15 WORD $0x5641 // push r14 
WORD $0x5541 // push r13 WORD $0x5441 // push r12 BYTE $0x53 // push rbx LONG $0xf8e48348 // and rsp, -8 LONG $0x68ec8348 // sub rsp, 104 LONG $0xffffffb9; BYTE $0xff // mov ecx, 4294967295 WORD $0x2148; BYTE $0xd1 // and rcx, rdx JE LBB6_18 LONG $0x20eac148 // shr rdx, 32 JE LBB6_18 LONG $0x02fa8348 // cmp rdx, 2 LONG $0x0001bc41; WORD $0x0000 // mov r12d, 1 LONG $0xe2430f4c // cmovae r12, rdx LONG $0xc0978d48; WORD $0x0000; BYTE $0x00 // lea rdx, [rdi + 192] LONG $0x000200bb; BYTE $0x00 // mov ebx, 512 LONG $0x0000c0b8; BYTE $0x00 // mov eax, 192 LONG $0x24448948; BYTE $0x18 // mov qword ptr [rsp + 24], rax WORD $0xc031 // xor eax, eax LONG $0x24448948; BYTE $0x10 // mov qword ptr [rsp + 16], rax WORD $0xc031 // xor eax, eax LONG $0x24448948; BYTE $0x08 // mov qword ptr [rsp + 8], rax WORD $0x3145; BYTE $0xff // xor r15d, r15d LONG $0x244c8948; BYTE $0x20 // mov qword ptr [rsp + 32], rcx JMP LBB6_3 LBB6_17: LONG $0x245c8b48; BYTE $0x30 // mov rbx, qword ptr [rsp + 48] LONG $0x00c38148; WORD $0x0002; BYTE $0x00 // add rbx, 512 LONG $0x2444ff48; BYTE $0x08 // inc qword ptr [rsp + 8] QUAD $0xfffe001024448148; BYTE $0xff // add qword ptr [rsp + 16], -512 QUAD $0x0010001824448148; BYTE $0x00 // add qword ptr [rsp + 24], 4096 LONG $0x00c28148; WORD $0x0010; BYTE $0x00 // add rdx, 4096 LONG $0x24448b48; BYTE $0x28 // mov rax, qword ptr [rsp + 40] WORD $0x8949; BYTE $0xc7 // mov r15, rax LONG $0x244c8b48; BYTE $0x20 // mov rcx, qword ptr [rsp + 32] WORD $0x3948; BYTE $0xc8 // cmp rax, rcx JAE LBB6_18 LBB6_3: WORD $0x3948; BYTE $0xcb // cmp rbx, rcx WORD $0x8948; BYTE $0xc8 // mov rax, rcx LONG $0x245c8948; BYTE $0x30 // mov qword ptr [rsp + 48], rbx LONG $0xc3420f48 // cmovb rax, rbx LONG $0x009f8d49; WORD $0x0002; BYTE $0x00 // lea rbx, [r15 + 512] WORD $0x3948; BYTE $0xcb // cmp rbx, rcx LONG $0x245c8948; BYTE $0x28 // mov qword ptr [rsp + 40], rbx LONG $0xcb420f48 // cmovb rcx, rbx WORD $0x3949; BYTE $0xcf // cmp r15, rcx JAE LBB6_17 LONG $0x244c8b48; BYTE $0x10 // 
mov rcx, qword ptr [rsp + 16] LONG $0x08148d4c // lea r10, [rax + rcx] LONG $0xe0e28349 // and r10, -32 LONG $0x245c8b48; BYTE $0x08 // mov rbx, qword ptr [rsp + 8] WORD $0x8948; BYTE $0xd9 // mov rcx, rbx LONG $0x09e1c148 // shl rcx, 9 WORD $0x8949; BYTE $0xc5 // mov r13, rax WORD $0x2949; BYTE $0xcd // sub r13, rcx LONG $0x0ce3c148 // shl rbx, 12 LONG $0x1f0c8d48 // lea rcx, [rdi + rbx] LONG $0x244c8948; BYTE $0x60 // mov qword ptr [rsp + 96], rcx LONG $0x245c8948; BYTE $0x50 // mov qword ptr [rsp + 80], rbx LONG $0xeb1c8d4a // lea rbx, [rbx + 8*r13] WORD $0x8948; BYTE $0xd9 // mov rcx, rbx LONG $0x245c8948; BYTE $0x58 // mov qword ptr [rsp + 88], rbx LONG $0x3b0c8d48 // lea rcx, [rbx + rdi] LONG $0x244c8948; BYTE $0x48 // mov qword ptr [rsp + 72], rcx WORD $0x894c; BYTE $0xe9 // mov rcx, r13 LONG $0xe0e18348 // and rcx, -32 LONG $0x244c8948; BYTE $0x40 // mov qword ptr [rsp + 64], rcx WORD $0x014c; BYTE $0xf9 // add rcx, r15 LONG $0x244c8948; BYTE $0x38 // mov qword ptr [rsp + 56], rcx WORD $0x3145; BYTE $0xc9 // xor r9d, r9d JMP LBB6_5 LBB6_16: WORD $0xff49; BYTE $0xc1 // inc r9 WORD $0x394d; BYTE $0xe1 // cmp r9, r12 JE LBB6_17 LBB6_5: LONG $0xce048b4e // mov r8, qword ptr [rsi + 8*r9] WORD $0x894c; BYTE $0xfb // mov rbx, r15 LONG $0x20fd8349 // cmp r13, 32 JB LBB6_11 LONG $0x244c8b48; BYTE $0x58 // mov rcx, qword ptr [rsp + 88] WORD $0x014c; BYTE $0xc1 // add rcx, r8 LONG $0x244c3948; BYTE $0x60 // cmp qword ptr [rsp + 96], rcx JAE LBB6_8 LONG $0x244c8b48; BYTE $0x50 // mov rcx, qword ptr [rsp + 80] WORD $0x014c; BYTE $0xc1 // add rcx, r8 WORD $0x894c; BYTE $0xfb // mov rbx, r15 LONG $0x244c3b48; BYTE $0x48 // cmp rcx, qword ptr [rsp + 72] JB LBB6_11 LBB6_8: LONG $0x244c8b48; BYTE $0x18 // mov rcx, qword ptr [rsp + 24] LONG $0x081c8d49 // lea rbx, [r8 + rcx] WORD $0xc931 // xor ecx, ecx LBB6_9: QUAD $0xfdca4410487cf162 // vmovups zmm0, zmmword ptr [rdx + 8*rcx - 192] QUAD $0xfeca4c10487cf162 // vmovups zmm1, zmmword ptr [rdx + 8*rcx - 128] QUAD 
$0xffca5410487cf162 // vmovups zmm2, zmmword ptr [rdx + 8*rcx - 64] LONG $0x487cf162; WORD $0x1c10; BYTE $0xca // vmovups zmm3, zmmword ptr [rdx + 8*rcx] QUAD $0xfdcb4456487cf162 // vorps zmm0, zmm0, zmmword ptr [rbx + 8*rcx - 192] QUAD $0xfecb4c564874f162 // vorps zmm1, zmm1, zmmword ptr [rbx + 8*rcx - 128] QUAD $0xffcb5456486cf162 // vorps zmm2, zmm2, zmmword ptr [rbx + 8*rcx - 64] LONG $0x4864f162; WORD $0x1c56; BYTE $0xcb // vorps zmm3, zmm3, zmmword ptr [rbx + 8*rcx] QUAD $0xfdca4411487cf162 // vmovups zmmword ptr [rdx + 8*rcx - 192], zmm0 QUAD $0xfeca4c11487cf162 // vmovups zmmword ptr [rdx + 8*rcx - 128], zmm1 QUAD $0xffca5411487cf162 // vmovups zmmword ptr [rdx + 8*rcx - 64], zmm2 LONG $0x487cf162; WORD $0x1c11; BYTE $0xca // vmovups zmmword ptr [rdx + 8*rcx], zmm3 LONG $0x20c18348 // add rcx, 32 WORD $0x3949; BYTE $0xca // cmp r10, rcx JNE LBB6_9 LONG $0x245c8b48; BYTE $0x38 // mov rbx, qword ptr [rsp + 56] LONG $0x246c3b4c; BYTE $0x40 // cmp r13, qword ptr [rsp + 64] JE LBB6_16 LBB6_11: WORD $0xc189 // mov ecx, eax WORD $0xd929 // sub ecx, ebx WORD $0x8949; BYTE $0xdb // mov r11, rbx WORD $0xf749; BYTE $0xd3 // not r11 WORD $0x0149; BYTE $0xc3 // add r11, rax WORD $0xc1f6; BYTE $0x03 // test cl, 3 JE LBB6_14 WORD $0xc189 // mov ecx, eax WORD $0xd928 // sub cl, bl LONG $0xf1b60f44 // movzx r14d, cl LONG $0x03e68341 // and r14d, 3 LBB6_13: LONG $0xd80c8b49 // mov rcx, qword ptr [r8 + 8*rbx] LONG $0xdf0c0948 // or qword ptr [rdi + 8*rbx], rcx WORD $0xff48; BYTE $0xc3 // inc rbx WORD $0xff49; BYTE $0xce // dec r14 JNE LBB6_13 LBB6_14: LONG $0x03fb8349 // cmp r11, 3 JB LBB6_16 LBB6_15: LONG $0xd80c8b49 // mov rcx, qword ptr [r8 + 8*rbx] LONG $0xdf0c0948 // or qword ptr [rdi + 8*rbx], rcx LONG $0xd84c8b49; BYTE $0x08 // mov rcx, qword ptr [r8 + 8*rbx + 8] LONG $0xdf4c0948; BYTE $0x08 // or qword ptr [rdi + 8*rbx + 8], rcx LONG $0xd84c8b49; BYTE $0x10 // mov rcx, qword ptr [r8 + 8*rbx + 16] LONG $0xdf4c0948; BYTE $0x10 // or qword ptr [rdi + 8*rbx + 16], rcx 
LONG $0xd84c8b49; BYTE $0x18 // mov rcx, qword ptr [r8 + 8*rbx + 24] LONG $0xdf4c0948; BYTE $0x18 // or qword ptr [rdi + 8*rbx + 24], rcx LONG $0x04c38348 // add rbx, 4 WORD $0x3948; BYTE $0xd8 // cmp rax, rbx JNE LBB6_15 JMP LBB6_16 LBB6_18: LONG $0xd8658d48 // lea rsp, [rbp - 40] BYTE $0x5b // pop rbx WORD $0x5c41 // pop r12 WORD $0x5d41 // pop r13 WORD $0x5e41 // pop r14 WORD $0x5f41 // pop r15 BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret TEXT ·_xor_many_avx512(SB), $0-32 MOVQ a+0(FP), DI MOVQ b+8(FP), SI MOVQ dims+16(FP), DX BYTE $0x55 // push rbp WORD $0x8948; BYTE $0xe5 // mov rbp, rsp WORD $0x5741 // push r15 WORD $0x5641 // push r14 WORD $0x5541 // push r13 WORD $0x5441 // push r12 BYTE $0x53 // push rbx LONG $0xf8e48348 // and rsp, -8 LONG $0x68ec8348 // sub rsp, 104 LONG $0xffffffb9; BYTE $0xff // mov ecx, 4294967295 WORD $0x2148; BYTE $0xd1 // and rcx, rdx JE LBB7_18 LONG $0x20eac148 // shr rdx, 32 JE LBB7_18 LONG $0x02fa8348 // cmp rdx, 2 LONG $0x0001bc41; WORD $0x0000 // mov r12d, 1 LONG $0xe2430f4c // cmovae r12, rdx LONG $0xc0978d48; WORD $0x0000; BYTE $0x00 // lea rdx, [rdi + 192] LONG $0x000200bb; BYTE $0x00 // mov ebx, 512 LONG $0x0000c0b8; BYTE $0x00 // mov eax, 192 LONG $0x24448948; BYTE $0x18 // mov qword ptr [rsp + 24], rax WORD $0xc031 // xor eax, eax LONG $0x24448948; BYTE $0x10 // mov qword ptr [rsp + 16], rax WORD $0xc031 // xor eax, eax LONG $0x24448948; BYTE $0x08 // mov qword ptr [rsp + 8], rax WORD $0x3145; BYTE $0xff // xor r15d, r15d LONG $0x244c8948; BYTE $0x20 // mov qword ptr [rsp + 32], rcx JMP LBB7_3 LBB7_17: LONG $0x245c8b48; BYTE $0x30 // mov rbx, qword ptr [rsp + 48] LONG $0x00c38148; WORD $0x0002; BYTE $0x00 // add rbx, 512 LONG $0x2444ff48; BYTE $0x08 // inc qword ptr [rsp + 8] QUAD $0xfffe001024448148; BYTE $0xff // add qword ptr [rsp + 16], -512 QUAD $0x0010001824448148; BYTE $0x00 // add qword ptr [rsp + 24], 4096 LONG $0x00c28148; WORD $0x0010; BYTE $0x00 // add rdx, 4096 LONG 
$0x24448b48; BYTE $0x28 // mov rax, qword ptr [rsp + 40] WORD $0x8949; BYTE $0xc7 // mov r15, rax LONG $0x244c8b48; BYTE $0x20 // mov rcx, qword ptr [rsp + 32] WORD $0x3948; BYTE $0xc8 // cmp rax, rcx JAE LBB7_18 LBB7_3: WORD $0x3948; BYTE $0xcb // cmp rbx, rcx WORD $0x8948; BYTE $0xc8 // mov rax, rcx LONG $0x245c8948; BYTE $0x30 // mov qword ptr [rsp + 48], rbx LONG $0xc3420f48 // cmovb rax, rbx LONG $0x009f8d49; WORD $0x0002; BYTE $0x00 // lea rbx, [r15 + 512] WORD $0x3948; BYTE $0xcb // cmp rbx, rcx LONG $0x245c8948; BYTE $0x28 // mov qword ptr [rsp + 40], rbx LONG $0xcb420f48 // cmovb rcx, rbx WORD $0x3949; BYTE $0xcf // cmp r15, rcx JAE LBB7_17 LONG $0x244c8b48; BYTE $0x10 // mov rcx, qword ptr [rsp + 16] LONG $0x08148d4c // lea r10, [rax + rcx] LONG $0xe0e28349 // and r10, -32 LONG $0x245c8b48; BYTE $0x08 // mov rbx, qword ptr [rsp + 8] WORD $0x8948; BYTE $0xd9 // mov rcx, rbx LONG $0x09e1c148 // shl rcx, 9 WORD $0x8949; BYTE $0xc5 // mov r13, rax WORD $0x2949; BYTE $0xcd // sub r13, rcx LONG $0x0ce3c148 // shl rbx, 12 LONG $0x1f0c8d48 // lea rcx, [rdi + rbx] LONG $0x244c8948; BYTE $0x60 // mov qword ptr [rsp + 96], rcx LONG $0x245c8948; BYTE $0x50 // mov qword ptr [rsp + 80], rbx LONG $0xeb1c8d4a // lea rbx, [rbx + 8*r13] WORD $0x8948; BYTE $0xd9 // mov rcx, rbx LONG $0x245c8948; BYTE $0x58 // mov qword ptr [rsp + 88], rbx LONG $0x3b0c8d48 // lea rcx, [rbx + rdi] LONG $0x244c8948; BYTE $0x48 // mov qword ptr [rsp + 72], rcx WORD $0x894c; BYTE $0xe9 // mov rcx, r13 LONG $0xe0e18348 // and rcx, -32 LONG $0x244c8948; BYTE $0x40 // mov qword ptr [rsp + 64], rcx WORD $0x014c; BYTE $0xf9 // add rcx, r15 LONG $0x244c8948; BYTE $0x38 // mov qword ptr [rsp + 56], rcx WORD $0x3145; BYTE $0xc9 // xor r9d, r9d JMP LBB7_5 LBB7_16: WORD $0xff49; BYTE $0xc1 // inc r9 WORD $0x394d; BYTE $0xe1 // cmp r9, r12 JE LBB7_17 LBB7_5: LONG $0xce048b4e // mov r8, qword ptr [rsi + 8*r9] WORD $0x894c; BYTE $0xfb // mov rbx, r15 LONG $0x20fd8349 // cmp r13, 32 JB LBB7_11 LONG 
$0x244c8b48; BYTE $0x58 // mov rcx, qword ptr [rsp + 88] WORD $0x014c; BYTE $0xc1 // add rcx, r8 LONG $0x244c3948; BYTE $0x60 // cmp qword ptr [rsp + 96], rcx JAE LBB7_8 LONG $0x244c8b48; BYTE $0x50 // mov rcx, qword ptr [rsp + 80] WORD $0x014c; BYTE $0xc1 // add rcx, r8 WORD $0x894c; BYTE $0xfb // mov rbx, r15 LONG $0x244c3b48; BYTE $0x48 // cmp rcx, qword ptr [rsp + 72] JB LBB7_11 LBB7_8: LONG $0x244c8b48; BYTE $0x18 // mov rcx, qword ptr [rsp + 24] LONG $0x081c8d49 // lea rbx, [r8 + rcx] WORD $0xc931 // xor ecx, ecx LBB7_9: QUAD $0xfdca4410487cf162 // vmovups zmm0, zmmword ptr [rdx + 8*rcx - 192] QUAD $0xfeca4c10487cf162 // vmovups zmm1, zmmword ptr [rdx + 8*rcx - 128] QUAD $0xffca5410487cf162 // vmovups zmm2, zmmword ptr [rdx + 8*rcx - 64] LONG $0x487cf162; WORD $0x1c10; BYTE $0xca // vmovups zmm3, zmmword ptr [rdx + 8*rcx] QUAD $0xfdcb4457487cf162 // vxorps zmm0, zmm0, zmmword ptr [rbx + 8*rcx - 192] QUAD $0xfecb4c574874f162 // vxorps zmm1, zmm1, zmmword ptr [rbx + 8*rcx - 128] QUAD $0xffcb5457486cf162 // vxorps zmm2, zmm2, zmmword ptr [rbx + 8*rcx - 64] LONG $0x4864f162; WORD $0x1c57; BYTE $0xcb // vxorps zmm3, zmm3, zmmword ptr [rbx + 8*rcx] QUAD $0xfdca4411487cf162 // vmovups zmmword ptr [rdx + 8*rcx - 192], zmm0 QUAD $0xfeca4c11487cf162 // vmovups zmmword ptr [rdx + 8*rcx - 128], zmm1 QUAD $0xffca5411487cf162 // vmovups zmmword ptr [rdx + 8*rcx - 64], zmm2 LONG $0x487cf162; WORD $0x1c11; BYTE $0xca // vmovups zmmword ptr [rdx + 8*rcx], zmm3 LONG $0x20c18348 // add rcx, 32 WORD $0x3949; BYTE $0xca // cmp r10, rcx JNE LBB7_9 LONG $0x245c8b48; BYTE $0x38 // mov rbx, qword ptr [rsp + 56] LONG $0x246c3b4c; BYTE $0x40 // cmp r13, qword ptr [rsp + 64] JE LBB7_16 LBB7_11: WORD $0xc189 // mov ecx, eax WORD $0xd929 // sub ecx, ebx WORD $0x8949; BYTE $0xdb // mov r11, rbx WORD $0xf749; BYTE $0xd3 // not r11 WORD $0x0149; BYTE $0xc3 // add r11, rax WORD $0xc1f6; BYTE $0x03 // test cl, 3 JE LBB7_14 WORD $0xc189 // mov ecx, eax WORD $0xd928 // sub cl, bl LONG 
$0xf1b60f44 // movzx r14d, cl LONG $0x03e68341 // and r14d, 3 LBB7_13: LONG $0xd80c8b49 // mov rcx, qword ptr [r8 + 8*rbx] LONG $0xdf0c3148 // xor qword ptr [rdi + 8*rbx], rcx WORD $0xff48; BYTE $0xc3 // inc rbx WORD $0xff49; BYTE $0xce // dec r14 JNE LBB7_13 LBB7_14: LONG $0x03fb8349 // cmp r11, 3 JB LBB7_16 LBB7_15: LONG $0xd80c8b49 // mov rcx, qword ptr [r8 + 8*rbx] LONG $0xdf0c3148 // xor qword ptr [rdi + 8*rbx], rcx LONG $0xd84c8b49; BYTE $0x08 // mov rcx, qword ptr [r8 + 8*rbx + 8] LONG $0xdf4c3148; BYTE $0x08 // xor qword ptr [rdi + 8*rbx + 8], rcx LONG $0xd84c8b49; BYTE $0x10 // mov rcx, qword ptr [r8 + 8*rbx + 16] LONG $0xdf4c3148; BYTE $0x10 // xor qword ptr [rdi + 8*rbx + 16], rcx LONG $0xd84c8b49; BYTE $0x18 // mov rcx, qword ptr [r8 + 8*rbx + 24] LONG $0xdf4c3148; BYTE $0x18 // xor qword ptr [rdi + 8*rbx + 24], rcx LONG $0x04c38348 // add rbx, 4 WORD $0x3948; BYTE $0xd8 // cmp rax, rbx JNE LBB7_15 JMP LBB7_16 LBB7_18: LONG $0xd8658d48 // lea rsp, [rbp - 40] BYTE $0x5b // pop rbx WORD $0x5c41 // pop r12 WORD $0x5d41 // pop r13 WORD $0x5e41 // pop r14 WORD $0x5f41 // pop r15 BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret golang-github-kelindar-bitmap-1.5.5/simd_generic.go000066400000000000000000000031161517523267600223100ustar00rootroot00000000000000// Copyright (c) Roman Atachiants and contributors. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for details. 
package bitmap import "math/bits" // Count counts the number of bits set to one func count(arr []uint64) int { sum := 0 for i := 0; i < len(arr); i++ { sum += bits.OnesCount64(arr[i]) } return sum } // and computes the intersection of multiple bitmaps func and(a Bitmap, upper int, other Bitmap, extra []Bitmap) { for i := 0; i < upper; i++ { a[i] = a[i] & other[i] } for _, b := range extra { for i := 0; i < upper; i++ { a[i] = a[i] & b[i] } } } // AndNot computes the difference between two bitmaps and stores the result in the current bitmap func andn(a Bitmap, upper int, other Bitmap, extra []Bitmap) { for i := 0; i < upper; i++ { a[i] = a[i] &^ other[i] } for _, b := range extra { for i := 0; i < upper; i++ { a[i] = a[i] &^ b[i] } } } // or computes the union between two bitmaps and stores the result in the current bitmap func or(a Bitmap, other Bitmap, extra []Bitmap) { for i := 0; i < len(other); i++ { a[i] = a[i] | other[i] } for _, b := range extra { for i := 0; i < len(b); i++ { a[i] = a[i] | b[i] } } } // Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap func xor(a Bitmap, other Bitmap, extra []Bitmap) { for i := 0; i < len(other); i++ { a[i] = a[i] ^ other[i] } for _, b := range extra { for i := 0; i < len(b); i++ { a[i] = a[i] ^ b[i] } } } golang-github-kelindar-bitmap-1.5.5/simd_neon.go000066400000000000000000000016161517523267600216360ustar00rootroot00000000000000//go:build !noasm && !darwin && arm64 // AUTO-GENERATED BY GOCC -- DO NOT EDIT package bitmap import "unsafe" //go:nosplit //go:noescape func _and(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _andn(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _or(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _xor(a unsafe.Pointer, b unsafe.Pointer, n uint64) //go:nosplit //go:noescape func _and_many(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit 
//go:noescape func _andn_many(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _or_many(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _xor_many(a unsafe.Pointer, b unsafe.Pointer, dims uint64) //go:nosplit //go:noescape func _count(a unsafe.Pointer, size uint64, result unsafe.Pointer) golang-github-kelindar-bitmap-1.5.5/simd_neon.s000066400000000000000000000545661517523267600215070ustar00rootroot00000000000000//go:build !noasm && !darwin && arm64 // AUTO-GENERATED BY GOCC -- DO NOT EDIT TEXT ·_and(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD n+16(FP), R2 WORD $0xb40002a2 // cbz x2, .LBB0_7 WORD $0xf100105f // cmp x2, #4 WORD $0x54000103 // b.lo .LBB0_4 WORD $0xd37df048 // lsl x8, x2, #3 WORD $0x8b080029 // add x9, x1, x8 WORD $0xeb00013f // cmp x9, x0 WORD $0x54000209 // b.ls .LBB0_8 WORD $0x8b080008 // add x8, x0, x8 WORD $0xeb01011f // cmp x8, x1 WORD $0x540001a9 // b.ls .LBB0_8 LBB0_4: WORD $0xaa1f03e8 // mov x8, xzr LBB0_5: WORD $0xd37df10a // lsl x10, x8, #3 WORD $0xcb080049 // sub x9, x2, x8 WORD $0x8b0a0008 // add x8, x0, x10 WORD $0x8b0a002a // add x10, x1, x10 LBB0_6: WORD $0xf840854b // ldr x11, [x10], #8 WORD $0xf940010c // ldr x12, [x8] WORD $0xf1000529 // subs x9, x9, #1 WORD $0x8a0b018b // and x11, x12, x11 WORD $0xf800850b // str x11, [x8], #8 WORD $0x54ffff61 // b.ne .LBB0_6 LBB0_7: WORD $0xd65f03c0 // ret LBB0_8: WORD $0x927ef448 // and x8, x2, #0xfffffffffffffffc WORD $0x91004029 // add x9, x1, #16 WORD $0x9100400a // add x10, x0, #16 WORD $0xaa0803eb // mov x11, x8 LBB0_9: WORD $0xad7f8520 // ldp q0, q1, [x9, #-16] WORD $0x91008129 // add x9, x9, #32 WORD $0xf100116b // subs x11, x11, #4 WORD $0xad7f8d42 // ldp q2, q3, [x10, #-16] WORD $0x4e201c40 // and v0.16b, v2.16b, v0.16b WORD $0x4e211c61 // and v1.16b, v3.16b, v1.16b WORD $0xad3f8540 // stp q0, q1, [x10, #-16] WORD $0x9100814a // add x10, x10, #32 WORD $0x54ffff01 // b.ne .LBB0_9 WORD $0xeb02011f // cmp x8, x2 WORD 
$0x54fffe20 // b.eq .LBB0_7 WORD $0x17ffffe6 // b .LBB0_5 TEXT ·_andn(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD n+16(FP), R2 WORD $0xb40002a2 // cbz x2, .LBB1_7 WORD $0xf100105f // cmp x2, #4 WORD $0x54000103 // b.lo .LBB1_4 WORD $0xd37df048 // lsl x8, x2, #3 WORD $0x8b080029 // add x9, x1, x8 WORD $0xeb00013f // cmp x9, x0 WORD $0x54000209 // b.ls .LBB1_8 WORD $0x8b080008 // add x8, x0, x8 WORD $0xeb01011f // cmp x8, x1 WORD $0x540001a9 // b.ls .LBB1_8 LBB1_4: WORD $0xaa1f03e8 // mov x8, xzr LBB1_5: WORD $0xd37df10a // lsl x10, x8, #3 WORD $0xcb080049 // sub x9, x2, x8 WORD $0x8b0a0008 // add x8, x0, x10 WORD $0x8b0a002a // add x10, x1, x10 LBB1_6: WORD $0xf840854b // ldr x11, [x10], #8 WORD $0xf940010c // ldr x12, [x8] WORD $0xf1000529 // subs x9, x9, #1 WORD $0x8a2b018b // bic x11, x12, x11 WORD $0xf800850b // str x11, [x8], #8 WORD $0x54ffff61 // b.ne .LBB1_6 LBB1_7: WORD $0xd65f03c0 // ret LBB1_8: WORD $0x927ef448 // and x8, x2, #0xfffffffffffffffc WORD $0x91004029 // add x9, x1, #16 WORD $0x9100400a // add x10, x0, #16 WORD $0xaa0803eb // mov x11, x8 LBB1_9: WORD $0xad7f8520 // ldp q0, q1, [x9, #-16] WORD $0x91008129 // add x9, x9, #32 WORD $0xf100116b // subs x11, x11, #4 WORD $0xad7f8d42 // ldp q2, q3, [x10, #-16] WORD $0x4e601c40 // bic v0.16b, v2.16b, v0.16b WORD $0x4e611c61 // bic v1.16b, v3.16b, v1.16b WORD $0xad3f8540 // stp q0, q1, [x10, #-16] WORD $0x9100814a // add x10, x10, #32 WORD $0x54ffff01 // b.ne .LBB1_9 WORD $0xeb02011f // cmp x8, x2 WORD $0x54fffe20 // b.eq .LBB1_7 WORD $0x17ffffe6 // b .LBB1_5 TEXT ·_or(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD n+16(FP), R2 WORD $0xb40002a2 // cbz x2, .LBB2_7 WORD $0xf100105f // cmp x2, #4 WORD $0x54000103 // b.lo .LBB2_4 WORD $0xd37df048 // lsl x8, x2, #3 WORD $0x8b080029 // add x9, x1, x8 WORD $0xeb00013f // cmp x9, x0 WORD $0x54000209 // b.ls .LBB2_8 WORD $0x8b080008 // add x8, x0, x8 WORD $0xeb01011f // cmp x8, x1 WORD $0x540001a9 // b.ls .LBB2_8 LBB2_4: WORD $0xaa1f03e8 // mov x8, xzr 
LBB2_5: WORD $0xd37df10a // lsl x10, x8, #3 WORD $0xcb080049 // sub x9, x2, x8 WORD $0x8b0a0008 // add x8, x0, x10 WORD $0x8b0a002a // add x10, x1, x10 LBB2_6: WORD $0xf840854b // ldr x11, [x10], #8 WORD $0xf940010c // ldr x12, [x8] WORD $0xf1000529 // subs x9, x9, #1 WORD $0xaa0b018b // orr x11, x12, x11 WORD $0xf800850b // str x11, [x8], #8 WORD $0x54ffff61 // b.ne .LBB2_6 LBB2_7: WORD $0xd65f03c0 // ret LBB2_8: WORD $0x927ef448 // and x8, x2, #0xfffffffffffffffc WORD $0x91004029 // add x9, x1, #16 WORD $0x9100400a // add x10, x0, #16 WORD $0xaa0803eb // mov x11, x8 LBB2_9: WORD $0xad7f8520 // ldp q0, q1, [x9, #-16] WORD $0x91008129 // add x9, x9, #32 WORD $0xf100116b // subs x11, x11, #4 WORD $0xad7f8d42 // ldp q2, q3, [x10, #-16] WORD $0x4ea01c40 // orr v0.16b, v2.16b, v0.16b WORD $0x4ea11c61 // orr v1.16b, v3.16b, v1.16b WORD $0xad3f8540 // stp q0, q1, [x10, #-16] WORD $0x9100814a // add x10, x10, #32 WORD $0x54ffff01 // b.ne .LBB2_9 WORD $0xeb02011f // cmp x8, x2 WORD $0x54fffe20 // b.eq .LBB2_7 WORD $0x17ffffe6 // b .LBB2_5 TEXT ·_xor(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD n+16(FP), R2 WORD $0xb40002a2 // cbz x2, .LBB3_7 WORD $0xf100105f // cmp x2, #4 WORD $0x54000103 // b.lo .LBB3_4 WORD $0xd37df048 // lsl x8, x2, #3 WORD $0x8b080029 // add x9, x1, x8 WORD $0xeb00013f // cmp x9, x0 WORD $0x54000209 // b.ls .LBB3_8 WORD $0x8b080008 // add x8, x0, x8 WORD $0xeb01011f // cmp x8, x1 WORD $0x540001a9 // b.ls .LBB3_8 LBB3_4: WORD $0xaa1f03e8 // mov x8, xzr LBB3_5: WORD $0xd37df10a // lsl x10, x8, #3 WORD $0xcb080049 // sub x9, x2, x8 WORD $0x8b0a0008 // add x8, x0, x10 WORD $0x8b0a002a // add x10, x1, x10 LBB3_6: WORD $0xf840854b // ldr x11, [x10], #8 WORD $0xf940010c // ldr x12, [x8] WORD $0xf1000529 // subs x9, x9, #1 WORD $0xca0b018b // eor x11, x12, x11 WORD $0xf800850b // str x11, [x8], #8 WORD $0x54ffff61 // b.ne .LBB3_6 LBB3_7: WORD $0xd65f03c0 // ret LBB3_8: WORD $0x927ef448 // and x8, x2, #0xfffffffffffffffc WORD $0x91004029 // add x9, x1, #16 
WORD $0x9100400a // add x10, x0, #16 WORD $0xaa0803eb // mov x11, x8 LBB3_9: WORD $0xad7f8520 // ldp q0, q1, [x9, #-16] WORD $0x91008129 // add x9, x9, #32 WORD $0xf100116b // subs x11, x11, #4 WORD $0xad7f8d42 // ldp q2, q3, [x10, #-16] WORD $0x6e201c40 // eor v0.16b, v2.16b, v0.16b WORD $0x6e211c61 // eor v1.16b, v3.16b, v1.16b WORD $0xad3f8540 // stp q0, q1, [x10, #-16] WORD $0x9100814a // add x10, x10, #32 WORD $0x54ffff01 // b.ne .LBB3_9 WORD $0xeb02011f // cmp x8, x2 WORD $0x54fffe20 // b.eq .LBB3_7 WORD $0x17ffffe6 // b .LBB3_5 TEXT ·_and_many(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD dims+16(FP), R2 WORD $0xf81c0ff9 // str x25, [sp, #-64]! WORD $0xf2407c48 // ands x8, x2, #0xffffffff WORD $0xa9015ff8 // stp x24, x23, [sp, #16] WORD $0xa90257f6 // stp x22, x21, [sp, #32] WORD $0xa9034ff4 // stp x20, x19, [sp, #48] WORD $0x54000a20 // b.eq .LBB4_14 WORD $0xd360fc4b // lsr x11, x2, #32 WORD $0xb40009eb // cbz x11, .LBB4_14 WORD $0xf100057f // cmp x11, #1 WORD $0xaa1f03e9 // mov x9, xzr WORD $0xaa1f03ea // mov x10, xzr WORD $0xaa1f03ef // mov x15, xzr WORD $0x9a9f856b // csinc x11, x11, xzr, hi WORD $0x9100400c // add x12, x0, #16 WORD $0x5280400d // mov w13, #512 WORD $0x5280020e // mov w14, #16 WORD $0x14000009 // b .LBB4_4 LBB4_3: WORD $0x910801ad // add x13, x13, #512 WORD $0x9100054a // add x10, x10, #1 WORD $0xd1080129 // sub x9, x9, #512 WORD $0x914005ce // add x14, x14, #1, lsl #12 WORD $0x9140058c // add x12, x12, #1, lsl #12 WORD $0xaa1003ef // mov x15, x16 WORD $0xeb08021f // cmp x16, x8 WORD $0x540007c2 // b.hs .LBB4_14 LBB4_4: WORD $0xeb0801bf // cmp x13, x8 WORD $0x910801f0 // add x16, x15, #512 WORD $0x9a8831b1 // csel x17, x13, x8, lo WORD $0xeb08021f // cmp x16, x8 WORD $0x9a883212 // csel x18, x16, x8, lo WORD $0xeb1201ff // cmp x15, x18 WORD $0x54fffe42 // b.hs .LBB4_3 WORD $0xcb0a2622 // sub x2, x17, x10, lsl #9 WORD $0xd374cd43 // lsl x3, x10, #12 WORD $0xd37df046 // lsl x6, x2, #3 WORD $0x8b030004 // add x4, x0, x3 WORD $0xd10020d4 
// sub x20, x6, #8 WORD $0x8b090225 // add x5, x17, x9 WORD $0x8b140086 // add x6, x4, x20 WORD $0x927ef447 // and x7, x2, #0xfffffffffffffffc WORD $0xaa1f03f2 // mov x18, xzr WORD $0x927ef4a5 // and x5, x5, #0xfffffffffffffffc WORD $0x910020c6 // add x6, x6, #8 WORD $0x8b0701f3 // add x19, x15, x7 WORD $0x8b140074 // add x20, x3, x20 WORD $0x14000004 // b .LBB4_7 LBB4_6: WORD $0x91000652 // add x18, x18, #1 WORD $0xeb0b025f // cmp x18, x11 WORD $0x54fffc20 // b.eq .LBB4_3 LBB4_7: WORD $0xf8727835 // ldr x21, [x1, x18, lsl #3] WORD $0xaa0f03f7 // mov x23, x15 WORD $0xf100105f // cmp x2, #4 WORD $0x540002e3 // b.lo .LBB4_12 WORD $0x8b1402b6 // add x22, x21, x20 WORD $0x8b0302b7 // add x23, x21, x3 WORD $0x910022d6 // add x22, x22, #8 WORD $0xeb16009f // cmp x4, x22 WORD $0xfa4632e2 // ccmp x23, x6, #2, lo WORD $0xaa0f03f7 // mov x23, x15 WORD $0x54000203 // b.lo .LBB4_12 WORD $0x8b0e02b6 // add x22, x21, x14 WORD $0xaa0c03f7 // mov x23, x12 WORD $0xaa0503f8 // mov x24, x5 LBB4_10: WORD $0xad7f86c0 // ldp q0, q1, [x22, #-16] WORD $0xf1001318 // subs x24, x24, #4 WORD $0x910082d6 // add x22, x22, #32 WORD $0xad7f8ee2 // ldp q2, q3, [x23, #-16] WORD $0x4e201c40 // and v0.16b, v2.16b, v0.16b WORD $0x4e211c61 // and v1.16b, v3.16b, v1.16b WORD $0xad3f86e0 // stp q0, q1, [x23, #-16] WORD $0x910082f7 // add x23, x23, #32 WORD $0x54ffff01 // b.ne .LBB4_10 WORD $0xaa1303f7 // mov x23, x19 WORD $0xeb07005f // cmp x2, x7 WORD $0x54fffc80 // b.eq .LBB4_6 LBB4_12: WORD $0xd37df2f8 // lsl x24, x23, #3 WORD $0xcb170236 // sub x22, x17, x23 WORD $0x8b180017 // add x23, x0, x24 WORD $0x8b1802b5 // add x21, x21, x24 LBB4_13: WORD $0xf84086b8 // ldr x24, [x21], #8 WORD $0xf94002f9 // ldr x25, [x23] WORD $0xf10006d6 // subs x22, x22, #1 WORD $0x8a180338 // and x24, x25, x24 WORD $0xf80086f8 // str x24, [x23], #8 WORD $0x54ffff61 // b.ne .LBB4_13 WORD $0x17ffffd9 // b .LBB4_6 LBB4_14: WORD $0xa9434ff4 // ldp x20, x19, [sp, #48] WORD $0xa94257f6 // ldp x22, x21, [sp, #32] WORD 
$0xa9415ff8 // ldp x24, x23, [sp, #16] WORD $0xf84407f9 // ldr x25, [sp], #64 WORD $0xd65f03c0 // ret TEXT ·_andn_many(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD dims+16(FP), R2 WORD $0xf81c0ff9 // str x25, [sp, #-64]! WORD $0xf2407c48 // ands x8, x2, #0xffffffff WORD $0xa9015ff8 // stp x24, x23, [sp, #16] WORD $0xa90257f6 // stp x22, x21, [sp, #32] WORD $0xa9034ff4 // stp x20, x19, [sp, #48] WORD $0x54000a20 // b.eq .LBB5_14 WORD $0xd360fc4b // lsr x11, x2, #32 WORD $0xb40009eb // cbz x11, .LBB5_14 WORD $0xf100057f // cmp x11, #1 WORD $0xaa1f03e9 // mov x9, xzr WORD $0xaa1f03ea // mov x10, xzr WORD $0xaa1f03ef // mov x15, xzr WORD $0x9a9f856b // csinc x11, x11, xzr, hi WORD $0x9100400c // add x12, x0, #16 WORD $0x5280400d // mov w13, #512 WORD $0x5280020e // mov w14, #16 WORD $0x14000009 // b .LBB5_4 LBB5_3: WORD $0x910801ad // add x13, x13, #512 WORD $0x9100054a // add x10, x10, #1 WORD $0xd1080129 // sub x9, x9, #512 WORD $0x914005ce // add x14, x14, #1, lsl #12 WORD $0x9140058c // add x12, x12, #1, lsl #12 WORD $0xaa1003ef // mov x15, x16 WORD $0xeb08021f // cmp x16, x8 WORD $0x540007c2 // b.hs .LBB5_14 LBB5_4: WORD $0xeb0801bf // cmp x13, x8 WORD $0x910801f0 // add x16, x15, #512 WORD $0x9a8831b1 // csel x17, x13, x8, lo WORD $0xeb08021f // cmp x16, x8 WORD $0x9a883212 // csel x18, x16, x8, lo WORD $0xeb1201ff // cmp x15, x18 WORD $0x54fffe42 // b.hs .LBB5_3 WORD $0xcb0a2622 // sub x2, x17, x10, lsl #9 WORD $0xd374cd43 // lsl x3, x10, #12 WORD $0xd37df046 // lsl x6, x2, #3 WORD $0x8b030004 // add x4, x0, x3 WORD $0xd10020d4 // sub x20, x6, #8 WORD $0x8b090225 // add x5, x17, x9 WORD $0x8b140086 // add x6, x4, x20 WORD $0x927ef447 // and x7, x2, #0xfffffffffffffffc WORD $0xaa1f03f2 // mov x18, xzr WORD $0x927ef4a5 // and x5, x5, #0xfffffffffffffffc WORD $0x910020c6 // add x6, x6, #8 WORD $0x8b0701f3 // add x19, x15, x7 WORD $0x8b140074 // add x20, x3, x20 WORD $0x14000004 // b .LBB5_7 LBB5_6: WORD $0x91000652 // add x18, x18, #1 WORD $0xeb0b025f // cmp 
x18, x11 WORD $0x54fffc20 // b.eq .LBB5_3 LBB5_7: WORD $0xf8727835 // ldr x21, [x1, x18, lsl #3] WORD $0xaa0f03f7 // mov x23, x15 WORD $0xf100105f // cmp x2, #4 WORD $0x540002e3 // b.lo .LBB5_12 WORD $0x8b1402b6 // add x22, x21, x20 WORD $0x8b0302b7 // add x23, x21, x3 WORD $0x910022d6 // add x22, x22, #8 WORD $0xeb16009f // cmp x4, x22 WORD $0xfa4632e2 // ccmp x23, x6, #2, lo WORD $0xaa0f03f7 // mov x23, x15 WORD $0x54000203 // b.lo .LBB5_12 WORD $0x8b0e02b6 // add x22, x21, x14 WORD $0xaa0c03f7 // mov x23, x12 WORD $0xaa0503f8 // mov x24, x5 LBB5_10: WORD $0xad7f86c0 // ldp q0, q1, [x22, #-16] WORD $0xf1001318 // subs x24, x24, #4 WORD $0x910082d6 // add x22, x22, #32 WORD $0xad7f8ee2 // ldp q2, q3, [x23, #-16] WORD $0x4e601c40 // bic v0.16b, v2.16b, v0.16b WORD $0x4e611c61 // bic v1.16b, v3.16b, v1.16b WORD $0xad3f86e0 // stp q0, q1, [x23, #-16] WORD $0x910082f7 // add x23, x23, #32 WORD $0x54ffff01 // b.ne .LBB5_10 WORD $0xaa1303f7 // mov x23, x19 WORD $0xeb07005f // cmp x2, x7 WORD $0x54fffc80 // b.eq .LBB5_6 LBB5_12: WORD $0xd37df2f8 // lsl x24, x23, #3 WORD $0xcb170236 // sub x22, x17, x23 WORD $0x8b180017 // add x23, x0, x24 WORD $0x8b1802b5 // add x21, x21, x24 LBB5_13: WORD $0xf84086b8 // ldr x24, [x21], #8 WORD $0xf94002f9 // ldr x25, [x23] WORD $0xf10006d6 // subs x22, x22, #1 WORD $0x8a380338 // bic x24, x25, x24 WORD $0xf80086f8 // str x24, [x23], #8 WORD $0x54ffff61 // b.ne .LBB5_13 WORD $0x17ffffd9 // b .LBB5_6 LBB5_14: WORD $0xa9434ff4 // ldp x20, x19, [sp, #48] WORD $0xa94257f6 // ldp x22, x21, [sp, #32] WORD $0xa9415ff8 // ldp x24, x23, [sp, #16] WORD $0xf84407f9 // ldr x25, [sp], #64 WORD $0xd65f03c0 // ret TEXT ·_or_many(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD dims+16(FP), R2 WORD $0xf81c0ff9 // str x25, [sp, #-64]! 
WORD $0xf2407c48 // ands x8, x2, #0xffffffff WORD $0xa9015ff8 // stp x24, x23, [sp, #16] WORD $0xa90257f6 // stp x22, x21, [sp, #32] WORD $0xa9034ff4 // stp x20, x19, [sp, #48] WORD $0x54000a20 // b.eq .LBB6_14 WORD $0xd360fc4b // lsr x11, x2, #32 WORD $0xb40009eb // cbz x11, .LBB6_14 WORD $0xf100057f // cmp x11, #1 WORD $0xaa1f03e9 // mov x9, xzr WORD $0xaa1f03ea // mov x10, xzr WORD $0xaa1f03ef // mov x15, xzr WORD $0x9a9f856b // csinc x11, x11, xzr, hi WORD $0x9100400c // add x12, x0, #16 WORD $0x5280400d // mov w13, #512 WORD $0x5280020e // mov w14, #16 WORD $0x14000009 // b .LBB6_4 LBB6_3: WORD $0x910801ad // add x13, x13, #512 WORD $0x9100054a // add x10, x10, #1 WORD $0xd1080129 // sub x9, x9, #512 WORD $0x914005ce // add x14, x14, #1, lsl #12 WORD $0x9140058c // add x12, x12, #1, lsl #12 WORD $0xaa1003ef // mov x15, x16 WORD $0xeb08021f // cmp x16, x8 WORD $0x540007c2 // b.hs .LBB6_14 LBB6_4: WORD $0xeb0801bf // cmp x13, x8 WORD $0x910801f0 // add x16, x15, #512 WORD $0x9a8831b1 // csel x17, x13, x8, lo WORD $0xeb08021f // cmp x16, x8 WORD $0x9a883212 // csel x18, x16, x8, lo WORD $0xeb1201ff // cmp x15, x18 WORD $0x54fffe42 // b.hs .LBB6_3 WORD $0xcb0a2622 // sub x2, x17, x10, lsl #9 WORD $0xd374cd43 // lsl x3, x10, #12 WORD $0xd37df046 // lsl x6, x2, #3 WORD $0x8b030004 // add x4, x0, x3 WORD $0xd10020d4 // sub x20, x6, #8 WORD $0x8b090225 // add x5, x17, x9 WORD $0x8b140086 // add x6, x4, x20 WORD $0x927ef447 // and x7, x2, #0xfffffffffffffffc WORD $0xaa1f03f2 // mov x18, xzr WORD $0x927ef4a5 // and x5, x5, #0xfffffffffffffffc WORD $0x910020c6 // add x6, x6, #8 WORD $0x8b0701f3 // add x19, x15, x7 WORD $0x8b140074 // add x20, x3, x20 WORD $0x14000004 // b .LBB6_7 LBB6_6: WORD $0x91000652 // add x18, x18, #1 WORD $0xeb0b025f // cmp x18, x11 WORD $0x54fffc20 // b.eq .LBB6_3 LBB6_7: WORD $0xf8727835 // ldr x21, [x1, x18, lsl #3] WORD $0xaa0f03f7 // mov x23, x15 WORD $0xf100105f // cmp x2, #4 WORD $0x540002e3 // b.lo .LBB6_12 WORD $0x8b1402b6 // add x22, 
x21, x20 WORD $0x8b0302b7 // add x23, x21, x3 WORD $0x910022d6 // add x22, x22, #8 WORD $0xeb16009f // cmp x4, x22 WORD $0xfa4632e2 // ccmp x23, x6, #2, lo WORD $0xaa0f03f7 // mov x23, x15 WORD $0x54000203 // b.lo .LBB6_12 WORD $0x8b0e02b6 // add x22, x21, x14 WORD $0xaa0c03f7 // mov x23, x12 WORD $0xaa0503f8 // mov x24, x5 LBB6_10: WORD $0xad7f86c0 // ldp q0, q1, [x22, #-16] WORD $0xf1001318 // subs x24, x24, #4 WORD $0x910082d6 // add x22, x22, #32 WORD $0xad7f8ee2 // ldp q2, q3, [x23, #-16] WORD $0x4ea01c40 // orr v0.16b, v2.16b, v0.16b WORD $0x4ea11c61 // orr v1.16b, v3.16b, v1.16b WORD $0xad3f86e0 // stp q0, q1, [x23, #-16] WORD $0x910082f7 // add x23, x23, #32 WORD $0x54ffff01 // b.ne .LBB6_10 WORD $0xaa1303f7 // mov x23, x19 WORD $0xeb07005f // cmp x2, x7 WORD $0x54fffc80 // b.eq .LBB6_6 LBB6_12: WORD $0xd37df2f8 // lsl x24, x23, #3 WORD $0xcb170236 // sub x22, x17, x23 WORD $0x8b180017 // add x23, x0, x24 WORD $0x8b1802b5 // add x21, x21, x24 LBB6_13: WORD $0xf84086b8 // ldr x24, [x21], #8 WORD $0xf94002f9 // ldr x25, [x23] WORD $0xf10006d6 // subs x22, x22, #1 WORD $0xaa180338 // orr x24, x25, x24 WORD $0xf80086f8 // str x24, [x23], #8 WORD $0x54ffff61 // b.ne .LBB6_13 WORD $0x17ffffd9 // b .LBB6_6 LBB6_14: WORD $0xa9434ff4 // ldp x20, x19, [sp, #48] WORD $0xa94257f6 // ldp x22, x21, [sp, #32] WORD $0xa9415ff8 // ldp x24, x23, [sp, #16] WORD $0xf84407f9 // ldr x25, [sp], #64 WORD $0xd65f03c0 // ret TEXT ·_xor_many(SB), $0-32 MOVD a+0(FP), R0 MOVD b+8(FP), R1 MOVD dims+16(FP), R2 WORD $0xf81c0ff9 // str x25, [sp, #-64]! 
WORD $0xf2407c48 // ands x8, x2, #0xffffffff WORD $0xa9015ff8 // stp x24, x23, [sp, #16] WORD $0xa90257f6 // stp x22, x21, [sp, #32] WORD $0xa9034ff4 // stp x20, x19, [sp, #48] WORD $0x54000a20 // b.eq .LBB7_14 WORD $0xd360fc4b // lsr x11, x2, #32 WORD $0xb40009eb // cbz x11, .LBB7_14 WORD $0xf100057f // cmp x11, #1 WORD $0xaa1f03e9 // mov x9, xzr WORD $0xaa1f03ea // mov x10, xzr WORD $0xaa1f03ef // mov x15, xzr WORD $0x9a9f856b // csinc x11, x11, xzr, hi WORD $0x9100400c // add x12, x0, #16 WORD $0x5280400d // mov w13, #512 WORD $0x5280020e // mov w14, #16 WORD $0x14000009 // b .LBB7_4 LBB7_3: WORD $0x910801ad // add x13, x13, #512 WORD $0x9100054a // add x10, x10, #1 WORD $0xd1080129 // sub x9, x9, #512 WORD $0x914005ce // add x14, x14, #1, lsl #12 WORD $0x9140058c // add x12, x12, #1, lsl #12 WORD $0xaa1003ef // mov x15, x16 WORD $0xeb08021f // cmp x16, x8 WORD $0x540007c2 // b.hs .LBB7_14 LBB7_4: WORD $0xeb0801bf // cmp x13, x8 WORD $0x910801f0 // add x16, x15, #512 WORD $0x9a8831b1 // csel x17, x13, x8, lo WORD $0xeb08021f // cmp x16, x8 WORD $0x9a883212 // csel x18, x16, x8, lo WORD $0xeb1201ff // cmp x15, x18 WORD $0x54fffe42 // b.hs .LBB7_3 WORD $0xcb0a2622 // sub x2, x17, x10, lsl #9 WORD $0xd374cd43 // lsl x3, x10, #12 WORD $0xd37df046 // lsl x6, x2, #3 WORD $0x8b030004 // add x4, x0, x3 WORD $0xd10020d4 // sub x20, x6, #8 WORD $0x8b090225 // add x5, x17, x9 WORD $0x8b140086 // add x6, x4, x20 WORD $0x927ef447 // and x7, x2, #0xfffffffffffffffc WORD $0xaa1f03f2 // mov x18, xzr WORD $0x927ef4a5 // and x5, x5, #0xfffffffffffffffc WORD $0x910020c6 // add x6, x6, #8 WORD $0x8b0701f3 // add x19, x15, x7 WORD $0x8b140074 // add x20, x3, x20 WORD $0x14000004 // b .LBB7_7 LBB7_6: WORD $0x91000652 // add x18, x18, #1 WORD $0xeb0b025f // cmp x18, x11 WORD $0x54fffc20 // b.eq .LBB7_3 LBB7_7: WORD $0xf8727835 // ldr x21, [x1, x18, lsl #3] WORD $0xaa0f03f7 // mov x23, x15 WORD $0xf100105f // cmp x2, #4 WORD $0x540002e3 // b.lo .LBB7_12 WORD $0x8b1402b6 // add x22, 
x21, x20 WORD $0x8b0302b7 // add x23, x21, x3 WORD $0x910022d6 // add x22, x22, #8 WORD $0xeb16009f // cmp x4, x22 WORD $0xfa4632e2 // ccmp x23, x6, #2, lo WORD $0xaa0f03f7 // mov x23, x15 WORD $0x54000203 // b.lo .LBB7_12 WORD $0x8b0e02b6 // add x22, x21, x14 WORD $0xaa0c03f7 // mov x23, x12 WORD $0xaa0503f8 // mov x24, x5 LBB7_10: WORD $0xad7f86c0 // ldp q0, q1, [x22, #-16] WORD $0xf1001318 // subs x24, x24, #4 WORD $0x910082d6 // add x22, x22, #32 WORD $0xad7f8ee2 // ldp q2, q3, [x23, #-16] WORD $0x6e201c40 // eor v0.16b, v2.16b, v0.16b WORD $0x6e211c61 // eor v1.16b, v3.16b, v1.16b WORD $0xad3f86e0 // stp q0, q1, [x23, #-16] WORD $0x910082f7 // add x23, x23, #32 WORD $0x54ffff01 // b.ne .LBB7_10 WORD $0xaa1303f7 // mov x23, x19 WORD $0xeb07005f // cmp x2, x7 WORD $0x54fffc80 // b.eq .LBB7_6 LBB7_12: WORD $0xd37df2f8 // lsl x24, x23, #3 WORD $0xcb170236 // sub x22, x17, x23 WORD $0x8b180017 // add x23, x0, x24 WORD $0x8b1802b5 // add x21, x21, x24 LBB7_13: WORD $0xf84086b8 // ldr x24, [x21], #8 WORD $0xf94002f9 // ldr x25, [x23] WORD $0xf10006d6 // subs x22, x22, #1 WORD $0xca180338 // eor x24, x25, x24 WORD $0xf80086f8 // str x24, [x23], #8 WORD $0x54ffff61 // b.ne .LBB7_13 WORD $0x17ffffd9 // b .LBB7_6 LBB7_14: WORD $0xa9434ff4 // ldp x20, x19, [sp, #48] WORD $0xa94257f6 // ldp x22, x21, [sp, #32] WORD $0xa9415ff8 // ldp x24, x23, [sp, #16] WORD $0xf84407f9 // ldr x25, [sp], #64 WORD $0xd65f03c0 // ret TEXT ·_count(SB), $0-32 MOVD a+0(FP), R0 MOVD size+8(FP), R1 MOVD result+16(FP), R2 WORD $0xb40000c1 // cbz x1, .LBB8_3 WORD $0xf100103f // cmp x1, #4 WORD $0x540000c2 // b.hs .LBB8_4 WORD $0xaa1f03e8 // mov x8, xzr WORD $0xaa1f03e9 // mov x9, xzr WORD $0x14000019 // b .LBB8_7 LBB8_3: WORD $0xf900005f // str xzr, [x2] WORD $0xd65f03c0 // ret LBB8_4: WORD $0x927ef428 // and x8, x1, #0xfffffffffffffffc WORD $0x91004009 // add x9, x0, #16 WORD $0x6f00e400 // movi v0.2d, #0000000000000000 WORD $0xaa0803ea // mov x10, x8 WORD $0x6f00e401 // movi v1.2d, 
#0000000000000000 LBB8_5: WORD $0xad7f8d22 // ldp q2, q3, [x9, #-16] WORD $0x91008129 // add x9, x9, #32 WORD $0xf100114a // subs x10, x10, #4 WORD $0x4e205842 // cnt v2.16b, v2.16b WORD $0x4e205863 // cnt v3.16b, v3.16b WORD $0x6e202842 // uaddlp v2.8h, v2.16b WORD $0x6e202863 // uaddlp v3.8h, v3.16b WORD $0x6e602842 // uaddlp v2.4s, v2.8h WORD $0x6e602863 // uaddlp v3.4s, v3.8h WORD $0x6ea06840 // uadalp v0.2d, v2.4s WORD $0x6ea06861 // uadalp v1.2d, v3.4s WORD $0x54fffea1 // b.ne .LBB8_5 WORD $0x4ee08420 // add v0.2d, v1.2d, v0.2d WORD $0xeb01011f // cmp x8, x1 WORD $0x5ef1b800 // addp d0, v0.2d WORD $0x9e660009 // fmov x9, d0 WORD $0x54000140 // b.eq .LBB8_9 LBB8_7: WORD $0x8b080c0a // add x10, x0, x8, lsl #3 WORD $0xcb080028 // sub x8, x1, x8 LBB8_8: WORD $0xfc408540 // ldr d0, [x10], #8 WORD $0xf1000508 // subs x8, x8, #1 WORD $0x0e205800 // cnt v0.8b, v0.8b WORD $0x2e303800 // uaddlv h0, v0.8b WORD $0x1e26000b // fmov w11, s0 WORD $0x8b090169 // add x9, x11, x9 WORD $0x54ffff41 // b.ne .LBB8_8 LBB8_9: WORD $0xf9000049 // str x9, [x2] WORD $0xd65f03c0 // ret