From 7f301c44b7b2d860948cbfd0f9fbb87ab59c1af7 Mon Sep 17 00:00:00 2001
From: Adam Ierymenko
Date: Fri, 16 Aug 2019 14:26:45 -0700
Subject: [PATCH] more cleanup

---
 ext/arm32-neon-salsa2012-asm/README.md   |    6 -
 ext/arm32-neon-salsa2012-asm/salsa2012.h |   25 -
 ext/arm32-neon-salsa2012-asm/salsa2012.s | 2231 -----------
 ext/x64-salsa2012-asm/README.md          |    6 -
 ext/x64-salsa2012-asm/salsa2012.h        |   16 -
 ext/x64-salsa2012-asm/salsa2012.s        | 4488 ----------------------
 6 files changed, 6772 deletions(-)
 delete mode 100644 ext/arm32-neon-salsa2012-asm/README.md
 delete mode 100644 ext/arm32-neon-salsa2012-asm/salsa2012.h
 delete mode 100644 ext/arm32-neon-salsa2012-asm/salsa2012.s
 delete mode 100644 ext/x64-salsa2012-asm/README.md
 delete mode 100644 ext/x64-salsa2012-asm/salsa2012.h
 delete mode 100644 ext/x64-salsa2012-asm/salsa2012.s

diff --git a/ext/arm32-neon-salsa2012-asm/README.md b/ext/arm32-neon-salsa2012-asm/README.md
deleted file mode 100644
index 54fc6f5fa..000000000
--- a/ext/arm32-neon-salsa2012-asm/README.md
+++ /dev/null
@@ -1,6 +0,0 @@
-ARM NEON (32-bit) ASM implementation of Salsa20/12
-======
-
-This is from [supercop](http://bench.cr.yp.to/supercop.html) and was originally written by Daniel J. Bernstein. The code is in the public domain, like the rest of Salsa20, and is much faster than the naive implementation.
-
-It is included automatically in 32-bit Linux ARM builds. It will likely not work on 64-bit ARM without at least a port, which unfortunately keeps it out of mobile versions for now since those are all going 64-bit.
diff --git a/ext/arm32-neon-salsa2012-asm/salsa2012.h b/ext/arm32-neon-salsa2012-asm/salsa2012.h
deleted file mode 100644
index 262c9b9b5..000000000
--- a/ext/arm32-neon-salsa2012-asm/salsa2012.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef ZT_SALSA2012_ARM32NEON_ASM
-#define ZT_SALSA2012_ARM32NEON_ASM
-
-#if defined(__linux__) || defined(linux) || defined(__LINUX__) || defined(__linux)
-#include <sys/auxv.h>
-#include <asm/hwcap.h>
-#define zt_arm_has_neon() ((getauxval(AT_HWCAP) & HWCAP_NEON) != 0)
-#elif defined(__ARM_NEON__) || defined(__ARM_NEON)
-#define zt_arm_has_neon() (true)
-#else
-#define zt_arm_has_neon() (false)
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// ciphertext buffer, message/NULL, length, nonce (8 bytes), key (32 bytes)
-extern int zt_salsa2012_armneon3_xor(unsigned char *c,const unsigned char *m,unsigned long long len,const unsigned char *n,const unsigned char *k);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/ext/arm32-neon-salsa2012-asm/salsa2012.s b/ext/arm32-neon-salsa2012-asm/salsa2012.s
deleted file mode 100644
index 9e5989cd3..000000000
--- a/ext/arm32-neon-salsa2012-asm/salsa2012.s
+++ /dev/null
@@ -1,2231 +0,0 @@
[hunk body elided: 2231 deleted lines of qhasm-generated 32-bit ARM NEON assembly implementing zt_salsa2012_armneon3_xor]
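
(A minimal caller-side sketch of how the routine deleted above was driven: runtime NEON detection via the header's zt_arm_has_neon() macro, then the XOR call. The wrapper name and the fallback path are hypothetical illustrations, not ZeroTier code.)

    #include "salsa2012.h" /* the (removed) ext/arm32-neon-salsa2012-asm header */

    /* Encrypt (or decrypt -- XOR is symmetric) a buffer with Salsa20/12,
     * using the NEON routine when the CPU supports it. Per the header
     * comment, m may also be NULL to emit raw key stream into c. */
    int encrypt_packet(unsigned char *c, const unsigned char *m,
                       unsigned long long len,
                       const unsigned char nonce[8],
                       const unsigned char key[32])
    {
        if (zt_arm_has_neon())
            return zt_salsa2012_armneon3_xor(c, m, len, nonce, key);
        /* ...fall back to a portable C Salsa20/12 implementation... */
        return -1; /* hypothetical: no fallback wired up in this sketch */
    }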
diff --git a/ext/x64-salsa2012-asm/README.md b/ext/x64-salsa2012-asm/README.md
deleted file mode 100644
index a69a1a671..000000000
--- a/ext/x64-salsa2012-asm/README.md
+++ /dev/null
@@ -1,6 +0,0 @@
-Blazingly fast X64 ASM implementation of Salsa20/12
-======
-
-This is ripped from the [cnacl](https://github.com/cjdelisle/cnacl) source. The actual code is by Daniel J. Bernstein and is in the public domain.
-
-This is included in Linux and Mac 64-bit builds and is significantly faster than the SSE intrinsics or C versions. It is used for packet encode/decode only, since its interface differs a bit from the regular Salsa20 C++ class: it cannot be invoked block by block, and instead takes a key and a single stream to encrypt in one shot.
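
(A hedged usage sketch for the x64 routine declared in the header below: only the key-stream entry point is exported, so encryption XORs the generated stream into the message, and the same call decrypts. The wrapper name and the fixed-size stack buffer are illustrative assumptions.)

    #include "salsa2012.h" /* the (removed) ext/x64-salsa2012-asm header */

    /* Salsa20/12 encrypt/decrypt via the key-stream generator. */
    void salsa2012_crypt(unsigned char *out, const unsigned char *in,
                         unsigned long long len,
                         const unsigned char nonce[8],
                         const unsigned char key[32])
    {
        unsigned char ks[2048]; /* assumes len <= sizeof(ks) for this sketch */
        zt_salsa2012_amd64_xmm6(ks, len, nonce, key);
        for (unsigned long long i = 0; i < len; ++i)
            out[i] = in[i] ^ ks[i];
    }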
diff --git a/ext/x64-salsa2012-asm/salsa2012.h b/ext/x64-salsa2012-asm/salsa2012.h
deleted file mode 100644
index 73e375ebe..000000000
--- a/ext/x64-salsa2012-asm/salsa2012.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef ZT_X64_SALSA2012_ASM
-#define ZT_X64_SALSA2012_ASM
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Generates Salsa20/12 key stream
-// output, outlen, nonce (8 bytes), key (256-bit / 32-byte)
-extern int zt_salsa2012_amd64_xmm6(unsigned char *, unsigned long long, const unsigned char *, const unsigned char *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/ext/x64-salsa2012-asm/salsa2012.s b/ext/x64-salsa2012-asm/salsa2012.s
deleted file mode 100644
index 699c89ac6..000000000
--- a/ext/x64-salsa2012-asm/salsa2012.s
+++ /dev/null
@@ -1,4488 +0,0 @@
[hunk body elided: 4488 deleted lines of qhasm-generated x86-64 SSE2 assembly implementing zt_salsa2012_amd64_xmm6]
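
(For reference, both deleted .s files are qhasm-vectorized versions of the same scalar core: the Salsa20 double round, with rotation constants 7, 9, 13, and 18, iterated six times for Salsa20/12. A plain C sketch of that core, written from Bernstein's Salsa20 specification rather than taken from either file:)

    #include <stdint.h>

    #define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

    /* One Salsa20 quarter round: the add/rotate/xor pattern the deleted
     * assembly performs four lanes at a time in NEON/SSE registers. */
    #define QR(a, b, c, d) do {              \
            (b) ^= ROTL32((a) + (d), 7);     \
            (c) ^= ROTL32((b) + (a), 9);     \
            (d) ^= ROTL32((c) + (b), 13);    \
            (a) ^= ROTL32((d) + (c), 18);    \
        } while (0)

    /* Salsa20/12 core: 12 rounds (6 double rounds) over a 4x4 uint32
     * state, then add the original input state back in. */
    void salsa2012_core(uint32_t out[16], const uint32_t in[16])
    {
        uint32_t x[16];
        for (int i = 0; i < 16; ++i) x[i] = in[i];
        for (int i = 0; i < 12; i += 2) {
            QR(x[0],  x[4],  x[8],  x[12]); /* column round */
            QR(x[5],  x[9],  x[13], x[1]);
            QR(x[10], x[14], x[2],  x[6]);
            QR(x[15], x[3],  x[7],  x[11]);
            QR(x[0],  x[1],  x[2],  x[3]);  /* row round */
            QR(x[5],  x[6],  x[7],  x[4]);
            QR(x[10], x[11], x[8],  x[9]);
            QR(x[15], x[12], x[13], x[14]);
        }
        for (int i = 0; i < 16; ++i) out[i] = x[i] + in[i];
    }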