Annotation of embedaddon/rsync/lib/md5-asm-x86_64.S, revision 1.1
1.1 ! misho 1: /*
! 2: * x86-64 optimized assembler MD5 implementation
! 3: *
! 4: * Author: Marc Bevand, 2004
! 5: *
! 6: * This code was placed in the public domain by the author. The original
! 7: * publication can be found at:
! 8: *
! 9: * https://www.zorinaq.com/papers/md5-amd64.html
! 10: */
! 11: /*
! 12: * No modifications were made aside from changing the function and file names.
! 13: * The MD5_CTX structure as expected here (from OpenSSL) is binary compatible
! 14: * with the md_context used by rsync, for the fields accessed.
! 15: *
! 16: * Benchmarks (in MB/s) C ASM
! 17: * - Intel Atom D2700 302 334
! 18: * - Intel i7-7700hq 351 376
! 19: * - AMD ThreadRipper 2950x 728 784
! 20: *
! 21: * The original code was also incorporated into OpenSSL. It has since been
! 22: * modified there. Those changes have not been made here due to licensing
! 23: * incompatibilities. Benchmarks of those changes on the above CPUs did not
! 24: * show any significant difference in performance, though.
! 25: */
! 26:
! 27: #include "config.h"
! 28: #include "md-defines.h"
! 29:
! 30: #if !defined USE_OPENSSL && CSUM_CHUNK == 64
! 31:
! 32: #ifdef __APPLE__
! 33: #define md5_process_asm _md5_process_asm
! 34: #endif
! 35:
! 36: .text
! 37: .align 16
! 38:
! 39: .globl md5_process_asm
! 40: md5_process_asm:
! 41: push %rbp
! 42: push %rbx
! 43: push %r12
! 44: push %r13 # not really useful (r13 is unused)
! 45: push %r14
! 46: push %r15
! 47:
! 48: # rdi = arg #1 (ctx, MD5_CTX pointer)
! 49: # rsi = arg #2 (ptr, data pointer)
! 50: # rdx = arg #3 (nbr, number of 16-word blocks to process)
! 51: mov %rdi, %rbp # rbp = ctx
! 52: shl $6, %rdx # rdx = nbr in bytes
! 53: lea (%rsi,%rdx), %rdi # rdi = end
! 54: mov 0*4(%rbp), %eax # eax = ctx->A
! 55: mov 1*4(%rbp), %ebx # ebx = ctx->B
! 56: mov 2*4(%rbp), %ecx # ecx = ctx->C
! 57: mov 3*4(%rbp), %edx # edx = ctx->D
! 58: # end is 'rdi'
! 59: # ptr is 'rsi'
! 60: # A is 'eax'
! 61: # B is 'ebx'
! 62: # C is 'ecx'
! 63: # D is 'edx'
! 64:
! 65: cmp %rdi, %rsi # cmp end with ptr
! 66: je 1f # jmp if ptr == end
! 67:
! 68: # BEGIN of loop over 16-word blocks
! 69: 2: # save old values of A, B, C, D
! 70: mov %eax, %r8d
! 71: mov %ebx, %r9d
! 72: mov %ecx, %r14d
! 73: mov %edx, %r15d
! 74: mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */
! 75: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
! 76: xor %ecx, %r11d /* y ^ ... */
! 77: lea -680876936(%eax,%r10d),%eax /* Const + dst + ... */
! 78: and %ebx, %r11d /* x & ... */
! 79: xor %edx, %r11d /* z ^ ... */
! 80: mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
! 81: add %r11d, %eax /* dst += ... */
! 82: rol $7, %eax /* dst <<< s */
! 83: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
! 84: add %ebx, %eax /* dst += x */
! 85: xor %ebx, %r11d /* y ^ ... */
! 86: lea -389564586(%edx,%r10d),%edx /* Const + dst + ... */
! 87: and %eax, %r11d /* x & ... */
! 88: xor %ecx, %r11d /* z ^ ... */
! 89: mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
! 90: add %r11d, %edx /* dst += ... */
! 91: rol $12, %edx /* dst <<< s */
! 92: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
! 93: add %eax, %edx /* dst += x */
! 94: xor %eax, %r11d /* y ^ ... */
! 95: lea 606105819(%ecx,%r10d),%ecx /* Const + dst + ... */
! 96: and %edx, %r11d /* x & ... */
! 97: xor %ebx, %r11d /* z ^ ... */
! 98: mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
! 99: add %r11d, %ecx /* dst += ... */
! 100: rol $17, %ecx /* dst <<< s */
! 101: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
! 102: add %edx, %ecx /* dst += x */
! 103: xor %edx, %r11d /* y ^ ... */
! 104: lea -1044525330(%ebx,%r10d),%ebx /* Const + dst + ... */
! 105: and %ecx, %r11d /* x & ... */
! 106: xor %eax, %r11d /* z ^ ... */
! 107: mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
! 108: add %r11d, %ebx /* dst += ... */
! 109: rol $22, %ebx /* dst <<< s */
! 110: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
! 111: add %ecx, %ebx /* dst += x */
! 112: xor %ecx, %r11d /* y ^ ... */
! 113: lea -176418897(%eax,%r10d),%eax /* Const + dst + ... */
! 114: and %ebx, %r11d /* x & ... */
! 115: xor %edx, %r11d /* z ^ ... */
! 116: mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
! 117: add %r11d, %eax /* dst += ... */
! 118: rol $7, %eax /* dst <<< s */
! 119: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
! 120: add %ebx, %eax /* dst += x */
! 121: xor %ebx, %r11d /* y ^ ... */
! 122: lea 1200080426(%edx,%r10d),%edx /* Const + dst + ... */
! 123: and %eax, %r11d /* x & ... */
! 124: xor %ecx, %r11d /* z ^ ... */
! 125: mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
! 126: add %r11d, %edx /* dst += ... */
! 127: rol $12, %edx /* dst <<< s */
! 128: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
! 129: add %eax, %edx /* dst += x */
! 130: xor %eax, %r11d /* y ^ ... */
! 131: lea -1473231341(%ecx,%r10d),%ecx /* Const + dst + ... */
! 132: and %edx, %r11d /* x & ... */
! 133: xor %ebx, %r11d /* z ^ ... */
! 134: mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
! 135: add %r11d, %ecx /* dst += ... */
! 136: rol $17, %ecx /* dst <<< s */
! 137: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
! 138: add %edx, %ecx /* dst += x */
! 139: xor %edx, %r11d /* y ^ ... */
! 140: lea -45705983(%ebx,%r10d),%ebx /* Const + dst + ... */
! 141: and %ecx, %r11d /* x & ... */
! 142: xor %eax, %r11d /* z ^ ... */
! 143: mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
! 144: add %r11d, %ebx /* dst += ... */
! 145: rol $22, %ebx /* dst <<< s */
! 146: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
! 147: add %ecx, %ebx /* dst += x */
! 148: xor %ecx, %r11d /* y ^ ... */
! 149: lea 1770035416(%eax,%r10d),%eax /* Const + dst + ... */
! 150: and %ebx, %r11d /* x & ... */
! 151: xor %edx, %r11d /* z ^ ... */
! 152: mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
! 153: add %r11d, %eax /* dst += ... */
! 154: rol $7, %eax /* dst <<< s */
! 155: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
! 156: add %ebx, %eax /* dst += x */
! 157: xor %ebx, %r11d /* y ^ ... */
! 158: lea -1958414417(%edx,%r10d),%edx /* Const + dst + ... */
! 159: and %eax, %r11d /* x & ... */
! 160: xor %ecx, %r11d /* z ^ ... */
! 161: mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
! 162: add %r11d, %edx /* dst += ... */
! 163: rol $12, %edx /* dst <<< s */
! 164: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
! 165: add %eax, %edx /* dst += x */
! 166: xor %eax, %r11d /* y ^ ... */
! 167: lea -42063(%ecx,%r10d),%ecx /* Const + dst + ... */
! 168: and %edx, %r11d /* x & ... */
! 169: xor %ebx, %r11d /* z ^ ... */
! 170: mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
! 171: add %r11d, %ecx /* dst += ... */
! 172: rol $17, %ecx /* dst <<< s */
! 173: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
! 174: add %edx, %ecx /* dst += x */
! 175: xor %edx, %r11d /* y ^ ... */
! 176: lea -1990404162(%ebx,%r10d),%ebx /* Const + dst + ... */
! 177: and %ecx, %r11d /* x & ... */
! 178: xor %eax, %r11d /* z ^ ... */
! 179: mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
! 180: add %r11d, %ebx /* dst += ... */
! 181: rol $22, %ebx /* dst <<< s */
! 182: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
! 183: add %ecx, %ebx /* dst += x */
! 184: xor %ecx, %r11d /* y ^ ... */
! 185: lea 1804603682(%eax,%r10d),%eax /* Const + dst + ... */
! 186: and %ebx, %r11d /* x & ... */
! 187: xor %edx, %r11d /* z ^ ... */
! 188: mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
! 189: add %r11d, %eax /* dst += ... */
! 190: rol $7, %eax /* dst <<< s */
! 191: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
! 192: add %ebx, %eax /* dst += x */
! 193: xor %ebx, %r11d /* y ^ ... */
! 194: lea -40341101(%edx,%r10d),%edx /* Const + dst + ... */
! 195: and %eax, %r11d /* x & ... */
! 196: xor %ecx, %r11d /* z ^ ... */
! 197: mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
! 198: add %r11d, %edx /* dst += ... */
! 199: rol $12, %edx /* dst <<< s */
! 200: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
! 201: add %eax, %edx /* dst += x */
! 202: xor %eax, %r11d /* y ^ ... */
! 203: lea -1502002290(%ecx,%r10d),%ecx /* Const + dst + ... */
! 204: and %edx, %r11d /* x & ... */
! 205: xor %ebx, %r11d /* z ^ ... */
! 206: mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
! 207: add %r11d, %ecx /* dst += ... */
! 208: rol $17, %ecx /* dst <<< s */
! 209: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
! 210: add %edx, %ecx /* dst += x */
! 211: xor %edx, %r11d /* y ^ ... */
! 212: lea 1236535329(%ebx,%r10d),%ebx /* Const + dst + ... */
! 213: and %ecx, %r11d /* x & ... */
! 214: xor %eax, %r11d /* z ^ ... */
! 215: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
! 216: add %r11d, %ebx /* dst += ... */
! 217: rol $22, %ebx /* dst <<< s */
! 218: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
! 219: add %ecx, %ebx /* dst += x */
! 220: mov 1*4(%rsi), %r10d /* (NEXT STEP) X[1] */
! 221: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
! 222: mov %edx, %r12d /* (NEXT STEP) z' = %edx */
! 223: not %r11d /* not z */
! 224: lea -165796510(%eax,%r10d),%eax /* Const + dst + ... */
! 225: and %ebx, %r12d /* x & z */
! 226: and %ecx, %r11d /* y & (not z) */
! 227: mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
! 228: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 229: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
! 230: add %r12d, %eax /* dst += ... */
! 231: mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
! 232: rol $5, %eax /* dst <<< s */
! 233: add %ebx, %eax /* dst += x */
! 234: not %r11d /* not z */
! 235: lea -1069501632(%edx,%r10d),%edx /* Const + dst + ... */
! 236: and %eax, %r12d /* x & z */
! 237: and %ebx, %r11d /* y & (not z) */
! 238: mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
! 239: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 240: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
! 241: add %r12d, %edx /* dst += ... */
! 242: mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
! 243: rol $9, %edx /* dst <<< s */
! 244: add %eax, %edx /* dst += x */
! 245: not %r11d /* not z */
! 246: lea 643717713(%ecx,%r10d),%ecx /* Const + dst + ... */
! 247: and %edx, %r12d /* x & z */
! 248: and %eax, %r11d /* y & (not z) */
! 249: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
! 250: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 251: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
! 252: add %r12d, %ecx /* dst += ... */
! 253: mov %eax, %r12d /* (NEXT STEP) z' = %eax */
! 254: rol $14, %ecx /* dst <<< s */
! 255: add %edx, %ecx /* dst += x */
! 256: not %r11d /* not z */
! 257: lea -373897302(%ebx,%r10d),%ebx /* Const + dst + ... */
! 258: and %ecx, %r12d /* x & z */
! 259: and %edx, %r11d /* y & (not z) */
! 260: mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
! 261: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 262: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
! 263: add %r12d, %ebx /* dst += ... */
! 264: mov %edx, %r12d /* (NEXT STEP) z' = %edx */
! 265: rol $20, %ebx /* dst <<< s */
! 266: add %ecx, %ebx /* dst += x */
! 267: not %r11d /* not z */
! 268: lea -701558691(%eax,%r10d),%eax /* Const + dst + ... */
! 269: and %ebx, %r12d /* x & z */
! 270: and %ecx, %r11d /* y & (not z) */
! 271: mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
! 272: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 273: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
! 274: add %r12d, %eax /* dst += ... */
! 275: mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
! 276: rol $5, %eax /* dst <<< s */
! 277: add %ebx, %eax /* dst += x */
! 278: not %r11d /* not z */
! 279: lea 38016083(%edx,%r10d),%edx /* Const + dst + ... */
! 280: and %eax, %r12d /* x & z */
! 281: and %ebx, %r11d /* y & (not z) */
! 282: mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
! 283: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 284: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
! 285: add %r12d, %edx /* dst += ... */
! 286: mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
! 287: rol $9, %edx /* dst <<< s */
! 288: add %eax, %edx /* dst += x */
! 289: not %r11d /* not z */
! 290: lea -660478335(%ecx,%r10d),%ecx /* Const + dst + ... */
! 291: and %edx, %r12d /* x & z */
! 292: and %eax, %r11d /* y & (not z) */
! 293: mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
! 294: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 295: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
! 296: add %r12d, %ecx /* dst += ... */
! 297: mov %eax, %r12d /* (NEXT STEP) z' = %eax */
! 298: rol $14, %ecx /* dst <<< s */
! 299: add %edx, %ecx /* dst += x */
! 300: not %r11d /* not z */
! 301: lea -405537848(%ebx,%r10d),%ebx /* Const + dst + ... */
! 302: and %ecx, %r12d /* x & z */
! 303: and %edx, %r11d /* y & (not z) */
! 304: mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
! 305: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 306: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
! 307: add %r12d, %ebx /* dst += ... */
! 308: mov %edx, %r12d /* (NEXT STEP) z' = %edx */
! 309: rol $20, %ebx /* dst <<< s */
! 310: add %ecx, %ebx /* dst += x */
! 311: not %r11d /* not z */
! 312: lea 568446438(%eax,%r10d),%eax /* Const + dst + ... */
! 313: and %ebx, %r12d /* x & z */
! 314: and %ecx, %r11d /* y & (not z) */
! 315: mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
! 316: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 317: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
! 318: add %r12d, %eax /* dst += ... */
! 319: mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
! 320: rol $5, %eax /* dst <<< s */
! 321: add %ebx, %eax /* dst += x */
! 322: not %r11d /* not z */
! 323: lea -1019803690(%edx,%r10d),%edx /* Const + dst + ... */
! 324: and %eax, %r12d /* x & z */
! 325: and %ebx, %r11d /* y & (not z) */
! 326: mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
! 327: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 328: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
! 329: add %r12d, %edx /* dst += ... */
! 330: mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
! 331: rol $9, %edx /* dst <<< s */
! 332: add %eax, %edx /* dst += x */
! 333: not %r11d /* not z */
! 334: lea -187363961(%ecx,%r10d),%ecx /* Const + dst + ... */
! 335: and %edx, %r12d /* x & z */
! 336: and %eax, %r11d /* y & (not z) */
! 337: mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
! 338: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 339: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
! 340: add %r12d, %ecx /* dst += ... */
! 341: mov %eax, %r12d /* (NEXT STEP) z' = %eax */
! 342: rol $14, %ecx /* dst <<< s */
! 343: add %edx, %ecx /* dst += x */
! 344: not %r11d /* not z */
! 345: lea 1163531501(%ebx,%r10d),%ebx /* Const + dst + ... */
! 346: and %ecx, %r12d /* x & z */
! 347: and %edx, %r11d /* y & (not z) */
! 348: mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
! 349: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 350: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
! 351: add %r12d, %ebx /* dst += ... */
! 352: mov %edx, %r12d /* (NEXT STEP) z' = %edx */
! 353: rol $20, %ebx /* dst <<< s */
! 354: add %ecx, %ebx /* dst += x */
! 355: not %r11d /* not z */
! 356: lea -1444681467(%eax,%r10d),%eax /* Const + dst + ... */
! 357: and %ebx, %r12d /* x & z */
! 358: and %ecx, %r11d /* y & (not z) */
! 359: mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
! 360: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 361: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
! 362: add %r12d, %eax /* dst += ... */
! 363: mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
! 364: rol $5, %eax /* dst <<< s */
! 365: add %ebx, %eax /* dst += x */
! 366: not %r11d /* not z */
! 367: lea -51403784(%edx,%r10d),%edx /* Const + dst + ... */
! 368: and %eax, %r12d /* x & z */
! 369: and %ebx, %r11d /* y & (not z) */
! 370: mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
! 371: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 372: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
! 373: add %r12d, %edx /* dst += ... */
! 374: mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
! 375: rol $9, %edx /* dst <<< s */
! 376: add %eax, %edx /* dst += x */
! 377: not %r11d /* not z */
! 378: lea 1735328473(%ecx,%r10d),%ecx /* Const + dst + ... */
! 379: and %edx, %r12d /* x & z */
! 380: and %eax, %r11d /* y & (not z) */
! 381: mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
! 382: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 383: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
! 384: add %r12d, %ecx /* dst += ... */
! 385: mov %eax, %r12d /* (NEXT STEP) z' = %eax */
! 386: rol $14, %ecx /* dst <<< s */
! 387: add %edx, %ecx /* dst += x */
! 388: not %r11d /* not z */
! 389: lea -1926607734(%ebx,%r10d),%ebx /* Const + dst + ... */
! 390: and %ecx, %r12d /* x & z */
! 391: and %edx, %r11d /* y & (not z) */
! 392: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
! 393: or %r11d, %r12d /* (y & (not z)) | (x & z) */
! 394: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
! 395: add %r12d, %ebx /* dst += ... */
! 396: mov %edx, %r12d /* (NEXT STEP) z' = %edx */
! 397: rol $20, %ebx /* dst <<< s */
! 398: add %ecx, %ebx /* dst += x */
! 399: mov 5*4(%rsi), %r10d /* (NEXT STEP) X[5] */
! 400: mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
! 401: lea -378558(%eax,%r10d),%eax /* Const + dst + ... */
! 402: mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
! 403: xor %edx, %r11d /* z ^ ... */
! 404: xor %ebx, %r11d /* x ^ ... */
! 405: add %r11d, %eax /* dst += ... */
! 406: rol $4, %eax /* dst <<< s */
! 407: mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
! 408: add %ebx, %eax /* dst += x */
! 409: lea -2022574463(%edx,%r10d),%edx /* Const + dst + ... */
! 410: mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
! 411: xor %ecx, %r11d /* z ^ ... */
! 412: xor %eax, %r11d /* x ^ ... */
! 413: add %r11d, %edx /* dst += ... */
! 414: rol $11, %edx /* dst <<< s */
! 415: mov %eax, %r11d /* (NEXT STEP) y' = %eax */
! 416: add %eax, %edx /* dst += x */
! 417: lea 1839030562(%ecx,%r10d),%ecx /* Const + dst + ... */
! 418: mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
! 419: xor %ebx, %r11d /* z ^ ... */
! 420: xor %edx, %r11d /* x ^ ... */
! 421: add %r11d, %ecx /* dst += ... */
! 422: rol $16, %ecx /* dst <<< s */
! 423: mov %edx, %r11d /* (NEXT STEP) y' = %edx */
! 424: add %edx, %ecx /* dst += x */
! 425: lea -35309556(%ebx,%r10d),%ebx /* Const + dst + ... */
! 426: mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
! 427: xor %eax, %r11d /* z ^ ... */
! 428: xor %ecx, %r11d /* x ^ ... */
! 429: add %r11d, %ebx /* dst += ... */
! 430: rol $23, %ebx /* dst <<< s */
! 431: mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
! 432: add %ecx, %ebx /* dst += x */
! 433: lea -1530992060(%eax,%r10d),%eax /* Const + dst + ... */
! 434: mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
! 435: xor %edx, %r11d /* z ^ ... */
! 436: xor %ebx, %r11d /* x ^ ... */
! 437: add %r11d, %eax /* dst += ... */
! 438: rol $4, %eax /* dst <<< s */
! 439: mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
! 440: add %ebx, %eax /* dst += x */
! 441: lea 1272893353(%edx,%r10d),%edx /* Const + dst + ... */
! 442: mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
! 443: xor %ecx, %r11d /* z ^ ... */
! 444: xor %eax, %r11d /* x ^ ... */
! 445: add %r11d, %edx /* dst += ... */
! 446: rol $11, %edx /* dst <<< s */
! 447: mov %eax, %r11d /* (NEXT STEP) y' = %eax */
! 448: add %eax, %edx /* dst += x */
! 449: lea -155497632(%ecx,%r10d),%ecx /* Const + dst + ... */
! 450: mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
! 451: xor %ebx, %r11d /* z ^ ... */
! 452: xor %edx, %r11d /* x ^ ... */
! 453: add %r11d, %ecx /* dst += ... */
! 454: rol $16, %ecx /* dst <<< s */
! 455: mov %edx, %r11d /* (NEXT STEP) y' = %edx */
! 456: add %edx, %ecx /* dst += x */
! 457: lea -1094730640(%ebx,%r10d),%ebx /* Const + dst + ... */
! 458: mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
! 459: xor %eax, %r11d /* z ^ ... */
! 460: xor %ecx, %r11d /* x ^ ... */
! 461: add %r11d, %ebx /* dst += ... */
! 462: rol $23, %ebx /* dst <<< s */
! 463: mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
! 464: add %ecx, %ebx /* dst += x */
! 465: lea 681279174(%eax,%r10d),%eax /* Const + dst + ... */
! 466: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
! 467: xor %edx, %r11d /* z ^ ... */
! 468: xor %ebx, %r11d /* x ^ ... */
! 469: add %r11d, %eax /* dst += ... */
! 470: rol $4, %eax /* dst <<< s */
! 471: mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
! 472: add %ebx, %eax /* dst += x */
! 473: lea -358537222(%edx,%r10d),%edx /* Const + dst + ... */
! 474: mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
! 475: xor %ecx, %r11d /* z ^ ... */
! 476: xor %eax, %r11d /* x ^ ... */
! 477: add %r11d, %edx /* dst += ... */
! 478: rol $11, %edx /* dst <<< s */
! 479: mov %eax, %r11d /* (NEXT STEP) y' = %eax */
! 480: add %eax, %edx /* dst += x */
! 481: lea -722521979(%ecx,%r10d),%ecx /* Const + dst + ... */
! 482: mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
! 483: xor %ebx, %r11d /* z ^ ... */
! 484: xor %edx, %r11d /* x ^ ... */
! 485: add %r11d, %ecx /* dst += ... */
! 486: rol $16, %ecx /* dst <<< s */
! 487: mov %edx, %r11d /* (NEXT STEP) y' = %edx */
! 488: add %edx, %ecx /* dst += x */
! 489: lea 76029189(%ebx,%r10d),%ebx /* Const + dst + ... */
! 490: mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
! 491: xor %eax, %r11d /* z ^ ... */
! 492: xor %ecx, %r11d /* x ^ ... */
! 493: add %r11d, %ebx /* dst += ... */
! 494: rol $23, %ebx /* dst <<< s */
! 495: mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
! 496: add %ecx, %ebx /* dst += x */
! 497: lea -640364487(%eax,%r10d),%eax /* Const + dst + ... */
! 498: mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
! 499: xor %edx, %r11d /* z ^ ... */
! 500: xor %ebx, %r11d /* x ^ ... */
! 501: add %r11d, %eax /* dst += ... */
! 502: rol $4, %eax /* dst <<< s */
! 503: mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
! 504: add %ebx, %eax /* dst += x */
! 505: lea -421815835(%edx,%r10d),%edx /* Const + dst + ... */
! 506: mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
! 507: xor %ecx, %r11d /* z ^ ... */
! 508: xor %eax, %r11d /* x ^ ... */
! 509: add %r11d, %edx /* dst += ... */
! 510: rol $11, %edx /* dst <<< s */
! 511: mov %eax, %r11d /* (NEXT STEP) y' = %eax */
! 512: add %eax, %edx /* dst += x */
! 513: lea 530742520(%ecx,%r10d),%ecx /* Const + dst + ... */
! 514: mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
! 515: xor %ebx, %r11d /* z ^ ... */
! 516: xor %edx, %r11d /* x ^ ... */
! 517: add %r11d, %ecx /* dst += ... */
! 518: rol $16, %ecx /* dst <<< s */
! 519: mov %edx, %r11d /* (NEXT STEP) y' = %edx */
! 520: add %edx, %ecx /* dst += x */
! 521: lea -995338651(%ebx,%r10d),%ebx /* Const + dst + ... */
! 522: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
! 523: xor %eax, %r11d /* z ^ ... */
! 524: xor %ecx, %r11d /* x ^ ... */
! 525: add %r11d, %ebx /* dst += ... */
! 526: rol $23, %ebx /* dst <<< s */
! 527: mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
! 528: add %ecx, %ebx /* dst += x */
! 529: mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */
! 530: mov $0xffffffff, %r11d
! 531: xor %edx, %r11d /* (NEXT STEP) not z' = not %edx*/
! 532: lea -198630844(%eax,%r10d),%eax /* Const + dst + ... */
! 533: or %ebx, %r11d /* x | ... */
! 534: xor %ecx, %r11d /* y ^ ... */
! 535: add %r11d, %eax /* dst += ... */
! 536: mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
! 537: mov $0xffffffff, %r11d
! 538: rol $6, %eax /* dst <<< s */
! 539: xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
! 540: add %ebx, %eax /* dst += x */
! 541: lea 1126891415(%edx,%r10d),%edx /* Const + dst + ... */
! 542: or %eax, %r11d /* x | ... */
! 543: xor %ebx, %r11d /* y ^ ... */
! 544: add %r11d, %edx /* dst += ... */
! 545: mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
! 546: mov $0xffffffff, %r11d
! 547: rol $10, %edx /* dst <<< s */
! 548: xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
! 549: add %eax, %edx /* dst += x */
! 550: lea -1416354905(%ecx,%r10d),%ecx /* Const + dst + ... */
! 551: or %edx, %r11d /* x | ... */
! 552: xor %eax, %r11d /* y ^ ... */
! 553: add %r11d, %ecx /* dst += ... */
! 554: mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
! 555: mov $0xffffffff, %r11d
! 556: rol $15, %ecx /* dst <<< s */
! 557: xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
! 558: add %edx, %ecx /* dst += x */
! 559: lea -57434055(%ebx,%r10d),%ebx /* Const + dst + ... */
! 560: or %ecx, %r11d /* x | ... */
! 561: xor %edx, %r11d /* y ^ ... */
! 562: add %r11d, %ebx /* dst += ... */
! 563: mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
! 564: mov $0xffffffff, %r11d
! 565: rol $21, %ebx /* dst <<< s */
! 566: xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
! 567: add %ecx, %ebx /* dst += x */
! 568: lea 1700485571(%eax,%r10d),%eax /* Const + dst + ... */
! 569: or %ebx, %r11d /* x | ... */
! 570: xor %ecx, %r11d /* y ^ ... */
! 571: add %r11d, %eax /* dst += ... */
! 572: mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
! 573: mov $0xffffffff, %r11d
! 574: rol $6, %eax /* dst <<< s */
! 575: xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
! 576: add %ebx, %eax /* dst += x */
! 577: lea -1894986606(%edx,%r10d),%edx /* Const + dst + ... */
! 578: or %eax, %r11d /* x | ... */
! 579: xor %ebx, %r11d /* y ^ ... */
! 580: add %r11d, %edx /* dst += ... */
! 581: mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
! 582: mov $0xffffffff, %r11d
! 583: rol $10, %edx /* dst <<< s */
! 584: xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
! 585: add %eax, %edx /* dst += x */
! 586: lea -1051523(%ecx,%r10d),%ecx /* Const + dst + ... */
! 587: or %edx, %r11d /* x | ... */
! 588: xor %eax, %r11d /* y ^ ... */
! 589: add %r11d, %ecx /* dst += ... */
! 590: mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
! 591: mov $0xffffffff, %r11d
! 592: rol $15, %ecx /* dst <<< s */
! 593: xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
! 594: add %edx, %ecx /* dst += x */
! 595: lea -2054922799(%ebx,%r10d),%ebx /* Const + dst + ... */
! 596: or %ecx, %r11d /* x | ... */
! 597: xor %edx, %r11d /* y ^ ... */
! 598: add %r11d, %ebx /* dst += ... */
! 599: mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
! 600: mov $0xffffffff, %r11d
! 601: rol $21, %ebx /* dst <<< s */
! 602: xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
! 603: add %ecx, %ebx /* dst += x */
! 604: lea 1873313359(%eax,%r10d),%eax /* Const + dst + ... */
! 605: or %ebx, %r11d /* x | ... */
! 606: xor %ecx, %r11d /* y ^ ... */
! 607: add %r11d, %eax /* dst += ... */
! 608: mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
! 609: mov $0xffffffff, %r11d
! 610: rol $6, %eax /* dst <<< s */
! 611: xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
! 612: add %ebx, %eax /* dst += x */
! 613: lea -30611744(%edx,%r10d),%edx /* Const + dst + ... */
! 614: or %eax, %r11d /* x | ... */
! 615: xor %ebx, %r11d /* y ^ ... */
! 616: add %r11d, %edx /* dst += ... */
! 617: mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
! 618: mov $0xffffffff, %r11d
! 619: rol $10, %edx /* dst <<< s */
! 620: xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
! 621: add %eax, %edx /* dst += x */
! 622: lea -1560198380(%ecx,%r10d),%ecx /* Const + dst + ... */
! 623: or %edx, %r11d /* x | ... */
! 624: xor %eax, %r11d /* y ^ ... */
! 625: add %r11d, %ecx /* dst += ... */
! 626: mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
! 627: mov $0xffffffff, %r11d
! 628: rol $15, %ecx /* dst <<< s */
! 629: xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
! 630: add %edx, %ecx /* dst += x */
! 631: lea 1309151649(%ebx,%r10d),%ebx /* Const + dst + ... */
! 632: or %ecx, %r11d /* x | ... */
! 633: xor %edx, %r11d /* y ^ ... */
! 634: add %r11d, %ebx /* dst += ... */
! 635: mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
! 636: mov $0xffffffff, %r11d
! 637: rol $21, %ebx /* dst <<< s */
! 638: xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
! 639: add %ecx, %ebx /* dst += x */
! 640: lea -145523070(%eax,%r10d),%eax /* Const + dst + ... */
! 641: or %ebx, %r11d /* x | ... */
! 642: xor %ecx, %r11d /* y ^ ... */
! 643: add %r11d, %eax /* dst += ... */
! 644: mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
! 645: mov $0xffffffff, %r11d
! 646: rol $6, %eax /* dst <<< s */
! 647: xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
! 648: add %ebx, %eax /* dst += x */
! 649: lea -1120210379(%edx,%r10d),%edx /* Const + dst + ... */
! 650: or %eax, %r11d /* x | ... */
! 651: xor %ebx, %r11d /* y ^ ... */
! 652: add %r11d, %edx /* dst += ... */
! 653: mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
! 654: mov $0xffffffff, %r11d
! 655: rol $10, %edx /* dst <<< s */
! 656: xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
! 657: add %eax, %edx /* dst += x */
! 658: lea 718787259(%ecx,%r10d),%ecx /* Const + dst + ... */
! 659: or %edx, %r11d /* x | ... */
! 660: xor %eax, %r11d /* y ^ ... */
! 661: add %r11d, %ecx /* dst += ... */
! 662: mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
! 663: mov $0xffffffff, %r11d
! 664: rol $15, %ecx /* dst <<< s */
! 665: xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
! 666: add %edx, %ecx /* dst += x */
! 667: lea -343485551(%ebx,%r10d),%ebx /* Const + dst + ... */
! 668: or %ecx, %r11d /* x | ... */
! 669: xor %edx, %r11d /* y ^ ... */
! 670: add %r11d, %ebx /* dst += ... */
! 671: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
! 672: mov $0xffffffff, %r11d
! 673: rol $21, %ebx /* dst <<< s */
! 674: xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
! 675: add %ecx, %ebx /* dst += x */
! 676: # add old values of A, B, C, D
! 677: add %r8d, %eax
! 678: add %r9d, %ebx
! 679: add %r14d, %ecx
! 680: add %r15d, %edx
! 681:
! 682: # loop control
! 683: add $64, %rsi # ptr += 64
! 684: cmp %rdi, %rsi # cmp end with ptr
! 685: jb 2b # jmp if ptr < end
! 686: # END of loop over 16-word blocks
! 687: 1:
! 688: mov %eax, 0*4(%rbp) # ctx->A = A
! 689: mov %ebx, 1*4(%rbp) # ctx->B = B
! 690: mov %ecx, 2*4(%rbp) # ctx->C = C
! 691: mov %edx, 3*4(%rbp) # ctx->D = D
! 692:
! 693: pop %r15
! 694: pop %r14
! 695: pop %r13 # not really useful (r13 is unused)
! 696: pop %r12
! 697: pop %rbx
! 698: pop %rbp
! 699: ret
! 700:
! 701: #endif /* !USE_OPENSSL ... */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>