Annotation of embedaddon/rsync/lib/md5-asm-x86_64.S, revision 1.1.1.1
1.1 misho 1: /*
2: * x86-64 optimized assembler MD5 implementation
3: *
4: * Author: Marc Bevand, 2004
5: *
6: * This code was placed in the public domain by the author. The original
7: * publication can be found at:
8: *
9: * https://www.zorinaq.com/papers/md5-amd64.html
10: */
11: /*
12: * No modifications were made aside from changing the function and file names.
13: * The MD5_CTX structure as expected here (from OpenSSL) is binary compatible
14: * with the md_context used by rsync, for the fields accessed.
15: *
16: * Benchmarks (in MB/s)              C     ASM
17: * - Intel Atom D2700              302     334
18: * - Intel i7-7700hq               351     376
19: * - AMD ThreadRipper 2950x        728     784
20: *
21: * The original code was also incorporated into OpenSSL. It has since been
22: * modified there. Those changes have not been made here due to licensing
23: * incompatibilities. Benchmarks of those changes on the above CPUs did not
24: * show any significant difference in performance, though.
25: */
26:
27: #include "config.h"
28: #include "md-defines.h"
29:
30: #if !defined USE_OPENSSL && CSUM_CHUNK == 64
31:
32: #ifdef __APPLE__
33: #define md5_process_asm _md5_process_asm
34: #endif
35:
36: .text
37: .align 16
38:
/*
 * md5_process_asm(ctx, ptr, nbr)
 *
 * Runs the MD5 block transform over nbr consecutive 64-byte blocks starting
 * at ptr and updates the four 32-bit state words A, B, C, D stored at
 * ctx+0, ctx+4, ctx+8 and ctx+12.
 *
 * In:    rdi = ctx, rsi = ptr, rdx = nbr (the System V AMD64 order for the
 *        first three integer arguments).
 * Out:   A, B, C, D are stored back to ctx; no separate return value is set
 *        up (eax simply still holds A at the ret).
 * Saved: rbp, rbx, r12, r13, r14, r15 are pushed on entry and popped before
 *        the ret.
 * Clobb: rax, rcx, rdx, rsi, rdi, r8, r9, r10, r11 and the flags are
 *        overwritten without being saved.
 *
 * When nbr is 0 the block loop is skipped and the state words are written
 * back unchanged.
 *
 * Each of the 64 steps below uses lea to fold "Const + dst + X[k]" into one
 * instruction; the destination is a 32-bit register, so sums wrap at 32 bits.
 * A step also issues the message-word load (into r10d) and the first copy
 * for the boolean function (into r11d) that the step after it will consume;
 * those instructions are the ones tagged "(NEXT STEP)".
 */
39: .globl md5_process_asm
40: md5_process_asm:
41: push %rbp
42: push %rbx
43: push %r12
44: push %r13 # not really useful (r13 is unused)
45: push %r14
46: push %r15
47:
48: # rdi = arg #1 (ctx, MD5_CTX pointer)
49: # rsi = arg #2 (ptr, data pointer)
50: # rdx = arg #3 (nbr, number of 16-word blocks to process)
51: mov %rdi, %rbp # rbp = ctx
52: shl $6, %rdx # rdx = nbr in bytes
53: lea (%rsi,%rdx), %rdi # rdi = end
54: mov 0*4(%rbp), %eax # eax = ctx->A
55: mov 1*4(%rbp), %ebx # ebx = ctx->B
56: mov 2*4(%rbp), %ecx # ecx = ctx->C
57: mov 3*4(%rbp), %edx # edx = ctx->D
58: # end is 'rdi'
59: # ptr is 'rsi'
60: # A is 'eax'
61: # B is 'ebx'
62: # C is 'ecx'
63: # D is 'edx'
64:
65: cmp %rdi, %rsi # cmp end with ptr
66: je 1f # jmp if ptr == end
67:
68: # BEGIN of loop over 16-word blocks
69: 2: # save old values of A, B, C, D
70: mov %eax, %r8d
71: mov %ebx, %r9d
72: mov %ecx, %r14d
73: mov %edx, %r15d
/*
 * Round 1, 16 steps:
 *   dst = x + ((dst + Const + X[k] + (z ^ (x & (y ^ z)))) <<< s)
 * with s cycling through 7, 12, 17, 22 and k running 0..15 in order.
 * The destination rotates eax, edx, ecx, ebx from step to step.
 */
74: mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */
75: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
76: xor %ecx, %r11d /* y ^ ... */
77: lea -680876936(%eax,%r10d),%eax /* Const + dst + ... */
78: and %ebx, %r11d /* x & ... */
79: xor %edx, %r11d /* z ^ ... */
80: mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
81: add %r11d, %eax /* dst += ... */
82: rol $7, %eax /* dst <<< s */
83: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
84: add %ebx, %eax /* dst += x */
85: xor %ebx, %r11d /* y ^ ... */
86: lea -389564586(%edx,%r10d),%edx /* Const + dst + ... */
87: and %eax, %r11d /* x & ... */
88: xor %ecx, %r11d /* z ^ ... */
89: mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
90: add %r11d, %edx /* dst += ... */
91: rol $12, %edx /* dst <<< s */
92: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
93: add %eax, %edx /* dst += x */
94: xor %eax, %r11d /* y ^ ... */
95: lea 606105819(%ecx,%r10d),%ecx /* Const + dst + ... */
96: and %edx, %r11d /* x & ... */
97: xor %ebx, %r11d /* z ^ ... */
98: mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
99: add %r11d, %ecx /* dst += ... */
100: rol $17, %ecx /* dst <<< s */
101: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
102: add %edx, %ecx /* dst += x */
103: xor %edx, %r11d /* y ^ ... */
104: lea -1044525330(%ebx,%r10d),%ebx /* Const + dst + ... */
105: and %ecx, %r11d /* x & ... */
106: xor %eax, %r11d /* z ^ ... */
107: mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
108: add %r11d, %ebx /* dst += ... */
109: rol $22, %ebx /* dst <<< s */
110: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
111: add %ecx, %ebx /* dst += x */
112: xor %ecx, %r11d /* y ^ ... */
113: lea -176418897(%eax,%r10d),%eax /* Const + dst + ... */
114: and %ebx, %r11d /* x & ... */
115: xor %edx, %r11d /* z ^ ... */
116: mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
117: add %r11d, %eax /* dst += ... */
118: rol $7, %eax /* dst <<< s */
119: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
120: add %ebx, %eax /* dst += x */
121: xor %ebx, %r11d /* y ^ ... */
122: lea 1200080426(%edx,%r10d),%edx /* Const + dst + ... */
123: and %eax, %r11d /* x & ... */
124: xor %ecx, %r11d /* z ^ ... */
125: mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
126: add %r11d, %edx /* dst += ... */
127: rol $12, %edx /* dst <<< s */
128: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
129: add %eax, %edx /* dst += x */
130: xor %eax, %r11d /* y ^ ... */
131: lea -1473231341(%ecx,%r10d),%ecx /* Const + dst + ... */
132: and %edx, %r11d /* x & ... */
133: xor %ebx, %r11d /* z ^ ... */
134: mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
135: add %r11d, %ecx /* dst += ... */
136: rol $17, %ecx /* dst <<< s */
137: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
138: add %edx, %ecx /* dst += x */
139: xor %edx, %r11d /* y ^ ... */
140: lea -45705983(%ebx,%r10d),%ebx /* Const + dst + ... */
141: and %ecx, %r11d /* x & ... */
142: xor %eax, %r11d /* z ^ ... */
143: mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
144: add %r11d, %ebx /* dst += ... */
145: rol $22, %ebx /* dst <<< s */
146: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
147: add %ecx, %ebx /* dst += x */
148: xor %ecx, %r11d /* y ^ ... */
149: lea 1770035416(%eax,%r10d),%eax /* Const + dst + ... */
150: and %ebx, %r11d /* x & ... */
151: xor %edx, %r11d /* z ^ ... */
152: mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
153: add %r11d, %eax /* dst += ... */
154: rol $7, %eax /* dst <<< s */
155: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
156: add %ebx, %eax /* dst += x */
157: xor %ebx, %r11d /* y ^ ... */
158: lea -1958414417(%edx,%r10d),%edx /* Const + dst + ... */
159: and %eax, %r11d /* x & ... */
160: xor %ecx, %r11d /* z ^ ... */
161: mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
162: add %r11d, %edx /* dst += ... */
163: rol $12, %edx /* dst <<< s */
164: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
165: add %eax, %edx /* dst += x */
166: xor %eax, %r11d /* y ^ ... */
167: lea -42063(%ecx,%r10d),%ecx /* Const + dst + ... */
168: and %edx, %r11d /* x & ... */
169: xor %ebx, %r11d /* z ^ ... */
170: mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
171: add %r11d, %ecx /* dst += ... */
172: rol $17, %ecx /* dst <<< s */
173: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
174: add %edx, %ecx /* dst += x */
175: xor %edx, %r11d /* y ^ ... */
176: lea -1990404162(%ebx,%r10d),%ebx /* Const + dst + ... */
177: and %ecx, %r11d /* x & ... */
178: xor %eax, %r11d /* z ^ ... */
179: mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
180: add %r11d, %ebx /* dst += ... */
181: rol $22, %ebx /* dst <<< s */
182: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
183: add %ecx, %ebx /* dst += x */
184: xor %ecx, %r11d /* y ^ ... */
185: lea 1804603682(%eax,%r10d),%eax /* Const + dst + ... */
186: and %ebx, %r11d /* x & ... */
187: xor %edx, %r11d /* z ^ ... */
188: mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
189: add %r11d, %eax /* dst += ... */
190: rol $7, %eax /* dst <<< s */
191: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
192: add %ebx, %eax /* dst += x */
193: xor %ebx, %r11d /* y ^ ... */
194: lea -40341101(%edx,%r10d),%edx /* Const + dst + ... */
195: and %eax, %r11d /* x & ... */
196: xor %ecx, %r11d /* z ^ ... */
197: mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
198: add %r11d, %edx /* dst += ... */
199: rol $12, %edx /* dst <<< s */
200: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
201: add %eax, %edx /* dst += x */
202: xor %eax, %r11d /* y ^ ... */
203: lea -1502002290(%ecx,%r10d),%ecx /* Const + dst + ... */
204: and %edx, %r11d /* x & ... */
205: xor %ebx, %r11d /* z ^ ... */
206: mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
207: add %r11d, %ecx /* dst += ... */
208: rol $17, %ecx /* dst <<< s */
209: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
210: add %edx, %ecx /* dst += x */
211: xor %edx, %r11d /* y ^ ... */
212: lea 1236535329(%ebx,%r10d),%ebx /* Const + dst + ... */
213: and %ecx, %r11d /* x & ... */
214: xor %eax, %r11d /* z ^ ... */
215: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
216: add %r11d, %ebx /* dst += ... */
217: rol $22, %ebx /* dst <<< s */
218: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
219: add %ecx, %ebx /* dst += x */
/*
 * Round 2, 16 steps:
 *   dst = x + ((dst + Const + X[k] + ((x & z) | (y & ~z))) <<< s)
 * with s cycling through 5, 9, 14, 20 and
 * k = 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12.
 * r11d holds the (y & ~z) half and r12d the (x & z) half.
 * The X[0] word loaded by the final round-1 step is replaced by X[1] here
 * before it is ever read, and the z' copy into r11d is simply repeated.
 */
220: mov 1*4(%rsi), %r10d /* (NEXT STEP) X[1] */
221: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
222: mov %edx, %r12d /* (NEXT STEP) z' = %edx */
223: not %r11d /* not z */
224: lea -165796510(%eax,%r10d),%eax /* Const + dst + ... */
225: and %ebx, %r12d /* x & z */
226: and %ecx, %r11d /* y & (not z) */
227: mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
228: or %r11d, %r12d /* (y & (not z)) | (x & z) */
229: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
230: add %r12d, %eax /* dst += ... */
231: mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
232: rol $5, %eax /* dst <<< s */
233: add %ebx, %eax /* dst += x */
234: not %r11d /* not z */
235: lea -1069501632(%edx,%r10d),%edx /* Const + dst + ... */
236: and %eax, %r12d /* x & z */
237: and %ebx, %r11d /* y & (not z) */
238: mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
239: or %r11d, %r12d /* (y & (not z)) | (x & z) */
240: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
241: add %r12d, %edx /* dst += ... */
242: mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
243: rol $9, %edx /* dst <<< s */
244: add %eax, %edx /* dst += x */
245: not %r11d /* not z */
246: lea 643717713(%ecx,%r10d),%ecx /* Const + dst + ... */
247: and %edx, %r12d /* x & z */
248: and %eax, %r11d /* y & (not z) */
249: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
250: or %r11d, %r12d /* (y & (not z)) | (x & z) */
251: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
252: add %r12d, %ecx /* dst += ... */
253: mov %eax, %r12d /* (NEXT STEP) z' = %eax */
254: rol $14, %ecx /* dst <<< s */
255: add %edx, %ecx /* dst += x */
256: not %r11d /* not z */
257: lea -373897302(%ebx,%r10d),%ebx /* Const + dst + ... */
258: and %ecx, %r12d /* x & z */
259: and %edx, %r11d /* y & (not z) */
260: mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
261: or %r11d, %r12d /* (y & (not z)) | (x & z) */
262: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
263: add %r12d, %ebx /* dst += ... */
264: mov %edx, %r12d /* (NEXT STEP) z' = %edx */
265: rol $20, %ebx /* dst <<< s */
266: add %ecx, %ebx /* dst += x */
267: not %r11d /* not z */
268: lea -701558691(%eax,%r10d),%eax /* Const + dst + ... */
269: and %ebx, %r12d /* x & z */
270: and %ecx, %r11d /* y & (not z) */
271: mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
272: or %r11d, %r12d /* (y & (not z)) | (x & z) */
273: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
274: add %r12d, %eax /* dst += ... */
275: mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
276: rol $5, %eax /* dst <<< s */
277: add %ebx, %eax /* dst += x */
278: not %r11d /* not z */
279: lea 38016083(%edx,%r10d),%edx /* Const + dst + ... */
280: and %eax, %r12d /* x & z */
281: and %ebx, %r11d /* y & (not z) */
282: mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
283: or %r11d, %r12d /* (y & (not z)) | (x & z) */
284: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
285: add %r12d, %edx /* dst += ... */
286: mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
287: rol $9, %edx /* dst <<< s */
288: add %eax, %edx /* dst += x */
289: not %r11d /* not z */
290: lea -660478335(%ecx,%r10d),%ecx /* Const + dst + ... */
291: and %edx, %r12d /* x & z */
292: and %eax, %r11d /* y & (not z) */
293: mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
294: or %r11d, %r12d /* (y & (not z)) | (x & z) */
295: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
296: add %r12d, %ecx /* dst += ... */
297: mov %eax, %r12d /* (NEXT STEP) z' = %eax */
298: rol $14, %ecx /* dst <<< s */
299: add %edx, %ecx /* dst += x */
300: not %r11d /* not z */
301: lea -405537848(%ebx,%r10d),%ebx /* Const + dst + ... */
302: and %ecx, %r12d /* x & z */
303: and %edx, %r11d /* y & (not z) */
304: mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
305: or %r11d, %r12d /* (y & (not z)) | (x & z) */
306: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
307: add %r12d, %ebx /* dst += ... */
308: mov %edx, %r12d /* (NEXT STEP) z' = %edx */
309: rol $20, %ebx /* dst <<< s */
310: add %ecx, %ebx /* dst += x */
311: not %r11d /* not z */
312: lea 568446438(%eax,%r10d),%eax /* Const + dst + ... */
313: and %ebx, %r12d /* x & z */
314: and %ecx, %r11d /* y & (not z) */
315: mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
316: or %r11d, %r12d /* (y & (not z)) | (x & z) */
317: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
318: add %r12d, %eax /* dst += ... */
319: mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
320: rol $5, %eax /* dst <<< s */
321: add %ebx, %eax /* dst += x */
322: not %r11d /* not z */
323: lea -1019803690(%edx,%r10d),%edx /* Const + dst + ... */
324: and %eax, %r12d /* x & z */
325: and %ebx, %r11d /* y & (not z) */
326: mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
327: or %r11d, %r12d /* (y & (not z)) | (x & z) */
328: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
329: add %r12d, %edx /* dst += ... */
330: mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
331: rol $9, %edx /* dst <<< s */
332: add %eax, %edx /* dst += x */
333: not %r11d /* not z */
334: lea -187363961(%ecx,%r10d),%ecx /* Const + dst + ... */
335: and %edx, %r12d /* x & z */
336: and %eax, %r11d /* y & (not z) */
337: mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
338: or %r11d, %r12d /* (y & (not z)) | (x & z) */
339: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
340: add %r12d, %ecx /* dst += ... */
341: mov %eax, %r12d /* (NEXT STEP) z' = %eax */
342: rol $14, %ecx /* dst <<< s */
343: add %edx, %ecx /* dst += x */
344: not %r11d /* not z */
345: lea 1163531501(%ebx,%r10d),%ebx /* Const + dst + ... */
346: and %ecx, %r12d /* x & z */
347: and %edx, %r11d /* y & (not z) */
348: mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
349: or %r11d, %r12d /* (y & (not z)) | (x & z) */
350: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
351: add %r12d, %ebx /* dst += ... */
352: mov %edx, %r12d /* (NEXT STEP) z' = %edx */
353: rol $20, %ebx /* dst <<< s */
354: add %ecx, %ebx /* dst += x */
355: not %r11d /* not z */
356: lea -1444681467(%eax,%r10d),%eax /* Const + dst + ... */
357: and %ebx, %r12d /* x & z */
358: and %ecx, %r11d /* y & (not z) */
359: mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
360: or %r11d, %r12d /* (y & (not z)) | (x & z) */
361: mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
362: add %r12d, %eax /* dst += ... */
363: mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
364: rol $5, %eax /* dst <<< s */
365: add %ebx, %eax /* dst += x */
366: not %r11d /* not z */
367: lea -51403784(%edx,%r10d),%edx /* Const + dst + ... */
368: and %eax, %r12d /* x & z */
369: and %ebx, %r11d /* y & (not z) */
370: mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
371: or %r11d, %r12d /* (y & (not z)) | (x & z) */
372: mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
373: add %r12d, %edx /* dst += ... */
374: mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
375: rol $9, %edx /* dst <<< s */
376: add %eax, %edx /* dst += x */
377: not %r11d /* not z */
378: lea 1735328473(%ecx,%r10d),%ecx /* Const + dst + ... */
379: and %edx, %r12d /* x & z */
380: and %eax, %r11d /* y & (not z) */
381: mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
382: or %r11d, %r12d /* (y & (not z)) | (x & z) */
383: mov %eax, %r11d /* (NEXT STEP) z' = %eax */
384: add %r12d, %ecx /* dst += ... */
385: mov %eax, %r12d /* (NEXT STEP) z' = %eax */
386: rol $14, %ecx /* dst <<< s */
387: add %edx, %ecx /* dst += x */
388: not %r11d /* not z */
389: lea -1926607734(%ebx,%r10d),%ebx /* Const + dst + ... */
390: and %ecx, %r12d /* x & z */
391: and %edx, %r11d /* y & (not z) */
392: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
393: or %r11d, %r12d /* (y & (not z)) | (x & z) */
394: mov %edx, %r11d /* (NEXT STEP) z' = %edx */
395: add %r12d, %ebx /* dst += ... */
396: mov %edx, %r12d /* (NEXT STEP) z' = %edx */
397: rol $20, %ebx /* dst <<< s */
398: add %ecx, %ebx /* dst += x */
/*
 * Round 3, 16 steps:
 *   dst = x + ((dst + Const + X[k] + (x ^ y ^ z)) <<< s)
 * with s cycling through 4, 11, 16, 23 and
 * k = 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2.
 * Here the copy prepared for the next step is y (not z) and only r11d is
 * needed as a temporary.
 * The r10d and r11d values prepared by the final round-2 step are
 * overwritten by the next two instructions before being read; the r12d copy
 * is not read again before r12d is rewritten.
 */
399: mov 5*4(%rsi), %r10d /* (NEXT STEP) X[5] */
400: mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
401: lea -378558(%eax,%r10d),%eax /* Const + dst + ... */
402: mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
403: xor %edx, %r11d /* z ^ ... */
404: xor %ebx, %r11d /* x ^ ... */
405: add %r11d, %eax /* dst += ... */
406: rol $4, %eax /* dst <<< s */
407: mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
408: add %ebx, %eax /* dst += x */
409: lea -2022574463(%edx,%r10d),%edx /* Const + dst + ... */
410: mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
411: xor %ecx, %r11d /* z ^ ... */
412: xor %eax, %r11d /* x ^ ... */
413: add %r11d, %edx /* dst += ... */
414: rol $11, %edx /* dst <<< s */
415: mov %eax, %r11d /* (NEXT STEP) y' = %eax */
416: add %eax, %edx /* dst += x */
417: lea 1839030562(%ecx,%r10d),%ecx /* Const + dst + ... */
418: mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
419: xor %ebx, %r11d /* z ^ ... */
420: xor %edx, %r11d /* x ^ ... */
421: add %r11d, %ecx /* dst += ... */
422: rol $16, %ecx /* dst <<< s */
423: mov %edx, %r11d /* (NEXT STEP) y' = %edx */
424: add %edx, %ecx /* dst += x */
425: lea -35309556(%ebx,%r10d),%ebx /* Const + dst + ... */
426: mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
427: xor %eax, %r11d /* z ^ ... */
428: xor %ecx, %r11d /* x ^ ... */
429: add %r11d, %ebx /* dst += ... */
430: rol $23, %ebx /* dst <<< s */
431: mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
432: add %ecx, %ebx /* dst += x */
433: lea -1530992060(%eax,%r10d),%eax /* Const + dst + ... */
434: mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
435: xor %edx, %r11d /* z ^ ... */
436: xor %ebx, %r11d /* x ^ ... */
437: add %r11d, %eax /* dst += ... */
438: rol $4, %eax /* dst <<< s */
439: mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
440: add %ebx, %eax /* dst += x */
441: lea 1272893353(%edx,%r10d),%edx /* Const + dst + ... */
442: mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
443: xor %ecx, %r11d /* z ^ ... */
444: xor %eax, %r11d /* x ^ ... */
445: add %r11d, %edx /* dst += ... */
446: rol $11, %edx /* dst <<< s */
447: mov %eax, %r11d /* (NEXT STEP) y' = %eax */
448: add %eax, %edx /* dst += x */
449: lea -155497632(%ecx,%r10d),%ecx /* Const + dst + ... */
450: mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
451: xor %ebx, %r11d /* z ^ ... */
452: xor %edx, %r11d /* x ^ ... */
453: add %r11d, %ecx /* dst += ... */
454: rol $16, %ecx /* dst <<< s */
455: mov %edx, %r11d /* (NEXT STEP) y' = %edx */
456: add %edx, %ecx /* dst += x */
457: lea -1094730640(%ebx,%r10d),%ebx /* Const + dst + ... */
458: mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
459: xor %eax, %r11d /* z ^ ... */
460: xor %ecx, %r11d /* x ^ ... */
461: add %r11d, %ebx /* dst += ... */
462: rol $23, %ebx /* dst <<< s */
463: mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
464: add %ecx, %ebx /* dst += x */
465: lea 681279174(%eax,%r10d),%eax /* Const + dst + ... */
466: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
467: xor %edx, %r11d /* z ^ ... */
468: xor %ebx, %r11d /* x ^ ... */
469: add %r11d, %eax /* dst += ... */
470: rol $4, %eax /* dst <<< s */
471: mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
472: add %ebx, %eax /* dst += x */
473: lea -358537222(%edx,%r10d),%edx /* Const + dst + ... */
474: mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
475: xor %ecx, %r11d /* z ^ ... */
476: xor %eax, %r11d /* x ^ ... */
477: add %r11d, %edx /* dst += ... */
478: rol $11, %edx /* dst <<< s */
479: mov %eax, %r11d /* (NEXT STEP) y' = %eax */
480: add %eax, %edx /* dst += x */
481: lea -722521979(%ecx,%r10d),%ecx /* Const + dst + ... */
482: mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
483: xor %ebx, %r11d /* z ^ ... */
484: xor %edx, %r11d /* x ^ ... */
485: add %r11d, %ecx /* dst += ... */
486: rol $16, %ecx /* dst <<< s */
487: mov %edx, %r11d /* (NEXT STEP) y' = %edx */
488: add %edx, %ecx /* dst += x */
489: lea 76029189(%ebx,%r10d),%ebx /* Const + dst + ... */
490: mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
491: xor %eax, %r11d /* z ^ ... */
492: xor %ecx, %r11d /* x ^ ... */
493: add %r11d, %ebx /* dst += ... */
494: rol $23, %ebx /* dst <<< s */
495: mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
496: add %ecx, %ebx /* dst += x */
497: lea -640364487(%eax,%r10d),%eax /* Const + dst + ... */
498: mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
499: xor %edx, %r11d /* z ^ ... */
500: xor %ebx, %r11d /* x ^ ... */
501: add %r11d, %eax /* dst += ... */
502: rol $4, %eax /* dst <<< s */
503: mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
504: add %ebx, %eax /* dst += x */
505: lea -421815835(%edx,%r10d),%edx /* Const + dst + ... */
506: mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
507: xor %ecx, %r11d /* z ^ ... */
508: xor %eax, %r11d /* x ^ ... */
509: add %r11d, %edx /* dst += ... */
510: rol $11, %edx /* dst <<< s */
511: mov %eax, %r11d /* (NEXT STEP) y' = %eax */
512: add %eax, %edx /* dst += x */
513: lea 530742520(%ecx,%r10d),%ecx /* Const + dst + ... */
514: mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
515: xor %ebx, %r11d /* z ^ ... */
516: xor %edx, %r11d /* x ^ ... */
517: add %r11d, %ecx /* dst += ... */
518: rol $16, %ecx /* dst <<< s */
519: mov %edx, %r11d /* (NEXT STEP) y' = %edx */
520: add %edx, %ecx /* dst += x */
521: lea -995338651(%ebx,%r10d),%ebx /* Const + dst + ... */
522: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
523: xor %eax, %r11d /* z ^ ... */
524: xor %ecx, %r11d /* x ^ ... */
525: add %r11d, %ebx /* dst += ... */
526: rol $23, %ebx /* dst <<< s */
527: mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
528: add %ecx, %ebx /* dst += x */
/*
 * Round 4, 16 steps:
 *   dst = x + ((dst + Const + X[k] + (y ^ (x | ~z))) <<< s)
 * with s cycling through 6, 10, 15, 21 and
 * k = 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9.
 * ~z is formed as 0xffffffff ^ z in r11d.
 * The X[0] load issued by the final round-3 step is repeated by the next
 * instruction, and the y' copy left in r11d is overwritten by the constant
 * load before being read.
 */
529: mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */
530: mov $0xffffffff, %r11d
531: xor %edx, %r11d /* (NEXT STEP) not z' = not %edx*/
532: lea -198630844(%eax,%r10d),%eax /* Const + dst + ... */
533: or %ebx, %r11d /* x | ... */
534: xor %ecx, %r11d /* y ^ ... */
535: add %r11d, %eax /* dst += ... */
536: mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
537: mov $0xffffffff, %r11d
538: rol $6, %eax /* dst <<< s */
539: xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
540: add %ebx, %eax /* dst += x */
541: lea 1126891415(%edx,%r10d),%edx /* Const + dst + ... */
542: or %eax, %r11d /* x | ... */
543: xor %ebx, %r11d /* y ^ ... */
544: add %r11d, %edx /* dst += ... */
545: mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
546: mov $0xffffffff, %r11d
547: rol $10, %edx /* dst <<< s */
548: xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
549: add %eax, %edx /* dst += x */
550: lea -1416354905(%ecx,%r10d),%ecx /* Const + dst + ... */
551: or %edx, %r11d /* x | ... */
552: xor %eax, %r11d /* y ^ ... */
553: add %r11d, %ecx /* dst += ... */
554: mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
555: mov $0xffffffff, %r11d
556: rol $15, %ecx /* dst <<< s */
557: xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
558: add %edx, %ecx /* dst += x */
559: lea -57434055(%ebx,%r10d),%ebx /* Const + dst + ... */
560: or %ecx, %r11d /* x | ... */
561: xor %edx, %r11d /* y ^ ... */
562: add %r11d, %ebx /* dst += ... */
563: mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
564: mov $0xffffffff, %r11d
565: rol $21, %ebx /* dst <<< s */
566: xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
567: add %ecx, %ebx /* dst += x */
568: lea 1700485571(%eax,%r10d),%eax /* Const + dst + ... */
569: or %ebx, %r11d /* x | ... */
570: xor %ecx, %r11d /* y ^ ... */
571: add %r11d, %eax /* dst += ... */
572: mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
573: mov $0xffffffff, %r11d
574: rol $6, %eax /* dst <<< s */
575: xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
576: add %ebx, %eax /* dst += x */
577: lea -1894986606(%edx,%r10d),%edx /* Const + dst + ... */
578: or %eax, %r11d /* x | ... */
579: xor %ebx, %r11d /* y ^ ... */
580: add %r11d, %edx /* dst += ... */
581: mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
582: mov $0xffffffff, %r11d
583: rol $10, %edx /* dst <<< s */
584: xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
585: add %eax, %edx /* dst += x */
586: lea -1051523(%ecx,%r10d),%ecx /* Const + dst + ... */
587: or %edx, %r11d /* x | ... */
588: xor %eax, %r11d /* y ^ ... */
589: add %r11d, %ecx /* dst += ... */
590: mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
591: mov $0xffffffff, %r11d
592: rol $15, %ecx /* dst <<< s */
593: xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
594: add %edx, %ecx /* dst += x */
595: lea -2054922799(%ebx,%r10d),%ebx /* Const + dst + ... */
596: or %ecx, %r11d /* x | ... */
597: xor %edx, %r11d /* y ^ ... */
598: add %r11d, %ebx /* dst += ... */
599: mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
600: mov $0xffffffff, %r11d
601: rol $21, %ebx /* dst <<< s */
602: xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
603: add %ecx, %ebx /* dst += x */
604: lea 1873313359(%eax,%r10d),%eax /* Const + dst + ... */
605: or %ebx, %r11d /* x | ... */
606: xor %ecx, %r11d /* y ^ ... */
607: add %r11d, %eax /* dst += ... */
608: mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
609: mov $0xffffffff, %r11d
610: rol $6, %eax /* dst <<< s */
611: xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
612: add %ebx, %eax /* dst += x */
613: lea -30611744(%edx,%r10d),%edx /* Const + dst + ... */
614: or %eax, %r11d /* x | ... */
615: xor %ebx, %r11d /* y ^ ... */
616: add %r11d, %edx /* dst += ... */
617: mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
618: mov $0xffffffff, %r11d
619: rol $10, %edx /* dst <<< s */
620: xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
621: add %eax, %edx /* dst += x */
622: lea -1560198380(%ecx,%r10d),%ecx /* Const + dst + ... */
623: or %edx, %r11d /* x | ... */
624: xor %eax, %r11d /* y ^ ... */
625: add %r11d, %ecx /* dst += ... */
626: mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
627: mov $0xffffffff, %r11d
628: rol $15, %ecx /* dst <<< s */
629: xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
630: add %edx, %ecx /* dst += x */
631: lea 1309151649(%ebx,%r10d),%ebx /* Const + dst + ... */
632: or %ecx, %r11d /* x | ... */
633: xor %edx, %r11d /* y ^ ... */
634: add %r11d, %ebx /* dst += ... */
635: mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
636: mov $0xffffffff, %r11d
637: rol $21, %ebx /* dst <<< s */
638: xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
639: add %ecx, %ebx /* dst += x */
640: lea -145523070(%eax,%r10d),%eax /* Const + dst + ... */
641: or %ebx, %r11d /* x | ... */
642: xor %ecx, %r11d /* y ^ ... */
643: add %r11d, %eax /* dst += ... */
644: mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
645: mov $0xffffffff, %r11d
646: rol $6, %eax /* dst <<< s */
647: xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
648: add %ebx, %eax /* dst += x */
649: lea -1120210379(%edx,%r10d),%edx /* Const + dst + ... */
650: or %eax, %r11d /* x | ... */
651: xor %ebx, %r11d /* y ^ ... */
652: add %r11d, %edx /* dst += ... */
653: mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
654: mov $0xffffffff, %r11d
655: rol $10, %edx /* dst <<< s */
656: xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
657: add %eax, %edx /* dst += x */
658: lea 718787259(%ecx,%r10d),%ecx /* Const + dst + ... */
659: or %edx, %r11d /* x | ... */
660: xor %eax, %r11d /* y ^ ... */
661: add %r11d, %ecx /* dst += ... */
662: mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
663: mov $0xffffffff, %r11d
664: rol $15, %ecx /* dst <<< s */
665: xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
666: add %edx, %ecx /* dst += x */
667: lea -343485551(%ebx,%r10d),%ebx /* Const + dst + ... */
668: or %ecx, %r11d /* x | ... */
669: xor %edx, %r11d /* y ^ ... */
670: add %r11d, %ebx /* dst += ... */
671: mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
672: mov $0xffffffff, %r11d
673: rol $21, %ebx /* dst <<< s */
674: xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
675: add %ecx, %ebx /* dst += x */
/*
 * The r10d/r11d values prepared by the final round-4 step are not consumed:
 * the top of the loop reloads both, and nothing after the loop reads them.
 */
676: # add old values of A, B, C, D
677: add %r8d, %eax
678: add %r9d, %ebx
679: add %r14d, %ecx
680: add %r15d, %edx
681:
682: # loop control
683: add $64, %rsi # ptr += 64
684: cmp %rdi, %rsi # cmp end with ptr
685: jb 2b # jmp if ptr < end
686: # END of loop over 16-word blocks
687: 1:
688: mov %eax, 0*4(%rbp) # ctx->A = A
689: mov %ebx, 1*4(%rbp) # ctx->B = B
690: mov %ecx, 2*4(%rbp) # ctx->C = C
691: mov %edx, 3*4(%rbp) # ctx->D = D
692:
693: pop %r15
694: pop %r14
695: pop %r13 # not really useful (r13 is unused)
696: pop %r12
697: pop %rbx
698: pop %rbp
699: ret
700:
701: #endif /* !USE_OPENSSL ... */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>