Annotation of embedaddon/strongswan/src/libstrongswan/plugins/aesni/aesni_cbc.c, revision 1.1.1.1
1.1 misho 1: /*
2: * Copyright (C) 2015 Martin Willi
3: * Copyright (C) 2015 revosec AG
4: *
5: * This program is free software; you can redistribute it and/or modify it
6: * under the terms of the GNU General Public License as published by the
7: * Free Software Foundation; either version 2 of the License, or (at your
8: * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
9: *
10: * This program is distributed in the hope that it will be useful, but
11: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13: * for more details.
14: */
15:
16: #include "aesni_cbc.h"
17: #include "aesni_key.h"
18:
19: /**
20: * Pipeline parallelism we use for CBC decryption
21: */
22: #define CBC_DECRYPT_PARALLELISM 4
23:
24: typedef struct private_aesni_cbc_t private_aesni_cbc_t;
25:
26: /**
27: * CBC en/decryption method type
28: */
29: typedef void (*aesni_cbc_fn_t)(aesni_key_t*, u_int, u_char*, u_char*, u_char*);
30:
31: /**
32: * Private data of an aesni_cbc_t object.
33: */
34: struct private_aesni_cbc_t {
35:
36: /**
37: * Public aesni_cbc_t interface.
38: */
39: aesni_cbc_t public;
40:
41: /**
42: * Key size
43: */
44: u_int key_size;
45:
46: /**
47: * Encryption key schedule
48: */
49: aesni_key_t *ekey;
50:
51: /**
52: * Decryption key schedule
53: */
54: aesni_key_t *dkey;
55:
56: /**
57: * Encryption method
58: */
59: aesni_cbc_fn_t encrypt;
60:
61: /**
62: * Decryption method
63: */
64: aesni_cbc_fn_t decrypt;
65: };
66:
67: /**
68: * AES-128 CBC encryption
69: */
70: static void encrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in,
71: u_char *iv, u_char *out)
72: {
73: __m128i *ks, t, fb, *bi, *bo;
74: int i;
75:
76: ks = key->schedule;
77: bi = (__m128i*)in;
78: bo = (__m128i*)out;
79:
80: fb = _mm_loadu_si128((__m128i*)iv);
81: for (i = 0; i < blocks; i++)
82: {
83: t = _mm_loadu_si128(bi + i);
84: fb = _mm_xor_si128(t, fb);
85: fb = _mm_xor_si128(fb, ks[0]);
86:
87: fb = _mm_aesenc_si128(fb, ks[1]);
88: fb = _mm_aesenc_si128(fb, ks[2]);
89: fb = _mm_aesenc_si128(fb, ks[3]);
90: fb = _mm_aesenc_si128(fb, ks[4]);
91: fb = _mm_aesenc_si128(fb, ks[5]);
92: fb = _mm_aesenc_si128(fb, ks[6]);
93: fb = _mm_aesenc_si128(fb, ks[7]);
94: fb = _mm_aesenc_si128(fb, ks[8]);
95: fb = _mm_aesenc_si128(fb, ks[9]);
96:
97: fb = _mm_aesenclast_si128(fb, ks[10]);
98: _mm_storeu_si128(bo + i, fb);
99: }
100: }
101:
102: /**
103: * AES-128 CBC decryption
104: */
105: static void decrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in,
106: u_char *iv, u_char *out)
107: {
108: __m128i *ks, last, *bi, *bo;
109: __m128i t1, t2, t3, t4;
110: __m128i f1, f2, f3, f4;
111: u_int i, pblocks;
112:
113: ks = key->schedule;
114: bi = (__m128i*)in;
115: bo = (__m128i*)out;
116: pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
117:
118: f1 = _mm_loadu_si128((__m128i*)iv);
119:
120: for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
121: {
122: t1 = _mm_loadu_si128(bi + i + 0);
123: t2 = _mm_loadu_si128(bi + i + 1);
124: t3 = _mm_loadu_si128(bi + i + 2);
125: t4 = _mm_loadu_si128(bi + i + 3);
126:
127: f2 = t1;
128: f3 = t2;
129: f4 = t3;
130: last = t4;
131:
132: t1 = _mm_xor_si128(t1, ks[0]);
133: t2 = _mm_xor_si128(t2, ks[0]);
134: t3 = _mm_xor_si128(t3, ks[0]);
135: t4 = _mm_xor_si128(t4, ks[0]);
136:
137: t1 = _mm_aesdec_si128(t1, ks[1]);
138: t2 = _mm_aesdec_si128(t2, ks[1]);
139: t3 = _mm_aesdec_si128(t3, ks[1]);
140: t4 = _mm_aesdec_si128(t4, ks[1]);
141: t1 = _mm_aesdec_si128(t1, ks[2]);
142: t2 = _mm_aesdec_si128(t2, ks[2]);
143: t3 = _mm_aesdec_si128(t3, ks[2]);
144: t4 = _mm_aesdec_si128(t4, ks[2]);
145: t1 = _mm_aesdec_si128(t1, ks[3]);
146: t2 = _mm_aesdec_si128(t2, ks[3]);
147: t3 = _mm_aesdec_si128(t3, ks[3]);
148: t4 = _mm_aesdec_si128(t4, ks[3]);
149: t1 = _mm_aesdec_si128(t1, ks[4]);
150: t2 = _mm_aesdec_si128(t2, ks[4]);
151: t3 = _mm_aesdec_si128(t3, ks[4]);
152: t4 = _mm_aesdec_si128(t4, ks[4]);
153: t1 = _mm_aesdec_si128(t1, ks[5]);
154: t2 = _mm_aesdec_si128(t2, ks[5]);
155: t3 = _mm_aesdec_si128(t3, ks[5]);
156: t4 = _mm_aesdec_si128(t4, ks[5]);
157: t1 = _mm_aesdec_si128(t1, ks[6]);
158: t2 = _mm_aesdec_si128(t2, ks[6]);
159: t3 = _mm_aesdec_si128(t3, ks[6]);
160: t4 = _mm_aesdec_si128(t4, ks[6]);
161: t1 = _mm_aesdec_si128(t1, ks[7]);
162: t2 = _mm_aesdec_si128(t2, ks[7]);
163: t3 = _mm_aesdec_si128(t3, ks[7]);
164: t4 = _mm_aesdec_si128(t4, ks[7]);
165: t1 = _mm_aesdec_si128(t1, ks[8]);
166: t2 = _mm_aesdec_si128(t2, ks[8]);
167: t3 = _mm_aesdec_si128(t3, ks[8]);
168: t4 = _mm_aesdec_si128(t4, ks[8]);
169: t1 = _mm_aesdec_si128(t1, ks[9]);
170: t2 = _mm_aesdec_si128(t2, ks[9]);
171: t3 = _mm_aesdec_si128(t3, ks[9]);
172: t4 = _mm_aesdec_si128(t4, ks[9]);
173:
174: t1 = _mm_aesdeclast_si128(t1, ks[10]);
175: t2 = _mm_aesdeclast_si128(t2, ks[10]);
176: t3 = _mm_aesdeclast_si128(t3, ks[10]);
177: t4 = _mm_aesdeclast_si128(t4, ks[10]);
178: t1 = _mm_xor_si128(t1, f1);
179: t2 = _mm_xor_si128(t2, f2);
180: t3 = _mm_xor_si128(t3, f3);
181: t4 = _mm_xor_si128(t4, f4);
182: _mm_storeu_si128(bo + i + 0, t1);
183: _mm_storeu_si128(bo + i + 1, t2);
184: _mm_storeu_si128(bo + i + 2, t3);
185: _mm_storeu_si128(bo + i + 3, t4);
186: f1 = last;
187: }
188:
189: for (i = pblocks; i < blocks; i++)
190: {
191: last = _mm_loadu_si128(bi + i);
192: t1 = _mm_xor_si128(last, ks[0]);
193:
194: t1 = _mm_aesdec_si128(t1, ks[1]);
195: t1 = _mm_aesdec_si128(t1, ks[2]);
196: t1 = _mm_aesdec_si128(t1, ks[3]);
197: t1 = _mm_aesdec_si128(t1, ks[4]);
198: t1 = _mm_aesdec_si128(t1, ks[5]);
199: t1 = _mm_aesdec_si128(t1, ks[6]);
200: t1 = _mm_aesdec_si128(t1, ks[7]);
201: t1 = _mm_aesdec_si128(t1, ks[8]);
202: t1 = _mm_aesdec_si128(t1, ks[9]);
203:
204: t1 = _mm_aesdeclast_si128(t1, ks[10]);
205: t1 = _mm_xor_si128(t1, f1);
206: _mm_storeu_si128(bo + i, t1);
207: f1 = last;
208: }
209: }
210:
211: /**
212: * AES-192 CBC encryption
213: */
214: static void encrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in,
215: u_char *iv, u_char *out)
216: {
217: __m128i *ks, t, fb, *bi, *bo;
218: int i;
219:
220: ks = key->schedule;
221: bi = (__m128i*)in;
222: bo = (__m128i*)out;
223:
224: fb = _mm_loadu_si128((__m128i*)iv);
225: for (i = 0; i < blocks; i++)
226: {
227: t = _mm_loadu_si128(bi + i);
228: fb = _mm_xor_si128(t, fb);
229: fb = _mm_xor_si128(fb, ks[0]);
230:
231: fb = _mm_aesenc_si128(fb, ks[1]);
232: fb = _mm_aesenc_si128(fb, ks[2]);
233: fb = _mm_aesenc_si128(fb, ks[3]);
234: fb = _mm_aesenc_si128(fb, ks[4]);
235: fb = _mm_aesenc_si128(fb, ks[5]);
236: fb = _mm_aesenc_si128(fb, ks[6]);
237: fb = _mm_aesenc_si128(fb, ks[7]);
238: fb = _mm_aesenc_si128(fb, ks[8]);
239: fb = _mm_aesenc_si128(fb, ks[9]);
240: fb = _mm_aesenc_si128(fb, ks[10]);
241: fb = _mm_aesenc_si128(fb, ks[11]);
242:
243: fb = _mm_aesenclast_si128(fb, ks[12]);
244: _mm_storeu_si128(bo + i, fb);
245: }
246: }
247:
248: /**
249: * AES-192 CBC decryption
250: */
251: static void decrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in,
252: u_char *iv, u_char *out)
253: {
254: __m128i *ks, last, *bi, *bo;
255: __m128i t1, t2, t3, t4;
256: __m128i f1, f2, f3, f4;
257: u_int i, pblocks;
258:
259: ks = key->schedule;
260: bi = (__m128i*)in;
261: bo = (__m128i*)out;
262: pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
263:
264: f1 = _mm_loadu_si128((__m128i*)iv);
265:
266: for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
267: {
268: t1 = _mm_loadu_si128(bi + i + 0);
269: t2 = _mm_loadu_si128(bi + i + 1);
270: t3 = _mm_loadu_si128(bi + i + 2);
271: t4 = _mm_loadu_si128(bi + i + 3);
272:
273: f2 = t1;
274: f3 = t2;
275: f4 = t3;
276: last = t4;
277:
278: t1 = _mm_xor_si128(t1, ks[0]);
279: t2 = _mm_xor_si128(t2, ks[0]);
280: t3 = _mm_xor_si128(t3, ks[0]);
281: t4 = _mm_xor_si128(t4, ks[0]);
282:
283: t1 = _mm_aesdec_si128(t1, ks[1]);
284: t2 = _mm_aesdec_si128(t2, ks[1]);
285: t3 = _mm_aesdec_si128(t3, ks[1]);
286: t4 = _mm_aesdec_si128(t4, ks[1]);
287: t1 = _mm_aesdec_si128(t1, ks[2]);
288: t2 = _mm_aesdec_si128(t2, ks[2]);
289: t3 = _mm_aesdec_si128(t3, ks[2]);
290: t4 = _mm_aesdec_si128(t4, ks[2]);
291: t1 = _mm_aesdec_si128(t1, ks[3]);
292: t2 = _mm_aesdec_si128(t2, ks[3]);
293: t3 = _mm_aesdec_si128(t3, ks[3]);
294: t4 = _mm_aesdec_si128(t4, ks[3]);
295: t1 = _mm_aesdec_si128(t1, ks[4]);
296: t2 = _mm_aesdec_si128(t2, ks[4]);
297: t3 = _mm_aesdec_si128(t3, ks[4]);
298: t4 = _mm_aesdec_si128(t4, ks[4]);
299: t1 = _mm_aesdec_si128(t1, ks[5]);
300: t2 = _mm_aesdec_si128(t2, ks[5]);
301: t3 = _mm_aesdec_si128(t3, ks[5]);
302: t4 = _mm_aesdec_si128(t4, ks[5]);
303: t1 = _mm_aesdec_si128(t1, ks[6]);
304: t2 = _mm_aesdec_si128(t2, ks[6]);
305: t3 = _mm_aesdec_si128(t3, ks[6]);
306: t4 = _mm_aesdec_si128(t4, ks[6]);
307: t1 = _mm_aesdec_si128(t1, ks[7]);
308: t2 = _mm_aesdec_si128(t2, ks[7]);
309: t3 = _mm_aesdec_si128(t3, ks[7]);
310: t4 = _mm_aesdec_si128(t4, ks[7]);
311: t1 = _mm_aesdec_si128(t1, ks[8]);
312: t2 = _mm_aesdec_si128(t2, ks[8]);
313: t3 = _mm_aesdec_si128(t3, ks[8]);
314: t4 = _mm_aesdec_si128(t4, ks[8]);
315: t1 = _mm_aesdec_si128(t1, ks[9]);
316: t2 = _mm_aesdec_si128(t2, ks[9]);
317: t3 = _mm_aesdec_si128(t3, ks[9]);
318: t4 = _mm_aesdec_si128(t4, ks[9]);
319: t1 = _mm_aesdec_si128(t1, ks[10]);
320: t2 = _mm_aesdec_si128(t2, ks[10]);
321: t3 = _mm_aesdec_si128(t3, ks[10]);
322: t4 = _mm_aesdec_si128(t4, ks[10]);
323: t1 = _mm_aesdec_si128(t1, ks[11]);
324: t2 = _mm_aesdec_si128(t2, ks[11]);
325: t3 = _mm_aesdec_si128(t3, ks[11]);
326: t4 = _mm_aesdec_si128(t4, ks[11]);
327:
328: t1 = _mm_aesdeclast_si128(t1, ks[12]);
329: t2 = _mm_aesdeclast_si128(t2, ks[12]);
330: t3 = _mm_aesdeclast_si128(t3, ks[12]);
331: t4 = _mm_aesdeclast_si128(t4, ks[12]);
332: t1 = _mm_xor_si128(t1, f1);
333: t2 = _mm_xor_si128(t2, f2);
334: t3 = _mm_xor_si128(t3, f3);
335: t4 = _mm_xor_si128(t4, f4);
336: _mm_storeu_si128(bo + i + 0, t1);
337: _mm_storeu_si128(bo + i + 1, t2);
338: _mm_storeu_si128(bo + i + 2, t3);
339: _mm_storeu_si128(bo + i + 3, t4);
340: f1 = last;
341: }
342:
343: for (i = pblocks; i < blocks; i++)
344: {
345: last = _mm_loadu_si128(bi + i);
346: t1 = _mm_xor_si128(last, ks[0]);
347:
348: t1 = _mm_aesdec_si128(t1, ks[1]);
349: t1 = _mm_aesdec_si128(t1, ks[2]);
350: t1 = _mm_aesdec_si128(t1, ks[3]);
351: t1 = _mm_aesdec_si128(t1, ks[4]);
352: t1 = _mm_aesdec_si128(t1, ks[5]);
353: t1 = _mm_aesdec_si128(t1, ks[6]);
354: t1 = _mm_aesdec_si128(t1, ks[7]);
355: t1 = _mm_aesdec_si128(t1, ks[8]);
356: t1 = _mm_aesdec_si128(t1, ks[9]);
357: t1 = _mm_aesdec_si128(t1, ks[10]);
358: t1 = _mm_aesdec_si128(t1, ks[11]);
359:
360: t1 = _mm_aesdeclast_si128(t1, ks[12]);
361: t1 = _mm_xor_si128(t1, f1);
362: _mm_storeu_si128(bo + i, t1);
363: f1 = last;
364: }
365: }
366:
367: /**
368: * AES-256 CBC encryption
369: */
370: static void encrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in,
371: u_char *iv, u_char *out)
372: {
373: __m128i *ks, t, fb, *bi, *bo;
374: int i;
375:
376: ks = key->schedule;
377: bi = (__m128i*)in;
378: bo = (__m128i*)out;
379:
380: fb = _mm_loadu_si128((__m128i*)iv);
381: for (i = 0; i < blocks; i++)
382: {
383: t = _mm_loadu_si128(bi + i);
384: fb = _mm_xor_si128(t, fb);
385: fb = _mm_xor_si128(fb, ks[0]);
386:
387: fb = _mm_aesenc_si128(fb, ks[1]);
388: fb = _mm_aesenc_si128(fb, ks[2]);
389: fb = _mm_aesenc_si128(fb, ks[3]);
390: fb = _mm_aesenc_si128(fb, ks[4]);
391: fb = _mm_aesenc_si128(fb, ks[5]);
392: fb = _mm_aesenc_si128(fb, ks[6]);
393: fb = _mm_aesenc_si128(fb, ks[7]);
394: fb = _mm_aesenc_si128(fb, ks[8]);
395: fb = _mm_aesenc_si128(fb, ks[9]);
396: fb = _mm_aesenc_si128(fb, ks[10]);
397: fb = _mm_aesenc_si128(fb, ks[11]);
398: fb = _mm_aesenc_si128(fb, ks[12]);
399: fb = _mm_aesenc_si128(fb, ks[13]);
400:
401: fb = _mm_aesenclast_si128(fb, ks[14]);
402: _mm_storeu_si128(bo + i, fb);
403: }
404: }
405:
406: /**
407: * AES-256 CBC decryption
408: */
409: static void decrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in,
410: u_char *iv, u_char *out)
411: {
412: __m128i *ks, last, *bi, *bo;
413: __m128i t1, t2, t3, t4;
414: __m128i f1, f2, f3, f4;
415: u_int i, pblocks;
416:
417: ks = key->schedule;
418: bi = (__m128i*)in;
419: bo = (__m128i*)out;
420: pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
421:
422: f1 = _mm_loadu_si128((__m128i*)iv);
423:
424: for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
425: {
426: t1 = _mm_loadu_si128(bi + i + 0);
427: t2 = _mm_loadu_si128(bi + i + 1);
428: t3 = _mm_loadu_si128(bi + i + 2);
429: t4 = _mm_loadu_si128(bi + i + 3);
430:
431: f2 = t1;
432: f3 = t2;
433: f4 = t3;
434: last = t4;
435:
436: t1 = _mm_xor_si128(t1, ks[0]);
437: t2 = _mm_xor_si128(t2, ks[0]);
438: t3 = _mm_xor_si128(t3, ks[0]);
439: t4 = _mm_xor_si128(t4, ks[0]);
440:
441: t1 = _mm_aesdec_si128(t1, ks[1]);
442: t2 = _mm_aesdec_si128(t2, ks[1]);
443: t3 = _mm_aesdec_si128(t3, ks[1]);
444: t4 = _mm_aesdec_si128(t4, ks[1]);
445: t1 = _mm_aesdec_si128(t1, ks[2]);
446: t2 = _mm_aesdec_si128(t2, ks[2]);
447: t3 = _mm_aesdec_si128(t3, ks[2]);
448: t4 = _mm_aesdec_si128(t4, ks[2]);
449: t1 = _mm_aesdec_si128(t1, ks[3]);
450: t2 = _mm_aesdec_si128(t2, ks[3]);
451: t3 = _mm_aesdec_si128(t3, ks[3]);
452: t4 = _mm_aesdec_si128(t4, ks[3]);
453: t1 = _mm_aesdec_si128(t1, ks[4]);
454: t2 = _mm_aesdec_si128(t2, ks[4]);
455: t3 = _mm_aesdec_si128(t3, ks[4]);
456: t4 = _mm_aesdec_si128(t4, ks[4]);
457: t1 = _mm_aesdec_si128(t1, ks[5]);
458: t2 = _mm_aesdec_si128(t2, ks[5]);
459: t3 = _mm_aesdec_si128(t3, ks[5]);
460: t4 = _mm_aesdec_si128(t4, ks[5]);
461: t1 = _mm_aesdec_si128(t1, ks[6]);
462: t2 = _mm_aesdec_si128(t2, ks[6]);
463: t3 = _mm_aesdec_si128(t3, ks[6]);
464: t4 = _mm_aesdec_si128(t4, ks[6]);
465: t1 = _mm_aesdec_si128(t1, ks[7]);
466: t2 = _mm_aesdec_si128(t2, ks[7]);
467: t3 = _mm_aesdec_si128(t3, ks[7]);
468: t4 = _mm_aesdec_si128(t4, ks[7]);
469: t1 = _mm_aesdec_si128(t1, ks[8]);
470: t2 = _mm_aesdec_si128(t2, ks[8]);
471: t3 = _mm_aesdec_si128(t3, ks[8]);
472: t4 = _mm_aesdec_si128(t4, ks[8]);
473: t1 = _mm_aesdec_si128(t1, ks[9]);
474: t2 = _mm_aesdec_si128(t2, ks[9]);
475: t3 = _mm_aesdec_si128(t3, ks[9]);
476: t4 = _mm_aesdec_si128(t4, ks[9]);
477: t1 = _mm_aesdec_si128(t1, ks[10]);
478: t2 = _mm_aesdec_si128(t2, ks[10]);
479: t3 = _mm_aesdec_si128(t3, ks[10]);
480: t4 = _mm_aesdec_si128(t4, ks[10]);
481: t1 = _mm_aesdec_si128(t1, ks[11]);
482: t2 = _mm_aesdec_si128(t2, ks[11]);
483: t3 = _mm_aesdec_si128(t3, ks[11]);
484: t4 = _mm_aesdec_si128(t4, ks[11]);
485: t1 = _mm_aesdec_si128(t1, ks[12]);
486: t2 = _mm_aesdec_si128(t2, ks[12]);
487: t3 = _mm_aesdec_si128(t3, ks[12]);
488: t4 = _mm_aesdec_si128(t4, ks[12]);
489: t1 = _mm_aesdec_si128(t1, ks[13]);
490: t2 = _mm_aesdec_si128(t2, ks[13]);
491: t3 = _mm_aesdec_si128(t3, ks[13]);
492: t4 = _mm_aesdec_si128(t4, ks[13]);
493:
494: t1 = _mm_aesdeclast_si128(t1, ks[14]);
495: t2 = _mm_aesdeclast_si128(t2, ks[14]);
496: t3 = _mm_aesdeclast_si128(t3, ks[14]);
497: t4 = _mm_aesdeclast_si128(t4, ks[14]);
498: t1 = _mm_xor_si128(t1, f1);
499: t2 = _mm_xor_si128(t2, f2);
500: t3 = _mm_xor_si128(t3, f3);
501: t4 = _mm_xor_si128(t4, f4);
502: _mm_storeu_si128(bo + i + 0, t1);
503: _mm_storeu_si128(bo + i + 1, t2);
504: _mm_storeu_si128(bo + i + 2, t3);
505: _mm_storeu_si128(bo + i + 3, t4);
506: f1 = last;
507: }
508:
509: for (i = pblocks; i < blocks; i++)
510: {
511: last = _mm_loadu_si128(bi + i);
512: t1 = _mm_xor_si128(last, ks[0]);
513:
514: t1 = _mm_aesdec_si128(t1, ks[1]);
515: t1 = _mm_aesdec_si128(t1, ks[2]);
516: t1 = _mm_aesdec_si128(t1, ks[3]);
517: t1 = _mm_aesdec_si128(t1, ks[4]);
518: t1 = _mm_aesdec_si128(t1, ks[5]);
519: t1 = _mm_aesdec_si128(t1, ks[6]);
520: t1 = _mm_aesdec_si128(t1, ks[7]);
521: t1 = _mm_aesdec_si128(t1, ks[8]);
522: t1 = _mm_aesdec_si128(t1, ks[9]);
523: t1 = _mm_aesdec_si128(t1, ks[10]);
524: t1 = _mm_aesdec_si128(t1, ks[11]);
525: t1 = _mm_aesdec_si128(t1, ks[12]);
526: t1 = _mm_aesdec_si128(t1, ks[13]);
527:
528: t1 = _mm_aesdeclast_si128(t1, ks[14]);
529: t1 = _mm_xor_si128(t1, f1);
530: _mm_storeu_si128(bo + i, t1);
531: f1 = last;
532: }
533: }
534:
535: /**
536: * Do inline or allocated de/encryption using key schedule
537: */
538: static bool crypt(aesni_cbc_fn_t fn, aesni_key_t *key,
539: chunk_t data, chunk_t iv, chunk_t *out)
540: {
541: u_char *buf;
542:
543: if (!key || iv.len != AES_BLOCK_SIZE || data.len % AES_BLOCK_SIZE)
544: {
545: return FALSE;
546: }
547: if (out)
548: {
549: *out = chunk_alloc(data.len);
550: buf = out->ptr;
551: }
552: else
553: {
554: buf = data.ptr;
555: }
556: fn(key, data.len / AES_BLOCK_SIZE, data.ptr, iv.ptr, buf);
557: return TRUE;
558: }
559:
560: METHOD(crypter_t, encrypt, bool,
561: private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *encrypted)
562: {
563: return crypt(this->encrypt, this->ekey, data, iv, encrypted);
564: }
565:
566: METHOD(crypter_t, decrypt, bool,
567: private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *decrypted)
568: {
569: return crypt(this->decrypt, this->dkey, data, iv, decrypted);
570: }
571:
572: METHOD(crypter_t, get_block_size, size_t,
573: private_aesni_cbc_t *this)
574: {
575: return AES_BLOCK_SIZE;
576: }
577:
578: METHOD(crypter_t, get_iv_size, size_t,
579: private_aesni_cbc_t *this)
580: {
581: return AES_BLOCK_SIZE;
582: }
583:
584: METHOD(crypter_t, get_key_size, size_t,
585: private_aesni_cbc_t *this)
586: {
587: return this->key_size;
588: }
589:
590: METHOD(crypter_t, set_key, bool,
591: private_aesni_cbc_t *this, chunk_t key)
592: {
593: if (key.len != this->key_size)
594: {
595: return FALSE;
596: }
597:
598: DESTROY_IF(this->ekey);
599: DESTROY_IF(this->dkey);
600:
601: this->ekey = aesni_key_create(TRUE, key);
602: this->dkey = aesni_key_create(FALSE, key);
603:
604: return this->ekey && this->dkey;
605: }
606:
607: METHOD(crypter_t, destroy, void,
608: private_aesni_cbc_t *this)
609: {
610: DESTROY_IF(this->ekey);
611: DESTROY_IF(this->dkey);
612: free_align(this);
613: }
614:
615: /**
616: * See header
617: */
618: aesni_cbc_t *aesni_cbc_create(encryption_algorithm_t algo, size_t key_size)
619: {
620: private_aesni_cbc_t *this;
621:
622: if (algo != ENCR_AES_CBC)
623: {
624: return NULL;
625: }
626: switch (key_size)
627: {
628: case 0:
629: key_size = 16;
630: break;
631: case 16:
632: case 24:
633: case 32:
634: break;
635: default:
636: return NULL;
637: }
638:
639: INIT_ALIGN(this, sizeof(__m128i),
640: .public = {
641: .crypter = {
642: .encrypt = _encrypt,
643: .decrypt = _decrypt,
644: .get_block_size = _get_block_size,
645: .get_iv_size = _get_iv_size,
646: .get_key_size = _get_key_size,
647: .set_key = _set_key,
648: .destroy = _destroy,
649: },
650: },
651: .key_size = key_size,
652: );
653:
654: switch (key_size)
655: {
656: case 16:
657: this->encrypt = encrypt_cbc128;
658: this->decrypt = decrypt_cbc128;
659: break;
660: case 24:
661: this->encrypt = encrypt_cbc192;
662: this->decrypt = decrypt_cbc192;
663: break;
664: case 32:
665: this->encrypt = encrypt_cbc256;
666: this->decrypt = decrypt_cbc256;
667: break;
668: }
669:
670: return &this->public;
671: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>