Annotation of embedaddon/strongswan/src/libstrongswan/plugins/aesni/aesni_cbc.c, revision 1.1
1.1 ! misho 1: /*
! 2: * Copyright (C) 2015 Martin Willi
! 3: * Copyright (C) 2015 revosec AG
! 4: *
! 5: * This program is free software; you can redistribute it and/or modify it
! 6: * under the terms of the GNU General Public License as published by the
! 7: * Free Software Foundation; either version 2 of the License, or (at your
! 8: * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
! 9: *
! 10: * This program is distributed in the hope that it will be useful, but
! 11: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 12: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
! 13: * for more details.
! 14: */
! 15:
! 16: #include "aesni_cbc.h"
! 17: #include "aesni_key.h"
! 18:
! 19: /**
! 20: * Pipeline parallelism we use for CBC decryption
! 21: */
! 22: #define CBC_DECRYPT_PARALLELISM 4
! 23:
! 24: typedef struct private_aesni_cbc_t private_aesni_cbc_t;
! 25:
! 26: /**
! 27: * CBC en/decryption method type
! 28: */
! 29: typedef void (*aesni_cbc_fn_t)(aesni_key_t*, u_int, u_char*, u_char*, u_char*);
! 30:
! 31: /**
! 32: * Private data of an aesni_cbc_t object.
! 33: */
! 34: struct private_aesni_cbc_t {
! 35:
! 36: /**
! 37: * Public aesni_cbc_t interface.
! 38: */
! 39: aesni_cbc_t public;
! 40:
! 41: /**
! 42: * Key size
! 43: */
! 44: u_int key_size;
! 45:
! 46: /**
! 47: * Encryption key schedule
! 48: */
! 49: aesni_key_t *ekey;
! 50:
! 51: /**
! 52: * Decryption key schedule
! 53: */
! 54: aesni_key_t *dkey;
! 55:
! 56: /**
! 57: * Encryption method
! 58: */
! 59: aesni_cbc_fn_t encrypt;
! 60:
! 61: /**
! 62: * Decryption method
! 63: */
! 64: aesni_cbc_fn_t decrypt;
! 65: };
! 66:
! 67: /**
! 68: * AES-128 CBC encryption
! 69: */
! 70: static void encrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in,
! 71: u_char *iv, u_char *out)
! 72: {
! 73: __m128i *ks, t, fb, *bi, *bo;
! 74: int i;
! 75:
! 76: ks = key->schedule;
! 77: bi = (__m128i*)in;
! 78: bo = (__m128i*)out;
! 79:
! 80: fb = _mm_loadu_si128((__m128i*)iv);
! 81: for (i = 0; i < blocks; i++)
! 82: {
! 83: t = _mm_loadu_si128(bi + i);
! 84: fb = _mm_xor_si128(t, fb);
! 85: fb = _mm_xor_si128(fb, ks[0]);
! 86:
! 87: fb = _mm_aesenc_si128(fb, ks[1]);
! 88: fb = _mm_aesenc_si128(fb, ks[2]);
! 89: fb = _mm_aesenc_si128(fb, ks[3]);
! 90: fb = _mm_aesenc_si128(fb, ks[4]);
! 91: fb = _mm_aesenc_si128(fb, ks[5]);
! 92: fb = _mm_aesenc_si128(fb, ks[6]);
! 93: fb = _mm_aesenc_si128(fb, ks[7]);
! 94: fb = _mm_aesenc_si128(fb, ks[8]);
! 95: fb = _mm_aesenc_si128(fb, ks[9]);
! 96:
! 97: fb = _mm_aesenclast_si128(fb, ks[10]);
! 98: _mm_storeu_si128(bo + i, fb);
! 99: }
! 100: }
! 101:
! 102: /**
! 103: * AES-128 CBC decryption
! 104: */
! 105: static void decrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in,
! 106: u_char *iv, u_char *out)
! 107: {
! 108: __m128i *ks, last, *bi, *bo;
! 109: __m128i t1, t2, t3, t4;
! 110: __m128i f1, f2, f3, f4;
! 111: u_int i, pblocks;
! 112:
! 113: ks = key->schedule;
! 114: bi = (__m128i*)in;
! 115: bo = (__m128i*)out;
! 116: pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
! 117:
! 118: f1 = _mm_loadu_si128((__m128i*)iv);
! 119:
! 120: for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
! 121: {
! 122: t1 = _mm_loadu_si128(bi + i + 0);
! 123: t2 = _mm_loadu_si128(bi + i + 1);
! 124: t3 = _mm_loadu_si128(bi + i + 2);
! 125: t4 = _mm_loadu_si128(bi + i + 3);
! 126:
! 127: f2 = t1;
! 128: f3 = t2;
! 129: f4 = t3;
! 130: last = t4;
! 131:
! 132: t1 = _mm_xor_si128(t1, ks[0]);
! 133: t2 = _mm_xor_si128(t2, ks[0]);
! 134: t3 = _mm_xor_si128(t3, ks[0]);
! 135: t4 = _mm_xor_si128(t4, ks[0]);
! 136:
! 137: t1 = _mm_aesdec_si128(t1, ks[1]);
! 138: t2 = _mm_aesdec_si128(t2, ks[1]);
! 139: t3 = _mm_aesdec_si128(t3, ks[1]);
! 140: t4 = _mm_aesdec_si128(t4, ks[1]);
! 141: t1 = _mm_aesdec_si128(t1, ks[2]);
! 142: t2 = _mm_aesdec_si128(t2, ks[2]);
! 143: t3 = _mm_aesdec_si128(t3, ks[2]);
! 144: t4 = _mm_aesdec_si128(t4, ks[2]);
! 145: t1 = _mm_aesdec_si128(t1, ks[3]);
! 146: t2 = _mm_aesdec_si128(t2, ks[3]);
! 147: t3 = _mm_aesdec_si128(t3, ks[3]);
! 148: t4 = _mm_aesdec_si128(t4, ks[3]);
! 149: t1 = _mm_aesdec_si128(t1, ks[4]);
! 150: t2 = _mm_aesdec_si128(t2, ks[4]);
! 151: t3 = _mm_aesdec_si128(t3, ks[4]);
! 152: t4 = _mm_aesdec_si128(t4, ks[4]);
! 153: t1 = _mm_aesdec_si128(t1, ks[5]);
! 154: t2 = _mm_aesdec_si128(t2, ks[5]);
! 155: t3 = _mm_aesdec_si128(t3, ks[5]);
! 156: t4 = _mm_aesdec_si128(t4, ks[5]);
! 157: t1 = _mm_aesdec_si128(t1, ks[6]);
! 158: t2 = _mm_aesdec_si128(t2, ks[6]);
! 159: t3 = _mm_aesdec_si128(t3, ks[6]);
! 160: t4 = _mm_aesdec_si128(t4, ks[6]);
! 161: t1 = _mm_aesdec_si128(t1, ks[7]);
! 162: t2 = _mm_aesdec_si128(t2, ks[7]);
! 163: t3 = _mm_aesdec_si128(t3, ks[7]);
! 164: t4 = _mm_aesdec_si128(t4, ks[7]);
! 165: t1 = _mm_aesdec_si128(t1, ks[8]);
! 166: t2 = _mm_aesdec_si128(t2, ks[8]);
! 167: t3 = _mm_aesdec_si128(t3, ks[8]);
! 168: t4 = _mm_aesdec_si128(t4, ks[8]);
! 169: t1 = _mm_aesdec_si128(t1, ks[9]);
! 170: t2 = _mm_aesdec_si128(t2, ks[9]);
! 171: t3 = _mm_aesdec_si128(t3, ks[9]);
! 172: t4 = _mm_aesdec_si128(t4, ks[9]);
! 173:
! 174: t1 = _mm_aesdeclast_si128(t1, ks[10]);
! 175: t2 = _mm_aesdeclast_si128(t2, ks[10]);
! 176: t3 = _mm_aesdeclast_si128(t3, ks[10]);
! 177: t4 = _mm_aesdeclast_si128(t4, ks[10]);
! 178: t1 = _mm_xor_si128(t1, f1);
! 179: t2 = _mm_xor_si128(t2, f2);
! 180: t3 = _mm_xor_si128(t3, f3);
! 181: t4 = _mm_xor_si128(t4, f4);
! 182: _mm_storeu_si128(bo + i + 0, t1);
! 183: _mm_storeu_si128(bo + i + 1, t2);
! 184: _mm_storeu_si128(bo + i + 2, t3);
! 185: _mm_storeu_si128(bo + i + 3, t4);
! 186: f1 = last;
! 187: }
! 188:
! 189: for (i = pblocks; i < blocks; i++)
! 190: {
! 191: last = _mm_loadu_si128(bi + i);
! 192: t1 = _mm_xor_si128(last, ks[0]);
! 193:
! 194: t1 = _mm_aesdec_si128(t1, ks[1]);
! 195: t1 = _mm_aesdec_si128(t1, ks[2]);
! 196: t1 = _mm_aesdec_si128(t1, ks[3]);
! 197: t1 = _mm_aesdec_si128(t1, ks[4]);
! 198: t1 = _mm_aesdec_si128(t1, ks[5]);
! 199: t1 = _mm_aesdec_si128(t1, ks[6]);
! 200: t1 = _mm_aesdec_si128(t1, ks[7]);
! 201: t1 = _mm_aesdec_si128(t1, ks[8]);
! 202: t1 = _mm_aesdec_si128(t1, ks[9]);
! 203:
! 204: t1 = _mm_aesdeclast_si128(t1, ks[10]);
! 205: t1 = _mm_xor_si128(t1, f1);
! 206: _mm_storeu_si128(bo + i, t1);
! 207: f1 = last;
! 208: }
! 209: }
! 210:
! 211: /**
! 212: * AES-192 CBC encryption
! 213: */
! 214: static void encrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in,
! 215: u_char *iv, u_char *out)
! 216: {
! 217: __m128i *ks, t, fb, *bi, *bo;
! 218: int i;
! 219:
! 220: ks = key->schedule;
! 221: bi = (__m128i*)in;
! 222: bo = (__m128i*)out;
! 223:
! 224: fb = _mm_loadu_si128((__m128i*)iv);
! 225: for (i = 0; i < blocks; i++)
! 226: {
! 227: t = _mm_loadu_si128(bi + i);
! 228: fb = _mm_xor_si128(t, fb);
! 229: fb = _mm_xor_si128(fb, ks[0]);
! 230:
! 231: fb = _mm_aesenc_si128(fb, ks[1]);
! 232: fb = _mm_aesenc_si128(fb, ks[2]);
! 233: fb = _mm_aesenc_si128(fb, ks[3]);
! 234: fb = _mm_aesenc_si128(fb, ks[4]);
! 235: fb = _mm_aesenc_si128(fb, ks[5]);
! 236: fb = _mm_aesenc_si128(fb, ks[6]);
! 237: fb = _mm_aesenc_si128(fb, ks[7]);
! 238: fb = _mm_aesenc_si128(fb, ks[8]);
! 239: fb = _mm_aesenc_si128(fb, ks[9]);
! 240: fb = _mm_aesenc_si128(fb, ks[10]);
! 241: fb = _mm_aesenc_si128(fb, ks[11]);
! 242:
! 243: fb = _mm_aesenclast_si128(fb, ks[12]);
! 244: _mm_storeu_si128(bo + i, fb);
! 245: }
! 246: }
! 247:
! 248: /**
! 249: * AES-192 CBC decryption
! 250: */
! 251: static void decrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in,
! 252: u_char *iv, u_char *out)
! 253: {
! 254: __m128i *ks, last, *bi, *bo;
! 255: __m128i t1, t2, t3, t4;
! 256: __m128i f1, f2, f3, f4;
! 257: u_int i, pblocks;
! 258:
! 259: ks = key->schedule;
! 260: bi = (__m128i*)in;
! 261: bo = (__m128i*)out;
! 262: pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
! 263:
! 264: f1 = _mm_loadu_si128((__m128i*)iv);
! 265:
! 266: for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
! 267: {
! 268: t1 = _mm_loadu_si128(bi + i + 0);
! 269: t2 = _mm_loadu_si128(bi + i + 1);
! 270: t3 = _mm_loadu_si128(bi + i + 2);
! 271: t4 = _mm_loadu_si128(bi + i + 3);
! 272:
! 273: f2 = t1;
! 274: f3 = t2;
! 275: f4 = t3;
! 276: last = t4;
! 277:
! 278: t1 = _mm_xor_si128(t1, ks[0]);
! 279: t2 = _mm_xor_si128(t2, ks[0]);
! 280: t3 = _mm_xor_si128(t3, ks[0]);
! 281: t4 = _mm_xor_si128(t4, ks[0]);
! 282:
! 283: t1 = _mm_aesdec_si128(t1, ks[1]);
! 284: t2 = _mm_aesdec_si128(t2, ks[1]);
! 285: t3 = _mm_aesdec_si128(t3, ks[1]);
! 286: t4 = _mm_aesdec_si128(t4, ks[1]);
! 287: t1 = _mm_aesdec_si128(t1, ks[2]);
! 288: t2 = _mm_aesdec_si128(t2, ks[2]);
! 289: t3 = _mm_aesdec_si128(t3, ks[2]);
! 290: t4 = _mm_aesdec_si128(t4, ks[2]);
! 291: t1 = _mm_aesdec_si128(t1, ks[3]);
! 292: t2 = _mm_aesdec_si128(t2, ks[3]);
! 293: t3 = _mm_aesdec_si128(t3, ks[3]);
! 294: t4 = _mm_aesdec_si128(t4, ks[3]);
! 295: t1 = _mm_aesdec_si128(t1, ks[4]);
! 296: t2 = _mm_aesdec_si128(t2, ks[4]);
! 297: t3 = _mm_aesdec_si128(t3, ks[4]);
! 298: t4 = _mm_aesdec_si128(t4, ks[4]);
! 299: t1 = _mm_aesdec_si128(t1, ks[5]);
! 300: t2 = _mm_aesdec_si128(t2, ks[5]);
! 301: t3 = _mm_aesdec_si128(t3, ks[5]);
! 302: t4 = _mm_aesdec_si128(t4, ks[5]);
! 303: t1 = _mm_aesdec_si128(t1, ks[6]);
! 304: t2 = _mm_aesdec_si128(t2, ks[6]);
! 305: t3 = _mm_aesdec_si128(t3, ks[6]);
! 306: t4 = _mm_aesdec_si128(t4, ks[6]);
! 307: t1 = _mm_aesdec_si128(t1, ks[7]);
! 308: t2 = _mm_aesdec_si128(t2, ks[7]);
! 309: t3 = _mm_aesdec_si128(t3, ks[7]);
! 310: t4 = _mm_aesdec_si128(t4, ks[7]);
! 311: t1 = _mm_aesdec_si128(t1, ks[8]);
! 312: t2 = _mm_aesdec_si128(t2, ks[8]);
! 313: t3 = _mm_aesdec_si128(t3, ks[8]);
! 314: t4 = _mm_aesdec_si128(t4, ks[8]);
! 315: t1 = _mm_aesdec_si128(t1, ks[9]);
! 316: t2 = _mm_aesdec_si128(t2, ks[9]);
! 317: t3 = _mm_aesdec_si128(t3, ks[9]);
! 318: t4 = _mm_aesdec_si128(t4, ks[9]);
! 319: t1 = _mm_aesdec_si128(t1, ks[10]);
! 320: t2 = _mm_aesdec_si128(t2, ks[10]);
! 321: t3 = _mm_aesdec_si128(t3, ks[10]);
! 322: t4 = _mm_aesdec_si128(t4, ks[10]);
! 323: t1 = _mm_aesdec_si128(t1, ks[11]);
! 324: t2 = _mm_aesdec_si128(t2, ks[11]);
! 325: t3 = _mm_aesdec_si128(t3, ks[11]);
! 326: t4 = _mm_aesdec_si128(t4, ks[11]);
! 327:
! 328: t1 = _mm_aesdeclast_si128(t1, ks[12]);
! 329: t2 = _mm_aesdeclast_si128(t2, ks[12]);
! 330: t3 = _mm_aesdeclast_si128(t3, ks[12]);
! 331: t4 = _mm_aesdeclast_si128(t4, ks[12]);
! 332: t1 = _mm_xor_si128(t1, f1);
! 333: t2 = _mm_xor_si128(t2, f2);
! 334: t3 = _mm_xor_si128(t3, f3);
! 335: t4 = _mm_xor_si128(t4, f4);
! 336: _mm_storeu_si128(bo + i + 0, t1);
! 337: _mm_storeu_si128(bo + i + 1, t2);
! 338: _mm_storeu_si128(bo + i + 2, t3);
! 339: _mm_storeu_si128(bo + i + 3, t4);
! 340: f1 = last;
! 341: }
! 342:
! 343: for (i = pblocks; i < blocks; i++)
! 344: {
! 345: last = _mm_loadu_si128(bi + i);
! 346: t1 = _mm_xor_si128(last, ks[0]);
! 347:
! 348: t1 = _mm_aesdec_si128(t1, ks[1]);
! 349: t1 = _mm_aesdec_si128(t1, ks[2]);
! 350: t1 = _mm_aesdec_si128(t1, ks[3]);
! 351: t1 = _mm_aesdec_si128(t1, ks[4]);
! 352: t1 = _mm_aesdec_si128(t1, ks[5]);
! 353: t1 = _mm_aesdec_si128(t1, ks[6]);
! 354: t1 = _mm_aesdec_si128(t1, ks[7]);
! 355: t1 = _mm_aesdec_si128(t1, ks[8]);
! 356: t1 = _mm_aesdec_si128(t1, ks[9]);
! 357: t1 = _mm_aesdec_si128(t1, ks[10]);
! 358: t1 = _mm_aesdec_si128(t1, ks[11]);
! 359:
! 360: t1 = _mm_aesdeclast_si128(t1, ks[12]);
! 361: t1 = _mm_xor_si128(t1, f1);
! 362: _mm_storeu_si128(bo + i, t1);
! 363: f1 = last;
! 364: }
! 365: }
! 366:
! 367: /**
! 368: * AES-256 CBC encryption
! 369: */
! 370: static void encrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in,
! 371: u_char *iv, u_char *out)
! 372: {
! 373: __m128i *ks, t, fb, *bi, *bo;
! 374: int i;
! 375:
! 376: ks = key->schedule;
! 377: bi = (__m128i*)in;
! 378: bo = (__m128i*)out;
! 379:
! 380: fb = _mm_loadu_si128((__m128i*)iv);
! 381: for (i = 0; i < blocks; i++)
! 382: {
! 383: t = _mm_loadu_si128(bi + i);
! 384: fb = _mm_xor_si128(t, fb);
! 385: fb = _mm_xor_si128(fb, ks[0]);
! 386:
! 387: fb = _mm_aesenc_si128(fb, ks[1]);
! 388: fb = _mm_aesenc_si128(fb, ks[2]);
! 389: fb = _mm_aesenc_si128(fb, ks[3]);
! 390: fb = _mm_aesenc_si128(fb, ks[4]);
! 391: fb = _mm_aesenc_si128(fb, ks[5]);
! 392: fb = _mm_aesenc_si128(fb, ks[6]);
! 393: fb = _mm_aesenc_si128(fb, ks[7]);
! 394: fb = _mm_aesenc_si128(fb, ks[8]);
! 395: fb = _mm_aesenc_si128(fb, ks[9]);
! 396: fb = _mm_aesenc_si128(fb, ks[10]);
! 397: fb = _mm_aesenc_si128(fb, ks[11]);
! 398: fb = _mm_aesenc_si128(fb, ks[12]);
! 399: fb = _mm_aesenc_si128(fb, ks[13]);
! 400:
! 401: fb = _mm_aesenclast_si128(fb, ks[14]);
! 402: _mm_storeu_si128(bo + i, fb);
! 403: }
! 404: }
! 405:
! 406: /**
! 407: * AES-256 CBC decryption
! 408: */
! 409: static void decrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in,
! 410: u_char *iv, u_char *out)
! 411: {
! 412: __m128i *ks, last, *bi, *bo;
! 413: __m128i t1, t2, t3, t4;
! 414: __m128i f1, f2, f3, f4;
! 415: u_int i, pblocks;
! 416:
! 417: ks = key->schedule;
! 418: bi = (__m128i*)in;
! 419: bo = (__m128i*)out;
! 420: pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);
! 421:
! 422: f1 = _mm_loadu_si128((__m128i*)iv);
! 423:
! 424: for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
! 425: {
! 426: t1 = _mm_loadu_si128(bi + i + 0);
! 427: t2 = _mm_loadu_si128(bi + i + 1);
! 428: t3 = _mm_loadu_si128(bi + i + 2);
! 429: t4 = _mm_loadu_si128(bi + i + 3);
! 430:
! 431: f2 = t1;
! 432: f3 = t2;
! 433: f4 = t3;
! 434: last = t4;
! 435:
! 436: t1 = _mm_xor_si128(t1, ks[0]);
! 437: t2 = _mm_xor_si128(t2, ks[0]);
! 438: t3 = _mm_xor_si128(t3, ks[0]);
! 439: t4 = _mm_xor_si128(t4, ks[0]);
! 440:
! 441: t1 = _mm_aesdec_si128(t1, ks[1]);
! 442: t2 = _mm_aesdec_si128(t2, ks[1]);
! 443: t3 = _mm_aesdec_si128(t3, ks[1]);
! 444: t4 = _mm_aesdec_si128(t4, ks[1]);
! 445: t1 = _mm_aesdec_si128(t1, ks[2]);
! 446: t2 = _mm_aesdec_si128(t2, ks[2]);
! 447: t3 = _mm_aesdec_si128(t3, ks[2]);
! 448: t4 = _mm_aesdec_si128(t4, ks[2]);
! 449: t1 = _mm_aesdec_si128(t1, ks[3]);
! 450: t2 = _mm_aesdec_si128(t2, ks[3]);
! 451: t3 = _mm_aesdec_si128(t3, ks[3]);
! 452: t4 = _mm_aesdec_si128(t4, ks[3]);
! 453: t1 = _mm_aesdec_si128(t1, ks[4]);
! 454: t2 = _mm_aesdec_si128(t2, ks[4]);
! 455: t3 = _mm_aesdec_si128(t3, ks[4]);
! 456: t4 = _mm_aesdec_si128(t4, ks[4]);
! 457: t1 = _mm_aesdec_si128(t1, ks[5]);
! 458: t2 = _mm_aesdec_si128(t2, ks[5]);
! 459: t3 = _mm_aesdec_si128(t3, ks[5]);
! 460: t4 = _mm_aesdec_si128(t4, ks[5]);
! 461: t1 = _mm_aesdec_si128(t1, ks[6]);
! 462: t2 = _mm_aesdec_si128(t2, ks[6]);
! 463: t3 = _mm_aesdec_si128(t3, ks[6]);
! 464: t4 = _mm_aesdec_si128(t4, ks[6]);
! 465: t1 = _mm_aesdec_si128(t1, ks[7]);
! 466: t2 = _mm_aesdec_si128(t2, ks[7]);
! 467: t3 = _mm_aesdec_si128(t3, ks[7]);
! 468: t4 = _mm_aesdec_si128(t4, ks[7]);
! 469: t1 = _mm_aesdec_si128(t1, ks[8]);
! 470: t2 = _mm_aesdec_si128(t2, ks[8]);
! 471: t3 = _mm_aesdec_si128(t3, ks[8]);
! 472: t4 = _mm_aesdec_si128(t4, ks[8]);
! 473: t1 = _mm_aesdec_si128(t1, ks[9]);
! 474: t2 = _mm_aesdec_si128(t2, ks[9]);
! 475: t3 = _mm_aesdec_si128(t3, ks[9]);
! 476: t4 = _mm_aesdec_si128(t4, ks[9]);
! 477: t1 = _mm_aesdec_si128(t1, ks[10]);
! 478: t2 = _mm_aesdec_si128(t2, ks[10]);
! 479: t3 = _mm_aesdec_si128(t3, ks[10]);
! 480: t4 = _mm_aesdec_si128(t4, ks[10]);
! 481: t1 = _mm_aesdec_si128(t1, ks[11]);
! 482: t2 = _mm_aesdec_si128(t2, ks[11]);
! 483: t3 = _mm_aesdec_si128(t3, ks[11]);
! 484: t4 = _mm_aesdec_si128(t4, ks[11]);
! 485: t1 = _mm_aesdec_si128(t1, ks[12]);
! 486: t2 = _mm_aesdec_si128(t2, ks[12]);
! 487: t3 = _mm_aesdec_si128(t3, ks[12]);
! 488: t4 = _mm_aesdec_si128(t4, ks[12]);
! 489: t1 = _mm_aesdec_si128(t1, ks[13]);
! 490: t2 = _mm_aesdec_si128(t2, ks[13]);
! 491: t3 = _mm_aesdec_si128(t3, ks[13]);
! 492: t4 = _mm_aesdec_si128(t4, ks[13]);
! 493:
! 494: t1 = _mm_aesdeclast_si128(t1, ks[14]);
! 495: t2 = _mm_aesdeclast_si128(t2, ks[14]);
! 496: t3 = _mm_aesdeclast_si128(t3, ks[14]);
! 497: t4 = _mm_aesdeclast_si128(t4, ks[14]);
! 498: t1 = _mm_xor_si128(t1, f1);
! 499: t2 = _mm_xor_si128(t2, f2);
! 500: t3 = _mm_xor_si128(t3, f3);
! 501: t4 = _mm_xor_si128(t4, f4);
! 502: _mm_storeu_si128(bo + i + 0, t1);
! 503: _mm_storeu_si128(bo + i + 1, t2);
! 504: _mm_storeu_si128(bo + i + 2, t3);
! 505: _mm_storeu_si128(bo + i + 3, t4);
! 506: f1 = last;
! 507: }
! 508:
! 509: for (i = pblocks; i < blocks; i++)
! 510: {
! 511: last = _mm_loadu_si128(bi + i);
! 512: t1 = _mm_xor_si128(last, ks[0]);
! 513:
! 514: t1 = _mm_aesdec_si128(t1, ks[1]);
! 515: t1 = _mm_aesdec_si128(t1, ks[2]);
! 516: t1 = _mm_aesdec_si128(t1, ks[3]);
! 517: t1 = _mm_aesdec_si128(t1, ks[4]);
! 518: t1 = _mm_aesdec_si128(t1, ks[5]);
! 519: t1 = _mm_aesdec_si128(t1, ks[6]);
! 520: t1 = _mm_aesdec_si128(t1, ks[7]);
! 521: t1 = _mm_aesdec_si128(t1, ks[8]);
! 522: t1 = _mm_aesdec_si128(t1, ks[9]);
! 523: t1 = _mm_aesdec_si128(t1, ks[10]);
! 524: t1 = _mm_aesdec_si128(t1, ks[11]);
! 525: t1 = _mm_aesdec_si128(t1, ks[12]);
! 526: t1 = _mm_aesdec_si128(t1, ks[13]);
! 527:
! 528: t1 = _mm_aesdeclast_si128(t1, ks[14]);
! 529: t1 = _mm_xor_si128(t1, f1);
! 530: _mm_storeu_si128(bo + i, t1);
! 531: f1 = last;
! 532: }
! 533: }
! 534:
! 535: /**
! 536: * Do inline or allocated de/encryption using key schedule
! 537: */
! 538: static bool crypt(aesni_cbc_fn_t fn, aesni_key_t *key,
! 539: chunk_t data, chunk_t iv, chunk_t *out)
! 540: {
! 541: u_char *buf;
! 542:
! 543: if (!key || iv.len != AES_BLOCK_SIZE || data.len % AES_BLOCK_SIZE)
! 544: {
! 545: return FALSE;
! 546: }
! 547: if (out)
! 548: {
! 549: *out = chunk_alloc(data.len);
! 550: buf = out->ptr;
! 551: }
! 552: else
! 553: {
! 554: buf = data.ptr;
! 555: }
! 556: fn(key, data.len / AES_BLOCK_SIZE, data.ptr, iv.ptr, buf);
! 557: return TRUE;
! 558: }
! 559:
! 560: METHOD(crypter_t, encrypt, bool,
! 561: private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *encrypted)
! 562: {
! 563: return crypt(this->encrypt, this->ekey, data, iv, encrypted);
! 564: }
! 565:
! 566: METHOD(crypter_t, decrypt, bool,
! 567: private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *decrypted)
! 568: {
! 569: return crypt(this->decrypt, this->dkey, data, iv, decrypted);
! 570: }
! 571:
! 572: METHOD(crypter_t, get_block_size, size_t,
! 573: private_aesni_cbc_t *this)
! 574: {
! 575: return AES_BLOCK_SIZE;
! 576: }
! 577:
! 578: METHOD(crypter_t, get_iv_size, size_t,
! 579: private_aesni_cbc_t *this)
! 580: {
! 581: return AES_BLOCK_SIZE;
! 582: }
! 583:
! 584: METHOD(crypter_t, get_key_size, size_t,
! 585: private_aesni_cbc_t *this)
! 586: {
! 587: return this->key_size;
! 588: }
! 589:
! 590: METHOD(crypter_t, set_key, bool,
! 591: private_aesni_cbc_t *this, chunk_t key)
! 592: {
! 593: if (key.len != this->key_size)
! 594: {
! 595: return FALSE;
! 596: }
! 597:
! 598: DESTROY_IF(this->ekey);
! 599: DESTROY_IF(this->dkey);
! 600:
! 601: this->ekey = aesni_key_create(TRUE, key);
! 602: this->dkey = aesni_key_create(FALSE, key);
! 603:
! 604: return this->ekey && this->dkey;
! 605: }
! 606:
! 607: METHOD(crypter_t, destroy, void,
! 608: private_aesni_cbc_t *this)
! 609: {
! 610: DESTROY_IF(this->ekey);
! 611: DESTROY_IF(this->dkey);
! 612: free_align(this);
! 613: }
! 614:
! 615: /**
! 616: * See header
! 617: */
! 618: aesni_cbc_t *aesni_cbc_create(encryption_algorithm_t algo, size_t key_size)
! 619: {
! 620: private_aesni_cbc_t *this;
! 621:
! 622: if (algo != ENCR_AES_CBC)
! 623: {
! 624: return NULL;
! 625: }
! 626: switch (key_size)
! 627: {
! 628: case 0:
! 629: key_size = 16;
! 630: break;
! 631: case 16:
! 632: case 24:
! 633: case 32:
! 634: break;
! 635: default:
! 636: return NULL;
! 637: }
! 638:
! 639: INIT_ALIGN(this, sizeof(__m128i),
! 640: .public = {
! 641: .crypter = {
! 642: .encrypt = _encrypt,
! 643: .decrypt = _decrypt,
! 644: .get_block_size = _get_block_size,
! 645: .get_iv_size = _get_iv_size,
! 646: .get_key_size = _get_key_size,
! 647: .set_key = _set_key,
! 648: .destroy = _destroy,
! 649: },
! 650: },
! 651: .key_size = key_size,
! 652: );
! 653:
! 654: switch (key_size)
! 655: {
! 656: case 16:
! 657: this->encrypt = encrypt_cbc128;
! 658: this->decrypt = decrypt_cbc128;
! 659: break;
! 660: case 24:
! 661: this->encrypt = encrypt_cbc192;
! 662: this->decrypt = decrypt_cbc192;
! 663: break;
! 664: case 32:
! 665: this->encrypt = encrypt_cbc256;
! 666: this->decrypt = decrypt_cbc256;
! 667: break;
! 668: }
! 669:
! 670: return &this->public;
! 671: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>