/*
 * Copyright (C) 2015 Martin Willi
 * Copyright (C) 2015 revosec AG
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 */

#include "aesni_cbc.h"
#include "aesni_key.h"

/**
 * Pipeline parallelism we use for CBC decryption
 */
#define CBC_DECRYPT_PARALLELISM 4

typedef struct private_aesni_cbc_t private_aesni_cbc_t;

/**
 * CBC en/decryption method type
 */
typedef void (*aesni_cbc_fn_t)(aesni_key_t*, u_int, u_char*, u_char*, u_char*);

/**
 * Private data of an aesni_cbc_t object.
 */
struct private_aesni_cbc_t {

	/**
	 * Public aesni_cbc_t interface.
	 */
	aesni_cbc_t public;

	/**
	 * Key size
	 */
	u_int key_size;

	/**
	 * Encryption key schedule
	 */
	aesni_key_t *ekey;

	/**
	 * Decryption key schedule
	 */
	aesni_key_t *dkey;

	/**
	 * Encryption method
	 */
	aesni_cbc_fn_t encrypt;

	/**
	 * Decryption method
	 */
	aesni_cbc_fn_t decrypt;
};

/**
 * AES-128 CBC encryption
 */
static void encrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in,
						   u_char *iv, u_char *out)
{
	__m128i *ks, t, fb, *bi, *bo;
	int i;

	ks = key->schedule;
	bi = (__m128i*)in;
	bo = (__m128i*)out;

	fb = _mm_loadu_si128((__m128i*)iv);
	for (i = 0; i < blocks; i++)
	{
		t = _mm_loadu_si128(bi + i);
		fb = _mm_xor_si128(t, fb);
		fb = _mm_xor_si128(fb, ks[0]);

		fb = _mm_aesenc_si128(fb, ks[1]);
		fb = _mm_aesenc_si128(fb, ks[2]);
		fb = _mm_aesenc_si128(fb, ks[3]);
		fb = _mm_aesenc_si128(fb, ks[4]);
		fb = _mm_aesenc_si128(fb, ks[5]);
		fb = _mm_aesenc_si128(fb, ks[6]);
		fb = _mm_aesenc_si128(fb, ks[7]);
		fb = _mm_aesenc_si128(fb, ks[8]);
		fb = _mm_aesenc_si128(fb, ks[9]);

		fb = _mm_aesenclast_si128(fb, ks[10]);
		_mm_storeu_si128(bo + i, fb);
	}
}

/**
 * AES-128 CBC decryption
 */
static void decrypt_cbc128(aesni_key_t *key, u_int blocks, u_char *in,
						   u_char *iv, u_char *out)
{
	__m128i *ks, last, *bi, *bo;
	__m128i t1, t2, t3, t4;
	__m128i f1, f2, f3, f4;
	u_int i, pblocks;

	ks = key->schedule;
	bi = (__m128i*)in;
	bo = (__m128i*)out;
	pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);

	f1 = _mm_loadu_si128((__m128i*)iv);

	for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
	{
		t1 = _mm_loadu_si128(bi + i + 0);
		t2 = _mm_loadu_si128(bi + i + 1);
		t3 = _mm_loadu_si128(bi + i + 2);
		t4 = _mm_loadu_si128(bi + i + 3);

		f2 = t1;
		f3 = t2;
		f4 = t3;
		last = t4;

		t1 = _mm_xor_si128(t1, ks[0]);
		t2 = _mm_xor_si128(t2, ks[0]);
		t3 = _mm_xor_si128(t3, ks[0]);
		t4 = _mm_xor_si128(t4, ks[0]);

		t1 = _mm_aesdec_si128(t1, ks[1]);
		t2 = _mm_aesdec_si128(t2, ks[1]);
		t3 = _mm_aesdec_si128(t3, ks[1]);
		t4 = _mm_aesdec_si128(t4, ks[1]);
		t1 = _mm_aesdec_si128(t1, ks[2]);
		t2 = _mm_aesdec_si128(t2, ks[2]);
		t3 = _mm_aesdec_si128(t3, ks[2]);
		t4 = _mm_aesdec_si128(t4, ks[2]);
		t1 = _mm_aesdec_si128(t1, ks[3]);
		t2 = _mm_aesdec_si128(t2, ks[3]);
		t3 = _mm_aesdec_si128(t3, ks[3]);
		t4 = _mm_aesdec_si128(t4, ks[3]);
		t1 = _mm_aesdec_si128(t1, ks[4]);
		t2 = _mm_aesdec_si128(t2, ks[4]);
		t3 = _mm_aesdec_si128(t3, ks[4]);
		t4 = _mm_aesdec_si128(t4, ks[4]);
		t1 = _mm_aesdec_si128(t1, ks[5]);
		t2 = _mm_aesdec_si128(t2, ks[5]);
		t3 = _mm_aesdec_si128(t3, ks[5]);
		t4 = _mm_aesdec_si128(t4, ks[5]);
		t1 = _mm_aesdec_si128(t1, ks[6]);
		t2 = _mm_aesdec_si128(t2, ks[6]);
		t3 = _mm_aesdec_si128(t3, ks[6]);
		t4 = _mm_aesdec_si128(t4, ks[6]);
		t1 = _mm_aesdec_si128(t1, ks[7]);
		t2 = _mm_aesdec_si128(t2, ks[7]);
		t3 = _mm_aesdec_si128(t3, ks[7]);
		t4 = _mm_aesdec_si128(t4, ks[7]);
		t1 = _mm_aesdec_si128(t1, ks[8]);
		t2 = _mm_aesdec_si128(t2, ks[8]);
		t3 = _mm_aesdec_si128(t3, ks[8]);
		t4 = _mm_aesdec_si128(t4, ks[8]);
		t1 = _mm_aesdec_si128(t1, ks[9]);
		t2 = _mm_aesdec_si128(t2, ks[9]);
		t3 = _mm_aesdec_si128(t3, ks[9]);
		t4 = _mm_aesdec_si128(t4, ks[9]);

		t1 = _mm_aesdeclast_si128(t1, ks[10]);
		t2 = _mm_aesdeclast_si128(t2, ks[10]);
		t3 = _mm_aesdeclast_si128(t3, ks[10]);
		t4 = _mm_aesdeclast_si128(t4, ks[10]);
		t1 = _mm_xor_si128(t1, f1);
		t2 = _mm_xor_si128(t2, f2);
		t3 = _mm_xor_si128(t3, f3);
		t4 = _mm_xor_si128(t4, f4);
		_mm_storeu_si128(bo + i + 0, t1);
		_mm_storeu_si128(bo + i + 1, t2);
		_mm_storeu_si128(bo + i + 2, t3);
		_mm_storeu_si128(bo + i + 3, t4);
		f1 = last;
	}

	for (i = pblocks; i < blocks; i++)
	{
		last = _mm_loadu_si128(bi + i);
		t1 = _mm_xor_si128(last, ks[0]);

		t1 = _mm_aesdec_si128(t1, ks[1]);
		t1 = _mm_aesdec_si128(t1, ks[2]);
		t1 = _mm_aesdec_si128(t1, ks[3]);
		t1 = _mm_aesdec_si128(t1, ks[4]);
		t1 = _mm_aesdec_si128(t1, ks[5]);
		t1 = _mm_aesdec_si128(t1, ks[6]);
		t1 = _mm_aesdec_si128(t1, ks[7]);
		t1 = _mm_aesdec_si128(t1, ks[8]);
		t1 = _mm_aesdec_si128(t1, ks[9]);

		t1 = _mm_aesdeclast_si128(t1, ks[10]);
		t1 = _mm_xor_si128(t1, f1);
		_mm_storeu_si128(bo + i, t1);
		f1 = last;
	}
}

/**
 * AES-192 CBC encryption
 */
static void encrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in,
						   u_char *iv, u_char *out)
{
	__m128i *ks, t, fb, *bi, *bo;
	int i;

	ks = key->schedule;
	bi = (__m128i*)in;
	bo = (__m128i*)out;

	fb = _mm_loadu_si128((__m128i*)iv);
	for (i = 0; i < blocks; i++)
	{
		t = _mm_loadu_si128(bi + i);
		fb = _mm_xor_si128(t, fb);
		fb = _mm_xor_si128(fb, ks[0]);

		fb = _mm_aesenc_si128(fb, ks[1]);
		fb = _mm_aesenc_si128(fb, ks[2]);
		fb = _mm_aesenc_si128(fb, ks[3]);
		fb = _mm_aesenc_si128(fb, ks[4]);
		fb = _mm_aesenc_si128(fb, ks[5]);
		fb = _mm_aesenc_si128(fb, ks[6]);
		fb = _mm_aesenc_si128(fb, ks[7]);
		fb = _mm_aesenc_si128(fb, ks[8]);
		fb = _mm_aesenc_si128(fb, ks[9]);
		fb = _mm_aesenc_si128(fb, ks[10]);
		fb = _mm_aesenc_si128(fb, ks[11]);

		fb = _mm_aesenclast_si128(fb, ks[12]);
		_mm_storeu_si128(bo + i, fb);
	}
}

/**
 * AES-192 CBC decryption
 */
static void decrypt_cbc192(aesni_key_t *key, u_int blocks, u_char *in,
						   u_char *iv, u_char *out)
{
	__m128i *ks, last, *bi, *bo;
	__m128i t1, t2, t3, t4;
	__m128i f1, f2, f3, f4;
	u_int i, pblocks;

	ks = key->schedule;
	bi = (__m128i*)in;
	bo = (__m128i*)out;
	pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);

	f1 = _mm_loadu_si128((__m128i*)iv);

	for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
	{
		t1 = _mm_loadu_si128(bi + i + 0);
		t2 = _mm_loadu_si128(bi + i + 1);
		t3 = _mm_loadu_si128(bi + i + 2);
		t4 = _mm_loadu_si128(bi + i + 3);

		f2 = t1;
		f3 = t2;
		f4 = t3;
		last = t4;

		t1 = _mm_xor_si128(t1, ks[0]);
		t2 = _mm_xor_si128(t2, ks[0]);
		t3 = _mm_xor_si128(t3, ks[0]);
		t4 = _mm_xor_si128(t4, ks[0]);

		t1 = _mm_aesdec_si128(t1, ks[1]);
		t2 = _mm_aesdec_si128(t2, ks[1]);
		t3 = _mm_aesdec_si128(t3, ks[1]);
		t4 = _mm_aesdec_si128(t4, ks[1]);
		t1 = _mm_aesdec_si128(t1, ks[2]);
		t2 = _mm_aesdec_si128(t2, ks[2]);
		t3 = _mm_aesdec_si128(t3, ks[2]);
		t4 = _mm_aesdec_si128(t4, ks[2]);
		t1 = _mm_aesdec_si128(t1, ks[3]);
		t2 = _mm_aesdec_si128(t2, ks[3]);
		t3 = _mm_aesdec_si128(t3, ks[3]);
		t4 = _mm_aesdec_si128(t4, ks[3]);
		t1 = _mm_aesdec_si128(t1, ks[4]);
		t2 = _mm_aesdec_si128(t2, ks[4]);
		t3 = _mm_aesdec_si128(t3, ks[4]);
		t4 = _mm_aesdec_si128(t4, ks[4]);
		t1 = _mm_aesdec_si128(t1, ks[5]);
		t2 = _mm_aesdec_si128(t2, ks[5]);
		t3 = _mm_aesdec_si128(t3, ks[5]);
		t4 = _mm_aesdec_si128(t4, ks[5]);
		t1 = _mm_aesdec_si128(t1, ks[6]);
		t2 = _mm_aesdec_si128(t2, ks[6]);
		t3 = _mm_aesdec_si128(t3, ks[6]);
		t4 = _mm_aesdec_si128(t4, ks[6]);
		t1 = _mm_aesdec_si128(t1, ks[7]);
		t2 = _mm_aesdec_si128(t2, ks[7]);
		t3 = _mm_aesdec_si128(t3, ks[7]);
		t4 = _mm_aesdec_si128(t4, ks[7]);
		t1 = _mm_aesdec_si128(t1, ks[8]);
		t2 = _mm_aesdec_si128(t2, ks[8]);
		t3 = _mm_aesdec_si128(t3, ks[8]);
		t4 = _mm_aesdec_si128(t4, ks[8]);
		t1 = _mm_aesdec_si128(t1, ks[9]);
		t2 = _mm_aesdec_si128(t2, ks[9]);
		t3 = _mm_aesdec_si128(t3, ks[9]);
		t4 = _mm_aesdec_si128(t4, ks[9]);
		t1 = _mm_aesdec_si128(t1, ks[10]);
		t2 = _mm_aesdec_si128(t2, ks[10]);
		t3 = _mm_aesdec_si128(t3, ks[10]);
		t4 = _mm_aesdec_si128(t4, ks[10]);
		t1 = _mm_aesdec_si128(t1, ks[11]);
		t2 = _mm_aesdec_si128(t2, ks[11]);
		t3 = _mm_aesdec_si128(t3, ks[11]);
		t4 = _mm_aesdec_si128(t4, ks[11]);

		t1 = _mm_aesdeclast_si128(t1, ks[12]);
		t2 = _mm_aesdeclast_si128(t2, ks[12]);
		t3 = _mm_aesdeclast_si128(t3, ks[12]);
		t4 = _mm_aesdeclast_si128(t4, ks[12]);
		t1 = _mm_xor_si128(t1, f1);
		t2 = _mm_xor_si128(t2, f2);
		t3 = _mm_xor_si128(t3, f3);
		t4 = _mm_xor_si128(t4, f4);
		_mm_storeu_si128(bo + i + 0, t1);
		_mm_storeu_si128(bo + i + 1, t2);
		_mm_storeu_si128(bo + i + 2, t3);
		_mm_storeu_si128(bo + i + 3, t4);
		f1 = last;
	}

	for (i = pblocks; i < blocks; i++)
	{
		last = _mm_loadu_si128(bi + i);
		t1 = _mm_xor_si128(last, ks[0]);

		t1 = _mm_aesdec_si128(t1, ks[1]);
		t1 = _mm_aesdec_si128(t1, ks[2]);
		t1 = _mm_aesdec_si128(t1, ks[3]);
		t1 = _mm_aesdec_si128(t1, ks[4]);
		t1 = _mm_aesdec_si128(t1, ks[5]);
		t1 = _mm_aesdec_si128(t1, ks[6]);
		t1 = _mm_aesdec_si128(t1, ks[7]);
		t1 = _mm_aesdec_si128(t1, ks[8]);
		t1 = _mm_aesdec_si128(t1, ks[9]);
		t1 = _mm_aesdec_si128(t1, ks[10]);
		t1 = _mm_aesdec_si128(t1, ks[11]);

		t1 = _mm_aesdeclast_si128(t1, ks[12]);
		t1 = _mm_xor_si128(t1, f1);
		_mm_storeu_si128(bo + i, t1);
		f1 = last;
	}
}

/**
 * AES-256 CBC encryption
 */
static void encrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in,
						   u_char *iv, u_char *out)
{
	__m128i *ks, t, fb, *bi, *bo;
	int i;

	ks = key->schedule;
	bi = (__m128i*)in;
	bo = (__m128i*)out;

	fb = _mm_loadu_si128((__m128i*)iv);
	for (i = 0; i < blocks; i++)
	{
		t = _mm_loadu_si128(bi + i);
		fb = _mm_xor_si128(t, fb);
		fb = _mm_xor_si128(fb, ks[0]);

		fb = _mm_aesenc_si128(fb, ks[1]);
		fb = _mm_aesenc_si128(fb, ks[2]);
		fb = _mm_aesenc_si128(fb, ks[3]);
		fb = _mm_aesenc_si128(fb, ks[4]);
		fb = _mm_aesenc_si128(fb, ks[5]);
		fb = _mm_aesenc_si128(fb, ks[6]);
		fb = _mm_aesenc_si128(fb, ks[7]);
		fb = _mm_aesenc_si128(fb, ks[8]);
		fb = _mm_aesenc_si128(fb, ks[9]);
		fb = _mm_aesenc_si128(fb, ks[10]);
		fb = _mm_aesenc_si128(fb, ks[11]);
		fb = _mm_aesenc_si128(fb, ks[12]);
		fb = _mm_aesenc_si128(fb, ks[13]);

		fb = _mm_aesenclast_si128(fb, ks[14]);
		_mm_storeu_si128(bo + i, fb);
	}
}

/**
 * AES-256 CBC decryption
 */
static void decrypt_cbc256(aesni_key_t *key, u_int blocks, u_char *in,
						   u_char *iv, u_char *out)
{
	__m128i *ks, last, *bi, *bo;
	__m128i t1, t2, t3, t4;
	__m128i f1, f2, f3, f4;
	u_int i, pblocks;

	ks = key->schedule;
	bi = (__m128i*)in;
	bo = (__m128i*)out;
	pblocks = blocks - (blocks % CBC_DECRYPT_PARALLELISM);

	f1 = _mm_loadu_si128((__m128i*)iv);

	for (i = 0; i < pblocks; i += CBC_DECRYPT_PARALLELISM)
	{
		t1 = _mm_loadu_si128(bi + i + 0);
		t2 = _mm_loadu_si128(bi + i + 1);
		t3 = _mm_loadu_si128(bi + i + 2);
		t4 = _mm_loadu_si128(bi + i + 3);

		f2 = t1;
		f3 = t2;
		f4 = t3;
		last = t4;

		t1 = _mm_xor_si128(t1, ks[0]);
		t2 = _mm_xor_si128(t2, ks[0]);
		t3 = _mm_xor_si128(t3, ks[0]);
		t4 = _mm_xor_si128(t4, ks[0]);

		t1 = _mm_aesdec_si128(t1, ks[1]);
		t2 = _mm_aesdec_si128(t2, ks[1]);
		t3 = _mm_aesdec_si128(t3, ks[1]);
		t4 = _mm_aesdec_si128(t4, ks[1]);
		t1 = _mm_aesdec_si128(t1, ks[2]);
		t2 = _mm_aesdec_si128(t2, ks[2]);
		t3 = _mm_aesdec_si128(t3, ks[2]);
		t4 = _mm_aesdec_si128(t4, ks[2]);
		t1 = _mm_aesdec_si128(t1, ks[3]);
		t2 = _mm_aesdec_si128(t2, ks[3]);
		t3 = _mm_aesdec_si128(t3, ks[3]);
		t4 = _mm_aesdec_si128(t4, ks[3]);
		t1 = _mm_aesdec_si128(t1, ks[4]);
		t2 = _mm_aesdec_si128(t2, ks[4]);
		t3 = _mm_aesdec_si128(t3, ks[4]);
		t4 = _mm_aesdec_si128(t4, ks[4]);
		t1 = _mm_aesdec_si128(t1, ks[5]);
		t2 = _mm_aesdec_si128(t2, ks[5]);
		t3 = _mm_aesdec_si128(t3, ks[5]);
		t4 = _mm_aesdec_si128(t4, ks[5]);
		t1 = _mm_aesdec_si128(t1, ks[6]);
		t2 = _mm_aesdec_si128(t2, ks[6]);
		t3 = _mm_aesdec_si128(t3, ks[6]);
		t4 = _mm_aesdec_si128(t4, ks[6]);
		t1 = _mm_aesdec_si128(t1, ks[7]);
		t2 = _mm_aesdec_si128(t2, ks[7]);
		t3 = _mm_aesdec_si128(t3, ks[7]);
		t4 = _mm_aesdec_si128(t4, ks[7]);
		t1 = _mm_aesdec_si128(t1, ks[8]);
		t2 = _mm_aesdec_si128(t2, ks[8]);
		t3 = _mm_aesdec_si128(t3, ks[8]);
		t4 = _mm_aesdec_si128(t4, ks[8]);
		t1 = _mm_aesdec_si128(t1, ks[9]);
		t2 = _mm_aesdec_si128(t2, ks[9]);
		t3 = _mm_aesdec_si128(t3, ks[9]);
		t4 = _mm_aesdec_si128(t4, ks[9]);
		t1 = _mm_aesdec_si128(t1, ks[10]);
		t2 = _mm_aesdec_si128(t2, ks[10]);
		t3 = _mm_aesdec_si128(t3, ks[10]);
		t4 = _mm_aesdec_si128(t4, ks[10]);
		t1 = _mm_aesdec_si128(t1, ks[11]);
		t2 = _mm_aesdec_si128(t2, ks[11]);
		t3 = _mm_aesdec_si128(t3, ks[11]);
		t4 = _mm_aesdec_si128(t4, ks[11]);
		t1 = _mm_aesdec_si128(t1, ks[12]);
		t2 = _mm_aesdec_si128(t2, ks[12]);
		t3 = _mm_aesdec_si128(t3, ks[12]);
		t4 = _mm_aesdec_si128(t4, ks[12]);
		t1 = _mm_aesdec_si128(t1, ks[13]);
		t2 = _mm_aesdec_si128(t2, ks[13]);
		t3 = _mm_aesdec_si128(t3, ks[13]);
		t4 = _mm_aesdec_si128(t4, ks[13]);

		t1 = _mm_aesdeclast_si128(t1, ks[14]);
		t2 = _mm_aesdeclast_si128(t2, ks[14]);
		t3 = _mm_aesdeclast_si128(t3, ks[14]);
		t4 = _mm_aesdeclast_si128(t4, ks[14]);
		t1 = _mm_xor_si128(t1, f1);
		t2 = _mm_xor_si128(t2, f2);
		t3 = _mm_xor_si128(t3, f3);
		t4 = _mm_xor_si128(t4, f4);
		_mm_storeu_si128(bo + i + 0, t1);
		_mm_storeu_si128(bo + i + 1, t2);
		_mm_storeu_si128(bo + i + 2, t3);
		_mm_storeu_si128(bo + i + 3, t4);
		f1 = last;
	}

	for (i = pblocks; i < blocks; i++)
	{
		last = _mm_loadu_si128(bi + i);
		t1 = _mm_xor_si128(last, ks[0]);

		t1 = _mm_aesdec_si128(t1, ks[1]);
		t1 = _mm_aesdec_si128(t1, ks[2]);
		t1 = _mm_aesdec_si128(t1, ks[3]);
		t1 = _mm_aesdec_si128(t1, ks[4]);
		t1 = _mm_aesdec_si128(t1, ks[5]);
		t1 = _mm_aesdec_si128(t1, ks[6]);
		t1 = _mm_aesdec_si128(t1, ks[7]);
		t1 = _mm_aesdec_si128(t1, ks[8]);
		t1 = _mm_aesdec_si128(t1, ks[9]);
		t1 = _mm_aesdec_si128(t1, ks[10]);
		t1 = _mm_aesdec_si128(t1, ks[11]);
		t1 = _mm_aesdec_si128(t1, ks[12]);
		t1 = _mm_aesdec_si128(t1, ks[13]);

		t1 = _mm_aesdeclast_si128(t1, ks[14]);
		t1 = _mm_xor_si128(t1, f1);
		_mm_storeu_si128(bo + i, t1);
		f1 = last;
	}
}

/**
 * Do inline or allocated de/encryption using key schedule
 */
static bool crypt(aesni_cbc_fn_t fn, aesni_key_t *key,
				  chunk_t data, chunk_t iv, chunk_t *out)
{
	u_char *buf;

	if (!key || iv.len != AES_BLOCK_SIZE || data.len % AES_BLOCK_SIZE)
	{
		return FALSE;
	}
	if (out)
	{
		*out = chunk_alloc(data.len);
		buf = out->ptr;
	}
	else
	{
		buf = data.ptr;
	}
	fn(key, data.len / AES_BLOCK_SIZE, data.ptr, iv.ptr, buf);
	return TRUE;
}

METHOD(crypter_t, encrypt, bool,
	private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *encrypted)
{
	return crypt(this->encrypt, this->ekey, data, iv, encrypted);
}

METHOD(crypter_t, decrypt, bool,
	private_aesni_cbc_t *this, chunk_t data, chunk_t iv, chunk_t *decrypted)
{
	return crypt(this->decrypt, this->dkey, data, iv, decrypted);
}

METHOD(crypter_t, get_block_size, size_t,
	private_aesni_cbc_t *this)
{
	return AES_BLOCK_SIZE;
}

METHOD(crypter_t, get_iv_size, size_t,
	private_aesni_cbc_t *this)
{
	return AES_BLOCK_SIZE;
}

METHOD(crypter_t, get_key_size, size_t,
	private_aesni_cbc_t *this)
{
	return this->key_size;
}

METHOD(crypter_t, set_key, bool,
	private_aesni_cbc_t *this, chunk_t key)
{
	if (key.len != this->key_size)
	{
		return FALSE;
	}

	DESTROY_IF(this->ekey);
	DESTROY_IF(this->dkey);

	this->ekey = aesni_key_create(TRUE, key);
	this->dkey = aesni_key_create(FALSE, key);

	return this->ekey && this->dkey;
}

METHOD(crypter_t, destroy, void,
	private_aesni_cbc_t *this)
{
	DESTROY_IF(this->ekey);
	DESTROY_IF(this->dkey);
	free_align(this);
}

/**
 * See header
 */
aesni_cbc_t *aesni_cbc_create(encryption_algorithm_t algo, size_t key_size)
{
	private_aesni_cbc_t *this;

	if (algo != ENCR_AES_CBC)
	{
		return NULL;
	}
	switch (key_size)
	{
		case 0:
			key_size = 16;
			break;
		case 16:
		case 24:
		case 32:
			break;
		default:
			return NULL;
	}

	INIT_ALIGN(this, sizeof(__m128i),
		.public = {
			.crypter = {
				.encrypt = _encrypt,
				.decrypt = _decrypt,
				.get_block_size = _get_block_size,
				.get_iv_size = _get_iv_size,
				.get_key_size = _get_key_size,
				.set_key = _set_key,
				.destroy = _destroy,
			},
		},
		.key_size = key_size,
	);

	switch (key_size)
	{
		case 16:
			this->encrypt = encrypt_cbc128;
			this->decrypt = decrypt_cbc128;
			break;
		case 24:
			this->encrypt = encrypt_cbc192;
			this->decrypt = decrypt_cbc192;
			break;
		case 32:
			this->encrypt = encrypt_cbc256;
			this->decrypt = decrypt_cbc256;
			break;
	}

	return &this->public;
}
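
/*
 * Illustrative usage sketch added for exposition and compiled out with
 * #if 0 so the translation unit is unchanged: it shows how the crypter
 * returned by aesni_cbc_create() above might be driven through the generic
 * crypter_t interface. The function name and the constant key/IV/plaintext
 * patterns are placeholders for the example only.
 */
#if 0
static bool aesni_cbc_usage_example(void)
{
	aesni_cbc_t *cbc;
	chunk_t key, iv, data, encrypted = chunk_empty;
	bool success = FALSE;

	/* AES-128 in CBC mode; returns NULL for other algorithms/key sizes */
	cbc = aesni_cbc_create(ENCR_AES_CBC, 16);
	if (!cbc)
	{
		return FALSE;
	}
	/* stack-allocated buffers; data.len must be a multiple of
	 * AES_BLOCK_SIZE and iv.len exactly one block */
	key = chunk_alloca(16);
	iv = chunk_alloca(AES_BLOCK_SIZE);
	data = chunk_alloca(AES_BLOCK_SIZE * 4);
	memset(key.ptr, 0x11, key.len);
	memset(iv.ptr, 0x22, iv.len);
	memset(data.ptr, 0x33, data.len);

	if (cbc->crypter.set_key(&cbc->crypter, key) &&
		cbc->crypter.encrypt(&cbc->crypter, data, iv, &encrypted))
	{
		/* passing NULL instead of &encrypted would encrypt in-place */
		success = TRUE;
	}
	chunk_free(&encrypted);
	cbc->crypter.destroy(&cbc->crypter);
	return success;
}
#endif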