Return to aesni_ctr.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / strongswan / src / libstrongswan / plugins / aesni |
1.1 misho 1: /* 2: * Copyright (C) 2015 Martin Willi 3: * Copyright (C) 2015 revosec AG 4: * 5: * This program is free software; you can redistribute it and/or modify it 6: * under the terms of the GNU General Public License as published by the 7: * Free Software Foundation; either version 2 of the License, or (at your 8: * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>. 9: * 10: * This program is distributed in the hope that it will be useful, but 11: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 12: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13: * for more details. 14: */ 15: 16: #include "aesni_ctr.h" 17: #include "aesni_key.h" 18: 19: #include <tmmintrin.h> 20: 21: /** 22: * Pipeline parallelism we use for CTR en/decryption 23: */ 24: #define CTR_CRYPT_PARALLELISM 4 25: 26: typedef struct private_aesni_ctr_t private_aesni_ctr_t; 27: 28: /** 29: * CTR en/decryption method type 30: */ 31: typedef void (*aesni_ctr_fn_t)(private_aesni_ctr_t*, size_t, u_char*, u_char*); 32: 33: /** 34: * Private data of an aesni_ctr_t object. 35: */ 36: struct private_aesni_ctr_t { 37: 38: /** 39: * Public aesni_ctr_t interface. 40: */ 41: aesni_ctr_t public; 42: 43: /** 44: * Key size 45: */ 46: u_int key_size; 47: 48: /** 49: * Key schedule 50: */ 51: aesni_key_t *key; 52: 53: /** 54: * Encryption method 55: */ 56: aesni_ctr_fn_t crypt; 57: 58: /** 59: * Counter state 60: */ 61: struct { 62: char nonce[4]; 63: char iv[8]; 64: uint32_t counter; 65: } __attribute__((packed, aligned(sizeof(__m128i)))) state; 66: }; 67: 68: /** 69: * Do big-endian increment on x 70: */ 71: static inline __m128i increment_be(__m128i x) 72: { 73: __m128i swap; 74: 75: swap = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); 76: 77: x = _mm_shuffle_epi8(x, swap); 78: x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1)); 79: x = _mm_shuffle_epi8(x, swap); 80: 81: return x; 82: } 83: 84: /** 85: * AES-128 CTR encryption 86: */ 87: static void encrypt_ctr128(private_aesni_ctr_t *this, 88: size_t len, u_char *in, u_char *out) 89: { 90: __m128i t1, t2, t3, t4; 91: __m128i d1, d2, d3, d4; 92: __m128i *ks, state, b, *bi, *bo; 93: u_int i, blocks, pblocks, rem; 94: 95: state = _mm_load_si128((__m128i*)&this->state); 96: blocks = len / AES_BLOCK_SIZE; 97: pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM); 98: rem = len % AES_BLOCK_SIZE; 99: bi = (__m128i*)in; 100: bo = (__m128i*)out; 101: 102: ks = this->key->schedule; 103: 104: for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM) 105: { 106: d1 = _mm_loadu_si128(bi + i + 0); 107: d2 = _mm_loadu_si128(bi + i + 1); 108: d3 = _mm_loadu_si128(bi + i + 2); 109: d4 = _mm_loadu_si128(bi + i + 3); 110: 111: t1 = _mm_xor_si128(state, ks[0]); 112: state = increment_be(state); 113: t2 = _mm_xor_si128(state, ks[0]); 114: state = increment_be(state); 115: t3 = _mm_xor_si128(state, ks[0]); 116: state = increment_be(state); 117: t4 = _mm_xor_si128(state, ks[0]); 118: state = increment_be(state); 119: 120: t1 = _mm_aesenc_si128(t1, ks[1]); 121: t2 = _mm_aesenc_si128(t2, ks[1]); 122: t3 = _mm_aesenc_si128(t3, ks[1]); 123: t4 = _mm_aesenc_si128(t4, ks[1]); 124: t1 = _mm_aesenc_si128(t1, ks[2]); 125: t2 = _mm_aesenc_si128(t2, ks[2]); 126: t3 = _mm_aesenc_si128(t3, ks[2]); 127: t4 = _mm_aesenc_si128(t4, ks[2]); 128: t1 = _mm_aesenc_si128(t1, ks[3]); 129: t2 = _mm_aesenc_si128(t2, ks[3]); 130: t3 = _mm_aesenc_si128(t3, ks[3]); 131: t4 = _mm_aesenc_si128(t4, ks[3]); 132: t1 = _mm_aesenc_si128(t1, ks[4]); 133: t2 = _mm_aesenc_si128(t2, ks[4]); 134: t3 = _mm_aesenc_si128(t3, ks[4]); 135: t4 = _mm_aesenc_si128(t4, ks[4]); 136: t1 = _mm_aesenc_si128(t1, ks[5]); 137: t2 = _mm_aesenc_si128(t2, ks[5]); 138: t3 = _mm_aesenc_si128(t3, ks[5]); 139: t4 = _mm_aesenc_si128(t4, ks[5]); 140: t1 = _mm_aesenc_si128(t1, ks[6]); 141: t2 = _mm_aesenc_si128(t2, ks[6]); 142: t3 = _mm_aesenc_si128(t3, ks[6]); 143: t4 = _mm_aesenc_si128(t4, ks[6]); 144: t1 = _mm_aesenc_si128(t1, ks[7]); 145: t2 = _mm_aesenc_si128(t2, ks[7]); 146: t3 = _mm_aesenc_si128(t3, ks[7]); 147: t4 = _mm_aesenc_si128(t4, ks[7]); 148: t1 = _mm_aesenc_si128(t1, ks[8]); 149: t2 = _mm_aesenc_si128(t2, ks[8]); 150: t3 = _mm_aesenc_si128(t3, ks[8]); 151: t4 = _mm_aesenc_si128(t4, ks[8]); 152: t1 = _mm_aesenc_si128(t1, ks[9]); 153: t2 = _mm_aesenc_si128(t2, ks[9]); 154: t3 = _mm_aesenc_si128(t3, ks[9]); 155: t4 = _mm_aesenc_si128(t4, ks[9]); 156: 157: t1 = _mm_aesenclast_si128(t1, ks[10]); 158: t2 = _mm_aesenclast_si128(t2, ks[10]); 159: t3 = _mm_aesenclast_si128(t3, ks[10]); 160: t4 = _mm_aesenclast_si128(t4, ks[10]); 161: t1 = _mm_xor_si128(t1, d1); 162: t2 = _mm_xor_si128(t2, d2); 163: t3 = _mm_xor_si128(t3, d3); 164: t4 = _mm_xor_si128(t4, d4); 165: _mm_storeu_si128(bo + i + 0, t1); 166: _mm_storeu_si128(bo + i + 1, t2); 167: _mm_storeu_si128(bo + i + 2, t3); 168: _mm_storeu_si128(bo + i + 3, t4); 169: } 170: 171: for (i = pblocks; i < blocks; i++) 172: { 173: d1 = _mm_loadu_si128(bi + i); 174: 175: t1 = _mm_xor_si128(state, ks[0]); 176: state = increment_be(state); 177: 178: t1 = _mm_aesenc_si128(t1, ks[1]); 179: t1 = _mm_aesenc_si128(t1, ks[2]); 180: t1 = _mm_aesenc_si128(t1, ks[3]); 181: t1 = _mm_aesenc_si128(t1, ks[4]); 182: t1 = _mm_aesenc_si128(t1, ks[5]); 183: t1 = _mm_aesenc_si128(t1, ks[6]); 184: t1 = _mm_aesenc_si128(t1, ks[7]); 185: t1 = _mm_aesenc_si128(t1, ks[8]); 186: t1 = _mm_aesenc_si128(t1, ks[9]); 187: 188: t1 = _mm_aesenclast_si128(t1, ks[10]); 189: t1 = _mm_xor_si128(t1, d1); 190: _mm_storeu_si128(bo + i, t1); 191: } 192: 193: if (rem) 194: { 195: memset(&b, 0, sizeof(b)); 196: memcpy(&b, bi + blocks, rem); 197: 198: d1 = _mm_loadu_si128(&b); 199: t1 = _mm_xor_si128(state, ks[0]); 200: 201: t1 = _mm_aesenc_si128(t1, ks[1]); 202: t1 = _mm_aesenc_si128(t1, ks[2]); 203: t1 = _mm_aesenc_si128(t1, ks[3]); 204: t1 = _mm_aesenc_si128(t1, ks[4]); 205: t1 = _mm_aesenc_si128(t1, ks[5]); 206: t1 = _mm_aesenc_si128(t1, ks[6]); 207: t1 = _mm_aesenc_si128(t1, ks[7]); 208: t1 = _mm_aesenc_si128(t1, ks[8]); 209: t1 = _mm_aesenc_si128(t1, ks[9]); 210: 211: t1 = _mm_aesenclast_si128(t1, ks[10]); 212: t1 = _mm_xor_si128(t1, d1); 213: _mm_storeu_si128(&b, t1); 214: 215: memcpy(bo + blocks, &b, rem); 216: } 217: } 218: 219: /** 220: * AES-192 CTR encryption 221: */ 222: static void encrypt_ctr192(private_aesni_ctr_t *this, 223: size_t len, u_char *in, u_char *out) 224: { 225: __m128i t1, t2, t3, t4; 226: __m128i d1, d2, d3, d4; 227: __m128i *ks, state, b, *bi, *bo; 228: u_int i, blocks, pblocks, rem; 229: 230: state = _mm_load_si128((__m128i*)&this->state); 231: blocks = len / AES_BLOCK_SIZE; 232: pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM); 233: rem = len % AES_BLOCK_SIZE; 234: bi = (__m128i*)in; 235: bo = (__m128i*)out; 236: 237: ks = this->key->schedule; 238: 239: for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM) 240: { 241: d1 = _mm_loadu_si128(bi + i + 0); 242: d2 = _mm_loadu_si128(bi + i + 1); 243: d3 = _mm_loadu_si128(bi + i + 2); 244: d4 = _mm_loadu_si128(bi + i + 3); 245: 246: t1 = _mm_xor_si128(state, ks[0]); 247: state = increment_be(state); 248: t2 = _mm_xor_si128(state, ks[0]); 249: state = increment_be(state); 250: t3 = _mm_xor_si128(state, ks[0]); 251: state = increment_be(state); 252: t4 = _mm_xor_si128(state, ks[0]); 253: state = increment_be(state); 254: 255: t1 = _mm_aesenc_si128(t1, ks[1]); 256: t2 = _mm_aesenc_si128(t2, ks[1]); 257: t3 = _mm_aesenc_si128(t3, ks[1]); 258: t4 = _mm_aesenc_si128(t4, ks[1]); 259: t1 = _mm_aesenc_si128(t1, ks[2]); 260: t2 = _mm_aesenc_si128(t2, ks[2]); 261: t3 = _mm_aesenc_si128(t3, ks[2]); 262: t4 = _mm_aesenc_si128(t4, ks[2]); 263: t1 = _mm_aesenc_si128(t1, ks[3]); 264: t2 = _mm_aesenc_si128(t2, ks[3]); 265: t3 = _mm_aesenc_si128(t3, ks[3]); 266: t4 = _mm_aesenc_si128(t4, ks[3]); 267: t1 = _mm_aesenc_si128(t1, ks[4]); 268: t2 = _mm_aesenc_si128(t2, ks[4]); 269: t3 = _mm_aesenc_si128(t3, ks[4]); 270: t4 = _mm_aesenc_si128(t4, ks[4]); 271: t1 = _mm_aesenc_si128(t1, ks[5]); 272: t2 = _mm_aesenc_si128(t2, ks[5]); 273: t3 = _mm_aesenc_si128(t3, ks[5]); 274: t4 = _mm_aesenc_si128(t4, ks[5]); 275: t1 = _mm_aesenc_si128(t1, ks[6]); 276: t2 = _mm_aesenc_si128(t2, ks[6]); 277: t3 = _mm_aesenc_si128(t3, ks[6]); 278: t4 = _mm_aesenc_si128(t4, ks[6]); 279: t1 = _mm_aesenc_si128(t1, ks[7]); 280: t2 = _mm_aesenc_si128(t2, ks[7]); 281: t3 = _mm_aesenc_si128(t3, ks[7]); 282: t4 = _mm_aesenc_si128(t4, ks[7]); 283: t1 = _mm_aesenc_si128(t1, ks[8]); 284: t2 = _mm_aesenc_si128(t2, ks[8]); 285: t3 = _mm_aesenc_si128(t3, ks[8]); 286: t4 = _mm_aesenc_si128(t4, ks[8]); 287: t1 = _mm_aesenc_si128(t1, ks[9]); 288: t2 = _mm_aesenc_si128(t2, ks[9]); 289: t3 = _mm_aesenc_si128(t3, ks[9]); 290: t4 = _mm_aesenc_si128(t4, ks[9]); 291: t1 = _mm_aesenc_si128(t1, ks[10]); 292: t2 = _mm_aesenc_si128(t2, ks[10]); 293: t3 = _mm_aesenc_si128(t3, ks[10]); 294: t4 = _mm_aesenc_si128(t4, ks[10]); 295: t1 = _mm_aesenc_si128(t1, ks[11]); 296: t2 = _mm_aesenc_si128(t2, ks[11]); 297: t3 = _mm_aesenc_si128(t3, ks[11]); 298: t4 = _mm_aesenc_si128(t4, ks[11]); 299: 300: t1 = _mm_aesenclast_si128(t1, ks[12]); 301: t2 = _mm_aesenclast_si128(t2, ks[12]); 302: t3 = _mm_aesenclast_si128(t3, ks[12]); 303: t4 = _mm_aesenclast_si128(t4, ks[12]); 304: t1 = _mm_xor_si128(t1, d1); 305: t2 = _mm_xor_si128(t2, d2); 306: t3 = _mm_xor_si128(t3, d3); 307: t4 = _mm_xor_si128(t4, d4); 308: _mm_storeu_si128(bo + i + 0, t1); 309: _mm_storeu_si128(bo + i + 1, t2); 310: _mm_storeu_si128(bo + i + 2, t3); 311: _mm_storeu_si128(bo + i + 3, t4); 312: } 313: 314: for (i = pblocks; i < blocks; i++) 315: { 316: d1 = _mm_loadu_si128(bi + i); 317: 318: t1 = _mm_xor_si128(state, ks[0]); 319: state = increment_be(state); 320: 321: t1 = _mm_aesenc_si128(t1, ks[1]); 322: t1 = _mm_aesenc_si128(t1, ks[2]); 323: t1 = _mm_aesenc_si128(t1, ks[3]); 324: t1 = _mm_aesenc_si128(t1, ks[4]); 325: t1 = _mm_aesenc_si128(t1, ks[5]); 326: t1 = _mm_aesenc_si128(t1, ks[6]); 327: t1 = _mm_aesenc_si128(t1, ks[7]); 328: t1 = _mm_aesenc_si128(t1, ks[8]); 329: t1 = _mm_aesenc_si128(t1, ks[9]); 330: t1 = _mm_aesenc_si128(t1, ks[10]); 331: t1 = _mm_aesenc_si128(t1, ks[11]); 332: 333: t1 = _mm_aesenclast_si128(t1, ks[12]); 334: t1 = _mm_xor_si128(t1, d1); 335: _mm_storeu_si128(bo + i, t1); 336: } 337: 338: if (rem) 339: { 340: memset(&b, 0, sizeof(b)); 341: memcpy(&b, bi + blocks, rem); 342: 343: d1 = _mm_loadu_si128(&b); 344: t1 = _mm_xor_si128(state, ks[0]); 345: 346: t1 = _mm_aesenc_si128(t1, ks[1]); 347: t1 = _mm_aesenc_si128(t1, ks[2]); 348: t1 = _mm_aesenc_si128(t1, ks[3]); 349: t1 = _mm_aesenc_si128(t1, ks[4]); 350: t1 = _mm_aesenc_si128(t1, ks[5]); 351: t1 = _mm_aesenc_si128(t1, ks[6]); 352: t1 = _mm_aesenc_si128(t1, ks[7]); 353: t1 = _mm_aesenc_si128(t1, ks[8]); 354: t1 = _mm_aesenc_si128(t1, ks[9]); 355: t1 = _mm_aesenc_si128(t1, ks[10]); 356: t1 = _mm_aesenc_si128(t1, ks[11]); 357: 358: t1 = _mm_aesenclast_si128(t1, ks[12]); 359: t1 = _mm_xor_si128(t1, d1); 360: _mm_storeu_si128(&b, t1); 361: 362: memcpy(bo + blocks, &b, rem); 363: } 364: } 365: 366: /** 367: * AES-256 CTR encryption 368: */ 369: static void encrypt_ctr256(private_aesni_ctr_t *this, 370: size_t len, u_char *in, u_char *out) 371: { 372: __m128i t1, t2, t3, t4; 373: __m128i d1, d2, d3, d4; 374: __m128i *ks, state, b, *bi, *bo; 375: u_int i, blocks, pblocks, rem; 376: 377: state = _mm_load_si128((__m128i*)&this->state); 378: blocks = len / AES_BLOCK_SIZE; 379: pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM); 380: rem = len % AES_BLOCK_SIZE; 381: bi = (__m128i*)in; 382: bo = (__m128i*)out; 383: 384: ks = this->key->schedule; 385: 386: for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM) 387: { 388: d1 = _mm_loadu_si128(bi + i + 0); 389: d2 = _mm_loadu_si128(bi + i + 1); 390: d3 = _mm_loadu_si128(bi + i + 2); 391: d4 = _mm_loadu_si128(bi + i + 3); 392: 393: t1 = _mm_xor_si128(state, ks[0]); 394: state = increment_be(state); 395: t2 = _mm_xor_si128(state, ks[0]); 396: state = increment_be(state); 397: t3 = _mm_xor_si128(state, ks[0]); 398: state = increment_be(state); 399: t4 = _mm_xor_si128(state, ks[0]); 400: state = increment_be(state); 401: 402: t1 = _mm_aesenc_si128(t1, ks[1]); 403: t2 = _mm_aesenc_si128(t2, ks[1]); 404: t3 = _mm_aesenc_si128(t3, ks[1]); 405: t4 = _mm_aesenc_si128(t4, ks[1]); 406: t1 = _mm_aesenc_si128(t1, ks[2]); 407: t2 = _mm_aesenc_si128(t2, ks[2]); 408: t3 = _mm_aesenc_si128(t3, ks[2]); 409: t4 = _mm_aesenc_si128(t4, ks[2]); 410: t1 = _mm_aesenc_si128(t1, ks[3]); 411: t2 = _mm_aesenc_si128(t2, ks[3]); 412: t3 = _mm_aesenc_si128(t3, ks[3]); 413: t4 = _mm_aesenc_si128(t4, ks[3]); 414: t1 = _mm_aesenc_si128(t1, ks[4]); 415: t2 = _mm_aesenc_si128(t2, ks[4]); 416: t3 = _mm_aesenc_si128(t3, ks[4]); 417: t4 = _mm_aesenc_si128(t4, ks[4]); 418: t1 = _mm_aesenc_si128(t1, ks[5]); 419: t2 = _mm_aesenc_si128(t2, ks[5]); 420: t3 = _mm_aesenc_si128(t3, ks[5]); 421: t4 = _mm_aesenc_si128(t4, ks[5]); 422: t1 = _mm_aesenc_si128(t1, ks[6]); 423: t2 = _mm_aesenc_si128(t2, ks[6]); 424: t3 = _mm_aesenc_si128(t3, ks[6]); 425: t4 = _mm_aesenc_si128(t4, ks[6]); 426: t1 = _mm_aesenc_si128(t1, ks[7]); 427: t2 = _mm_aesenc_si128(t2, ks[7]); 428: t3 = _mm_aesenc_si128(t3, ks[7]); 429: t4 = _mm_aesenc_si128(t4, ks[7]); 430: t1 = _mm_aesenc_si128(t1, ks[8]); 431: t2 = _mm_aesenc_si128(t2, ks[8]); 432: t3 = _mm_aesenc_si128(t3, ks[8]); 433: t4 = _mm_aesenc_si128(t4, ks[8]); 434: t1 = _mm_aesenc_si128(t1, ks[9]); 435: t2 = _mm_aesenc_si128(t2, ks[9]); 436: t3 = _mm_aesenc_si128(t3, ks[9]); 437: t4 = _mm_aesenc_si128(t4, ks[9]); 438: t1 = _mm_aesenc_si128(t1, ks[10]); 439: t2 = _mm_aesenc_si128(t2, ks[10]); 440: t3 = _mm_aesenc_si128(t3, ks[10]); 441: t4 = _mm_aesenc_si128(t4, ks[10]); 442: t1 = _mm_aesenc_si128(t1, ks[11]); 443: t2 = _mm_aesenc_si128(t2, ks[11]); 444: t3 = _mm_aesenc_si128(t3, ks[11]); 445: t4 = _mm_aesenc_si128(t4, ks[11]); 446: t1 = _mm_aesenc_si128(t1, ks[12]); 447: t2 = _mm_aesenc_si128(t2, ks[12]); 448: t3 = _mm_aesenc_si128(t3, ks[12]); 449: t4 = _mm_aesenc_si128(t4, ks[12]); 450: t1 = _mm_aesenc_si128(t1, ks[13]); 451: t2 = _mm_aesenc_si128(t2, ks[13]); 452: t3 = _mm_aesenc_si128(t3, ks[13]); 453: t4 = _mm_aesenc_si128(t4, ks[13]); 454: 455: t1 = _mm_aesenclast_si128(t1, ks[14]); 456: t2 = _mm_aesenclast_si128(t2, ks[14]); 457: t3 = _mm_aesenclast_si128(t3, ks[14]); 458: t4 = _mm_aesenclast_si128(t4, ks[14]); 459: t1 = _mm_xor_si128(t1, d1); 460: t2 = _mm_xor_si128(t2, d2); 461: t3 = _mm_xor_si128(t3, d3); 462: t4 = _mm_xor_si128(t4, d4); 463: _mm_storeu_si128(bo + i + 0, t1); 464: _mm_storeu_si128(bo + i + 1, t2); 465: _mm_storeu_si128(bo + i + 2, t3); 466: _mm_storeu_si128(bo + i + 3, t4); 467: } 468: 469: for (i = pblocks; i < blocks; i++) 470: { 471: d1 = _mm_loadu_si128(bi + i); 472: 473: t1 = _mm_xor_si128(state, ks[0]); 474: state = increment_be(state); 475: 476: t1 = _mm_aesenc_si128(t1, ks[1]); 477: t1 = _mm_aesenc_si128(t1, ks[2]); 478: t1 = _mm_aesenc_si128(t1, ks[3]); 479: t1 = _mm_aesenc_si128(t1, ks[4]); 480: t1 = _mm_aesenc_si128(t1, ks[5]); 481: t1 = _mm_aesenc_si128(t1, ks[6]); 482: t1 = _mm_aesenc_si128(t1, ks[7]); 483: t1 = _mm_aesenc_si128(t1, ks[8]); 484: t1 = _mm_aesenc_si128(t1, ks[9]); 485: t1 = _mm_aesenc_si128(t1, ks[10]); 486: t1 = _mm_aesenc_si128(t1, ks[11]); 487: t1 = _mm_aesenc_si128(t1, ks[12]); 488: t1 = _mm_aesenc_si128(t1, ks[13]); 489: 490: t1 = _mm_aesenclast_si128(t1, ks[14]); 491: t1 = _mm_xor_si128(t1, d1); 492: _mm_storeu_si128(bo + i, t1); 493: } 494: 495: if (rem) 496: { 497: memset(&b, 0, sizeof(b)); 498: memcpy(&b, bi + blocks, rem); 499: 500: d1 = _mm_loadu_si128(&b); 501: t1 = _mm_xor_si128(state, ks[0]); 502: 503: t1 = _mm_aesenc_si128(t1, ks[1]); 504: t1 = _mm_aesenc_si128(t1, ks[2]); 505: t1 = _mm_aesenc_si128(t1, ks[3]); 506: t1 = _mm_aesenc_si128(t1, ks[4]); 507: t1 = _mm_aesenc_si128(t1, ks[5]); 508: t1 = _mm_aesenc_si128(t1, ks[6]); 509: t1 = _mm_aesenc_si128(t1, ks[7]); 510: t1 = _mm_aesenc_si128(t1, ks[8]); 511: t1 = _mm_aesenc_si128(t1, ks[9]); 512: t1 = _mm_aesenc_si128(t1, ks[10]); 513: t1 = _mm_aesenc_si128(t1, ks[11]); 514: t1 = _mm_aesenc_si128(t1, ks[12]); 515: t1 = _mm_aesenc_si128(t1, ks[13]); 516: 517: t1 = _mm_aesenclast_si128(t1, ks[14]); 518: t1 = _mm_xor_si128(t1, d1); 519: _mm_storeu_si128(&b, t1); 520: 521: memcpy(bo + blocks, &b, rem); 522: } 523: } 524: 525: METHOD(crypter_t, crypt, bool, 526: private_aesni_ctr_t *this, chunk_t in, chunk_t iv, chunk_t *out) 527: { 528: u_char *buf; 529: 530: if (!this->key || iv.len != sizeof(this->state.iv)) 531: { 532: return FALSE; 533: } 534: memcpy(this->state.iv, iv.ptr, sizeof(this->state.iv)); 535: this->state.counter = htonl(1); 536: 537: buf = in.ptr; 538: if (out) 539: { 540: *out = chunk_alloc(in.len); 541: buf = out->ptr; 542: } 543: this->crypt(this, in.len, in.ptr, buf); 544: return TRUE; 545: } 546: 547: METHOD(crypter_t, get_block_size, size_t, 548: private_aesni_ctr_t *this) 549: { 550: return 1; 551: } 552: 553: METHOD(crypter_t, get_iv_size, size_t, 554: private_aesni_ctr_t *this) 555: { 556: return sizeof(this->state.iv); 557: } 558: 559: METHOD(crypter_t, get_key_size, size_t, 560: private_aesni_ctr_t *this) 561: { 562: return this->key_size + sizeof(this->state.nonce); 563: } 564: 565: METHOD(crypter_t, set_key, bool, 566: private_aesni_ctr_t *this, chunk_t key) 567: { 568: if (key.len != get_key_size(this)) 569: { 570: return FALSE; 571: } 572: 573: memcpy(this->state.nonce, key.ptr + key.len - sizeof(this->state.nonce), 574: sizeof(this->state.nonce)); 575: key.len -= sizeof(this->state.nonce); 576: 577: DESTROY_IF(this->key); 578: this->key = aesni_key_create(TRUE, key); 579: 580: return this->key; 581: } 582: 583: METHOD(crypter_t, destroy, void, 584: private_aesni_ctr_t *this) 585: { 586: DESTROY_IF(this->key); 587: free_align(this); 588: } 589: 590: /** 591: * See header 592: */ 593: aesni_ctr_t *aesni_ctr_create(encryption_algorithm_t algo, size_t key_size) 594: { 595: private_aesni_ctr_t *this; 596: 597: if (algo != ENCR_AES_CTR) 598: { 599: return NULL; 600: } 601: switch (key_size) 602: { 603: case 0: 604: key_size = 16; 605: break; 606: case 16: 607: case 24: 608: case 32: 609: break; 610: default: 611: return NULL; 612: } 613: 614: INIT_ALIGN(this, sizeof(__m128i), 615: .public = { 616: .crypter = { 617: .encrypt = _crypt, 618: .decrypt = _crypt, 619: .get_block_size = _get_block_size, 620: .get_iv_size = _get_iv_size, 621: .get_key_size = _get_key_size, 622: .set_key = _set_key, 623: .destroy = _destroy, 624: }, 625: }, 626: .key_size = key_size, 627: ); 628: 629: switch (key_size) 630: { 631: case 16: 632: this->crypt = encrypt_ctr128; 633: break; 634: case 24: 635: this->crypt = encrypt_ctr192; 636: break; 637: case 32: 638: this->crypt = encrypt_ctr256; 639: break; 640: } 641: 642: return &this->public; 643: }