Annotation of embedaddon/strongswan/src/libstrongswan/plugins/aesni/aesni_ctr.c, revision 1.1
1.1 ! misho 1: /*
! 2: * Copyright (C) 2015 Martin Willi
! 3: * Copyright (C) 2015 revosec AG
! 4: *
! 5: * This program is free software; you can redistribute it and/or modify it
! 6: * under the terms of the GNU General Public License as published by the
! 7: * Free Software Foundation; either version 2 of the License, or (at your
! 8: * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
! 9: *
! 10: * This program is distributed in the hope that it will be useful, but
! 11: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 12: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
! 13: * for more details.
! 14: */
! 15:
! 16: #include "aesni_ctr.h"
! 17: #include "aesni_key.h"
! 18:
! 19: #include <tmmintrin.h>
! 20:
! 21: /**
! 22: * Pipeline parallelism we use for CTR en/decryption
! 23: */
! 24: #define CTR_CRYPT_PARALLELISM 4
! 25:
! 26: typedef struct private_aesni_ctr_t private_aesni_ctr_t;
! 27:
! 28: /**
! 29: * CTR en/decryption method type
! 30: */
! 31: typedef void (*aesni_ctr_fn_t)(private_aesni_ctr_t*, size_t, u_char*, u_char*);
! 32:
! 33: /**
! 34: * Private data of an aesni_ctr_t object.
! 35: */
! 36: struct private_aesni_ctr_t {
! 37:
! 38: /**
! 39: * Public aesni_ctr_t interface.
! 40: */
! 41: aesni_ctr_t public;
! 42:
! 43: /**
! 44: * Key size
! 45: */
! 46: u_int key_size;
! 47:
! 48: /**
! 49: * Key schedule
! 50: */
! 51: aesni_key_t *key;
! 52:
! 53: /**
! 54: * Encryption method
! 55: */
! 56: aesni_ctr_fn_t crypt;
! 57:
! 58: /**
! 59: * Counter state
! 60: */
! 61: struct {
! 62: char nonce[4];
! 63: char iv[8];
! 64: uint32_t counter;
! 65: } __attribute__((packed, aligned(sizeof(__m128i)))) state;
! 66: };
! 67:
! 68: /**
! 69: * Do big-endian increment on x
! 70: */
! 71: static inline __m128i increment_be(__m128i x)
! 72: {
! 73: __m128i swap;
! 74:
! 75: swap = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
! 76:
! 77: x = _mm_shuffle_epi8(x, swap);
! 78: x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1));
! 79: x = _mm_shuffle_epi8(x, swap);
! 80:
! 81: return x;
! 82: }
! 83:
! 84: /**
! 85: * AES-128 CTR encryption
! 86: */
! 87: static void encrypt_ctr128(private_aesni_ctr_t *this,
! 88: size_t len, u_char *in, u_char *out)
! 89: {
! 90: __m128i t1, t2, t3, t4;
! 91: __m128i d1, d2, d3, d4;
! 92: __m128i *ks, state, b, *bi, *bo;
! 93: u_int i, blocks, pblocks, rem;
! 94:
! 95: state = _mm_load_si128((__m128i*)&this->state);
! 96: blocks = len / AES_BLOCK_SIZE;
! 97: pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
! 98: rem = len % AES_BLOCK_SIZE;
! 99: bi = (__m128i*)in;
! 100: bo = (__m128i*)out;
! 101:
! 102: ks = this->key->schedule;
! 103:
! 104: for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
! 105: {
! 106: d1 = _mm_loadu_si128(bi + i + 0);
! 107: d2 = _mm_loadu_si128(bi + i + 1);
! 108: d3 = _mm_loadu_si128(bi + i + 2);
! 109: d4 = _mm_loadu_si128(bi + i + 3);
! 110:
! 111: t1 = _mm_xor_si128(state, ks[0]);
! 112: state = increment_be(state);
! 113: t2 = _mm_xor_si128(state, ks[0]);
! 114: state = increment_be(state);
! 115: t3 = _mm_xor_si128(state, ks[0]);
! 116: state = increment_be(state);
! 117: t4 = _mm_xor_si128(state, ks[0]);
! 118: state = increment_be(state);
! 119:
! 120: t1 = _mm_aesenc_si128(t1, ks[1]);
! 121: t2 = _mm_aesenc_si128(t2, ks[1]);
! 122: t3 = _mm_aesenc_si128(t3, ks[1]);
! 123: t4 = _mm_aesenc_si128(t4, ks[1]);
! 124: t1 = _mm_aesenc_si128(t1, ks[2]);
! 125: t2 = _mm_aesenc_si128(t2, ks[2]);
! 126: t3 = _mm_aesenc_si128(t3, ks[2]);
! 127: t4 = _mm_aesenc_si128(t4, ks[2]);
! 128: t1 = _mm_aesenc_si128(t1, ks[3]);
! 129: t2 = _mm_aesenc_si128(t2, ks[3]);
! 130: t3 = _mm_aesenc_si128(t3, ks[3]);
! 131: t4 = _mm_aesenc_si128(t4, ks[3]);
! 132: t1 = _mm_aesenc_si128(t1, ks[4]);
! 133: t2 = _mm_aesenc_si128(t2, ks[4]);
! 134: t3 = _mm_aesenc_si128(t3, ks[4]);
! 135: t4 = _mm_aesenc_si128(t4, ks[4]);
! 136: t1 = _mm_aesenc_si128(t1, ks[5]);
! 137: t2 = _mm_aesenc_si128(t2, ks[5]);
! 138: t3 = _mm_aesenc_si128(t3, ks[5]);
! 139: t4 = _mm_aesenc_si128(t4, ks[5]);
! 140: t1 = _mm_aesenc_si128(t1, ks[6]);
! 141: t2 = _mm_aesenc_si128(t2, ks[6]);
! 142: t3 = _mm_aesenc_si128(t3, ks[6]);
! 143: t4 = _mm_aesenc_si128(t4, ks[6]);
! 144: t1 = _mm_aesenc_si128(t1, ks[7]);
! 145: t2 = _mm_aesenc_si128(t2, ks[7]);
! 146: t3 = _mm_aesenc_si128(t3, ks[7]);
! 147: t4 = _mm_aesenc_si128(t4, ks[7]);
! 148: t1 = _mm_aesenc_si128(t1, ks[8]);
! 149: t2 = _mm_aesenc_si128(t2, ks[8]);
! 150: t3 = _mm_aesenc_si128(t3, ks[8]);
! 151: t4 = _mm_aesenc_si128(t4, ks[8]);
! 152: t1 = _mm_aesenc_si128(t1, ks[9]);
! 153: t2 = _mm_aesenc_si128(t2, ks[9]);
! 154: t3 = _mm_aesenc_si128(t3, ks[9]);
! 155: t4 = _mm_aesenc_si128(t4, ks[9]);
! 156:
! 157: t1 = _mm_aesenclast_si128(t1, ks[10]);
! 158: t2 = _mm_aesenclast_si128(t2, ks[10]);
! 159: t3 = _mm_aesenclast_si128(t3, ks[10]);
! 160: t4 = _mm_aesenclast_si128(t4, ks[10]);
! 161: t1 = _mm_xor_si128(t1, d1);
! 162: t2 = _mm_xor_si128(t2, d2);
! 163: t3 = _mm_xor_si128(t3, d3);
! 164: t4 = _mm_xor_si128(t4, d4);
! 165: _mm_storeu_si128(bo + i + 0, t1);
! 166: _mm_storeu_si128(bo + i + 1, t2);
! 167: _mm_storeu_si128(bo + i + 2, t3);
! 168: _mm_storeu_si128(bo + i + 3, t4);
! 169: }
! 170:
! 171: for (i = pblocks; i < blocks; i++)
! 172: {
! 173: d1 = _mm_loadu_si128(bi + i);
! 174:
! 175: t1 = _mm_xor_si128(state, ks[0]);
! 176: state = increment_be(state);
! 177:
! 178: t1 = _mm_aesenc_si128(t1, ks[1]);
! 179: t1 = _mm_aesenc_si128(t1, ks[2]);
! 180: t1 = _mm_aesenc_si128(t1, ks[3]);
! 181: t1 = _mm_aesenc_si128(t1, ks[4]);
! 182: t1 = _mm_aesenc_si128(t1, ks[5]);
! 183: t1 = _mm_aesenc_si128(t1, ks[6]);
! 184: t1 = _mm_aesenc_si128(t1, ks[7]);
! 185: t1 = _mm_aesenc_si128(t1, ks[8]);
! 186: t1 = _mm_aesenc_si128(t1, ks[9]);
! 187:
! 188: t1 = _mm_aesenclast_si128(t1, ks[10]);
! 189: t1 = _mm_xor_si128(t1, d1);
! 190: _mm_storeu_si128(bo + i, t1);
! 191: }
! 192:
! 193: if (rem)
! 194: {
! 195: memset(&b, 0, sizeof(b));
! 196: memcpy(&b, bi + blocks, rem);
! 197:
! 198: d1 = _mm_loadu_si128(&b);
! 199: t1 = _mm_xor_si128(state, ks[0]);
! 200:
! 201: t1 = _mm_aesenc_si128(t1, ks[1]);
! 202: t1 = _mm_aesenc_si128(t1, ks[2]);
! 203: t1 = _mm_aesenc_si128(t1, ks[3]);
! 204: t1 = _mm_aesenc_si128(t1, ks[4]);
! 205: t1 = _mm_aesenc_si128(t1, ks[5]);
! 206: t1 = _mm_aesenc_si128(t1, ks[6]);
! 207: t1 = _mm_aesenc_si128(t1, ks[7]);
! 208: t1 = _mm_aesenc_si128(t1, ks[8]);
! 209: t1 = _mm_aesenc_si128(t1, ks[9]);
! 210:
! 211: t1 = _mm_aesenclast_si128(t1, ks[10]);
! 212: t1 = _mm_xor_si128(t1, d1);
! 213: _mm_storeu_si128(&b, t1);
! 214:
! 215: memcpy(bo + blocks, &b, rem);
! 216: }
! 217: }
! 218:
! 219: /**
! 220: * AES-192 CTR encryption
! 221: */
! 222: static void encrypt_ctr192(private_aesni_ctr_t *this,
! 223: size_t len, u_char *in, u_char *out)
! 224: {
! 225: __m128i t1, t2, t3, t4;
! 226: __m128i d1, d2, d3, d4;
! 227: __m128i *ks, state, b, *bi, *bo;
! 228: u_int i, blocks, pblocks, rem;
! 229:
! 230: state = _mm_load_si128((__m128i*)&this->state);
! 231: blocks = len / AES_BLOCK_SIZE;
! 232: pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
! 233: rem = len % AES_BLOCK_SIZE;
! 234: bi = (__m128i*)in;
! 235: bo = (__m128i*)out;
! 236:
! 237: ks = this->key->schedule;
! 238:
! 239: for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
! 240: {
! 241: d1 = _mm_loadu_si128(bi + i + 0);
! 242: d2 = _mm_loadu_si128(bi + i + 1);
! 243: d3 = _mm_loadu_si128(bi + i + 2);
! 244: d4 = _mm_loadu_si128(bi + i + 3);
! 245:
! 246: t1 = _mm_xor_si128(state, ks[0]);
! 247: state = increment_be(state);
! 248: t2 = _mm_xor_si128(state, ks[0]);
! 249: state = increment_be(state);
! 250: t3 = _mm_xor_si128(state, ks[0]);
! 251: state = increment_be(state);
! 252: t4 = _mm_xor_si128(state, ks[0]);
! 253: state = increment_be(state);
! 254:
! 255: t1 = _mm_aesenc_si128(t1, ks[1]);
! 256: t2 = _mm_aesenc_si128(t2, ks[1]);
! 257: t3 = _mm_aesenc_si128(t3, ks[1]);
! 258: t4 = _mm_aesenc_si128(t4, ks[1]);
! 259: t1 = _mm_aesenc_si128(t1, ks[2]);
! 260: t2 = _mm_aesenc_si128(t2, ks[2]);
! 261: t3 = _mm_aesenc_si128(t3, ks[2]);
! 262: t4 = _mm_aesenc_si128(t4, ks[2]);
! 263: t1 = _mm_aesenc_si128(t1, ks[3]);
! 264: t2 = _mm_aesenc_si128(t2, ks[3]);
! 265: t3 = _mm_aesenc_si128(t3, ks[3]);
! 266: t4 = _mm_aesenc_si128(t4, ks[3]);
! 267: t1 = _mm_aesenc_si128(t1, ks[4]);
! 268: t2 = _mm_aesenc_si128(t2, ks[4]);
! 269: t3 = _mm_aesenc_si128(t3, ks[4]);
! 270: t4 = _mm_aesenc_si128(t4, ks[4]);
! 271: t1 = _mm_aesenc_si128(t1, ks[5]);
! 272: t2 = _mm_aesenc_si128(t2, ks[5]);
! 273: t3 = _mm_aesenc_si128(t3, ks[5]);
! 274: t4 = _mm_aesenc_si128(t4, ks[5]);
! 275: t1 = _mm_aesenc_si128(t1, ks[6]);
! 276: t2 = _mm_aesenc_si128(t2, ks[6]);
! 277: t3 = _mm_aesenc_si128(t3, ks[6]);
! 278: t4 = _mm_aesenc_si128(t4, ks[6]);
! 279: t1 = _mm_aesenc_si128(t1, ks[7]);
! 280: t2 = _mm_aesenc_si128(t2, ks[7]);
! 281: t3 = _mm_aesenc_si128(t3, ks[7]);
! 282: t4 = _mm_aesenc_si128(t4, ks[7]);
! 283: t1 = _mm_aesenc_si128(t1, ks[8]);
! 284: t2 = _mm_aesenc_si128(t2, ks[8]);
! 285: t3 = _mm_aesenc_si128(t3, ks[8]);
! 286: t4 = _mm_aesenc_si128(t4, ks[8]);
! 287: t1 = _mm_aesenc_si128(t1, ks[9]);
! 288: t2 = _mm_aesenc_si128(t2, ks[9]);
! 289: t3 = _mm_aesenc_si128(t3, ks[9]);
! 290: t4 = _mm_aesenc_si128(t4, ks[9]);
! 291: t1 = _mm_aesenc_si128(t1, ks[10]);
! 292: t2 = _mm_aesenc_si128(t2, ks[10]);
! 293: t3 = _mm_aesenc_si128(t3, ks[10]);
! 294: t4 = _mm_aesenc_si128(t4, ks[10]);
! 295: t1 = _mm_aesenc_si128(t1, ks[11]);
! 296: t2 = _mm_aesenc_si128(t2, ks[11]);
! 297: t3 = _mm_aesenc_si128(t3, ks[11]);
! 298: t4 = _mm_aesenc_si128(t4, ks[11]);
! 299:
! 300: t1 = _mm_aesenclast_si128(t1, ks[12]);
! 301: t2 = _mm_aesenclast_si128(t2, ks[12]);
! 302: t3 = _mm_aesenclast_si128(t3, ks[12]);
! 303: t4 = _mm_aesenclast_si128(t4, ks[12]);
! 304: t1 = _mm_xor_si128(t1, d1);
! 305: t2 = _mm_xor_si128(t2, d2);
! 306: t3 = _mm_xor_si128(t3, d3);
! 307: t4 = _mm_xor_si128(t4, d4);
! 308: _mm_storeu_si128(bo + i + 0, t1);
! 309: _mm_storeu_si128(bo + i + 1, t2);
! 310: _mm_storeu_si128(bo + i + 2, t3);
! 311: _mm_storeu_si128(bo + i + 3, t4);
! 312: }
! 313:
! 314: for (i = pblocks; i < blocks; i++)
! 315: {
! 316: d1 = _mm_loadu_si128(bi + i);
! 317:
! 318: t1 = _mm_xor_si128(state, ks[0]);
! 319: state = increment_be(state);
! 320:
! 321: t1 = _mm_aesenc_si128(t1, ks[1]);
! 322: t1 = _mm_aesenc_si128(t1, ks[2]);
! 323: t1 = _mm_aesenc_si128(t1, ks[3]);
! 324: t1 = _mm_aesenc_si128(t1, ks[4]);
! 325: t1 = _mm_aesenc_si128(t1, ks[5]);
! 326: t1 = _mm_aesenc_si128(t1, ks[6]);
! 327: t1 = _mm_aesenc_si128(t1, ks[7]);
! 328: t1 = _mm_aesenc_si128(t1, ks[8]);
! 329: t1 = _mm_aesenc_si128(t1, ks[9]);
! 330: t1 = _mm_aesenc_si128(t1, ks[10]);
! 331: t1 = _mm_aesenc_si128(t1, ks[11]);
! 332:
! 333: t1 = _mm_aesenclast_si128(t1, ks[12]);
! 334: t1 = _mm_xor_si128(t1, d1);
! 335: _mm_storeu_si128(bo + i, t1);
! 336: }
! 337:
! 338: if (rem)
! 339: {
! 340: memset(&b, 0, sizeof(b));
! 341: memcpy(&b, bi + blocks, rem);
! 342:
! 343: d1 = _mm_loadu_si128(&b);
! 344: t1 = _mm_xor_si128(state, ks[0]);
! 345:
! 346: t1 = _mm_aesenc_si128(t1, ks[1]);
! 347: t1 = _mm_aesenc_si128(t1, ks[2]);
! 348: t1 = _mm_aesenc_si128(t1, ks[3]);
! 349: t1 = _mm_aesenc_si128(t1, ks[4]);
! 350: t1 = _mm_aesenc_si128(t1, ks[5]);
! 351: t1 = _mm_aesenc_si128(t1, ks[6]);
! 352: t1 = _mm_aesenc_si128(t1, ks[7]);
! 353: t1 = _mm_aesenc_si128(t1, ks[8]);
! 354: t1 = _mm_aesenc_si128(t1, ks[9]);
! 355: t1 = _mm_aesenc_si128(t1, ks[10]);
! 356: t1 = _mm_aesenc_si128(t1, ks[11]);
! 357:
! 358: t1 = _mm_aesenclast_si128(t1, ks[12]);
! 359: t1 = _mm_xor_si128(t1, d1);
! 360: _mm_storeu_si128(&b, t1);
! 361:
! 362: memcpy(bo + blocks, &b, rem);
! 363: }
! 364: }
! 365:
! 366: /**
! 367: * AES-256 CTR encryption
! 368: */
! 369: static void encrypt_ctr256(private_aesni_ctr_t *this,
! 370: size_t len, u_char *in, u_char *out)
! 371: {
! 372: __m128i t1, t2, t3, t4;
! 373: __m128i d1, d2, d3, d4;
! 374: __m128i *ks, state, b, *bi, *bo;
! 375: u_int i, blocks, pblocks, rem;
! 376:
! 377: state = _mm_load_si128((__m128i*)&this->state);
! 378: blocks = len / AES_BLOCK_SIZE;
! 379: pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
! 380: rem = len % AES_BLOCK_SIZE;
! 381: bi = (__m128i*)in;
! 382: bo = (__m128i*)out;
! 383:
! 384: ks = this->key->schedule;
! 385:
! 386: for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
! 387: {
! 388: d1 = _mm_loadu_si128(bi + i + 0);
! 389: d2 = _mm_loadu_si128(bi + i + 1);
! 390: d3 = _mm_loadu_si128(bi + i + 2);
! 391: d4 = _mm_loadu_si128(bi + i + 3);
! 392:
! 393: t1 = _mm_xor_si128(state, ks[0]);
! 394: state = increment_be(state);
! 395: t2 = _mm_xor_si128(state, ks[0]);
! 396: state = increment_be(state);
! 397: t3 = _mm_xor_si128(state, ks[0]);
! 398: state = increment_be(state);
! 399: t4 = _mm_xor_si128(state, ks[0]);
! 400: state = increment_be(state);
! 401:
! 402: t1 = _mm_aesenc_si128(t1, ks[1]);
! 403: t2 = _mm_aesenc_si128(t2, ks[1]);
! 404: t3 = _mm_aesenc_si128(t3, ks[1]);
! 405: t4 = _mm_aesenc_si128(t4, ks[1]);
! 406: t1 = _mm_aesenc_si128(t1, ks[2]);
! 407: t2 = _mm_aesenc_si128(t2, ks[2]);
! 408: t3 = _mm_aesenc_si128(t3, ks[2]);
! 409: t4 = _mm_aesenc_si128(t4, ks[2]);
! 410: t1 = _mm_aesenc_si128(t1, ks[3]);
! 411: t2 = _mm_aesenc_si128(t2, ks[3]);
! 412: t3 = _mm_aesenc_si128(t3, ks[3]);
! 413: t4 = _mm_aesenc_si128(t4, ks[3]);
! 414: t1 = _mm_aesenc_si128(t1, ks[4]);
! 415: t2 = _mm_aesenc_si128(t2, ks[4]);
! 416: t3 = _mm_aesenc_si128(t3, ks[4]);
! 417: t4 = _mm_aesenc_si128(t4, ks[4]);
! 418: t1 = _mm_aesenc_si128(t1, ks[5]);
! 419: t2 = _mm_aesenc_si128(t2, ks[5]);
! 420: t3 = _mm_aesenc_si128(t3, ks[5]);
! 421: t4 = _mm_aesenc_si128(t4, ks[5]);
! 422: t1 = _mm_aesenc_si128(t1, ks[6]);
! 423: t2 = _mm_aesenc_si128(t2, ks[6]);
! 424: t3 = _mm_aesenc_si128(t3, ks[6]);
! 425: t4 = _mm_aesenc_si128(t4, ks[6]);
! 426: t1 = _mm_aesenc_si128(t1, ks[7]);
! 427: t2 = _mm_aesenc_si128(t2, ks[7]);
! 428: t3 = _mm_aesenc_si128(t3, ks[7]);
! 429: t4 = _mm_aesenc_si128(t4, ks[7]);
! 430: t1 = _mm_aesenc_si128(t1, ks[8]);
! 431: t2 = _mm_aesenc_si128(t2, ks[8]);
! 432: t3 = _mm_aesenc_si128(t3, ks[8]);
! 433: t4 = _mm_aesenc_si128(t4, ks[8]);
! 434: t1 = _mm_aesenc_si128(t1, ks[9]);
! 435: t2 = _mm_aesenc_si128(t2, ks[9]);
! 436: t3 = _mm_aesenc_si128(t3, ks[9]);
! 437: t4 = _mm_aesenc_si128(t4, ks[9]);
! 438: t1 = _mm_aesenc_si128(t1, ks[10]);
! 439: t2 = _mm_aesenc_si128(t2, ks[10]);
! 440: t3 = _mm_aesenc_si128(t3, ks[10]);
! 441: t4 = _mm_aesenc_si128(t4, ks[10]);
! 442: t1 = _mm_aesenc_si128(t1, ks[11]);
! 443: t2 = _mm_aesenc_si128(t2, ks[11]);
! 444: t3 = _mm_aesenc_si128(t3, ks[11]);
! 445: t4 = _mm_aesenc_si128(t4, ks[11]);
! 446: t1 = _mm_aesenc_si128(t1, ks[12]);
! 447: t2 = _mm_aesenc_si128(t2, ks[12]);
! 448: t3 = _mm_aesenc_si128(t3, ks[12]);
! 449: t4 = _mm_aesenc_si128(t4, ks[12]);
! 450: t1 = _mm_aesenc_si128(t1, ks[13]);
! 451: t2 = _mm_aesenc_si128(t2, ks[13]);
! 452: t3 = _mm_aesenc_si128(t3, ks[13]);
! 453: t4 = _mm_aesenc_si128(t4, ks[13]);
! 454:
! 455: t1 = _mm_aesenclast_si128(t1, ks[14]);
! 456: t2 = _mm_aesenclast_si128(t2, ks[14]);
! 457: t3 = _mm_aesenclast_si128(t3, ks[14]);
! 458: t4 = _mm_aesenclast_si128(t4, ks[14]);
! 459: t1 = _mm_xor_si128(t1, d1);
! 460: t2 = _mm_xor_si128(t2, d2);
! 461: t3 = _mm_xor_si128(t3, d3);
! 462: t4 = _mm_xor_si128(t4, d4);
! 463: _mm_storeu_si128(bo + i + 0, t1);
! 464: _mm_storeu_si128(bo + i + 1, t2);
! 465: _mm_storeu_si128(bo + i + 2, t3);
! 466: _mm_storeu_si128(bo + i + 3, t4);
! 467: }
! 468:
! 469: for (i = pblocks; i < blocks; i++)
! 470: {
! 471: d1 = _mm_loadu_si128(bi + i);
! 472:
! 473: t1 = _mm_xor_si128(state, ks[0]);
! 474: state = increment_be(state);
! 475:
! 476: t1 = _mm_aesenc_si128(t1, ks[1]);
! 477: t1 = _mm_aesenc_si128(t1, ks[2]);
! 478: t1 = _mm_aesenc_si128(t1, ks[3]);
! 479: t1 = _mm_aesenc_si128(t1, ks[4]);
! 480: t1 = _mm_aesenc_si128(t1, ks[5]);
! 481: t1 = _mm_aesenc_si128(t1, ks[6]);
! 482: t1 = _mm_aesenc_si128(t1, ks[7]);
! 483: t1 = _mm_aesenc_si128(t1, ks[8]);
! 484: t1 = _mm_aesenc_si128(t1, ks[9]);
! 485: t1 = _mm_aesenc_si128(t1, ks[10]);
! 486: t1 = _mm_aesenc_si128(t1, ks[11]);
! 487: t1 = _mm_aesenc_si128(t1, ks[12]);
! 488: t1 = _mm_aesenc_si128(t1, ks[13]);
! 489:
! 490: t1 = _mm_aesenclast_si128(t1, ks[14]);
! 491: t1 = _mm_xor_si128(t1, d1);
! 492: _mm_storeu_si128(bo + i, t1);
! 493: }
! 494:
! 495: if (rem)
! 496: {
! 497: memset(&b, 0, sizeof(b));
! 498: memcpy(&b, bi + blocks, rem);
! 499:
! 500: d1 = _mm_loadu_si128(&b);
! 501: t1 = _mm_xor_si128(state, ks[0]);
! 502:
! 503: t1 = _mm_aesenc_si128(t1, ks[1]);
! 504: t1 = _mm_aesenc_si128(t1, ks[2]);
! 505: t1 = _mm_aesenc_si128(t1, ks[3]);
! 506: t1 = _mm_aesenc_si128(t1, ks[4]);
! 507: t1 = _mm_aesenc_si128(t1, ks[5]);
! 508: t1 = _mm_aesenc_si128(t1, ks[6]);
! 509: t1 = _mm_aesenc_si128(t1, ks[7]);
! 510: t1 = _mm_aesenc_si128(t1, ks[8]);
! 511: t1 = _mm_aesenc_si128(t1, ks[9]);
! 512: t1 = _mm_aesenc_si128(t1, ks[10]);
! 513: t1 = _mm_aesenc_si128(t1, ks[11]);
! 514: t1 = _mm_aesenc_si128(t1, ks[12]);
! 515: t1 = _mm_aesenc_si128(t1, ks[13]);
! 516:
! 517: t1 = _mm_aesenclast_si128(t1, ks[14]);
! 518: t1 = _mm_xor_si128(t1, d1);
! 519: _mm_storeu_si128(&b, t1);
! 520:
! 521: memcpy(bo + blocks, &b, rem);
! 522: }
! 523: }
! 524:
! 525: METHOD(crypter_t, crypt, bool,
! 526: private_aesni_ctr_t *this, chunk_t in, chunk_t iv, chunk_t *out)
! 527: {
! 528: u_char *buf;
! 529:
! 530: if (!this->key || iv.len != sizeof(this->state.iv))
! 531: {
! 532: return FALSE;
! 533: }
! 534: memcpy(this->state.iv, iv.ptr, sizeof(this->state.iv));
! 535: this->state.counter = htonl(1);
! 536:
! 537: buf = in.ptr;
! 538: if (out)
! 539: {
! 540: *out = chunk_alloc(in.len);
! 541: buf = out->ptr;
! 542: }
! 543: this->crypt(this, in.len, in.ptr, buf);
! 544: return TRUE;
! 545: }
! 546:
! 547: METHOD(crypter_t, get_block_size, size_t,
! 548: private_aesni_ctr_t *this)
! 549: {
! 550: return 1;
! 551: }
! 552:
! 553: METHOD(crypter_t, get_iv_size, size_t,
! 554: private_aesni_ctr_t *this)
! 555: {
! 556: return sizeof(this->state.iv);
! 557: }
! 558:
! 559: METHOD(crypter_t, get_key_size, size_t,
! 560: private_aesni_ctr_t *this)
! 561: {
! 562: return this->key_size + sizeof(this->state.nonce);
! 563: }
! 564:
! 565: METHOD(crypter_t, set_key, bool,
! 566: private_aesni_ctr_t *this, chunk_t key)
! 567: {
! 568: if (key.len != get_key_size(this))
! 569: {
! 570: return FALSE;
! 571: }
! 572:
! 573: memcpy(this->state.nonce, key.ptr + key.len - sizeof(this->state.nonce),
! 574: sizeof(this->state.nonce));
! 575: key.len -= sizeof(this->state.nonce);
! 576:
! 577: DESTROY_IF(this->key);
! 578: this->key = aesni_key_create(TRUE, key);
! 579:
! 580: return this->key;
! 581: }
! 582:
! 583: METHOD(crypter_t, destroy, void,
! 584: private_aesni_ctr_t *this)
! 585: {
! 586: DESTROY_IF(this->key);
! 587: free_align(this);
! 588: }
! 589:
! 590: /**
! 591: * See header
! 592: */
! 593: aesni_ctr_t *aesni_ctr_create(encryption_algorithm_t algo, size_t key_size)
! 594: {
! 595: private_aesni_ctr_t *this;
! 596:
! 597: if (algo != ENCR_AES_CTR)
! 598: {
! 599: return NULL;
! 600: }
! 601: switch (key_size)
! 602: {
! 603: case 0:
! 604: key_size = 16;
! 605: break;
! 606: case 16:
! 607: case 24:
! 608: case 32:
! 609: break;
! 610: default:
! 611: return NULL;
! 612: }
! 613:
! 614: INIT_ALIGN(this, sizeof(__m128i),
! 615: .public = {
! 616: .crypter = {
! 617: .encrypt = _crypt,
! 618: .decrypt = _crypt,
! 619: .get_block_size = _get_block_size,
! 620: .get_iv_size = _get_iv_size,
! 621: .get_key_size = _get_key_size,
! 622: .set_key = _set_key,
! 623: .destroy = _destroy,
! 624: },
! 625: },
! 626: .key_size = key_size,
! 627: );
! 628:
! 629: switch (key_size)
! 630: {
! 631: case 16:
! 632: this->crypt = encrypt_ctr128;
! 633: break;
! 634: case 24:
! 635: this->crypt = encrypt_ctr192;
! 636: break;
! 637: case 32:
! 638: this->crypt = encrypt_ctr256;
! 639: break;
! 640: }
! 641:
! 642: return &this->public;
! 643: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>