Annotation of embedaddon/strongswan/src/libstrongswan/plugins/aesni/aesni_ctr.c, revision 1.1

1.1     ! misho       1: /*
        !             2:  * Copyright (C) 2015 Martin Willi
        !             3:  * Copyright (C) 2015 revosec AG
        !             4:  *
        !             5:  * This program is free software; you can redistribute it and/or modify it
        !             6:  * under the terms of the GNU General Public License as published by the
        !             7:  * Free Software Foundation; either version 2 of the License, or (at your
        !             8:  * option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
        !             9:  *
        !            10:  * This program is distributed in the hope that it will be useful, but
        !            11:  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            12:  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
        !            13:  * for more details.
        !            14:  */
        !            15: 
        !            16: #include "aesni_ctr.h"
        !            17: #include "aesni_key.h"
        !            18: 
        !            19: #include <tmmintrin.h>
        !            20: 
        !            21: /**
        !            22:  * Pipeline parallelism we use for CTR en/decryption
        !            23:  */
        !            24: #define CTR_CRYPT_PARALLELISM 4
        !            25: 
        !            26: typedef struct private_aesni_ctr_t private_aesni_ctr_t;
        !            27: 
        !            28: /**
        !            29:  * CTR en/decryption method type
        !            30:  */
        !            31: typedef void (*aesni_ctr_fn_t)(private_aesni_ctr_t*, size_t, u_char*, u_char*);
        !            32: 
        !            33: /**
        !            34:  * Private data of an aesni_ctr_t object.
        !            35:  */
        !            36: struct private_aesni_ctr_t {
        !            37: 
        !            38:        /**
        !            39:         * Public aesni_ctr_t interface.
        !            40:         */
        !            41:        aesni_ctr_t public;
        !            42: 
        !            43:        /**
        !            44:         * Key size
        !            45:         */
        !            46:        u_int key_size;
        !            47: 
        !            48:        /**
        !            49:         * Key schedule
        !            50:         */
        !            51:        aesni_key_t *key;
        !            52: 
        !            53:        /**
        !            54:         * Encryption method
        !            55:         */
        !            56:        aesni_ctr_fn_t crypt;
        !            57: 
        !            58:        /**
        !            59:         * Counter state
        !            60:         */
        !            61:        struct {
        !            62:                char nonce[4];
        !            63:                char iv[8];
        !            64:                uint32_t counter;
        !            65:        } __attribute__((packed, aligned(sizeof(__m128i)))) state;
        !            66: };
        !            67: 
        !            68: /**
        !            69:  * Do big-endian increment on x
        !            70:  */
        !            71: static inline __m128i increment_be(__m128i x)
        !            72: {
        !            73:        __m128i swap;
        !            74: 
        !            75:        swap = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        !            76: 
        !            77:        x = _mm_shuffle_epi8(x, swap);
        !            78:        x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1));
        !            79:        x = _mm_shuffle_epi8(x, swap);
        !            80: 
        !            81:        return x;
        !            82: }
        !            83: 
        !            84: /**
        !            85:  * AES-128 CTR encryption
        !            86:  */
        !            87: static void encrypt_ctr128(private_aesni_ctr_t *this,
        !            88:                                                   size_t len, u_char *in, u_char *out)
        !            89: {
        !            90:        __m128i t1, t2, t3, t4;
        !            91:        __m128i d1, d2, d3, d4;
        !            92:        __m128i *ks, state, b, *bi, *bo;
        !            93:        u_int i, blocks, pblocks, rem;
        !            94: 
        !            95:        state = _mm_load_si128((__m128i*)&this->state);
        !            96:        blocks = len / AES_BLOCK_SIZE;
        !            97:        pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
        !            98:        rem = len % AES_BLOCK_SIZE;
        !            99:        bi = (__m128i*)in;
        !           100:        bo = (__m128i*)out;
        !           101: 
        !           102:        ks = this->key->schedule;
        !           103: 
        !           104:        for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
        !           105:        {
        !           106:                d1 = _mm_loadu_si128(bi + i + 0);
        !           107:                d2 = _mm_loadu_si128(bi + i + 1);
        !           108:                d3 = _mm_loadu_si128(bi + i + 2);
        !           109:                d4 = _mm_loadu_si128(bi + i + 3);
        !           110: 
        !           111:                t1 = _mm_xor_si128(state, ks[0]);
        !           112:                state = increment_be(state);
        !           113:                t2 = _mm_xor_si128(state, ks[0]);
        !           114:                state = increment_be(state);
        !           115:                t3 = _mm_xor_si128(state, ks[0]);
        !           116:                state = increment_be(state);
        !           117:                t4 = _mm_xor_si128(state, ks[0]);
        !           118:                state = increment_be(state);
        !           119: 
        !           120:                t1 = _mm_aesenc_si128(t1, ks[1]);
        !           121:                t2 = _mm_aesenc_si128(t2, ks[1]);
        !           122:                t3 = _mm_aesenc_si128(t3, ks[1]);
        !           123:                t4 = _mm_aesenc_si128(t4, ks[1]);
        !           124:                t1 = _mm_aesenc_si128(t1, ks[2]);
        !           125:                t2 = _mm_aesenc_si128(t2, ks[2]);
        !           126:                t3 = _mm_aesenc_si128(t3, ks[2]);
        !           127:                t4 = _mm_aesenc_si128(t4, ks[2]);
        !           128:                t1 = _mm_aesenc_si128(t1, ks[3]);
        !           129:                t2 = _mm_aesenc_si128(t2, ks[3]);
        !           130:                t3 = _mm_aesenc_si128(t3, ks[3]);
        !           131:                t4 = _mm_aesenc_si128(t4, ks[3]);
        !           132:                t1 = _mm_aesenc_si128(t1, ks[4]);
        !           133:                t2 = _mm_aesenc_si128(t2, ks[4]);
        !           134:                t3 = _mm_aesenc_si128(t3, ks[4]);
        !           135:                t4 = _mm_aesenc_si128(t4, ks[4]);
        !           136:                t1 = _mm_aesenc_si128(t1, ks[5]);
        !           137:                t2 = _mm_aesenc_si128(t2, ks[5]);
        !           138:                t3 = _mm_aesenc_si128(t3, ks[5]);
        !           139:                t4 = _mm_aesenc_si128(t4, ks[5]);
        !           140:                t1 = _mm_aesenc_si128(t1, ks[6]);
        !           141:                t2 = _mm_aesenc_si128(t2, ks[6]);
        !           142:                t3 = _mm_aesenc_si128(t3, ks[6]);
        !           143:                t4 = _mm_aesenc_si128(t4, ks[6]);
        !           144:                t1 = _mm_aesenc_si128(t1, ks[7]);
        !           145:                t2 = _mm_aesenc_si128(t2, ks[7]);
        !           146:                t3 = _mm_aesenc_si128(t3, ks[7]);
        !           147:                t4 = _mm_aesenc_si128(t4, ks[7]);
        !           148:                t1 = _mm_aesenc_si128(t1, ks[8]);
        !           149:                t2 = _mm_aesenc_si128(t2, ks[8]);
        !           150:                t3 = _mm_aesenc_si128(t3, ks[8]);
        !           151:                t4 = _mm_aesenc_si128(t4, ks[8]);
        !           152:                t1 = _mm_aesenc_si128(t1, ks[9]);
        !           153:                t2 = _mm_aesenc_si128(t2, ks[9]);
        !           154:                t3 = _mm_aesenc_si128(t3, ks[9]);
        !           155:                t4 = _mm_aesenc_si128(t4, ks[9]);
        !           156: 
        !           157:                t1 = _mm_aesenclast_si128(t1, ks[10]);
        !           158:                t2 = _mm_aesenclast_si128(t2, ks[10]);
        !           159:                t3 = _mm_aesenclast_si128(t3, ks[10]);
        !           160:                t4 = _mm_aesenclast_si128(t4, ks[10]);
        !           161:                t1 = _mm_xor_si128(t1, d1);
        !           162:                t2 = _mm_xor_si128(t2, d2);
        !           163:                t3 = _mm_xor_si128(t3, d3);
        !           164:                t4 = _mm_xor_si128(t4, d4);
        !           165:                _mm_storeu_si128(bo + i + 0, t1);
        !           166:                _mm_storeu_si128(bo + i + 1, t2);
        !           167:                _mm_storeu_si128(bo + i + 2, t3);
        !           168:                _mm_storeu_si128(bo + i + 3, t4);
        !           169:        }
        !           170: 
        !           171:        for (i = pblocks; i < blocks; i++)
        !           172:        {
        !           173:                d1 = _mm_loadu_si128(bi + i);
        !           174: 
        !           175:                t1 = _mm_xor_si128(state, ks[0]);
        !           176:                state = increment_be(state);
        !           177: 
        !           178:                t1 = _mm_aesenc_si128(t1, ks[1]);
        !           179:                t1 = _mm_aesenc_si128(t1, ks[2]);
        !           180:                t1 = _mm_aesenc_si128(t1, ks[3]);
        !           181:                t1 = _mm_aesenc_si128(t1, ks[4]);
        !           182:                t1 = _mm_aesenc_si128(t1, ks[5]);
        !           183:                t1 = _mm_aesenc_si128(t1, ks[6]);
        !           184:                t1 = _mm_aesenc_si128(t1, ks[7]);
        !           185:                t1 = _mm_aesenc_si128(t1, ks[8]);
        !           186:                t1 = _mm_aesenc_si128(t1, ks[9]);
        !           187: 
        !           188:                t1 = _mm_aesenclast_si128(t1, ks[10]);
        !           189:                t1 = _mm_xor_si128(t1, d1);
        !           190:                _mm_storeu_si128(bo + i, t1);
        !           191:        }
        !           192: 
        !           193:        if (rem)
        !           194:        {
        !           195:                memset(&b, 0, sizeof(b));
        !           196:                memcpy(&b, bi + blocks, rem);
        !           197: 
        !           198:                d1 = _mm_loadu_si128(&b);
        !           199:                t1 = _mm_xor_si128(state, ks[0]);
        !           200: 
        !           201:                t1 = _mm_aesenc_si128(t1, ks[1]);
        !           202:                t1 = _mm_aesenc_si128(t1, ks[2]);
        !           203:                t1 = _mm_aesenc_si128(t1, ks[3]);
        !           204:                t1 = _mm_aesenc_si128(t1, ks[4]);
        !           205:                t1 = _mm_aesenc_si128(t1, ks[5]);
        !           206:                t1 = _mm_aesenc_si128(t1, ks[6]);
        !           207:                t1 = _mm_aesenc_si128(t1, ks[7]);
        !           208:                t1 = _mm_aesenc_si128(t1, ks[8]);
        !           209:                t1 = _mm_aesenc_si128(t1, ks[9]);
        !           210: 
        !           211:                t1 = _mm_aesenclast_si128(t1, ks[10]);
        !           212:                t1 = _mm_xor_si128(t1, d1);
        !           213:                _mm_storeu_si128(&b, t1);
        !           214: 
        !           215:                memcpy(bo + blocks, &b, rem);
        !           216:        }
        !           217: }
        !           218: 
        !           219: /**
        !           220:  * AES-192 CTR encryption
        !           221:  */
        !           222: static void encrypt_ctr192(private_aesni_ctr_t *this,
        !           223:                                                   size_t len, u_char *in, u_char *out)
        !           224: {
        !           225:        __m128i t1, t2, t3, t4;
        !           226:        __m128i d1, d2, d3, d4;
        !           227:        __m128i *ks, state, b, *bi, *bo;
        !           228:        u_int i, blocks, pblocks, rem;
        !           229: 
        !           230:        state = _mm_load_si128((__m128i*)&this->state);
        !           231:        blocks = len / AES_BLOCK_SIZE;
        !           232:        pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
        !           233:        rem = len % AES_BLOCK_SIZE;
        !           234:        bi = (__m128i*)in;
        !           235:        bo = (__m128i*)out;
        !           236: 
        !           237:        ks = this->key->schedule;
        !           238: 
        !           239:        for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
        !           240:        {
        !           241:                d1 = _mm_loadu_si128(bi + i + 0);
        !           242:                d2 = _mm_loadu_si128(bi + i + 1);
        !           243:                d3 = _mm_loadu_si128(bi + i + 2);
        !           244:                d4 = _mm_loadu_si128(bi + i + 3);
        !           245: 
        !           246:                t1 = _mm_xor_si128(state, ks[0]);
        !           247:                state = increment_be(state);
        !           248:                t2 = _mm_xor_si128(state, ks[0]);
        !           249:                state = increment_be(state);
        !           250:                t3 = _mm_xor_si128(state, ks[0]);
        !           251:                state = increment_be(state);
        !           252:                t4 = _mm_xor_si128(state, ks[0]);
        !           253:                state = increment_be(state);
        !           254: 
        !           255:                t1 = _mm_aesenc_si128(t1, ks[1]);
        !           256:                t2 = _mm_aesenc_si128(t2, ks[1]);
        !           257:                t3 = _mm_aesenc_si128(t3, ks[1]);
        !           258:                t4 = _mm_aesenc_si128(t4, ks[1]);
        !           259:                t1 = _mm_aesenc_si128(t1, ks[2]);
        !           260:                t2 = _mm_aesenc_si128(t2, ks[2]);
        !           261:                t3 = _mm_aesenc_si128(t3, ks[2]);
        !           262:                t4 = _mm_aesenc_si128(t4, ks[2]);
        !           263:                t1 = _mm_aesenc_si128(t1, ks[3]);
        !           264:                t2 = _mm_aesenc_si128(t2, ks[3]);
        !           265:                t3 = _mm_aesenc_si128(t3, ks[3]);
        !           266:                t4 = _mm_aesenc_si128(t4, ks[3]);
        !           267:                t1 = _mm_aesenc_si128(t1, ks[4]);
        !           268:                t2 = _mm_aesenc_si128(t2, ks[4]);
        !           269:                t3 = _mm_aesenc_si128(t3, ks[4]);
        !           270:                t4 = _mm_aesenc_si128(t4, ks[4]);
        !           271:                t1 = _mm_aesenc_si128(t1, ks[5]);
        !           272:                t2 = _mm_aesenc_si128(t2, ks[5]);
        !           273:                t3 = _mm_aesenc_si128(t3, ks[5]);
        !           274:                t4 = _mm_aesenc_si128(t4, ks[5]);
        !           275:                t1 = _mm_aesenc_si128(t1, ks[6]);
        !           276:                t2 = _mm_aesenc_si128(t2, ks[6]);
        !           277:                t3 = _mm_aesenc_si128(t3, ks[6]);
        !           278:                t4 = _mm_aesenc_si128(t4, ks[6]);
        !           279:                t1 = _mm_aesenc_si128(t1, ks[7]);
        !           280:                t2 = _mm_aesenc_si128(t2, ks[7]);
        !           281:                t3 = _mm_aesenc_si128(t3, ks[7]);
        !           282:                t4 = _mm_aesenc_si128(t4, ks[7]);
        !           283:                t1 = _mm_aesenc_si128(t1, ks[8]);
        !           284:                t2 = _mm_aesenc_si128(t2, ks[8]);
        !           285:                t3 = _mm_aesenc_si128(t3, ks[8]);
        !           286:                t4 = _mm_aesenc_si128(t4, ks[8]);
        !           287:                t1 = _mm_aesenc_si128(t1, ks[9]);
        !           288:                t2 = _mm_aesenc_si128(t2, ks[9]);
        !           289:                t3 = _mm_aesenc_si128(t3, ks[9]);
        !           290:                t4 = _mm_aesenc_si128(t4, ks[9]);
        !           291:                t1 = _mm_aesenc_si128(t1, ks[10]);
        !           292:                t2 = _mm_aesenc_si128(t2, ks[10]);
        !           293:                t3 = _mm_aesenc_si128(t3, ks[10]);
        !           294:                t4 = _mm_aesenc_si128(t4, ks[10]);
        !           295:                t1 = _mm_aesenc_si128(t1, ks[11]);
        !           296:                t2 = _mm_aesenc_si128(t2, ks[11]);
        !           297:                t3 = _mm_aesenc_si128(t3, ks[11]);
        !           298:                t4 = _mm_aesenc_si128(t4, ks[11]);
        !           299: 
        !           300:                t1 = _mm_aesenclast_si128(t1, ks[12]);
        !           301:                t2 = _mm_aesenclast_si128(t2, ks[12]);
        !           302:                t3 = _mm_aesenclast_si128(t3, ks[12]);
        !           303:                t4 = _mm_aesenclast_si128(t4, ks[12]);
        !           304:                t1 = _mm_xor_si128(t1, d1);
        !           305:                t2 = _mm_xor_si128(t2, d2);
        !           306:                t3 = _mm_xor_si128(t3, d3);
        !           307:                t4 = _mm_xor_si128(t4, d4);
        !           308:                _mm_storeu_si128(bo + i + 0, t1);
        !           309:                _mm_storeu_si128(bo + i + 1, t2);
        !           310:                _mm_storeu_si128(bo + i + 2, t3);
        !           311:                _mm_storeu_si128(bo + i + 3, t4);
        !           312:        }
        !           313: 
        !           314:        for (i = pblocks; i < blocks; i++)
        !           315:        {
        !           316:                d1 = _mm_loadu_si128(bi + i);
        !           317: 
        !           318:                t1 = _mm_xor_si128(state, ks[0]);
        !           319:                state = increment_be(state);
        !           320: 
        !           321:                t1 = _mm_aesenc_si128(t1, ks[1]);
        !           322:                t1 = _mm_aesenc_si128(t1, ks[2]);
        !           323:                t1 = _mm_aesenc_si128(t1, ks[3]);
        !           324:                t1 = _mm_aesenc_si128(t1, ks[4]);
        !           325:                t1 = _mm_aesenc_si128(t1, ks[5]);
        !           326:                t1 = _mm_aesenc_si128(t1, ks[6]);
        !           327:                t1 = _mm_aesenc_si128(t1, ks[7]);
        !           328:                t1 = _mm_aesenc_si128(t1, ks[8]);
        !           329:                t1 = _mm_aesenc_si128(t1, ks[9]);
        !           330:                t1 = _mm_aesenc_si128(t1, ks[10]);
        !           331:                t1 = _mm_aesenc_si128(t1, ks[11]);
        !           332: 
        !           333:                t1 = _mm_aesenclast_si128(t1, ks[12]);
        !           334:                t1 = _mm_xor_si128(t1, d1);
        !           335:                _mm_storeu_si128(bo + i, t1);
        !           336:        }
        !           337: 
        !           338:        if (rem)
        !           339:        {
        !           340:                memset(&b, 0, sizeof(b));
        !           341:                memcpy(&b, bi + blocks, rem);
        !           342: 
        !           343:                d1 = _mm_loadu_si128(&b);
        !           344:                t1 = _mm_xor_si128(state, ks[0]);
        !           345: 
        !           346:                t1 = _mm_aesenc_si128(t1, ks[1]);
        !           347:                t1 = _mm_aesenc_si128(t1, ks[2]);
        !           348:                t1 = _mm_aesenc_si128(t1, ks[3]);
        !           349:                t1 = _mm_aesenc_si128(t1, ks[4]);
        !           350:                t1 = _mm_aesenc_si128(t1, ks[5]);
        !           351:                t1 = _mm_aesenc_si128(t1, ks[6]);
        !           352:                t1 = _mm_aesenc_si128(t1, ks[7]);
        !           353:                t1 = _mm_aesenc_si128(t1, ks[8]);
        !           354:                t1 = _mm_aesenc_si128(t1, ks[9]);
        !           355:                t1 = _mm_aesenc_si128(t1, ks[10]);
        !           356:                t1 = _mm_aesenc_si128(t1, ks[11]);
        !           357: 
        !           358:                t1 = _mm_aesenclast_si128(t1, ks[12]);
        !           359:                t1 = _mm_xor_si128(t1, d1);
        !           360:                _mm_storeu_si128(&b, t1);
        !           361: 
        !           362:                memcpy(bo + blocks, &b, rem);
        !           363:        }
        !           364: }
        !           365: 
        !           366: /**
        !           367:  * AES-256 CTR encryption
        !           368:  */
        !           369: static void encrypt_ctr256(private_aesni_ctr_t *this,
        !           370:                                                   size_t len, u_char *in, u_char *out)
        !           371: {
        !           372:        __m128i t1, t2, t3, t4;
        !           373:        __m128i d1, d2, d3, d4;
        !           374:        __m128i *ks, state, b, *bi, *bo;
        !           375:        u_int i, blocks, pblocks, rem;
        !           376: 
        !           377:        state = _mm_load_si128((__m128i*)&this->state);
        !           378:        blocks = len / AES_BLOCK_SIZE;
        !           379:        pblocks = blocks - (blocks % CTR_CRYPT_PARALLELISM);
        !           380:        rem = len % AES_BLOCK_SIZE;
        !           381:        bi = (__m128i*)in;
        !           382:        bo = (__m128i*)out;
        !           383: 
        !           384:        ks = this->key->schedule;
        !           385: 
        !           386:        for (i = 0; i < pblocks; i += CTR_CRYPT_PARALLELISM)
        !           387:        {
        !           388:                d1 = _mm_loadu_si128(bi + i + 0);
        !           389:                d2 = _mm_loadu_si128(bi + i + 1);
        !           390:                d3 = _mm_loadu_si128(bi + i + 2);
        !           391:                d4 = _mm_loadu_si128(bi + i + 3);
        !           392: 
        !           393:                t1 = _mm_xor_si128(state, ks[0]);
        !           394:                state = increment_be(state);
        !           395:                t2 = _mm_xor_si128(state, ks[0]);
        !           396:                state = increment_be(state);
        !           397:                t3 = _mm_xor_si128(state, ks[0]);
        !           398:                state = increment_be(state);
        !           399:                t4 = _mm_xor_si128(state, ks[0]);
        !           400:                state = increment_be(state);
        !           401: 
        !           402:                t1 = _mm_aesenc_si128(t1, ks[1]);
        !           403:                t2 = _mm_aesenc_si128(t2, ks[1]);
        !           404:                t3 = _mm_aesenc_si128(t3, ks[1]);
        !           405:                t4 = _mm_aesenc_si128(t4, ks[1]);
        !           406:                t1 = _mm_aesenc_si128(t1, ks[2]);
        !           407:                t2 = _mm_aesenc_si128(t2, ks[2]);
        !           408:                t3 = _mm_aesenc_si128(t3, ks[2]);
        !           409:                t4 = _mm_aesenc_si128(t4, ks[2]);
        !           410:                t1 = _mm_aesenc_si128(t1, ks[3]);
        !           411:                t2 = _mm_aesenc_si128(t2, ks[3]);
        !           412:                t3 = _mm_aesenc_si128(t3, ks[3]);
        !           413:                t4 = _mm_aesenc_si128(t4, ks[3]);
        !           414:                t1 = _mm_aesenc_si128(t1, ks[4]);
        !           415:                t2 = _mm_aesenc_si128(t2, ks[4]);
        !           416:                t3 = _mm_aesenc_si128(t3, ks[4]);
        !           417:                t4 = _mm_aesenc_si128(t4, ks[4]);
        !           418:                t1 = _mm_aesenc_si128(t1, ks[5]);
        !           419:                t2 = _mm_aesenc_si128(t2, ks[5]);
        !           420:                t3 = _mm_aesenc_si128(t3, ks[5]);
        !           421:                t4 = _mm_aesenc_si128(t4, ks[5]);
        !           422:                t1 = _mm_aesenc_si128(t1, ks[6]);
        !           423:                t2 = _mm_aesenc_si128(t2, ks[6]);
        !           424:                t3 = _mm_aesenc_si128(t3, ks[6]);
        !           425:                t4 = _mm_aesenc_si128(t4, ks[6]);
        !           426:                t1 = _mm_aesenc_si128(t1, ks[7]);
        !           427:                t2 = _mm_aesenc_si128(t2, ks[7]);
        !           428:                t3 = _mm_aesenc_si128(t3, ks[7]);
        !           429:                t4 = _mm_aesenc_si128(t4, ks[7]);
        !           430:                t1 = _mm_aesenc_si128(t1, ks[8]);
        !           431:                t2 = _mm_aesenc_si128(t2, ks[8]);
        !           432:                t3 = _mm_aesenc_si128(t3, ks[8]);
        !           433:                t4 = _mm_aesenc_si128(t4, ks[8]);
        !           434:                t1 = _mm_aesenc_si128(t1, ks[9]);
        !           435:                t2 = _mm_aesenc_si128(t2, ks[9]);
        !           436:                t3 = _mm_aesenc_si128(t3, ks[9]);
        !           437:                t4 = _mm_aesenc_si128(t4, ks[9]);
        !           438:                t1 = _mm_aesenc_si128(t1, ks[10]);
        !           439:                t2 = _mm_aesenc_si128(t2, ks[10]);
        !           440:                t3 = _mm_aesenc_si128(t3, ks[10]);
        !           441:                t4 = _mm_aesenc_si128(t4, ks[10]);
        !           442:                t1 = _mm_aesenc_si128(t1, ks[11]);
        !           443:                t2 = _mm_aesenc_si128(t2, ks[11]);
        !           444:                t3 = _mm_aesenc_si128(t3, ks[11]);
        !           445:                t4 = _mm_aesenc_si128(t4, ks[11]);
        !           446:                t1 = _mm_aesenc_si128(t1, ks[12]);
        !           447:                t2 = _mm_aesenc_si128(t2, ks[12]);
        !           448:                t3 = _mm_aesenc_si128(t3, ks[12]);
        !           449:                t4 = _mm_aesenc_si128(t4, ks[12]);
        !           450:                t1 = _mm_aesenc_si128(t1, ks[13]);
        !           451:                t2 = _mm_aesenc_si128(t2, ks[13]);
        !           452:                t3 = _mm_aesenc_si128(t3, ks[13]);
        !           453:                t4 = _mm_aesenc_si128(t4, ks[13]);
        !           454: 
        !           455:                t1 = _mm_aesenclast_si128(t1, ks[14]);
        !           456:                t2 = _mm_aesenclast_si128(t2, ks[14]);
        !           457:                t3 = _mm_aesenclast_si128(t3, ks[14]);
        !           458:                t4 = _mm_aesenclast_si128(t4, ks[14]);
        !           459:                t1 = _mm_xor_si128(t1, d1);
        !           460:                t2 = _mm_xor_si128(t2, d2);
        !           461:                t3 = _mm_xor_si128(t3, d3);
        !           462:                t4 = _mm_xor_si128(t4, d4);
        !           463:                _mm_storeu_si128(bo + i + 0, t1);
        !           464:                _mm_storeu_si128(bo + i + 1, t2);
        !           465:                _mm_storeu_si128(bo + i + 2, t3);
        !           466:                _mm_storeu_si128(bo + i + 3, t4);
        !           467:        }
        !           468: 
        !           469:        for (i = pblocks; i < blocks; i++)
        !           470:        {
        !           471:                d1 = _mm_loadu_si128(bi + i);
        !           472: 
        !           473:                t1 = _mm_xor_si128(state, ks[0]);
        !           474:                state = increment_be(state);
        !           475: 
        !           476:                t1 = _mm_aesenc_si128(t1, ks[1]);
        !           477:                t1 = _mm_aesenc_si128(t1, ks[2]);
        !           478:                t1 = _mm_aesenc_si128(t1, ks[3]);
        !           479:                t1 = _mm_aesenc_si128(t1, ks[4]);
        !           480:                t1 = _mm_aesenc_si128(t1, ks[5]);
        !           481:                t1 = _mm_aesenc_si128(t1, ks[6]);
        !           482:                t1 = _mm_aesenc_si128(t1, ks[7]);
        !           483:                t1 = _mm_aesenc_si128(t1, ks[8]);
        !           484:                t1 = _mm_aesenc_si128(t1, ks[9]);
        !           485:                t1 = _mm_aesenc_si128(t1, ks[10]);
        !           486:                t1 = _mm_aesenc_si128(t1, ks[11]);
        !           487:                t1 = _mm_aesenc_si128(t1, ks[12]);
        !           488:                t1 = _mm_aesenc_si128(t1, ks[13]);
        !           489: 
        !           490:                t1 = _mm_aesenclast_si128(t1, ks[14]);
        !           491:                t1 = _mm_xor_si128(t1, d1);
        !           492:                _mm_storeu_si128(bo + i, t1);
        !           493:        }
        !           494: 
        !           495:        if (rem)
        !           496:        {
        !           497:                memset(&b, 0, sizeof(b));
        !           498:                memcpy(&b, bi + blocks, rem);
        !           499: 
        !           500:                d1 = _mm_loadu_si128(&b);
        !           501:                t1 = _mm_xor_si128(state, ks[0]);
        !           502: 
        !           503:                t1 = _mm_aesenc_si128(t1, ks[1]);
        !           504:                t1 = _mm_aesenc_si128(t1, ks[2]);
        !           505:                t1 = _mm_aesenc_si128(t1, ks[3]);
        !           506:                t1 = _mm_aesenc_si128(t1, ks[4]);
        !           507:                t1 = _mm_aesenc_si128(t1, ks[5]);
        !           508:                t1 = _mm_aesenc_si128(t1, ks[6]);
        !           509:                t1 = _mm_aesenc_si128(t1, ks[7]);
        !           510:                t1 = _mm_aesenc_si128(t1, ks[8]);
        !           511:                t1 = _mm_aesenc_si128(t1, ks[9]);
        !           512:                t1 = _mm_aesenc_si128(t1, ks[10]);
        !           513:                t1 = _mm_aesenc_si128(t1, ks[11]);
        !           514:                t1 = _mm_aesenc_si128(t1, ks[12]);
        !           515:                t1 = _mm_aesenc_si128(t1, ks[13]);
        !           516: 
        !           517:                t1 = _mm_aesenclast_si128(t1, ks[14]);
        !           518:                t1 = _mm_xor_si128(t1, d1);
        !           519:                _mm_storeu_si128(&b, t1);
        !           520: 
        !           521:                memcpy(bo + blocks, &b, rem);
        !           522:        }
        !           523: }
        !           524: 
        !           525: METHOD(crypter_t, crypt, bool,
        !           526:        private_aesni_ctr_t *this, chunk_t in, chunk_t iv, chunk_t *out)
        !           527: {
        !           528:        u_char *buf;
        !           529: 
        !           530:        if (!this->key || iv.len != sizeof(this->state.iv))
        !           531:        {
        !           532:                return FALSE;
        !           533:        }
        !           534:        memcpy(this->state.iv, iv.ptr, sizeof(this->state.iv));
        !           535:        this->state.counter = htonl(1);
        !           536: 
        !           537:        buf = in.ptr;
        !           538:        if (out)
        !           539:        {
        !           540:                *out = chunk_alloc(in.len);
        !           541:                buf = out->ptr;
        !           542:        }
        !           543:        this->crypt(this, in.len, in.ptr, buf);
        !           544:        return TRUE;
        !           545: }
        !           546: 
        !           547: METHOD(crypter_t, get_block_size, size_t,
        !           548:        private_aesni_ctr_t *this)
        !           549: {
        !           550:        return 1;
        !           551: }
        !           552: 
        !           553: METHOD(crypter_t, get_iv_size, size_t,
        !           554:        private_aesni_ctr_t *this)
        !           555: {
        !           556:        return sizeof(this->state.iv);
        !           557: }
        !           558: 
        !           559: METHOD(crypter_t, get_key_size, size_t,
        !           560:        private_aesni_ctr_t *this)
        !           561: {
        !           562:        return this->key_size + sizeof(this->state.nonce);
        !           563: }
        !           564: 
        !           565: METHOD(crypter_t, set_key, bool,
        !           566:        private_aesni_ctr_t *this, chunk_t key)
        !           567: {
        !           568:        if (key.len != get_key_size(this))
        !           569:        {
        !           570:                return FALSE;
        !           571:        }
        !           572: 
        !           573:        memcpy(this->state.nonce, key.ptr + key.len - sizeof(this->state.nonce),
        !           574:                   sizeof(this->state.nonce));
        !           575:        key.len -= sizeof(this->state.nonce);
        !           576: 
        !           577:        DESTROY_IF(this->key);
        !           578:        this->key = aesni_key_create(TRUE, key);
        !           579: 
        !           580:        return this->key;
        !           581: }
        !           582: 
        !           583: METHOD(crypter_t, destroy, void,
        !           584:        private_aesni_ctr_t *this)
        !           585: {
        !           586:        DESTROY_IF(this->key);
        !           587:        free_align(this);
        !           588: }
        !           589: 
        !           590: /**
        !           591:  * See header
        !           592:  */
        !           593: aesni_ctr_t *aesni_ctr_create(encryption_algorithm_t algo, size_t key_size)
        !           594: {
        !           595:        private_aesni_ctr_t *this;
        !           596: 
        !           597:        if (algo != ENCR_AES_CTR)
        !           598:        {
        !           599:                return NULL;
        !           600:        }
        !           601:        switch (key_size)
        !           602:        {
        !           603:                case 0:
        !           604:                        key_size = 16;
        !           605:                        break;
        !           606:                case 16:
        !           607:                case 24:
        !           608:                case 32:
        !           609:                        break;
        !           610:                default:
        !           611:                        return NULL;
        !           612:        }
        !           613: 
        !           614:        INIT_ALIGN(this, sizeof(__m128i),
        !           615:                .public = {
        !           616:                        .crypter = {
        !           617:                                .encrypt = _crypt,
        !           618:                                .decrypt = _crypt,
        !           619:                                .get_block_size = _get_block_size,
        !           620:                                .get_iv_size = _get_iv_size,
        !           621:                                .get_key_size = _get_key_size,
        !           622:                                .set_key = _set_key,
        !           623:                                .destroy = _destroy,
        !           624:                        },
        !           625:                },
        !           626:                .key_size = key_size,
        !           627:        );
        !           628: 
        !           629:        switch (key_size)
        !           630:        {
        !           631:                case 16:
        !           632:                        this->crypt = encrypt_ctr128;
        !           633:                        break;
        !           634:                case 24:
        !           635:                        this->crypt = encrypt_ctr192;
        !           636:                        break;
        !           637:                case 32:
        !           638:                        this->crypt = encrypt_ctr256;
        !           639:                        break;
        !           640:        }
        !           641: 
        !           642:        return &this->public;
        !           643: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>