Annotation of embedaddon/strongswan/src/libstrongswan/plugins/aesni/aesni_ecb.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:  * Copyright (C) 2015 Martin Willi
                      3:  * Copyright (C) 2015 revosec AG
                      4:  *
                      5:  * Copyright (C) 2019 Andreas Steffen
                      6:  * HSR Hochschule fuer Technik Rapperswil
                      7:  *
                      8:  * This program is free software; you can redistribute it and/or modify it
                      9:  * under the terms of the GNU General Public License as published by the
                     10:  * Free Software Foundation; either version 2 of the License, or (at your
                     11:  * option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
                     12:  *
                     13:  * This program is distributed in the hope that it will be useful, but
                     14:  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     15:  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
                     16:  * for more details.
                     17:  */
                     18: 
                     19: #include "aesni_ecb.h"
                     20: #include "aesni_key.h"
                     21: 
                     22: /**
                     23:  * Pipeline parallelism we use for ECB encryption/decryption
                     24:  */
                     25: #define ECB_PARALLELISM 4
                     26: 
                     27: typedef struct private_aesni_ecb_t private_aesni_ecb_t;
                     28: 
                     29: /**
                     30:  * ECB en/decryption method type
                     31:  */
                     32: typedef void (*aesni_ecb_fn_t)(aesni_key_t*, u_int, u_char*, u_char*);
                     33: 
                     34: /**
                     35:  * Private data of an aesni_ecb_t object.
                     36:  */
                     37: struct private_aesni_ecb_t {
                     38: 
                     39:        /**
                     40:         * Public aesni_ecb_t interface.
                     41:         */
                     42:        aesni_ecb_t public;
                     43: 
                     44:        /**
                     45:         * Key size
                     46:         */
                     47:        u_int key_size;
                     48: 
                     49:        /**
                     50:         * Encryption key schedule
                     51:         */
                     52:        aesni_key_t *ekey;
                     53: 
                     54:        /**
                     55:         * Decryption key schedule
                     56:         */
                     57:        aesni_key_t *dkey;
                     58: 
                     59:        /**
                     60:         * Encryption method
                     61:         */
                     62:        aesni_ecb_fn_t encrypt;
                     63: 
                     64:        /**
                     65:         * Decryption method
                     66:         */
                     67:        aesni_ecb_fn_t decrypt;
                     68: };
                     69: 
                     70: /**
                     71:  * AES-128 ECB encryption
                     72:  */
                     73: static void encrypt_ecb128(aesni_key_t *key, u_int blocks, u_char *in,
                     74:                                                   u_char *out)
                     75: {
                     76:        __m128i *ks, *bi, *bo;
                     77:        __m128i t1, t2, t3, t4;
                     78:        u_int i, pblocks;
                     79: 
                     80:        ks = key->schedule;
                     81:        bi = (__m128i*)in;
                     82:        bo = (__m128i*)out;
                     83:        pblocks = blocks - (blocks % ECB_PARALLELISM);
                     84: 
                     85:        for (i = 0; i < pblocks; i += ECB_PARALLELISM)
                     86:        {
                     87:                t1 = _mm_loadu_si128(bi + i + 0);
                     88:                t2 = _mm_loadu_si128(bi + i + 1);
                     89:                t3 = _mm_loadu_si128(bi + i + 2);
                     90:                t4 = _mm_loadu_si128(bi + i + 3);
                     91: 
                     92:                t1 = _mm_xor_si128(t1, ks[0]);
                     93:                t2 = _mm_xor_si128(t2, ks[0]);
                     94:                t3 = _mm_xor_si128(t3, ks[0]);
                     95:                t4 = _mm_xor_si128(t4, ks[0]);
                     96: 
                     97:                t1 = _mm_aesenc_si128(t1, ks[1]);
                     98:                t2 = _mm_aesenc_si128(t2, ks[1]);
                     99:                t3 = _mm_aesenc_si128(t3, ks[1]);
                    100:                t4 = _mm_aesenc_si128(t4, ks[1]);
                    101:                t1 = _mm_aesenc_si128(t1, ks[2]);
                    102:                t2 = _mm_aesenc_si128(t2, ks[2]);
                    103:                t3 = _mm_aesenc_si128(t3, ks[2]);
                    104:                t4 = _mm_aesenc_si128(t4, ks[2]);
                    105:                t1 = _mm_aesenc_si128(t1, ks[3]);
                    106:                t2 = _mm_aesenc_si128(t2, ks[3]);
                    107:                t3 = _mm_aesenc_si128(t3, ks[3]);
                    108:                t4 = _mm_aesenc_si128(t4, ks[3]);
                    109:                t1 = _mm_aesenc_si128(t1, ks[4]);
                    110:                t2 = _mm_aesenc_si128(t2, ks[4]);
                    111:                t3 = _mm_aesenc_si128(t3, ks[4]);
                    112:                t4 = _mm_aesenc_si128(t4, ks[4]);
                    113:                t1 = _mm_aesenc_si128(t1, ks[5]);
                    114:                t2 = _mm_aesenc_si128(t2, ks[5]);
                    115:                t3 = _mm_aesenc_si128(t3, ks[5]);
                    116:                t4 = _mm_aesenc_si128(t4, ks[5]);
                    117:                t1 = _mm_aesenc_si128(t1, ks[6]);
                    118:                t2 = _mm_aesenc_si128(t2, ks[6]);
                    119:                t3 = _mm_aesenc_si128(t3, ks[6]);
                    120:                t4 = _mm_aesenc_si128(t4, ks[6]);
                    121:                t1 = _mm_aesenc_si128(t1, ks[7]);
                    122:                t2 = _mm_aesenc_si128(t2, ks[7]);
                    123:                t3 = _mm_aesenc_si128(t3, ks[7]);
                    124:                t4 = _mm_aesenc_si128(t4, ks[7]);
                    125:                t1 = _mm_aesenc_si128(t1, ks[8]);
                    126:                t2 = _mm_aesenc_si128(t2, ks[8]);
                    127:                t3 = _mm_aesenc_si128(t3, ks[8]);
                    128:                t4 = _mm_aesenc_si128(t4, ks[8]);
                    129:                t1 = _mm_aesenc_si128(t1, ks[9]);
                    130:                t2 = _mm_aesenc_si128(t2, ks[9]);
                    131:                t3 = _mm_aesenc_si128(t3, ks[9]);
                    132:                t4 = _mm_aesenc_si128(t4, ks[9]);
                    133: 
                    134:                t1 = _mm_aesenclast_si128(t1, ks[10]);
                    135:                t2 = _mm_aesenclast_si128(t2, ks[10]);
                    136:                t3 = _mm_aesenclast_si128(t3, ks[10]);
                    137:                t4 = _mm_aesenclast_si128(t4, ks[10]);
                    138: 
                    139:                _mm_storeu_si128(bo + i + 0, t1);
                    140:                _mm_storeu_si128(bo + i + 1, t2);
                    141:                _mm_storeu_si128(bo + i + 2, t3);
                    142:                _mm_storeu_si128(bo + i + 3, t4);
                    143:        }
                    144: 
                    145:        for (i = pblocks; i < blocks; i++)
                    146:        {
                    147:                t1 = _mm_loadu_si128(bi + i);
                    148:                t1 = _mm_xor_si128(t1, ks[0]);
                    149: 
                    150:                t1 = _mm_aesenc_si128(t1, ks[1]);
                    151:                t1 = _mm_aesenc_si128(t1, ks[2]);
                    152:                t1 = _mm_aesenc_si128(t1, ks[3]);
                    153:                t1 = _mm_aesenc_si128(t1, ks[4]);
                    154:                t1 = _mm_aesenc_si128(t1, ks[5]);
                    155:                t1 = _mm_aesenc_si128(t1, ks[6]);
                    156:                t1 = _mm_aesenc_si128(t1, ks[7]);
                    157:                t1 = _mm_aesenc_si128(t1, ks[8]);
                    158:                t1 = _mm_aesenc_si128(t1, ks[9]);
                    159: 
                    160:                t1 = _mm_aesenclast_si128(t1, ks[10]);
                    161:                _mm_storeu_si128(bo + i, t1);
                    162:        }
                    163: }
                    164: 
                    165: /**
                    166:  * AES-128 ECB decryption
                    167:  */
                    168: static void decrypt_ecb128(aesni_key_t *key, u_int blocks, u_char *in,
                    169:                                                   u_char *out)
                    170: {
                    171:        __m128i *ks, *bi, *bo;
                    172:        __m128i t1, t2, t3, t4;
                    173:        u_int i, pblocks;
                    174: 
                    175:        ks = key->schedule;
                    176:        bi = (__m128i*)in;
                    177:        bo = (__m128i*)out;
                    178:        pblocks = blocks - (blocks % ECB_PARALLELISM);
                    179: 
                    180:        for (i = 0; i < pblocks; i += ECB_PARALLELISM)
                    181:        {
                    182:                t1 = _mm_loadu_si128(bi + i + 0);
                    183:                t2 = _mm_loadu_si128(bi + i + 1);
                    184:                t3 = _mm_loadu_si128(bi + i + 2);
                    185:                t4 = _mm_loadu_si128(bi + i + 3);
                    186: 
                    187:                t1 = _mm_xor_si128(t1, ks[0]);
                    188:                t2 = _mm_xor_si128(t2, ks[0]);
                    189:                t3 = _mm_xor_si128(t3, ks[0]);
                    190:                t4 = _mm_xor_si128(t4, ks[0]);
                    191: 
                    192:                t1 = _mm_aesdec_si128(t1, ks[1]);
                    193:                t2 = _mm_aesdec_si128(t2, ks[1]);
                    194:                t3 = _mm_aesdec_si128(t3, ks[1]);
                    195:                t4 = _mm_aesdec_si128(t4, ks[1]);
                    196:                t1 = _mm_aesdec_si128(t1, ks[2]);
                    197:                t2 = _mm_aesdec_si128(t2, ks[2]);
                    198:                t3 = _mm_aesdec_si128(t3, ks[2]);
                    199:                t4 = _mm_aesdec_si128(t4, ks[2]);
                    200:                t1 = _mm_aesdec_si128(t1, ks[3]);
                    201:                t2 = _mm_aesdec_si128(t2, ks[3]);
                    202:                t3 = _mm_aesdec_si128(t3, ks[3]);
                    203:                t4 = _mm_aesdec_si128(t4, ks[3]);
                    204:                t1 = _mm_aesdec_si128(t1, ks[4]);
                    205:                t2 = _mm_aesdec_si128(t2, ks[4]);
                    206:                t3 = _mm_aesdec_si128(t3, ks[4]);
                    207:                t4 = _mm_aesdec_si128(t4, ks[4]);
                    208:                t1 = _mm_aesdec_si128(t1, ks[5]);
                    209:                t2 = _mm_aesdec_si128(t2, ks[5]);
                    210:                t3 = _mm_aesdec_si128(t3, ks[5]);
                    211:                t4 = _mm_aesdec_si128(t4, ks[5]);
                    212:                t1 = _mm_aesdec_si128(t1, ks[6]);
                    213:                t2 = _mm_aesdec_si128(t2, ks[6]);
                    214:                t3 = _mm_aesdec_si128(t3, ks[6]);
                    215:                t4 = _mm_aesdec_si128(t4, ks[6]);
                    216:                t1 = _mm_aesdec_si128(t1, ks[7]);
                    217:                t2 = _mm_aesdec_si128(t2, ks[7]);
                    218:                t3 = _mm_aesdec_si128(t3, ks[7]);
                    219:                t4 = _mm_aesdec_si128(t4, ks[7]);
                    220:                t1 = _mm_aesdec_si128(t1, ks[8]);
                    221:                t2 = _mm_aesdec_si128(t2, ks[8]);
                    222:                t3 = _mm_aesdec_si128(t3, ks[8]);
                    223:                t4 = _mm_aesdec_si128(t4, ks[8]);
                    224:                t1 = _mm_aesdec_si128(t1, ks[9]);
                    225:                t2 = _mm_aesdec_si128(t2, ks[9]);
                    226:                t3 = _mm_aesdec_si128(t3, ks[9]);
                    227:                t4 = _mm_aesdec_si128(t4, ks[9]);
                    228: 
                    229:                t1 = _mm_aesdeclast_si128(t1, ks[10]);
                    230:                t2 = _mm_aesdeclast_si128(t2, ks[10]);
                    231:                t3 = _mm_aesdeclast_si128(t3, ks[10]);
                    232:                t4 = _mm_aesdeclast_si128(t4, ks[10]);
                    233: 
                    234:                _mm_storeu_si128(bo + i + 0, t1);
                    235:                _mm_storeu_si128(bo + i + 1, t2);
                    236:                _mm_storeu_si128(bo + i + 2, t3);
                    237:                _mm_storeu_si128(bo + i + 3, t4);
                    238:        }
                    239: 
                    240:        for (i = pblocks; i < blocks; i++)
                    241:        {
                    242:                t1 = _mm_loadu_si128(bi + i);
                    243:                t1 = _mm_xor_si128(t1, ks[0]);
                    244: 
                    245:                t1 = _mm_aesdec_si128(t1, ks[1]);
                    246:                t1 = _mm_aesdec_si128(t1, ks[2]);
                    247:                t1 = _mm_aesdec_si128(t1, ks[3]);
                    248:                t1 = _mm_aesdec_si128(t1, ks[4]);
                    249:                t1 = _mm_aesdec_si128(t1, ks[5]);
                    250:                t1 = _mm_aesdec_si128(t1, ks[6]);
                    251:                t1 = _mm_aesdec_si128(t1, ks[7]);
                    252:                t1 = _mm_aesdec_si128(t1, ks[8]);
                    253:                t1 = _mm_aesdec_si128(t1, ks[9]);
                    254: 
                    255:                t1 = _mm_aesdeclast_si128(t1, ks[10]);
                    256:                _mm_storeu_si128(bo + i, t1);
                    257:        }
                    258: }
                    259: 
                    260: /**
                    261:  * AES-192 ECB encryption
                    262:  */
                    263: static void encrypt_ecb192(aesni_key_t *key, u_int blocks, u_char *in,
                    264:                                                   u_char *out)
                    265: {
                    266:        __m128i *ks, *bi, *bo;
                    267:        __m128i t1, t2, t3, t4;
                    268:        u_int i, pblocks;
                    269: 
                    270:        ks = key->schedule;
                    271:        bi = (__m128i*)in;
                    272:        bo = (__m128i*)out;
                    273:        pblocks = blocks - (blocks % ECB_PARALLELISM);
                    274: 
                    275:        for (i = 0; i < pblocks; i += ECB_PARALLELISM)
                    276:        {
                    277:                t1 = _mm_loadu_si128(bi + i + 0);
                    278:                t2 = _mm_loadu_si128(bi + i + 1);
                    279:                t3 = _mm_loadu_si128(bi + i + 2);
                    280:                t4 = _mm_loadu_si128(bi + i + 3);
                    281: 
                    282:                t1 = _mm_xor_si128(t1, ks[0]);
                    283:                t2 = _mm_xor_si128(t2, ks[0]);
                    284:                t3 = _mm_xor_si128(t3, ks[0]);
                    285:                t4 = _mm_xor_si128(t4, ks[0]);
                    286: 
                    287:                t1 = _mm_aesenc_si128(t1, ks[1]);
                    288:                t2 = _mm_aesenc_si128(t2, ks[1]);
                    289:                t3 = _mm_aesenc_si128(t3, ks[1]);
                    290:                t4 = _mm_aesenc_si128(t4, ks[1]);
                    291:                t1 = _mm_aesenc_si128(t1, ks[2]);
                    292:                t2 = _mm_aesenc_si128(t2, ks[2]);
                    293:                t3 = _mm_aesenc_si128(t3, ks[2]);
                    294:                t4 = _mm_aesenc_si128(t4, ks[2]);
                    295:                t1 = _mm_aesenc_si128(t1, ks[3]);
                    296:                t2 = _mm_aesenc_si128(t2, ks[3]);
                    297:                t3 = _mm_aesenc_si128(t3, ks[3]);
                    298:                t4 = _mm_aesenc_si128(t4, ks[3]);
                    299:                t1 = _mm_aesenc_si128(t1, ks[4]);
                    300:                t2 = _mm_aesenc_si128(t2, ks[4]);
                    301:                t3 = _mm_aesenc_si128(t3, ks[4]);
                    302:                t4 = _mm_aesenc_si128(t4, ks[4]);
                    303:                t1 = _mm_aesenc_si128(t1, ks[5]);
                    304:                t2 = _mm_aesenc_si128(t2, ks[5]);
                    305:                t3 = _mm_aesenc_si128(t3, ks[5]);
                    306:                t4 = _mm_aesenc_si128(t4, ks[5]);
                    307:                t1 = _mm_aesenc_si128(t1, ks[6]);
                    308:                t2 = _mm_aesenc_si128(t2, ks[6]);
                    309:                t3 = _mm_aesenc_si128(t3, ks[6]);
                    310:                t4 = _mm_aesenc_si128(t4, ks[6]);
                    311:                t1 = _mm_aesenc_si128(t1, ks[7]);
                    312:                t2 = _mm_aesenc_si128(t2, ks[7]);
                    313:                t3 = _mm_aesenc_si128(t3, ks[7]);
                    314:                t4 = _mm_aesenc_si128(t4, ks[7]);
                    315:                t1 = _mm_aesenc_si128(t1, ks[8]);
                    316:                t2 = _mm_aesenc_si128(t2, ks[8]);
                    317:                t3 = _mm_aesenc_si128(t3, ks[8]);
                    318:                t4 = _mm_aesenc_si128(t4, ks[8]);
                    319:                t1 = _mm_aesenc_si128(t1, ks[9]);
                    320:                t2 = _mm_aesenc_si128(t2, ks[9]);
                    321:                t3 = _mm_aesenc_si128(t3, ks[9]);
                    322:                t4 = _mm_aesenc_si128(t4, ks[9]);
                    323:                t1 = _mm_aesenc_si128(t1, ks[10]);
                    324:                t2 = _mm_aesenc_si128(t2, ks[10]);
                    325:                t3 = _mm_aesenc_si128(t3, ks[10]);
                    326:                t4 = _mm_aesenc_si128(t4, ks[10]);
                    327:                t1 = _mm_aesenc_si128(t1, ks[11]);
                    328:                t2 = _mm_aesenc_si128(t2, ks[11]);
                    329:                t3 = _mm_aesenc_si128(t3, ks[11]);
                    330:                t4 = _mm_aesenc_si128(t4, ks[11]);
                    331: 
                    332:                t1 = _mm_aesenclast_si128(t1, ks[12]);
                    333:                t2 = _mm_aesenclast_si128(t2, ks[12]);
                    334:                t3 = _mm_aesenclast_si128(t3, ks[12]);
                    335:                t4 = _mm_aesenclast_si128(t4, ks[12]);
                    336: 
                    337:                _mm_storeu_si128(bo + i + 0, t1);
                    338:                _mm_storeu_si128(bo + i + 1, t2);
                    339:                _mm_storeu_si128(bo + i + 2, t3);
                    340:                _mm_storeu_si128(bo + i + 3, t4);
                    341:        }
                    342: 
                    343:        for (i = pblocks; i < blocks; i++)
                    344:        {
                    345:                t1 = _mm_loadu_si128(bi + i);
                    346:                t1 = _mm_xor_si128(t1, ks[0]);
                    347: 
                    348:                t1 = _mm_aesenc_si128(t1, ks[1]);
                    349:                t1 = _mm_aesenc_si128(t1, ks[2]);
                    350:                t1 = _mm_aesenc_si128(t1, ks[3]);
                    351:                t1 = _mm_aesenc_si128(t1, ks[4]);
                    352:                t1 = _mm_aesenc_si128(t1, ks[5]);
                    353:                t1 = _mm_aesenc_si128(t1, ks[6]);
                    354:                t1 = _mm_aesenc_si128(t1, ks[7]);
                    355:                t1 = _mm_aesenc_si128(t1, ks[8]);
                    356:                t1 = _mm_aesenc_si128(t1, ks[9]);
                    357:                t1 = _mm_aesenc_si128(t1, ks[10]);
                    358:                t1 = _mm_aesenc_si128(t1, ks[11]);
                    359: 
                    360:                t1 = _mm_aesenclast_si128(t1, ks[12]);
                    361:                _mm_storeu_si128(bo + i, t1);
                    362:        }
                    363: }
                    364: 
                    365: /**
                    366:  * AES-192 ECB decryption
                    367:  */
                    368: static void decrypt_ecb192(aesni_key_t *key, u_int blocks, u_char *in,
                    369:                                                   u_char *out)
                    370: {
                    371:        __m128i *ks, *bi, *bo;
                    372:        __m128i t1, t2, t3, t4;
                    373:        u_int i, pblocks;
                    374: 
                    375:        ks = key->schedule;
                    376:        bi = (__m128i*)in;
                    377:        bo = (__m128i*)out;
                    378:        pblocks = blocks - (blocks % ECB_PARALLELISM);
                    379: 
                    380:        for (i = 0; i < pblocks; i += ECB_PARALLELISM)
                    381:        {
                    382:                t1 = _mm_loadu_si128(bi + i + 0);
                    383:                t2 = _mm_loadu_si128(bi + i + 1);
                    384:                t3 = _mm_loadu_si128(bi + i + 2);
                    385:                t4 = _mm_loadu_si128(bi + i + 3);
                    386: 
                    387:                t1 = _mm_xor_si128(t1, ks[0]);
                    388:                t2 = _mm_xor_si128(t2, ks[0]);
                    389:                t3 = _mm_xor_si128(t3, ks[0]);
                    390:                t4 = _mm_xor_si128(t4, ks[0]);
                    391: 
                    392:                t1 = _mm_aesdec_si128(t1, ks[1]);
                    393:                t2 = _mm_aesdec_si128(t2, ks[1]);
                    394:                t3 = _mm_aesdec_si128(t3, ks[1]);
                    395:                t4 = _mm_aesdec_si128(t4, ks[1]);
                    396:                t1 = _mm_aesdec_si128(t1, ks[2]);
                    397:                t2 = _mm_aesdec_si128(t2, ks[2]);
                    398:                t3 = _mm_aesdec_si128(t3, ks[2]);
                    399:                t4 = _mm_aesdec_si128(t4, ks[2]);
                    400:                t1 = _mm_aesdec_si128(t1, ks[3]);
                    401:                t2 = _mm_aesdec_si128(t2, ks[3]);
                    402:                t3 = _mm_aesdec_si128(t3, ks[3]);
                    403:                t4 = _mm_aesdec_si128(t4, ks[3]);
                    404:                t1 = _mm_aesdec_si128(t1, ks[4]);
                    405:                t2 = _mm_aesdec_si128(t2, ks[4]);
                    406:                t3 = _mm_aesdec_si128(t3, ks[4]);
                    407:                t4 = _mm_aesdec_si128(t4, ks[4]);
                    408:                t1 = _mm_aesdec_si128(t1, ks[5]);
                    409:                t2 = _mm_aesdec_si128(t2, ks[5]);
                    410:                t3 = _mm_aesdec_si128(t3, ks[5]);
                    411:                t4 = _mm_aesdec_si128(t4, ks[5]);
                    412:                t1 = _mm_aesdec_si128(t1, ks[6]);
                    413:                t2 = _mm_aesdec_si128(t2, ks[6]);
                    414:                t3 = _mm_aesdec_si128(t3, ks[6]);
                    415:                t4 = _mm_aesdec_si128(t4, ks[6]);
                    416:                t1 = _mm_aesdec_si128(t1, ks[7]);
                    417:                t2 = _mm_aesdec_si128(t2, ks[7]);
                    418:                t3 = _mm_aesdec_si128(t3, ks[7]);
                    419:                t4 = _mm_aesdec_si128(t4, ks[7]);
                    420:                t1 = _mm_aesdec_si128(t1, ks[8]);
                    421:                t2 = _mm_aesdec_si128(t2, ks[8]);
                    422:                t3 = _mm_aesdec_si128(t3, ks[8]);
                    423:                t4 = _mm_aesdec_si128(t4, ks[8]);
                    424:                t1 = _mm_aesdec_si128(t1, ks[9]);
                    425:                t2 = _mm_aesdec_si128(t2, ks[9]);
                    426:                t3 = _mm_aesdec_si128(t3, ks[9]);
                    427:                t4 = _mm_aesdec_si128(t4, ks[9]);
                    428:                t1 = _mm_aesdec_si128(t1, ks[10]);
                    429:                t2 = _mm_aesdec_si128(t2, ks[10]);
                    430:                t3 = _mm_aesdec_si128(t3, ks[10]);
                    431:                t4 = _mm_aesdec_si128(t4, ks[10]);
                    432:                t1 = _mm_aesdec_si128(t1, ks[11]);
                    433:                t2 = _mm_aesdec_si128(t2, ks[11]);
                    434:                t3 = _mm_aesdec_si128(t3, ks[11]);
                    435:                t4 = _mm_aesdec_si128(t4, ks[11]);
                    436: 
                    437:                t1 = _mm_aesdeclast_si128(t1, ks[12]);
                    438:                t2 = _mm_aesdeclast_si128(t2, ks[12]);
                    439:                t3 = _mm_aesdeclast_si128(t3, ks[12]);
                    440:                t4 = _mm_aesdeclast_si128(t4, ks[12]);
                    441: 
                    442:                _mm_storeu_si128(bo + i + 0, t1);
                    443:                _mm_storeu_si128(bo + i + 1, t2);
                    444:                _mm_storeu_si128(bo + i + 2, t3);
                    445:                _mm_storeu_si128(bo + i + 3, t4);
                    446:        }
                    447: 
                    448:        for (i = pblocks; i < blocks; i++)
                    449:        {
                    450:                t1 = _mm_loadu_si128(bi + i);
                    451:                t1 = _mm_xor_si128(t1, ks[0]);
                    452: 
                    453:                t1 = _mm_aesdec_si128(t1, ks[1]);
                    454:                t1 = _mm_aesdec_si128(t1, ks[2]);
                    455:                t1 = _mm_aesdec_si128(t1, ks[3]);
                    456:                t1 = _mm_aesdec_si128(t1, ks[4]);
                    457:                t1 = _mm_aesdec_si128(t1, ks[5]);
                    458:                t1 = _mm_aesdec_si128(t1, ks[6]);
                    459:                t1 = _mm_aesdec_si128(t1, ks[7]);
                    460:                t1 = _mm_aesdec_si128(t1, ks[8]);
                    461:                t1 = _mm_aesdec_si128(t1, ks[9]);
                    462:                t1 = _mm_aesdec_si128(t1, ks[10]);
                    463:                t1 = _mm_aesdec_si128(t1, ks[11]);
                    464: 
                    465:                t1 = _mm_aesdeclast_si128(t1, ks[12]);
                    466:                _mm_storeu_si128(bo + i, t1);
                    467:        }
                    468: }
                    469: 
                    470: /**
                    471:  * AES-256 ECB encryption
                    472:  */
                    473: static void encrypt_ecb256(aesni_key_t *key, u_int blocks, u_char *in,
                    474:                                                   u_char *out)
                    475: {
                    476:        __m128i *ks, *bi, *bo;
                    477:        __m128i t1, t2, t3, t4;
                    478:        u_int i, pblocks;
                    479: 
                    480:        ks = key->schedule;
                    481:        bi = (__m128i*)in;
                    482:        bo = (__m128i*)out;
                    483:        pblocks = blocks - (blocks % ECB_PARALLELISM);
                    484: 
                    485:        for (i = 0; i < pblocks; i += ECB_PARALLELISM)
                    486:        {
                    487:                t1 = _mm_loadu_si128(bi + i + 0);
                    488:                t2 = _mm_loadu_si128(bi + i + 1);
                    489:                t3 = _mm_loadu_si128(bi + i + 2);
                    490:                t4 = _mm_loadu_si128(bi + i + 3);
                    491: 
                    492:                t1 = _mm_xor_si128(t1, ks[0]);
                    493:                t2 = _mm_xor_si128(t2, ks[0]);
                    494:                t3 = _mm_xor_si128(t3, ks[0]);
                    495:                t4 = _mm_xor_si128(t4, ks[0]);
                    496: 
                    497:                t1 = _mm_aesenc_si128(t1, ks[1]);
                    498:                t2 = _mm_aesenc_si128(t2, ks[1]);
                    499:                t3 = _mm_aesenc_si128(t3, ks[1]);
                    500:                t4 = _mm_aesenc_si128(t4, ks[1]);
                    501:                t1 = _mm_aesenc_si128(t1, ks[2]);
                    502:                t2 = _mm_aesenc_si128(t2, ks[2]);
                    503:                t3 = _mm_aesenc_si128(t3, ks[2]);
                    504:                t4 = _mm_aesenc_si128(t4, ks[2]);
                    505:                t1 = _mm_aesenc_si128(t1, ks[3]);
                    506:                t2 = _mm_aesenc_si128(t2, ks[3]);
                    507:                t3 = _mm_aesenc_si128(t3, ks[3]);
                    508:                t4 = _mm_aesenc_si128(t4, ks[3]);
                    509:                t1 = _mm_aesenc_si128(t1, ks[4]);
                    510:                t2 = _mm_aesenc_si128(t2, ks[4]);
                    511:                t3 = _mm_aesenc_si128(t3, ks[4]);
                    512:                t4 = _mm_aesenc_si128(t4, ks[4]);
                    513:                t1 = _mm_aesenc_si128(t1, ks[5]);
                    514:                t2 = _mm_aesenc_si128(t2, ks[5]);
                    515:                t3 = _mm_aesenc_si128(t3, ks[5]);
                    516:                t4 = _mm_aesenc_si128(t4, ks[5]);
                    517:                t1 = _mm_aesenc_si128(t1, ks[6]);
                    518:                t2 = _mm_aesenc_si128(t2, ks[6]);
                    519:                t3 = _mm_aesenc_si128(t3, ks[6]);
                    520:                t4 = _mm_aesenc_si128(t4, ks[6]);
                    521:                t1 = _mm_aesenc_si128(t1, ks[7]);
                    522:                t2 = _mm_aesenc_si128(t2, ks[7]);
                    523:                t3 = _mm_aesenc_si128(t3, ks[7]);
                    524:                t4 = _mm_aesenc_si128(t4, ks[7]);
                    525:                t1 = _mm_aesenc_si128(t1, ks[8]);
                    526:                t2 = _mm_aesenc_si128(t2, ks[8]);
                    527:                t3 = _mm_aesenc_si128(t3, ks[8]);
                    528:                t4 = _mm_aesenc_si128(t4, ks[8]);
                    529:                t1 = _mm_aesenc_si128(t1, ks[9]);
                    530:                t2 = _mm_aesenc_si128(t2, ks[9]);
                    531:                t3 = _mm_aesenc_si128(t3, ks[9]);
                    532:                t4 = _mm_aesenc_si128(t4, ks[9]);
                    533:                t1 = _mm_aesenc_si128(t1, ks[10]);
                    534:                t2 = _mm_aesenc_si128(t2, ks[10]);
                    535:                t3 = _mm_aesenc_si128(t3, ks[10]);
                    536:                t4 = _mm_aesenc_si128(t4, ks[10]);
                    537:                t1 = _mm_aesenc_si128(t1, ks[11]);
                    538:                t2 = _mm_aesenc_si128(t2, ks[11]);
                    539:                t3 = _mm_aesenc_si128(t3, ks[11]);
                    540:                t4 = _mm_aesenc_si128(t4, ks[11]);
                    541:                t1 = _mm_aesenc_si128(t1, ks[12]);
                    542:                t2 = _mm_aesenc_si128(t2, ks[12]);
                    543:                t3 = _mm_aesenc_si128(t3, ks[12]);
                    544:                t4 = _mm_aesenc_si128(t4, ks[12]);
                    545:                t1 = _mm_aesenc_si128(t1, ks[13]);
                    546:                t2 = _mm_aesenc_si128(t2, ks[13]);
                    547:                t3 = _mm_aesenc_si128(t3, ks[13]);
                    548:                t4 = _mm_aesenc_si128(t4, ks[13]);
                    549: 
                    550:                t1 = _mm_aesenclast_si128(t1, ks[14]);
                    551:                t2 = _mm_aesenclast_si128(t2, ks[14]);
                    552:                t3 = _mm_aesenclast_si128(t3, ks[14]);
                    553:                t4 = _mm_aesenclast_si128(t4, ks[14]);
                    554: 
                    555:                _mm_storeu_si128(bo + i + 0, t1);
                    556:                _mm_storeu_si128(bo + i + 1, t2);
                    557:                _mm_storeu_si128(bo + i + 2, t3);
                    558:                _mm_storeu_si128(bo + i + 3, t4);
                    559:        }
                    560: 
                    561:        for (i = pblocks; i < blocks; i++)
                    562:        {
                    563:                t1 = _mm_loadu_si128(bi + i);
                    564:                t1 = _mm_xor_si128(t1, ks[0]);
                    565: 
                    566:                t1 = _mm_aesenc_si128(t1, ks[1]);
                    567:                t1 = _mm_aesenc_si128(t1, ks[2]);
                    568:                t1 = _mm_aesenc_si128(t1, ks[3]);
                    569:                t1 = _mm_aesenc_si128(t1, ks[4]);
                    570:                t1 = _mm_aesenc_si128(t1, ks[5]);
                    571:                t1 = _mm_aesenc_si128(t1, ks[6]);
                    572:                t1 = _mm_aesenc_si128(t1, ks[7]);
                    573:                t1 = _mm_aesenc_si128(t1, ks[8]);
                    574:                t1 = _mm_aesenc_si128(t1, ks[9]);
                    575:                t1 = _mm_aesenc_si128(t1, ks[10]);
                    576:                t1 = _mm_aesenc_si128(t1, ks[11]);
                    577:                t1 = _mm_aesenc_si128(t1, ks[12]);
                    578:                t1 = _mm_aesenc_si128(t1, ks[13]);
                    579: 
                    580:                t1 = _mm_aesenclast_si128(t1, ks[14]);
                    581:                _mm_storeu_si128(bo + i, t1);
                    582:        }
                    583: }
                    584: 
                    585: /**
                    586:  * AES-256 ECB decryption
                    587:  */
                    588: static void decrypt_ecb256(aesni_key_t *key, u_int blocks, u_char *in,
                    589:                                                   u_char *out)
                    590: {
                    591:        __m128i *ks, *bi, *bo;
                    592:        __m128i t1, t2, t3, t4;
                    593:        u_int i, pblocks;
                    594: 
                    595:        ks = key->schedule;
                    596:        bi = (__m128i*)in;
                    597:        bo = (__m128i*)out;
                    598:        pblocks = blocks - (blocks % ECB_PARALLELISM);
                    599: 
                    600:        for (i = 0; i < pblocks; i += ECB_PARALLELISM)
                    601:        {
                    602:                t1 = _mm_loadu_si128(bi + i + 0);
                    603:                t2 = _mm_loadu_si128(bi + i + 1);
                    604:                t3 = _mm_loadu_si128(bi + i + 2);
                    605:                t4 = _mm_loadu_si128(bi + i + 3);
                    606: 
                    607:                t1 = _mm_xor_si128(t1, ks[0]);
                    608:                t2 = _mm_xor_si128(t2, ks[0]);
                    609:                t3 = _mm_xor_si128(t3, ks[0]);
                    610:                t4 = _mm_xor_si128(t4, ks[0]);
                    611: 
                    612:                t1 = _mm_aesdec_si128(t1, ks[1]);
                    613:                t2 = _mm_aesdec_si128(t2, ks[1]);
                    614:                t3 = _mm_aesdec_si128(t3, ks[1]);
                    615:                t4 = _mm_aesdec_si128(t4, ks[1]);
                    616:                t1 = _mm_aesdec_si128(t1, ks[2]);
                    617:                t2 = _mm_aesdec_si128(t2, ks[2]);
                    618:                t3 = _mm_aesdec_si128(t3, ks[2]);
                    619:                t4 = _mm_aesdec_si128(t4, ks[2]);
                    620:                t1 = _mm_aesdec_si128(t1, ks[3]);
                    621:                t2 = _mm_aesdec_si128(t2, ks[3]);
                    622:                t3 = _mm_aesdec_si128(t3, ks[3]);
                    623:                t4 = _mm_aesdec_si128(t4, ks[3]);
                    624:                t1 = _mm_aesdec_si128(t1, ks[4]);
                    625:                t2 = _mm_aesdec_si128(t2, ks[4]);
                    626:                t3 = _mm_aesdec_si128(t3, ks[4]);
                    627:                t4 = _mm_aesdec_si128(t4, ks[4]);
                    628:                t1 = _mm_aesdec_si128(t1, ks[5]);
                    629:                t2 = _mm_aesdec_si128(t2, ks[5]);
                    630:                t3 = _mm_aesdec_si128(t3, ks[5]);
                    631:                t4 = _mm_aesdec_si128(t4, ks[5]);
                    632:                t1 = _mm_aesdec_si128(t1, ks[6]);
                    633:                t2 = _mm_aesdec_si128(t2, ks[6]);
                    634:                t3 = _mm_aesdec_si128(t3, ks[6]);
                    635:                t4 = _mm_aesdec_si128(t4, ks[6]);
                    636:                t1 = _mm_aesdec_si128(t1, ks[7]);
                    637:                t2 = _mm_aesdec_si128(t2, ks[7]);
                    638:                t3 = _mm_aesdec_si128(t3, ks[7]);
                    639:                t4 = _mm_aesdec_si128(t4, ks[7]);
                    640:                t1 = _mm_aesdec_si128(t1, ks[8]);
                    641:                t2 = _mm_aesdec_si128(t2, ks[8]);
                    642:                t3 = _mm_aesdec_si128(t3, ks[8]);
                    643:                t4 = _mm_aesdec_si128(t4, ks[8]);
                    644:                t1 = _mm_aesdec_si128(t1, ks[9]);
                    645:                t2 = _mm_aesdec_si128(t2, ks[9]);
                    646:                t3 = _mm_aesdec_si128(t3, ks[9]);
                    647:                t4 = _mm_aesdec_si128(t4, ks[9]);
                    648:                t1 = _mm_aesdec_si128(t1, ks[10]);
                    649:                t2 = _mm_aesdec_si128(t2, ks[10]);
                    650:                t3 = _mm_aesdec_si128(t3, ks[10]);
                    651:                t4 = _mm_aesdec_si128(t4, ks[10]);
                    652:                t1 = _mm_aesdec_si128(t1, ks[11]);
                    653:                t2 = _mm_aesdec_si128(t2, ks[11]);
                    654:                t3 = _mm_aesdec_si128(t3, ks[11]);
                    655:                t4 = _mm_aesdec_si128(t4, ks[11]);
                    656:                t1 = _mm_aesdec_si128(t1, ks[12]);
                    657:                t2 = _mm_aesdec_si128(t2, ks[12]);
                    658:                t3 = _mm_aesdec_si128(t3, ks[12]);
                    659:                t4 = _mm_aesdec_si128(t4, ks[12]);
                    660:                t1 = _mm_aesdec_si128(t1, ks[13]);
                    661:                t2 = _mm_aesdec_si128(t2, ks[13]);
                    662:                t3 = _mm_aesdec_si128(t3, ks[13]);
                    663:                t4 = _mm_aesdec_si128(t4, ks[13]);
                    664: 
                    665:                t1 = _mm_aesdeclast_si128(t1, ks[14]);
                    666:                t2 = _mm_aesdeclast_si128(t2, ks[14]);
                    667:                t3 = _mm_aesdeclast_si128(t3, ks[14]);
                    668:                t4 = _mm_aesdeclast_si128(t4, ks[14]);
                    669: 
                    670:                _mm_storeu_si128(bo + i + 0, t1);
                    671:                _mm_storeu_si128(bo + i + 1, t2);
                    672:                _mm_storeu_si128(bo + i + 2, t3);
                    673:                _mm_storeu_si128(bo + i + 3, t4);
                    674:        }
                    675: 
                    676:        for (i = pblocks; i < blocks; i++)
                    677:        {
                    678:                t1 = _mm_loadu_si128(bi + i);
                    679:                t1 = _mm_xor_si128(t1, ks[0]);
                    680: 
                    681:                t1 = _mm_aesdec_si128(t1, ks[1]);
                    682:                t1 = _mm_aesdec_si128(t1, ks[2]);
                    683:                t1 = _mm_aesdec_si128(t1, ks[3]);
                    684:                t1 = _mm_aesdec_si128(t1, ks[4]);
                    685:                t1 = _mm_aesdec_si128(t1, ks[5]);
                    686:                t1 = _mm_aesdec_si128(t1, ks[6]);
                    687:                t1 = _mm_aesdec_si128(t1, ks[7]);
                    688:                t1 = _mm_aesdec_si128(t1, ks[8]);
                    689:                t1 = _mm_aesdec_si128(t1, ks[9]);
                    690:                t1 = _mm_aesdec_si128(t1, ks[10]);
                    691:                t1 = _mm_aesdec_si128(t1, ks[11]);
                    692:                t1 = _mm_aesdec_si128(t1, ks[12]);
                    693:                t1 = _mm_aesdec_si128(t1, ks[13]);
                    694: 
                    695:                t1 = _mm_aesdeclast_si128(t1, ks[14]);
                    696:                _mm_storeu_si128(bo + i, t1);
                    697:        }
                    698: }
                    699: 
                    700: /**
                    701:  * Do inline or allocated de/encryption using key schedule
                    702:  */
                    703: static bool crypt(aesni_ecb_fn_t fn, aesni_key_t *key, chunk_t data,
                    704:                                  chunk_t *out)
                    705: {
                    706:        u_char *buf;
                    707: 
                    708:        if (!key || data.len % AES_BLOCK_SIZE)
                    709:        {
                    710:                return FALSE;
                    711:        }
                    712:        if (out)
                    713:        {
                    714:                *out = chunk_alloc(data.len);
                    715:                buf = out->ptr;
                    716:        }
                    717:        else
                    718:        {
                    719:                buf = data.ptr;
                    720:        }
                    721:        fn(key, data.len / AES_BLOCK_SIZE, data.ptr, buf);
                    722:        return TRUE;
                    723: }
                    724: 
                    725: METHOD(crypter_t, encrypt, bool,
                    726:        private_aesni_ecb_t *this, chunk_t data, chunk_t iv, chunk_t *encrypted)
                    727: {
                    728:        return crypt(this->encrypt, this->ekey, data, encrypted);
                    729: }
                    730: 
                    731: METHOD(crypter_t, decrypt, bool,
                    732:        private_aesni_ecb_t *this, chunk_t data, chunk_t iv, chunk_t *decrypted)
                    733: {
                    734:        return crypt(this->decrypt, this->dkey, data, decrypted);
                    735: }
                    736: 
                    737: METHOD(crypter_t, get_block_size, size_t,
                    738:        private_aesni_ecb_t *this)
                    739: {
                    740:        return AES_BLOCK_SIZE;
                    741: }
                    742: 
                    743: METHOD(crypter_t, get_iv_size, size_t,
                    744:        private_aesni_ecb_t *this)
                    745: {
                    746:        return 0;
                    747: }
                    748: 
                    749: METHOD(crypter_t, get_key_size, size_t,
                    750:        private_aesni_ecb_t *this)
                    751: {
                    752:        return this->key_size;
                    753: }
                    754: 
                    755: METHOD(crypter_t, set_key, bool,
                    756:        private_aesni_ecb_t *this, chunk_t key)
                    757: {
                    758:        if (key.len != this->key_size)
                    759:        {
                    760:                return FALSE;
                    761:        }
                    762: 
                    763:        DESTROY_IF(this->ekey);
                    764:        DESTROY_IF(this->dkey);
                    765: 
                    766:        this->ekey = aesni_key_create(TRUE, key);
                    767:        this->dkey = aesni_key_create(FALSE, key);
                    768: 
                    769:        return this->ekey && this->dkey;
                    770: }
                    771: 
                    772: METHOD(crypter_t, destroy, void,
                    773:        private_aesni_ecb_t *this)
                    774: {
                    775:        DESTROY_IF(this->ekey);
                    776:        DESTROY_IF(this->dkey);
                    777:        free_align(this);
                    778: }
                    779: 
                    780: /**
                    781:  * See header
                    782:  */
                    783: aesni_ecb_t *aesni_ecb_create(encryption_algorithm_t algo, size_t key_size)
                    784: {
                    785:        private_aesni_ecb_t *this;
                    786: 
                    787:        if (algo != ENCR_AES_ECB)
                    788:        {
                    789:                return NULL;
                    790:        }
                    791:        switch (key_size)
                    792:        {
                    793:                case 0:
                    794:                        key_size = 16;
                    795:                        break;
                    796:                case 16:
                    797:                case 24:
                    798:                case 32:
                    799:                        break;
                    800:                default:
                    801:                        return NULL;
                    802:        }
                    803: 
                    804:        INIT_ALIGN(this, sizeof(__m128i),
                    805:                .public = {
                    806:                        .crypter = {
                    807:                                .encrypt = _encrypt,
                    808:                                .decrypt = _decrypt,
                    809:                                .get_block_size = _get_block_size,
                    810:                                .get_iv_size = _get_iv_size,
                    811:                                .get_key_size = _get_key_size,
                    812:                                .set_key = _set_key,
                    813:                                .destroy = _destroy,
                    814:                        },
                    815:                },
                    816:                .key_size = key_size,
                    817:        );
                    818: 
                    819:        switch (key_size)
                    820:        {
                    821:                case 16:
                    822:                        this->encrypt = encrypt_ecb128;
                    823:                        this->decrypt = decrypt_ecb128;
                    824:                        break;
                    825:                case 24:
                    826:                        this->encrypt = encrypt_ecb192;
                    827:                        this->decrypt = decrypt_ecb192;
                    828:                        break;
                    829:                case 32:
                    830:                        this->encrypt = encrypt_ecb256;
                    831:                        this->decrypt = decrypt_ecb256;
                    832:                        break;
                    833:        }
                    834: 
                    835:        return &this->public;
                    836: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>