Annotation of embedaddon/strongswan/src/libstrongswan/plugins/curve25519/ref10/ref10.c, revision 1.1
1.1 ! misho 1: /*
! 2: * Copyright (C) 2016 Andreas Steffen
! 3: * HSR Hochschule fuer Technik Rapperswil
! 4: *
! 5: * Based on the public domain libsodium adaptation by Frank Denis
! 6: * of the SUPERCOP ref10 implementation by Daniel J. Bernstein,
! 7: * Niels Duif, Peter Schwabe, Tanja Lange and Bo-Yin Yang.
! 8: */
! 9:
! 10: #include <stddef.h>
! 11: #include <stdint.h>
! 12: #include <string.h>
! 13:
! 14: #include "ref10.h"
! 15:
! 16: #include <utils/utils.h>
! 17:
! 18: static uint64_t load_3(const uint8_t *in)
! 19: {
! 20: uint64_t result;
! 21:
! 22: result = (uint64_t) in[0];
! 23: result |= ((uint64_t) in[1]) << 8;
! 24: result |= ((uint64_t) in[2]) << 16;
! 25:
! 26: return result;
! 27: }
! 28:
! 29: static uint64_t load_4(const uint8_t *in)
! 30: {
! 31: uint64_t result;
! 32:
! 33: result = (uint64_t) in[0];
! 34: result |= ((uint64_t) in[1]) << 8;
! 35: result |= ((uint64_t) in[2]) << 16;
! 36: result |= ((uint64_t) in[3]) << 24;
! 37:
! 38: return result;
! 39: }
! 40:
! 41: /**
! 42: * h = 0
! 43: */
! 44: static void fe_0(fe h)
! 45: {
! 46: memset(&h[0], 0, 10 * sizeof h[0]);
! 47: }
! 48:
! 49: /**
! 50: * h = 1
! 51: */
! 52: static void fe_1(fe h)
! 53: {
! 54: h[0] = 1;
! 55: h[1] = 0;
! 56: memset(&h[2], 0, 8 * sizeof h[0]);
! 57: }
! 58:
! 59: /**
! 60: * h = f + g
! 61: * Can overlap h with f or g.
! 62: *
! 63: * Preconditions:
! 64: * |f| bounded by 1.1*2^25, 1.1*2^24, 1.1*2^25, 1.1*2^24, etc.
! 65: * |g| bounded by 1.1*2^25, 1.1*2^24, 1.1*2^25, 1.1*2^24, etc.
! 66: *
! 67: * Postconditions:
! 68: * |h| bounded by 1.1*2^26, 1.1*2^25, 1.1*2^26, 1.1*2^25, etc.
! 69: */
! 70: static void fe_add(fe h, const fe f, const fe g)
! 71: {
! 72: int32_t f0 = f[0];
! 73: int32_t f1 = f[1];
! 74: int32_t f2 = f[2];
! 75: int32_t f3 = f[3];
! 76: int32_t f4 = f[4];
! 77: int32_t f5 = f[5];
! 78: int32_t f6 = f[6];
! 79: int32_t f7 = f[7];
! 80: int32_t f8 = f[8];
! 81: int32_t f9 = f[9];
! 82:
! 83: int32_t g0 = g[0];
! 84: int32_t g1 = g[1];
! 85: int32_t g2 = g[2];
! 86: int32_t g3 = g[3];
! 87: int32_t g4 = g[4];
! 88: int32_t g5 = g[5];
! 89: int32_t g6 = g[6];
! 90: int32_t g7 = g[7];
! 91: int32_t g8 = g[8];
! 92: int32_t g9 = g[9];
! 93:
! 94: int32_t h0 = f0 + g0;
! 95: int32_t h1 = f1 + g1;
! 96: int32_t h2 = f2 + g2;
! 97: int32_t h3 = f3 + g3;
! 98: int32_t h4 = f4 + g4;
! 99: int32_t h5 = f5 + g5;
! 100: int32_t h6 = f6 + g6;
! 101: int32_t h7 = f7 + g7;
! 102: int32_t h8 = f8 + g8;
! 103: int32_t h9 = f9 + g9;
! 104:
! 105: h[0] = h0;
! 106: h[1] = h1;
! 107: h[2] = h2;
! 108: h[3] = h3;
! 109: h[4] = h4;
! 110: h[5] = h5;
! 111: h[6] = h6;
! 112: h[7] = h7;
! 113: h[8] = h8;
! 114: h[9] = h9;
! 115: }
! 116:
! 117: /**
! 118: * Replace (f,g) with (g,g) if b == 1;
! 119: * replace (f,g) with (f,g) if b == 0.
! 120: *
! 121: * Preconditions: b in {0,1}.
! 122: */
! 123: static void fe_cmov(fe f, const fe g, unsigned int b)
! 124: {
! 125: int32_t f0 = f[0];
! 126: int32_t f1 = f[1];
! 127: int32_t f2 = f[2];
! 128: int32_t f3 = f[3];
! 129: int32_t f4 = f[4];
! 130: int32_t f5 = f[5];
! 131: int32_t f6 = f[6];
! 132: int32_t f7 = f[7];
! 133: int32_t f8 = f[8];
! 134: int32_t f9 = f[9];
! 135:
! 136: int32_t g0 = g[0];
! 137: int32_t g1 = g[1];
! 138: int32_t g2 = g[2];
! 139: int32_t g3 = g[3];
! 140: int32_t g4 = g[4];
! 141: int32_t g5 = g[5];
! 142: int32_t g6 = g[6];
! 143: int32_t g7 = g[7];
! 144: int32_t g8 = g[8];
! 145: int32_t g9 = g[9];
! 146:
! 147: int32_t x0 = f0 ^ g0;
! 148: int32_t x1 = f1 ^ g1;
! 149: int32_t x2 = f2 ^ g2;
! 150: int32_t x3 = f3 ^ g3;
! 151: int32_t x4 = f4 ^ g4;
! 152: int32_t x5 = f5 ^ g5;
! 153: int32_t x6 = f6 ^ g6;
! 154: int32_t x7 = f7 ^ g7;
! 155: int32_t x8 = f8 ^ g8;
! 156: int32_t x9 = f9 ^ g9;
! 157:
! 158: b = (unsigned int) (- (int) b);
! 159:
! 160: x0 &= b;
! 161: x1 &= b;
! 162: x2 &= b;
! 163: x3 &= b;
! 164: x4 &= b;
! 165: x5 &= b;
! 166: x6 &= b;
! 167: x7 &= b;
! 168: x8 &= b;
! 169: x9 &= b;
! 170:
! 171: f[0] = f0 ^ x0;
! 172: f[1] = f1 ^ x1;
! 173: f[2] = f2 ^ x2;
! 174: f[3] = f3 ^ x3;
! 175: f[4] = f4 ^ x4;
! 176: f[5] = f5 ^ x5;
! 177: f[6] = f6 ^ x6;
! 178: f[7] = f7 ^ x7;
! 179: f[8] = f8 ^ x8;
! 180: f[9] = f9 ^ x9;
! 181: }
! 182:
! 183: /**
! 184: * h = f
! 185: */
! 186: static void fe_copy(fe h, const fe f)
! 187: {
! 188: int32_t f0 = f[0];
! 189: int32_t f1 = f[1];
! 190: int32_t f2 = f[2];
! 191: int32_t f3 = f[3];
! 192: int32_t f4 = f[4];
! 193: int32_t f5 = f[5];
! 194: int32_t f6 = f[6];
! 195: int32_t f7 = f[7];
! 196: int32_t f8 = f[8];
! 197: int32_t f9 = f[9];
! 198:
! 199: h[0] = f0;
! 200: h[1] = f1;
! 201: h[2] = f2;
! 202: h[3] = f3;
! 203: h[4] = f4;
! 204: h[5] = f5;
! 205: h[6] = f6;
! 206: h[7] = f7;
! 207: h[8] = f8;
! 208: h[9] = f9;
! 209: }
! 210:
! 211: /**
! 212: * Ignores top bit of h.
! 213: */
! 214: static void fe_frombytes(fe h, const uint8_t *s)
! 215: {
! 216: int64_t h0 = load_4(s);
! 217: int64_t h1 = load_3(s + 4) << 6;
! 218: int64_t h2 = load_3(s + 7) << 5;
! 219: int64_t h3 = load_3(s + 10) << 3;
! 220: int64_t h4 = load_3(s + 13) << 2;
! 221: int64_t h5 = load_4(s + 16);
! 222: int64_t h6 = load_3(s + 20) << 7;
! 223: int64_t h7 = load_3(s + 23) << 5;
! 224: int64_t h8 = load_3(s + 26) << 4;
! 225: int64_t h9 = (load_3(s + 29) & 8388607) << 2;
! 226:
! 227: int64_t carry0, carry1, carry2, carry3, carry4;
! 228: int64_t carry5, carry6, carry7, carry8, carry9;
! 229:
! 230: carry9 = (h9 + (int64_t) (1L << 24)) >> 25;
! 231: h0 += carry9 * 19;
! 232: h9 -= carry9 * ((uint64_t) 1L << 25);
! 233:
! 234: carry1 = (h1 + (int64_t) (1L << 24)) >> 25;
! 235: h2 += carry1;
! 236: h1 -= carry1 * ((uint64_t) 1L << 25);
! 237:
! 238: carry3 = (h3 + (int64_t) (1L << 24)) >> 25;
! 239: h4 += carry3;
! 240: h3 -= carry3 * ((uint64_t) 1L << 25);
! 241:
! 242: carry5 = (h5 + (int64_t) (1L << 24)) >> 25;
! 243: h6 += carry5;
! 244: h5 -= carry5 * ((uint64_t) 1L << 25);
! 245:
! 246: carry7 = (h7 + (int64_t) (1L << 24)) >> 25;
! 247: h8 += carry7;
! 248: h7 -= carry7 * ((uint64_t) 1L << 25);
! 249:
! 250: carry0 = (h0 + (int64_t) (1L << 25)) >> 26;
! 251: h1 += carry0;
! 252: h0 -= carry0 * ((uint64_t) 1L << 26);
! 253:
! 254: carry2 = (h2 + (int64_t) (1L << 25)) >> 26;
! 255: h3 += carry2;
! 256: h2 -= carry2 * ((uint64_t) 1L << 26);
! 257:
! 258: carry4 = (h4 + (int64_t) (1L << 25)) >> 26;
! 259: h5 += carry4;
! 260: h4 -= carry4 * ((uint64_t) 1L << 26);
! 261:
! 262: carry6 = (h6 + (int64_t) (1L << 25)) >> 26;
! 263: h7 += carry6;
! 264: h6 -= carry6 * ((uint64_t) 1L << 26);
! 265:
! 266: carry8 = (h8 + (int64_t) (1L << 25)) >> 26;
! 267: h9 += carry8;
! 268: h8 -= carry8 * ((uint64_t) 1L << 26);
! 269:
! 270: h[0] = (int32_t) h0;
! 271: h[1] = (int32_t) h1;
! 272: h[2] = (int32_t) h2;
! 273: h[3] = (int32_t) h3;
! 274: h[4] = (int32_t) h4;
! 275: h[5] = (int32_t) h5;
! 276: h[6] = (int32_t) h6;
! 277: h[7] = (int32_t) h7;
! 278: h[8] = (int32_t) h8;
! 279: h[9] = (int32_t) h9;
! 280: }
! 281:
! 282: /**
! 283: * Preconditions:
! 284: * |h| bounded by 1.1*2^26, 1.1*2^25, 1.1*2^26, 1.1*2^25, etc.
! 285: *
! 286: * Write p=2^255-19; q=floor(h/p).
! 287: * Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
! 288: *
! 289: * Proof:
! 290: * Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
! 291: * Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
! 292: *
! 293: * Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
! 294: * Then 0<y<1.
! 295: *
! 296: * Write r=h-pq.
! 297: * Have 0<=r<=p-1=2^255-20.
! 298: * Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
! 299: *
! 300: * Write x=r+19(2^-255)r+y.
! 301: * Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
! 302: *
! 303: * Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
! 304: * so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
! 305: */
! 306: static void fe_tobytes(uint8_t *s, const fe h)
! 307: {
! 308: int32_t h0 = h[0];
! 309: int32_t h1 = h[1];
! 310: int32_t h2 = h[2];
! 311: int32_t h3 = h[3];
! 312: int32_t h4 = h[4];
! 313: int32_t h5 = h[5];
! 314: int32_t h6 = h[6];
! 315: int32_t h7 = h[7];
! 316: int32_t h8 = h[8];
! 317: int32_t h9 = h[9];
! 318:
! 319: int32_t carry0, carry1, carry2, carry3, carry4;
! 320: int32_t carry5, carry6, carry7, carry8, carry9;
! 321: int32_t q;
! 322:
! 323: q = (19 * h9 + ((uint32_t) 1L << 24)) >> 25;
! 324: q = (h0 + q) >> 26;
! 325: q = (h1 + q) >> 25;
! 326: q = (h2 + q) >> 26;
! 327: q = (h3 + q) >> 25;
! 328: q = (h4 + q) >> 26;
! 329: q = (h5 + q) >> 25;
! 330: q = (h6 + q) >> 26;
! 331: q = (h7 + q) >> 25;
! 332: q = (h8 + q) >> 26;
! 333: q = (h9 + q) >> 25;
! 334:
! 335: /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
! 336: h0 += 19 * q;
! 337: /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
! 338:
! 339: carry0 = h0 >> 26;
! 340: h1 += carry0;
! 341: h0 -= carry0 * ((uint32_t) 1L << 26);
! 342:
! 343: carry1 = h1 >> 25;
! 344: h2 += carry1;
! 345: h1 -= carry1 * ((uint32_t) 1L << 25);
! 346:
! 347: carry2 = h2 >> 26;
! 348: h3 += carry2;
! 349: h2 -= carry2 * ((uint32_t) 1L << 26);
! 350:
! 351: carry3 = h3 >> 25;
! 352: h4 += carry3;
! 353: h3 -= carry3 * ((uint32_t) 1L << 25);
! 354:
! 355: carry4 = h4 >> 26;
! 356: h5 += carry4;
! 357: h4 -= carry4 * ((uint32_t) 1L << 26);
! 358:
! 359: carry5 = h5 >> 25;
! 360: h6 += carry5;
! 361: h5 -= carry5 * ((uint32_t) 1L << 25);
! 362:
! 363: carry6 = h6 >> 26;
! 364: h7 += carry6;
! 365: h6 -= carry6 * ((uint32_t) 1L << 26);
! 366:
! 367: carry7 = h7 >> 25;
! 368: h8 += carry7;
! 369: h7 -= carry7 * ((uint32_t) 1L << 25);
! 370:
! 371: carry8 = h8 >> 26;
! 372: h9 += carry8;
! 373: h8 -= carry8 * ((uint32_t) 1L << 26);
! 374:
! 375: carry9 = h9 >> 25;
! 376: h9 -= carry9 * ((uint32_t) 1L << 25);
! 377: /* h10 = carry9 */
! 378:
! 379: /**
! 380: * Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
! 381: * Have h0+...+2^230 h9 between 0 and 2^255-1;
! 382: * evidently 2^255 h10-2^255 q = 0.
! 383: * Goal: Output h0+...+2^230 h9.
! 384: */
! 385: s[0] = h0 >> 0;
! 386: s[1] = h0 >> 8;
! 387: s[2] = h0 >> 16;
! 388: s[3] = (h0 >> 24) | (h1 * ((uint32_t) 1 << 2));
! 389: s[4] = h1 >> 6;
! 390: s[5] = h1 >> 14;
! 391: s[6] = (h1 >> 22) | (h2 * ((uint32_t) 1 << 3));
! 392: s[7] = h2 >> 5;
! 393: s[8] = h2 >> 13;
! 394: s[9] = (h2 >> 21) | (h3 * ((uint32_t) 1 << 5));
! 395: s[10] = h3 >> 3;
! 396: s[11] = h3 >> 11;
! 397: s[12] = (h3 >> 19) | (h4 * ((uint32_t) 1 << 6));
! 398: s[13] = h4 >> 2;
! 399: s[14] = h4 >> 10;
! 400: s[15] = h4 >> 18;
! 401: s[16] = h5 >> 0;
! 402: s[17] = h5 >> 8;
! 403: s[18] = h5 >> 16;
! 404: s[19] = (h5 >> 24) | (h6 * ((uint32_t) 1 << 1));
! 405: s[20] = h6 >> 7;
! 406: s[21] = h6 >> 15;
! 407: s[22] = (h6 >> 23) | (h7 * ((uint32_t) 1 << 3));
! 408: s[23] = h7 >> 5;
! 409: s[24] = h7 >> 13;
! 410: s[25] = (h7 >> 21) | (h8 * ((uint32_t) 1 << 4));
! 411: s[26] = h8 >> 4;
! 412: s[27] = h8 >> 12;
! 413: s[28] = (h8 >> 20) | (h9 * ((uint32_t) 1 << 6));
! 414: s[29] = h9 >> 2;
! 415: s[30] = h9 >> 10;
! 416: s[31] = h9 >> 18;
! 417: }
! 418:
! 419: /**
! 420: * return 1 if f is in {1,3,5,...,q-2}
! 421: * return 0 if f is in {0,2,4,...,q-1}
! 422: *
! 423: * Preconditions:
! 424: * |f| bounded by 1.1*2^26, 1.1*2^25, 1.1*2^26, 1.1*2^25, etc.
! 425: */
! 426: static int fe_isnegative(const fe f)
! 427: {
! 428: uint8_t s[32];
! 429:
! 430: fe_tobytes(s,f);
! 431:
! 432: return s[0] & 1;
! 433: }
! 434:
! 435: /**
! 436: * return 1 if f != 0
! 437: * return 0 if f == 0
! 438: *
! 439: * Preconditions:
! 440: * |f| bounded by 1.1*2^26, 1.1*2^25, 1.1*2^26, 1.1*2^25,etc.
! 441: */
! 442: static uint8_t zero[32];
! 443:
! 444: static int fe_isnonzero(const fe f)
! 445: {
! 446: uint8_t s[32];
! 447:
! 448: fe_tobytes(s, f);
! 449:
! 450: return !memeq_const(s, zero, 32);
! 451: }
! 452:
! 453: /**
! 454: * h = f * g
! 455: * Can overlap h with f or g.
! 456: *
! 457: * Preconditions:
! 458: * |f| bounded by 1.65*2^26, 1.65*2^25, 1.65*2^26, 1.65*2^25, etc.
! 459: * |g| bounded by 1.65*2^26, 1.65*2^25, 1.65*2^26, 1.65*2^25, etc.
! 460: *
! 461: * Postconditions:
! 462: * |h| bounded by 1.01*2^25, 1.01*2^24, 1.01*2^25, 1.01*2^24, etc.
! 463: */
! 464:
! 465: /**
! 466: * Notes on implementation strategy:
! 467: *
! 468: * Using schoolbook multiplication.
! 469: * Karatsuba would save a little in some cost models.
! 470: *
! 471: * Most multiplications by 2 and 19 are 32-bit precomputations;
! 472: * cheaper than 64-bit postcomputations.
! 473: *
! 474: * There is one remaining multiplication by 19 in the carry chain;
! 475: * one *19 precomputation can be merged into this,
! 476: * but the resulting data flow is considerably less clean.
! 477: *
! 478: * There are 12 carries below.
! 479: * 10 of them are 2-way parallelizable and vectorizable.
! 480: * Can get away with 11 carries, but then data flow is much deeper.
! 481: *
! 482: * With tighter constraints on inputs can squeeze carries into int32.
! 483: */
! 484:
! 485: static void fe_mul(fe h, const fe f, const fe g)
! 486: {
! 487: int32_t f0 = f[0];
! 488: int32_t f1 = f[1];
! 489: int32_t f2 = f[2];
! 490: int32_t f3 = f[3];
! 491: int32_t f4 = f[4];
! 492: int32_t f5 = f[5];
! 493: int32_t f6 = f[6];
! 494: int32_t f7 = f[7];
! 495: int32_t f8 = f[8];
! 496: int32_t f9 = f[9];
! 497:
! 498: int32_t g0 = g[0];
! 499: int32_t g1 = g[1];
! 500: int32_t g2 = g[2];
! 501: int32_t g3 = g[3];
! 502: int32_t g4 = g[4];
! 503: int32_t g5 = g[5];
! 504: int32_t g6 = g[6];
! 505: int32_t g7 = g[7];
! 506: int32_t g8 = g[8];
! 507: int32_t g9 = g[9];
! 508:
! 509: int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
! 510: int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
! 511: int32_t g3_19 = 19 * g3;
! 512: int32_t g4_19 = 19 * g4;
! 513: int32_t g5_19 = 19 * g5;
! 514: int32_t g6_19 = 19 * g6;
! 515: int32_t g7_19 = 19 * g7;
! 516: int32_t g8_19 = 19 * g8;
! 517: int32_t g9_19 = 19 * g9;
! 518:
! 519: int32_t f1_2 = 2 * f1;
! 520: int32_t f3_2 = 2 * f3;
! 521: int32_t f5_2 = 2 * f5;
! 522: int32_t f7_2 = 2 * f7;
! 523: int32_t f9_2 = 2 * f9;
! 524:
! 525: int64_t f0g0 = f0 * (int64_t) g0;
! 526: int64_t f0g1 = f0 * (int64_t) g1;
! 527: int64_t f0g2 = f0 * (int64_t) g2;
! 528: int64_t f0g3 = f0 * (int64_t) g3;
! 529: int64_t f0g4 = f0 * (int64_t) g4;
! 530: int64_t f0g5 = f0 * (int64_t) g5;
! 531: int64_t f0g6 = f0 * (int64_t) g6;
! 532: int64_t f0g7 = f0 * (int64_t) g7;
! 533: int64_t f0g8 = f0 * (int64_t) g8;
! 534: int64_t f0g9 = f0 * (int64_t) g9;
! 535:
! 536: int64_t f1g0 = f1 * (int64_t) g0;
! 537: int64_t f1g1_2 = f1_2 * (int64_t) g1;
! 538: int64_t f1g2 = f1 * (int64_t) g2;
! 539: int64_t f1g3_2 = f1_2 * (int64_t) g3;
! 540: int64_t f1g4 = f1 * (int64_t) g4;
! 541: int64_t f1g5_2 = f1_2 * (int64_t) g5;
! 542: int64_t f1g6 = f1 * (int64_t) g6;
! 543: int64_t f1g7_2 = f1_2 * (int64_t) g7;
! 544: int64_t f1g8 = f1 * (int64_t) g8;
! 545: int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
! 546:
! 547: int64_t f2g0 = f2 * (int64_t) g0;
! 548: int64_t f2g1 = f2 * (int64_t) g1;
! 549: int64_t f2g2 = f2 * (int64_t) g2;
! 550: int64_t f2g3 = f2 * (int64_t) g3;
! 551: int64_t f2g4 = f2 * (int64_t) g4;
! 552: int64_t f2g5 = f2 * (int64_t) g5;
! 553: int64_t f2g6 = f2 * (int64_t) g6;
! 554: int64_t f2g7 = f2 * (int64_t) g7;
! 555: int64_t f2g8_19 = f2 * (int64_t) g8_19;
! 556: int64_t f2g9_19 = f2 * (int64_t) g9_19;
! 557:
! 558: int64_t f3g0 = f3 * (int64_t) g0;
! 559: int64_t f3g1_2 = f3_2 * (int64_t) g1;
! 560: int64_t f3g2 = f3 * (int64_t) g2;
! 561: int64_t f3g3_2 = f3_2 * (int64_t) g3;
! 562: int64_t f3g4 = f3 * (int64_t) g4;
! 563: int64_t f3g5_2 = f3_2 * (int64_t) g5;
! 564: int64_t f3g6 = f3 * (int64_t) g6;
! 565: int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
! 566: int64_t f3g8_19 = f3 * (int64_t) g8_19;
! 567: int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
! 568:
! 569: int64_t f4g0 = f4 * (int64_t) g0;
! 570: int64_t f4g1 = f4 * (int64_t) g1;
! 571: int64_t f4g2 = f4 * (int64_t) g2;
! 572: int64_t f4g3 = f4 * (int64_t) g3;
! 573: int64_t f4g4 = f4 * (int64_t) g4;
! 574: int64_t f4g5 = f4 * (int64_t) g5;
! 575: int64_t f4g6_19 = f4 * (int64_t) g6_19;
! 576: int64_t f4g7_19 = f4 * (int64_t) g7_19;
! 577: int64_t f4g8_19 = f4 * (int64_t) g8_19;
! 578: int64_t f4g9_19 = f4 * (int64_t) g9_19;
! 579:
! 580: int64_t f5g0 = f5 * (int64_t) g0;
! 581: int64_t f5g1_2 = f5_2 * (int64_t) g1;
! 582: int64_t f5g2 = f5 * (int64_t) g2;
! 583: int64_t f5g3_2 = f5_2 * (int64_t) g3;
! 584: int64_t f5g4 = f5 * (int64_t) g4;
! 585: int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
! 586: int64_t f5g6_19 = f5 * (int64_t) g6_19;
! 587: int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
! 588: int64_t f5g8_19 = f5 * (int64_t) g8_19;
! 589: int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
! 590:
! 591: int64_t f6g0 = f6 * (int64_t) g0;
! 592: int64_t f6g1 = f6 * (int64_t) g1;
! 593: int64_t f6g2 = f6 * (int64_t) g2;
! 594: int64_t f6g3 = f6 * (int64_t) g3;
! 595: int64_t f6g4_19 = f6 * (int64_t) g4_19;
! 596: int64_t f6g5_19 = f6 * (int64_t) g5_19;
! 597: int64_t f6g6_19 = f6 * (int64_t) g6_19;
! 598: int64_t f6g7_19 = f6 * (int64_t) g7_19;
! 599: int64_t f6g8_19 = f6 * (int64_t) g8_19;
! 600: int64_t f6g9_19 = f6 * (int64_t) g9_19;
! 601:
! 602: int64_t f7g0 = f7 * (int64_t) g0;
! 603: int64_t f7g1_2 = f7_2 * (int64_t) g1;
! 604: int64_t f7g2 = f7 * (int64_t) g2;
! 605: int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
! 606: int64_t f7g4_19 = f7 * (int64_t) g4_19;
! 607: int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
! 608: int64_t f7g6_19 = f7 * (int64_t) g6_19;
! 609: int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
! 610: int64_t f7g8_19 = f7 * (int64_t) g8_19;
! 611: int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
! 612:
! 613: int64_t f8g0 = f8 * (int64_t) g0;
! 614: int64_t f8g1 = f8 * (int64_t) g1;
! 615: int64_t f8g2_19 = f8 * (int64_t) g2_19;
! 616: int64_t f8g3_19 = f8 * (int64_t) g3_19;
! 617: int64_t f8g4_19 = f8 * (int64_t) g4_19;
! 618: int64_t f8g5_19 = f8 * (int64_t) g5_19;
! 619: int64_t f8g6_19 = f8 * (int64_t) g6_19;
! 620: int64_t f8g7_19 = f8 * (int64_t) g7_19;
! 621: int64_t f8g8_19 = f8 * (int64_t) g8_19;
! 622: int64_t f8g9_19 = f8 * (int64_t) g9_19;
! 623:
! 624: int64_t f9g0 = f9 * (int64_t) g0;
! 625: int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
! 626: int64_t f9g2_19 = f9 * (int64_t) g2_19;
! 627: int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
! 628: int64_t f9g4_19 = f9 * (int64_t) g4_19;
! 629: int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
! 630: int64_t f9g6_19 = f9 * (int64_t) g6_19;
! 631: int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
! 632: int64_t f9g8_19 = f9 * (int64_t) g8_19;
! 633: int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
! 634:
! 635: int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 +
! 636: f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
! 637: int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 +
! 638: f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
! 639: int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 +
! 640: f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
! 641: int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 +
! 642: f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
! 643: int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 +
! 644: f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
! 645: int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 +
! 646: f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
! 647: int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 +
! 648: f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
! 649: int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 +
! 650: f6g1 + f7g0 + f8g9_19 + f9g8_19;
! 651: int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 +
! 652: f6g2 + f7g1_2 + f8g0 + f9g9_38;
! 653: int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 +
! 654: f6g3 + f7g2 + f8g1 + f9g0 ;
! 655:
! 656: int64_t carry0, carry1, carry2, carry3, carry4;
! 657: int64_t carry5, carry6, carry7, carry8, carry9;
! 658:
! 659: /**
! 660: * |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
! 661: * i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
! 662: * |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
! 663: * i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
! 664: */
! 665:
! 666: carry0 = (h0 + (int64_t) (1L << 25)) >> 26;
! 667: h1 += carry0;
! 668: h0 -= carry0 * ((uint64_t) 1L << 26);
! 669: /* |h0| <= 2^25 */
! 670: /* |h1| <= 1.71*2^59 */
! 671:
! 672: carry4 = (h4 + (int64_t) (1L << 25)) >> 26;
! 673: h5 += carry4;
! 674: h4 -= carry4 * ((uint64_t) 1L << 26);
! 675: /* |h4| <= 2^25 */
! 676: /* |h5| <= 1.71*2^59 */
! 677:
! 678: carry1 = (h1 + (int64_t) (1L << 24)) >> 25;
! 679: h2 += carry1;
! 680: h1 -= carry1 * ((uint64_t) 1L << 25);
! 681: /* |h1| <= 2^24; from now on fits into int32 */
! 682: /* |h2| <= 1.41*2^60 */
! 683:
! 684: carry5 = (h5 + (int64_t) (1L << 24)) >> 25;
! 685: h6 += carry5;
! 686: h5 -= carry5 * ((uint64_t) 1L << 25);
! 687: /* |h5| <= 2^24; from now on fits into int32 */
! 688: /* |h6| <= 1.41*2^60 */
! 689:
! 690: carry2 = (h2 + (int64_t) (1L << 25)) >> 26;
! 691: h3 += carry2;
! 692: h2 -= carry2 * ((uint64_t) 1L << 26);
! 693: /* |h2| <= 2^25; from now on fits into int32 unchanged */
! 694: /* |h3| <= 1.71*2^59 */
! 695:
! 696: carry6 = (h6 + (int64_t) (1L << 25)) >> 26;
! 697: h7 += carry6;
! 698: h6 -= carry6 * ((uint64_t) 1L << 26);
! 699: /* |h6| <= 2^25; from now on fits into int32 unchanged */
! 700: /* |h7| <= 1.71*2^59 */
! 701:
! 702: carry3 = (h3 + (int64_t) (1L << 24)) >> 25;
! 703: h4 += carry3;
! 704: h3 -= carry3 * ((uint64_t) 1L << 25);
! 705: /* |h3| <= 2^24; from now on fits into int32 unchanged */
! 706: /* |h4| <= 1.72*2^34 */
! 707:
! 708: carry7 = (h7 + (int64_t) (1L << 24)) >> 25;
! 709: h8 += carry7;
! 710: h7 -= carry7 * ((uint64_t) 1L << 25);
! 711: /* |h7| <= 2^24; from now on fits into int32 unchanged */
! 712: /* |h8| <= 1.41*2^60 */
! 713:
! 714: carry4 = (h4 + (int64_t) (1L << 25)) >> 26;
! 715: h5 += carry4;
! 716: h4 -= carry4 * ((uint64_t) 1L << 26);
! 717: /* |h4| <= 2^25; from now on fits into int32 unchanged */
! 718: /* |h5| <= 1.01*2^24 */
! 719:
! 720: carry8 = (h8 + (int64_t) (1L << 25)) >> 26;
! 721: h9 += carry8;
! 722: h8 -= carry8 * ((uint64_t) 1L << 26);
! 723: /* |h8| <= 2^25; from now on fits into int32 unchanged */
! 724: /* |h9| <= 1.71*2^59 */
! 725:
! 726: carry9 = (h9 + (int64_t) (1L << 24)) >> 25;
! 727: h0 += carry9 * 19;
! 728: h9 -= carry9 * ((uint64_t) 1L << 25);
! 729: /* |h9| <= 2^24; from now on fits into int32 unchanged */
! 730: /* |h0| <= 1.1*2^39 */
! 731:
! 732: carry0 = (h0 + (int64_t) (1L << 25)) >> 26;
! 733: h1 += carry0;
! 734: h0 -= carry0 * ((uint64_t) 1L << 26);
! 735: /* |h0| <= 2^25; from now on fits into int32 unchanged */
! 736: /* |h1| <= 1.01*2^24 */
! 737:
! 738: h[0] = (int32_t) h0;
! 739: h[1] = (int32_t) h1;
! 740: h[2] = (int32_t) h2;
! 741: h[3] = (int32_t) h3;
! 742: h[4] = (int32_t) h4;
! 743: h[5] = (int32_t) h5;
! 744: h[6] = (int32_t) h6;
! 745: h[7] = (int32_t) h7;
! 746: h[8] = (int32_t) h8;
! 747: h[9] = (int32_t) h9;
! 748: }
! 749:
! 750: /**
! 751: * h = -f
! 752: *
! 753: * Preconditions:
! 754: * |f| bounded by 1.1*2^25, 1.1*2^24, 1.1*2^25, 1.1*2^24, etc.
! 755: *
! 756: * Postconditions:
! 757: * |h| bounded by 1.1*2^25, 1.1*2^24, 1.1*2^25, 1.1*2^24, etc.
! 758: */
! 759: static void fe_neg(fe h,const fe f)
! 760: {
! 761: int32_t f0 = f[0];
! 762: int32_t f1 = f[1];
! 763: int32_t f2 = f[2];
! 764: int32_t f3 = f[3];
! 765: int32_t f4 = f[4];
! 766: int32_t f5 = f[5];
! 767: int32_t f6 = f[6];
! 768: int32_t f7 = f[7];
! 769: int32_t f8 = f[8];
! 770: int32_t f9 = f[9];
! 771:
! 772: int32_t h0 = -f0;
! 773: int32_t h1 = -f1;
! 774: int32_t h2 = -f2;
! 775: int32_t h3 = -f3;
! 776: int32_t h4 = -f4;
! 777: int32_t h5 = -f5;
! 778: int32_t h6 = -f6;
! 779: int32_t h7 = -f7;
! 780: int32_t h8 = -f8;
! 781: int32_t h9 = -f9;
! 782:
! 783: h[0] = h0;
! 784: h[1] = h1;
! 785: h[2] = h2;
! 786: h[3] = h3;
! 787: h[4] = h4;
! 788: h[5] = h5;
! 789: h[6] = h6;
! 790: h[7] = h7;
! 791: h[8] = h8;
! 792: h[9] = h9;
! 793: }
! 794:
! 795: /**
! 796: * h = f * f
! 797: * Can overlap h with f.
! 798: *
! 799: * Preconditions:
! 800: * |f| bounded by 1.65*2^26, 1.65*2^25, 1.65*2^26, 1.65*2^25, etc.
! 801: *
! 802: * Postconditions:
! 803: * |h| bounded by 1.01*2^25, 1.01*2^24, 1.01*2^25, 1.01*2^24, etc.
! 804: *
! 805: * See fe_mul.c for discussion of implementation strategy.
! 806: */
! 807: static void fe_sq(fe h, const fe f)
! 808: {
! 809: int32_t f0 = f[0];
! 810: int32_t f1 = f[1];
! 811: int32_t f2 = f[2];
! 812: int32_t f3 = f[3];
! 813: int32_t f4 = f[4];
! 814: int32_t f5 = f[5];
! 815: int32_t f6 = f[6];
! 816: int32_t f7 = f[7];
! 817: int32_t f8 = f[8];
! 818: int32_t f9 = f[9];
! 819:
! 820: int32_t f0_2 = 2 * f0;
! 821: int32_t f1_2 = 2 * f1;
! 822: int32_t f2_2 = 2 * f2;
! 823: int32_t f3_2 = 2 * f3;
! 824: int32_t f4_2 = 2 * f4;
! 825: int32_t f5_2 = 2 * f5;
! 826: int32_t f6_2 = 2 * f6;
! 827: int32_t f7_2 = 2 * f7;
! 828:
! 829: int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
! 830: int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
! 831: int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
! 832: int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
! 833: int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
! 834:
! 835: int64_t f0f0 = f0 * (int64_t) f0;
! 836: int64_t f0f1_2 = f0_2 * (int64_t) f1;
! 837: int64_t f0f2_2 = f0_2 * (int64_t) f2;
! 838: int64_t f0f3_2 = f0_2 * (int64_t) f3;
! 839: int64_t f0f4_2 = f0_2 * (int64_t) f4;
! 840: int64_t f0f5_2 = f0_2 * (int64_t) f5;
! 841: int64_t f0f6_2 = f0_2 * (int64_t) f6;
! 842: int64_t f0f7_2 = f0_2 * (int64_t) f7;
! 843: int64_t f0f8_2 = f0_2 * (int64_t) f8;
! 844: int64_t f0f9_2 = f0_2 * (int64_t) f9;
! 845:
! 846: int64_t f1f1_2 = f1_2 * (int64_t) f1;
! 847: int64_t f1f2_2 = f1_2 * (int64_t) f2;
! 848: int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
! 849: int64_t f1f4_2 = f1_2 * (int64_t) f4;
! 850: int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
! 851: int64_t f1f6_2 = f1_2 * (int64_t) f6;
! 852: int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
! 853: int64_t f1f8_2 = f1_2 * (int64_t) f8;
! 854: int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
! 855:
! 856: int64_t f2f2 = f2 * (int64_t) f2;
! 857: int64_t f2f3_2 = f2_2 * (int64_t) f3;
! 858: int64_t f2f4_2 = f2_2 * (int64_t) f4;
! 859: int64_t f2f5_2 = f2_2 * (int64_t) f5;
! 860: int64_t f2f6_2 = f2_2 * (int64_t) f6;
! 861: int64_t f2f7_2 = f2_2 * (int64_t) f7;
! 862: int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
! 863: int64_t f2f9_38 = f2 * (int64_t) f9_38;
! 864:
! 865: int64_t f3f3_2 = f3_2 * (int64_t) f3;
! 866: int64_t f3f4_2 = f3_2 * (int64_t) f4;
! 867: int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
! 868: int64_t f3f6_2 = f3_2 * (int64_t) f6;
! 869: int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
! 870: int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
! 871: int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
! 872:
! 873: int64_t f4f4 = f4 * (int64_t) f4;
! 874: int64_t f4f5_2 = f4_2 * (int64_t) f5;
! 875: int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
! 876: int64_t f4f7_38 = f4 * (int64_t) f7_38;
! 877: int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
! 878: int64_t f4f9_38 = f4 * (int64_t) f9_38;
! 879:
! 880: int64_t f5f5_38 = f5 * (int64_t) f5_38;
! 881: int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
! 882: int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
! 883: int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
! 884: int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
! 885:
! 886: int64_t f6f6_19 = f6 * (int64_t) f6_19;
! 887: int64_t f6f7_38 = f6 * (int64_t) f7_38;
! 888: int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
! 889: int64_t f6f9_38 = f6 * (int64_t) f9_38;
! 890:
! 891: int64_t f7f7_38 = f7 * (int64_t) f7_38;
! 892: int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
! 893: int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
! 894:
! 895: int64_t f8f8_19 = f8 * (int64_t) f8_19;
! 896: int64_t f8f9_38 = f8 * (int64_t) f9_38;
! 897:
! 898: int64_t f9f9_38 = f9 * (int64_t) f9_38;
! 899:
! 900: int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
! 901: int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
! 902: int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
! 903: int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
! 904: int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
! 905: int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
! 906: int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
! 907: int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
! 908: int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
! 909: int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
! 910:
! 911: int64_t carry0, carry1, carry2, carry3, carry4;
! 912: int64_t carry5, carry6, carry7, carry8, carry9;
! 913:
! 914: carry0 = (h0 + (int64_t) (1L << 25)) >> 26;
! 915: h1 += carry0;
! 916: h0 -= carry0 * ((uint64_t) 1L << 26);
! 917:
! 918: carry4 = (h4 + (int64_t) (1L << 25)) >> 26;
! 919: h5 += carry4;
! 920: h4 -= carry4 * ((uint64_t) 1L << 26);
! 921:
! 922: carry1 = (h1 + (int64_t) (1L << 24)) >> 25;
! 923: h2 += carry1;
! 924: h1 -= carry1 * ((uint64_t) 1L << 25);
! 925:
! 926: carry5 = (h5 + (int64_t) (1L << 24)) >> 25;
! 927: h6 += carry5;
! 928: h5 -= carry5 * ((uint64_t) 1L << 25);
! 929:
! 930: carry2 = (h2 + (int64_t) (1L << 25)) >> 26;
! 931: h3 += carry2;
! 932: h2 -= carry2 * ((uint64_t) 1L << 26);
! 933:
! 934: carry6 = (h6 + (int64_t) (1L << 25)) >> 26;
! 935: h7 += carry6;
! 936: h6 -= carry6 * ((uint64_t) 1L << 26);
! 937:
! 938: carry3 = (h3 + (int64_t) (1L << 24)) >> 25;
! 939: h4 += carry3;
! 940: h3 -= carry3 * ((uint64_t) 1L << 25);
! 941:
! 942: carry7 = (h7 + (int64_t) (1L << 24)) >> 25;
! 943: h8 += carry7;
! 944: h7 -= carry7 * ((uint64_t) 1L << 25);
! 945:
! 946: carry4 = (h4 + (int64_t) (1L << 25)) >> 26;
! 947: h5 += carry4;
! 948: h4 -= carry4 * ((uint64_t) 1L << 26);
! 949:
! 950: carry8 = (h8 + (int64_t) (1L << 25)) >> 26;
! 951: h9 += carry8;
! 952: h8 -= carry8 * ((uint64_t) 1L << 26);
! 953:
! 954: carry9 = (h9 + (int64_t) (1L << 24)) >> 25;
! 955: h0 += carry9 * 19;
! 956: h9 -= carry9 * ((uint64_t) 1L << 25);
! 957:
! 958: carry0 = (h0 + (int64_t) (1L << 25)) >> 26;
! 959: h1 += carry0;
! 960: h0 -= carry0 * ((uint64_t) 1L << 26);
! 961:
! 962: h[0] = (int32_t) h0;
! 963: h[1] = (int32_t) h1;
! 964: h[2] = (int32_t) h2;
! 965: h[3] = (int32_t) h3;
! 966: h[4] = (int32_t) h4;
! 967: h[5] = (int32_t) h5;
! 968: h[6] = (int32_t) h6;
! 969: h[7] = (int32_t) h7;
! 970: h[8] = (int32_t) h8;
! 971: h[9] = (int32_t) h9;
! 972: }
! 973:
! 974: /**
! 975: * h = 2 * f * f
! 976: * Can overlap h with f.
! 977: *
! 978: * Preconditions:
! 979: *|f| bounded by 1.65*2^26, 1.65*2^25, 1.65*2^26, 1.65*2^25, etc.
! 980: *
! 981: * Postconditions:
! 982: * |h| bounded by 1.01*2^25, 1.01*2^24, 1.01*2^25, 1.01*2^24, etc.
! 983: *
! 984: * See fe_mul.c for discussion of implementation strategy.
! 985: */
! 986: static void fe_sq2(fe h, const fe f)
! 987: {
! 988: int32_t f0 = f[0];
! 989: int32_t f1 = f[1];
! 990: int32_t f2 = f[2];
! 991: int32_t f3 = f[3];
! 992: int32_t f4 = f[4];
! 993: int32_t f5 = f[5];
! 994: int32_t f6 = f[6];
! 995: int32_t f7 = f[7];
! 996: int32_t f8 = f[8];
! 997: int32_t f9 = f[9];
! 998:
! 999: int32_t f0_2 = 2 * f0;
! 1000: int32_t f1_2 = 2 * f1;
! 1001: int32_t f2_2 = 2 * f2;
! 1002: int32_t f3_2 = 2 * f3;
! 1003: int32_t f4_2 = 2 * f4;
! 1004: int32_t f5_2 = 2 * f5;
! 1005: int32_t f6_2 = 2 * f6;
! 1006: int32_t f7_2 = 2 * f7;
! 1007:
! 1008: int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
! 1009: int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
! 1010: int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
! 1011: int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
! 1012: int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
! 1013:
! 1014: int64_t f0f0 = f0 * (int64_t) f0;
! 1015: int64_t f0f1_2 = f0_2 * (int64_t) f1;
! 1016: int64_t f0f2_2 = f0_2 * (int64_t) f2;
! 1017: int64_t f0f3_2 = f0_2 * (int64_t) f3;
! 1018: int64_t f0f4_2 = f0_2 * (int64_t) f4;
! 1019: int64_t f0f5_2 = f0_2 * (int64_t) f5;
! 1020: int64_t f0f6_2 = f0_2 * (int64_t) f6;
! 1021: int64_t f0f7_2 = f0_2 * (int64_t) f7;
! 1022: int64_t f0f8_2 = f0_2 * (int64_t) f8;
! 1023: int64_t f0f9_2 = f0_2 * (int64_t) f9;
! 1024:
! 1025: int64_t f1f1_2 = f1_2 * (int64_t) f1;
! 1026: int64_t f1f2_2 = f1_2 * (int64_t) f2;
! 1027: int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
! 1028: int64_t f1f4_2 = f1_2 * (int64_t) f4;
! 1029: int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
! 1030: int64_t f1f6_2 = f1_2 * (int64_t) f6;
! 1031: int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
! 1032: int64_t f1f8_2 = f1_2 * (int64_t) f8;
! 1033: int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
! 1034:
! 1035: int64_t f2f2 = f2 * (int64_t) f2;
! 1036: int64_t f2f3_2 = f2_2 * (int64_t) f3;
! 1037: int64_t f2f4_2 = f2_2 * (int64_t) f4;
! 1038: int64_t f2f5_2 = f2_2 * (int64_t) f5;
! 1039: int64_t f2f6_2 = f2_2 * (int64_t) f6;
! 1040: int64_t f2f7_2 = f2_2 * (int64_t) f7;
! 1041: int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
! 1042: int64_t f2f9_38 = f2 * (int64_t) f9_38;
! 1043:
! 1044: int64_t f3f3_2 = f3_2 * (int64_t) f3;
! 1045: int64_t f3f4_2 = f3_2 * (int64_t) f4;
! 1046: int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
! 1047: int64_t f3f6_2 = f3_2 * (int64_t) f6;
! 1048: int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
! 1049: int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
! 1050: int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
! 1051:
! 1052: int64_t f4f4 = f4 * (int64_t) f4;
! 1053: int64_t f4f5_2 = f4_2 * (int64_t) f5;
! 1054: int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
! 1055: int64_t f4f7_38 = f4 * (int64_t) f7_38;
! 1056: int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
! 1057: int64_t f4f9_38 = f4 * (int64_t) f9_38;
! 1058:
! 1059: int64_t f5f5_38 = f5 * (int64_t) f5_38;
! 1060: int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
! 1061: int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
! 1062: int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
! 1063: int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
! 1064:
! 1065: int64_t f6f6_19 = f6 * (int64_t) f6_19;
! 1066: int64_t f6f7_38 = f6 * (int64_t) f7_38;
! 1067: int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
! 1068: int64_t f6f9_38 = f6 * (int64_t) f9_38;
! 1069:
! 1070: int64_t f7f7_38 = f7 * (int64_t) f7_38;
! 1071: int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
! 1072: int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
! 1073:
! 1074: int64_t f8f8_19 = f8 * (int64_t) f8_19;
! 1075: int64_t f8f9_38 = f8 * (int64_t) f9_38;
! 1076:
! 1077: int64_t f9f9_38 = f9 * (int64_t) f9_38;
! 1078:
! 1079: int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
! 1080: int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
! 1081: int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
! 1082: int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
! 1083: int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
! 1084: int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
! 1085: int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
! 1086: int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
! 1087: int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
! 1088: int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
! 1089:
! 1090: int64_t carry0, carry1, carry2, carry3, carry4;
! 1091: int64_t carry5, carry6, carry7, carry8, carry9;
! 1092:
! 1093: h0 += h0;
! 1094: h1 += h1;
! 1095: h2 += h2;
! 1096: h3 += h3;
! 1097: h4 += h4;
! 1098: h5 += h5;
! 1099: h6 += h6;
! 1100: h7 += h7;
! 1101: h8 += h8;
! 1102: h9 += h9;
! 1103:
! 1104: carry0 = (h0 + (int64_t) (1L << 25)) >> 26;
! 1105: h1 += carry0;
! 1106: h0 -= carry0 * ((uint64_t) 1L << 26);
! 1107:
! 1108: carry4 = (h4 + (int64_t) (1L << 25)) >> 26;
! 1109: h5 += carry4;
! 1110: h4 -= carry4 * ((uint64_t) 1L << 26);
! 1111:
! 1112: carry1 = (h1 + (int64_t) (1L << 24)) >> 25;
! 1113: h2 += carry1;
! 1114: h1 -= carry1 * ((uint64_t) 1L << 25);
! 1115:
! 1116: carry5 = (h5 + (int64_t) (1L << 24)) >> 25;
! 1117: h6 += carry5;
! 1118: h5 -= carry5 * ((uint64_t) 1L << 25);
! 1119:
! 1120: carry2 = (h2 + (int64_t) (1L << 25)) >> 26;
! 1121: h3 += carry2;
! 1122: h2 -= carry2 * ((uint64_t) 1L << 26);
! 1123:
! 1124: carry6 = (h6 + (int64_t) (1L << 25)) >> 26;
! 1125: h7 += carry6;
! 1126: h6 -= carry6 * ((uint64_t) 1L << 26);
! 1127:
! 1128: carry3 = (h3 + (int64_t) (1L << 24)) >> 25;
! 1129: h4 += carry3;
! 1130: h3 -= carry3 * ((uint64_t) 1L << 25);
! 1131:
! 1132: carry7 = (h7 + (int64_t) (1L << 24)) >> 25;
! 1133: h8 += carry7;
! 1134: h7 -= carry7 * ((uint64_t) 1L << 25);
! 1135:
! 1136: carry4 = (h4 + (int64_t) (1L << 25)) >> 26;
! 1137: h5 += carry4;
! 1138: h4 -= carry4 * ((uint64_t) 1L << 26);
! 1139:
! 1140: carry8 = (h8 + (int64_t) (1L << 25)) >> 26;
! 1141: h9 += carry8;
! 1142: h8 -= carry8 * ((uint64_t) 1L << 26);
! 1143:
! 1144: carry9 = (h9 + (int64_t) (1L << 24)) >> 25;
! 1145: h0 += carry9 * 19;
! 1146: h9 -= carry9 * ((uint64_t) 1L << 25);
! 1147:
! 1148: carry0 = (h0 + (int64_t) (1L << 25)) >> 26;
! 1149: h1 += carry0;
! 1150: h0 -= carry0 * ((uint64_t) 1L << 26);
! 1151:
! 1152: h[0] = (int32_t) h0;
! 1153: h[1] = (int32_t) h1;
! 1154: h[2] = (int32_t) h2;
! 1155: h[3] = (int32_t) h3;
! 1156: h[4] = (int32_t) h4;
! 1157: h[5] = (int32_t) h5;
! 1158: h[6] = (int32_t) h6;
! 1159: h[7] = (int32_t) h7;
! 1160: h[8] = (int32_t) h8;
! 1161: h[9] = (int32_t) h9;
! 1162: }
! 1163:
! 1164: static void fe_invert(fe out, const fe z)
! 1165: {
! 1166: fe t0, t1, t2, t3;
! 1167: int i;
! 1168:
! 1169: fe_sq(t0, z);
! 1170: fe_sq(t1, t0);
! 1171: fe_sq(t1, t1);
! 1172: fe_mul(t1, z, t1);
! 1173: fe_mul(t0, t0, t1);
! 1174: fe_sq(t2, t0);
! 1175: fe_mul(t1, t1, t2);
! 1176: fe_sq(t2, t1);
! 1177:
! 1178: for (i = 1; i < 5; ++i)
! 1179: {
! 1180: fe_sq(t2, t2);
! 1181: }
! 1182:
! 1183: fe_mul(t1, t2, t1);
! 1184: fe_sq(t2, t1);
! 1185:
! 1186: for (i = 1; i < 10; ++i)
! 1187: {
! 1188: fe_sq(t2, t2);
! 1189: }
! 1190:
! 1191: fe_mul(t2, t2, t1);
! 1192: fe_sq(t3, t2);
! 1193:
! 1194: for (i = 1; i < 20; ++i)
! 1195: {
! 1196: fe_sq(t3, t3);
! 1197: }
! 1198:
! 1199: fe_mul(t2, t3, t2);
! 1200: fe_sq(t2, t2);
! 1201:
! 1202: for (i = 1; i < 10; ++i)
! 1203: {
! 1204: fe_sq(t2, t2);
! 1205: }
! 1206:
! 1207: fe_mul(t1, t2, t1);
! 1208: fe_sq(t2, t1);
! 1209:
! 1210: for (i = 1; i < 50; ++i)
! 1211: {
! 1212: fe_sq(t2, t2);
! 1213: }
! 1214:
! 1215: fe_mul(t2, t2, t1);
! 1216: fe_sq(t3, t2);
! 1217:
! 1218: for (i = 1; i < 100; ++i)
! 1219: {
! 1220: fe_sq(t3, t3);
! 1221: }
! 1222:
! 1223: fe_mul(t2, t3, t2);
! 1224: fe_sq(t2, t2);
! 1225:
! 1226: for (i = 1; i < 50; ++i)
! 1227: {
! 1228: fe_sq(t2, t2);
! 1229: }
! 1230:
! 1231: fe_mul(t1, t2, t1);
! 1232: fe_sq(t1, t1);
! 1233:
! 1234: for (i = 1; i < 5; ++i)
! 1235: {
! 1236: fe_sq(t1, t1);
! 1237: }
! 1238:
! 1239: fe_mul(out, t1, t0);
! 1240: }
! 1241:
! 1242: static void fe_pow22523(fe out, const fe z)
! 1243: {
! 1244: fe t0, t1, t2;
! 1245: int i;
! 1246:
! 1247: fe_sq(t0, z);
! 1248: fe_sq(t1, t0);
! 1249: fe_sq(t1, t1);
! 1250: fe_mul(t1, z, t1);
! 1251: fe_mul(t0, t0, t1);
! 1252: fe_sq(t0, t0);
! 1253: fe_mul(t0, t1, t0);
! 1254: fe_sq(t1, t0);
! 1255:
! 1256: for (i = 1; i < 5; ++i)
! 1257: {
! 1258: fe_sq(t1, t1);
! 1259: }
! 1260:
! 1261: fe_mul(t0, t1, t0);
! 1262: fe_sq(t1, t0);
! 1263:
! 1264: for (i = 1; i < 10; ++i)
! 1265: {
! 1266: fe_sq(t1, t1);
! 1267: }
! 1268:
! 1269: fe_mul(t1, t1, t0);
! 1270: fe_sq(t2, t1);
! 1271:
! 1272: for (i = 1; i < 20; ++i)
! 1273: {
! 1274: fe_sq(t2, t2);
! 1275: }
! 1276:
! 1277: fe_mul(t1, t2, t1);
! 1278: fe_sq(t1, t1);
! 1279:
! 1280: for (i = 1; i < 10; ++i)
! 1281: {
! 1282: fe_sq(t1, t1);
! 1283: }
! 1284:
! 1285: fe_mul(t0, t1, t0);
! 1286: fe_sq(t1, t0);
! 1287:
! 1288: for (i = 1; i < 50; ++i)
! 1289: {
! 1290: fe_sq(t1, t1);
! 1291: }
! 1292:
! 1293: fe_mul(t1, t1, t0);
! 1294: fe_sq(t2, t1);
! 1295: for (i = 1; i < 100; ++i)
! 1296: {
! 1297: fe_sq(t2, t2);
! 1298: }
! 1299:
! 1300: fe_mul(t1, t2, t1);
! 1301: fe_sq(t1, t1);
! 1302:
! 1303: for (i = 1; i < 50; ++i)
! 1304: {
! 1305: fe_sq(t1, t1);
! 1306: }
! 1307:
! 1308: fe_mul(t0, t1, t0);
! 1309: fe_sq(t0, t0);
! 1310: fe_sq(t0, t0);
! 1311: fe_mul(out, t0, z);
! 1312: }
! 1313:
! 1314: /**
! 1315: * h = f - g
! 1316: * Can overlap h with f or g.
! 1317: *
! 1318: * Preconditions:
! 1319: * |f| bounded by 1.1*2^25, 1.1*2^24, 1.1*2^25, 1.1*2^24, etc.
! 1320: * |g| bounded by 1.1*2^25, 1.1*2^24, 1.1*2^25, 1.1*2^24, etc.
! 1321: *
! 1322: * Postconditions:
! 1323: * |h| bounded by 1.1*2^26, 1.1*2^25, 1.1*2^26, 1.1*2^25, etc.
! 1324: */
! 1325: static void fe_sub(fe h, const fe f, const fe g)
! 1326: {
! 1327: int32_t f0 = f[0];
! 1328: int32_t f1 = f[1];
! 1329: int32_t f2 = f[2];
! 1330: int32_t f3 = f[3];
! 1331: int32_t f4 = f[4];
! 1332: int32_t f5 = f[5];
! 1333: int32_t f6 = f[6];
! 1334: int32_t f7 = f[7];
! 1335: int32_t f8 = f[8];
! 1336: int32_t f9 = f[9];
! 1337:
! 1338: int32_t g0 = g[0];
! 1339: int32_t g1 = g[1];
! 1340: int32_t g2 = g[2];
! 1341: int32_t g3 = g[3];
! 1342: int32_t g4 = g[4];
! 1343: int32_t g5 = g[5];
! 1344: int32_t g6 = g[6];
! 1345: int32_t g7 = g[7];
! 1346: int32_t g8 = g[8];
! 1347: int32_t g9 = g[9];
! 1348:
! 1349: int32_t h0 = f0 - g0;
! 1350: int32_t h1 = f1 - g1;
! 1351: int32_t h2 = f2 - g2;
! 1352: int32_t h3 = f3 - g3;
! 1353: int32_t h4 = f4 - g4;
! 1354: int32_t h5 = f5 - g5;
! 1355: int32_t h6 = f6 - g6;
! 1356: int32_t h7 = f7 - g7;
! 1357: int32_t h8 = f8 - g8;
! 1358: int32_t h9 = f9 - g9;
! 1359:
! 1360: h[0] = h0;
! 1361: h[1] = h1;
! 1362: h[2] = h2;
! 1363: h[3] = h3;
! 1364: h[4] = h4;
! 1365: h[5] = h5;
! 1366: h[6] = h6;
! 1367: h[7] = h7;
! 1368: h[8] = h8;
! 1369: h[9] = h9;
! 1370: }
! 1371:
! 1372: /**
! 1373: * r = p + q
! 1374: */
! 1375: static void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q)
! 1376: {
! 1377: fe t0;
! 1378:
! 1379: fe_add(r->X, p->Y, p->X);
! 1380: fe_sub(r->Y, p->Y, p->X);
! 1381: fe_mul(r->Z, r->X, q->YplusX);
! 1382: fe_mul(r->Y, r->Y, q->YminusX);
! 1383: fe_mul(r->T, q->T2d, p->T);
! 1384: fe_mul(r->X, p->Z, q->Z);
! 1385: fe_add(t0, r->X, r->X);
! 1386: fe_sub(r->X, r->Z, r->Y);
! 1387: fe_add(r->Y, r->Z, r->Y);
! 1388: fe_add(r->Z, t0, r->T);
! 1389: fe_sub(r->T, t0, r->T);
! 1390: }
! 1391:
! 1392: static void slide(int8_t *r, const uint8_t *a)
! 1393: {
! 1394: int i, b, k;
! 1395:
! 1396: for (i = 0; i < 256; ++i)
! 1397: {
! 1398: r[i] = 1 & (a[i >> 3] >> (i & 7));
! 1399: }
! 1400:
! 1401: for (i = 0; i < 256; ++i)
! 1402: {
! 1403: if (r[i])
! 1404: {
! 1405: for (b = 1; b <= 6 && i + b < 256; ++b)
! 1406: {
! 1407: if (r[i + b])
! 1408: {
! 1409: if (r[i] + (r[i + b] << b) <= 15)
! 1410: {
! 1411: r[i] += r[i + b] << b; r[i + b] = 0;
! 1412: }
! 1413: else if (r[i] - (r[i + b] << b) >= -15)
! 1414: {
! 1415: r[i] -= r[i + b] << b;
! 1416:
! 1417: for (k = i + b; k < 256; ++k)
! 1418: {
! 1419: if (!r[k])
! 1420: {
! 1421: r[k] = 1;
! 1422: break;
! 1423: }
! 1424: r[k] = 0;
! 1425: }
! 1426: }
! 1427: else
! 1428: {
! 1429: break;
! 1430: }
! 1431: }
! 1432: }
! 1433: }
! 1434: }
! 1435: }
! 1436:
! 1437: static const ge_precomp Bi[8] = {
! 1438: #include "base2.h"
! 1439: };
! 1440:
! 1441: /* 37095705934669439343138083508754565189542113879843219016388785533085940283555 */
! 1442: static const fe d = {
! 1443: -10913610, 13857413, -15372611, 6949391, 114729,
! 1444: -8787816, -6275908, -3247719, -18696448, -12055116
! 1445: };
! 1446:
! 1447: /* sqrt(-1) */
! 1448: static const fe sqrtm1 = {
! 1449: -32595792, -7943725, 9377950, 3500415, 12389472,
! 1450: -272473, -25146209, -2005654, 326686, 11406482
! 1451: };
! 1452:
! 1453: int ge_frombytes_negate_vartime(ge_p3 *h, const uint8_t *s)
! 1454: {
! 1455: fe u, v, v3, vxx, check;
! 1456:
! 1457: fe_frombytes(h->Y,s);
! 1458: fe_1(h->Z);
! 1459: fe_sq(u,h->Y);
! 1460: fe_mul(v,u,d);
! 1461: fe_sub(u,u,h->Z); /* u = y^2-1 */
! 1462: fe_add(v,v,h->Z); /* v = dy^2+1 */
! 1463:
! 1464: fe_sq(v3,v);
! 1465: fe_mul(v3,v3,v); /* v3 = v^3 */
! 1466: fe_sq(h->X,v3);
! 1467: fe_mul(h->X,h->X,v);
! 1468: fe_mul(h->X,h->X,u); /* x = uv^7 */
! 1469:
! 1470: fe_pow22523(h->X,h->X); /* x = (uv^7)^((q-5)/8) */
! 1471: fe_mul(h->X,h->X,v3);
! 1472: fe_mul(h->X,h->X,u); /* x = uv^3(uv^7)^((q-5)/8) */
! 1473:
! 1474: fe_sq(vxx,h->X);
! 1475: fe_mul(vxx,vxx,v);
! 1476: fe_sub(check,vxx,u); /* vx^2-u */
! 1477:
! 1478: if (fe_isnonzero(check))
! 1479: {
! 1480: fe_add(check,vxx,u); /* vx^2+u */
! 1481:
! 1482: if (fe_isnonzero(check))
! 1483: {
! 1484: return -1;
! 1485: }
! 1486: fe_mul(h->X,h->X,sqrtm1);
! 1487: }
! 1488:
! 1489: if (fe_isnegative(h->X) == (s[31] >> 7))
! 1490: {
! 1491: fe_neg(h->X,h->X);
! 1492: }
! 1493: fe_mul(h->T,h->X,h->Y);
! 1494:
! 1495: return 0;
! 1496: }
! 1497:
! 1498: /**
! 1499: * r = p + q
! 1500: */
! 1501: static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q)
! 1502: {
! 1503: fe t0;
! 1504:
! 1505: fe_add(r->X, p->Y, p->X);
! 1506: fe_sub(r->Y, p->Y, p->X);
! 1507: fe_mul(r->Z, r->X, q->yplusx);
! 1508: fe_mul(r->Y, r->Y, q->yminusx);
! 1509: fe_mul(r->T, q->xy2d, p->T);
! 1510: fe_add(t0, p->Z, p->Z);
! 1511: fe_sub(r->X, r->Z, r->Y);
! 1512: fe_add(r->Y, r->Z, r->Y);
! 1513: fe_add(r->Z, t0, r->T);
! 1514: fe_sub(r->T, t0, r->T);
! 1515: }
! 1516:
! 1517: /**
! 1518: * r = p - q
! 1519: */
! 1520: static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q)
! 1521: {
! 1522: fe t0;
! 1523:
! 1524: fe_add(r->X, p->Y, p->X);
! 1525: fe_sub(r->Y, p->Y, p->X);
! 1526: fe_mul(r->Z, r->X, q->yminusx);
! 1527: fe_mul(r->Y, r->Y, q->yplusx);
! 1528: fe_mul(r->T, q->xy2d, p->T);
! 1529: fe_add(t0, p->Z, p->Z);
! 1530: fe_sub(r->X, r->Z, r->Y);
! 1531: fe_add(r->Y, r->Z, r->Y);
! 1532: fe_sub(r->Z, t0, r->T);
! 1533: fe_add(r->T, t0, r->T);
! 1534: }
! 1535:
! 1536: /**
! 1537: * r = p
! 1538: */
! 1539: static void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p)
! 1540: {
! 1541: fe_mul(r->X,p->X,p->T);
! 1542: fe_mul(r->Y,p->Y,p->Z);
! 1543: fe_mul(r->Z,p->Z,p->T);
! 1544: }
! 1545:
! 1546: /**
! 1547: * r = p
! 1548: */
! 1549: static void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p)
! 1550: {
! 1551: fe_mul(r->X,p->X,p->T);
! 1552: fe_mul(r->Y,p->Y,p->Z);
! 1553: fe_mul(r->Z,p->Z,p->T);
! 1554: fe_mul(r->T,p->X,p->Y);
! 1555: }
! 1556:
! 1557: static void ge_p2_0(ge_p2 *h)
! 1558: {
! 1559: fe_0(h->X);
! 1560: fe_1(h->Y);
! 1561: fe_1(h->Z);
! 1562: }
! 1563:
! 1564: /**
! 1565: * r = 2 * p
! 1566: */
! 1567: static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p)
! 1568: {
! 1569: fe t0;
! 1570:
! 1571: fe_sq(r->X, p->X);
! 1572: fe_sq(r->Z, p->Y);
! 1573: fe_sq2(r->T, p->Z);
! 1574: fe_add(r->Y, p->X, p->Y);
! 1575: fe_sq(t0, r->Y);
! 1576: fe_add(r->Y, r->Z, r->X);
! 1577: fe_sub(r->Z, r->Z, r->X);
! 1578: fe_sub(r->X, t0, r->Y);
! 1579: fe_sub(r->T, r->T, r->Z);
! 1580: }
! 1581:
! 1582: static void ge_p3_0(ge_p3 *h)
! 1583: {
! 1584: fe_0(h->X);
! 1585: fe_1(h->Y);
! 1586: fe_1(h->Z);
! 1587: fe_0(h->T);
! 1588: }
! 1589:
! 1590: /**
! 1591: * r = p
! 1592: */
! 1593:
! 1594: /* 2 * d = 16295367250680780974490674513165176452449235426866156013048779062215315747161 */
! 1595: static const fe d2 = {
! 1596: -21827239, -5839606, -30745221, 13898782, 229458,
! 1597: 15978800, -12551817, -6495438, 29715968, 9444199
! 1598: };
! 1599:
! 1600: static void ge_p3_to_cached(ge_cached *r, const ge_p3 *p)
! 1601: {
! 1602: fe_add(r->YplusX,p->Y,p->X);
! 1603: fe_sub(r->YminusX,p->Y,p->X);
! 1604: fe_copy(r->Z,p->Z);
! 1605: fe_mul(r->T2d,p->T,d2);
! 1606: }
! 1607:
! 1608: /**
! 1609: * r = p
! 1610: */
! 1611: static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p)
! 1612: {
! 1613: fe_copy(r->X,p->X);
! 1614: fe_copy(r->Y,p->Y);
! 1615: fe_copy(r->Z,p->Z);
! 1616: }
! 1617:
! 1618: void ge_p3_tobytes(uint8_t *s, const ge_p3 *h)
! 1619: {
! 1620: fe recip, x, y;
! 1621:
! 1622: fe_invert(recip,h->Z);
! 1623: fe_mul(x,h->X,recip);
! 1624: fe_mul(y,h->Y,recip);
! 1625: fe_tobytes(s,y);
! 1626:
! 1627: s[31] ^= fe_isnegative(x) << 7;
! 1628: }
! 1629:
! 1630: /**
! 1631: * r = 2 * p
! 1632: */
! 1633: static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p)
! 1634: {
! 1635: ge_p2 q;
! 1636: ge_p3_to_p2(&q,p);
! 1637: ge_p2_dbl(r,&q);
! 1638: }
! 1639:
! 1640: static void ge_precomp_0(ge_precomp *h)
! 1641: {
! 1642: fe_1(h->yplusx);
! 1643: fe_1(h->yminusx);
! 1644: fe_0(h->xy2d);
! 1645: }
! 1646:
! 1647: static uint8_t equal(int8_t b, int8_t c)
! 1648: {
! 1649: uint8_t ub = b;
! 1650: uint8_t uc = c;
! 1651: uint8_t x = ub ^ uc; /* 0: yes; 1..255: no */
! 1652: uint32_t y = x; /* 0: yes; 1..255: no */
! 1653:
! 1654: y -= 1; /* 4294967295: yes; 0..254: no */
! 1655: y >>= 31; /* 1: yes; 0: no */
! 1656:
! 1657: return y;
! 1658: }
! 1659:
! 1660: static uint8_t negative(int8_t b)
! 1661: {
! 1662: uint64_t x = b; /* 18446744073709551361..18446744073709551615: yes; 0..255: no */
! 1663:
! 1664: x >>= 63; /* 1: yes; 0: no */
! 1665:
! 1666: return x;
! 1667: }
! 1668:
! 1669: static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b)
! 1670: {
! 1671: fe_cmov(t->yplusx,u->yplusx,b);
! 1672: fe_cmov(t->yminusx,u->yminusx,b);
! 1673: fe_cmov(t->xy2d,u->xy2d,b);
! 1674: }
! 1675:
! 1676: /**
! 1677: * base[i][j] = (j+1)*256^i*B
! 1678: */
! 1679: static const ge_precomp base[32][8] = {
! 1680: #include "base.h"
! 1681: };
! 1682:
! 1683: static void ge_select(ge_precomp *t, int pos, int8_t b)
! 1684: {
! 1685: ge_precomp minust;
! 1686: uint8_t bnegative = negative(b);
! 1687: uint8_t babs = b - (((-bnegative) & b) * ((int8_t) 1 << 1));
! 1688:
! 1689: ge_precomp_0(t);
! 1690: cmov(t,&base[pos][0],equal(babs,1));
! 1691: cmov(t,&base[pos][1],equal(babs,2));
! 1692: cmov(t,&base[pos][2],equal(babs,3));
! 1693: cmov(t,&base[pos][3],equal(babs,4));
! 1694: cmov(t,&base[pos][4],equal(babs,5));
! 1695: cmov(t,&base[pos][5],equal(babs,6));
! 1696: cmov(t,&base[pos][6],equal(babs,7));
! 1697: cmov(t,&base[pos][7],equal(babs,8));
! 1698: fe_copy(minust.yplusx,t->yminusx);
! 1699: fe_copy(minust.yminusx,t->yplusx);
! 1700: fe_neg(minust.xy2d,t->xy2d);
! 1701: cmov(t,&minust,bnegative);
! 1702: }
! 1703:
! 1704: /**
! 1705: *r = p - q
! 1706: */
! 1707: static void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q)
! 1708: {
! 1709: fe t0;
! 1710:
! 1711: fe_add(r->X, p->Y, p->X);
! 1712: fe_sub(r->Y, p->Y, p->X);
! 1713: fe_mul(r->Z, r->X, q->YminusX);
! 1714: fe_mul(r->Y, r->Y, q->YplusX);
! 1715: fe_mul(r->T, q->T2d, p->T);
! 1716: fe_mul(r->X, p->Z, q->Z);
! 1717: fe_add(t0, r->X, r->X);
! 1718: fe_sub(r->X, r->Z, r->Y);
! 1719: fe_add(r->Y, r->Z, r->Y);
! 1720: fe_sub(r->Z, t0, r->T);
! 1721: fe_add(r->T, t0, r->T);
! 1722: }
! 1723:
! 1724: void ge_tobytes(uint8_t *s, const ge_p2 *h)
! 1725: {
! 1726: fe recip, x, y;
! 1727:
! 1728: fe_invert(recip,h->Z);
! 1729: fe_mul(x,h->X,recip);
! 1730: fe_mul(y,h->Y,recip);
! 1731: fe_tobytes(s,y);
! 1732:
! 1733: s[31] ^= fe_isnegative(x) << 7;
! 1734: }
! 1735:
! 1736: /**
! 1737: * h = a * B
! 1738: * where a = a[0]+256*a[1]+...+256^31 a[31]
! 1739: * B is the Ed25519 base point (x,4/5) with x positive.
! 1740: *
! 1741: * Preconditions:
! 1742: * a[31] <= 127
! 1743: */
! 1744:
! 1745: /**
! 1746: * r = a * A + b * B
! 1747: * where a = a[0]+256*a[1]+...+256^31 a[31].
! 1748: * and b = b[0]+256*b[1]+...+256^31 b[31].
! 1749: * B is the Ed25519 base point (x,4/5) with x positive.
! 1750: */
! 1751: void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a, const ge_p3 *A,
! 1752: const uint8_t *b)
! 1753: {
! 1754: int8_t aslide[256];
! 1755: int8_t bslide[256];
! 1756: ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
! 1757: ge_p1p1 t;
! 1758: ge_p3 u, A2;
! 1759: int i;
! 1760:
! 1761: slide(aslide,a);
! 1762: slide(bslide,b);
! 1763:
! 1764: ge_p3_to_cached(&Ai[0],A);
! 1765: ge_p3_dbl(&t,A);
! 1766: ge_p1p1_to_p3(&A2,&t);
! 1767:
! 1768: ge_add(&t,&A2,&Ai[0]);
! 1769: ge_p1p1_to_p3(&u,&t);
! 1770: ge_p3_to_cached(&Ai[1],&u);
! 1771:
! 1772: ge_add(&t,&A2,&Ai[1]);
! 1773: ge_p1p1_to_p3(&u,&t);
! 1774: ge_p3_to_cached(&Ai[2],&u);
! 1775:
! 1776: ge_add(&t,&A2,&Ai[2]);
! 1777: ge_p1p1_to_p3(&u,&t);
! 1778: ge_p3_to_cached(&Ai[3],&u);
! 1779:
! 1780: ge_add(&t,&A2,&Ai[3]);
! 1781: ge_p1p1_to_p3(&u,&t);
! 1782: ge_p3_to_cached(&Ai[4],&u);
! 1783:
! 1784: ge_add(&t,&A2,&Ai[4]);
! 1785: ge_p1p1_to_p3(&u,&t);
! 1786: ge_p3_to_cached(&Ai[5],&u);
! 1787:
! 1788: ge_add(&t,&A2,&Ai[5]);
! 1789: ge_p1p1_to_p3(&u,&t);
! 1790: ge_p3_to_cached(&Ai[6],&u);
! 1791:
! 1792: ge_add(&t,&A2,&Ai[6]);
! 1793: ge_p1p1_to_p3(&u,&t);
! 1794: ge_p3_to_cached(&Ai[7],&u);
! 1795:
! 1796: ge_p2_0(r);
! 1797:
! 1798: for (i = 255; i >= 0; --i)
! 1799: {
! 1800: if (aslide[i] || bslide[i])
! 1801: {
! 1802: break;
! 1803: }
! 1804: }
! 1805:
! 1806: for (; i >= 0 ;--i)
! 1807: {
! 1808: ge_p2_dbl(&t,r);
! 1809:
! 1810: if (aslide[i] > 0)
! 1811: {
! 1812: ge_p1p1_to_p3(&u,&t);
! 1813: ge_add(&t,&u,&Ai[aslide[i]/2]);
! 1814: }
! 1815: else if (aslide[i] < 0)
! 1816: {
! 1817: ge_p1p1_to_p3(&u,&t);
! 1818: ge_sub(&t,&u,&Ai[(-aslide[i])/2]);
! 1819: }
! 1820:
! 1821: if (bslide[i] > 0)
! 1822: {
! 1823: ge_p1p1_to_p3(&u,&t);
! 1824: ge_madd(&t,&u,&Bi[bslide[i]/2]);
! 1825: }
! 1826: else if (bslide[i] < 0)
! 1827: {
! 1828: ge_p1p1_to_p3(&u,&t);
! 1829: ge_msub(&t,&u,&Bi[(-bslide[i])/2]);
! 1830: }
! 1831: ge_p1p1_to_p2(r,&t);
! 1832: }
! 1833: }
! 1834:
! 1835: void ge_scalarmult_base(ge_p3 *h, const uint8_t *a)
! 1836: {
! 1837: int8_t e[64];
! 1838: int8_t carry = 0;
! 1839: ge_p1p1 r;
! 1840: ge_p2 s;
! 1841: ge_precomp t;
! 1842: int i;
! 1843:
! 1844: for (i = 0; i < 32; ++i)
! 1845: {
! 1846: e[2 * i + 0] = (a[i] >> 0) & 15;
! 1847: e[2 * i + 1] = (a[i] >> 4) & 15;
! 1848: }
! 1849: /* each e[i] is between 0 and 15 */
! 1850: /* e[63] is between 0 and 7 */
! 1851:
! 1852: for (i = 0; i < 63; ++i) {
! 1853: e[i] += carry;
! 1854: carry = e[i] + 8;
! 1855: carry >>= 4;
! 1856: e[i] -= carry * ((int8_t) 1 << 4);
! 1857: }
! 1858: e[63] += carry;
! 1859: /* each e[i] is between -8 and 8 */
! 1860:
! 1861: ge_p3_0(h);
! 1862: for (i = 1; i < 64; i += 2)
! 1863: {
! 1864: ge_select(&t,i / 2,e[i]);
! 1865: ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r);
! 1866: }
! 1867:
! 1868: ge_p3_dbl(&r,h); ge_p1p1_to_p2(&s,&r);
! 1869: ge_p2_dbl(&r,&s); ge_p1p1_to_p2(&s,&r);
! 1870: ge_p2_dbl(&r,&s); ge_p1p1_to_p2(&s,&r);
! 1871: ge_p2_dbl(&r,&s); ge_p1p1_to_p3(h,&r);
! 1872:
! 1873: for (i = 0; i < 64; i += 2)
! 1874: {
! 1875: ge_select(&t,i / 2,e[i]);
! 1876: ge_madd(&r,h,&t);
! 1877: ge_p1p1_to_p3(h,&r);
! 1878: }
! 1879: }
! 1880:
! 1881: /**
! 1882: * Input:
! 1883: * a[0]+256*a[1]+...+256^31*a[31] = a
! 1884: * b[0]+256*b[1]+...+256^31*b[31] = b
! 1885: * c[0]+256*c[1]+...+256^31*c[31] = c
! 1886: *
! 1887: * Output:
! 1888: * s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
! 1889: * where l = 2^252 + 27742317777372353535851937790883648493.
! 1890: */
! 1891: void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b, const uint8_t *c)
! 1892: {
! 1893: int64_t a0 = 2097151 & load_3(a);
! 1894: int64_t a1 = 2097151 & (load_4(a + 2) >> 5);
! 1895: int64_t a2 = 2097151 & (load_3(a + 5) >> 2);
! 1896: int64_t a3 = 2097151 & (load_4(a + 7) >> 7);
! 1897: int64_t a4 = 2097151 & (load_4(a + 10) >> 4);
! 1898: int64_t a5 = 2097151 & (load_3(a + 13) >> 1);
! 1899: int64_t a6 = 2097151 & (load_4(a + 15) >> 6);
! 1900: int64_t a7 = 2097151 & (load_3(a + 18) >> 3);
! 1901: int64_t a8 = 2097151 & load_3(a + 21);
! 1902: int64_t a9 = 2097151 & (load_4(a + 23) >> 5);
! 1903: int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
! 1904: int64_t a11 = (load_4(a + 28) >> 7);
! 1905:
! 1906: int64_t b0 = 2097151 & load_3(b);
! 1907: int64_t b1 = 2097151 & (load_4(b + 2) >> 5);
! 1908: int64_t b2 = 2097151 & (load_3(b + 5) >> 2);
! 1909: int64_t b3 = 2097151 & (load_4(b + 7) >> 7);
! 1910: int64_t b4 = 2097151 & (load_4(b + 10) >> 4);
! 1911: int64_t b5 = 2097151 & (load_3(b + 13) >> 1);
! 1912: int64_t b6 = 2097151 & (load_4(b + 15) >> 6);
! 1913: int64_t b7 = 2097151 & (load_3(b + 18) >> 3);
! 1914: int64_t b8 = 2097151 & load_3(b + 21);
! 1915: int64_t b9 = 2097151 & (load_4(b + 23) >> 5);
! 1916: int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
! 1917: int64_t b11 = (load_4(b + 28) >> 7);
! 1918:
! 1919: int64_t c0 = 2097151 & load_3(c);
! 1920: int64_t c1 = 2097151 & (load_4(c + 2) >> 5);
! 1921: int64_t c2 = 2097151 & (load_3(c + 5) >> 2);
! 1922: int64_t c3 = 2097151 & (load_4(c + 7) >> 7);
! 1923: int64_t c4 = 2097151 & (load_4(c + 10) >> 4);
! 1924: int64_t c5 = 2097151 & (load_3(c + 13) >> 1);
! 1925: int64_t c6 = 2097151 & (load_4(c + 15) >> 6);
! 1926: int64_t c7 = 2097151 & (load_3(c + 18) >> 3);
! 1927: int64_t c8 = 2097151 & load_3(c + 21);
! 1928: int64_t c9 = 2097151 & (load_4(c + 23) >> 5);
! 1929: int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
! 1930: int64_t c11 = (load_4(c + 28) >> 7);
! 1931:
! 1932: int64_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11;
! 1933: int64_t s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23;
! 1934:
! 1935: int64_t carry0, carry1, carry2, carry3, carry4, carry5, carry6;
! 1936: int64_t carry7, carry8, carry9, carry10, carry11, carry12, carry13;
! 1937: int64_t carry14, carry15, carry16, carry17, carry18, carry19, carry20;
! 1938: int64_t carry21, carry22;
! 1939:
! 1940: s0 = c0 + a0*b0;
! 1941: s1 = c1 + a0*b1 + a1*b0;
! 1942: s2 = c2 + a0*b2 + a1*b1 + a2*b0;
! 1943: s3 = c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0;
! 1944: s4 = c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0;
! 1945: s5 = c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0;
! 1946: s6 = c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0;
! 1947: s7 = c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0;
! 1948: s8 = c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0;
! 1949: s9 = c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0;
! 1950: s10 = c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0;
! 1951: s11 = c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0;
! 1952: s12 = a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1;
! 1953: s13 = a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2;
! 1954: s14 = a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3;
! 1955: s15 = a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4;
! 1956: s16 = a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5;
! 1957: s17 = a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6;
! 1958: s18 = a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7;
! 1959: s19 = a8*b11 + a9*b10 + a10*b9 + a11*b8;
! 1960: s20 = a9*b11 + a10*b10 + a11*b9;
! 1961: s21 = a10*b11 + a11*b10;
! 1962: s22 = a11*b11;
! 1963: s23 = 0;
! 1964:
! 1965: carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
! 1966: s1 += carry0;
! 1967: s0 -= carry0 * ((uint64_t) 1L << 21);
! 1968:
! 1969: carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
! 1970: s3 += carry2;
! 1971: s2 -= carry2 * ((uint64_t) 1L << 21);
! 1972:
! 1973: carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
! 1974: s5 += carry4;
! 1975: s4 -= carry4 * ((uint64_t) 1L << 21);
! 1976:
! 1977: carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
! 1978: s7 += carry6;
! 1979: s6 -= carry6 * ((uint64_t) 1L << 21);
! 1980:
! 1981: carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
! 1982: s9 += carry8;
! 1983: s8 -= carry8 * ((uint64_t) 1L << 21);
! 1984:
! 1985: carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
! 1986: s11 += carry10;
! 1987: s10 -= carry10 * ((uint64_t) 1L << 21);
! 1988:
! 1989: carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
! 1990: s13 += carry12;
! 1991: s12 -= carry12 * ((uint64_t) 1L << 21);
! 1992:
! 1993: carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
! 1994: s15 += carry14;
! 1995: s14 -= carry14 * ((uint64_t) 1L << 21);
! 1996:
! 1997: carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
! 1998: s17 += carry16;
! 1999: s16 -= carry16 * ((uint64_t) 1L << 21);
! 2000:
! 2001: carry18 = (s18 + (int64_t) (1L << 20)) >> 21;
! 2002: s19 += carry18;
! 2003: s18 -= carry18 * ((uint64_t) 1L << 21);
! 2004:
! 2005: carry20 = (s20 + (int64_t) (1L << 20)) >> 21;
! 2006: s21 += carry20;
! 2007: s20 -= carry20 * ((uint64_t) 1L << 21);
! 2008:
! 2009: carry22 = (s22 + (int64_t) (1L << 20)) >> 21;
! 2010: s23 += carry22;
! 2011: s22 -= carry22 * ((uint64_t) 1L << 21);
! 2012:
! 2013: carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
! 2014: s2 += carry1;
! 2015: s1 -= carry1 * ((uint64_t) 1L << 21);
! 2016:
! 2017: carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
! 2018: s4 += carry3;
! 2019: s3 -= carry3 * ((uint64_t) 1L << 21);
! 2020:
! 2021: carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
! 2022: s6 += carry5;
! 2023: s5 -= carry5 * ((uint64_t) 1L << 21);
! 2024:
! 2025: carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
! 2026: s8 += carry7;
! 2027: s7 -= carry7 * ((uint64_t) 1L << 21);
! 2028:
! 2029: carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
! 2030: s10 += carry9;
! 2031: s9 -= carry9 * ((uint64_t) 1L << 21);
! 2032:
! 2033: carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
! 2034: s12 += carry11;
! 2035: s11 -= carry11 * ((uint64_t) 1L << 21);
! 2036:
! 2037: carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
! 2038: s14 += carry13;
! 2039: s13 -= carry13 * ((uint64_t) 1L << 21);
! 2040:
! 2041: carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
! 2042: s16 += carry15;
! 2043: s15 -= carry15 * ((uint64_t) 1L << 21);
! 2044:
! 2045: carry17 = (s17 + (int64_t) (1L << 20)) >> 21;
! 2046: s18 += carry17;
! 2047: s17 -= carry17 * ((uint64_t) 1L << 21);
! 2048:
! 2049: carry19 = (s19 + (int64_t) (1L << 20)) >> 21;
! 2050: s20 += carry19;
! 2051: s19 -= carry19 * ((uint64_t) 1L << 21);
! 2052:
! 2053: carry21 = (s21 + (int64_t) (1L << 20)) >> 21;
! 2054: s22 += carry21;
! 2055: s21 -= carry21 * ((uint64_t) 1L << 21);
! 2056:
! 2057: s11 += s23 * 666643;
! 2058: s12 += s23 * 470296;
! 2059: s13 += s23 * 654183;
! 2060: s14 -= s23 * 997805;
! 2061: s15 += s23 * 136657;
! 2062: s16 -= s23 * 683901;
! 2063:
! 2064: s10 += s22 * 666643;
! 2065: s11 += s22 * 470296;
! 2066: s12 += s22 * 654183;
! 2067: s13 -= s22 * 997805;
! 2068: s14 += s22 * 136657;
! 2069: s15 -= s22 * 683901;
! 2070:
! 2071: s9 += s21 * 666643;
! 2072: s10 += s21 * 470296;
! 2073: s11 += s21 * 654183;
! 2074: s12 -= s21 * 997805;
! 2075: s13 += s21 * 136657;
! 2076: s14 -= s21 * 683901;
! 2077:
! 2078: s8 += s20 * 666643;
! 2079: s9 += s20 * 470296;
! 2080: s10 += s20 * 654183;
! 2081: s11 -= s20 * 997805;
! 2082: s12 += s20 * 136657;
! 2083: s13 -= s20 * 683901;
! 2084:
! 2085: s7 += s19 * 666643;
! 2086: s8 += s19 * 470296;
! 2087: s9 += s19 * 654183;
! 2088: s10 -= s19 * 997805;
! 2089: s11 += s19 * 136657;
! 2090: s12 -= s19 * 683901;
! 2091:
! 2092: s6 += s18 * 666643;
! 2093: s7 += s18 * 470296;
! 2094: s8 += s18 * 654183;
! 2095: s9 -= s18 * 997805;
! 2096: s10 += s18 * 136657;
! 2097: s11 -= s18 * 683901;
! 2098:
! 2099: carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
! 2100: s7 += carry6;
! 2101: s6 -= carry6 * ((uint64_t) 1L << 21);
! 2102:
! 2103: carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
! 2104: s9 += carry8;
! 2105: s8 -= carry8 * ((uint64_t) 1L << 21);
! 2106:
! 2107: carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
! 2108: s11 += carry10;
! 2109: s10 -= carry10 * ((uint64_t) 1L << 21);
! 2110:
! 2111: carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
! 2112: s13 += carry12;
! 2113: s12 -= carry12 * ((uint64_t) 1L << 21);
! 2114:
! 2115: carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
! 2116: s15 += carry14;
! 2117: s14 -= carry14 * ((uint64_t) 1L << 21);
! 2118:
! 2119: carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
! 2120: s17 += carry16;
! 2121: s16 -= carry16 * ((uint64_t) 1L << 21);
! 2122:
! 2123: carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
! 2124: s8 += carry7;
! 2125: s7 -= carry7 * ((uint64_t) 1L << 21);
! 2126:
! 2127: carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
! 2128: s10 += carry9;
! 2129: s9 -= carry9 * ((uint64_t) 1L << 21);
! 2130:
! 2131: carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
! 2132: s12 += carry11;
! 2133: s11 -= carry11 * ((uint64_t) 1L << 21);
! 2134:
! 2135: carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
! 2136: s14 += carry13;
! 2137: s13 -= carry13 * ((uint64_t) 1L << 21);
! 2138:
! 2139: carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
! 2140: s16 += carry15;
! 2141: s15 -= carry15 * ((uint64_t) 1L << 21);
! 2142:
! 2143: s5 += s17 * 666643;
! 2144: s6 += s17 * 470296;
! 2145: s7 += s17 * 654183;
! 2146: s8 -= s17 * 997805;
! 2147: s9 += s17 * 136657;
! 2148: s10 -= s17 * 683901;
! 2149:
! 2150: s4 += s16 * 666643;
! 2151: s5 += s16 * 470296;
! 2152: s6 += s16 * 654183;
! 2153: s7 -= s16 * 997805;
! 2154: s8 += s16 * 136657;
! 2155: s9 -= s16 * 683901;
! 2156:
! 2157: s3 += s15 * 666643;
! 2158: s4 += s15 * 470296;
! 2159: s5 += s15 * 654183;
! 2160: s6 -= s15 * 997805;
! 2161: s7 += s15 * 136657;
! 2162: s8 -= s15 * 683901;
! 2163:
! 2164: s2 += s14 * 666643;
! 2165: s3 += s14 * 470296;
! 2166: s4 += s14 * 654183;
! 2167: s5 -= s14 * 997805;
! 2168: s6 += s14 * 136657;
! 2169: s7 -= s14 * 683901;
! 2170:
! 2171: s1 += s13 * 666643;
! 2172: s2 += s13 * 470296;
! 2173: s3 += s13 * 654183;
! 2174: s4 -= s13 * 997805;
! 2175: s5 += s13 * 136657;
! 2176: s6 -= s13 * 683901;
! 2177:
! 2178: s0 += s12 * 666643;
! 2179: s1 += s12 * 470296;
! 2180: s2 += s12 * 654183;
! 2181: s3 -= s12 * 997805;
! 2182: s4 += s12 * 136657;
! 2183: s5 -= s12 * 683901;
! 2184: s12 = 0;
! 2185:
! 2186: carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
! 2187: s1 += carry0;
! 2188: s0 -= carry0 * ((uint64_t) 1L << 21);
! 2189:
! 2190: carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
! 2191: s3 += carry2;
! 2192: s2 -= carry2 * ((uint64_t) 1L << 21);
! 2193:
! 2194: carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
! 2195: s5 += carry4;
! 2196: s4 -= carry4 * ((uint64_t) 1L << 21);
! 2197:
! 2198: carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
! 2199: s7 += carry6;
! 2200: s6 -= carry6 * ((uint64_t) 1L << 21);
! 2201:
! 2202: carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
! 2203: s9 += carry8;
! 2204: s8 -= carry8 * ((uint64_t) 1L << 21);
! 2205:
! 2206: carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
! 2207: s11 += carry10;
! 2208: s10 -= carry10 * ((uint64_t) 1L << 21);
! 2209:
! 2210: carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
! 2211: s2 += carry1;
! 2212: s1 -= carry1 * ((uint64_t) 1L << 21);
! 2213:
! 2214: carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
! 2215: s4 += carry3;
! 2216: s3 -= carry3 * ((uint64_t) 1L << 21);
! 2217:
! 2218: carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
! 2219: s6 += carry5;
! 2220: s5 -= carry5 * ((uint64_t) 1L << 21);
! 2221:
! 2222: carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
! 2223: s8 += carry7;
! 2224: s7 -= carry7 * ((uint64_t) 1L << 21);
! 2225:
! 2226: carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
! 2227: s10 += carry9;
! 2228: s9 -= carry9 * ((uint64_t) 1L << 21);
! 2229:
! 2230: carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
! 2231: s12 += carry11;
! 2232: s11 -= carry11 * ((uint64_t) 1L << 21);
! 2233:
! 2234: s0 += s12 * 666643;
! 2235: s1 += s12 * 470296;
! 2236: s2 += s12 * 654183;
! 2237: s3 -= s12 * 997805;
! 2238: s4 += s12 * 136657;
! 2239: s5 -= s12 * 683901;
! 2240: s12 = 0;
! 2241:
! 2242: carry0 = s0 >> 21;
! 2243: s1 += carry0;
! 2244: s0 -= carry0 * ((uint64_t) 1L << 21);
! 2245:
! 2246: carry1 = s1 >> 21;
! 2247: s2 += carry1;
! 2248: s1 -= carry1 * ((uint64_t) 1L << 21);
! 2249:
! 2250: carry2 = s2 >> 21;
! 2251: s3 += carry2;
! 2252: s2 -= carry2 * ((uint64_t) 1L << 21);
! 2253:
! 2254: carry3 = s3 >> 21;
! 2255: s4 += carry3;
! 2256: s3 -= carry3 * ((uint64_t) 1L << 21);
! 2257:
! 2258: carry4 = s4 >> 21;
! 2259: s5 += carry4;
! 2260: s4 -= carry4 * ((uint64_t) 1L << 21);
! 2261:
! 2262: carry5 = s5 >> 21;
! 2263: s6 += carry5;
! 2264: s5 -= carry5 * ((uint64_t) 1L << 21);
! 2265:
! 2266: carry6 = s6 >> 21;
! 2267: s7 += carry6;
! 2268: s6 -= carry6 * ((uint64_t) 1L << 21);
! 2269:
! 2270: carry7 = s7 >> 21;
! 2271: s8 += carry7;
! 2272: s7 -= carry7 * ((uint64_t) 1L << 21);
! 2273:
! 2274: carry8 = s8 >> 21;
! 2275: s9 += carry8;
! 2276: s8 -= carry8 * ((uint64_t) 1L << 21);
! 2277:
! 2278: carry9 = s9 >> 21;
! 2279: s10 += carry9;
! 2280: s9 -= carry9 * ((uint64_t) 1L << 21);
! 2281:
! 2282: carry10 = s10 >> 21;
! 2283: s11 += carry10;
! 2284: s10 -= carry10 * ((uint64_t) 1L << 21);
! 2285:
! 2286: carry11 = s11 >> 21;
! 2287: s12 += carry11;
! 2288: s11 -= carry11 * ((uint64_t) 1L << 21);
! 2289:
! 2290: s0 += s12 * 666643;
! 2291: s1 += s12 * 470296;
! 2292: s2 += s12 * 654183;
! 2293: s3 -= s12 * 997805;
! 2294: s4 += s12 * 136657;
! 2295: s5 -= s12 * 683901;
! 2296:
! 2297: carry0 = s0 >> 21;
! 2298: s1 += carry0;
! 2299: s0 -= carry0 * ((uint64_t) 1L << 21);
! 2300:
! 2301: carry1 = s1 >> 21;
! 2302: s2 += carry1;
! 2303: s1 -= carry1 * ((uint64_t) 1L << 21);
! 2304:
! 2305: carry2 = s2 >> 21;
! 2306: s3 += carry2;
! 2307: s2 -= carry2 * ((uint64_t) 1L << 21);
! 2308:
! 2309: carry3 = s3 >> 21;
! 2310: s4 += carry3;
! 2311: s3 -= carry3 * ((uint64_t) 1L << 21);
! 2312:
! 2313: carry4 = s4 >> 21;
! 2314: s5 += carry4;
! 2315: s4 -= carry4 * ((uint64_t) 1L << 21);
! 2316:
! 2317: carry5 = s5 >> 21;
! 2318: s6 += carry5;
! 2319: s5 -= carry5 * ((uint64_t) 1L << 21);
! 2320:
! 2321: carry6 = s6 >> 21;
! 2322: s7 += carry6;
! 2323: s6 -= carry6 * ((uint64_t) 1L << 21);
! 2324:
! 2325: carry7 = s7 >> 21;
! 2326: s8 += carry7;
! 2327: s7 -= carry7 * ((uint64_t) 1L << 21);
! 2328:
! 2329: carry8 = s8 >> 21;
! 2330: s9 += carry8;
! 2331: s8 -= carry8 * ((uint64_t) 1L << 21);
! 2332:
! 2333: carry9 = s9 >> 21;
! 2334: s10 += carry9;
! 2335: s9 -= carry9 * ((uint64_t) 1L << 21);
! 2336:
! 2337: carry10 = s10 >> 21;
! 2338: s11 += carry10;
! 2339: s10 -= carry10 * ((uint64_t) 1L << 21);
! 2340:
! 2341: s[0] = s0 >> 0;
! 2342: s[1] = s0 >> 8;
! 2343: s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
! 2344: s[3] = s1 >> 3;
! 2345: s[4] = s1 >> 11;
! 2346: s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
! 2347: s[6] = s2 >> 6;
! 2348: s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
! 2349: s[8] = s3 >> 1;
! 2350: s[9] = s3 >> 9;
! 2351: s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
! 2352: s[11] = s4 >> 4;
! 2353: s[12] = s4 >> 12;
! 2354: s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
! 2355: s[14] = s5 >> 7;
! 2356: s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
! 2357: s[16] = s6 >> 2;
! 2358: s[17] = s6 >> 10;
! 2359: s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
! 2360: s[19] = s7 >> 5;
! 2361: s[20] = s7 >> 13;
! 2362: s[21] = s8 >> 0;
! 2363: s[22] = s8 >> 8;
! 2364: s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
! 2365: s[24] = s9 >> 3;
! 2366: s[25] = s9 >> 11;
! 2367: s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
! 2368: s[27] = s10 >> 6;
! 2369: s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
! 2370: s[29] = s11 >> 1;
! 2371: s[30] = s11 >> 9;
! 2372: s[31] = s11 >> 17;
! 2373: }
! 2374:
! 2375: /**
! 2376: * Input:
! 2377: * s[0]+256*s[1]+...+256^63*s[63] = s
! 2378: *
! 2379: * Output:
! 2380: * s[0]+256*s[1]+...+256^31*s[31] = s mod l
! 2381: * where l = 2^252 + 27742317777372353535851937790883648493.
! 2382: * Overwrites s in place.
! 2383: */
! 2384: void sc_reduce(uint8_t *s)
! 2385: {
! 2386: int64_t s0 = 2097151 & load_3(s);
! 2387: int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
! 2388: int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
! 2389: int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
! 2390: int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
! 2391: int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
! 2392: int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
! 2393: int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
! 2394: int64_t s8 = 2097151 & load_3(s + 21);
! 2395: int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
! 2396: int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
! 2397: int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
! 2398: int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
! 2399: int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
! 2400: int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
! 2401: int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
! 2402: int64_t s16 = 2097151 & load_3(s + 42);
! 2403: int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
! 2404: int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
! 2405: int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
! 2406: int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
! 2407: int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
! 2408: int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
! 2409: int64_t s23 = (load_4(s + 60) >> 3);
! 2410:
! 2411: int64_t carry0, carry1, carry2, carry3, carry4, carry5, carry6;
! 2412: int64_t carry7, carry8, carry9, carry10, carry11, carry12, carry13;
! 2413: int64_t carry14, carry15, carry16;
! 2414:
! 2415: s11 += s23 * 666643;
! 2416: s12 += s23 * 470296;
! 2417: s13 += s23 * 654183;
! 2418: s14 -= s23 * 997805;
! 2419: s15 += s23 * 136657;
! 2420: s16 -= s23 * 683901;
! 2421:
! 2422: s10 += s22 * 666643;
! 2423: s11 += s22 * 470296;
! 2424: s12 += s22 * 654183;
! 2425: s13 -= s22 * 997805;
! 2426: s14 += s22 * 136657;
! 2427: s15 -= s22 * 683901;
! 2428:
! 2429: s9 += s21 * 666643;
! 2430: s10 += s21 * 470296;
! 2431: s11 += s21 * 654183;
! 2432: s12 -= s21 * 997805;
! 2433: s13 += s21 * 136657;
! 2434: s14 -= s21 * 683901;
! 2435:
! 2436: s8 += s20 * 666643;
! 2437: s9 += s20 * 470296;
! 2438: s10 += s20 * 654183;
! 2439: s11 -= s20 * 997805;
! 2440: s12 += s20 * 136657;
! 2441: s13 -= s20 * 683901;
! 2442:
! 2443: s7 += s19 * 666643;
! 2444: s8 += s19 * 470296;
! 2445: s9 += s19 * 654183;
! 2446: s10 -= s19 * 997805;
! 2447: s11 += s19 * 136657;
! 2448: s12 -= s19 * 683901;
! 2449:
! 2450: s6 += s18 * 666643;
! 2451: s7 += s18 * 470296;
! 2452: s8 += s18 * 654183;
! 2453: s9 -= s18 * 997805;
! 2454: s10 += s18 * 136657;
! 2455: s11 -= s18 * 683901;
! 2456:
! 2457: carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
! 2458: s7 += carry6;
! 2459: s6 -= carry6 * ((uint64_t) 1L << 21);
! 2460:
! 2461: carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
! 2462: s9 += carry8;
! 2463: s8 -= carry8 * ((uint64_t) 1L << 21);
! 2464:
! 2465: carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
! 2466: s11 += carry10;
! 2467: s10 -= carry10 * ((uint64_t) 1L << 21);
! 2468:
! 2469: carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
! 2470: s13 += carry12;
! 2471: s12 -= carry12 * ((uint64_t) 1L << 21);
! 2472:
! 2473: carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
! 2474: s15 += carry14;
! 2475: s14 -= carry14 * ((uint64_t) 1L << 21);
! 2476:
! 2477: carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
! 2478: s17 += carry16;
! 2479: s16 -= carry16 * ((uint64_t) 1L << 21);
! 2480:
! 2481: carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
! 2482: s8 += carry7;
! 2483: s7 -= carry7 * ((uint64_t) 1L << 21);
! 2484:
! 2485: carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
! 2486: s10 += carry9;
! 2487: s9 -= carry9 * ((uint64_t) 1L << 21);
! 2488:
! 2489: carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
! 2490: s12 += carry11;
! 2491: s11 -= carry11 * ((uint64_t) 1L << 21);
! 2492:
! 2493: carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
! 2494: s14 += carry13;
! 2495: s13 -= carry13 * ((uint64_t) 1L << 21);
! 2496:
! 2497: carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
! 2498: s16 += carry15;
! 2499: s15 -= carry15 * ((uint64_t) 1L << 21);
! 2500:
! 2501: s5 += s17 * 666643;
! 2502: s6 += s17 * 470296;
! 2503: s7 += s17 * 654183;
! 2504: s8 -= s17 * 997805;
! 2505: s9 += s17 * 136657;
! 2506: s10 -= s17 * 683901;
! 2507:
! 2508: s4 += s16 * 666643;
! 2509: s5 += s16 * 470296;
! 2510: s6 += s16 * 654183;
! 2511: s7 -= s16 * 997805;
! 2512: s8 += s16 * 136657;
! 2513: s9 -= s16 * 683901;
! 2514:
! 2515: s3 += s15 * 666643;
! 2516: s4 += s15 * 470296;
! 2517: s5 += s15 * 654183;
! 2518: s6 -= s15 * 997805;
! 2519: s7 += s15 * 136657;
! 2520: s8 -= s15 * 683901;
! 2521:
! 2522: s2 += s14 * 666643;
! 2523: s3 += s14 * 470296;
! 2524: s4 += s14 * 654183;
! 2525: s5 -= s14 * 997805;
! 2526: s6 += s14 * 136657;
! 2527: s7 -= s14 * 683901;
! 2528:
! 2529: s1 += s13 * 666643;
! 2530: s2 += s13 * 470296;
! 2531: s3 += s13 * 654183;
! 2532: s4 -= s13 * 997805;
! 2533: s5 += s13 * 136657;
! 2534: s6 -= s13 * 683901;
! 2535:
! 2536: s0 += s12 * 666643;
! 2537: s1 += s12 * 470296;
! 2538: s2 += s12 * 654183;
! 2539: s3 -= s12 * 997805;
! 2540: s4 += s12 * 136657;
! 2541: s5 -= s12 * 683901;
! 2542: s12 = 0;
! 2543:
! 2544: carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
! 2545: s1 += carry0;
! 2546: s0 -= carry0 * ((uint64_t) 1L << 21);
! 2547:
! 2548: carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
! 2549: s3 += carry2;
! 2550: s2 -= carry2 * ((uint64_t) 1L << 21);
! 2551:
! 2552: carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
! 2553: s5 += carry4;
! 2554: s4 -= carry4 * ((uint64_t) 1L << 21);
! 2555:
! 2556: carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
! 2557: s7 += carry6;
! 2558: s6 -= carry6 * ((uint64_t) 1L << 21);
! 2559:
! 2560: carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
! 2561: s9 += carry8;
! 2562: s8 -= carry8 * ((uint64_t) 1L << 21);
! 2563:
! 2564: carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
! 2565: s11 += carry10;
! 2566: s10 -= carry10 * ((uint64_t) 1L << 21);
! 2567:
! 2568: carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
! 2569: s2 += carry1;
! 2570: s1 -= carry1 * ((uint64_t) 1L << 21);
! 2571:
! 2572: carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
! 2573: s4 += carry3;
! 2574: s3 -= carry3 * ((uint64_t) 1L << 21);
! 2575:
! 2576: carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
! 2577: s6 += carry5;
! 2578: s5 -= carry5 * ((uint64_t) 1L << 21);
! 2579:
! 2580: carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
! 2581: s8 += carry7;
! 2582: s7 -= carry7 * ((uint64_t) 1L << 21);
! 2583:
! 2584: carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
! 2585: s10 += carry9;
! 2586: s9 -= carry9 * ((uint64_t) 1L << 21);
! 2587:
! 2588: carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
! 2589: s12 += carry11;
! 2590: s11 -= carry11 * ((uint64_t) 1L << 21);
! 2591:
! 2592: s0 += s12 * 666643;
! 2593: s1 += s12 * 470296;
! 2594: s2 += s12 * 654183;
! 2595: s3 -= s12 * 997805;
! 2596: s4 += s12 * 136657;
! 2597: s5 -= s12 * 683901;
! 2598: s12 = 0;
! 2599:
! 2600: carry0 = s0 >> 21;
! 2601: s1 += carry0;
! 2602: s0 -= carry0 * ((uint64_t) 1L << 21);
! 2603:
! 2604: carry1 = s1 >> 21;
! 2605: s2 += carry1;
! 2606: s1 -= carry1 * ((uint64_t) 1L << 21);
! 2607:
! 2608: carry2 = s2 >> 21;
! 2609: s3 += carry2;
! 2610: s2 -= carry2 * ((uint64_t) 1L << 21);
! 2611:
! 2612: carry3 = s3 >> 21;
! 2613: s4 += carry3;
! 2614: s3 -= carry3 * ((uint64_t) 1L << 21);
! 2615:
! 2616: carry4 = s4 >> 21;
! 2617: s5 += carry4;
! 2618: s4 -= carry4 * ((uint64_t) 1L << 21);
! 2619:
! 2620: carry5 = s5 >> 21;
! 2621: s6 += carry5;
! 2622: s5 -= carry5 * ((uint64_t) 1L << 21);
! 2623:
! 2624: carry6 = s6 >> 21;
! 2625: s7 += carry6;
! 2626: s6 -= carry6 * ((uint64_t) 1L << 21);
! 2627:
! 2628: carry7 = s7 >> 21;
! 2629: s8 += carry7;
! 2630: s7 -= carry7 * ((uint64_t) 1L << 21);
! 2631:
! 2632: carry8 = s8 >> 21;
! 2633: s9 += carry8;
! 2634: s8 -= carry8 * ((uint64_t) 1L << 21);
! 2635:
! 2636: carry9 = s9 >> 21;
! 2637: s10 += carry9;
! 2638: s9 -= carry9 * ((uint64_t) 1L << 21);
! 2639:
! 2640: carry10 = s10 >> 21;
! 2641: s11 += carry10;
! 2642: s10 -= carry10 * ((uint64_t) 1L << 21);
! 2643:
! 2644: carry11 = s11 >> 21;
! 2645: s12 += carry11;
! 2646: s11 -= carry11 * ((uint64_t) 1L << 21);
! 2647:
! 2648: s0 += s12 * 666643;
! 2649: s1 += s12 * 470296;
! 2650: s2 += s12 * 654183;
! 2651: s3 -= s12 * 997805;
! 2652: s4 += s12 * 136657;
! 2653: s5 -= s12 * 683901;
! 2654:
! 2655: carry0 = s0 >> 21;
! 2656: s1 += carry0;
! 2657: s0 -= carry0 * ((uint64_t) 1L << 21);
! 2658:
! 2659: carry1 = s1 >> 21;
! 2660: s2 += carry1;
! 2661: s1 -= carry1 * ((uint64_t) 1L << 21);
! 2662:
! 2663: carry2 = s2 >> 21;
! 2664: s3 += carry2;
! 2665: s2 -= carry2 * ((uint64_t) 1L << 21);
! 2666:
! 2667: carry3 = s3 >> 21;
! 2668: s4 += carry3;
! 2669: s3 -= carry3 * ((uint64_t) 1L << 21);
! 2670:
! 2671: carry4 = s4 >> 21;
! 2672: s5 += carry4;
! 2673: s4 -= carry4 * ((uint64_t) 1L << 21);
! 2674:
! 2675: carry5 = s5 >> 21;
! 2676: s6 += carry5;
! 2677: s5 -= carry5 * ((uint64_t) 1L << 21);
! 2678:
! 2679: carry6 = s6 >> 21;
! 2680: s7 += carry6;
! 2681: s6 -= carry6 * ((uint64_t) 1L << 21);
! 2682:
! 2683: carry7 = s7 >> 21;
! 2684: s8 += carry7;
! 2685: s7 -= carry7 * ((uint64_t) 1L << 21);
! 2686:
! 2687: carry8 = s8 >> 21;
! 2688: s9 += carry8;
! 2689: s8 -= carry8 * ((uint64_t) 1L << 21);
! 2690:
! 2691: carry9 = s9 >> 21;
! 2692: s10 += carry9;
! 2693: s9 -= carry9 * ((uint64_t) 1L << 21);
! 2694:
! 2695: carry10 = s10 >> 21;
! 2696: s11 += carry10;
! 2697: s10 -= carry10 * ((uint64_t) 1L << 21);
! 2698:
! 2699: s[0] = s0 >> 0;
! 2700: s[1] = s0 >> 8;
! 2701: s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
! 2702: s[3] = s1 >> 3;
! 2703: s[4] = s1 >> 11;
! 2704: s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
! 2705: s[6] = s2 >> 6;
! 2706: s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
! 2707: s[8] = s3 >> 1;
! 2708: s[9] = s3 >> 9;
! 2709: s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
! 2710: s[11] = s4 >> 4;
! 2711: s[12] = s4 >> 12;
! 2712: s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
! 2713: s[14] = s5 >> 7;
! 2714: s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
! 2715: s[16] = s6 >> 2;
! 2716: s[17] = s6 >> 10;
! 2717: s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
! 2718: s[19] = s7 >> 5;
! 2719: s[20] = s7 >> 13;
! 2720: s[21] = s8 >> 0;
! 2721: s[22] = s8 >> 8;
! 2722: s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
! 2723: s[24] = s9 >> 3;
! 2724: s[25] = s9 >> 11;
! 2725: s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
! 2726: s[27] = s10 >> 6;
! 2727: s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
! 2728: s[29] = s11 >> 1;
! 2729: s[30] = s11 >> 9;
! 2730: s[31] = s11 >> 17;
! 2731: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>