diff options
Diffstat (limited to 'src/crypto/bigint.c')
| -rw-r--r-- | src/crypto/bigint.c | 839 |
1 files changed, 758 insertions, 81 deletions
diff --git a/src/crypto/bigint.c b/src/crypto/bigint.c index 656f979e5..5d2f7b560 100644 --- a/src/crypto/bigint.c +++ b/src/crypto/bigint.c @@ -22,11 +22,12 @@ */ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); +FILE_SECBOOT ( PERMITTED ); #include <stdint.h> #include <string.h> #include <assert.h> -#include <ipxe/profile.h> +#include <stdio.h> #include <ipxe/bigint.h> /** @file @@ -34,21 +35,51 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); * Big integer support */ -/** Modular multiplication overall profiler */ -static struct profiler bigint_mod_multiply_profiler __profiler = - { .name = "bigint_mod_multiply" }; +/** Minimum number of least significant bytes included in transcription */ +#define BIGINT_NTOA_LSB_MIN 16 -/** Modular multiplication multiply step profiler */ -static struct profiler bigint_mod_multiply_multiply_profiler __profiler = - { .name = "bigint_mod_multiply.multiply" }; +/** + * Transcribe big integer (for debugging) + * + * @v value0 Element 0 of big integer to be transcribed + * @v size Number of elements + * @ret string Big integer in string form (may be abbreviated) + */ +const char * bigint_ntoa_raw ( const bigint_element_t *value0, + unsigned int size ) { + const bigint_t ( size ) __attribute__ (( may_alias )) + *value = ( ( const void * ) value0 ); + bigint_element_t element; + static char buf[256]; + unsigned int count; + int threshold; + uint8_t byte; + char *tmp; + int i; + + /* Calculate abbreviation threshold, if any */ + count = ( size * sizeof ( element ) ); + threshold = count; + if ( count >= ( ( sizeof ( buf ) - 1 /* NUL */ ) / 2 /* "xx" */ ) ) { + threshold -= ( ( sizeof ( buf ) - 3 /* "..." */ + - ( BIGINT_NTOA_LSB_MIN * 2 /* "xx" */ ) + - 1 /* NUL */ ) / 2 /* "xx" */ ); + } -/** Modular multiplication rescale step profiler */ -static struct profiler bigint_mod_multiply_rescale_profiler __profiler = - { .name = "bigint_mod_multiply.rescale" }; + /* Transcribe bytes, abbreviating with "..." if necessary */ + for ( tmp = buf, i = ( count - 1 ) ; i >= 0 ; i-- ) { + element = value->element[ i / sizeof ( element ) ]; + byte = ( element >> ( 8 * ( i % sizeof ( element ) ) ) ); + tmp += sprintf ( tmp, "%02x", byte ); + if ( i == threshold ) { + tmp += sprintf ( tmp, "..." ); + i = BIGINT_NTOA_LSB_MIN; + } + } + assert ( tmp < ( buf + sizeof ( buf ) ) ); -/** Modular multiplication subtract step profiler */ -static struct profiler bigint_mod_multiply_subtract_profiler __profiler = - { .name = "bigint_mod_multiply.subtract" }; + return buf; +} /** * Conditionally swap big integers (in constant time) @@ -76,72 +107,635 @@ void bigint_swap_raw ( bigint_element_t *first0, bigint_element_t *second0, } /** - * Perform modular multiplication of big integers + * Multiply big integers * * @v multiplicand0 Element 0 of big integer to be multiplied + * @v multiplicand_size Number of elements in multiplicand * @v multiplier0 Element 0 of big integer to be multiplied + * @v multiplier_size Number of elements in multiplier + * @v result0 Element 0 of big integer to hold result + */ +void bigint_multiply_raw ( const bigint_element_t *multiplicand0, + unsigned int multiplicand_size, + const bigint_element_t *multiplier0, + unsigned int multiplier_size, + bigint_element_t *result0 ) { + unsigned int result_size = ( multiplicand_size + multiplier_size ); + const bigint_t ( multiplicand_size ) __attribute__ (( may_alias )) + *multiplicand = ( ( const void * ) multiplicand0 ); + const bigint_t ( multiplier_size ) __attribute__ (( may_alias )) + *multiplier = ( ( const void * ) multiplier0 ); + bigint_t ( result_size ) __attribute__ (( may_alias )) + *result = ( ( void * ) result0 ); + bigint_element_t multiplicand_element; + const bigint_element_t *multiplier_element; + bigint_element_t *result_element; + bigint_element_t carry_element; + unsigned int i; + unsigned int j; + + /* Zero required portion of result + * + * All elements beyond the length of the multiplier will be + * written before they are read, and so do not need to be + * zeroed in advance. + */ + memset ( result, 0, sizeof ( *multiplier ) ); + + /* Multiply integers one element at a time, adding the low + * half of the double-element product directly into the + * result, and maintaining a running single-element carry. + * + * The running carry can never overflow beyond a single + * element. At each step, the calculation we perform is: + * + * carry:result[i+j] := ( ( multiplicand[i] * multiplier[j] ) + * + result[i+j] + carry ) + * + * The maximum value (for n-bit elements) is therefore: + * + * (2^n - 1)*(2^n - 1) + (2^n - 1) + (2^n - 1) = 2^(2n) - 1 + * + * This is precisely the maximum value for a 2n-bit integer, + * and so the carry out remains within the range of an n-bit + * integer, i.e. a single element. + */ + for ( i = 0 ; i < multiplicand_size ; i++ ) { + multiplicand_element = multiplicand->element[i]; + multiplier_element = &multiplier->element[0]; + result_element = &result->element[i]; + carry_element = 0; + for ( j = 0 ; j < multiplier_size ; j++ ) { + bigint_multiply_one ( multiplicand_element, + *(multiplier_element++), + result_element++, + &carry_element ); + } + *result_element = carry_element; + } +} + +/** + * Reduce big integer R^2 modulo N + * * @v modulus0 Element 0 of big integer modulus * @v result0 Element 0 of big integer to hold result - * @v size Number of elements in base, modulus, and result - * @v tmp Temporary working space + * @v size Number of elements in modulus and result + * + * Reduce the value R^2 modulo N, where R=2^n and n is the number of + * bits in the representation of the modulus N, including any leading + * zero bits. */ -void bigint_mod_multiply_raw ( const bigint_element_t *multiplicand0, - const bigint_element_t *multiplier0, - const bigint_element_t *modulus0, - bigint_element_t *result0, - unsigned int size, void *tmp ) { - const bigint_t ( size ) __attribute__ (( may_alias )) *multiplicand = - ( ( const void * ) multiplicand0 ); - const bigint_t ( size ) __attribute__ (( may_alias )) *multiplier = - ( ( const void * ) multiplier0 ); - const bigint_t ( size ) __attribute__ (( may_alias )) *modulus = - ( ( const void * ) modulus0 ); - bigint_t ( size ) __attribute__ (( may_alias )) *result = - ( ( void * ) result0 ); - struct { - bigint_t ( size * 2 ) result; - bigint_t ( size * 2 ) modulus; - } *temp = tmp; - int rotation; - int i; +void bigint_reduce_raw ( const bigint_element_t *modulus0, + bigint_element_t *result0, unsigned int size ) { + const bigint_t ( size ) __attribute__ (( may_alias )) + *modulus = ( ( const void * ) modulus0 ); + bigint_t ( size ) __attribute__ (( may_alias )) + *result = ( ( void * ) result0 ); + const unsigned int width = ( 8 * sizeof ( bigint_element_t ) ); + unsigned int shift; + int max; + int sign; + int msb; + int carry; + + /* We have the constants: + * + * N = modulus + * + * n = number of bits in the modulus (including any leading zeros) + * + * R = 2^n + * + * Let r be the extension of the n-bit result register by a + * separate two's complement sign bit, such that -R <= r < R, + * and define: + * + * x = r * 2^k + * + * as the value being reduced modulo N, where k is a + * non-negative integer bit shift. + * + * We want to reduce the initial value R^2=2^(2n), which we + * may trivially represent using r=1 and k=2n. + * + * We then iterate over decrementing k, maintaining the loop + * invariant: + * + * -N <= r < N + * + * On each iteration we must first double r, to compensate for + * having decremented k: + * + * k' = k - 1 + * + * r' = 2r + * + * x = r * 2^k = 2r * 2^(k-1) = r' * 2^k' + * + * Note that doubling the n-bit result register will create a + * value of n+1 bits: this extra bit needs to be handled + * separately during the calculation. + * + * We then subtract N (if r is currently non-negative) or add + * N (if r is currently negative) to restore the loop + * invariant: + * + * 0 <= r < N => r" = 2r - N => -N <= r" < N + * -N <= r < 0 => r" = 2r + N => -N <= r" < N + * + * Note that since N may use all n bits, the most significant + * bit of the n-bit result register is not a valid two's + * complement sign bit for r: the extra sign bit therefore + * also needs to be handled separately. + * + * Once we reach k=0, we have x=r and therefore: + * + * -N <= x < N + * + * After this last loop iteration (with k=0), we may need to + * add a single multiple of N to ensure that x is positive, + * i.e. lies within the range 0 <= x < N. + * + * Since neither the modulus nor the value R^2 are secret, we + * may elide approximately half of the total number of + * iterations by constructing the initial representation of + * R^2 as r=2^m and k=2n-m (for some m such that 2^m < N). + */ + + /* Initialise x=R^2 */ + memset ( result, 0, sizeof ( *result ) ); + max = ( bigint_max_set_bit ( modulus ) - 2 ); + if ( max < 0 ) { + /* Degenerate case of N=0 or N=1: return a zero result */ + return; + } + bigint_set_bit ( result, max ); + shift = ( ( 2 * size * width ) - max ); + sign = 0; + + /* Iterate as described above */ + while ( shift-- ) { + + /* Calculate 2r, storing extra bit separately */ + msb = bigint_shl ( result ); + + /* Add or subtract N according to current sign */ + if ( sign ) { + carry = bigint_add ( modulus, result ); + } else { + carry = bigint_subtract ( modulus, result ); + } + + /* Calculate new sign of result + * + * We know the result lies in the range -N <= r < N + * and so the tuple (old sign, msb, carry) cannot ever + * take the values (0, 1, 0) or (1, 0, 0). We can + * therefore treat these as don't-care inputs, which + * allows us to simplify the boolean expression by + * ignoring the old sign completely. + */ + assert ( ( sign == msb ) || carry ); + sign = ( msb ^ carry ); + } - /* Start profiling */ - profile_start ( &bigint_mod_multiply_profiler ); + /* Add N to make result positive if necessary */ + if ( sign ) + bigint_add ( modulus, result ); /* Sanity check */ - assert ( sizeof ( *temp ) == bigint_mod_multiply_tmp_len ( modulus ) ); - - /* Perform multiplication */ - profile_start ( &bigint_mod_multiply_multiply_profiler ); - bigint_multiply ( multiplicand, multiplier, &temp->result ); - profile_stop ( &bigint_mod_multiply_multiply_profiler ); - - /* Rescale modulus to match result */ - profile_start ( &bigint_mod_multiply_rescale_profiler ); - bigint_grow ( modulus, &temp->modulus ); - rotation = ( bigint_max_set_bit ( &temp->result ) - - bigint_max_set_bit ( &temp->modulus ) ); - for ( i = 0 ; i < rotation ; i++ ) - bigint_rol ( &temp->modulus ); - profile_stop ( &bigint_mod_multiply_rescale_profiler ); - - /* Subtract multiples of modulus */ - profile_start ( &bigint_mod_multiply_subtract_profiler ); - for ( i = 0 ; i <= rotation ; i++ ) { - if ( bigint_is_geq ( &temp->result, &temp->modulus ) ) - bigint_subtract ( &temp->modulus, &temp->result ); - bigint_ror ( &temp->modulus ); + assert ( ! bigint_is_geq ( result, modulus ) ); +} + +/** + * Compute inverse of odd big integer modulo any power of two + * + * @v invertend0 Element 0 of odd big integer to be inverted + * @v inverse0 Element 0 of big integer to hold result + * @v size Number of elements in invertend and result + */ +void bigint_mod_invert_raw ( const bigint_element_t *invertend0, + bigint_element_t *inverse0, unsigned int size ) { + const bigint_t ( size ) __attribute__ (( may_alias )) + *invertend = ( ( const void * ) invertend0 ); + bigint_t ( size ) __attribute__ (( may_alias )) + *inverse = ( ( void * ) inverse0 ); + bigint_element_t accum; + bigint_element_t bit; + unsigned int i; + + /* Sanity check */ + assert ( bigint_bit_is_set ( invertend, 0 ) ); + + /* Initialise output */ + memset ( inverse, 0xff, sizeof ( *inverse ) ); + + /* Compute inverse modulo 2^(width) + * + * This method is a lightly modified version of the pseudocode + * presented in "A New Algorithm for Inversion mod p^k (Koç, + * 2017)". + * + * Each inner loop iteration calculates one bit of the + * inverse. The residue value is the two's complement + * negation of the value "b" as used by Koç, to allow for + * division by two using a logical right shift (since we have + * no arithmetic right shift operation for big integers). + * + * The residue is stored in the as-yet uncalculated portion of + * the inverse. The size of the residue therefore decreases + * by one element for each outer loop iteration. Trivial + * inspection of the algorithm shows that any higher bits + * could not contribute to the eventual output value, and so + * we may safely reuse storage this way. + * + * Due to the suffix property of inverses mod 2^k, the result + * represents the least significant bits of the inverse modulo + * an arbitrarily large 2^k. + */ + for ( i = size ; i > 0 ; i-- ) { + const bigint_t ( i ) __attribute__ (( may_alias )) + *addend = ( ( const void * ) invertend ); + bigint_t ( i ) __attribute__ (( may_alias )) + *residue = ( ( void * ) inverse ); + + /* Calculate one element's worth of inverse bits */ + for ( accum = 0, bit = 1 ; bit ; bit <<= 1 ) { + if ( bigint_bit_is_set ( residue, 0 ) ) { + accum |= bit; + bigint_add ( addend, residue ); + } + bigint_shr ( residue ); + } + + /* Store in the element no longer required to hold residue */ + inverse->element[ i - 1 ] = accum; + } + + /* Correct order of inverse elements */ + for ( i = 0 ; i < ( size / 2 ) ; i++ ) { + accum = inverse->element[i]; + inverse->element[i] = inverse->element[ size - 1 - i ]; + inverse->element[ size - 1 - i ] = accum; + } +} + +/** + * Perform relaxed Montgomery reduction (REDC) of a big integer + * + * @v modulus0 Element 0 of big integer odd modulus + * @v value0 Element 0 of big integer to be reduced + * @v result0 Element 0 of big integer to hold result + * @v size Number of elements in modulus and result + * @ret carry Carry out + * + * The value to be reduced will be made divisible by the size of the + * modulus while retaining its residue class (i.e. multiples of the + * modulus will be added until the low half of the value is zero). + * + * The result may be expressed as + * + * tR = x + mN + * + * where x is the input value, N is the modulus, R=2^n (where n is the + * number of bits in the representation of the modulus, including any + * leading zero bits), and m is the number of multiples of the modulus + * added to make the result tR divisible by R. + * + * The maximum addend is mN <= (R-1)*N (and such an m can be proven to + * exist since N is limited to being odd and therefore coprime to R). + * + * Since the result of this addition is one bit larger than the input + * value, a carry out bit is also returned. The caller may be able to + * prove that the carry out is always zero, in which case it may be + * safely ignored. + * + * The upper half of the output value (i.e. t) will also be copied to + * the result pointer. It is permissible for the result pointer to + * overlap the lower half of the input value. + * + * External knowledge of constraints on the modulus and the input + * value may be used to prove constraints on the result. The + * constraint on the modulus may be generally expressed as + * + * R > kN + * + * for some positive integer k. The value k=1 is allowed, and simply + * expresses that the modulus fits within the number of bits in its + * own representation. + * + * For classic Montgomery reduction, we have k=1, i.e. R > N and a + * separate constraint that the input value is in the range x < RN. + * This gives the result constraint + * + * tR < RN + (R-1)N + * < 2RN - N + * < 2RN + * t < 2N + * + * A single subtraction of the modulus may therefore be required to + * bring it into the range t < N. + * + * When the input value is known to be a product of two integers A and + * B, with A < aN and B < bN, we get the result constraint + * + * tR < abN^2 + (R-1)N + * < (ab/k)RN + RN - N + * < (1 + ab/k)RN + * t < (1 + ab/k)N + * + * If we have k=a=b=1, i.e. R > N with A < N and B < N, then the + * result is in the range t < 2N and may require a single subtraction + * of the modulus to bring it into the range t < N so that it may be + * used as an input on a subsequent iteration. + * + * If we have k=4 and a=b=2, i.e. R > 4N with A < 2N and B < 2N, then + * the result is in the range t < 2N and may immediately be used as an + * input on a subsequent iteration, without requiring a subtraction. + * + * Larger values of k may be used to allow for larger values of a and + * b, which can be useful to elide intermediate reductions in a + * calculation chain that involves additions and subtractions between + * multiplications (as used in elliptic curve point addition, for + * example). As a general rule: each intermediate addition or + * subtraction will require k to be doubled. + * + * When the input value is known to be a single integer A, with A < aN + * (as used when converting out of Montgomery form), we get the result + * constraint + * + * tR < aN + (R-1)N + * < RN + (a-1)N + * + * If we have a=1, i.e. A < N, then the constraint becomes + * + * tR < RN + * t < N + * + * and so the result is immediately in the range t < N with no + * subtraction of the modulus required. + * + * For any larger value of a, the result value t=N becomes possible. + * Additional external knowledge may potentially be used to prove that + * t=N cannot occur. For example: if the caller is performing modular + * exponentiation with a prime modulus (or, more generally, a modulus + * that is coprime to the base), then there is no way for a non-zero + * base value to end up producing an exact multiple of the modulus. + * If t=N cannot be disproved, then conversion out of Montgomery form + * may require an additional subtraction of the modulus. + */ +int bigint_montgomery_relaxed_raw ( const bigint_element_t *modulus0, + bigint_element_t *value0, + bigint_element_t *result0, + unsigned int size ) { + const bigint_t ( size ) __attribute__ (( may_alias )) + *modulus = ( ( const void * ) modulus0 ); + union { + bigint_t ( size * 2 ) full; + struct { + bigint_t ( size ) low; + bigint_t ( size ) high; + } __attribute__ (( packed )); + } __attribute__ (( may_alias )) *value = ( ( void * ) value0 ); + bigint_t ( size ) __attribute__ (( may_alias )) + *result = ( ( void * ) result0 ); + static bigint_t ( 1 ) cached; + static bigint_t ( 1 ) negmodinv; + bigint_element_t multiple; + bigint_element_t carry; + unsigned int i; + unsigned int j; + int overflow; + + /* Sanity checks */ + assert ( bigint_bit_is_set ( modulus, 0 ) ); + + /* Calculate inverse (or use cached version) */ + if ( cached.element[0] != modulus->element[0] ) { + bigint_mod_invert ( modulus, &negmodinv ); + negmodinv.element[0] = -negmodinv.element[0]; + cached.element[0] = modulus->element[0]; + } + + /* Perform multiprecision Montgomery reduction */ + for ( i = 0 ; i < size ; i++ ) { + + /* Determine scalar multiple for this round */ + multiple = ( value->low.element[i] * negmodinv.element[0] ); + + /* Multiply value to make it divisible by 2^(width*i) */ + carry = 0; + for ( j = 0 ; j < size ; j++ ) { + bigint_multiply_one ( multiple, modulus->element[j], + &value->full.element[ i + j ], + &carry ); + } + + /* Since value is now divisible by 2^(width*i), we + * know that the current low element must have been + * zeroed. + */ + assert ( value->low.element[i] == 0 ); + + /* Store the multiplication carry out in the result, + * avoiding the need to immediately propagate the + * carry through the remaining elements. + */ + result->element[i] = carry; } - profile_stop ( &bigint_mod_multiply_subtract_profiler ); - /* Resize result */ - bigint_shrink ( &temp->result, result ); + /* Add the accumulated carries */ + overflow = bigint_add ( result, &value->high ); + + /* Copy to result buffer */ + bigint_copy ( &value->high, result ); + + return overflow; +} + +/** + * Perform classic Montgomery reduction (REDC) of a big integer + * + * @v modulus0 Element 0 of big integer odd modulus + * @v value0 Element 0 of big integer to be reduced + * @v result0 Element 0 of big integer to hold result + * @v size Number of elements in modulus and result + */ +void bigint_montgomery_raw ( const bigint_element_t *modulus0, + bigint_element_t *value0, + bigint_element_t *result0, + unsigned int size ) { + const bigint_t ( size ) __attribute__ (( may_alias )) + *modulus = ( ( const void * ) modulus0 ); + union { + bigint_t ( size * 2 ) full; + struct { + bigint_t ( size ) low; + bigint_t ( size ) high; + } __attribute__ (( packed )); + } __attribute__ (( may_alias )) *value = ( ( void * ) value0 ); + bigint_t ( size ) __attribute__ (( may_alias )) + *result = ( ( void * ) result0 ); + int overflow; + int underflow; /* Sanity check */ - assert ( bigint_is_geq ( modulus, result ) ); + assert ( ! bigint_is_geq ( &value->high, modulus ) ); + + /* Perform relaxed Montgomery reduction */ + overflow = bigint_montgomery_relaxed ( modulus, &value->full, result ); - /* Stop profiling */ - profile_stop ( &bigint_mod_multiply_profiler ); + /* Conditionally subtract the modulus once */ + underflow = bigint_subtract ( modulus, result ); + bigint_swap ( result, &value->high, ( underflow & ~overflow ) ); + + /* Sanity check */ + assert ( ! bigint_is_geq ( result, modulus ) ); +} + +/** + * Perform generalised exponentiation via a Montgomery ladder + * + * @v result0 Element 0 of result (initialised to identity element) + * @v multiple0 Element 0 of multiple (initialised to generator) + * @v size Number of elements in result and multiple + * @v exponent0 Element 0 of exponent + * @v exponent_size Number of elements in exponent + * @v op Montgomery ladder commutative operation + * @v ctx Operation context (if needed) + * @v tmp Temporary working space (if needed) + * + * The Montgomery ladder may be used to perform any operation that is + * isomorphic to exponentiation, i.e. to compute the result + * + * r = g^e = g * g * g * g * .... * g + * + * for an arbitrary commutative operation "*", generator "g" and + * exponent "e". + * + * The result "r" is computed in constant time (assuming that the + * underlying operation is constant time) in k steps, where k is the + * number of bits in the big integer representation of the exponent. + * + * The result "r" must be initialised to the operation's identity + * element, and the multiple must be initialised to the generator "g". + * On exit, the multiple will contain + * + * m = r * g = g^(e+1) + * + * Note that the terminology used here refers to exponentiation + * defined as repeated multiplication, but that the ladder may equally + * well be used to perform any isomorphic operation (such as + * multiplication defined as repeated addition). + */ +void bigint_ladder_raw ( bigint_element_t *result0, + bigint_element_t *multiple0, unsigned int size, + const bigint_element_t *exponent0, + unsigned int exponent_size, bigint_ladder_op_t *op, + const void *ctx, void *tmp ) { + bigint_t ( size ) __attribute__ (( may_alias )) + *result = ( ( void * ) result0 ); + bigint_t ( size ) __attribute__ (( may_alias )) + *multiple = ( ( void * ) multiple0 ); + const bigint_t ( exponent_size ) __attribute__ (( may_alias )) + *exponent = ( ( const void * ) exponent0 ); + const unsigned int width = ( 8 * sizeof ( bigint_element_t ) ); + unsigned int bit = ( exponent_size * width ); + int toggle = 0; + int previous; + + /* We have two physical storage locations: Rres (the "result" + * register) and Rmul (the "multiple" register). + * + * On entry we have: + * + * Rres = g^0 = 1 (the identity element) + * Rmul = g (the generator) + * + * For calculation purposes, define two virtual registers R[0] + * and R[1], mapped to the underlying physical storage + * locations via a boolean toggle state "t": + * + * R[t] -> Rres + * R[~t] -> Rmul + * + * Define the initial toggle state to be t=0, so that we have: + * + * R[0] = g^0 = 1 (the identity element) + * R[1] = g (the generator) + * + * The Montgomery ladder then iterates over each bit "b" of + * the exponent "e" (from MSB down to LSB), computing: + * + * R[1] := R[0] * R[1], R[0] := R[0] * R[0] (if b=0) + * R[0] := R[1] * R[0], R[1] := R[1] * R[1] (if b=1) + * + * i.e. + * + * R[~b] := R[b] * R[~b], R[b] := R[b] * R[b] + * + * To avoid data-dependent memory access patterns, we + * implement this as: + * + * Rmul := Rres * Rmul + * Rres := Rres * Rres + * + * and update the toggle state "t" (via constant-time + * conditional swaps) as needed to ensure that R[b] always + * maps to Rres and R[~b] always maps to Rmul. + */ + while ( bit-- ) { + + /* Update toggle state t := b + * + * If the new toggle state is not equal to the old + * toggle state, then we must swap R[0] and R[1] (in + * constant time). + */ + previous = toggle; + toggle = bigint_bit_is_set ( exponent, bit ); + bigint_swap ( result, multiple, ( toggle ^ previous ) ); + + /* Calculate Rmul = R[~b] := R[b] * R[~b] = Rres * Rmul */ + op ( result->element, multiple->element, size, ctx, tmp ); + + /* Calculate Rres = R[b] := R[b] * R[b] = Rres * Rres */ + op ( result->element, result->element, size, ctx, tmp ); + } + + /* Reset toggle state to zero */ + bigint_swap ( result, multiple, toggle ); +} + +/** + * Perform modular multiplication as part of a Montgomery ladder + * + * @v operand Element 0 of first input operand (may overlap result) + * @v result Element 0 of second input operand and result + * @v size Number of elements in operands and result + * @v ctx Operation context (odd modulus, or NULL) + * @v tmp Temporary working space + */ +void bigint_mod_exp_ladder ( const bigint_element_t *multiplier0, + bigint_element_t *result0, unsigned int size, + const void *ctx, void *tmp ) { + const bigint_t ( size ) __attribute__ (( may_alias )) + *multiplier = ( ( const void * ) multiplier0 ); + bigint_t ( size ) __attribute__ (( may_alias )) + *result = ( ( void * ) result0 ); + const bigint_t ( size ) __attribute__ (( may_alias )) + *modulus = ( ( const void * ) ctx ); + bigint_t ( size * 2 ) __attribute__ (( may_alias )) + *product = ( ( void * ) tmp ); + + /* Multiply and reduce */ + bigint_multiply ( result, multiplier, product ); + if ( modulus ) { + bigint_montgomery ( modulus, product, result ); + } else { + bigint_shrink ( product, result ); + } } /** @@ -169,25 +763,108 @@ void bigint_mod_exp_raw ( const bigint_element_t *base0, *exponent = ( ( const void * ) exponent0 ); bigint_t ( size ) __attribute__ (( may_alias )) *result = ( ( void * ) result0 ); - size_t mod_multiply_len = bigint_mod_multiply_tmp_len ( modulus ); + const unsigned int width = ( 8 * sizeof ( bigint_element_t ) ); struct { - bigint_t ( size ) base; - bigint_t ( exponent_size ) exponent; - uint8_t mod_multiply[mod_multiply_len]; + struct { + bigint_t ( size ) modulus; + bigint_t ( size ) stash; + }; + union { + bigint_t ( 2 * size ) full; + bigint_t ( size ) low; + } product; } *temp = tmp; - static const uint8_t start[1] = { 0x01 }; + const uint8_t one[1] = { 1 }; + bigint_element_t submask; + unsigned int subsize; + unsigned int scale; + unsigned int i; - memcpy ( &temp->base, base, sizeof ( temp->base ) ); - memcpy ( &temp->exponent, exponent, sizeof ( temp->exponent ) ); - bigint_init ( result, start, sizeof ( start ) ); + /* Sanity check */ + assert ( sizeof ( *temp ) == bigint_mod_exp_tmp_len ( modulus ) ); - while ( ! bigint_is_zero ( &temp->exponent ) ) { - if ( bigint_bit_is_set ( &temp->exponent, 0 ) ) { - bigint_mod_multiply ( result, &temp->base, modulus, - result, temp->mod_multiply ); - } - bigint_ror ( &temp->exponent ); - bigint_mod_multiply ( &temp->base, &temp->base, modulus, - &temp->base, temp->mod_multiply ); + /* Handle degenerate case of zero modulus */ + if ( ! bigint_max_set_bit ( modulus ) ) { + memset ( result, 0, sizeof ( *result ) ); + return; + } + + /* Factor modulus as (N * 2^scale) where N is odd */ + bigint_copy ( modulus, &temp->modulus ); + for ( scale = 0 ; ( ! bigint_bit_is_set ( &temp->modulus, 0 ) ) ; + scale++ ) { + bigint_shr ( &temp->modulus ); + } + subsize = ( ( scale + width - 1 ) / width ); + submask = ( ( 1UL << ( scale % width ) ) - 1 ); + if ( ! submask ) + submask = ~submask; + + /* Calculate (R^2 mod N) */ + bigint_reduce ( &temp->modulus, &temp->stash ); + + /* Initialise result = Montgomery(1, R^2 mod N) */ + bigint_grow ( &temp->stash, &temp->product.full ); + bigint_montgomery ( &temp->modulus, &temp->product.full, result ); + + /* Convert base into Montgomery form */ + bigint_multiply ( base, &temp->stash, &temp->product.full ); + bigint_montgomery ( &temp->modulus, &temp->product.full, + &temp->stash ); + + /* Calculate x1 = base^exponent modulo N */ + bigint_ladder ( result, &temp->stash, exponent, bigint_mod_exp_ladder, + &temp->modulus, &temp->product ); + + /* Convert back out of Montgomery form */ + bigint_grow ( result, &temp->product.full ); + bigint_montgomery_relaxed ( &temp->modulus, &temp->product.full, + result ); + + /* Handle even moduli via Garner's algorithm */ + if ( subsize ) { + const bigint_t ( subsize ) __attribute__ (( may_alias )) + *subbase = ( ( const void * ) base ); + bigint_t ( subsize ) __attribute__ (( may_alias )) + *submodulus = ( ( void * ) &temp->modulus ); + bigint_t ( subsize ) __attribute__ (( may_alias )) + *substash = ( ( void * ) &temp->stash ); + bigint_t ( subsize ) __attribute__ (( may_alias )) + *subresult = ( ( void * ) result ); + union { + bigint_t ( 2 * subsize ) full; + bigint_t ( subsize ) low; + } __attribute__ (( may_alias )) + *subproduct = ( ( void * ) &temp->product.full ); + + /* Calculate x2 = base^exponent modulo 2^k */ + bigint_init ( substash, one, sizeof ( one ) ); + bigint_copy ( subbase, submodulus ); + bigint_ladder ( substash, submodulus, exponent, + bigint_mod_exp_ladder, NULL, subproduct ); + + /* Reconstruct N */ + bigint_copy ( modulus, &temp->modulus ); + for ( i = 0 ; i < scale ; i++ ) + bigint_shr ( &temp->modulus ); + + /* Calculate N^-1 modulo 2^k */ + bigint_mod_invert ( submodulus, &subproduct->low ); + bigint_copy ( &subproduct->low, submodulus ); + + /* Calculate y = (x2 - x1) * N^-1 modulo 2^k */ + bigint_subtract ( subresult, substash ); + bigint_multiply ( substash, submodulus, &subproduct->full ); + subproduct->low.element[ subsize - 1 ] &= submask; + bigint_grow ( &subproduct->low, &temp->stash ); + + /* Reconstruct N */ + bigint_mod_invert ( submodulus, &subproduct->low ); + bigint_copy ( &subproduct->low, submodulus ); + + /* Calculate x = x1 + N * y */ + bigint_multiply ( &temp->modulus, &temp->stash, + &temp->product.full ); + bigint_add ( &temp->product.low, result ); } } |
