summaryrefslogblamecommitdiffstats
path: root/src/crypto/gcm.c
blob: a32890d595830341dd0035951d419ed37aea1898 (plain) (tree)










































                                                                                       















                                                                     


























































































































































































































































                                                                           
                                  
                                                                       
                                      
                                       

                                                                       
                                                                       




                            






                                                                        

                                                                   


                              
                                                                   





                                                  


                                                                          

















                                                                                
                                        



                                                                            


                                 




































































































                                                                              
 







                                                                           


                                                                     

                           










                                                                            
                                                                    
                                                    
                                                     


                                                                             















                                                                              

                          
                                                               











                                                                              

                          
                                                  
 
/*
 * Copyright (C) 2022 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * You can also choose to distribute this program under the terms of
 * the Unmodified Binary Distribution Licence (as given in the file
 * COPYING.UBDL), provided that you have satisfied its requirements.
 */

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

/** @file
 *
 * Galois/Counter Mode (GCM)
 *
 * The GCM algorithm is specified in
 *
 * https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
 * https://csrc.nist.rip/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-spec.pdf
 *
 */

#include <stdint.h>
#include <string.h>
#include <byteswap.h>
#include <ipxe/crypto.h>
#include <ipxe/gcm.h>

/**
 * Perform encryption
 *
 * This value is chosen to allow for ANDing with a fragment length.
 */
#define GCM_FL_ENCRYPT 0x00ff

/**
 * Calculate hash over an initialisation vector value
 *
 * The hash calculation for a non 96-bit initialisation vector is
 * identical to the calculation used for additional data, except that
 * the non-additional data length counter is used.
 */
#define GCM_FL_IV 0x0100

/**
 * GCM field polynomial
 *
 * GCM treats 128-bit blocks as polynomials in GF(2^128) with the
 * field polynomial f(x) = 1 + x + x^2 + x^7 + x^128.
 *
 * In a somewhat bloody-minded interpretation of "big-endian", the
 * constant term (with degree zero) is arbitrarily placed in the
 * leftmost bit of the big-endian binary representation (i.e. the most
 * significant bit of byte 0), thereby failing to correspond to the
 * bit ordering in any CPU architecture in existence.  This
 * necessitates some wholly gratuitous byte reversals when
 * constructing the multiplication tables, since all CPUs will treat
 * bit 0 as being the least significant bit within a byte.
 *
 * The field polynomial maps to the 128-bit constant
 * 0xe1000000000000000000000000000000 (with the x^128 term outside the
 * 128-bit range), and can therefore be treated as a single-byte
 * value.
 */
#define GCM_POLY 0xe1

/**
 * Hash key for which multiplication tables are cached
 *
 * GCM operates much more efficiently with a cached multiplication
 * table, which costs 4kB per hash key.  Since this exceeds the
 * available stack space, we place a single 4kB cache in .bss and
 * recalculate the cached values as required.  In the common case of a
 * single HTTPS connection being used to download a (relatively) large
 * file, the same key will be used repeatedly for almost all GCM
 * operations, and so the overhead of recalculation is negligible.
 */
static const union gcm_block *gcm_cached_key;

/**
 * Cached multiplication table (M0) for Shoup's method
 *
 * Each entry within this table represents the result of multiplying
 * the cached hash key by an arbitrary 8-bit polynomial.
 */
static union gcm_block gcm_cached_mult[256];

/**
 * Cached reduction table (R) for Shoup's method
 *
 * Each entry within this table represents the result of multiplying
 * the fixed polynomial x^128 by an arbitrary 8-bit polynomial.  Only
 * the leftmost 16 bits are stored, since all other bits within the
 * result will always be zero.
 */
static uint16_t gcm_cached_reduce[256];

/**
 * Reverse bits in a byte
 *
 * @v byte		Byte
 * @ret etyb		Bit-reversed byte
 */
static inline __attribute__ (( always_inline )) uint8_t
gcm_reverse ( const uint8_t byte ) {
	uint8_t etyb = etyb;
	uint8_t mask;

	for ( mask = 1 ; mask ; mask <<= 1 ) {
		etyb <<= 1;
		if ( byte & mask )
			etyb |= 1;
	}
	return etyb;
}

/**
 * Update GCM counter
 *
 * @v ctr		Counter
 * @v delta		Amount to add to counter
 */
static inline __attribute__ (( always_inline )) void
gcm_count ( union gcm_block *ctr, uint32_t delta ) {
	uint32_t *value = &ctr->ctr.value;

	/* Update counter modulo 2^32 */
	*value = cpu_to_be32 ( be32_to_cpu ( *value ) + delta );
}

/**
 * XOR partial data block
 *
 * @v src1		Source buffer 1
 * @v src2		Source buffer 2
 * @v dst		Destination buffer
 * @v len		Length
 */
static inline void gcm_xor ( const void *src1, const void *src2, void *dst,
			     size_t len ) {
	uint8_t *dst_bytes = dst;
	const uint8_t *src1_bytes = src1;
	const uint8_t *src2_bytes = src2;

	/* XOR one byte at a time */
	while ( len-- )
		*(dst_bytes++) = ( *(src1_bytes++) ^ *(src2_bytes++) );
}

/**
 * XOR whole data block in situ
 *
 * @v src		Source block
 * @v dst		Destination block
 */
static inline void gcm_xor_block ( const union gcm_block *src,
				   union gcm_block *dst ) {

	/* XOR whole dwords */
	dst->dword[0] ^= src->dword[0];
	dst->dword[1] ^= src->dword[1];
	dst->dword[2] ^= src->dword[2];
	dst->dword[3] ^= src->dword[3];
}

/**
 * Multiply polynomial by (x)
 *
 * @v mult		Multiplicand
 * @v res		Result
 */
static void gcm_multiply_x ( const union gcm_block *mult,
			     union gcm_block *res ) {
	unsigned int i;
	uint8_t byte;
	uint8_t carry;

	/* Multiply by (x) by shifting all bits rightward */
	for ( i = 0, carry = 0 ; i < sizeof ( res->byte ) ; i++ ) {
		byte = mult->byte[i];
		res->byte[i] = ( ( carry << 7 ) | ( byte >> 1 ) );
		carry = ( byte & 0x01 );
	}

	/* If result overflows, reduce modulo the field polynomial */
	if ( carry )
		res->byte[0] ^= GCM_POLY;
}

/**
 * Construct cached tables
 *
 * @v key		Hash key
 * @v context		Context
 */
static void gcm_cache ( const union gcm_block *key ) {
	union gcm_block *mult;
	uint16_t reduce;
	unsigned int this;
	unsigned int other;
	unsigned int i;

	/* Calculate M0[1..255] and R[1..255]
	 *
	 * The R[] values are independent of the key, but the overhead
	 * of recalculating them here is negligible and saves on
	 * overall code size since the calculations are related.
	 */
	for ( i = 1 ; i < 256 ; i++ ) {

		/* Reverse bit order to compensate for poor life choices */
		this = gcm_reverse ( i );

		/* Construct entries */
		mult = &gcm_cached_mult[this];
		if ( this & 0x80 ) {

			/* Odd number: entry[i] = entry[i - 1] + poly */
			other = ( this & 0x7f ); /* bit-reversed (i - 1) */
			gcm_xor ( key, &gcm_cached_mult[other], mult,
				  sizeof ( *mult ) );
			reduce = gcm_cached_reduce[other];
			reduce ^= be16_to_cpu ( GCM_POLY << 8 );
			gcm_cached_reduce[this] = reduce;

		} else {

			/* Even number: entry[i] = entry[i/2] * (x) */
			other = ( this << 1 ); /* bit-reversed (i / 2) */
			gcm_multiply_x ( &gcm_cached_mult[other], mult );
			reduce = be16_to_cpu ( gcm_cached_reduce[other] );
			reduce >>= 1;
			gcm_cached_reduce[this] = cpu_to_be16 ( reduce );
		}
	}

	/* Record cached key */
	gcm_cached_key = key;
}

/**
 * Multiply polynomial by (x^8) in situ
 *
 * @v poly		Multiplicand and result
 */
static void gcm_multiply_x_8 ( union gcm_block *poly ) {
	uint8_t *byte;
	uint8_t msb;

	/* Reduction table must already have been calculated */
	assert ( gcm_cached_key != NULL );

	/* Record most significant byte */
	byte = &poly->byte[ sizeof ( poly->byte ) - 1 ];
	msb = *byte;

	/* Multiply least significant bytes by shifting */
	for ( ; byte > &poly->byte[0] ; byte-- )
		*byte = *( byte - 1 );
	*byte = 0;

	/* Multiply most significant byte via reduction table */
	poly->word[0] ^= gcm_cached_reduce[msb];
}

/**
 * Multiply polynomial by hash key in situ
 *
 * @v key		Hash key
 * @v poly		Multiplicand and result
 */
static void gcm_multiply_key ( const union gcm_block *key,
			       union gcm_block *poly ) {
	union gcm_block res;
	uint8_t *byte;

	/* Construct tables, if necessary */
	if ( gcm_cached_key != key )
		gcm_cache ( key );

	/* Multiply using Shoup's algorithm */
	byte = &poly->byte[ sizeof ( poly->byte ) - 1 ];
	memcpy ( &res, &gcm_cached_mult[ *byte ], sizeof ( res ) );
	for ( byte-- ; byte >= &poly->byte[0] ; byte-- ) {
		gcm_multiply_x_8 ( &res );
		gcm_xor_block ( &gcm_cached_mult[ *byte ], &res );
	}

	/* Overwrite result */
	memcpy ( poly, &res, sizeof ( *poly ) );
}

/**
 * Encrypt/decrypt/authenticate data
 *
 * @v context		Context
 * @v src		Input data
 * @v dst		Output data, or NULL to process additional data
 * @v len		Length of data
 * @v flags		Operation flags
 */
static void gcm_process ( struct gcm_context *context, const void *src,
			  void *dst, size_t len, unsigned int flags ) {
	union gcm_block tmp;
	uint64_t *total;
	size_t frag_len;
	unsigned int block;

	/* Calculate block number (for debugging) */
	block = ( ( ( context->len.len.add + 8 * sizeof ( tmp ) - 1 ) /
		    ( 8 * sizeof ( tmp ) ) ) +
		  ( ( context->len.len.data + 8 * sizeof ( tmp ) - 1 ) /
		    ( 8 * sizeof ( tmp ) ) ) + 1 );

	/* Update total length (in bits) */
	total = ( ( dst || ( flags & GCM_FL_IV ) ) ?
		  &context->len.len.data : &context->len.len.add );
	*total += ( len * 8 );

	/* Process data */
	for ( ; len ; src += frag_len, len -= frag_len, block++ ) {

		/* Calculate fragment length */
		frag_len = len;
		if ( frag_len > sizeof ( tmp ) )
			frag_len = sizeof ( tmp );

		/* Update hash with input data */
		gcm_xor ( src, &context->hash, &context->hash, frag_len );

		/* Encrypt/decrypt block, if applicable */
		if ( dst ) {

			/* Increment counter */
			gcm_count ( &context->ctr, 1 );

			/* Encrypt counter */
			DBGC2 ( context, "GCM %p Y[%d]:\n", context, block );
			DBGC2_HDA ( context, 0, &context->ctr,
				    sizeof ( context->ctr ) );
			cipher_encrypt ( context->raw_cipher, &context->raw_ctx,
					 &context->ctr, &tmp, sizeof ( tmp ) );
			DBGC2 ( context, "GCM %p E(K,Y[%d]):\n",
				context, block );
			DBGC2_HDA ( context, 0, &tmp, sizeof ( tmp ) );

			/* Encrypt/decrypt data */
			gcm_xor ( src, &tmp, dst, frag_len );
			dst += frag_len;

			/* Update hash with encrypted data, if applicable */
			gcm_xor ( &tmp, &context->hash, &context->hash,
				  ( frag_len & flags ) );
		}

		/* Update hash */
		gcm_multiply_key ( &context->key, &context->hash );
		DBGC2 ( context, "GCM %p X[%d]:\n", context, block );
		DBGC2_HDA ( context, 0, &context->hash,
			    sizeof ( context->hash ) );
	}
}

/**
 * Construct hash
 *
 * @v context		Context
 * @v hash		Hash to fill in
 */
static void gcm_hash ( struct gcm_context *context, union gcm_block *hash ) {

	/* Construct big-endian lengths block */
	hash->len.add = cpu_to_be64 ( context->len.len.add );
	hash->len.data = cpu_to_be64 ( context->len.len.data );
	DBGC2 ( context, "GCM %p len(A)||len(C):\n", context );
	DBGC2_HDA ( context, 0, hash, sizeof ( *hash ) );

	/* Update hash */
	gcm_xor_block ( &context->hash, hash );
	gcm_multiply_key ( &context->key, hash );
	DBGC2 ( context, "GCM %p GHASH(H,A,C):\n", context );
	DBGC2_HDA ( context, 0, hash, sizeof ( *hash ) );
}

/**
 * Construct tag
 *
 * @v context		Context
 * @v tag		Tag
 */
void gcm_tag ( struct gcm_context *context, union gcm_block *tag ) {
	union gcm_block tmp;
	uint32_t offset;

	/* Construct hash */
	gcm_hash ( context, tag );

	/* Construct encrypted initial counter value */
	memcpy ( &tmp, &context->ctr, sizeof ( tmp ) );
	offset = ( ( -context->len.len.data ) / ( 8 * sizeof ( tmp ) ) );
	gcm_count ( &tmp, offset );
	cipher_encrypt ( context->raw_cipher, &context->raw_ctx, &tmp,
			 &tmp, sizeof ( tmp ) );
	DBGC2 ( context, "GCM %p E(K,Y[0]):\n", context );
	DBGC2_HDA ( context, 0, &tmp, sizeof ( tmp ) );

	/* Construct tag */
	gcm_xor_block ( &tmp, tag );
	DBGC2 ( context, "GCM %p T:\n", context );
	DBGC2_HDA ( context, 0, tag, sizeof ( *tag ) );
}

/**
 * Set key
 *
 * @v context		Context
 * @v key		Key
 * @v keylen		Key length
 * @v raw_cipher	Underlying cipher
 * @ret rc		Return status code
 */
int gcm_setkey ( struct gcm_context *context, const void *key, size_t keylen,
		 struct cipher_algorithm *raw_cipher ) {
	int rc;

	/* Initialise GCM context */
	memset ( context, 0, sizeof ( *context ) );
	context->raw_cipher = raw_cipher;

	/* Set underlying block cipher key */
	if ( ( rc = cipher_setkey ( raw_cipher, context->raw_ctx, key,
				    keylen ) ) != 0 )
		return rc;

	/* Construct GCM hash key */
	cipher_encrypt ( raw_cipher, context->raw_ctx, &context->ctr,
			 &context->key, sizeof ( context->key ) );
	DBGC2 ( context, "GCM %p H:\n", context );
	DBGC2_HDA ( context, 0, &context->key, sizeof ( context->key ) );

	/* Reset counter */
	context->ctr.ctr.value = cpu_to_be32 ( 1 );

	/* Construct cached tables */
	gcm_cache ( &context->key );

	return 0;
}

/**
 * Set initialisation vector
 *
 * @v ctx		Context
 * @v iv		Initialisation vector
 * @v ivlen		Initialisation vector length
 */
void gcm_setiv ( struct gcm_context *context, const void *iv, size_t ivlen ) {

	/* Sanity check: ensure that memset()s will clear expected state */
	build_assert ( &context->hash < &context->ctr );
	build_assert ( &context->len < &context->ctr );
	build_assert ( &context->ctr < &context->key );
	build_assert ( ( ( void * ) &context->raw_cipher ) >
		       ( ( void * ) &context->key ) );
	build_assert ( ( ( void * ) context->raw_ctx ) >
		       ( ( void * ) &context->key ) );

	/* Reset non-key state */
	memset ( context, 0, offsetof ( typeof ( *context ), key ) );

	/* Reset counter */
	context->ctr.ctr.value = cpu_to_be32 ( 1 );

	/* Process initialisation vector */
	if ( ivlen == sizeof ( context->ctr.ctr.iv ) ) {

		/* Initialisation vector is exactly 96 bits, use it as-is */
		memcpy ( context->ctr.ctr.iv, iv, ivlen );

	} else {

		/* Calculate hash over initialisation vector */
		gcm_process ( context, iv, NULL, ivlen, GCM_FL_IV );
		gcm_hash ( context, &context->ctr );
		assert ( context->len.len.add == 0 );

		/* Reset non-key, non-counter state */
		memset ( context, 0, offsetof ( typeof ( *context ), ctr ) );
	}

	DBGC2 ( context, "GCM %p Y[0]:\n", context );
	DBGC2_HDA ( context, 0, &context->ctr, sizeof ( context->ctr ) );
}

/**
 * Encrypt data
 *
 * @v context		Context
 * @v src		Data to encrypt
 * @v dst		Buffer for encrypted data, or NULL for additional data
 * @v len		Length of data
 */
void gcm_encrypt ( struct gcm_context *context, const void *src, void *dst,
		   size_t len ) {

	/* Process data */
	gcm_process ( context, src, dst, len, GCM_FL_ENCRYPT );
}

/**
 * Decrypt data
 *
 * @v context		Context
 * @v src		Data to decrypt
 * @v dst		Buffer for decrypted data, or NULL for additional data
 * @v len		Length of data
 */
void gcm_decrypt ( struct gcm_context *context, const void *src, void *dst,
		   size_t len ) {

	/* Process data */
	gcm_process ( context, src, dst, len, 0 );
}