[uri] Decode/encode URIs when parsing/unparsing

Currently, handling of URI escapes is ad-hoc; escaped strings are stored as-is in the URI structure, and it is up to the individual protocol to unescape as necessary. This is error-prone and expensive in terms of code size. Modify this behavior by unescaping in parse_uri() and escaping in unparse_uri() those fields that typically handle URI escapes (hostname, user, password, path, query, fragment), and allowing unparse_uri() to accept a subset of fields to print so it can be easily used to generate e.g. the escaped HTTP path?query request. Signed-off-by: Joshua Oreman <oremanj@rwcr.net> Signed-off-by: Marty Connor <mdc@etherboot.org>
author: Joshua Oreman 2009-12-30 04:36:04 +0100
committer: Marty Connor 2010-01-21 00:14:28 +0100
commit: 3d9dd93a1452e28c728483b03e352691238491ed (patch)
tree: 73a41396fd514d12dbd7bd69ebfc49810bf73c7c
parent: [settings] Add automagic "netX" settings block for last opened netdev (diff)
download: ipxe-3d9dd93a1452e28c728483b03e352691238491ed.tar.gz
ipxe-3d9dd93a1452e28c728483b03e352691238491ed.tar.xz
ipxe-3d9dd93a1452e28c728483b03e352691238491ed.zip
7 files changed, 155 insertions, 93 deletions
diff --git a/src/core/settings.c b/src/core/settings.c
index fe67d6b7..2fefdb90 100644
--- a/src/core/settings.c
+++ b/src/core/settings.c
@@ -1085,7 +1085,7 @@ static int fetchf_uristring ( struct settings *settings,
        
 		fetch_string_setting ( settings, setting, raw_buf,
 				       sizeof ( raw_buf ) );
-		return uri_encode ( raw_buf, buf, len );
+		return uri_encode ( raw_buf, buf, len, URI_FRAGMENT );
 	}
 }
 
diff --git a/src/core/uri.c b/src/core/uri.c
index 50a96d35..96667782 100644
--- a/src/core/uri.c
+++ b/src/core/uri.c
@@ -76,6 +76,7 @@ struct uri * parse_uri ( const char *uri_string ) {
 	char *tmp;
 	char *path = NULL;
 	char *authority = NULL;
+	int i;
 	size_t raw_len;
 
 	/* Allocate space for URI struct and a copy of the string */
@@ -171,6 +172,14 @@ struct uri * parse_uri ( const char *uri_string ) {
 		uri->port = tmp;
 	}
 
+	/* Decode fields that should be decoded */
+	for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
+		const char *field = uri_get_field ( uri, i );
+		if ( field && ( URI_ENCODED & ( 1 << i ) ) )
+			uri_decode ( field, ( char * ) field,
+				     strlen ( field ) + 1 /* NUL */ );
+	}
+
  done:
 	DBG ( "URI \"%s\" split into", uri_string );
 	dump_uri ( uri );
@@ -198,10 +207,19 @@ unsigned int uri_port ( struct uri *uri, unsigned int default_port ) {
  * @v buf		Buffer to fill with URI string
  * @v size		Size of buffer
  * @v uri		URI to write into buffer, or NULL
+ * @v fields		Bitmask of fields to include in URI string, or URI_ALL
  * @ret len		Length of URI string
  */
-int unparse_uri ( char *buf, size_t size, struct uri *uri ) {
+int unparse_uri ( char *buf, size_t size, struct uri *uri,
+		  unsigned int fields ) {
+	/* List of characters that typically go before certain fields */
+	static char separators[] = { /* scheme */ 0, /* opaque */ ':',
+				     /* user */ 0, /* password */ ':',
+				     /* host */ '@', /* port */ ':',
+				     /* path */ 0, /* query */ '?',
+				     /* fragment */ '#' };
 	int used = 0;
+	int i;
 
 	DBG ( "URI unparsing" );
 	dump_uri ( uri );
@@ -214,55 +232,39 @@ int unparse_uri ( char *buf, size_t size, struct uri *uri ) {
 		return 0;
 	}
 
-	/* Special-case opaque URIs */
-	if ( uri->opaque ) {
-		return ssnprintf ( ( buf + used ), ( size - used ),
-				   "%s:%s", uri->scheme, uri->opaque );
-	}
-
-	/* scheme:// */
-	if ( uri->scheme ) {
-		used += ssnprintf ( ( buf + used ), ( size - used ),
-				    "%s://", uri->scheme );
-	}
-
-	/* [user[:password]@]host[:port] */
-	if ( uri->host ) {
-		if ( uri->user ) {
-			used += ssnprintf ( ( buf + used ), ( size - used ),
-					    "%s", uri->user );
-			if ( uri->password ) {
-				used += ssnprintf ( ( buf + used ),
-						    ( size - used ),
-						    ":%s", uri->password );
+	/* Iterate through requested fields */
+	for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
+		const char *field = uri_get_field ( uri, i );
+		char sep = separators[i];
+
+		/* Ensure `fields' only contains bits for fields that exist */
+		if ( ! field )
+			fields &= ~( 1 << i );
+
+		/* Store this field if we were asked to */
+		if ( fields & ( 1 << i ) ) {
+			/* Print :// if we're non-opaque and had a scheme */
+			if ( ( fields & URI_SCHEME_BIT ) &&
+			     ( i > URI_OPAQUE ) ) {
+				used += ssnprintf ( buf + used, size - used,
+						    "://" );
+				/* Only print :// once */
+				fields &= ~URI_SCHEME_BIT;
 			}
-			used += ssnprintf ( ( buf + used ), ( size - used ),
-					    "@" );
-		}
-		used += ssnprintf ( ( buf + used ), ( size - used ), "%s",
-				    uri->host );
-		if ( uri->port ) {
-			used += ssnprintf ( ( buf + used ), ( size - used ),
-					    ":%s", uri->port );
-		}
-	}
-
-	/* /path */
-	if ( uri->path ) {
-		used += ssnprintf ( ( buf + used ), ( size - used ),
-				    "%s", uri->path );
-	}
 
-	/* ?query */
-	if ( uri->query ) {
-		used += ssnprintf ( ( buf + used ), ( size - used ),
-				    "?%s", uri->query );
-	}
-
-	/* #fragment */
-	if ( uri->fragment ) {
-		used += ssnprintf ( ( buf + used ), ( size - used ),
-				    "#%s", uri->fragment );
+			/* Only print separator if an earlier field exists */
+			if ( sep && ( fields & ( ( 1 << i ) - 1 ) ) )
+				used += ssnprintf ( buf + used, size - used,
+						    "%c", sep );
+
+			/* Print contents of field, possibly encoded */
+			if ( URI_ENCODED & ( 1 << i ) )
+				used += uri_encode ( field, buf + used,
+						     size - used, i );
+			else
+				used += ssnprintf ( buf + used, size - used,
+						    "%s", field );
+		}
 	}
 
 	return used;
@@ -277,10 +279,10 @@ int unparse_uri ( char *buf, size_t size, struct uri *uri ) {
  * Creates a modifiable copy of a URI.
  */
 struct uri * uri_dup ( struct uri *uri ) {
-	size_t len = ( unparse_uri ( NULL, 0, uri ) + 1 );
+	size_t len = ( unparse_uri ( NULL, 0, uri, URI_ALL ) + 1 );
 	char buf[len];
 
-	unparse_uri ( buf, len, uri );
+	unparse_uri ( buf, len, uri, URI_ALL );
 	return parse_uri ( buf );
 }
 
@@ -393,16 +395,31 @@ struct uri * resolve_uri ( struct uri *base_uri,
  * Test for unreserved URI characters
  *
  * @v c			Character to test
+ * @v field		Field of URI in which character lies
  * @ret is_unreserved	Character is an unreserved character
  */
-static int is_unreserved_uri_char ( int c ) {
+static int is_unreserved_uri_char ( int c, int field ) {
 	/* According to RFC3986, the unreserved character set is
 	 *
 	 * A-Z a-z 0-9 - _ . ~
+	 *
+	 * but we also pass & ; = in queries, / in paths,
+	 * and everything in opaques
 	 */
-	return ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
-		 ( c == '-' ) || ( c == '_' ) ||
-		 ( c == '.' ) || ( c == '~' ) );
+	int ok = ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
+		    ( c == '-' ) || ( c == '_' ) ||
+		    ( c == '.' ) || ( c == '~' ) );
+
+	if ( field == URI_QUERY )
+		ok = ok || ( c == ';' ) || ( c == '&' ) || ( c == '=' );
+
+	if ( field == URI_PATH )
+		ok = ok || ( c == '/' );
+
+	if ( field == URI_OPAQUE )
+		ok = 1;
+
+	return ok;
 }
 
 /**
@@ -411,18 +428,20 @@ static int is_unreserved_uri_char ( int c ) {
  * @v raw_string	String to be URI-encoded
  * @v buf		Buffer to contain encoded string
  * @v len		Length of buffer
+ * @v field		Field of URI in which string lies
  * @ret len		Length of encoded string (excluding NUL)
  */
-size_t uri_encode ( const char *raw_string, char *buf, size_t len ) {
+size_t uri_encode ( const char *raw_string, char *buf, ssize_t len,
+		    int field ) {
 	ssize_t remaining = len;
 	size_t used;
 	unsigned char c;
 
-	if ( len )
+	if ( len > 0 )
 		buf[0] = '\0';
 
 	while ( ( c = *(raw_string++) ) ) {
-		if ( is_unreserved_uri_char ( c ) ) {
+		if ( is_unreserved_uri_char ( c, field ) ) {
 			used = ssnprintf ( buf, remaining, "%c", c );
 		} else {
 			used = ssnprintf ( buf, remaining, "%%%02X", c );
@@ -441,17 +460,17 @@ size_t uri_encode ( const char *raw_string, char *buf, size_t len ) {
  * @v buf		Buffer to contain decoded string
  * @v len		Length of buffer
  * @ret len		Length of decoded string (excluding NUL)
+ *
+ * This function may be used in-place, with @a buf the same as
+ * @a encoded_string.
  */
-size_t uri_decode ( const char *encoded_string, char *buf, size_t len ) {
-	ssize_t remaining = len;
+size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len ) {
+	ssize_t remaining;
 	char hexbuf[3];
 	char *hexbuf_end;
 	unsigned char c;
 
-	if ( len )
-		buf[0] = '\0';
-
-	while ( *encoded_string ) {
+	for ( remaining = len; *encoded_string; remaining-- ) {
 		if ( *encoded_string == '%' ) {
 			encoded_string++;
 			snprintf ( hexbuf, sizeof ( hexbuf ), "%s",
@@ -461,7 +480,12 @@ size_t uri_decode ( const char *encoded_string, char *buf, size_t len ) {
 		} else {
 			c = *(encoded_string++);
 		}
-		ssnprintf ( buf++, remaining--, "%c", c );
+		if ( remaining > 1 )
+			*buf++ = c;
 	}
+
+	if ( len )
+		*buf = 0;
+
 	return ( len - remaining );
 }
diff --git a/src/include/gpxe/uri.h b/src/include/gpxe/uri.h
index 03c88d25..405d0ce9 100644
--- a/src/include/gpxe/uri.h
+++ b/src/include/gpxe/uri.h
@@ -20,6 +20,10 @@ FILE_LICENCE ( GPL2_OR_LATER );
  *
  * Note that all fields within a URI are optional and may be NULL.
  *
+ * The pointers to the various fields are packed together so they can
+ * be accessed in array fashion in some places in uri.c where doing so
+ * saves significant code size.
+ *
  * Some examples are probably helpful:
  *
  *   http://www.etherboot.org/wiki :
@@ -61,8 +65,40 @@ struct uri {
 	const char *query;
 	/** Fragment */
 	const char *fragment;
+} __attribute__ (( packed ));
+
+/** A field in a URI
+ *
+ * The order of the indices in this enumeration must match the order
+ * of the fields in the URI structure.
+ */
+enum {
+	URI_SCHEME = 0,		URI_SCHEME_BIT = ( 1 << URI_SCHEME ),
+	URI_OPAQUE = 1,		URI_OPAQUE_BIT = ( 1 << URI_OPAQUE ),
+	URI_USER = 2,		URI_USER_BIT = ( 1 << URI_USER ),
+	URI_PASSWORD = 3,	URI_PASSWORD_BIT = ( 1 << URI_PASSWORD ),
+	URI_HOST = 4,		URI_HOST_BIT = ( 1 << URI_HOST ),
+	URI_PORT = 5,		URI_PORT_BIT = ( 1 << URI_PORT ),
+	URI_PATH = 6,		URI_PATH_BIT = ( 1 << URI_PATH ),
+	URI_QUERY = 7,		URI_QUERY_BIT = ( 1 << URI_QUERY ),
+	URI_FRAGMENT = 8,	URI_FRAGMENT_BIT = ( 1 << URI_FRAGMENT ),
+
+	URI_FIRST_FIELD = URI_SCHEME,
+	URI_LAST_FIELD = URI_FRAGMENT,
 };
 
+/** Extract field from URI */
+#define uri_get_field( uri, field )	(&uri->scheme)[field]
+
+/** All URI fields */
+#define URI_ALL		( URI_SCHEME_BIT | URI_OPAQUE_BIT | URI_USER_BIT | \
+			  URI_PASSWORD_BIT | URI_HOST_BIT | URI_PORT_BIT | \
+			  URI_PATH_BIT | URI_QUERY_BIT | URI_FRAGMENT_BIT )
+
+/** URI fields that should be decoded on storage */
+#define URI_ENCODED	( URI_USER_BIT | URI_PASSWORD_BIT | URI_HOST_BIT | \
+			  URI_PATH_BIT | URI_QUERY_BIT | URI_FRAGMENT_BIT )
+
 /**
  * URI is an absolute URI
  *
@@ -131,14 +167,16 @@ extern struct uri *cwuri;
 
 extern struct uri * parse_uri ( const char *uri_string );
 extern unsigned int uri_port ( struct uri *uri, unsigned int default_port );
-extern int unparse_uri ( char *buf, size_t size, struct uri *uri );
+extern int unparse_uri ( char *buf, size_t size, struct uri *uri,
+			 unsigned int fields );
 extern struct uri * uri_dup ( struct uri *uri );
 extern char * resolve_path ( const char *base_path,
 			     const char *relative_path );
 extern struct uri * resolve_uri ( struct uri *base_uri,
 				  struct uri *relative_uri );
 extern void churi ( struct uri *uri );
-extern size_t uri_encode ( const char *raw_string, char *buf, size_t len );
-extern size_t uri_decode ( const char *encoded_string, char *buf, size_t len );
+extern size_t uri_encode ( const char *raw_string, char *buf, ssize_t len,
+			   int field );
+extern size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len );
 
 #endif /* _GPXE_URI_H */
diff --git a/src/net/tcp/http.c b/src/net/tcp/http.c
index a02408a9..807a0c3e 100644
--- a/src/net/tcp/http.c
+++ b/src/net/tcp/http.c
@@ -417,9 +417,7 @@ static int http_socket_deliver_iob ( struct xfer_interface *socket,
 static void http_step ( struct process *process ) {
 	struct http_request *http =
 		container_of ( process, struct http_request, process );
-	const char *path = http->uri->path;
 	const char *host = http->uri->host;
-	const char *query = http->uri->query;
 	const char *user = http->uri->user;
 	const char *password =
 		( http->uri->password ? http->uri->password : "" );
@@ -429,27 +427,24 @@ static void http_step ( struct process *process ) {
 	char user_pw[ user_pw_len + 1 /* NUL */ ];
 	char user_pw_base64[ user_pw_base64_len + 1 /* NUL */ ];
 	int rc;
+	int request_len = unparse_uri ( NULL, 0, http->uri,
+					URI_PATH_BIT | URI_QUERY_BIT );
 
 	if ( xfer_window ( &http->socket ) ) {
+		char request[request_len + 1];
+
+		/* Construct path?query request */
+		unparse_uri ( request, sizeof ( request ), http->uri,
+			      URI_PATH_BIT | URI_QUERY_BIT );
 
 		/* We want to execute only once */
 		process_del ( &http->process );
 
 		/* Construct authorisation, if applicable */
 		if ( user ) {
-			char *buf = user_pw;
-			ssize_t remaining = sizeof ( user_pw );
-			size_t len;
-
-			/* URI-decode the username and password */
-			len = uri_decode ( user, buf, remaining );
-			buf += len;
-			remaining -= len;
-			*(remaining--, buf++) = ':';
-			len = uri_decode ( password, buf, remaining );
-			buf += len;
-			remaining -= len;
-			assert ( remaining >= 0 );
+			/* Make "user:password" string from decoded fields */
+			snprintf ( user_pw, sizeof ( user_pw ), "%s:%s",
+				   user, password );
 
 			/* Base64-encode the "user:password" string */
 			base64_encode ( user_pw, user_pw_base64 );
@@ -457,14 +452,12 @@ static void http_step ( struct process *process ) {
 
 		/* Send GET request */
 		if ( ( rc = xfer_printf ( &http->socket,
-					  "GET %s%s%s HTTP/1.0\r\n"
+					  "GET %s HTTP/1.0\r\n"
 					  "User-Agent: gPXE/" VERSION "\r\n"
 					  "%s%s%s"
 					  "Host: %s\r\n"
 					  "\r\n",
-					  ( path ? path : "/" ),
-					  ( query ? "?" : "" ),
-					  ( query ? query : "" ),
+					  request,
 					  ( user ?
 					    "Authorization: Basic " : "" ),
 					  ( user ? user_pw_base64 : "" ),
diff --git a/src/tests/uri_test.c b/src/tests/uri_test.c
index 25487603..ca3aed9f 100644
--- a/src/tests/uri_test.c
+++ b/src/tests/uri_test.c
@@ -22,6 +22,9 @@ static struct uri_test uri_tests[] = {
 	  "http://etherboot.org/page3" },
 	{ "tftp://192.168.0.1/", "/tftpboot/vmlinuz",
 	  "tftp://192.168.0.1/tftpboot/vmlinuz" },
+	{ "ftp://the%41nswer%3d:%34ty%32wo@ether%62oot.org:8080/p%41th/foo",
+	  "to?%41=b#%43d",
+	  "ftp://theAnswer%3d:4ty2wo@etherboot.org:8080/path/to?a=b#cd" },
 #if 0
 	"http://www.etherboot.org/wiki",
 	"mailto:bob@nowhere.com",
@@ -41,7 +44,7 @@ static int test_parse_unparse ( const char *uri_string ) {
 		rc = -ENOMEM;
 		goto done;
 	}
-	len = unparse_uri ( buf, sizeof ( buf ), uri );
+	len = unparse_uri ( buf, sizeof ( buf ), uri, URI_ALL );
 
 	/* Compare result */
 	if ( strcmp ( buf, uri_string ) != 0 ) {
@@ -92,7 +95,7 @@ static int test_resolve ( const char *base_uri_string,
 	}
 
 	/* Compare result */
-	len = unparse_uri ( buf, sizeof ( buf ), resolved_uri );
+	len = unparse_uri ( buf, sizeof ( buf ), resolved_uri, URI_ALL );
 	if ( strcmp ( buf, resolved_uri_string ) != 0 ) {
 		printf ( "Resolution of \"%s\"+\"%s\" produced \"%s\"\n",
 			 base_uri_string, relative_uri_string, buf );
diff --git a/src/usr/autoboot.c b/src/usr/autoboot.c
index 8c4398c3..2fa10e6b 100644
--- a/src/usr/autoboot.c
+++ b/src/usr/autoboot.c
@@ -61,7 +61,9 @@ int boot_next_server_and_filename ( struct in_addr next_server,
 				    const char *filename ) {
 	struct uri *uri;
 	struct image *image;
-	char buf[ 23 /* tftp://xxx.xxx.xxx.xxx/ */ + strlen(filename) + 1 ];
+	char buf[ 23 /* tftp://xxx.xxx.xxx.xxx/ */ +
+		  ( 3 * strlen(filename) ) /* completely URI-encoded */
+		  + 1 /* NUL */ ];
 	int filename_is_absolute;
 	int rc;
 
@@ -78,8 +80,10 @@ int boot_next_server_and_filename ( struct in_addr next_server,
 		 * between filenames with and without initial slashes,
 		 * which is significant for TFTP.
 		 */
-		snprintf ( buf, sizeof ( buf ), "tftp://%s/%s",
-			   inet_ntoa ( next_server ), filename );
+		snprintf ( buf, sizeof ( buf ), "tftp://%s/",
+			   inet_ntoa ( next_server ) );
+		uri_encode ( filename, buf + strlen ( buf ),
+			     sizeof ( buf ) - strlen ( buf ), URI_PATH );
 		filename = buf;
 	}
 
diff --git a/src/usr/imgmgmt.c b/src/usr/imgmgmt.c
index 0ffff5d4..023e3f0f 100644
--- a/src/usr/imgmgmt.c
+++ b/src/usr/imgmgmt.c
@@ -61,7 +61,7 @@ int imgfetch ( struct image *image, const char *uri_string,
 	if ( password )
 		uri->password = "***";
 	unparse_uri ( uri_string_redacted, sizeof ( uri_string_redacted ),
-		      uri );
+		      uri, URI_ALL );
 	uri->password = password;
 
 	if ( ( rc = create_downloader ( &monojob, image, image_register,
author	Joshua Oreman	2009-12-30 04:36:04 +0100
committer	Marty Connor	2010-01-21 00:14:28 +0100
commit	3d9dd93a1452e28c728483b03e352691238491ed (patch)
tree	73a41396fd514d12dbd7bd69ebfc49810bf73c7c
parent	[settings] Add automagic "netX" settings block for last opened netdev (diff)
download	ipxe-3d9dd93a1452e28c728483b03e352691238491ed.tar.gz ipxe-3d9dd93a1452e28c728483b03e352691238491ed.tar.xz ipxe-3d9dd93a1452e28c728483b03e352691238491ed.zip