summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Brown2021-11-12 00:31:23 +0100
committerMichael Brown2021-11-12 10:58:29 +0100
commit1844aacc837bf81cb1959fa65f2e52dcc70a0cae (patch)
tree8ad8156c38473e608269251518c125a8cb0bda5d
parent[xhci] Allow for permanent failure of the command mechanism (diff)
downloadipxe-1844aacc837bf81cb1959fa65f2e52dcc70a0cae.tar.gz
ipxe-1844aacc837bf81cb1959fa65f2e52dcc70a0cae.tar.xz
ipxe-1844aacc837bf81cb1959fa65f2e52dcc70a0cae.zip
[uri] Retain original encodings for path, query, and fragment fields
iPXE decodes any percent-encoded characters during the URI parsing stage, thereby allowing protocol implementations to consume the raw field values directly without further decoding. When reconstructing a URI string for use in an HTTP request line, the percent-encoding is currently reapplied in a reversible way: we guarantee that our reconstructed URI string could be decoded to give the same raw field values. This technically violates RFC3986, which states that "URIs that differ in the replacement of a reserved character with its corresponding percent-encoded octet are not equivalent". Experiments show that several HTTP server applications will attach meaning to the choice of whether or not a particular character was percent-encoded, even when the percent-encoding is unnecessary from the perspective of parsing the URI into its component fields. Fix by storing the originally encoded substrings for the path, query, and fragment fields and using these original encoded versions when reconstructing a URI string. The path field is also stored as a decoded string, for use by protocols such as TFTP that communicate using raw strings rather than URI-encoded strings. All other fields (such as the username and password) continue to be stored only in their decoded versions since nothing ever needs to know the originally encoded versions of these fields. Signed-off-by: Michael Brown <mcb30@ipxe.org>
-rw-r--r--src/core/uri.c131
-rw-r--r--src/include/ipxe/uri.h31
-rw-r--r--src/net/tcp/httpcore.c4
-rw-r--r--src/tests/uri_test.c53
-rw-r--r--src/usr/imgmgmt.c4
5 files changed, 148 insertions, 75 deletions
diff --git a/src/core/uri.c b/src/core/uri.c
index e9e512ab..a0f79e9e 100644
--- a/src/core/uri.c
+++ b/src/core/uri.c
@@ -79,12 +79,10 @@ size_t uri_decode ( const char *encoded, void *buf, size_t len ) {
/**
* Decode URI field in-place
*
- * @v uri URI
- * @v field URI field index
+ * @v encoded Encoded field, or NULL
*/
-static void uri_decode_inplace ( struct uri *uri, unsigned int field ) {
- const char *encoded = uri_field ( uri, field );
- char *decoded = ( ( char * ) encoded );
+static void uri_decode_inplace ( char *encoded ) {
+ char *decoded = encoded;
size_t len;
/* Do nothing if field is not present */
@@ -150,7 +148,7 @@ static int uri_character_escaped ( char c, unsigned int field ) {
* parser but for any other URI parsers (e.g. HTTP query
* string parsers, which care about '=' and '&').
*/
- static const char *escaped[URI_FIELDS] = {
+ static const char *escaped[URI_EPATH] = {
/* Scheme or default: escape everything */
[URI_SCHEME] = "/#:@?=&",
/* Opaque part: escape characters which would affect
@@ -172,20 +170,21 @@ static int uri_character_escaped ( char c, unsigned int field ) {
* appears within paths.
*/
[URI_PATH] = "#:@?",
- /* Query: escape everything except '/', which
- * sometimes appears within queries.
- */
- [URI_QUERY] = "#:@?",
- /* Fragment: escape everything */
- [URI_FRAGMENT] = "/#:@?",
};
- return ( /* Always escape non-printing characters and whitespace */
- ( ! isprint ( c ) ) || ( c == ' ' ) ||
- /* Always escape '%' */
- ( c == '%' ) ||
- /* Escape field-specific characters */
- strchr ( escaped[field], c ) );
+ /* Always escape non-printing characters and whitespace */
+ if ( ( ! isprint ( c ) ) || ( c == ' ' ) )
+ return 1;
+
+ /* Escape nothing else in already-escaped fields */
+ if ( field >= URI_EPATH )
+ return 0;
+
+ /* Escape '%' and any field-specific characters */
+ if ( ( c == '%' ) || strchr ( escaped[field], c ) )
+ return 1;
+
+ return 0;
}
/**
@@ -262,10 +261,12 @@ static void uri_dump ( const struct uri *uri ) {
DBGC ( uri, " port \"%s\"", uri->port );
if ( uri->path )
DBGC ( uri, " path \"%s\"", uri->path );
- if ( uri->query )
- DBGC ( uri, " query \"%s\"", uri->query );
- if ( uri->fragment )
- DBGC ( uri, " fragment \"%s\"", uri->fragment );
+ if ( uri->epath )
+ DBGC ( uri, " epath \"%s\"", uri->epath );
+ if ( uri->equery )
+ DBGC ( uri, " equery \"%s\"", uri->equery );
+ if ( uri->efragment )
+ DBGC ( uri, " efragment \"%s\"", uri->efragment );
if ( uri->params )
DBGC ( uri, " params \"%s\"", uri->params->name );
}
@@ -298,17 +299,19 @@ struct uri * parse_uri ( const char *uri_string ) {
char *raw;
char *tmp;
char *path;
+ char *epath;
char *authority;
size_t raw_len;
unsigned int field;
- /* Allocate space for URI struct and a copy of the string */
+ /* Allocate space for URI struct and two copies of the string */
raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
- uri = zalloc ( sizeof ( *uri ) + raw_len );
+ uri = zalloc ( sizeof ( *uri ) + ( 2 * raw_len ) );
if ( ! uri )
return NULL;
ref_init ( &uri->refcnt, uri_free );
raw = ( ( ( void * ) uri ) + sizeof ( *uri ) );
+ path = ( raw + raw_len );
/* Copy in the raw string */
memcpy ( raw, uri_string, raw_len );
@@ -328,7 +331,7 @@ struct uri * parse_uri ( const char *uri_string ) {
/* Chop off the fragment, if it exists */
if ( ( tmp = strchr ( raw, '#' ) ) ) {
*(tmp++) = '\0';
- uri->fragment = tmp;
+ uri->efragment = tmp;
}
/* Identify absolute/relative URI */
@@ -338,47 +341,47 @@ struct uri * parse_uri ( const char *uri_string ) {
*(tmp++) = '\0';
if ( *tmp == '/' ) {
/* Absolute URI with hierarchical part */
- path = tmp;
+ epath = tmp;
} else {
/* Absolute URI with opaque part */
uri->opaque = tmp;
- path = NULL;
+ epath = NULL;
}
} else {
/* Relative URI */
- path = raw;
+ epath = raw;
}
/* If we don't have a path (i.e. we have an absolute URI with
* an opaque portion, we're already finished processing
*/
- if ( ! path )
+ if ( ! epath )
goto done;
/* Chop off the query, if it exists */
- if ( ( tmp = strchr ( path, '?' ) ) ) {
+ if ( ( tmp = strchr ( epath, '?' ) ) ) {
*(tmp++) = '\0';
- uri->query = tmp;
+ uri->equery = tmp;
}
/* If we have no path remaining, then we're already finished
* processing.
*/
- if ( ! path[0] )
+ if ( ! epath[0] )
goto done;
/* Identify net/absolute/relative path */
- if ( uri->scheme && ( strncmp ( path, "//", 2 ) == 0 ) ) {
+ if ( uri->scheme && ( strncmp ( epath, "//", 2 ) == 0 ) ) {
/* Net path. If this is terminated by the first '/'
* of an absolute path, then we have no space for a
* terminator after the authority field, so shuffle
* the authority down by one byte, overwriting one of
* the two slashes.
*/
- authority = ( path + 2 );
+ authority = ( epath + 2 );
if ( ( tmp = strchr ( authority, '/' ) ) ) {
/* Shuffle down */
- uri->path = tmp;
+ uri->epath = tmp;
memmove ( ( authority - 1 ), authority,
( tmp - authority ) );
authority--;
@@ -386,10 +389,16 @@ struct uri * parse_uri ( const char *uri_string ) {
}
} else {
/* Absolute/relative path */
- uri->path = path;
+ uri->epath = epath;
authority = NULL;
}
+ /* Create copy of path for decoding */
+ if ( uri->epath ) {
+ strcpy ( path, uri->epath );
+ uri->path = path;
+ }
+
/* If we don't have an authority (i.e. we have a non-net
* path), we're already finished processing
*/
@@ -421,8 +430,8 @@ struct uri * parse_uri ( const char *uri_string ) {
done:
/* Decode fields in-place */
- for ( field = 0 ; field < URI_FIELDS ; field++ )
- uri_decode_inplace ( uri, field );
+ for ( field = 0 ; field < URI_EPATH ; field++ )
+ uri_decode_inplace ( ( char * ) uri_field ( uri, field ) );
DBGC ( uri, "URI parsed \"%s\" to", uri_string );
uri_dump ( uri );
@@ -458,8 +467,8 @@ size_t format_uri ( const struct uri *uri, char *buf, size_t len ) {
static const char prefixes[URI_FIELDS] = {
[URI_PASSWORD] = ':',
[URI_PORT] = ':',
- [URI_QUERY] = '?',
- [URI_FRAGMENT] = '#',
+ [URI_EQUERY] = '?',
+ [URI_EFRAGMENT] = '#',
};
char prefix;
size_t used = 0;
@@ -480,6 +489,10 @@ size_t format_uri ( const struct uri *uri, char *buf, size_t len ) {
if ( ! uri_field ( uri, field ) )
continue;
+ /* Skip path field if encoded path is present */
+ if ( ( field == URI_PATH ) && uri->epath )
+ continue;
+
/* Prefix this field, if applicable */
prefix = prefixes[field];
if ( ( field == URI_HOST ) && ( uri->user != NULL ) )
@@ -676,6 +689,7 @@ char * resolve_path ( const char *base_path,
struct uri * resolve_uri ( const struct uri *base_uri,
struct uri *relative_uri ) {
struct uri tmp_uri;
+ char *tmp_epath = NULL;
char *tmp_path = NULL;
struct uri *new_uri;
@@ -685,20 +699,27 @@ struct uri * resolve_uri ( const struct uri *base_uri,
/* Mangle URI */
memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
- if ( relative_uri->path ) {
- tmp_path = resolve_path ( ( base_uri->path ?
- base_uri->path : "/" ),
- relative_uri->path );
+ if ( relative_uri->epath ) {
+ tmp_epath = resolve_path ( ( base_uri->epath ?
+ base_uri->epath : "/" ),
+ relative_uri->epath );
+ if ( ! tmp_epath )
+ goto err_epath;
+ tmp_path = strdup ( tmp_epath );
+ if ( ! tmp_path )
+ goto err_path;
+ uri_decode_inplace ( tmp_path );
+ tmp_uri.epath = tmp_epath;
tmp_uri.path = tmp_path;
- tmp_uri.query = relative_uri->query;
- tmp_uri.fragment = relative_uri->fragment;
+ tmp_uri.equery = relative_uri->equery;
+ tmp_uri.efragment = relative_uri->efragment;
tmp_uri.params = relative_uri->params;
- } else if ( relative_uri->query ) {
- tmp_uri.query = relative_uri->query;
- tmp_uri.fragment = relative_uri->fragment;
+ } else if ( relative_uri->equery ) {
+ tmp_uri.equery = relative_uri->equery;
+ tmp_uri.efragment = relative_uri->efragment;
tmp_uri.params = relative_uri->params;
- } else if ( relative_uri->fragment ) {
- tmp_uri.fragment = relative_uri->fragment;
+ } else if ( relative_uri->efragment ) {
+ tmp_uri.efragment = relative_uri->efragment;
tmp_uri.params = relative_uri->params;
} else if ( relative_uri->params ) {
tmp_uri.params = relative_uri->params;
@@ -707,7 +728,14 @@ struct uri * resolve_uri ( const struct uri *base_uri,
/* Create demangled URI */
new_uri = uri_dup ( &tmp_uri );
free ( tmp_path );
+ free ( tmp_epath );
return new_uri;
+
+ free ( tmp_path );
+ err_path:
+ free ( tmp_epath );
+ err_epath:
+ return NULL;
}
/**
@@ -746,6 +774,7 @@ static struct uri * tftp_uri ( struct sockaddr *sa_server,
if ( asprintf ( &path, "/%s", filename ) < 0 )
goto err_path;
tmp.path = path;
+ tmp.epath = path;
/* Demangle URI */
uri = uri_dup ( &tmp );
diff --git a/src/include/ipxe/uri.h b/src/include/ipxe/uri.h
index 3879a0e7..e5b7c861 100644
--- a/src/include/ipxe/uri.h
+++ b/src/include/ipxe/uri.h
@@ -46,6 +46,20 @@ struct parameters;
* scheme = "ftp", user = "joe", password = "secret",
* host = "insecure.org", port = "8081", path = "/hidden/path/to",
* query = "what=is", fragment = "this"
+ *
+ * The URI syntax includes a percent-encoding mechanism that can be
+ * used to represent characters that would otherwise not be possible,
+ * such as a '/' character within the password field. These encodings
+ * are decoded during the URI parsing stage, thereby allowing protocol
+ * implementations to consume the raw field values directly without
+ * further decoding.
+ *
+ * Some protocols (such as HTTP) communicate using URI-encoded values.
+ * For these protocols, the original encoded substring must be
+ * retained verbatim since the choice of whether or not to encode a
+ * particular character may have significance to the receiving
+ * application. We therefore retain the originally-encoded substrings
+ * for the path, query, and fragment fields.
*/
struct uri {
/** Reference count */
@@ -62,12 +76,14 @@ struct uri {
const char *host;
/** Port number */
const char *port;
- /** Path */
+ /** Path (after URI decoding) */
const char *path;
- /** Query */
- const char *query;
- /** Fragment */
- const char *fragment;
+ /** Path (with original URI encoding) */
+ const char *epath;
+ /** Query (with original URI encoding) */
+ const char *equery;
+ /** Fragment (with original URI encoding) */
+ const char *efragment;
/** Form parameters */
struct parameters *params;
} __attribute__ (( packed ));
@@ -100,8 +116,9 @@ enum uri_fields {
URI_HOST = URI_FIELD ( host ),
URI_PORT = URI_FIELD ( port ),
URI_PATH = URI_FIELD ( path ),
- URI_QUERY = URI_FIELD ( query ),
- URI_FRAGMENT = URI_FIELD ( fragment ),
+ URI_EPATH = URI_FIELD ( epath ),
+ URI_EQUERY = URI_FIELD ( equery ),
+ URI_EFRAGMENT = URI_FIELD ( efragment ),
URI_FIELDS
};
diff --git a/src/net/tcp/httpcore.c b/src/net/tcp/httpcore.c
index 01bb496b..fd94b5f0 100644
--- a/src/net/tcp/httpcore.c
+++ b/src/net/tcp/httpcore.c
@@ -614,8 +614,8 @@ int http_open ( struct interface *xfer, struct http_method *method,
/* Calculate request URI length */
memset ( &request_uri, 0, sizeof ( request_uri ) );
- request_uri.path = ( uri->path ? uri->path : "/" );
- request_uri.query = uri->query;
+ request_uri.epath = ( uri->epath ? uri->epath : "/" );
+ request_uri.equery = uri->equery;
request_uri_len =
( format_uri ( &request_uri, NULL, 0 ) + 1 /* NUL */);
diff --git a/src/tests/uri_test.c b/src/tests/uri_test.c
index 92c2f903..929ab363 100644
--- a/src/tests/uri_test.c
+++ b/src/tests/uri_test.c
@@ -149,8 +149,10 @@ static void uri_okx ( struct uri *uri, struct uri *expected, const char *file,
okx ( uristrcmp ( uri->host, expected->host ) == 0, file, line );
okx ( uristrcmp ( uri->port, expected->port ) == 0, file, line );
okx ( uristrcmp ( uri->path, expected->path ) == 0, file, line );
- okx ( uristrcmp ( uri->query, expected->query ) == 0, file, line );
- okx ( uristrcmp ( uri->fragment, expected->fragment ) == 0, file, line);
+ okx ( uristrcmp ( uri->epath, expected->epath ) == 0, file, line );
+ okx ( uristrcmp ( uri->equery, expected->equery ) == 0, file, line );
+ okx ( uristrcmp ( uri->efragment, expected->efragment ) == 0,
+ file, line);
okx ( uri->params == expected->params, file, line );
}
#define uri_ok( uri, expected ) uri_okx ( uri, expected, __FILE__, __LINE__ )
@@ -490,25 +492,33 @@ static struct uri_test uri_empty = {
/** Basic HTTP URI */
static struct uri_test uri_boot_ipxe_org = {
"http://boot.ipxe.org/demo/boot.php",
- { .scheme = "http", .host = "boot.ipxe.org", .path = "/demo/boot.php" }
+ { .scheme = "http", .host = "boot.ipxe.org",
+ .path = "/demo/boot.php", .epath = "/demo/boot.php" },
};
/** Basic opaque URI */
static struct uri_test uri_mailto = {
"mailto:ipxe-devel@lists.ipxe.org",
- { .scheme = "mailto", .opaque = "ipxe-devel@lists.ipxe.org" }
+ { .scheme = "mailto", .opaque = "ipxe-devel@lists.ipxe.org" },
+};
+
+/** Basic host-only URI */
+static struct uri_test uri_host = {
+ "http://boot.ipxe.org",
+ { .scheme = "http", .host = "boot.ipxe.org" },
};
/** Basic path-only URI */
static struct uri_test uri_path = {
"/var/lib/tftpboot/pxelinux.0",
- { .path = "/var/lib/tftpboot/pxelinux.0" },
+ { .path = "/var/lib/tftpboot/pxelinux.0",
+ .epath ="/var/lib/tftpboot/pxelinux.0" },
};
/** Path-only URI with escaped characters */
static struct uri_test uri_path_escaped = {
"/hello%20world%3F",
- { .path = "/hello world?" },
+ { .path = "/hello world?", .epath = "/hello%20world%3F" },
};
/** HTTP URI with all the trimmings */
@@ -521,8 +531,9 @@ static struct uri_test uri_http_all = {
.host = "example.com",
.port = "3001",
.path = "/~foo/cgi-bin/foo.pl",
- .query = "a=b&c=d",
- .fragment = "bit",
+ .epath = "/~foo/cgi-bin/foo.pl",
+ .equery = "a=b&c=d",
+ .efragment = "bit",
},
};
@@ -533,8 +544,9 @@ static struct uri_test uri_http_escaped = {
.scheme = "https",
.host = "test.ipxe.org",
.path = "/wtf?\n",
- .query = "kind#of/uri is",
- .fragment = "this?",
+ .epath = "/wtf%3F%0A",
+ .equery = "kind%23of/uri%20is",
+ .efragment = "this%3F",
},
};
@@ -550,8 +562,9 @@ static struct uri_test uri_http_escaped_improper = {
.scheme = "https",
.host = "test.ipxe.org",
.path = "/wtf?\n",
- .query = "kind#of/uri is",
- .fragment = "this?",
+ .epath = "/wt%66%3f\n",
+ .equery = "kind%23of/uri is",
+ .efragment = "this?",
},
};
@@ -562,6 +575,7 @@ static struct uri_test uri_ipv6 = {
.scheme = "http",
.host = "[2001:ba8:0:1d4::6950:5845]",
.path = "/",
+ .epath = "/",
},
};
@@ -573,6 +587,7 @@ static struct uri_test uri_ipv6_port = {
.host = "[2001:ba8:0:1d4::6950:5845]",
.port = "8001",
.path = "/boot",
+ .epath = "/boot",
},
};
@@ -583,6 +598,7 @@ static struct uri_test uri_ipv6_local = {
.scheme = "http",
.host = "[fe80::69ff:fe50:5845%net0]",
.path = "/ipxe",
+ .epath = "/ipxe",
},
};
@@ -598,6 +614,7 @@ static struct uri_test uri_ipv6_local_non_conforming = {
.scheme = "http",
.host = "[fe80::69ff:fe50:5845%net0]",
.path = "/ipxe",
+ .epath = "/ipxe",
},
};
@@ -625,6 +642,7 @@ static struct uri_test uri_file_absolute = {
{
.scheme = "file",
.path = "/boot/script.ipxe",
+ .epath = "/boot/script.ipxe",
},
};
@@ -635,6 +653,7 @@ static struct uri_test uri_file_volume = {
.scheme = "file",
.host = "hpilo",
.path = "/boot/script.ipxe",
+ .epath = "/boot/script.ipxe",
},
};
@@ -736,6 +755,7 @@ static struct uri_pxe_test uri_pxe_absolute = {
.scheme = "http",
.host = "not.a.tftp",
.path = "/uri",
+ .epath = "/uri",
},
"http://not.a.tftp/uri",
};
@@ -754,6 +774,7 @@ static struct uri_pxe_test uri_pxe_absolute_path = {
.scheme = "tftp",
.host = "192.168.0.2",
.path = "//absolute/path",
+ .epath = "//absolute/path",
},
"tftp://192.168.0.2//absolute/path",
};
@@ -772,6 +793,7 @@ static struct uri_pxe_test uri_pxe_relative_path = {
.scheme = "tftp",
.host = "192.168.0.3",
.path = "/relative/path",
+ .epath = "/relative/path",
},
"tftp://192.168.0.3/relative/path",
};
@@ -790,8 +812,9 @@ static struct uri_pxe_test uri_pxe_icky = {
.scheme = "tftp",
.host = "10.0.0.6",
.path = "/C:\\tftpboot\\icky#path",
+ .epath = "/C:\\tftpboot\\icky#path",
},
- "tftp://10.0.0.6/C%3A\\tftpboot\\icky%23path",
+ "tftp://10.0.0.6/C:\\tftpboot\\icky#path",
};
/** PXE URI with custom port */
@@ -810,6 +833,7 @@ static struct uri_pxe_test uri_pxe_port = {
.host = "192.168.0.1",
.port = "4069",
.path = "//another/path",
+ .epath = "//another/path",
},
"tftp://192.168.0.1:4069//another/path",
};
@@ -873,6 +897,7 @@ static struct uri_params_test uri_params = {
.scheme = "http",
.host = "boot.ipxe.org",
.path = "/demo/boot.php",
+ .epath = "/demo/boot.php",
},
NULL,
uri_params_list,
@@ -902,6 +927,7 @@ static struct uri_params_test uri_named_params = {
.host = "192.168.100.4",
.port = "3001",
.path = "/register",
+ .epath = "/register",
},
"foo",
uri_named_params_list,
@@ -917,6 +943,7 @@ static void uri_test_exec ( void ) {
uri_parse_format_dup_ok ( &uri_empty );
uri_parse_format_dup_ok ( &uri_boot_ipxe_org );
uri_parse_format_dup_ok ( &uri_mailto );
+ uri_parse_format_dup_ok ( &uri_host );
uri_parse_format_dup_ok ( &uri_path );
uri_parse_format_dup_ok ( &uri_path_escaped );
uri_parse_format_dup_ok ( &uri_http_all );
diff --git a/src/usr/imgmgmt.c b/src/usr/imgmgmt.c
index f8d14915..b7fc8293 100644
--- a/src/usr/imgmgmt.c
+++ b/src/usr/imgmgmt.c
@@ -58,8 +58,8 @@ int imgdownload ( struct uri *uri, unsigned long timeout,
memcpy ( &uri_redacted, uri, sizeof ( uri_redacted ) );
uri_redacted.user = NULL;
uri_redacted.password = NULL;
- uri_redacted.query = NULL;
- uri_redacted.fragment = NULL;
+ uri_redacted.equery = NULL;
+ uri_redacted.efragment = NULL;
uri_string_redacted = format_uri_alloc ( &uri_redacted );
if ( ! uri_string_redacted ) {
rc = -ENOMEM;