Do not allow invalid UTF-8 in method, URI, and header names

This commit is contained in:
cpq 2023-11-25 11:33:07 +00:00
parent 28162f8034
commit bd53e46873
3 changed files with 52 additions and 28 deletions

View File

@ -1948,13 +1948,20 @@ struct mg_str *mg_http_get_header(struct mg_http_message *h, const char *name) {
return NULL;
}
// Get character length. Used to parse method, URI, headers
static size_t clen(const char *s) {
uint8_t c = *(uint8_t *) s;
// Return true if `c` is a UTF-8 continuation byte, i.e. has the bit pattern
// 10xxxxxx.
static bool vcb(uint8_t c) {
  return (c & 0xc0) == 0x80;
}

// Get the byte length of the character that starts at `s`, never looking at
// or beyond `end`. Used to parse method, URI, headers.
//
// Returns:
//   1     for a printable ASCII character, '!' through '~' inclusive
//   2..4  for a UTF-8 lead byte followed, inside [s, end), by the required
//         number of continuation bytes
//   0     for anything else: empty range, space, control characters, DEL,
//         a stray continuation byte, a truncated or malformed sequence
//
// Only the lead/continuation bit patterns are checked; overlong encodings
// and out-of-range code points are not rejected here.
static size_t clen(const char *s, const char *end) {
  const unsigned char *u = (const unsigned char *) s;
  long n = (long) (end - s);  // Bytes available, lead byte included
  unsigned char c;
  if (n <= 0) return 0;  // Nothing to read, do not touch *s
  c = u[0];
  if (c > ' ' && c <= '~') return 1;  // Usual ascii printed char, incl. '~'
  // Each multi-byte case checks the remaining length first, so that the
  // continuation bytes are only read when they are inside the buffer
  if ((c & 0xe0) == 0xc0 && n > 1 && vcb(u[1])) return 2;  // 2-byte UTF8
  if ((c & 0xf0) == 0xe0 && n > 2 && vcb(u[1]) && vcb(u[2])) return 3;
  if ((c & 0xf8) == 0xf0 && n > 3 && vcb(u[1]) && vcb(u[2]) && vcb(u[3]))
    return 4;
  return 0;
}
@ -1976,9 +1983,11 @@ static bool mg_http_parse_headers(const char *s, const char *end,
if (s >= end) return false;
if (s[0] == '\n' || (s[0] == '\r' && s[1] == '\n')) break;
k.ptr = s;
while (s < end && s[0] != ':' && (n = clen(s)) > 0) s += n, k.len += n;
while (s < end && s[0] != ':' && (n = clen(s, end)) > 0) s += n, k.len += n;
if (k.len == 0) return false; // Empty name
if (s >= end || *s++ != ':') return false; // Invalid, not followed by :
if (s >= end || clen(s, end) == 0) return false; // Invalid UTF-8
if (*s++ != ':') return false; // Invalid, not followed by :
// if (clen(s, end) == 0) return false; // Invalid UTF-8
while (s < end && s[0] == ' ') s++; // Skip spaces
if ((s = skiptorn(s, end, &v)) == NULL) return false;
while (v.len > 0 && v.ptr[v.len - 1] == ' ') v.len--; // Trim spaces
@ -2004,10 +2013,10 @@ int mg_http_parse(const char *s, size_t len, struct mg_http_message *hm) {
// Parse request line
hm->method.ptr = s;
while (s < end && (n = clen(s)) > 0) s += n, hm->method.len += n;
while (s < end && (n = clen(s, end)) > 0) s += n, hm->method.len += n;
while (s < end && s[0] == ' ') s++; // Skip spaces
hm->uri.ptr = s;
while (s < end && (n = clen(s)) > 0) s += n, hm->uri.len += n;
while (s < end && (n = clen(s, end)) > 0) s += n, hm->uri.len += n;
while (s < end && s[0] == ' ') s++; // Skip spaces
if ((s = skiptorn(s, end, &hm->proto)) == NULL) return false;

View File

@ -193,13 +193,20 @@ struct mg_str *mg_http_get_header(struct mg_http_message *h, const char *name) {
return NULL;
}
// Get character length. Used to parse method, URI, headers
static size_t clen(const char *s) {
uint8_t c = *(uint8_t *) s;
// Return true if `c` is a UTF-8 continuation byte, i.e. has the bit pattern
// 10xxxxxx.
static bool vcb(uint8_t c) {
  return (c & 0xc0) == 0x80;
}

// Get the byte length of the character that starts at `s`, never looking at
// or beyond `end`. Used to parse method, URI, headers.
//
// Returns:
//   1     for a printable ASCII character, '!' through '~' inclusive
//   2..4  for a UTF-8 lead byte followed, inside [s, end), by the required
//         number of continuation bytes
//   0     for anything else: empty range, space, control characters, DEL,
//         a stray continuation byte, a truncated or malformed sequence
//
// Only the lead/continuation bit patterns are checked; overlong encodings
// and out-of-range code points are not rejected here.
static size_t clen(const char *s, const char *end) {
  const unsigned char *u = (const unsigned char *) s;
  long n = (long) (end - s);  // Bytes available, lead byte included
  unsigned char c;
  if (n <= 0) return 0;  // Nothing to read, do not touch *s
  c = u[0];
  if (c > ' ' && c <= '~') return 1;  // Usual ascii printed char, incl. '~'
  // Each multi-byte case checks the remaining length first, so that the
  // continuation bytes are only read when they are inside the buffer
  if ((c & 0xe0) == 0xc0 && n > 1 && vcb(u[1])) return 2;  // 2-byte UTF8
  if ((c & 0xf0) == 0xe0 && n > 2 && vcb(u[1]) && vcb(u[2])) return 3;
  if ((c & 0xf8) == 0xf0 && n > 3 && vcb(u[1]) && vcb(u[2]) && vcb(u[3]))
    return 4;
  return 0;
}
@ -221,9 +228,11 @@ static bool mg_http_parse_headers(const char *s, const char *end,
if (s >= end) return false;
if (s[0] == '\n' || (s[0] == '\r' && s[1] == '\n')) break;
k.ptr = s;
while (s < end && s[0] != ':' && (n = clen(s)) > 0) s += n, k.len += n;
while (s < end && s[0] != ':' && (n = clen(s, end)) > 0) s += n, k.len += n;
if (k.len == 0) return false; // Empty name
if (s >= end || *s++ != ':') return false; // Invalid, not followed by :
if (s >= end || clen(s, end) == 0) return false; // Invalid UTF-8
if (*s++ != ':') return false; // Invalid, not followed by :
// if (clen(s, end) == 0) return false; // Invalid UTF-8
while (s < end && s[0] == ' ') s++; // Skip spaces
if ((s = skiptorn(s, end, &v)) == NULL) return false;
while (v.len > 0 && v.ptr[v.len - 1] == ' ') v.len--; // Trim spaces
@ -249,10 +258,10 @@ int mg_http_parse(const char *s, size_t len, struct mg_http_message *hm) {
// Parse request line
hm->method.ptr = s;
while (s < end && (n = clen(s)) > 0) s += n, hm->method.len += n;
while (s < end && (n = clen(s, end)) > 0) s += n, hm->method.len += n;
while (s < end && s[0] == ' ') s++; // Skip spaces
hm->uri.ptr = s;
while (s < end && (n = clen(s)) > 0) s += n, hm->uri.len += n;
while (s < end && (n = clen(s, end)) > 0) s += n, hm->uri.len += n;
while (s < end && s[0] == ' ') s++; // Skip spaces
if ((s = skiptorn(s, end, &hm->proto)) == NULL) return false;

View File

@ -1482,8 +1482,8 @@ static void test_http_parse(void) {
}
// #2292: fail on stray \r inside the headers
ASSERT(mg_http_parse("a є\n\n", 6, &req) > 0);
ASSERT(mg_http_parse("a b\n\n", 5, &req) > 0);
ASSERT(mg_http_parse("a є\n\n", 6, &req) == 6);
ASSERT(mg_http_parse("a b\n\n", 5, &req) == 5);
ASSERT(mg_http_parse("a b\na:\n\n", 8, &req) > 0);
ASSERT(mg_http_parse("a b\na:\r\n\n", 9, &req) > 0);
ASSERT(mg_http_parse("a b\n\ra:\r\n\n", 10, &req) == -1);
@ -1602,6 +1602,12 @@ static void test_http_parse(void) {
ASSERT(mg_http_parse(s, strlen(s), &hm) == (int) strlen(s));
s = "a\nb:b\nc:c\n\n";
ASSERT(mg_http_parse(s, strlen(s), &hm) < 0);
s = "a b\nc: \xc0\n\n"; // Invalid UTF in the header value: accept
ASSERT(mg_http_parse(s, strlen(s), &hm) == (int) strlen(s));
ASSERT((v = mg_http_get_header(&hm, "c")) != NULL);
ASSERT(mg_vcmp(v, "\xc0") == 0);
s = "a b\n\xc0: 2\n\n"; // Invalid UTF in the header name: do NOT accept
ASSERT(mg_http_parse(s, strlen(s), &hm) == -1);
}
}