url: remove \t \n \r in url.parse() similar to WHATWG

WHATWG URL removes tab, new line, and carraige return characters before
processing URL strings. To narrow the differences between WHATWG URL and
url.parse(), and thus reduce opportunities for host spoofing etc. due to
differences between the two APIs, let's do the same with url.parse().

PR-URL: https://github.com/nodejs/node/pull/45116
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Tobias Nießen <tniessen@tnie.de>
Reviewed-By: Antoine du Hamel <duhamelantoine1995@gmail.com>
This commit is contained in:
Rich Trott
2022-10-24 14:24:46 -07:00
committed by GitHub
parent 1930fcd7ef
commit fa7e08cfc9
2 changed files with 11 additions and 7 deletions

View File

@@ -319,6 +319,10 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
case CHAR_TAB:
case CHAR_LINE_FEED:
case CHAR_CARRIAGE_RETURN:
// WHATWG URL removes tabs, newlines, and carriage returns. Let's do that too.
rest = rest.slice(0, i) + rest.slice(i + 1);
i -= 1;
break;
case CHAR_SPACE:
case CHAR_DOUBLE_QUOTE:
case CHAR_PERCENT:

View File

@@ -853,16 +853,16 @@ const parseTests = {
'http://a\r" \t\n<\'b:b@c\r\nd/e?f': {
protocol: 'http:',
slashes: true,
auth: 'a\r" \t\n<\'b:b',
host: 'c',
auth: 'a" <\'b:b',
host: 'cd',
port: null,
hostname: 'c',
hostname: 'cd',
hash: null,
search: '?f',
query: 'f',
pathname: '%0D%0Ad/e',
path: '%0D%0Ad/e?f',
href: 'http://a%0D%22%20%09%0A%3C\'b:b@c/%0D%0Ad/e?f'
pathname: '/e',
path: '/e?f',
href: 'http://a%22%20%3C\'b:b@cd/e?f'
},
'https://*': {
@@ -1007,7 +1007,7 @@ for (const u in parseTests) {
assert.deepStrictEqual(
actual,
expected,
`expected ${inspect(expected)}, got ${inspect(actual)}`
`parsing ${u} and expected ${inspect(expected)} but got ${inspect(actual)}`
);
assert.deepStrictEqual(
spaced,