diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f4711d4 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,39 @@ +name: CI + +on: + - push + - pull_request + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + node: + - 12 + - 14 + - 16 + - 17 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v2 + with: + node-version: ${{ matrix.node }} + - run: npm install + - run: npm test + - uses: coverallsapp/github-action@1.1.3 + if: matrix.node == 12 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + test-browser: + runs-on: ubuntu-latest + env: + SAUCE_USERNAME: url-parse + SAUCE_ACCESS_KEY: ${{ secrets.SAUCE_ACCESS_KEY }} + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v2 + with: + node-version: 12 + - run: npm install + - run: npm run test-browser diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 7333c3e..0000000 --- a/.travis.yml +++ /dev/null @@ -1,25 +0,0 @@ -language: node_js -matrix: - fast_finish: true - include: - - node_js: "14" - env: SCRIPT=test - - node_js: "12" - env: SCRIPT=test - - node_js: "10" - env: SCRIPT=test - - node_js: "12" - env: - - secure: IF01oyIKSs0C5dARdYRTilKnU1TG4zenjjEPClkQxAWIpUOxl9xcNJWDVEOPxJ/4pVt+pozyT80Rp7efh6ZiREJIQI1tUboBKSqZzSbnD5uViQNSbQ90PaDP0FIUc0IQ5o07W36rijBB0DTmtU1VofzN9PKkJO7XiSSXevI8RcM= - - SAUCE_USERNAME=url-parse - - SCRIPT=test-browser -script: - - "npm run ${SCRIPT}" -after_script: - - 'if [ "${SCRIPT}" == "test" ]; then c8 report --reporter=text-lcov | coveralls; fi' -notifications: - irc: - channels: - - "irc.freenode.org#unshift" - on_success: change - on_failure: change diff --git a/README.md b/README.md index f81f919..e5bf8d7 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,15 @@ # url-parse -[![Made by unshift](https://img.shields.io/badge/made%20by-unshift-00ffcc.svg?style=flat-square)](http://unshift.io)[![Version npm](https://img.shields.io/npm/v/url-parse.svg?style=flat-square)](https://www.npmjs.com/package/url-parse)[![Build Status](https://img.shields.io/travis/unshiftio/url-parse/master.svg?style=flat-square)](https://travis-ci.org/unshiftio/url-parse)[![Dependencies](https://img.shields.io/david/unshiftio/url-parse.svg?style=flat-square)](https://david-dm.org/unshiftio/url-parse)[![Coverage Status](https://img.shields.io/coveralls/unshiftio/url-parse/master.svg?style=flat-square)](https://coveralls.io/r/unshiftio/url-parse?branch=master)[![IRC channel](https://img.shields.io/badge/IRC-irc.freenode.net%23unshift-00a8ff.svg?style=flat-square)](https://webchat.freenode.net/?channels=unshift) +[![Version npm](https://img.shields.io/npm/v/url-parse.svg?style=flat-square)](https://www.npmjs.com/package/url-parse)[![Build Status](https://img.shields.io/github/workflow/status/unshiftio/url-parse/CI/master?label=CI&style=flat-square)](https://github.com/unshiftio/url-parse/actions?query=workflow%3ACI+branch%3Amaster)[![Coverage Status](https://img.shields.io/coveralls/unshiftio/url-parse/master.svg?style=flat-square)](https://coveralls.io/r/unshiftio/url-parse?branch=master) [![Sauce Test Status](https://saucelabs.com/browser-matrix/url-parse.svg)](https://saucelabs.com/u/url-parse) +**`url-parse` was created in 2014 when the WHATWG URL API was not available in +Node.js and the `URL` interface was supported only in some browsers. Today this +is no longer true. The `URL` interface is available in all supported Node.js +release lines and basically all browsers. Consider using it for better security +and accuracy.** + The `url-parse` method exposes two different API interfaces. The [`url`](https://nodejs.org/api/url.html) interface that you know from Node.js and the new [`URL`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) @@ -80,8 +86,8 @@ The returned `url` instance contains the following properties: - `auth`: Authentication information portion (e.g. `username:password`). - `username`: Username of basic authentication. - `password`: Password of basic authentication. -- `host`: Host name with port number. -- `hostname`: Host name without port number. +- `host`: Host name with port number. The hostname might be invalid. +- `hostname`: Host name without port number. This might be an invalid hostname. - `port`: Optional port number. - `pathname`: URL path. - `query`: Parsed object containing query string, unless parsing is set to false. diff --git a/SECURITY.md b/SECURITY.md index 31ef5b4..f642347 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -33,6 +33,64 @@ acknowledge your responsible disclosure, if you wish. ## History +> Leading control characters are not removed. This allows an attacker to bypass +> hostname checks and makes the `extractProtocol` method return false positives. + +- **Reporter credits** + - Haxatron + - GitHub: [@haxatron](https://github.com/haxatron) + - Twitter: [@haxatron1](https://twitter.com/haxatron1) +- Huntr report: https://www.huntr.dev/bounties/57124ed5-4b68-4934-8325-2c546257f2e4/ +- Fixed in: 1.5.9 + +--- + +> A URL with a specified but empty port can be used to bypass authorization +> checks. + +- **Reporter credits** + - Rohan Sharma + - GitHub: [@r0hansh](https://github.com/r0hansh) +- Huntr report: https://www.huntr.dev/bounties/55fd06cd-9054-4d80-83be-eb5a454be78c/ +- Fixed in: 1.5.8 + +--- + +> A specially crafted URL with empty userinfo and no host can be used to bypass +> authorization checks. + +- **Reporter credits** + - Haxatron + - GitHub: [@haxatron](https://github.com/haxatron) + - Twitter: [@haxatron1](https://twitter.com/haxatron1) +- Huntr report: https://www.huntr.dev/bounties/83a6bc9a-b542-4a38-82cd-d995a1481155/ +- Fixed in: 1.5.7 + +--- + +> Incorrect handling of username and password can lead to authorization bypass. + +- **Reporter credits** + - ranjit-git + - GitHub: [@ranjit-git](https://github.com/ranjit-git) +- Huntr report: https://www.huntr.dev/bounties/6d1bc51f-1876-4f5b-a2c2-734e09e8e05b/ +- Fixed in: 1.5.6 + +--- + +> url-parse mishandles certain uses of a single (back) slash such as https:\ & +> https:/ and interprets the URI as a relative path. Browsers accept a single +> backslash after the protocol, and treat it as a normal slash, while url-parse +> sees it as a relative path. + +- **Reporter credits** + - ready-research + - GitHub: [@ready-research](https://github.com/ready-research) +- Huntr report: https://www.huntr.dev/bounties/1625557993985-unshiftio/url-parse/ +- Fixed in: 1.5.2 + +--- + > Using backslash in the protocol is valid in the browser, while url-parse > thinks it’s a relative path. An application that validates a url using > url-parse might pass a malicious link. @@ -42,6 +100,8 @@ acknowledge your responsible disclosure, if you wish. - Twitter: [Yaniv Nizry](https://twitter.com/ynizry) - Fixed in: 1.5.0 +--- + > The `extractProtocol` method does not return the correct protocol when > provided with unsanitized content which could lead to false positives. diff --git a/index.js b/index.js index 72b27c0..b86c29f 100644 --- a/index.js +++ b/index.js @@ -2,19 +2,23 @@ var required = require('requires-port') , qs = require('querystringify') - , slashes = /^[A-Za-z][A-Za-z0-9+-.]*:[\\/]+/ - , protocolre = /^([a-z][a-z0-9.+-]*:)?([\\/]{1,})?([\S\s]*)/i - , whitespace = '[\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\xA0\\u1680\\u180E\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200A\\u202F\\u205F\\u3000\\u2028\\u2029\\uFEFF]' - , left = new RegExp('^'+ whitespace +'+'); + , controlOrWhitespace = /^[\x00-\x20\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]+/ + , CRHTLF = /[\n\r\t]/g + , slashes = /^[A-Za-z][A-Za-z0-9+-.]*:\/\// + , port = /:\d+$/ + , protocolre = /^([a-z][a-z0-9.+-]*:)?(\/\/)?([\\/]+)?([\S\s]*)/i + , windowsDriveLetter = /^[a-zA-Z]:/; /** - * Trim a given string. + * Remove control characters and whitespace from the beginning of a string. * - * @param {String} str String to trim. + * @param {Object|String} str String to trim. + * @returns {String} A new string representing `str` stripped of control + * characters and whitespace from its beginning. * @public */ function trimLeft(str) { - return (str ? str : '').toString().replace(left, ''); + return (str ? str : '').toString().replace(controlOrWhitespace, ''); } /** @@ -32,13 +36,13 @@ function trimLeft(str) { var rules = [ ['#', 'hash'], // Extract from the back. ['?', 'query'], // Extract from the back. - function sanitize(address) { // Sanitize what is left of the address - return address.replace('\\', '/'); + function sanitize(address, url) { // Sanitize what is left of the address + return isSpecial(url.protocol) ? address.replace(/\\/g, '/') : address; }, ['/', 'pathname'], // Extract from the back. ['@', 'auth', 1], // Extract from the front. [NaN, 'host', undefined, 1, 1], // Set left over value. - [/:(\d+)$/, 'port', undefined, 1], // RegExp the back. + [/:(\d*)$/, 'port', undefined, 1], // RegExp the back. [NaN, 'hostname', undefined, 1, 1] // Set left over. ]; @@ -98,6 +102,24 @@ function lolcation(loc) { return finaldestination; } +/** + * Check whether a protocol scheme is special. + * + * @param {String} The protocol scheme of the URL + * @return {Boolean} `true` if the protocol scheme is special, else `false` + * @private + */ +function isSpecial(scheme) { + return ( + scheme === 'file:' || + scheme === 'ftp:' || + scheme === 'http:' || + scheme === 'https:' || + scheme === 'ws:' || + scheme === 'wss:' + ); +} + /** * @typedef ProtocolExtract * @type Object @@ -110,20 +132,57 @@ function lolcation(loc) { * Extract protocol information from a URL with/without double slash ("//"). * * @param {String} address URL we want to extract from. + * @param {Object} location * @return {ProtocolExtract} Extracted information. * @private */ -function extractProtocol(address) { +function extractProtocol(address, location) { address = trimLeft(address); + address = address.replace(CRHTLF, ''); + location = location || {}; + + var match = protocolre.exec(address); + var protocol = match[1] ? match[1].toLowerCase() : ''; + var forwardSlashes = !!match[2]; + var otherSlashes = !!match[3]; + var slashesCount = 0; + var rest; + + if (forwardSlashes) { + if (otherSlashes) { + rest = match[2] + match[3] + match[4]; + slashesCount = match[2].length + match[3].length; + } else { + rest = match[2] + match[4]; + slashesCount = match[2].length; + } + } else { + if (otherSlashes) { + rest = match[3] + match[4]; + slashesCount = match[3].length; + } else { + rest = match[4] + } + } - var match = protocolre.exec(address) - , protocol = match[1] ? match[1].toLowerCase() : '' - , slashes = !!(match[2] && match[2].length >= 2) - , rest = match[2] && match[2].length === 1 ? '/' + match[3] : match[3]; + if (protocol === 'file:') { + if (slashesCount >= 2) { + rest = rest.slice(2); + } + } else if (isSpecial(protocol)) { + rest = match[4]; + } else if (protocol) { + if (forwardSlashes) { + rest = rest.slice(2); + } + } else if (slashesCount >= 2 && isSpecial(location.protocol)) { + rest = match[4]; + } return { protocol: protocol, - slashes: slashes, + slashes: forwardSlashes || isSpecial(protocol), + slashesCount: slashesCount, rest: rest }; } @@ -180,6 +239,7 @@ function resolve(relative, base) { */ function Url(address, location, parser) { address = trimLeft(address); + address = address.replace(CRHTLF, ''); if (!(this instanceof Url)) { return new Url(address, location, parser); @@ -214,7 +274,7 @@ function Url(address, location, parser) { // // Extract protocol information before running the instructions. // - extracted = extractProtocol(address || ''); + extracted = extractProtocol(address || '', location); relative = !extracted.protocol && !extracted.slashes; url.slashes = extracted.slashes || relative && location.slashes; url.protocol = extracted.protocol || location.protocol || ''; @@ -224,13 +284,22 @@ function Url(address, location, parser) { // When the authority component is absent the URL starts with a path // component. // - if (!extracted.slashes) instructions[3] = [/(.*)/, 'pathname']; + if ( + extracted.protocol === 'file:' && ( + extracted.slashesCount !== 2 || windowsDriveLetter.test(address)) || + (!extracted.slashes && + (extracted.protocol || + extracted.slashesCount < 2 || + !isSpecial(url.protocol))) + ) { + instructions[3] = [/(.*)/, 'pathname']; + } for (; i < instructions.length; i++) { instruction = instructions[i]; if (typeof instruction === 'function') { - address = instruction(address); + address = instruction(address, url); continue; } @@ -240,7 +309,11 @@ function Url(address, location, parser) { if (parse !== parse) { url[key] = address; } else if ('string' === typeof parse) { - if (~(index = address.indexOf(parse))) { + index = parse === '@' + ? address.lastIndexOf(parse) + : address.indexOf(parse); + + if (~index) { if ('number' === typeof instruction[2]) { url[key] = address.slice(0, index); address = address.slice(index + instruction[2]); @@ -288,7 +361,7 @@ function Url(address, location, parser) { // Default to a / for pathname if none exists. This normalizes the URL // to always have a / // - if (url.pathname.charAt(0) !== '/' && url.hostname) { + if (url.pathname.charAt(0) !== '/' && isSpecial(url.protocol)) { url.pathname = '/' + url.pathname; } @@ -306,13 +379,24 @@ function Url(address, location, parser) { // Parse down the `auth` for the username and password. // url.username = url.password = ''; + if (url.auth) { - instruction = url.auth.split(':'); - url.username = instruction[0] || ''; - url.password = instruction[1] || ''; + index = url.auth.indexOf(':'); + + if (~index) { + url.username = url.auth.slice(0, index); + url.username = encodeURIComponent(decodeURIComponent(url.username)); + + url.password = url.auth.slice(index + 1); + url.password = encodeURIComponent(decodeURIComponent(url.password)) + } else { + url.username = encodeURIComponent(decodeURIComponent(url.auth)); + } + + url.auth = url.password ? url.username +':'+ url.password : url.username; } - url.origin = url.protocol && url.host && url.protocol !== 'file:' + url.origin = url.protocol !== 'file:' && isSpecial(url.protocol) && url.host ? url.protocol +'//'+ url.host : 'null'; @@ -369,7 +453,7 @@ function set(part, value, fn) { case 'host': url[part] = value; - if (/:\d+$/.test(value)) { + if (port.test(value)) { value = value.split(':'); url.port = value.pop(); url.hostname = value.join(':'); @@ -395,8 +479,23 @@ function set(part, value, fn) { } break; - default: - url[part] = value; + case 'username': + case 'password': + url[part] = encodeURIComponent(value); + break; + + case 'auth': + var index = value.indexOf(':'); + + if (~index) { + url.username = value.slice(0, index); + url.username = encodeURIComponent(decodeURIComponent(url.username)); + + url.password = value.slice(index + 1); + url.password = encodeURIComponent(decodeURIComponent(url.password)); + } else { + url.username = encodeURIComponent(decodeURIComponent(value)); + } } for (var i = 0; i < rules.length; i++) { @@ -405,7 +504,9 @@ function set(part, value, fn) { if (ins[4]) url[ins[1]] = url[ins[1]].toLowerCase(); } - url.origin = url.protocol && url.host && url.protocol !== 'file:' + url.auth = url.password ? url.username +':'+ url.password : url.username; + + url.origin = url.protocol !== 'file:' && isSpecial(url.protocol) && url.host ? url.protocol +'//'+ url.host : 'null'; @@ -426,19 +527,45 @@ function toString(stringify) { var query , url = this + , host = url.host , protocol = url.protocol; if (protocol && protocol.charAt(protocol.length - 1) !== ':') protocol += ':'; - var result = protocol + (url.slashes ? '//' : ''); + var result = + protocol + + ((url.protocol && url.slashes) || isSpecial(url.protocol) ? '//' : ''); if (url.username) { result += url.username; if (url.password) result += ':'+ url.password; result += '@'; + } else if (url.password) { + result += ':'+ url.password; + result += '@'; + } else if ( + url.protocol !== 'file:' && + isSpecial(url.protocol) && + !host && + url.pathname !== '/' + ) { + // + // Add back the empty userinfo, otherwise the original invalid URL + // might be transformed into a valid one with `url.pathname` as host. + // + result += '@'; + } + + // + // Trailing colon is removed from `url.host` when it is parsed. If it still + // ends with a colon, then add back the trailing colon that was removed. This + // prevents an invalid URL from being transformed into a valid one. + // + if (host[host.length - 1] === ':' || (port.test(url.hostname) && !url.port)) { + host += ':'; } - result += url.host + url.pathname; + result += host + url.pathname; query = 'object' === typeof url.query ? stringify(url.query) : url.query; if (query) result += '?' !== query.charAt(0) ? '?'+ query : query; diff --git a/package.json b/package.json index f84b62e..8d1bbbe 100644 --- a/package.json +++ b/package.json @@ -1,12 +1,12 @@ { "name": "url-parse", - "version": "1.5.1", + "version": "1.5.10", "description": "Small footprint URL parser that works seamlessly across Node.js and browser environments", "main": "index.js", "scripts": { "browserify": "rm -rf dist && mkdir -p dist && browserify index.js -s URLParse -o dist/url-parse.js", "minify": "uglifyjs dist/url-parse.js --source-map -cm -o dist/url-parse.min.js", - "test": "c8 --reporter=html --reporter=text mocha test/test.js", + "test": "c8 --reporter=lcov --reporter=text mocha test/test.js", "test-browser": "node test/browser.js", "prepublishOnly": "npm run browserify && npm run minify", "watch": "mocha --watch test/test.js" @@ -38,10 +38,9 @@ }, "devDependencies": { "assume": "^2.2.0", - "browserify": "^16.2.3", + "browserify": "^17.0.0", "c8": "^7.3.1", - "coveralls": "^3.1.0", - "mocha": "^8.0.1", + "mocha": "^9.0.3", "pre-commit": "^1.2.2", "sauce-browsers": "^2.0.0", "sauce-test": "^1.3.3", diff --git a/test/browser.js b/test/browser.js index 200ec5e..63ee99b 100644 --- a/test/browser.js +++ b/test/browser.js @@ -29,12 +29,12 @@ const platforms = sauceBrowsers([ }); run(path.join(__dirname, 'test.js'), 'saucelabs', { + jobInfo: { name: pkg.name, build: process.env.GITHUB_RUN_ID }, html: path.join(__dirname, 'index.html'), accessKey: process.env.SAUCE_ACCESS_KEY, username: process.env.SAUCE_USERNAME, browserify: true, disableSSL: true, - name: pkg.name, parallel: 5, platforms }).done((results) => { diff --git a/test/test.js b/test/test.js index 216891e..4108fe6 100644 --- a/test/test.js +++ b/test/test.js @@ -47,8 +47,14 @@ describe('url-parse', function () { assume(parse.trimLeft).is.a('function'); }); - it('removes whitespace on the left', function () { - assume(parse.trimLeft(' lol')).equals('lol'); + it('removes control characters on the left', function () { + var i = 0; + var prefix = '' + + for (; i < 33; i++) { + prefix = String.fromCharCode(i); + assume(parse.trimLeft(prefix + prefix +'lol')).equals('lol'); + } }); it('calls toString on a given value', function () { @@ -71,7 +77,8 @@ describe('url-parse', function () { assume(parse.extractProtocol('http://example.com')).eql({ slashes: true, protocol: 'http:', - rest: 'example.com' + rest: 'example.com', + slashesCount: 2 }); }); @@ -79,7 +86,8 @@ describe('url-parse', function () { assume(parse.extractProtocol('')).eql({ slashes: false, protocol: '', - rest: '' + rest: '', + slashesCount: 0 }); }); @@ -87,31 +95,44 @@ describe('url-parse', function () { assume(parse.extractProtocol('/foo')).eql({ slashes: false, protocol: '', - rest: '/foo' + rest: '/foo', + slashesCount: 1 }); assume(parse.extractProtocol('//foo/bar')).eql({ slashes: true, protocol: '', - rest: 'foo/bar' + rest: '//foo/bar', + slashesCount: 2 }); }); it('does not truncate the input string', function () { - var input = 'foo\nbar\rbaz\u2028qux\u2029'; + var input = 'foo\x0bbar\x0cbaz\u2028qux\u2029'; assume(parse.extractProtocol(input)).eql({ slashes: false, protocol: '', - rest: input + rest: input, + slashesCount: 0 }); }); it('trimsLeft', function () { - assume(parse.extractProtocol(' javascript://foo')).eql({ + assume(parse.extractProtocol('\x0b\x0c javascript://foo')).eql({ + slashes: true, + protocol: 'javascript:', + rest: 'foo', + slashesCount: 2 + }); + }); + + it('removes CR, HT, and LF', function () { + assume(parse.extractProtocol('jav\n\rasc\nript\r:/\t/fo\no')).eql({ slashes: true, protocol: 'javascript:', - rest: 'foo' + rest: 'foo', + slashesCount: 2 }); }); }); @@ -281,20 +302,192 @@ describe('url-parse', function () { assume(parsed.host).equals('what-is-up.com'); assume(parsed.href).equals('http://what-is-up.com/'); + + url = '\\\\\\\\what-is-up.com' + parsed = parse(url, parse('http://google.com')); + + assume(parsed.host).equals('what-is-up.com'); + assume(parsed.href).equals('http://what-is-up.com/'); }); - it('does not see a slash after the protocol as path', function () { + it('ignores slashes after the protocol for special URLs', function () { var url = 'https:\\/github.com/foo/bar' , parsed = parse(url); assume(parsed.host).equals('github.com'); assume(parsed.hostname).equals('github.com'); assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); - url = 'https:/\/\/\github.com/foo/bar'; + url = 'https:/\\/\\/\\github.com/foo/bar'; + parsed = parse(url); assume(parsed.host).equals('github.com'); assume(parsed.hostname).equals('github.com'); assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:/github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:\\github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + }); + + it('handles slashes after the protocol for non special URLs', function () { + var url = 'foo:example.com' + , parsed = parse(url); + + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('example.com'); + assume(parsed.href).equals('foo:example.com'); + assume(parsed.slashes).is.false(); + + url = 'foo:/example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/example.com'); + assume(parsed.href).equals('foo:/example.com'); + assume(parsed.slashes).is.false(); + + url = 'foo:\\example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('\\example.com'); + assume(parsed.href).equals('foo:\\example.com'); + assume(parsed.slashes).is.false(); + + url = 'foo://example.com'; + parsed = parse(url); + assume(parsed.hostname).equals('example.com'); + assume(parsed.pathname).equals(''); + assume(parsed.href).equals('foo://example.com'); + assume(parsed.slashes).is.true(); + + url = 'foo:\\\\example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('\\\\example.com'); + assume(parsed.href).equals('foo:\\\\example.com'); + assume(parsed.slashes).is.false(); + + url = 'foo:///example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/example.com'); + assume(parsed.href).equals('foo:///example.com'); + assume(parsed.slashes).is.true(); + + url = 'foo:\\\\\\example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('\\\\\\example.com'); + assume(parsed.href).equals('foo:\\\\\\example.com'); + assume(parsed.slashes).is.false(); + + url = '\\\\example.com/foo/bar'; + parsed = parse(url, 'foo://bar.com'); + assume(parsed.hostname).equals('bar.com'); + assume(parsed.pathname).equals('/\\\\example.com/foo/bar'); + assume(parsed.href).equals('foo://bar.com/\\\\example.com/foo/bar'); + assume(parsed.slashes).is.true(); + }); + + it('does not readd slashes to href if there is no protocol', function() { + var parsed = parse('//example.com', {}); + + assume(parsed.pathname).equals('//example.com'); + assume(parsed.href).equals('//example.com'); + }); + + it('removes CR, HT, and LF', function () { + var parsed = parse( + 'ht\ntp://a\rb:\tcd@exam\rple.com:80\t80/pat\thname?fo\no=b\rar#ba\tz' + ); + + assume(parsed.protocol).equals('http:'); + assume(parsed.username).equals('ab'); + assume(parsed.password).equals('cd'); + assume(parsed.host).equals('example.com:8080'); + assume(parsed.hostname).equals('example.com'); + assume(parsed.port).equals('8080'); + assume(parsed.pathname).equals('/pathname'); + assume(parsed.query).equals('?foo=bar'); + assume(parsed.hash).equals('#baz'); + assume(parsed.href).equals( + 'http://ab:cd@example.com:8080/pathname?foo=bar#baz' + ); + + parsed = parse('s\nip:al\rice@atl\tanta.com'); + + assume(parsed.protocol).equals('sip:'); + assume(parsed.pathname).equals('alice@atlanta.com'); + assume(parsed.href).equals('sip:alice@atlanta.com'); + }); + + it('handles the case where the port is specified but empty', function () { + var parsed = parse('http://example.com:'); + + assume(parsed.protocol).equals('http:'); + assume(parsed.port).equals(''); + assume(parsed.host).equals('example.com'); + assume(parsed.hostname).equals('example.com'); + assume(parsed.pathname).equals('/'); + assume(parsed.origin).equals('http://example.com'); + assume(parsed.href).equals('http://example.com/'); + + parsed = parse('http://example.com::'); + + assume(parsed.protocol).equals('http:'); + assume(parsed.port).equals(''); + assume(parsed.host).equals('example.com:'); + assume(parsed.hostname).equals('example.com:'); + assume(parsed.pathname).equals('/'); + assume(parsed.origin).equals('http://example.com:'); + assume(parsed.href).equals('http://example.com::/'); + + parsed = parse('http://example.com:8080:'); + + assume(parsed.protocol).equals('http:'); + assume(parsed.port).equals(''); + assume(parsed.host).equals('example.com:8080'); + assume(parsed.hostname).equals('example.com:8080'); + assume(parsed.pathname).equals('/'); + assume(parsed.origin).equals('http://example.com:8080'); + assume(parsed.href).equals('http://example.com:8080:/'); + + parsed = parse('http://example.com:8000:8080'); + + assume(parsed.protocol).equals('http:'); + assume(parsed.port).equals('8080'); + assume(parsed.host).equals('example.com:8000:8080'); + assume(parsed.hostname).equals('example.com:8000'); + assume(parsed.pathname).equals('/'); + assume(parsed.origin).equals('http://example.com:8000:8080'); + assume(parsed.href).equals('http://example.com:8000:8080/'); }); describe('origin', function () { @@ -319,6 +512,13 @@ describe('url-parse', function () { assume(parsed.origin).equals('null'); }); + it('is null for non special URLs', function () { + var o = parse('foo://example.com/pathname'); + assume(o.hostname).equals('example.com'); + assume(o.pathname).equals('/pathname'); + assume(o.origin).equals('null'); + }); + it('removes default ports for http', function () { var o = parse('http://google.com:80/pathname'); assume(o.origin).equals('http://google.com'); @@ -438,6 +638,67 @@ describe('url-parse', function () { data.set('protocol', 'https:'); assume(data.href).equals('https://google.com/foo'); }); + + it('handles the file: protocol', function () { + var slashes = ['', '/', '//', '///']; + var data; + var url; + + for (var i = 0; i < slashes.length; i++) { + data = parse('file:' + slashes[i]); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('/'); + assume(data.href).equals('file:///'); + } + + url = 'file:////'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('//'); + assume(data.href).equals(url); + + url = 'file://///'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('///'); + assume(data.href).equals(url); + + url = 'file:///Users/foo/BAR/baz.pdf'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('/Users/foo/BAR/baz.pdf'); + assume(data.href).equals(url); + + url = 'file:///foo/bar?baz=qux#hash'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.hash).equals('#hash'); + assume(data.query).equals('?baz=qux'); + assume(data.pathname).equals('/foo/bar'); + assume(data.href).equals(url); + + data = parse('file://c:\\foo\\bar\\'); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('/c:/foo/bar/'); + assume(data.href).equals('file:///c:/foo/bar/'); + + data = parse('file://host/file'); + assume(data.protocol).equals('file:'); + assume(data.host).equals('host'); + assume(data.hostname).equals('host'); + assume(data.pathname).equals('/file'); + assume(data.href).equals('file://host/file'); + + data = parse('foo/bar', 'file:///baz'); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('/foo/bar'); + assume(data.href).equals('file:///foo/bar'); + + data = parse('foo/bar', 'file:///baz/'); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('/baz/foo/bar'); + assume(data.href).equals('file:///baz/foo/bar'); + }); }); describe('ip', function () { @@ -510,6 +771,113 @@ describe('url-parse', function () { assume(parsed.hostname).equals('www.example.com'); assume(parsed.href).equals(url); }); + + it('handles @ in username', function () { + var url = 'http://user@@www.example.com/' + , parsed = parse(url); + + assume(parsed.protocol).equals('http:'); + assume(parsed.auth).equals('user%40'); + assume(parsed.username).equals('user%40'); + assume(parsed.password).equals(''); + assume(parsed.hostname).equals('www.example.com'); + assume(parsed.pathname).equals('/'); + assume(parsed.href).equals('http://user%40@www.example.com/'); + + url = 'http://user%40@www.example.com/'; + parsed = parse(url); + + assume(parsed.protocol).equals('http:'); + assume(parsed.auth).equals('user%40'); + assume(parsed.username).equals('user%40'); + assume(parsed.password).equals(''); + assume(parsed.hostname).equals('www.example.com'); + assume(parsed.pathname).equals('/'); + assume(parsed.href).equals('http://user%40@www.example.com/'); + }); + + it('handles @ in password', function () { + var url = 'http://user@:pas:s@@www.example.com/' + , parsed = parse(url); + + assume(parsed.protocol).equals('http:'); + assume(parsed.auth).equals('user%40:pas%3As%40'); + assume(parsed.username).equals('user%40'); + assume(parsed.password).equals('pas%3As%40'); + assume(parsed.hostname).equals('www.example.com'); + assume(parsed.pathname).equals('/'); + assume(parsed.href).equals('http://user%40:pas%3As%40@www.example.com/'); + + url = 'http://user%40:pas%3As%40@www.example.com/' + parsed = parse(url); + + assume(parsed.protocol).equals('http:'); + assume(parsed.auth).equals('user%40:pas%3As%40'); + assume(parsed.username).equals('user%40'); + assume(parsed.password).equals('pas%3As%40'); + assume(parsed.hostname).equals('www.example.com'); + assume(parsed.pathname).equals('/'); + assume(parsed.href).equals('http://user%40:pas%3As%40@www.example.com/'); + }); + + it('adds @ to href if auth and host are empty', function () { + var parsed, i = 0; + var urls = [ + 'http:@/127.0.0.1', + 'http::@/127.0.0.1', + 'http:/@/127.0.0.1', + 'http:/:@/127.0.0.1', + 'http://@/127.0.0.1', + 'http://:@/127.0.0.1', + 'http:///@/127.0.0.1', + 'http:///:@/127.0.0.1' + ]; + + for (; i < urls.length; i++) { + parsed = parse(urls[i]); + + assume(parsed.protocol).equals('http:'); + assume(parsed.auth).equals(''); + assume(parsed.username).equals(''); + assume(parsed.password).equals(''); + assume(parsed.host).equals(''); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/127.0.0.1'); + assume(parsed.origin).equals('null'); + assume(parsed.href).equals('http://@/127.0.0.1'); + assume(parsed.toString()).equals('http://@/127.0.0.1'); + } + + urls = [ + 'http:@/', + 'http:@', + 'http::@/', + 'http::@', + 'http:/@/', + 'http:/@', + 'http:/:@/', + 'http:/:@', + 'http://@/', + 'http://@', + 'http://:@/', + 'http://:@' + ]; + + for (i = 0; i < urls.length; i++) { + parsed = parse(urls[i]); + + assume(parsed.protocol).equals('http:'); + assume(parsed.auth).equals(''); + assume(parsed.username).equals(''); + assume(parsed.password).equals(''); + assume(parsed.host).equals(''); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/'); + assume(parsed.origin).equals('null'); + assume(parsed.href).equals('http:///'); + assume(parsed.toString()).equals('http:///'); + } + }); }); it('accepts multiple ???', function () { @@ -867,6 +1235,106 @@ describe('url-parse', function () { assume(data.href).equals('mailto:alice@atlanta.com'); }); + it('updates auth when updating username', function() { + var data = parse('https://example.com'); + + assume(data.set('username', 'foo')).equals(data); + assume(data.username).equals('foo'); + assume(data.auth).equals('foo') + assume(data.href).equals('https://foo@example.com/'); + + data.set('username', ''); + + assume(data.username).equals(''); + assume(data.auth).equals('') + assume(data.href).equals('https://example.com/'); + + data.set('username', 'foo:'); + + assume(data.username).equals('foo%3A'); + assume(data.auth).equals('foo%3A') + assume(data.href).equals('https://foo%3A@example.com/'); + + data = parse('https://foo:bar@example.com') + data.set('username', 'baz'); + + assume(data.username).equals('baz'); + assume(data.auth).equals('baz:bar') + assume(data.href).equals('https://baz:bar@example.com/'); + }); + + it('updates auth when updating password', function() { + var data = parse('https://example.com'); + + assume(data.set('password', 'foo')).equals(data); + assume(data.password).equals('foo'); + assume(data.auth).equals(':foo') + assume(data.href).equals('https://:foo@example.com/'); + + data.set('password', ''); + + assume(data.password).equals(''); + assume(data.auth).equals('') + assume(data.href).equals('https://example.com/'); + + data.set('password', ':foo@'); + + assume(data.password).equals('%3Afoo%40'); + assume(data.auth).equals(':%3Afoo%40') + assume(data.href).equals('https://:%3Afoo%40@example.com/'); + + data = parse('https://foo:bar@example.com') + data.set('password', 'baz'); + + assume(data.password).equals('baz'); + assume(data.auth).equals('foo:baz') + assume(data.href).equals('https://foo:baz@example.com/'); + }); + + it('updates username and password when updating auth', function() { + var data = parse('https://example.com'); + + assume(data.set('auth', 'foo:bar')).equals(data); + assume(data.username).equals('foo'); + assume(data.password).equals('bar'); + assume(data.href).equals('https://foo:bar@example.com/'); + + assume(data.set('auth', 'baz:')).equals(data); + assume(data.username).equals('baz'); + assume(data.password).equals(''); + assume(data.href).equals('https://baz@example.com/'); + + assume(data.set('auth', 'qux')).equals(data); + assume(data.username).equals('qux'); + assume(data.password).equals(''); + assume(data.href).equals('https://qux@example.com/'); + + assume(data.set('auth', ':quux')).equals(data); + assume(data.username).equals(''); + assume(data.password).equals('quux'); + assume(data.href).equals('https://:quux@example.com/'); + + assume(data.set('auth', 'user@:pass@')).equals(data); + assume(data.username).equals('user%40'); + assume(data.password).equals('pass%40'); + assume(data.href).equals('https://user%40:pass%40@example.com/'); + + assume(data.set('auth', 'user%40:pass%40')).equals(data); + assume(data.username).equals('user%40'); + assume(data.password).equals('pass%40'); + assume(data.href).equals('https://user%40:pass%40@example.com/'); + + assume(data.set('auth', 'user:pass:word')).equals(data); + assume(data.username).equals('user'); + assume(data.password).equals('pass%3Aword'); + assume(data.href).equals('https://user:pass%3Aword@example.com/'); + + assume(data.set('auth', 'user:pass%3Aword')).equals(data); + assume(data.username).equals('user'); + assume(data.password).equals('pass%3Aword'); + assume(data.href).equals('https://user:pass%3Aword@example.com/'); + }); + it('updates other values', function () { var data = parse('http://google.com/?foo=bar');