123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406 |
- /*******************************************************************************
- ηMatrix - a browser extension to black/white list requests.
- Copyright (C) 2014-2019 Raymond Hill
- Copyright (C) 2019 Alessio Vanni
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see {http://www.gnu.org/licenses/}.
- Home: https://gitlab.com/vannilla/ematrix
- uMatrix Home: https://github.com/gorhill/uMatrix
- */
- 'use strict';
- Components.utils.import('chrome://ematrix/content/lib/Punycode.jsm');
- Components.utils.import('chrome://ematrix/content/lib/PublicSuffixList.jsm');
- var EXPORTED_SYMBOLS = ['UriTools'];
// Generic URI splitter, after the regular expression given in RFC 3986,
// appendix B. Captures: 1 = "scheme:", 2 = "//authority", 3 = path,
// 4 = "?query", 5 = "#fragment".
var reRFC3986 = /^([^:\/?#]+:)?(\/\/[^\/?#]*)?([^?#]*)(\?[^#]*)?(#.*)?/;

// Narrower patterns for when a single component of the URI is wanted.
var reSchemeFromURI = /^[^:\/?#]+:/;
var reAuthorityFromURI = /^(?:[^:\/?#]+:)?(\/\/[^\/?#]+)/;
var reOriginFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]+)/;
// Fast path: an http(s) URL whose authority is a plain lowercase hostname
// immediately followed by "/".
var reCommonHostnameFromURL = /^https?:\/\/([0-9a-z_][0-9a-z._-]*[0-9a-z])\//;
var rePathFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]*)?([^?#]*)/;
// Matches any character outside the lowercase hostname alphabet.
var reMustNormalizeHostname = /[^0-9a-z._-]/;

// These are to parse the authority field, which is not broken down by the
// official regex above.
// IPv6 is seen as an exception: a non-IPv6-compatible regex is tried first,
// and if it fails, the IPv6-compatible regex is used. This helps
// performance by avoiding the use of a too complicated regex first.
// https://github.com/gorhill/httpswitchboard/issues/211
// "While a hostname may not contain other characters, such as the
// "underscore character (_), other DNS names may contain the underscore"
var reHostPortFromAuthority = /^(?:[^@]*@)?([^:]*)(:\d*)?$/;
var reIPv6PortFromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]*\])(:\d*)?$/i;
var reHostFromNakedAuthority = /^[0-9a-z._-]+[0-9a-z]$/i;
var reHostFromAuthority = /^(?:[^@]*@)?([^:]+)(?::\d*)?$/;
var reIPv6FromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]+\])(?::\d*)?$/i;

// Coarse (but fast) tests

// Dot-separated labels made of [a-z0-9] and "-", each label starting and
// ending with an alphanumeric. Every repetition starts with a mandatory "-"
// or "." so there is only one way to match a given input: the previous
// `[a-z\d]+(-*[a-z\d]+)*` form accepted the same strings but backtracked
// exponentially on a long alphanumeric run followed by an invalid character.
var reValidHostname = /^[a-z\d]+(?:-+[a-z\d]+)*(?:\.[a-z\d]+(?:-+[a-z\d]+)*)*$/;
// Dotted-quad digits, or anything alphanumeric/colon wrapped in brackets.
var reIPAddressNaive = /^\d+\.\d+\.\d+\.\d+$|^\[[\da-zA-Z:]+\]$/;
var reNetworkScheme = /^(?:https?|wss?|ftps?)\b/;
var reSecureScheme = /^(?:https|wss|ftps)\b/;
// Clear every parsed URI component on `o` and return `o`.
//
// `authority` is cleared along with the fields derived from it; otherwise
// a reused record would keep reporting the authority of the previously
// parsed URI while all its other components are empty.
function reset(o) {
    o.scheme = '';
    o.authority = '';
    o.hostname = '';
    o._ipv4 = undefined;
    o._ipv6 = undefined;
    o.port = '';
    o.path = '';
    o.query = '';
    o.fragment = '';
    return o;
}
// Blank out the fields derived from the authority (hostname, IP
// placeholders, port) on `o` and hand `o` back. The raw `authority` string
// and all other components are left as they are.
function resetAuthority(o) {
    return Object.assign(o, {
        hostname: '',
        _ipv4: undefined,
        _ipv6: undefined,
        port: '',
    });
}
// Record holding the components of one parsed URI, plus the bit flags used
// to select components when a URI string is put back together.
//
// Each field is assigned in its own statement (the assignments used to be
// chained with the comma operator, which hid the statement boundaries).
function URI() {
    // Parsed components; strings start empty, lazily-known values undefined.
    this.scheme = '';
    this.authority = '';
    this.hostname = '';
    this._ipv4 = undefined;
    this._ipv6 = undefined;
    this.port = '';
    this.domain = undefined;
    this.path = '';
    this.query = '';
    this.fragment = '';

    // One bit per component.
    this.schemeBit = (1 << 0);
    this.userBit = (1 << 1);
    this.passwordBit = (1 << 2);
    this.hostnameBit = (1 << 3);
    this.portBit = (1 << 4);
    this.pathBit = (1 << 5);
    this.queryBit = (1 << 6);
    this.fragmentBit = (1 << 7);

    // Composite masks.
    this.allBits = (0xFFFF);
    this.authorityBit =
        (this.userBit | this.passwordBit | this.hostnameBit | this.portBit);
    this.normalizeBits =
        (this.schemeBit | this.hostnameBit | this.pathBit | this.queryBit);
}
// Shared record that holds the components of the most recently parsed URI.
var cached = new URI();

// hostname -> cache entry for already-resolved domains.
var domainCache = new Map();

// Size thresholds for the domain cache: the high mark is the size at which
// pruning kicks in, the low mark is how many entries pruning keeps.
var cacheCountLow = 75;
var cacheCountHigh = 100;

// Disposed cache entries kept around for reuse; capped at the number of
// entries a single prune can release.
var cacheJunkyard = [];
var junkyardMax = cacheCountHigh - cacheCountLow;
// One domain-cache record: the resolved domain plus the time it was last
// (re)initialised.
function DomainCacheEntry(domain) {
    this.init(domain);
}

// Fill in (or refill) the record and return it, so a recycled instance can
// be handed straight back to the caller.
DomainCacheEntry.prototype.init = function (domain) {
    this.domain = domain;
    this.tstamp = Date.now();
    return this;
};

// Release the record: forget its domain and, while the junkyard still has
// room, park the instance there for later reuse.
DomainCacheEntry.prototype.dispose = function () {
    this.domain = '';
    if (cacheJunkyard.length >= junkyardMax) {
        return;
    }
    cacheJunkyard.push(this);
};
// Produce a cache entry for `domain`, reusing a previously disposed entry
// from the junkyard when one is available and allocating otherwise.
var domainCacheEntryFactory = function (domain) {
    const recycled = cacheJunkyard.pop();
    return recycled ? recycled.init(domain) : new DomainCacheEntry(domain);
};
// Record `domain` as the resolved domain of `hostname` and return `domain`.
// A hostname that is already cached only has its timestamp refreshed (the
// stored domain is kept). A new hostname gets an entry, and a prune runs
// when the cache size lands exactly on the high mark.
var domainCacheAdd = function (hostname, domain) {
    const known = domainCache.get(hostname);
    if (known === undefined) {
        domainCache.set(hostname, domainCacheEntryFactory(domain));
        if (domainCache.size === cacheCountHigh) {
            domainCachePrune();
        }
        return domain;
    }
    known.tstamp = Date.now();
    return domain;
};
// Comparator over cached hostnames: most recently stamped entry first.
var domainCacheSort = function (a, b) {
    const stampOfB = domainCache.get(b).tstamp;
    const stampOfA = domainCache.get(a).tstamp;
    return stampOfB - stampOfA;
};
// Shrink the cache down to the low mark: order hostnames from most to least
// recently stamped, then dispose and remove everything past the first
// `cacheCountLow`, walking from the oldest entry upwards.
var domainCachePrune = function () {
    const byRecency = Array.from(domainCache.keys()).sort(domainCacheSort);
    const stale = byRecency.slice(cacheCountLow);
    let remaining = stale.length;
    while (remaining-- > 0) {
        const hostname = stale[remaining];
        domainCache.get(hostname).dispose();
        domainCache.delete(hostname);
    }
};
// Drop every cached hostname -> domain mapping.
var domainCacheReset = () => {
    domainCache.clear();
};

// Cached domains were computed with publicSuffixList.getDomain(), so they
// are thrown away whenever the list signals a change.
publicSuffixList.onChanged.addListener(domainCacheReset);
// URI helpers exported from this module.
//
// set() parses a URI into one shared record (`cached`); assemble(),
// domain(), normalizedURI(), rootURL() and toString() read that record, so
// they describe whichever URI was handed to the most recent set() call.
// The *FromURI() helpers work on their argument and leave the record alone.
var UriTools = {
    // Parse `uri` into the shared record and return that record.
    // With `uri === undefined` the record is reset instead.
    set: function (uri) {
        if (uri === undefined) {
            return reset(cached);
        }

        let matches = reRFC3986.exec(uri);
        if (!matches) {
            return reset(cached);
        }

        // Captures keep their delimiter ("scheme:", "//authority", "?query",
        // "#fragment"); strip it while copying.
        cached.scheme = matches[1] !== undefined ? matches[1].slice(0, -1) : '';
        cached.authority = matches[2] !== undefined ?
            matches[2].slice(2).toLowerCase() :
            '';
        cached.path = matches[3] !== undefined ? matches[3] : '';

        // As per RFC3986: an empty path after an authority is normalized
        // to "/".
        if (cached.authority !== '' && cached.path === '') {
            cached.path = '/';
        }

        cached.query = matches[4] !== undefined ? matches[4].slice(1) : '';
        cached.fragment = matches[5] !== undefined ? matches[5].slice(1) : '';

        // Cheapest case first: the authority is nothing but a hostname.
        if (reHostFromNakedAuthority.test(cached.authority)) {
            cached.hostname = cached.authority;
            cached.port = '';
            return cached;
        }

        // Otherwise split off userinfo/port, trying the plain-host pattern
        // before the bracketed IPv6 one.
        matches = reHostPortFromAuthority.exec(cached.authority);
        if (!matches) {
            matches = reIPv6PortFromAuthority.exec(cached.authority);
            if (!matches) {
                return resetAuthority(cached);
            }
        }

        cached.hostname = matches[1] !== undefined ? matches[1] : '';
        // Drop one trailing dot ("example.com." -> "example.com").
        if (cached.hostname.slice(-1) === '.') {
            cached.hostname = cached.hostname.slice(0, -1);
        }
        cached.port = matches[2] !== undefined ? matches[2].slice(1) : '';

        return cached;
    },

    // Build a URI string from the shared record, including only the
    // non-empty components whose bit is set in `bits` (default: all).
    assemble: function (bits) {
        if (bits === undefined) {
            bits = cached.allBits;
        }

        let s = [];
        // Bitwise test, like every other component: a logical `&&` here
        // would emit the scheme no matter which bits were requested.
        if (cached.scheme && (bits & cached.schemeBit)) {
            s.push(cached.scheme, ':');
        }
        if (cached.hostname && (bits & cached.hostnameBit)) {
            s.push('//', cached.hostname);
        }
        if (cached.port && (bits & cached.portBit)) {
            s.push(':', cached.port);
        }
        if (cached.path && (bits & cached.pathBit)) {
            s.push(cached.path);
        }
        if (cached.query && (bits & cached.queryBit)) {
            s.push('?', cached.query);
        }
        if (cached.fragment && (bits & cached.fragmentBit)) {
            s.push('#', cached.fragment);
        }

        return s.join('');
    },

    // True when `scheme` starts with http(s), ws(s) or ftp(s).
    isNetworkScheme: function (scheme) {
        return reNetworkScheme.test(scheme);
    },

    // True when `scheme` starts with https, wss or ftps.
    isSecureScheme: function(scheme) {
        return reSecureScheme.test(scheme);
    },

    // Lowercased "[scheme:]//authority" prefix of `uri`, or '' when the URI
    // has no authority.
    originFromURI: function (uri) {
        let matches = reOriginFromURI.exec(uri);
        return matches !== null ? matches[0].toLowerCase() : '';
    },

    // Lowercased scheme of `uri` without the trailing ":", or ''.
    schemeFromURI: function (uri) {
        let matches = reSchemeFromURI.exec(uri);
        return matches !== null ? matches[0].slice(0, -1).toLowerCase() : '';
    },

    // Lowercased authority of `uri` (userinfo, host and port as written),
    // or '' when there is none.
    authorityFromURI: function (uri) {
        let matches = reAuthorityFromURI.exec(uri);
        // The capture includes the leading "//": strip both slashes.
        return matches !== null ? matches[1].slice(2).toLowerCase() : '';
    },

    // Hostname of `uri`, or '' when none can be extracted. Hostnames
    // containing anything outside [0-9a-z._-] are lowercased and converted
    // with Punycode.toASCII().
    hostnameFromURI: function (uri) {
        // Fast path for plain http(s) URLs with a lowercase ASCII hostname.
        let matches = reCommonHostnameFromURL.exec(uri);
        if (matches) {
            return matches[1];
        }

        matches = reAuthorityFromURI.exec(uri);
        if (!matches) {
            return '';
        }

        let auth = matches[1].slice(2);

        if (reHostFromNakedAuthority.test(auth)) {
            return auth.toLowerCase();
        }

        matches = reHostFromAuthority.exec(auth);
        if (!matches) {
            matches = reIPv6FromAuthority.exec(auth);
            if (!matches) {
                return '';
            }
        }

        let hostname = matches[1];
        while (hostname.endsWith('.')) {
            hostname = hostname.slice(0, -1);
        }

        if (reMustNormalizeHostname.test(hostname)) {
            // Keep the converted value; calling toASCII() without using its
            // result left the hostname unnormalized.
            hostname = Punycode.toASCII(hostname.toLowerCase());
        }

        return hostname;
    },

    // Domain for `hostname`, served from the domain cache when possible.
    // Anything that looks like an IP address is its own domain; everything
    // else is resolved through publicSuffixList.getDomain().
    domainFromHostname: function (hostname) {
        let entry = domainCache.get(hostname);
        if (entry !== undefined) {
            entry.tstamp = Date.now();
            return entry.domain;
        }

        if (reIPAddressNaive.test(hostname)) {
            return domainCacheAdd(hostname, hostname);
        }

        return domainCacheAdd(hostname, publicSuffixList.getDomain(hostname));
    },

    // Domain of the hostname found in `uri`; '' for a falsy `uri`.
    domainFromURI: function (uri) {
        if (!uri) {
            return '';
        }
        return UriTools.domainFromHostname(UriTools.hostnameFromURI(uri));
    },

    // Domain of the hostname currently held in the shared record.
    domain: function() {
        return UriTools.domainFromHostname(cached.hostname);
    },

    // Path component of `uri` (may be empty).
    pathFromURI: function (uri) {
        let matches = rePathFromURI.exec(uri);
        return matches !== null ? matches[1] : '';
    },

    // Shared record reassembled from scheme, hostname, path and query only.
    normalizedURI: function () {
        return UriTools.assemble(cached.normalizeBits);
    },

    // "scheme://hostname" for the shared record, or '' without a hostname.
    rootURL: function () {
        if (!cached.hostname) {
            return '';
        }
        return UriTools.assemble(cached.schemeBit | cached.hostnameBit);
    },

    // Coarse syntax check of `hostname`; false if the check itself throws
    // (e.g. the argument cannot be converted to a string).
    isValidHostname: function (hostname) {
        try {
            return reValidHostname.test(hostname);
        } catch (e) {
            return false;
        }
    },

    // `hostname` minus its first label, or undefined when the hostname has
    // no parent, i.e. when no domain is found for it or it is its own
    // domain:
    //   "www.example.org"        => "example.org"
    //   "tomato.www.example.org" => "www.example.org"
    //   hostname equal to its domain, or without a domain => undefined
    parentHostnameFromHostname: function (hostname) {
        let domain = UriTools.domainFromHostname(hostname);

        if (domain === '' || domain === hostname) {
            return undefined;
        }

        return hostname.slice(hostname.indexOf('.') + 1);
    },

    // All ancestors of `hostname`, nearest first, down to and including its
    // domain. Empty when the hostname has no parent.
    parentHostnamesFromHostname: function (hostname) {
        let domain = UriTools.domainFromHostname(hostname);
        if (domain === '' || domain === hostname) {
            return [];
        }

        let nodes = [];
        for (;;) {
            let pos = hostname.indexOf('.');
            if (pos < 0) {
                break;
            }

            hostname = hostname.slice(pos+1);
            nodes.push(hostname);

            if (hostname === domain) {
                break;
            }
        }

        return nodes;
    },

    // `hostname` itself followed by all of its ancestors.
    allHostNamesFromHostname: function (hostname) {
        let nodes = UriTools.parentHostnamesFromHostname(hostname);
        nodes.unshift(hostname);
        return nodes;
    },

    // The shared record reassembled with every component.
    toString: function () {
        return UriTools.assemble();
    },
};
|