UriTools.jsm 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. /*******************************************************************************
  2. ηMatrix - a browser extension to black/white list requests.
  3. Copyright (C) 2014-2019 Raymond Hill
  4. Copyright (C) 2019 Alessio Vanni
  5. This program is free software: you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation, either version 3 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see {http://www.gnu.org/licenses/}.
  15. Home: https://gitlab.com/vannilla/ematrix
  16. uMatrix Home: https://github.com/gorhill/uMatrix
  17. */
  18. 'use strict';
  19. Components.utils.import('chrome://ematrix/content/lib/Punycode.jsm');
  20. Components.utils.import('chrome://ematrix/content/lib/PublicSuffixList.jsm');
  21. var EXPORTED_SYMBOLS = ['UriTools'];
  // Generic URI splitter (the name refers to RFC 3986). Capture groups:
  // 1 = "scheme:", 2 = "//authority", 3 = path, 4 = "?query", 5 = "#fragment".
  var reRFC3986 = /^([^:\/?#]+:)?(\/\/[^\/?#]*)?([^?#]*)(\?[^#]*)?(#.*)?/;

  // Cheap single-purpose extractors, used when only one component is needed.
  var reSchemeFromURI = /^[^:\/?#]+:/;
  var reAuthorityFromURI = /^(?:[^:\/?#]+:)?(\/\/[^\/?#]+)/;
  var reOriginFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]+)/;
  var reCommonHostnameFromURL = /^https?:\/\/([0-9a-z_][0-9a-z._-]*[0-9a-z])\//;
  var rePathFromURI = /^(?:[^:\/?#]+:)?(?:\/\/[^\/?#]*)?([^?#]*)/;

  // Matches any character outside the lower-case hostname alphabet.
  var reMustNormalizeHostname = /[^0-9a-z._-]/;

  // These parse the authority field, which the generic regex above does not
  // break down any further.
  // IPv6 is treated as the exception: the regex which cannot match a
  // bracketed IPv6 literal is tried first and, only if it fails, the
  // IPv6-capable regex is used. This helps performance by avoiding a more
  // complicated regex on the common path.
  // https://github.com/gorhill/httpswitchboard/issues/211
  // "While a hostname may not contain other characters, such as the
  // "underscore character (_), other DNS names may contain the underscore"
  var reHostPortFromAuthority = /^(?:[^@]*@)?([^:]*)(:\d*)?$/;
  var reIPv6PortFromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]*\])(:\d*)?$/i;
  var reHostFromNakedAuthority = /^[0-9a-z._-]+[0-9a-z]$/i;
  var reHostFromAuthority = /^(?:[^@]*@)?([^:]+)(?::\d*)?$/;
  var reIPv6FromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:]+\])(?::\d*)?$/i;

  // Coarse (but fast) tests

  // Dot-separated labels made of [a-z0-9] and '-', each label starting and
  // ending with an alphanumeric character. Hyphen runs use `-+` rather than
  // `-*` so that a run of alphanumerics can only be matched one way: the
  // previous `[a-z\d]+(-*[a-z\d]+)*` form accepted the same strings but
  // backtracked exponentially on long inputs ending in an invalid character.
  var reValidHostname = /^[a-z\d]+(?:-+[a-z\d]+)*(?:\.[a-z\d]+(?:-+[a-z\d]+)*)*$/;
  var reIPAddressNaive = /^\d+\.\d+\.\d+\.\d+$|^\[[\da-zA-Z:]+\]$/;
  var reNetworkScheme = /^(?:https?|wss?|ftps?)\b/;
  var reSecureScheme = /^(?:https|wss|ftps)\b/;
  // Restore every parsed component of the URI record `o` to the default the
  // URI constructor assigns, and return `o`.
  //
  // `authority` and `domain` are cleared along with the other components so
  // that a record which has been reset cannot expose values left over from a
  // previously parsed URI. The bit-flag properties are left untouched.
  function reset(o) {
      o.scheme = '';
      o.authority = '';
      o.hostname = '';
      o._ipv4 = undefined;
      o._ipv6 = undefined;
      o.port = '';
      o.domain = undefined;
      o.path = '';
      o.query = '';
      o.fragment = '';
      return o;
  }
  // Blank out the fields derived from the authority (hostname, port and the
  // two IP slots) on record `o`, leaving everything else as it is.
  // Returns `o`.
  function resetAuthority(o) {
      o.hostname = o.port = '';
      o._ipv4 = o._ipv6 = undefined;
      return o;
  }
  // Record holding the components of one parsed URI, together with the bit
  // flags accepted by UriTools.assemble() to select which components to emit.
  function URI() {
      // Parsed components, all empty until a URI is parsed into the record.
      this.scheme = '';
      this.authority = '';
      this.hostname = '';
      this._ipv4 = undefined;
      this._ipv6 = undefined;
      this.port = '';
      this.domain = undefined;
      this.path = '';
      this.query = '';
      this.fragment = '';

      // One bit per component.
      this.schemeBit = 0x01;
      this.userBit = 0x02;
      this.passwordBit = 0x04;
      this.hostnameBit = 0x08;
      this.portBit = 0x10;
      this.pathBit = 0x20;
      this.queryBit = 0x40;
      this.fragmentBit = 0x80;
      this.allBits = 0xFFFF;

      // Composite masks.
      this.authorityBit =
          this.userBit | this.passwordBit | this.hostnameBit | this.portBit;
      this.normalizeBits =
          this.schemeBit | this.hostnameBit | this.pathBit | this.queryBit;
  }
  // Size bounds of the hostname-to-domain cache: reaching the high mark
  // triggers a prune which keeps the `cacheCountLow` most recently used
  // entries.
  var cacheCountHigh = 100;
  var cacheCountLow = 75;

  // Disposed cache entries are parked here for reuse, up to `junkyardMax`.
  var junkyardMax = cacheCountHigh - cacheCountLow;
  var cacheJunkyard = [];

  // hostname => DomainCacheEntry
  var domainCache = new Map();

  // The single URI record UriTools.set() parses into.
  var cached = new URI();
  // One slot of the hostname-to-domain cache: the resolved domain plus the
  // time the slot was last (re)initialised or refreshed.
  function DomainCacheEntry(domain) {
      this.init(domain);
  }

  // (Re)initialise the entry for `domain` and stamp it with the current
  // time. Returns the entry itself so a recycled entry can be handed back
  // directly.
  DomainCacheEntry.prototype.init = function (domain) {
      this.domain = domain;
      this.tstamp = Date.now();
      return this;
  };

  // Release the entry: drop its domain and, while the junkyard still has
  // room, park the object there for later reuse.
  DomainCacheEntry.prototype.dispose = function () {
      this.domain = '';
      if (cacheJunkyard.length >= junkyardMax) {
          return;
      }
      cacheJunkyard.push(this);
  };
  // Produce a cache entry for `domain`, reusing an object from the junkyard
  // when one is available and allocating a new one otherwise.
  var domainCacheEntryFactory = function (domain) {
      let recycled = cacheJunkyard.pop();
      return recycled ? recycled.init(domain) : new DomainCacheEntry(domain);
  };
  // Record that `hostname` resolves to `domain` and return `domain`.
  // A hostname which is already cached only has its timestamp refreshed; a
  // new one is inserted, and the cache is pruned once it reaches
  // `cacheCountHigh` entries.
  var domainCacheAdd = function (hostname, domain) {
      let known = domainCache.get(hostname);
      if (known !== undefined) {
          known.tstamp = Date.now();
          return domain;
      }

      domainCache.set(hostname, domainCacheEntryFactory(domain));
      if (domainCache.size === cacheCountHigh) {
          domainCachePrune();
      }
      return domain;
  };
  // Comparator over cached hostnames: most recently stamped entry first.
  var domainCacheSort = function (a, b) {
      let stampOfB = domainCache.get(b).tstamp;
      let stampOfA = domainCache.get(a).tstamp;
      return stampOfB - stampOfA;
  };
  // Shrink the cache to its `cacheCountLow` most recently used hostnames.
  // The remaining entries are disposed of and removed, oldest first.
  var domainCachePrune = function () {
      let byRecency = Array.from(domainCache.keys());
      byRecency.sort(domainCacheSort);

      let stale = byRecency.slice(cacheCountLow);
      while (stale.length !== 0) {
          // The list is ordered newest-first, so pop() yields the oldest.
          let hostname = stale.pop();
          domainCache.get(hostname).dispose();
          domainCache.delete(hostname);
      }
  };
  // Forget every cached hostname-to-domain mapping.
  function domainCacheReset() {
      domainCache.clear();
  }

  // Cached domains are computed from the public suffix list, so they are
  // dropped whenever that list reports a change.
  publicSuffixList.onChanged.addListener(domainCacheReset);
  143. var UriTools = {
  144. set: function (uri) {
  145. if (uri === undefined) {
  146. return reset(cached);
  147. }
  148. let matches = reRFC3986.exec(uri);
  149. if (!matches) {
  150. return reset(cached);
  151. }
  152. cached.scheme = matches[1] !== undefined ?
  153. matches[1].slice(0, -1) :
  154. '';
  155. cached.authority = matches[2] !== undefined ?
  156. matches[2].slice(2).toLowerCase() :
  157. '';
  158. cached.path = matches[3] !== undefined ?
  159. matches[3] :
  160. '';
  161. // As per RFC3986
  162. if (cached.authority !== '' && cached.path === '') {
  163. cached.path = '/';
  164. }
  165. cached.query = matches[4] !== undefined ?
  166. matches[4].slice(1) :
  167. '';
  168. cached.fragment = matches[5] !== undefined ?
  169. matches[5].slice(1) :
  170. '';
  171. if (reHostFromNakedAuthority.test(cached.authority)) {
  172. cached.hostname = cached.authority;
  173. cached.port = '';
  174. return cached;
  175. }
  176. matches = reHostPortFromAuthority.exec(cached.authority);
  177. if (!matches) {
  178. matches = reIPv6PortFromAuthority.exec(cached.authority);
  179. if (!matches) {
  180. return resetAuthority(cached);
  181. }
  182. }
  183. cached.hostname = matches[1] !== undefined ?
  184. matches[1] :
  185. '';
  186. if (cached.hostname.slice(-1) === '.') {
  187. cached.hostname = cached.hostname.slice(0, -1);
  188. }
  189. cached.port = matches[2] !== undefined ?
  190. matches[2].slice(1) :
  191. '';
  192. return cached;
  193. },
  194. assemble: function (bits) {
  195. if (bits === undefined) {
  196. bits = cached.allBits;
  197. }
  198. let s = [];
  199. if (cached.scheme && (bits && cached.schemeBit)) {
  200. s.push(cached.scheme, ':');
  201. }
  202. if (cached.hostname && (bits & cached.hostnameBit)) {
  203. s.push('//', cached.hostname);
  204. }
  205. if (cached.port && (bits & cached.portBit)) {
  206. s.push(':', cached.port);
  207. }
  208. if (cached.path && (bits & cached.pathBit)) {
  209. s.push(cached.path);
  210. }
  211. if (cached.query && (bits & cached.queryBit)) {
  212. s.push('?', cached.query);
  213. }
  214. if (cached.fragment && (bits & cached.fragmentBit)) {
  215. s.push('#', cached.fragment);
  216. }
  217. return s.join('');
  218. },
  219. isNetworkScheme: function (scheme) {
  220. return reNetworkScheme.test(scheme);
  221. },
  222. isSecureScheme: function(scheme) {
  223. return reSecureScheme.test(scheme);
  224. },
  225. originFromURI: function (uri) {
  226. let matches = reOriginFromURI.exec(uri);
  227. return matches !== null ? matches[0].toLowerCase() : '';
  228. },
  229. schemeFromURI: function (uri) {
  230. let matches = reSchemeFromURI.exec(uri);
  231. return matches !== null ? matches[0].slice(0, -1).toLowerCase() : '';
  232. },
  233. authorityFromURI: function (uri) {
  234. let matches = reAuthorityFromURI.exec(uri);
  235. return matches !== null ? matches[1].slice(1).toLowerCase() : '';
  236. },
  237. hostnameFromURI: function (uri) {
  238. let matches = reCommonHostnameFromURL.exec(uri);
  239. if (matches) {
  240. return matches[1];
  241. }
  242. matches = reAuthorityFromURI.exec(uri);
  243. if (!matches) {
  244. return '';
  245. }
  246. let auth = matches[1].slice(2);
  247. if (reHostFromNakedAuthority.test(auth)) {
  248. return auth.toLowerCase();
  249. }
  250. matches = reHostFromAuthority.exec(auth);
  251. if (!matches) {
  252. matches = reIPv6FromAuthority.exec(auth);
  253. if (!matches) {
  254. return '';
  255. }
  256. }
  257. let hostname = matches[1];
  258. while (hostname.endsWith('.')) {
  259. hostname = hostname.slice(0, -1);
  260. }
  261. if (reMustNormalizeHostname.test(hostname)) {
  262. Punycode.toASCII(hostname.toLowerCase());
  263. }
  264. return hostname;
  265. },
  266. domainFromHostname: function (hostname) {
  267. let entry = domainCache.get(hostname);
  268. if (entry !== undefined) {
  269. entry.tstamp = Date.now();
  270. return entry.domain;
  271. }
  272. if (reIPAddressNaive.test(hostname) == false) {
  273. return domainCacheAdd(hostname,
  274. publicSuffixList.getDomain(hostname));
  275. }
  276. return domainCacheAdd(hostname, hostname);
  277. },
  278. domainFromURI: function (uri) {
  279. if (!uri) {
  280. return '';
  281. }
  282. return UriTools.domainFromHostname(UriTools.hostnameFromURI(uri));
  283. },
  284. domain: function() {
  285. return UriTools.domainFromHostname(cached.hostname);
  286. },
  287. pathFromURI: function (uri) {
  288. let matches = rePathFromURI.exec(uri);
  289. return matches !== null ? matches[1] : '';
  290. },
  291. normalizedURI: function () {
  292. return UriTools.assemble(cached.normalizeBits);
  293. },
  294. rootURL: function () {
  295. if (!cached.hostname) {
  296. return '';
  297. }
  298. return UriTools.assemble(cached.scemeBit | cached.hostnameBit);
  299. },
  300. isValidHostname: function (hostname) {
  301. try {
  302. let r = reValidHostname.test(hostname);
  303. return r;
  304. } catch (e) {
  305. return false;
  306. }
  307. },
  308. parentHostnameFromHostname: function (hostname) {
  309. // "locahost" => ""
  310. // "example.org" => "example.org"
  311. // "www.example.org" => "example.org"
  312. // "tomato.www.example.org" => "example.org"
  313. let domain = UriTools.domainFromHostname(hostname);
  314. if (domain === '' || domain === hostname) {
  315. return undefined;
  316. }
  317. return hostname.slice(hostname.indexOf('.') + 1);
  318. },
  319. parentHostnamesFromHostname: function (hostname) {
  320. let domain = UriTools.domainFromHostname(hostname);
  321. if (domain === '' || domain === hostname) {
  322. return [];
  323. }
  324. let nodes = [];
  325. for (;;) {
  326. let pos = hostname.indexOf('.');
  327. if (pos < 0) {
  328. break;
  329. }
  330. hostname = hostname.slice(pos+1);
  331. nodes.push(hostname);
  332. if (hostname === domain) {
  333. break;
  334. }
  335. }
  336. return nodes;
  337. },
  338. allHostNamesFromHostname: function (hostname) {
  339. let nodes = UriTools.parentHostnamesFromHostname(hostname);
  340. nodes.unshift(hostname);
  341. return nodes;
  342. },
  343. toString: function () {
  344. return UriTools.assemble();
  345. },
  346. };