publicsuffixlist.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. /*******************************************************************************
  2. publicsuffixlist.js - an efficient javascript implementation to deal with
  3. Mozilla Foundation's Public Suffix List <http://publicsuffix.org/list/>
  4. Copyright (C) 2013 Raymond Hill
  5. This program is free software: you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation, either version 3 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see {http://www.gnu.org/licenses/}.
  15. */
  16. /*! Home: https://github.com/gorhill/publicsuffixlist.js */
  17. /*
  18. This code is mostly dumb: I consider this to be lower-level code, thus
  19. in order to ensure efficiency, the caller is responsible for sanitizing
  20. the inputs.
  21. */
  22. /******************************************************************************/
  23. // A single instance of PublicSuffixList is enough.
  24. ;(function(root) {
  25. 'use strict';
  26. /******************************************************************************/
  // Lookup stores, keyed by TLD. `rules` holds the regular entries and
  // `exceptions` the "!"-prefixed ones. Both are replaced wholesale by
  // parse() and by fromSelfie().
  var rules = {};
  var exceptions = {};

  // Marker embedded in every selfie; fromSelfie() refuses any object whose
  // `magic` property differs from this value.
  var selfieMagic = 'iscjsfsaolnm';

  // This value dictates which representation crystallize() picks for a TLD,
  // and therefore how search() performs the lookup:
  //   joined suffixes shorter than cutoffLength  => string, indexOf()
  //   joined suffixes of cutoffLength or longer  => arrays, binary search
  var cutoffLength = 256;

  // Matches any character other than a word character, '.', '*' or '-'.
  // parse() runs a rule through toAscii() only when this matches.
  var mustPunycode = /[^\w.*-]/;

  // Callbacks invoked each time parse() or fromSelfie() installs a new list.
  var onChangedListeners = [];
  36. /******************************************************************************/
  // Return the domain part of `hostname`, or '' when there is none.
  //
  // In the context of this code, a domain is defined as:
  //   "{label}.{public suffix}"
  // A single standalone label is a public suffix as per
  // http://publicsuffix.org/list/:
  //   "If no rules match, the prevailing rule is '*'"
  // This means 'localhost' is not deemed a domain by this code: it is
  // evaluated as a public suffix, hence '' is returned for it. The caller is
  // therefore responsible to decide how to further interpret such a
  // public suffix.
  //
  // `hostname` must be a valid ascii-based hostname. It is lower-cased here
  // before being handed to getPublicSuffix().
  function getDomain(hostname) {
      // Empty input, or a hostname starting with a dot, is not a valid
      // hostname.
      var isUsable = Boolean(hostname) && hostname.charAt(0) !== '.';
      if ( !isUsable ) {
          return '';
      }

      var host = hostname.toLowerCase();
      var suffix = getPublicSuffix(host);

      // The whole hostname is a public suffix: no label left to form a domain.
      if ( suffix === host ) {
          return '';
      }

      // Dot separating the public suffix from the label in front of it...
      var suffixDot = host.lastIndexOf('.', host.length - suffix.length);
      // ...then the dot in front of that label, if there is one.
      var labelDot = host.lastIndexOf('.', suffixDot - 1);

      // No further dot: the hostname is already "{label}.{public suffix}".
      return labelDot > 0 ? host.slice(labelDot + 1) : host;
  }
  64. /******************************************************************************/
  // Return the longest public suffix of `hostname`.
  //
  // `hostname` must be a valid ascii-based string which respects hostname
  // naming. An empty/missing hostname yields ''. When nothing matches, the
  // last remaining label is returned.
  function getPublicSuffix(hostname) {
      if ( !hostname ) {
          return '';
      }

      // The candidate loses its leftmost label on each pass, so the first
      // match is the longest one: no need to collect all matching rules.
      var candidate = hostname;
      var dot = candidate.indexOf('.');

      while ( dot >= 0 ) {
          // An exception rule matched: the suffix is the candidate minus its
          // leftmost label.
          if ( search(exceptions, candidate) ) {
              return candidate.slice(dot + 1);
          }
          // Exact rule first, then the wildcard form "*.{rest}".
          if (
              search(rules, candidate) ||
              search(rules, '*' + candidate.slice(dot))
          ) {
              return candidate;
          }
          candidate = candidate.slice(dot + 1);
          dot = candidate.indexOf('.');
      }

      // A single label is left.
      return candidate;
  }
  93. /******************************************************************************/
  // Look up a specific hostname in `store`; return true when it is found.
  //
  // The hostname is split at its last dot into a TLD (used as key into
  // `store`) and a remainder (what is searched for). A hostname without a
  // dot is used as both. The entry found under the TLD is either:
  // - a string of space-delimited remainders: searched using indexOf()
  // - an array indexed by remainder length, each slot being a concatenation
  //   of sorted, same-length remainders: searched using binary search.
  function search(store, hostname) {
      // Extract TLD
      var dot = hostname.lastIndexOf('.');
      var tld = dot < 0 ? hostname : hostname.slice(dot + 1);
      var remainder = dot < 0 ? hostname : hostname.slice(0, dot);

      var bucket = store[tld];
      if ( !bucket ) {
          return false;
      }

      // String representation: entries are wrapped in single spaces.
      if ( typeof bucket === 'string' ) {
          return bucket.indexOf(' ' + remainder + ' ') !== -1;
      }

      // Array representation: pick the slot holding remainders of same length.
      var width = remainder.length;
      var packed = bucket[width];
      if ( !packed ) {
          return false;
      }

      // Binary search over fixed-width records.
      var lo = 0;
      var hi = Math.floor(packed.length / width + 0.5);
      var mid, start, entry;
      while ( lo < hi ) {
          mid = (lo + hi) >> 1;
          start = width * mid;
          entry = packed.slice(start, start + width);
          if ( remainder === entry ) {
              return true;
          }
          if ( remainder < entry ) {
              hi = mid;
          } else {
              lo = mid + 1;
          }
      }
      return false;
  }
  136. /******************************************************************************/
  // Parse and set a UTF-8 text-based suffix list. Format is same as found at:
  // http://publicsuffix.org/list/
  //
  // `text` is the whole list as one string; lines are split on '\n', or on
  // '\r' when no '\n' remains.
  //
  // `toAscii` is a converter from unicode to punycode. Required since the
  // Public Suffix List contains unicode characters. It is only called for
  // rules containing a character matched by `mustPunycode`.
  // Suggestion: use <https://github.com/bestiejs/punycode.js> it's quite good.
  //
  // The current rules and exceptions are discarded, the new ones are
  // crystallized, then the onChanged listeners are called.
  function parse(text, toAscii) {
      exceptions = {};
      rules = {};

      // Borrowed from the prototype so that a rule can never shadow it.
      var hasOwn = Object.prototype.hasOwnProperty;
      var reWhitespace = /\s/;
      var lineBeg = 0, lineEnd;
      var textEnd = text.length;
      var line, store, pos, tld;

      while ( lineBeg < textEnd ) {
          lineEnd = text.indexOf('\n', lineBeg);
          if ( lineEnd < 0 ) {
              lineEnd = text.indexOf('\r', lineBeg);
              if ( lineEnd < 0 ) {
                  lineEnd = textEnd;
              }
          }
          line = text.slice(lineBeg, lineEnd);
          lineBeg = lineEnd + 1;

          // Ignore comments
          pos = line.indexOf('//');
          if ( pos >= 0 ) {
              line = line.slice(0, pos);
          }

          // Ignore surrounding whitespaces
          line = line.trim();
          if ( !line ) {
              continue;
          }

          // http://publicsuffix.org/list/:
          //   "Each line is only read up to the first whitespace"
          // Anything past that point is not part of the rule.
          pos = line.search(reWhitespace);
          if ( pos >= 0 ) {
              line = line.slice(0, pos);
          }

          // Is this an exception rule?
          if ( line.charAt(0) === '!' ) {
              store = exceptions;
              line = line.slice(1);
          } else {
              store = rules;
          }

          // A lone '!' carries no rule.
          if ( !line ) {
              continue;
          }

          if ( mustPunycode.test(line) ) {
              line = toAscii(line);
          }

          // http://publicsuffix.org/list/:
          //   "... all rules must be canonicalized in the normal way
          //   for hostnames - lower-case, Punycode ..."
          line = line.toLowerCase();

          // Extract TLD. A rule made of a single label is stored under
          // itself, which is what search() expects for dot-less lookups.
          pos = line.lastIndexOf('.');
          if ( pos < 0 ) {
              tld = line;
          } else {
              tld = line.slice(pos + 1);
              line = line.slice(0, pos);
          }

          // Store suffix using tld as key
          if ( !hasOwn.call(store, tld) ) {
              store[tld] = [];
          }
          if ( line ) {
              store[tld].push(line);
          }
      }

      crystallize(exceptions);
      crystallize(rules);

      callListeners(onChangedListeners);
  }
  206. /******************************************************************************/
  // Crystallize the storage of suffixes using optimal internal representation
  // for future look up.
  //
  // `store` maps a TLD to an array of suffixes. Each array is replaced in
  // place by either a string or an array of strings (see below). The same
  // `store` object is returned.
  function crystallize(store) {
      var tld, list, joined, byLength, idx, entry, len;

      for ( tld in store ) {
          if ( !store.hasOwnProperty(tld) ) {
              continue;
          }
          list = store[tld];
          joined = list.join(' ');

          // Concatenated list of suffixes less than cutoff length:
          // Store as string, lookup using indexOf(). Entries are wrapped in
          // spaces; no suffix at all is stored as the empty string.
          if ( joined.length < cutoffLength ) {
              store[tld] = joined.length === 0 ? '' : ' ' + joined + ' ';
              continue;
          }

          // Concatenated list of suffixes greater or equal to cutoff length:
          // Store as array keyed on suffix length, lookup using binary search.
          // I borrowed the idea to key on string length here:
          // http://ejohn.org/blog/dictionary-lookups-in-javascript/#comment-392072
          byLength = [];
          for ( idx = list.length - 1; idx >= 0; idx-- ) {
              entry = list[idx];
              len = entry.length;
              if ( !byLength[len] ) {
                  byLength[len] = [];
              }
              byLength[len].push(entry);
          }

          // Each populated slot becomes one string of sorted, same-length
          // suffixes laid end to end.
          for ( len = byLength.length - 1; len >= 0; len-- ) {
              if ( byLength[len] ) {
                  byLength[len] = byLength[len].sort().join('');
              }
          }
          store[tld] = byLength;
      }

      return store;
  }
  251. /******************************************************************************/
  // Return a snapshot ("selfie") of the current state: the magic marker plus
  // the current `rules` and `exceptions` stores. The stores are referenced,
  // not copied.
  function toSelfie() {
      var selfie = {};
      selfie.magic = selfieMagic;
      selfie.rules = rules;
      selfie.exceptions = exceptions;
      return selfie;
  }
  // Restore state from an object produced by toSelfie().
  //
  // Returns false, leaving the current state untouched, when `selfie` is not
  // an object (this includes null) or when its `magic` property is not the
  // expected marker. Otherwise installs `selfie.rules` and
  // `selfie.exceptions` as the current stores, calls the onChanged
  // listeners, and returns true.
  function fromSelfie(selfie) {
      // `typeof null` is 'object': null must be rejected explicitly, else
      // reading `selfie.magic` below would throw.
      if ( selfie === null || typeof selfie !== 'object' ) {
          return false;
      }
      if ( typeof selfie.magic !== 'string' || selfie.magic !== selfieMagic ) {
          return false;
      }
      rules = selfie.rules;
      exceptions = selfie.exceptions;
      callListeners(onChangedListeners);
      return true;
  }
  268. /******************************************************************************/
  // Append `callback` to the `listeners` array, unless it is not a function
  // or is already present in the array.
  var addListener = function(listeners, callback) {
      var isNewFunction =
          typeof callback === 'function' &&
          listeners.indexOf(callback) < 0;
      if ( isNewFunction ) {
          listeners.push(callback);
      }
  };
  // Remove `callback` from the `listeners` array. Nothing happens when it is
  // not in the array.
  var removeListener = function(listeners, callback) {
      var index = listeners.indexOf(callback);
      if ( index === -1 ) {
          return;
      }
      listeners.splice(index, 1);
  };
  // Call every function in the `listeners` array, in order, without
  // arguments.
  //
  // The array is snapshotted first: a listener which removes itself (or
  // another listener) while being called would otherwise shift the live
  // array under the loop and cause the next listener to be skipped.
  // A listener removed before its turn is not called; a listener added
  // during the dispatch is first called on the next dispatch.
  var callListeners = function(listeners) {
      var snapshot = listeners.slice();
      var listener;
      for ( var i = 0; i < snapshot.length; i++ ) {
          listener = snapshot[i];
          // Skip listeners which were removed earlier in this dispatch.
          if ( listeners.indexOf(listener) !== -1 ) {
              listener();
          }
      }
  };
  288. /******************************************************************************/
  // Public subscription object for "the list has changed" notifications.
  // Both methods operate on the module-level `onChangedListeners` array.
  var onChanged = {
      // Register `callback`; delegates to the addListener() helper.
      addListener: function(callback) {
          addListener(onChangedListeners, callback);
      },

      // Unregister `callback`; delegates to the removeListener() helper.
      removeListener: function(callback) {
          removeListener(onChangedListeners, callback);
      }
  };
  297. /******************************************************************************/
  // Public API
  //
  // The API object is attached as `publicSuffixList` to `root`. When `root`
  // is not provided (e.g. `this` is undefined because the code is evaluated
  // as a module or from within strict code), fall back to whichever global
  // object exists. `window` is tried first, as before; the `typeof` guards
  // prevent a ReferenceError where `window` is not defined.
  if ( !root ) {
      if ( typeof window !== 'undefined' ) {
          root = window;
      } else if ( typeof self !== 'undefined' ) {
          root = self;
      } else if ( typeof globalThis !== 'undefined' ) {
          root = globalThis;
      } else if ( typeof global !== 'undefined' ) {
          root = global;
      }
  }
  root.publicSuffixList = {
      'version': '1.0',
      'parse': parse,
      'getDomain': getDomain,
      'getPublicSuffix': getPublicSuffix,
      'toSelfie': toSelfie,
      'fromSelfie': fromSelfie,
      'onChanged': onChanged
  };
  309. /******************************************************************************/
  310. })(this);