/*******************************************************************************

    publicsuffixlist.js - an efficient javascript implementation to deal with
    Mozilla Foundation's Public Suffix List <http://publicsuffix.org/list/>
    Copyright (C) 2013 Raymond Hill

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see {http://www.gnu.org/licenses/}.

*/

/*! Home: https://github.com/gorhill/publicsuffixlist.js */

/*
    This code is mostly dumb: I consider this to be lower-level code, thus
    in order to ensure efficiency, the caller is responsible for sanitizing
    the inputs.
*/

/******************************************************************************/

// A single instance of PublicSuffixList is enough.
;(function(root) {

'use strict';

/******************************************************************************/

// Lookup stores, both keyed on TLD. After crystallize() each value is either:
// - '' when the TLD has no stored suffix,
// - a string ' suffix1 suffix2 ... ' (short lists, looked up with indexOf()),
// - an array indexed by suffix length, each slot holding the sorted suffixes
//   of that length concatenated together (long lists, binary search).
var exceptions = {};
var rules = {};

// Marker used by toSelfie()/fromSelfie() to recognize a compatible snapshot.
var selfieMagic = 'iscjsfsaolnm';

// This value dictates how the lookup is performed for a given TLD:
//    joined suffixes length <  cutoffLength => indexOf()
//    joined suffixes length >= cutoffLength => binary search
var cutoffLength = 256;

// Any character outside [A-Za-z0-9_.*-] means the rule must go through
// the caller-supplied `toAscii` converter.
var mustPunycode = /[^\w.*-]/;
var reWhitespace = /\s/;
var onChangedListeners = [];
var hasOwn = Object.prototype.hasOwnProperty;

/******************************************************************************/

// In the context of this code, a domain is defined as:
//   "{label}.{public suffix}".
// A single standalone label is a public suffix as per
// http://publicsuffix.org/list/:
//   "If no rules match, the prevailing rule is '*' "
// This means 'localhost' is not deemed a domain by this
// code, since according to the definition above, it would be
// evaluated as a public suffix. The caller is therefore responsible to
// decide how to further interpret such public suffix.
//
// `hostname` must be a valid ascii-based hostname.
//
// Returns the domain as a lower-case string, or '' when `hostname` is empty,
// starts with a dot, or is itself a public suffix.

function getDomain(hostname) {
    // A hostname starting with a dot is not a valid hostname.
    if ( !hostname || hostname.charAt(0) === '.' ) {
        return '';
    }
    hostname = hostname.toLowerCase();
    var suffix = getPublicSuffix(hostname);
    if ( suffix === hostname ) {
        return '';
    }
    // Dot which separates the public suffix from the label on its left.
    var suffixDot = hostname.lastIndexOf('.', hostname.length - suffix.length);
    // Dot (if any) on the left of that label: the domain starts right after.
    var pos = hostname.lastIndexOf('.', suffixDot - 1);
    if ( pos <= 0 ) {
        return hostname;
    }
    return hostname.slice(pos + 1);
}

/******************************************************************************/

// Return longest public suffix.
//
// `hostname` must be a valid ascii-based string which respect hostname naming.
// It is NOT lower-cased here: the caller must pass it already canonicalized.
//
// Returns '' for an empty `hostname`. When no rule matches, the right-most
// label is returned (the implicit '*' rule).

function getPublicSuffix(hostname) {
    if ( !hostname ) {
        return '';
    }
    // Since we slice down the hostname with each pass, the first match
    // is the longest, so no need to find all the matching rules.
    var pos;
    while ( true ) {
        pos = hostname.indexOf('.');
        if ( pos < 0 ) {
            return hostname;
        }
        // An exception rule cancels a wildcard rule: the public suffix is
        // the matched name minus its left-most label.
        if ( search(exceptions, hostname) ) {
            return hostname.slice(pos + 1);
        }
        if ( search(rules, hostname) ) {
            return hostname;
        }
        // Wildcard rule: replace the left-most label with '*'.
        if ( search(rules, '*' + hostname.slice(pos)) ) {
            return hostname;
        }
        hostname = hostname.slice(pos + 1);
    }
    // unreachable
}

/******************************************************************************/

// Look up a specific hostname in `store` (either `rules` or `exceptions`).
//
// Returns true if and only if `hostname` is stored verbatim.

function search(store, hostname) {
    // Extract TLD. A dot-less hostname is stored under itself, which mirrors
    // how parse() files single-label rules.
    var pos = hostname.lastIndexOf('.');
    var tld, remainder;
    if ( pos < 0 ) {
        tld = hostname;
        remainder = hostname;
    } else {
        tld = hostname.slice(pos + 1);
        remainder = hostname.slice(0, pos);
    }
    // Own properties only: a TLD such as 'constructor' must not resolve to
    // a member inherited from Object.prototype.
    if ( !hasOwn.call(store, tld) ) {
        return false;
    }
    var substore = store[tld];
    if ( !substore ) {
        return false;
    }
    // If substore is a string, use indexOf()
    if ( typeof substore === 'string' ) {
        return substore.indexOf(' ' + remainder + ' ') >= 0;
    }
    // It is an array: use binary search within the slot holding all the
    // suffixes which have the same length as `remainder`.
    var l = remainder.length;
    var haystack = substore[l];
    if ( !haystack ) {
        return false;
    }
    var left = 0;
    var right = Math.round(haystack.length / l);
    var i, start, needle;
    while ( left < right ) {
        i = (left + right) >> 1;
        start = l * i;
        needle = haystack.slice(start, start + l);
        if ( remainder < needle ) {
            right = i;
        } else if ( remainder > needle ) {
            left = i + 1;
        } else {
            return true;
        }
    }
    return false;
}

/******************************************************************************/

// Parse and set a UTF-8 text-based suffix list. Format is same as found at:
// http://publicsuffix.org/list/
//
// `toAscii` is a converter from unicode to punycode. Required since the
// Public Suffix List contains unicode characters.
// Suggestion: use <https://github.com/bestiejs/punycode.js> it's quite good.
//
// The new list replaces the current one only once it has been parsed
// entirely: if `toAscii` is missing or throws, the error propagates and the
// list currently in use is left untouched. "Changed" listeners are called
// after a successful parse.

function parse(text, toAscii) {
    var newExceptions = {};
    var newRules = {};
    var lineBeg = 0, lineEnd;
    var textEnd = text.length;
    var line, store, pos, tld;

    while ( lineBeg < textEnd ) {
        lineEnd = text.indexOf('\n', lineBeg);
        if ( lineEnd < 0 ) {
            lineEnd = text.indexOf('\r', lineBeg);
            if ( lineEnd < 0 ) {
                lineEnd = textEnd;
            }
        }
        line = text.slice(lineBeg, lineEnd);
        lineBeg = lineEnd + 1;

        // Ignore comments
        pos = line.indexOf('//');
        if ( pos >= 0 ) {
            line = line.slice(0, pos);
        }

        // Ignore surrounding whitespaces
        line = line.trim();

        // http://publicsuffix.org/list/:
        //   "Each line is only read up to the first whitespace"
        pos = line.search(reWhitespace);
        if ( pos >= 0 ) {
            line = line.slice(0, pos);
        }
        if ( !line ) {
            continue;
        }

        // Is this an exception rule?
        if ( line.charAt(0) === '!' ) {
            store = newExceptions;
            line = line.slice(1);
            if ( !line ) {
                continue;
            }
        } else {
            store = newRules;
        }

        if ( mustPunycode.test(line) ) {
            if ( typeof toAscii !== 'function' ) {
                throw new TypeError(
                    'publicSuffixList.parse: `toAscii` must be a function ' +
                    'when the list contains non-ascii rules'
                );
            }
            line = toAscii(line);
        }

        // http://publicsuffix.org/list/:
        // "... all rules must be canonicalized in the normal way
        // for hostnames - lower-case, Punycode ..."
        line = line.toLowerCase();

        // Extract TLD. A single-label rule is filed under itself.
        pos = line.lastIndexOf('.');
        if ( pos < 0 ) {
            tld = line;
        } else {
            tld = line.slice(pos + 1);
            line = line.slice(0, pos);
        }

        // Store suffix using tld as key
        if ( !hasOwn.call(store, tld) ) {
            store[tld] = [];
        }
        if ( line ) {
            store[tld].push(line);
        }
    }

    // Everything went fine: swap in the new list.
    exceptions = crystallize(newExceptions);
    rules = crystallize(newRules);

    callListeners(onChangedListeners);
}

/******************************************************************************/

// Crystallize the storage of suffixes using optimal internal representation
// for future look up.
//
// `store` maps each TLD to an array of suffixes; every entry is replaced in
// place with its lookup-ready form. Returns `store`.

function crystallize(store) {
    var labels, joined, byLength, suffix, i, l;

    for ( var tld in store ) {
        if ( !hasOwn.call(store, tld) ) {
            continue;
        }
        labels = store[tld];
        joined = labels.join(' ');
        // No suffix
        if ( !joined ) {
            store[tld] = '';
            continue;
        }
        // Concatenated list of suffixes less than cutoff length:
        //   Store as string, lookup using indexOf()
        if ( joined.length < cutoffLength ) {
            store[tld] = ' ' + joined + ' ';
            continue;
        }
        // Concatenated list of suffixes greater or equal to cutoff length
        //   Store as array keyed on suffix length, lookup using binary search.
        // I borrowed the idea to key on string length here:
        //   http://ejohn.org/blog/dictionary-lookups-in-javascript/#comment-392072
        byLength = [];
        i = labels.length;
        while ( i-- ) {
            suffix = labels[i];
            l = suffix.length;
            if ( !byLength[l] ) {
                byLength[l] = [];
            }
            byLength[l].push(suffix);
        }
        // Default sort() orders by UTF-16 code units, which is the ordering
        // the `<`/`>` string comparisons in search() rely on.
        l = byLength.length;
        while ( l-- ) {
            if ( byLength[l] ) {
                byLength[l] = byLength[l].sort().join('');
            }
        }
        store[tld] = byLength;
    }
    return store;
}

/******************************************************************************/

// Return a snapshot ("selfie") of the current list, which can be handed back
// to fromSelfie() in order to skip parsing. The snapshot references the live
// stores, it is not a copy.

function toSelfie() {
    return {
        magic: selfieMagic,
        rules: rules,
        exceptions: exceptions
    };
}

// Tell whether `candidate` can be used as a lookup store.

function isStore(candidate) {
    return typeof candidate === 'object' && candidate !== null;
}

// Restore the list from a snapshot created by toSelfie().
//
// Returns false, leaving the current list untouched, when `selfie` is not an
// object, does not carry the expected magic marker, or lacks its stores.
// Returns true once the snapshot is installed and "changed" listeners have
// been called.

function fromSelfie(selfie) {
    // `typeof null` is 'object', hence the explicit falsy check.
    if ( !selfie || typeof selfie !== 'object' ) {
        return false;
    }
    if ( selfie.magic !== selfieMagic ) {
        return false;
    }
    if ( !isStore(selfie.rules) || !isStore(selfie.exceptions) ) {
        return false;
    }
    rules = selfie.rules;
    exceptions = selfie.exceptions;
    callListeners(onChangedListeners);
    return true;
}

/******************************************************************************/

// Register `callback` in `listeners`, unless it is not a function or it is
// already registered.

var addListener = function(listeners, callback) {
    if ( typeof callback !== 'function' ) {
        return;
    }
    if ( listeners.indexOf(callback) === -1 ) {
        listeners.push(callback);
    }
};

// Unregister `callback` from `listeners`, if present.

var removeListener = function(listeners, callback) {
    var pos = listeners.indexOf(callback);
    if ( pos !== -1 ) {
        listeners.splice(pos, 1);
    }
};

// Call every listener, in registration order.

var callListeners = function(listeners) {
    // Iterate over a snapshot: a listener which adds or removes listeners
    // while being called must not cause another one to be skipped.
    var snapshot = listeners.slice();
    for ( var i = 0; i < snapshot.length; i++ ) {
        snapshot[i]();
    }
};

/******************************************************************************/

// Listeners are called with no argument each time the list is replaced,
// through either parse() or fromSelfie().

var onChanged = {
    addListener: function(callback) {
        addListener(onChangedListeners, callback);
    },
    removeListener: function(callback) {
        removeListener(onChangedListeners, callback);
    }
};

/******************************************************************************/

// Public API

// `this` is undefined in ES modules and other strict contexts, and `window`
// exists only in browser pages: probe for whichever global object is there.
if ( !root ) {
    if ( typeof window !== 'undefined' ) {
        root = window;
    } else if ( typeof self !== 'undefined' ) {
        root = self;
    } else if ( typeof global !== 'undefined' ) {
        root = global;
    } else if ( typeof globalThis !== 'undefined' ) {
        root = globalThis;
    }
}

root.publicSuffixList = {
    'version': '1.0',
    'parse': parse,
    'getDomain': getDomain,
    'getPublicSuffix': getPublicSuffix,
    'toSelfie': toSelfie,
    'fromSelfie': fromSelfie,
    'onChanged': onChanged
};

/******************************************************************************/

})(this);
|