12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102 |
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- "use strict";
- const {tokenize} = ChromeUtils.import("resource://activity-stream/lib/Tokenize.jsm");
/**
 * RecipeExecutor is the core feature engineering pipeline for the in-browser
 * personalization work. These pipelines are called "recipes". A recipe is an
 * array of objects that define a "step" in the recipe. A step is simply an
 * object with a field "function" that specifies what is being done in the step
 * along with other fields that are semantically defined for that step.
 *
 * There are two types of recipes: "builder" recipes and "combiner" recipes.
 * Builder recipes mutate an object until it matches some set of criteria.
 * Combiner recipes take two objects, (a "left" and a "right"), and specify the
 * steps to merge the right object into the left object.
 *
 * A short nonsense example recipe is:
 * [ {"function": "get_url_domain", "path_length": 1, "field": "url", "dest": "url_domain"},
 *   {"function": "nb_tag", "fields": ["title", "description"]},
 *   {"function": "conditionally_nmf_tag", "fields": ["title", "description"]} ]
 *
 * Recipes are sandboxed by the fact that the step functions must be explicitly
 * whitelisted. Functions whitelisted for builder recipes are specified in the
 * RecipeExecutor.ITEM_BUILDER_REGISTRY, while combiner functions are whitelisted
 * in RecipeExecutor.ITEM_COMBINER_REGISTRY .
 *
 * Unless stated otherwise, every step returns the (possibly mutated) item on
 * success and null to abort the recipe.
 */
this.RecipeExecutor = class RecipeExecutor {
  /**
   * @param nbTaggers   Iterable of naive bayes taggers; each must expose
   *                    tagTokens(tokens).
   * @param nmfTaggers  Map of parent tag name to NMF tagger; each must expose
   *                    tagTokens(tokens).
   */
  constructor(nbTaggers, nmfTaggers) {
    // The methods are stored unbound; executeRecipe() invokes them with
    // op.call(this, ...), so they still see this executor as `this`.
    this.ITEM_BUILDER_REGISTRY = {
      nb_tag: this.naiveBayesTag,
      conditionally_nmf_tag: this.conditionallyNmfTag,
      accept_item_by_field_value: this.acceptItemByFieldValue,
      tokenize_url: this.tokenizeUrl,
      get_url_domain: this.getUrlDomain,
      tokenize_field: this.tokenizeField,
      copy_value: this.copyValue,
      keep_top_k: this.keepTopK,
      scalar_multiply: this.scalarMultiply,
      elementwise_multiply: this.elementwiseMultiply,
      vector_multiply: this.vectorMultiply,
      scalar_add: this.scalarAdd,
      vector_add: this.vectorAdd,
      make_boolean: this.makeBoolean,
      whitelist_fields: this.whitelistFields,
      filter_by_value: this.filterByValue,
      l2_normalize: this.l2Normalize,
      prob_normalize: this.probNormalize,
      set_default: this.setDefault,
      lookup_value: this.lookupValue,
      copy_to_map: this.copyToMap,
      scalar_multiply_tag: this.scalarMultiplyTag,
      apply_softmax_tags: this.applySoftmaxTags,
    };
    this.ITEM_COMBINER_REGISTRY = {
      combiner_add: this.combinerAdd,
      combiner_max: this.combinerMax,
      combiner_collect_values: this.combinerCollectValues,
    };
    this.nbTaggers = nbTaggers;
    this.nmfTaggers = nmfTaggers;
  }

  /**
   * Determines the type of a field. The types the recipe steps care about are:
   *  string
   *  number
   *  array
   *  map (any non-null, non-array object)
   *
   * Other inputs yield "null" for null, or the plain `typeof` result
   * ("boolean", "undefined", "function", ...).
   */
  _typeOf(data) {
    const t = typeof data;
    if (t !== "object") {
      return t;
    }
    if (data === null) {
      return "null";
    }
    return Array.isArray(data) ? "array" : "map";
  }

  /**
   * Returns a scalar, either because it was a constant, or by
   * looking it up from the item. Allows for a default value if the lookup
   * fails.
   *
   * @param item    The item to look the scalar up in
   * @param k       A number (returned as is) or the name of a numeric field
   * @param dfault  Returned when k is neither of the above
   */
  _lookupScalar(item, k, dfault) {
    const kType = this._typeOf(k);
    if (kType === "number") {
      return k;
    }
    if ((kType === "string") && (k in item) && (this._typeOf(item[k]) === "number")) {
      return item[k];
    }
    return dfault;
  }

  /**
   * Resolves the fallback scalar for steps that document a "default" option.
   * "dfault" is the spelling the code has always read, so it wins when
   * present; "default" is the documented spelling; 0 is the documented
   * default when neither is given.
   */
  _defaultScalar(config) {
    if ("dfault" in config) {
      return config.dfault;
    }
    if ("default" in config) {
      return config.default;
    }
    return 0;
  }

  /**
   * Looks up a step function in a registry, considering own properties only.
   * A plain property read would also resolve names inherited from
   * Object.prototype (e.g. "valueOf", "toString"), which would bypass the
   * whitelist.
   *
   * @returns the registered function, or undefined when not whitelisted
   */
  _lookupStep(registry, name) {
    if (Object.prototype.hasOwnProperty.call(registry, name)) {
      return registry[name];
    }
    return undefined;
  }

  /**
   * Parses a URL, returning null instead of throwing when it is malformed.
   */
  _parseUrl(value) {
    try {
      return new URL(value);
    } catch (e) {
      return null;
    }
  }

  /**
   * Turns "+" into spaces and percent-decodes a URL component. Text that is
   * not validly percent-encoded (e.g. a literal "%" left by an earlier
   * decoding pass) is returned undecoded rather than throwing URIError.
   */
  _decodeText(text) {
    const spaced = text.replace(/\+/g, " ");
    try {
      return decodeURIComponent(spaced);
    } catch (e) {
      return spaced;
    }
  }

  /**
   * Applies fn to a numeric field in place. The field may be a number, an
   * array of numbers, or a map of strings to numbers.
   *
   * @returns the item, or null if the field has any other type
   */
  _transformField(item, field, fn) {
    const data = item[field];
    const type = this._typeOf(data);
    if (type === "number") {
      item[field] = fn(data);
    } else if (type === "array") {
      for (let i = 0; i < data.length; i++) {
        data[i] = fn(data[i]);
      }
    } else if (type === "map") {
      Object.keys(data).forEach(key => {
        data[key] = fn(data[key]);
      });
    } else {
      return null;
    }
    return item;
  }

  /**
   * Divides every value of an array or map field, in place, by the norm that
   * computeNorm derives from those values. A zero norm leaves the values
   * untouched to avoid dividing by zero.
   *
   * @returns the item, or null if the field is neither an array nor a map
   */
  _normalizeField(item, field, computeNorm) {
    const data = item[field];
    const type = this._typeOf(data);
    if (type === "array") {
      const norm = computeNorm(data);
      if (norm !== 0) {
        for (let i = 0; i < data.length; i++) {
          data[i] /= norm;
        }
      }
    } else if (type === "map") {
      const keys = Object.keys(data);
      const norm = computeNorm(keys.map(key => data[key]));
      if (norm !== 0) {
        keys.forEach(key => {
          data[key] /= norm;
        });
      }
    } else {
      return null;
    }
    return item;
  }

  /**
   * Simply appends all the strings from a set of fields together, separated
   * by spaces. If a field is a list, then the cells of the list are appended
   * individually. Fields missing from the item are skipped.
   */
  _assembleText(item, fields) {
    const textArr = [];
    for (const field of fields) {
      if (!(field in item)) {
        continue;
      }
      const value = item[field];
      if (this._typeOf(value) === "array") {
        for (const ele of value) {
          textArr.push(String(ele));
        }
      } else {
        // String() is a no-op for strings and stringifies anything else.
        textArr.push(String(value));
      }
    }
    return textArr.join(" ");
  }

  /**
   * Runs the naive bayes text taggers over a set of text fields. Stores the
   * results in new fields:
   *  nb_tags:          a map of text strings to probabilities
   *  nb_tags_extended: a map of text strings to the full tagger results
   *  nb_tokens:        the tokenized text that was tagged
   *
   * Only results that have a label and are marked confident are kept.
   *
   * Config:
   *  fields:           an array containing a list of fields to concatenate and tag
   */
  naiveBayesTag(item, config) {
    const tokens = tokenize(this._assembleText(item, config.fields));
    const tags = {};
    const extendedTags = {};

    for (const nbTagger of this.nbTaggers) {
      const result = nbTagger.tagTokens(tokens);
      if ((result.label !== null) && result.confident) {
        extendedTags[result.label] = result;
        tags[result.label] = Math.exp(result.logProb);
      }
    }

    item.nb_tags = tags;
    item.nb_tags_extended = extendedTags;
    item.nb_tokens = tokens;
    return item;
  }

  /**
   * Selectively runs NMF text taggers depending on which tags were found
   * by the naive bayes taggers. Writes the results into new fields:
   *  nmf_tags_parent_weights:  map of parent tags to probabilities of those parent tags
   *  nmf_tags:                 map of strings to maps of strings to probabilities
   *  nmf_tags_parent:          map of child tags to parent tags
   *
   * Requires the nb_tags and nb_tokens fields; aborts (null) without them.
   *
   * Config:
   *  Not configurable
   */
  conditionallyNmfTag(item, config) {
    if (!("nb_tags" in item) || !("nb_tokens" in item)) {
      return null;
    }

    const nestedNmfTags = {};
    const parentTags = {};
    const parentWeights = {};

    Object.keys(item.nb_tags).forEach(parentTag => {
      const nmfTagger = this.nmfTaggers[parentTag];
      if (nmfTagger === undefined) {
        return;
      }
      const nmfTags = nmfTagger.tagTokens(item.nb_tokens);
      const childTags = {};
      Object.keys(nmfTags).forEach(nmfTag => {
        childTags[nmfTag] = nmfTags[nmfTag];
        parentTags[nmfTag] = parentTag;
      });
      nestedNmfTags[parentTag] = childTags;
      parentWeights[parentTag] = item.nb_tags[parentTag];
    });

    item.nmf_tags = nestedNmfTags;
    item.nmf_tags_parent = parentTags;
    item.nmf_tags_parent_weights = parentWeights;
    return item;
  }

  /**
   * Checks a field's value against another value (either from another field
   * or a constant). If the test passes, then the item is emitted, otherwise
   * the pipeline is aborted.
   *
   * Config:
   *  field      Field to read the value to test. Left side of operator.
   *  op         one of ==, !=, <, <=, >, >=
   *  rhsValue   Constant value to compare against. Right side of operator.
   *  rhsField   Field to read value to compare against. Right side of operator.
   *
   * NOTE: rhsValue takes precedence over rhsField. "==" and "!=" use loose
   * (type-coercing) comparison. An unknown op aborts the pipeline.
   */
  acceptItemByFieldValue(item, config) {
    if (!(config.field in item)) {
      return null;
    }

    let rhs = null;
    if ("rhsValue" in config) {
      rhs = config.rhsValue;
    } else if (("rhsField" in config) && (config.rhsField in item)) {
      rhs = item[config.rhsField];
    }
    if (rhs === null) {
      return null;
    }

    const lhs = item[config.field];
    let accepted = false;
    switch (config.op) {
      case "==":
        // eslint-disable-next-line eqeqeq
        accepted = lhs == rhs;
        break;
      case "!=":
        // eslint-disable-next-line eqeqeq
        accepted = lhs != rhs;
        break;
      case "<":
        accepted = lhs < rhs;
        break;
      case "<=":
        accepted = lhs <= rhs;
        break;
      case ">":
        accepted = lhs > rhs;
        break;
      case ">=":
        accepted = lhs >= rhs;
        break;
    }
    return accepted ? item : null;
  }

  /**
   * Splits a URL into text-like tokens: hostname first, then path, then each
   * query parameter's key and (non-empty) value.
   *
   * Config:
   *  field   Field containing a URL
   *  dest    Field to write the tokens to as an array of strings
   *
   * NOTE: Any initial 'www' on the hostname is removed. Aborts (null) when
   * the field is missing or does not hold a parseable URL.
   */
  tokenizeUrl(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    const url = this._parseUrl(item[config.field]);
    if (url === null) {
      return null;
    }

    let domain = url.hostname;
    if (domain.startsWith("www.")) {
      domain = domain.substring(4);
    }

    const toks = tokenize(domain);
    // searchParams already yields decoded text, so a second decoding pass can
    // meet a bare "%"; _decodeText tolerates that instead of throwing.
    const appendTokens = text => {
      for (const tok of tokenize(this._decodeText(text))) {
        toks.push(tok);
      }
    };

    appendTokens(url.pathname);
    for (const pair of url.searchParams.entries()) {
      appendTokens(pair[0]);
      if ((pair[1] !== null) && (pair[1] !== "")) {
        appendTokens(pair[1]);
      }
    }

    item[config.dest] = toks;
    return item;
  }

  /**
   * Gets the hostname (minus any initial "www.") along with the leftmost
   * directories on the path, all lowercased.
   *
   * Config:
   *  field          Field containing the URL
   *  dest           Field to write the resulting string to
   *  path_length    OPTIONAL (DEFAULT: 0) Number of leftmost subdirectories to include
   *
   * Aborts (null) when the field is missing or does not hold a parseable URL.
   */
  getUrlDomain(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    const url = this._parseUrl(item[config.field]);
    if (url === null) {
      return null;
    }

    // toLowerCase rather than toLocaleLowerCase: the result is used as a key
    // and must not depend on the user's locale (e.g. Turkish dotless i).
    let domain = url.hostname.toLowerCase();
    if (domain.startsWith("www.")) {
      domain = domain.substring(4);
    }

    const pathLength = ("path_length" in config) ? config.path_length : 0;
    if (pathLength > 0) {
      // pathname starts with "/", so split() yields a leading empty segment;
      // keeping pathLength + 1 segments keeps pathLength directories.
      domain += url.pathname.toLowerCase().split("/")
        .slice(0, pathLength + 1)
        .join("/");
    }

    item[config.dest] = domain;
    return item;
  }

  /**
   * Splits a field into tokens.
   * Config:
   *  field         Field containing a string to tokenize
   *  dest          Field to write the array of strings to
   */
  tokenizeField(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    item[config.dest] = tokenize(item[config.field]);
    return item;
  }

  /**
   * Deep copy from one field to another. The copy is made by a JSON round
   * trip, so only JSON-representable data survives.
   * Config:
   *  src           Field to read from
   *  dest          Field to write to
   */
  copyValue(item, config) {
    if (!(config.src in item)) {
      return null;
    }
    item[config.dest] = JSON.parse(JSON.stringify(item[config.src]));
    return item;
  }

  /**
   * Converts a field containing a map of strings to a map of strings
   * to numbers, to a map of strings to numbers containing at most k elements.
   * This operation is performed by first promoting all the subkeys up one
   * level, and then taking the top (or bottom) k values. Values that are not
   * nested maps are kept under their own key.
   *
   * Config:
   *  field         Points to a map of strings to a map of strings to numbers
   *  k             Maximum number of items to keep (number, or name of a
   *                numeric field; DEFAULT: 1048576)
   *  descending    OPTIONAL (DEFAULT: True) Sorts score in descending order
   *                  (i.e. keeps maximum)
   */
  keepTopK(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    const k = this._lookupScalar(item, config.k, 1048576);
    const descending = (!("descending" in config) || (config.descending !== false));
    const source = item[config.field];

    // we can't sort by the values in the map, so we have to convert this
    // to an array, and then sort.
    const sortable = [];
    Object.keys(source).forEach(outerKey => {
      const outerValue = source[outerKey];
      if (this._typeOf(outerValue) === "map") {
        Object.keys(outerValue).forEach(innerKey => {
          sortable.push({key: innerKey, value: outerValue[innerKey]});
        });
      } else {
        sortable.push({key: outerKey, value: outerValue});
      }
    });

    const compare = descending ?
      (a, b) => b.value - a.value :
      (a, b) => a.value - b.value;
    sortable.sort(compare);

    // now take the top k
    const newMap = {};
    let i = 0;
    for (const pair of sortable) {
      if (i >= k) {
        break;
      }
      newMap[pair.key] = pair.value;
      i++;
    }
    item[config.field] = newMap;
    return item;
  }

  /**
   * Scalar multiplies a vector by some constant
   *
   * Config:
   *  field         Points to:
   *                   a map of strings to numbers
   *                   an array of numbers
   *                   a number
   *  k             Either a number, or a string. If it's a number then this
   *                  is the scalar value to multiply by. If it's a string,
   *                  the value in the pointed to field is used.
   *  default       OPTIONAL (DEFAULT: 0), If k is a string, and no numeric
   *                  value is found, then use this value. The legacy
   *                  spelling "dfault" is also accepted and takes precedence.
   */
  scalarMultiply(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    const k = this._lookupScalar(item, config.k, this._defaultScalar(config));
    return this._transformField(item, config.field, value => value * k);
  }

  /**
   * Elementwise multiplies either two maps or two arrays together, storing
   * the result in left. If left and right are not of the same type, or are
   * arrays of different lengths, the pipeline is aborted.
   *
   * Maps are special case. For maps the left must be a nested map such as:
   * { k1: { k11: 1, k12: 2}, k2: { k21: 3, k22: 4 } } and right needs to be
   * simple map such as: { k1: 5, k2: 6} .  The operation is then to multiply
   * every value of every right key, to every value every subkey where the
   * parent keys match.  Using the previous examples, the result would be:
   * { k1: { k11: 5, k12: 10 }, k2: { k21: 18, k22: 24 } } .
   * Parent keys missing from right are multiplied by 0.
   *
   * Config:
   *  left          Field holding the left operand (receives the result)
   *  right         Field holding the right operand
   */
  elementwiseMultiply(item, config) {
    if (!(config.left in item) || !(config.right in item)) {
      return null;
    }
    const left = item[config.left];
    const right = item[config.right];
    const leftType = this._typeOf(left);
    if (leftType !== this._typeOf(right)) {
      return null;
    }

    if (leftType === "array") {
      if (left.length !== right.length) {
        return null;
      }
      for (let i = 0; i < left.length; i++) {
        left[i] *= right[i];
      }
    } else if (leftType === "map") {
      Object.keys(left).forEach(outerKey => {
        const r = (outerKey in right) ? right[outerKey] : 0.0;
        const inner = left[outerKey];
        Object.keys(inner).forEach(innerKey => {
          inner[innerKey] *= r;
        });
      });
    } else if (leftType === "number") {
      item[config.left] = left * right;
    } else {
      return null;
    }
    return item;
  }

  /**
   * Vector multiplies (i.e. dot products) two vectors and stores the result in
   * third field. Both vectors must either be maps, or arrays of numbers with
   * the same length. For maps, only keys present on both sides contribute.
   *
   * Config:
   *  left       A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   *  right      A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   *  dest       The field to store the dot product.
   */
  vectorMultiply(item, config) {
    if (!(config.left in item) || !(config.right in item)) {
      return null;
    }
    const left = item[config.left];
    const right = item[config.right];
    const leftType = this._typeOf(left);
    if (leftType !== this._typeOf(right)) {
      return null;
    }

    let destVal = 0.0;
    if (leftType === "array") {
      if (left.length !== right.length) {
        return null;
      }
      for (let i = 0; i < left.length; i++) {
        destVal += left[i] * right[i];
      }
    } else if (leftType === "map") {
      Object.keys(left).forEach(key => {
        if (key in right) {
          destVal += left[key] * right[key];
        }
      });
    } else {
      return null;
    }

    item[config.dest] = destVal;
    return item;
  }

  /**
   * Adds a constant value to all elements in the field.  Mathematically,
   * this is the same as taking a 1-vector, scalar multiplying it by k,
   * and then vector adding it to a field.
   *
   * Config:
   *  field     A field pointing to either a map of strings to numbers,
   *                  an array of numbers, or a number
   *  k             Either a number, or a string. If it's a number then this
   *                  is the scalar value to add. If it's a string,
   *                  the value in the pointed to field is used.
   *  default       OPTIONAL (DEFAULT: 0), If k is a string, and no numeric
   *                  value is found, then use this value. The legacy
   *                  spelling "dfault" is also accepted and takes precedence.
   */
  scalarAdd(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    const k = this._lookupScalar(item, config.k, this._defaultScalar(config));
    return this._transformField(item, config.field, value => value + k);
  }

  /**
   * Adds two vectors together and stores the result in left. If left does
   * not exist yet, it becomes a deep copy of right.
   *
   * Config:
   *  left      A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   *  right     A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   */
  vectorAdd(item, config) {
    if (!(config.left in item)) {
      return this.copyValue(item, {src: config.right, dest: config.left});
    }
    if (!(config.right in item)) {
      return null;
    }

    const left = item[config.left];
    const right = item[config.right];
    const leftType = this._typeOf(left);
    if (leftType !== this._typeOf(right)) {
      return null;
    }

    if (leftType === "array") {
      if (left.length !== right.length) {
        return null;
      }
      for (let i = 0; i < left.length; i++) {
        left[i] += right[i];
      }
      return item;
    }
    if (leftType === "map") {
      Object.keys(right).forEach(key => {
        // Keys only present on the right are treated as 0 on the left.
        const current = (key in left) ? left[key] : 0;
        left[key] = current + right[key];
      });
      return item;
    }
    return null;
  }

  /**
   * Converts a vector from real values to boolean integers. (i.e. either 1/0
   * or 1/-1).
   *
   * Config:
   *  field            Field containing either a map of strings to numbers,
   *                      an array of numbers, or a number to convert.
   *  threshold        OPTIONAL (DEFAULT: 0) Values above this will be replaced
   *                      with 1.0. Those at or below will be converted to 0.
   *  keep_negative    OPTIONAL (DEFAULT: False) If true, values at or below
   *                      the threshold will be converted to -1 instead of 0.
   */
  makeBoolean(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    const threshold = this._lookupScalar(item, config.threshold, 0.0);
    const lowValue = config.keep_negative ? -1.0 : 0.0;
    return this._transformField(item, config.field,
      value => ((value > threshold) ? 1.0 : lowValue));
  }

  /**
   * Removes all keys from the item except for the ones specified. Returns a
   * new item; the original is not modified.
   *
   * Config:
   *  fields           An array of strings indicating the fields to keep
   */
  whitelistFields(item, config) {
    const newItem = {};
    for (const ele of config.fields) {
      if (ele in item) {
        newItem[ele] = item[ele];
      }
    }
    return newItem;
  }

  /**
   * Removes all keys whose value does not exceed some threshold.
   *
   * Config:
   *  field         Points to a map of strings to numbers
   *  threshold     Values must exceed this value, otherwise they are removed.
   *                  (OPTIONAL, DEFAULT: 0)
   */
  filterByValue(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    const threshold = this._lookupScalar(item, config.threshold, 0.0);
    const source = item[config.field];
    const filtered = {};
    Object.keys(source).forEach(key => {
      if (source[key] > threshold) {
        filtered[key] = source[key];
      }
    });
    item[config.field] = filtered;
    return item;
  }

  /**
   * Rewrites a field so that its values are now L2 normed. An all-zero
   * vector is left unchanged.
   *
   * Config:
   *  field         Points to a map of strings to numbers, or an array of numbers
   */
  l2Normalize(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    return this._normalizeField(item, config.field, values => {
      let sumOfSquares = 0.0;
      for (const value of values) {
        sumOfSquares += value * value;
      }
      return Math.sqrt(sumOfSquares);
    });
  }

  /**
   * Rewrites a field so that all of its values sum to 1.0. A vector that
   * sums to zero is left unchanged.
   *
   * Config:
   *  field         Points to a map of strings to numbers, or an array of numbers
   */
  probNormalize(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    return this._normalizeField(item, config.field, values => {
      let sum = 0.0;
      for (const value of values) {
        sum += value;
      }
      return sum;
    });
  }

  /**
   * Stores a value, if it is not already present
   *
   * Config:
   *  field         field to write to if it is missing
   *  value         value to store in that field. If it is a string naming a
   *                  numeric field of the item, that field's number is
   *                  stored instead.
   */
  setDefault(item, config) {
    if (!(config.field in item)) {
      item[config.field] = this._lookupScalar(item, config.value, config.value);
    }
    return item;
  }

  /**
   * Selectively promotes a value from an inner map up to the outer map.
   * The item is returned unchanged when the value cannot be found.
   *
   * Config:
   *  haystack      Points to a map of strings to values
   *  needle        Key inside the map we should promote up
   *  dest          Where we should write the value of haystack[needle]
   */
  lookupValue(item, config) {
    if ((config.haystack in item) && (config.needle in item[config.haystack])) {
      item[config.dest] = item[config.haystack][config.needle];
    }
    return item;
  }

  /**
   * Demotes a field into a map, creating the map if needed. The item is
   * returned unchanged when src is missing. The value is not cloned.
   *
   * Config:
   *  src           Field to copy
   *  dest_map      Points to a map
   *  dest_key      Key inside dest_map to copy src to
   */
  copyToMap(item, config) {
    if (!(config.src in item)) {
      return item;
    }
    if (!(config.dest_map in item)) {
      item[config.dest_map] = {};
    }
    item[config.dest_map][config.dest_key] = item[config.src];
    return item;
  }

  /**
   * Multiplies every value of a nested tag map by a scalar, optionally taking
   * the logarithm of each value first.
   *
   * Config:
   *  field         Points to a map of strings to maps of strings to numbers
   *  k             Scalar to multiply the values by (DEFAULT: 1)
   *  log_scale     Boolean, if true, then the values will be transformed
   *                  by a logarithm prior to multiplications
   */
  scalarMultiplyTag(item, config) {
    // Keeps Math.log away from log(0) = -Infinity.
    const EPSILON = 0.000001;
    if (!(config.field in item)) {
      return null;
    }
    const k = this._lookupScalar(item, config.k, 1);
    const tags = item[config.field];
    if (this._typeOf(tags) !== "map") {
      return null;
    }

    Object.keys(tags).forEach(parentKey => {
      const subtags = tags[parentKey];
      Object.keys(subtags).forEach(key => {
        const v = config.log_scale ? Math.log(subtags[key] + EPSILON) : subtags[key];
        subtags[key] = v * k;
      });
    });
    return item;
  }

  /**
   * Independently applies softmax across all subtags. Nothing is modified
   * unless the whole structure validates.
   *
   * Config:
   *  field         Points to a map of strings with values being another map
   *                  of strings to numbers
   */
  applySoftmaxTags(item, config) {
    const tags = item[config.field];
    if (this._typeOf(tags) !== "map") {
      return null;
    }

    // First pass: validate and accumulate the per-tag denominators.
    const softmaxSum = {};
    for (const tag of Object.keys(tags)) {
      const subtags = tags[tag];
      if (this._typeOf(subtags) !== "map") {
        return null;
      }
      let sum = 0;
      for (const subtag of Object.keys(subtags)) {
        if (this._typeOf(subtags[subtag]) !== "number") {
          return null;
        }
        sum += Math.exp(subtags[subtag]);
      }
      softmaxSum[tag] = sum;
    }

    // Second pass: rewrite the scores in place.
    Object.keys(tags).forEach(tag => {
      const subtags = tags[tag];
      Object.keys(subtags).forEach(subtag => {
        subtags[subtag] = Math.exp(subtags[subtag]) / softmaxSum[tag];
      });
    });
    return item;
  }

  /**
   * Vector adds a field and stores the result in left. A field missing from
   * left is treated as empty/zero; a field missing from right leaves left
   * unchanged. Arrays of different lengths are added over the shared prefix
   * and right's extra elements are appended.
   *
   * Config:
   *  field              The field to vector add
   */
  combinerAdd(left, right, config) {
    if (!(config.field in right)) {
      return left;
    }
    const rightValue = right[config.field];
    const type = this._typeOf(rightValue);

    if (!(config.field in left)) {
      if (type === "map") {
        left[config.field] = {};
      } else if (type === "array") {
        left[config.field] = [];
      } else if (type === "number") {
        left[config.field] = 0;
      } else {
        return null;
      }
    }
    const leftValue = left[config.field];
    if (type !== this._typeOf(leftValue)) {
      return null;
    }

    if (type === "map") {
      Object.keys(rightValue).forEach(key => {
        if (!(key in leftValue)) {
          leftValue[key] = 0;
        }
        leftValue[key] += rightValue[key];
      });
    } else if (type === "array") {
      for (let i = 0; i < rightValue.length; i++) {
        if (i < leftValue.length) {
          leftValue[i] += rightValue[i];
        } else {
          leftValue.push(rightValue[i]);
        }
      }
    } else if (type === "number") {
      left[config.field] = leftValue + rightValue;
    } else {
      return null;
    }
    return left;
  }

  /**
   * Stores the maximum value of the field in left. A field missing from
   * right leaves left unchanged; a field missing from left takes right's
   * values.
   *
   * Config:
   *  field              The field to take the maximum of
   */
  combinerMax(left, right, config) {
    if (!(config.field in right)) {
      return left;
    }
    const rightValue = right[config.field];
    const type = this._typeOf(rightValue);

    if (!(config.field in left)) {
      if (type === "map") {
        left[config.field] = {};
      } else if (type === "array") {
        left[config.field] = [];
      } else if (type === "number") {
        // Seed with right's value rather than 0 so that the maximum of
        // negative numbers is not clamped up to 0.
        left[config.field] = rightValue;
      } else {
        return null;
      }
    }
    const leftValue = left[config.field];
    if (type !== this._typeOf(leftValue)) {
      return null;
    }

    if (type === "map") {
      Object.keys(rightValue).forEach(key => {
        if (!(key in leftValue) || (rightValue[key] > leftValue[key])) {
          leftValue[key] = rightValue[key];
        }
      });
    } else if (type === "array") {
      for (let i = 0; i < rightValue.length; i++) {
        if (i >= leftValue.length) {
          leftValue.push(rightValue[i]);
        } else if (leftValue[i] < rightValue[i]) {
          leftValue[i] = rightValue[i];
        }
      }
    } else if (type === "number") {
      if (leftValue < rightValue) {
        left[config.field] = rightValue;
      }
    } else {
      return null;
    }
    return left;
  }

  /**
   * Associates a value in right with another value in right. This association
   * is then stored in a map in left.
   *
   *     For example: If a sequence of rights is:
   *     { 'tags': {}, 'url_domain': 'maseratiusa.com/maserati', 'time': 41 }
   *     { 'tags': {}, 'url_domain': 'mbusa.com/mercedes',       'time': 21 }
   *     { 'tags': {}, 'url_domain': 'maseratiusa.com/maserati', 'time': 34 }
   *
   *     Then assuming a 'sum' operation, left can build a map that would look like:
   *     {
   *         'maseratiusa.com/maserati': 75,
   *         'mbusa.com/mercedes': 21,
   *     }
   *
   * An unknown operation aborts (null). If right lacks either field, left is
   * returned with the (possibly newly created, empty) map.
   *
   * Fields:
   *  left_field              field in the left to store / update the map
   *  right_key_field         Field in the right to use as a key
   *  right_value_field       Field in the right to use as a value
   *  operation               One of "sum", "max", "overwrite", "count"
   */
  combinerCollectValues(left, right, config) {
    let op;
    switch (config.operation) {
      case "sum":
        op = (a, b) => a + b;
        break;
      case "max":
        op = (a, b) => ((a > b) ? a : b);
        break;
      case "overwrite":
        op = (a, b) => b;
        break;
      case "count":
        op = (a, b) => a + 1;
        break;
      default:
        return null;
    }

    if (!(config.left_field in left)) {
      left[config.left_field] = {};
    }
    if (!(config.right_key_field in right) || !(config.right_value_field in right)) {
      return left;
    }

    const collected = left[config.left_field];
    const key = right[config.right_key_field];
    const rightValue = right[config.right_value_field];
    // Keys seen for the first time start from 0.
    const leftValue = (key in collected) ? collected[key] : 0.0;
    collected[key] = op(leftValue, rightValue);
    return left;
  }

  /**
   * Executes a builder recipe. Returns an object on success, or null on
   * failure (a step aborted, or a step names a function that is not
   * whitelisted in ITEM_BUILDER_REGISTRY).
   */
  executeRecipe(item, recipe) {
    let newItem = item;
    for (const step of recipe) {
      const op = this._lookupStep(this.ITEM_BUILDER_REGISTRY, step.function);
      if (op === undefined) {
        return null;
      }
      newItem = op.call(this, newItem, step);
      if (newItem === null) {
        break;
      }
    }
    return newItem;
  }

  /**
   * Executes a combiner recipe, merging item2 into item1. Returns an object
   * on success, or null on failure (a step aborted, or a step names a
   * function that is not whitelisted in ITEM_COMBINER_REGISTRY).
   */
  executeCombinerRecipe(item1, item2, recipe) {
    let newItem1 = item1;
    for (const step of recipe) {
      const op = this._lookupStep(this.ITEM_COMBINER_REGISTRY, step.function);
      if (op === undefined) {
        return null;
      }
      newItem1 = op.call(this, newItem1, item2, step);
      if (newItem1 === null) {
        break;
      }
    }
    return newItem1;
  }
};
- const EXPORTED_SYMBOLS = ["RecipeExecutor"];
|