123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373 |
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- "use strict";
- const {RemoteSettings} = ChromeUtils.import("resource://services-settings/remote-settings.js");
- const {actionCreators: ac} = ChromeUtils.import("resource://activity-stream/common/Actions.jsm");
- ChromeUtils.defineModuleGetter(this, "perfService", "resource://activity-stream/common/PerfService.jsm");
- const {NaiveBayesTextTagger} = ChromeUtils.import("resource://activity-stream/lib/NaiveBayesTextTagger.jsm");
- const {NmfTextTagger} = ChromeUtils.import("resource://activity-stream/lib/NmfTextTagger.jsm");
- const {RecipeExecutor} = ChromeUtils.import("resource://activity-stream/lib/RecipeExecutor.jsm");
- ChromeUtils.defineModuleGetter(this, "NewTabUtils",
- "resource://gre/modules/NewTabUtils.jsm");
- const {Services} = ChromeUtils.import("resource://gre/modules/Services.jsm");
- const {XPCOMUtils} = ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
- ChromeUtils.defineModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
- XPCOMUtils.defineLazyGlobalGetters(this, ["fetch"]);
- XPCOMUtils.defineLazyGetter(this, "gTextDecoder", () => new TextDecoder());
// Lazily resolved promise for the base URL under which attachments are
// served. The value is read from the `capabilities.attachments.base_url`
// field of the JSON document returned by the root endpoint of the server
// configured in the "services.settings.server" pref.
XPCOMUtils.defineLazyGetter(this, "baseAttachmentsURL", async () => {
  const server = Services.prefs.getCharPref("services.settings.server");
  // No credentials are sent along with the request.
  const response = await fetch(`${server}/`, {credentials: "omit"});
  const serverInfo = await response.json();
  return serverInfo.capabilities.attachments.base_url;
});

// Directory (inside the local profile dir) where downloaded attachments are stored.
const PERSONALITY_PROVIDER_DIR = OS.Path.join(OS.Constants.Path.localProfileDir, "personality-provider");
// Names of the Remote Settings collections this module reads and listens to.
const RECIPE_NAME = "personality-provider-recipe";
const MODELS_NAME = "personality-provider-models";
/**
 * Computes the SHA-256 digest of a string through nsICryptoHash.
 *
 * @param {string} aStr The text to hash; it is handed to an
 *                      nsIStringInputStream via its `data` attribute.
 * @returns {string} The digest as a lowercase hexadecimal string, two
 *                   characters per digest byte.
 */
function getHash(aStr) {
  const hasher = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash);
  hasher.init(Ci.nsICryptoHash.SHA256);

  const inputStream = Cc["@mozilla.org/io/string-input-stream;1"].createInstance(Ci.nsIStringInputStream);
  inputStream.data = aStr;
  // -1 is passed as the length, exactly as before (presumably "read the whole stream").
  hasher.updateFromStream(inputStream, -1);

  // finish(false) yields the digest as a binary string: one character per byte.
  const digest = hasher.finish(false);

  // Render each byte as a zero-padded two-digit hexadecimal code.
  let hex = "";
  for (let index = 0; index < digest.length; index++) {
    hex += digest.charCodeAt(index).toString(16).padStart(2, "0");
  }
  return hex.toLowerCase();
}
/**
 * V2 provider builds and ranks an interest profile (also called an “interest vector”) off the browse history.
 * This allows Firefox to classify pages into topics, by examining the text found on the page.
 * It does this by looking at the history text content, title, and description.
 */
this.PersonalityProvider = class PersonalityProvider {
  /**
   * Stores the supplied settings, restores previously computed scores (if
   * any) and subscribes to "sync" events of the recipe and models collections.
   *
   * @param {*} timeSegments Stored and echoed back by getAffinities().
   * @param {*} parameterSets Stored and echoed back by getAffinities().
   * @param {*} maxHistoryQueryResults Stored and echoed back by getAffinities().
   * @param {*} version Stored and echoed back by getAffinities().
   * @param {Object} [scores] May carry `interestConfig` and `interestVector`
   *                          to reuse instead of recomputing them in init().
   * @param {Object} [v2Params] May carry `dispatch` (called with the perf
   *                            events) and `modelKeys` (record keys of the
   *                            models to turn into taggers).
   */
  constructor(
    timeSegments,
    parameterSets,
    maxHistoryQueryResults,
    version,
    scores,
    v2Params) {
    this.v2Params = v2Params || {};
    // Without a dispatcher, perf events are dropped.
    this.dispatch = this.v2Params.dispatch || (() => {});
    this.modelKeys = this.v2Params.modelKeys;
    this.timeSegments = timeSegments;
    this.parameterSets = parameterSets;
    this.maxHistoryQueryResults = maxHistoryQueryResults;
    this.version = version;
    this.scores = scores || {};
    this.interestConfig = this.scores.interestConfig;
    this.interestVector = this.scores.interestVector;
    // onSync is registered as an event listener, so it needs a bound `this`.
    this.onSync = this.onSync.bind(this);
    this.setupSyncAttachment(RECIPE_NAME);
    this.setupSyncAttachment(MODELS_NAME);
  }

  /**
   * "sync" event handler: removes the local attachment of every deleted
   * record and of the old version of every updated record, then downloads
   * the attachment of every created record and of the new version of every
   * updated record.
   *
   * @param {Object} event Event whose `data` holds the `created`, `updated`
   *                       (entries with `old` and `new`) and `deleted` arrays.
   */
  async onSync(event) {
    const {
      data: {created, updated, deleted},
    } = event;
    // Remove every removed attachment.
    const toRemove = deleted.concat(updated.map(u => u.old));
    await Promise.all(toRemove.map(record => this.deleteAttachment(record)));
    // Download every new/updated attachment.
    const toDownload = created.concat(updated.map(u => u.new));
    await Promise.all(toDownload.map(record => this.maybeDownloadAttachment(record)));
  }

  /**
   * Registers onSync as the "sync" listener of a Remote Settings collection.
   *
   * @param {string} collection Name of the collection.
   */
  setupSyncAttachment(collection) {
    RemoteSettings(collection).on("sync", this.onSync);
  }

  /**
   * Downloads the attachment to disk assuming the dir already exists
   * and any existing files matching the filename are clobbered.
   * A non-OK HTTP response is reported and nothing is written.
   *
   * @param {Object} record Record whose `attachment` has `location` and `filename`.
   */
  async _downloadAttachment(record) {
    const {attachment: {location, filename}} = record;
    const remoteFilePath = (await baseAttachmentsURL) + location;
    const localFilePath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);
    const headers = new Headers();
    headers.set("Accept-Encoding", "gzip");
    const resp = await fetch(remoteFilePath, {headers, credentials: "omit"});
    if (!resp.ok) {
      Cu.reportError(`Failed to fetch ${remoteFilePath}: ${resp.status}`);
      return;
    }
    const buffer = await resp.arrayBuffer();
    const bytes = new Uint8Array(buffer);
    await OS.File.writeAtomic(localFilePath, bytes, {tmpPath: `${localFilePath}.tmp`});
  }

  /**
   * Attempts to download the attachment, but only if it doesn't already exist.
   * The local copy counts as missing when the file is absent, or when its
   * size or hash differs from the values recorded in the attachment metadata.
   *
   * @param {Object} record Record whose `attachment` has `filename`, `hash` and `size`.
   * @param {number} [retries=3] Maximum number of download attempts.
   */
  async maybeDownloadAttachment(record, retries = 3) {
    const {attachment: {filename, hash, size}} = record;
    await OS.File.makeDir(PERSONALITY_PROVIDER_DIR);
    const localFilePath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);

    let retry = 0;
    // The checks go from cheapest to most expensive and short-circuit, so the
    // file is only read and hashed when it exists with the expected size.
    while ((retry++ < retries) &&
        (!await OS.File.exists(localFilePath) ||
        (await OS.File.stat(localFilePath)).size !== size ||
        getHash(await this._getFileStr(localFilePath)) !== hash)) {
      await this._downloadAttachment(record);
    }
  }

  /**
   * Removes the local copy of an attachment and then tries to remove the
   * attachments directory.
   *
   * @param {Object} record Record whose `attachment` has `filename`.
   */
  async deleteAttachment(record) {
    const {attachment: {filename}} = record;
    await OS.File.makeDir(PERSONALITY_PROVIDER_DIR);
    const path = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);

    await OS.File.remove(path, {ignoreAbsent: true});
    // Cleaning up the directory is best effort. The directory is shared by
    // all attachments, so while others remain the removal is expected to
    // fail; that must not reject this call, otherwise onSync would bail out
    // before downloading the new attachments.
    try {
      await OS.File.removeEmptyDir(PERSONALITY_PROVIDER_DIR, {ignoreAbsent: true});
    } catch (error) {
      // Most likely the directory is not empty; leaving it in place is fine.
    }
  }

  /**
   * Gets contents of the attachment if it already exists on file,
   * and if not attempts to download it.
   *
   * @param {Object} record Record whose `attachment` has `filename`.
   * @returns {Object} The parsed JSON content of the attachment, or an empty
   *                   object when it could not be downloaded, read or parsed
   *                   (the failure is reported).
   */
  async getAttachment(record) {
    const {attachment: {filename}} = record;
    const filepath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);

    try {
      await this.maybeDownloadAttachment(record);
      return JSON.parse(await this._getFileStr(filepath));
    } catch (error) {
      Cu.reportError(`Failed to load ${filepath}: ${error.message}`);
    }
    return {};
  }

  // A helper function to read and decode a file, it isn't a stand alone function.
  // If you use this, ensure you check the file exists and you have a try catch.
  async _getFileStr(filepath) {
    const binaryData = await OS.File.read(filepath);
    return gTextDecoder.decode(binaryData);
  }

  /**
   * Prepares the provider for scoring: loads the recipe, builds the recipe
   * executor and computes the interest vector, reusing the recipe and the
   * vector handed to the constructor when present. When a step yields
   * nothing, the matching error perf event is dispatched and the method
   * returns without marking the provider initialized or calling `callback`.
   *
   * @param {Function} [callback] Called without arguments once initialized.
   */
  async init(callback) {
    const perfStart = perfService.absNow();
    this.interestConfig = this.interestConfig || await this.getRecipe();
    if (!this.interestConfig) {
      this.dispatch(ac.PerfEvent({event: "PERSONALIZATION_V2_GET_RECIPE_ERROR"}));
      return;
    }
    this.recipeExecutor = await this.generateRecipeExecutor();
    if (!this.recipeExecutor) {
      this.dispatch(ac.PerfEvent({event: "PERSONALIZATION_V2_GENERATE_RECIPE_EXECUTOR_ERROR"}));
      return;
    }
    this.interestVector = this.interestVector || await this.createInterestVector();
    if (!this.interestVector) {
      this.dispatch(ac.PerfEvent({event: "PERSONALIZATION_V2_CREATE_INTEREST_VECTOR_ERROR"}));
      return;
    }

    this.dispatch(ac.PerfEvent({
      event: "PERSONALIZATION_V2_TOTAL_DURATION",
      value: Math.round(perfService.absNow() - perfStart),
    }));

    this.initialized = true;
    if (callback) {
      callback();
    }
  }

  /**
   * Loads every record of a Remote Settings collection together with the
   * content of its attachment.
   *
   * @param {string} name Name of the collection.
   * @returns {Array<Object>} One object per record: the attachment content
   *                          plus a `recordKey` property set to the record's `key`.
   */
  async getFromRemoteSettings(name) {
    const result = await RemoteSettings(name).get();
    return Promise.all(result.map(async record => ({...await this.getAttachment(record), recordKey: record.key})));
  }

  /**
   * Returns a Recipe from remote settings to be consumed by a RecipeExecutor.
   * A Recipe is a set of instructions on how to processes a RecipeExecutor.
   * The fetched recipes are cached on the instance; only the first one is returned.
   */
  async getRecipe() {
    if (!this.recipes || !this.recipes.length) {
      const start = perfService.absNow();
      this.recipes = await this.getFromRemoteSettings(RECIPE_NAME);
      this.dispatch(ac.PerfEvent({
        event: "PERSONALIZATION_V2_GET_RECIPE_DURATION",
        value: Math.round(perfService.absNow() - start),
      }));
    }
    return this.recipes[0];
  }

  /**
   * Returns a Recipe Executor.
   * A Recipe Executor is a set of actions that can be consumed by a Recipe.
   * The Recipe determines the order and specifics of which the actions are called.
   *
   * The taggers are built once from the models collection and cached on the
   * instance. Returns null when the collection yields no models.
   */
  async generateRecipeExecutor() {
    if (!this.taggers) {
      const startTaggers = perfService.absNow();
      const nbTaggers = [];
      const nmfTaggers = {};
      const models = await this.getFromRemoteSettings(MODELS_NAME);

      if (models.length === 0) {
        return null;
      }

      // v2Params (and therefore modelKeys) is optional in the constructor;
      // without keys no model is selected instead of throwing a TypeError.
      const modelKeys = this.modelKeys || [];
      for (const model of models) {
        if (!modelKeys.includes(model.recordKey)) {
          continue;
        }
        if (model.model_type === "nb") {
          nbTaggers.push(new NaiveBayesTextTagger(model));
        } else if (model.model_type === "nmf") {
          nmfTaggers[model.parent_tag] = new NmfTextTagger(model);
        }
      }
      this.dispatch(ac.PerfEvent({
        event: "PERSONALIZATION_V2_TAGGERS_DURATION",
        value: Math.round(perfService.absNow() - startTaggers),
      }));
      this.taggers = {nbTaggers, nmfTaggers};
    }
    const startRecipeExecutor = perfService.absNow();
    const recipeExecutor = new RecipeExecutor(this.taggers.nbTaggers, this.taggers.nmfTaggers);
    this.dispatch(ac.PerfEvent({
      event: "PERSONALIZATION_V2_RECIPE_EXECUTOR_DURATION",
      value: Math.round(perfService.absNow() - startRecipeExecutor),
    }));
    return recipeExecutor;
  }

  /**
   * Grabs a slice of browse history for building a interest vector
   *
   * @param {Array<string>} columns Column names that must be neither NULL
   *                                nor empty. They are interpolated into the
   *                                query text as-is, so they must come from
   *                                a trusted source.
   * @param {number} beginTimeSecs Inclusive lower bound of `last_visit_date`, in seconds.
   * @param {number} endTimeSecs Exclusive upper bound of `last_visit_date`, in seconds.
   * @returns {Array} The rows returned by the places query (at most 30000).
   */
  async fetchHistory(columns, beginTimeSecs, endTimeSecs) {
    // The bounds are multiplied by 1000000 before being compared with
    // last_visit_date (presumably stored in microseconds).
    let sql = `SELECT url, title, visit_count, frecency, last_visit_date, description
    FROM moz_places
    WHERE last_visit_date >= ${beginTimeSecs * 1000000}
    AND last_visit_date < ${endTimeSecs * 1000000}`;
    columns.forEach(requiredColumn => {
      sql += ` AND IFNULL(${requiredColumn}, "") <> ""`;
    });
    sql += " LIMIT 30000";

    const {activityStreamProvider} = NewTabUtils;
    const history = await activityStreamProvider.executePlacesQuery(sql, {
      columns,
      params: {},
    });

    return history;
  }

  /**
   * Examines the user's browse history and returns an interest vector that
   * describes the topics the user frequently browses.
   * Returns null as soon as combining a history item into the vector yields null.
   */
  async createInterestVector() {
    let interestVector = {};
    const endTimeSecs = ((new Date()).getTime() / 1000);
    const beginTimeSecs = endTimeSecs - this.interestConfig.history_limit_secs;
    const history = await this.fetchHistory(this.interestConfig.history_required_fields, beginTimeSecs, endTimeSecs);

    this.dispatch(ac.PerfEvent({
      event: "PERSONALIZATION_V2_HISTORY_SIZE",
      value: history.length,
    }));

    const start = perfService.absNow();
    for (const historyRec of history) {
      const ivItem = this.recipeExecutor.executeRecipe(
        historyRec,
        this.interestConfig.history_item_builder);
      // History items the builder recipe rejects are skipped.
      if (ivItem === null) {
        continue;
      }
      interestVector = this.recipeExecutor.executeCombinerRecipe(
        interestVector,
        ivItem,
        this.interestConfig.interest_combiner);
      if (interestVector === null) {
        return null;
      }
    }

    const finalResult = this.recipeExecutor.executeRecipe(
      interestVector,
      this.interestConfig.interest_finalizer);

    this.dispatch(ac.PerfEvent({
      event: "PERSONALIZATION_V2_CREATE_INTEREST_VECTOR_DURATION",
      value: Math.round(perfService.absNow() - start),
    }));
    return finalResult;
  }

  /**
   * Calculates a score of a Pocket item when compared to the user's interest
   * vector. Returns the score. Higher scores are better. Assumes this.interestVector
   * is populated.
   *
   * Before the provider is initialized the item's own `item_score` is
   * returned (1 when it is missing or falsy). Returns -1 when either the
   * item builder recipe or the ranker recipe yields null.
   */
  calculateItemRelevanceScore(pocketItem) {
    if (!this.initialized) {
      return pocketItem.item_score || 1;
    }
    const scorableItem = this.recipeExecutor.executeRecipe(
      pocketItem,
      this.interestConfig.item_to_rank_builder);
    if (scorableItem === null) {
      return -1;
    }

    // Work on a deep copy so that the stored interest vector is not modified.
    let rankingVector = JSON.parse(JSON.stringify(this.interestVector));

    Object.keys(scorableItem).forEach(key => {
      rankingVector[key] = scorableItem[key];
    });

    rankingVector = this.recipeExecutor.executeRecipe(
      rankingVector,
      this.interestConfig.item_ranker);

    if (rankingVector === null) {
      return -1;
    }
    return rankingVector.score;
  }

  /**
   * Returns an object holding the settings and affinity scores of this provider instance.
   */
  getAffinities() {
    return {
      timeSegments: this.timeSegments,
      parameterSets: this.parameterSets,
      maxHistoryQueryResults: this.maxHistoryQueryResults,
      version: this.version,
      scores: {
        interestConfig: this.interestConfig,
        interestVector: this.interestVector,
        taggers: this.taggers,
      },
    };
  }
};
- const EXPORTED_SYMBOLS = ["PersonalityProvider"];
|