12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364 |
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- "use strict";
- const {toksToTfIdfVector} = ChromeUtils.import("resource://activity-stream/lib/Tokenize.jsm");
- this.NaiveBayesTextTagger = class NaiveBayesTextTagger {
- constructor(model) {
- this.model = model;
- }
- /**
- * Determines if the tokenized text belongs to class according to binary naive Bayes
- * classifier. Returns an object containing the class label ("label"), and
- * the log probability ("logProb") that the text belongs to that class. If
- * the positive class is more likely, then "label" is the positive class
- * label. If the negative class is matched, then "label" is set to null.
- */
- tagTokens(tokens) {
- let fv = toksToTfIdfVector(tokens, this.model.vocab_idfs);
- let bestLogProb = null;
- let bestClassId = -1;
- let bestClassLabel = null;
- let logSumExp = 0.0; // will be P(x). Used to create a proper probability
- for (let classId = 0; classId < this.model.classes.length; classId++) {
- let classModel = this.model.classes[classId];
- let classLogProb = classModel.log_prior;
- // dot fv with the class model
- for (let pair of Object.values(fv)) {
- let [termId, tfidf] = pair;
- classLogProb += tfidf * classModel.feature_log_probs[termId];
- }
- if ((bestLogProb === null) || (classLogProb > bestLogProb)) {
- bestLogProb = classLogProb;
- bestClassId = classId;
- }
- logSumExp += Math.exp(classLogProb);
- }
- // now normalize the probability by dividing by P(x)
- logSumExp = Math.log(logSumExp);
- bestLogProb -= logSumExp;
- if (bestClassId === this.model.positive_class_id) {
- bestClassLabel = this.model.positive_class_label;
- } else {
- bestClassLabel = null;
- }
- let confident = ((bestClassId === this.model.positive_class_id) &&
- (bestLogProb > this.model.positive_class_threshold_log_prob));
- return {
- "label": bestClassLabel,
- "logProb": bestLogProb,
- "confident": confident,
- };
- }
- };
- const EXPORTED_SYMBOLS = ["NaiveBayesTextTagger"];
|