/* global module */ /* jslint node: true */ /* jslint indent: 2 */ 'use strict'; var Backbone = require('backbone'); module.exports = Backbone.Model.extend({ SEARCH: 0, NOUN: 1, defaults: { tagger: null, filter: null, }, initialize: function() { }, call: function(text) { var terms = this.get('tagger').call(text); return this.extract(terms); }, extract: function(taggedTerms) { var terms = { _add: function(norm) { if (!this[norm]) { this[norm] = { frequency: 0 }; } this[norm].frequency++; } }; //# Phase 1: A little state machine is used to build simple and //# composite terms. var multiterm = []; var state = this.SEARCH; var word; while (taggedTerms.length > 0) { var tagged_term = taggedTerms.shift(); var term = tagged_term.term; var tag = tagged_term.tag; var norm = tagged_term.lemma; var startsWithN = this._startsWith(tag, 'N'); var startsWithJ = this._startsWith(tag, 'J'); if (state == this.SEARCH && startsWithN) { state = this.NOUN; multiterm.push(term); terms._add(norm); } else if (state == this.SEARCH && startsWithJ) { state = this.NOUN; multiterm.push(term); terms._add(norm); } else if (state == this.NOUN && startsWithN) { multiterm.push(term); terms._add(norm); } else if (state == this.NOUN && !startsWithN) { state = this.SEARCH; if (multiterm.length > 1) { word = multiterm.join(' '); terms._add(word); } multiterm = []; } } //# Phase 2: Only select the terms that fulfill the filter criteria. //# Also create the term strength. var result = {}; delete terms._add; for (word in terms) { var occur = terms[word].frequency; var strength = word.split(" ").length; if (this.get('filter').call(occur, strength)) { result[word] = { frequency: occur, strength: strength }; } } return result; }, _startsWith: function(str, prefix) { return str.substring(0, prefix.length) === prefix; } });