/* global module */
/* jslint node: true */
/* jslint indent: 2 */
'use strict';

var teeft = require('rd-teeft'),
  async = require('async'),
  path = require('path'),
  fs = require('fs');

var jLouvain = require('./lib/jLouvain.js');

var myObject = {};

/**
 * Index every file of a directory and regroup the keywords by document id.
 *
 * Each entry of `directory` is read as UTF-8 text and passed to `teeft.index`;
 * the `keywords` property of what it returns is stored under the document id,
 * i.e. the file name without its '.txt' extension. The resulting object is
 * written as JSON to `output` and handed to `cb`.
 *
 * @param {string} directory - Directory whose entries are indexed.
 * @param {string} [output] - Path of the JSON file to write
 *   (defaults to './cache/indexAll.json').
 * @param {function(Error|null, Object=)} cb - Called with an I/O error, or with
 *   `null` and the `{ docId: keywords }` object once the file has been written.
 */
myObject.indexAll = function(directory, output, cb) {
  var result = {}; // Keywords regrouped by document id

  fs.readdir(directory, function(readdirErr, filenames) {
    if (readdirErr) return cb(readdirErr); // I/O errors

    async.each(filenames, function(filename, callback) {
      var filePath = path.join(directory, filename);
      fs.readFile(filePath, 'utf-8', function(readErr, text) {
        if (readErr) return callback(readErr); // I/O errors
        var docId = path.basename(filename, '.txt');
        result[docId] = teeft.index(text).keywords;
        callback();
      });
    }, function(eachErr) {
      if (eachErr) return cb(eachErr); // I/O errors
      // Write data
      fs.writeFile(output || './cache/indexAll.json', JSON.stringify(result), 'utf-8', function(writeErr) {
        if (writeErr) return cb(writeErr);
        return cb(null, result);
      });
    });
  });
};

myObject.graphs = {};

/**
 * Build a "document to document" graph from the keywords of each document.
 *
 * Two documents are linked once for every term they share; the number of
 * shared terms is the weight of the link. Only links whose weight is strictly
 * greater than `options.minLinkValue` are kept. Every document becomes a node
 * whose `id` is its index in `Object.keys(keywords)`, and the value returned by
 * jLouvain for each node is stored as its `group`. The graph is written as JSON
 * to `options.output` and handed to `cb`.
 *
 * Note: link `source`/`target` are the node indexes as strings (they come from
 * splitting the "min,max" matrix key), whereas node `id` values are numbers.
 *
 * @param {Object} keywords - `{ docId: [{ term: string, ... }, ...] }`.
 * @param {Object} [options]
 * @param {string} [options.output] - Path of the JSON file to write
 *   (defaults to './cache/docToDoc.json').
 * @param {number} [options.minLinkValue] - Links with a weight lower than or
 *   equal to this value are dropped (defaults to 0).
 * @param {function(Error|null, Object=)} cb - Called with an I/O error, or with
 *   `null` and `{ nodes: [{ id, value, group }], links: [{ source, target, value }] }`.
 */
myObject.graphs.docToDoc = function(keywords, options, cb) {
  if (!options) options = {};

  // Prototype-less dictionary: a term such as "constructor" or "toString"
  // must not resolve to an inherited Object.prototype member.
  var terms = Object.create(null), // term -> list of indexes of the documents containing it
    documents = Object.keys(keywords), // List of document ids
    result = {
      'nodes': [],
      'links': []
    },
    edges = [], // [{'source': '', 'target': '', 'weight': 0}, ...]
    nodes = [], // [index, ...]
    matrix = Object.create(null), // Sparse matrix of "doc-doc" links, keyed by "min,max"
    output = options.output || './cache/docToDoc.json',
    minLinkValue = options.minLinkValue || 0,
    i, j;

  // Construction of the terms dictionary
  for (i = 0; i < documents.length; i++) {
    var docKeywords = keywords[documents[i]];
    for (j = 0; j < docKeywords.length; j++) {
      var term = docKeywords[j].term;
      if (!terms[term]) terms[term] = [];
      terms[term].push(i);
    }
  }

  // Construction of the matrix: one increment per pair of documents sharing a term
  Object.keys(terms).forEach(function(term) {
    var docIndexes = terms[term];
    for (var a = 0; a < docIndexes.length - 1; a++) {
      for (var b = a + 1; b < docIndexes.length; b++) {
        // Only half of the matrix is filled: the key always reads "min,max"
        var cell = Math.min(docIndexes[a], docIndexes[b]) + ',' + Math.max(docIndexes[a], docIndexes[b]);
        if (!matrix[cell]) matrix[cell] = 0;
        matrix[cell]++;
      }
    }
  });

  // Construction of the doc-doc links
  Object.keys(matrix).forEach(function(cell) {
    var ids = cell.split(','),
      weight = matrix[cell];
    if (weight > minLinkValue) {
      edges.push({
        'source': ids[0],
        'target': ids[1],
        'weight': weight
      });
      result.links.push({
        'source': ids[0],
        'target': ids[1],
        'value': weight
      });
    }
  });

  // Construction of the nodes
  for (i = 0; i < documents.length; i++) {
    nodes.push(i);
    result.nodes.push({
      'id': i,
      'value': documents[i],
      'group': 0
    });
  }

  // Create the "community"
  var community = jLouvain().nodes(nodes).edges(edges),
    partition = community();

  // Assign its community to each node.
  // for...in is kept on purpose: it simply does not iterate when the value is
  // null or undefined.
  for (var nodeId in partition) {
    result.nodes[nodeId].group = partition[nodeId];
  }

  // Write data
  fs.writeFile(output, JSON.stringify(result), 'utf-8', function(writeErr) {
    if (writeErr) return cb(writeErr);
    return cb(null, result);
  });
};

module.exports = myObject;