// Newer
// Older
// indexation / index.js
// @kieffer kieffer on 27 Feb 2017 3 KB v0
/* global module */
/* jslint node: true */
/* jslint indent: 2 */
'use strict';

var teeft = require('rd-teeft'),
  async = require('async'),
  path = require('path'),
  fs = require('fs');

var jLouvain = require('./lib/jLouvain.js');

// Container for this module's public API: functions are attached to it below
// and it is assigned to module.exports at the end of the file.
var myObject = {};

/**
 * Index every entry of a directory with teeft and cache the result as JSON.
 *
 * Each name returned by fs.readdir is read as UTF-8 text and handed to
 * teeft.index; the `keywords` property of its return value is stored under
 * the entry's base name (a trailing '.txt' is stripped, any other name is
 * kept unchanged). All entries are read concurrently through async.each.
 *
 * @param {string} directory Directory whose entries are indexed.
 * @param {string} [output] Path of the JSON file to write; when falsy,
 *   './cache/indexAll.json' is used.
 * @param {function(Error=, Object=)} cb Called with the first error met
 *   (listing, reading, indexing or writing), or with (null, result) where
 *   result maps each document id to its keywords.
 */
myObject.indexAll = function(directory, output, cb) {
  var result = {}; // Keywords grouped by document id
  fs.readdir(directory, function(err, filenames) {
    if (err) return cb(err); // I/O Errors
    async.each(filenames, function(filename, callback) {
      var filePath = path.join(directory, filename);
      fs.readFile(filePath, 'utf-8', function(err, res) {
        if (err) return callback(err); // I/O Errors
        var keywords;
        // We are inside an asynchronous callback: an exception thrown by the
        // indexer here could not be caught by the caller and would surface as
        // an uncaught exception, so it is reported through the callback.
        try {
          keywords = teeft.index(res).keywords;
        } catch (indexErr) {
          return callback(indexErr);
        }
        result[path.basename(filename, '.txt')] = keywords;
        callback();
      });
    }, function(err) {
      if (err) return cb(err); // I/O or indexing errors
      // write data
      fs.writeFile(output || './cache/indexAll.json', JSON.stringify(result), 'utf-8', function(err) {
        if (err) return cb(err);
        return cb(null, result);
      });
    });
  });
};

// Namespace for the graph builders attached to the exported object.
myObject.graphs = {};

/**
 * Build a document-to-document graph from per-document keywords and cache it
 * as JSON.
 *
 * Two documents are linked when they share terms; the link value is the
 * number of (term, pair) co-occurrences counted while walking every term.
 * Nodes are then given a `group` taken from the value returned by the
 * jLouvain helper (presumably Louvain community detection - see
 * ./lib/jLouvain.js).
 *
 * Exceptions raised while building the graph are thrown synchronously; only
 * the outcome of the final file write is reported through `cb`.
 *
 * @param {Object} keywords Maps a document id to an array of objects that
 *   each expose a `term` property.
 * @param {Object} [options]
 * @param {string} [options.output] Path of the JSON file to write; defaults
 *   to './cache/docToDoc.json' when falsy.
 * @param {number} [options.minLinkValue] A link is kept only when its value
 *   is strictly greater than this threshold; defaults to 0.
 * @param {function(Error=, Object=)} cb Called with the write error, or with
 *   (null, result) where result is
 *   { nodes: [{ id, value, group }], links: [{ source, target, value }] }.
 *   `id` is the numeric index of the document, `value` its document id;
 *   `source`/`target` are node indexes as strings (they come from splitting
 *   the pair key).
 */
myObject.graphs.docToDoc = function(keywords, options, cb) {
  if (!options) options = {};
  var documents = Object.keys(keywords), // List of document Ids
    // Prototype-less dictionaries: terms are arbitrary text, and on a plain {}
    // a term such as "constructor" or "toString" would hit an inherited
    // function instead of an array and crash on push().
    terms = Object.create(null), // term -> list of indexes of documents containing it
    matrix = Object.create(null), // "min,max" -> link count (sparse, upper half only)
    result = {
      'nodes': [],
      'links': []
    },
    edges = [], //  [{'source': '', 'target': '', 'weight': 0}, ...]
    nodes = [], // [index, ...]
    output = options.output || './cache/docToDoc.json',
    minLinkValue = options.minLinkValue || 0,
    i, j, key, docKeywords, term, docIndexes, pairKey, ids, community, groups;

  // Construction of terms Object
  for (i = 0; i < documents.length; i++) {
    docKeywords = keywords[documents[i]];
    for (j = 0; j < docKeywords.length; j++) {
      term = docKeywords[j].term;
      if (!terms[term]) terms[term] = [];
      terms[term].push(i);
    }
  }

  // Construction of matrix Object: one increment per pair of documents
  // sharing a term. The key always puts the smaller index first, so only
  // half of the matrix is filled.
  for (key in terms) {
    docIndexes = terms[key];
    for (i = 0; i < docIndexes.length - 1; i++) {
      for (j = i + 1; j < docIndexes.length; j++) {
        pairKey = Math.min(docIndexes[i], docIndexes[j]) + ',' +
          Math.max(docIndexes[i], docIndexes[j]);
        matrix[pairKey] = (matrix[pairKey] || 0) + 1;
      }
    }
  }

  // Construction of links doc-doc (same data in the two shapes needed:
  // 'weight' for the community helper, 'value' for the output)
  for (key in matrix) {
    if (matrix[key] > minLinkValue) {
      ids = key.split(',');
      edges.push({
        'source': ids[0],
        'target': ids[1],
        'weight': matrix[key]
      });
      result.links.push({
        'source': ids[0],
        'target': ids[1],
        'value': matrix[key]
      });
    }
  }

  // Construction of Nodes object
  for (i = 0; i < documents.length; i++) {
    nodes.push(i);
    result.nodes.push({
      'id': i,
      'value': documents[i],
      'group': 0
    });
  }

  // Create the "community"
  community = jLouvain().nodes(nodes).edges(edges);
  groups = community();

  // Assign its community to each node (keys of groups are node indexes)
  for (key in groups) {
    result.nodes[key].group = groups[key];
  }

  // write data
  fs.writeFile(output, JSON.stringify(result), 'utf-8', function(err) {
    if (err) return cb(err);
    return cb(null, result);
  });
};

// Expose indexAll and graphs.docToDoc to modules requiring this file.
module.exports = myObject;