#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
exec_indexer.py

Run the CSO classifier on every ``*.txt`` file found in a directory and print
one JSON document per article on standard output.

Example:
    python3 exec_indexer.py -indent data/memory20/annotated
"""
__authors__ = "Stephane Schneider"
__contact__ = "stephane.schneider@inis.fr"

import glob
import json
import os
import sys

import plac
from cso_classifier import CSOClassifier
# from tools.tools import precision_recall

#
# NON EZ_MASTER version of the memory indexer
#
#
# CSOClassifier(modules="both", enhancement="first", explanation=False)
# Additional parameters:
# (i) workers, => number of processes run in parallel
# (ii) modules, => indexing module to run: "syntactic", "semantic", "both"
# (iii) enhancement, => relevant super-areas, "first", "all", or "no".
# (iv) explanation, => chunks of text that allowed the classifier to infer a given topic.
# (v) delete_outliers, => removing erroneous topics that were conceptually distant from the others.
# (vi) fast_classification, => determines whether the semantic module should use the full model or the cached one.
# (vii) silent. => prints its progress in the console.
# File "/home/schneist/app/kos2vec/cso/cso-classifier/cso_classifier/postprocmodule.py", line 66, in __create_matrix_distance_from_ontology
# ex :
# python3 exec_indexer.py -indent data/memory20/annotated
#


@plac.annotations(
    article_paths=("Corpus file path", "positional", None, str),
    indent=("Indent output json", "flag", "indent")
)
def main(article_paths, indent):
    """Classify every ``*.txt`` article of a directory and print JSON results.

    For each article, the dictionary returned by ``CSOClassifier.run`` is
    extended with the raw text (key ``text``) and the file path (key
    ``path``), then serialised with sorted keys and printed on stdout.

    Args:
        article_paths: Path of the directory holding the ``*.txt`` articles.
        indent: When true, pretty-print each JSON document with a 4-space
            indent; otherwise emit the compact default form.

    Raises:
        SystemExit: With status 1 when ``article_paths`` does not exist or
            when an article cannot be opened/read. The error message is
            written to stderr so that stdout only ever carries JSON.
    """
    # NOTE(review): the worker count is passed as a string, as in the original
    # script; confirm whether CSOClassifier expects an int here.
    workers = "8"

    if not os.path.exists(article_paths):
        print(f"ERROR : Path '{article_paths}' not exist !!", file=sys.stderr)
        # Non-zero status so that calling scripts can detect the failure.
        sys.exit(1)

    article_files = glob.glob(article_paths + "/*.txt")

    # Indexer configuration.
    indexer = CSOClassifier(
        modules="both",
        enhancement="first",
        explanation=False,
        # Must stay False for the run to work; enabling it raises an error
        # (see the postprocmodule.py traceback noted in the header comment).
        delete_outliers=False,
        silent=True,
        fast_classification=False,
        workers=workers
    )

    # print(article_paths)
    # filename = "/home/schneist/app/cso/test/memory_20.txt"
    for article in article_files:
        try:
            with open(article) as f_article:
                text = f_article.read()
        except OSError:
            print(f"ERROR : Could not open/read {article} file:", file=sys.stderr)
            sys.exit(1)

        data = indexer.run(text)
        data['text'] = text
        data['path'] = article

        # indent=None is json.dumps' default, i.e. the compact one-line form.
        js = json.dumps(data, indent=4 if indent else None, sort_keys=True)
        print(js)  # JSON-LD format

        # (silence, inter, recall, precision, f1, intersect) = precision_recall(
        #     list_ids_mc, article
        # )


if __name__ == "__main__":
    # Manual developer switch: change to True to run under cProfile and print
    # the statistics sorted by time instead of doing a plain run.
    if False:
        import cProfile
        import pstats

        cProfile.runctx("plac.call(main)", globals(), locals(), "Profile.prof")
        s = pstats.Stats("Profile.prof")
        s.strip_dirs().sort_stats("time").print_stats()
    else:
        plac.call(main)