#!/usr/bin/env python
# -*- coding: utf-8 -*-
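"""
Run the CSO Classifier on the ``*.txt`` files of a corpus directory and
print one JSON document per file to standard output.
"""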
__authors__ = "Stephane Schneider"
__contact__ = "stephane.schneider@inis.fr"
#
from cso_classifier import CSOClassifier
# from tools.tools import precision_recall
import glob
import sys
import json
import plac
import os
#
# Non-EZ_MASTER version of the memory indexer.
#
#
# CSOClassifier(modules="both", enhancement="first", explanation=False)
# Additional parameters:
# (i) workers, => number of processes to run in parallel.
# (ii) modules, => indexing module(s) to run: "syntactic", "semantic", "both".
# (iii) enhancement, => which relevant super-areas to add: "first", "all", or "no".
# (iv) explanation, => return the chunks of text that allowed the classifier to infer a given topic.
# (v) delete_outliers, => remove erroneous topics that are conceptually distant from the others.
# (vi) fast_classification, => determines whether the semantic module should use the full model or the cached one.
# (vii) silent, => determines whether the classifier prints its progress in the console (True = no output).
# Traceback location recorded here (presumably the error seen when delete_outliers=True -- to confirm):
# File "/home/schneist/app/kos2vec/cso/cso-classifier/cso_classifier/postprocmodule.py", line 66, in __create_matrix_distance_from_ontology
# Example:
# python3 exec_indexer.py -indent data/memory20/annotated
#
@plac.annotations(
    article_paths=("Corpus file path", "positional", None, str),
    indent=("Indent output json", "flag", "indent"),
)
def main(article_paths, indent):
    """Classify every article of a corpus and print one JSON document each.

    Args:
        article_paths: Path of a directory whose ``*.txt`` files are
            classified, or path of a single text file.
        indent: When true, pretty-print the JSON with a 4-space indent;
            otherwise print it on a single line.

    Each printed document is the dictionary returned by
    ``CSOClassifier.run`` with two extra keys: ``text`` (the article
    content) and ``path`` (the file it was read from).

    Exits with status 1, after writing a message to stderr, when the
    path does not exist or when an article cannot be read.
    """
    # The classifier's documentation describes ``workers`` as an integer.
    workers = 8

    if os.path.isdir(article_paths):
        # Sorted so that the output order is reproducible between runs
        # (glob returns files in arbitrary order).
        article_files = sorted(glob.glob(os.path.join(article_paths, "*.txt")))
    elif os.path.isfile(article_paths):
        article_files = [article_paths]
    else:
        # stdout carries the JSON results, so diagnostics go to stderr.
        print(f"ERROR : Path '{article_paths}' not exist !!", file=sys.stderr)
        sys.exit(1)

    if not article_files:
        # Nothing to classify: skip the classifier set-up entirely.
        print(
            f"WARNING : no '*.txt' file found in '{article_paths}'",
            file=sys.stderr,
        )
        return

    # Indexer configuration.
    indexer = CSOClassifier(
        modules="both",
        enhancement="first",
        explanation=False,
        delete_outliers=False,  # must stay False: enabling it raised an error
        silent=True,
        fast_classification=False,
        workers=workers,
    )

    for article in article_files:
        try:
            with open(article, encoding="utf-8") as f_article:
                text = f_article.read()
        except (OSError, UnicodeDecodeError) as err:
            print(
                f"ERROR : Could not open/read {article} file: {err}",
                file=sys.stderr,
            )
            sys.exit(1)

        data = indexer.run(text)
        data["text"] = text
        data["path"] = article

        print(json.dumps(data, indent=4 if indent else None, sort_keys=True))
        # NOTE: evaluation against reference annotations (precision_recall
        # from tools.tools) is currently disabled.
if __name__ == "__main__":
    # Developer toggle: set to True to run the command line under cProfile
    # and print the statistics sorted by internal time.
    profile_run = False

    if not profile_run:
        plac.call(main)
    else:
        import cProfile
        import pstats

        stats_file = "Profile.prof"
        cProfile.runctx("plac.call(main)", globals(), locals(), stats_file)
        report = pstats.Stats(stats_file)
        report.strip_dirs().sort_stats("time").print_stats()