#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
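Command-line filter: read one JSON document per line from standard input,
replace its "value" entry with the output of CSOClassifier.run, and write the
record back to standard output as one JSON line.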
"""
__authors__ = "Stephane Schneider"
__contact__ = "stephane.schneider@inis.fr"
from cso_classifier import CSOClassifier
import sys
import json
import os
import plac
# EZ_MASTER version of the "memoire" indexer.
# Number of workers.
# NOTE(review): main() passes its own local value to CSOClassifier; this
# module-level name is not referenced in the visible part of the file.
workers="8"
# Indexer settings.
# Additional parameters:
# (i) workers, => number of processes run in parallel
# (ii) modules, => indexing module to run: "syntactic", "semantic", "both"
# (iii) enhancement, => relevant super-areas, "first", "all", or "no".
# (iv) explanation, => chunks of text that allowed the classifier to infer a given topic.
# (v) delete_outliers, => removing erroneous topics that were conceptually distant from the others.
# (vi) fast_classification, => determines whether the semantic module should use the full model or the cached one.
# (vii) silent. => prints its progress in the console.
@plac.annotations(
    config=(
        "Name of the config",
        "positional",
        None,
        str,
    ),
)
def main(config):
    """Classify JSON records read line by line from standard input.

    Each input line must hold exactly one JSON document. Its "value" entry is
    passed to ``CSOClassifier.run`` and replaced by the returned result, then
    the whole record is written to standard output as a single JSON line.

    Args:
        config: Forwarded to ``CSOClassifier`` as ``configFile`` (per the
            original note, supplied as an argument by the .ini file).

    Raises:
        SystemExit: When an input line cannot be parsed as JSON. The exit
            message is a JSON error report that names the 1-based number of
            the offending line; it is printed to stderr and the exit status
            is 1.
    """
    # NOTE(review): kept as a string exactly as in the original call; the
    # upstream classifier documents an integer here -- confirm before changing.
    worker = "8"
    indexer = CSOClassifier(
        modules="both",
        enhancement="first",
        explanation=False,
        # Must stay False: per the original note an error occurs otherwise.
        delete_outliers=False,
        # When False the consumer fails with:
        # <Error: [unpack] <SyntaxError: Unexpected token O in JSON at position 0>>
        silent=True,
        # Per the original note, does not work under EZS otherwise.
        fast_classification=True,
        # Passed as an argument by the .ini file.
        configFile=config,
        workers=worker,
    )

    # Count lines from 1 so a malformed line can be reported by its number.
    for line_number, json_line in enumerate(sys.stdin, start=1):
        # JSON deserialisation of the current line.
        try:
            data = json.loads(json_line)
        except json.JSONDecodeError:
            # Build the report with json.dumps so it is always valid JSON and
            # actually contains the line number.
            report = json.dumps(
                [
                    {
                        "code": "1",
                        "message": (
                            "Input format problem line :{} : "
                            "String could not be converted to JSON"
                        ).format(line_number),
                    }
                ]
            )
            # A string argument is printed to stderr and yields exit status 1.
            sys.exit(report)

        # Original note: this is the call affected when silent=False.
        data["value"] = indexer.run(data["value"])
        if not data["value"]:
            # Warn only: the record is still emitted below.
            sys.stderr.write('ERROR OUPUT EMPTY !! \n')
        sys.stdout.write(json.dumps(data) + '\n')
if __name__ == "__main__":
    # Developer toggle: set to True to run main() under cProfile and print
    # the timing statistics instead of running it directly.
    run_profiled = False
    if not run_profiled:
        plac.call(main)
    else:
        import cProfile
        import pstats

        profile_path = "Profile.prof"
        cProfile.runctx("plac.call(main)", globals(), locals(), profile_path)
        stats = pstats.Stats(profile_path)
        stats.strip_dirs().sort_stats("time").print_stats()