#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Stream JSON records from standard input through the CSO classifier.

Each input line is one JSON object. Its "value" entry is replaced by the
result of ``CSOClassifier.run`` and the object is written back to standard
output as a single JSON line.
"""
__authors__ = "Stephane Schneider"
__contact__ = "stephane.schneider@inis.fr"

from cso_classifier import CSOClassifier
import sys
import json
import os
import plac

# EZ_MASTER version of the "memoire" indexer

# Number of workers
workers = "8"

# Set to True to run main() under cProfile and print the timing statistics.
_PROFILE = False

# Indexer settings
# Additional parameters:
# (i) workers             => number of processes run in parallel
# (ii) modules            => indexing module to run: "syntactic", "semantic", "both"
# (iii) enhancement       => relevant super-areas: "first", "all", or "no"
# (iv) explanation        => chunks of text that allowed the classifier to infer a given topic
# (v) delete_outliers     => removing erroneous topics that were conceptually distant from the others
# (vi) fast_classification => determines whether the semantic module should use the full model or the cached one
# (vii) silent            => prints its progress in the console


def _input_error_report(line_number):
    """Return the JSON error report for an input line that is not valid JSON."""
    message = (
        "Input format problem line :{} : "
        "String could not be converted to JSON".format(line_number)
    )
    return json.dumps([{"code": "1", "message": message}])


@plac.annotations(
    config=(
        "Name of the config",
        "positional",
        None,
        str,
    ),
)
def main(config):
    """Classify every JSON line read from stdin and write it to stdout.

    Standard input is read line by line; one line is one JSON object ``{}``.
    For each object, ``data["value"]`` is passed to the classifier and replaced
    by the classifier output before the object is serialised to stdout.

    Args:
        config: Name of the configuration, forwarded to the classifier as
            ``configFile`` (passed as a command-line argument by the .ini).

    Raises:
        SystemExit: as soon as an input line cannot be decoded as JSON. The
            JSON error report (including the 1-based line number) is written
            to stderr and the exit status is 1.
    """
    indexer = CSOClassifier(
        modules="both",
        enhancement="first",
        explanation=False,
        # Must stay False, otherwise the classifier raises an error.
        delete_outliers=False,
        # If False ==> <Error: [unpack] <SyntaxError: Unexpected token O in
        # JSON at position 0>>: progress output ends up in the JSON stream.
        silent=True,
        # Does not work under EZS otherwise.
        fast_classification=True,
        # Passed as an argument by the .ini.
        configFile=config,
        workers=workers,
    )

    for line_number, json_line in enumerate(sys.stdin, start=1):
        # JSON deserialisation; keep the try body limited to the parse itself.
        try:
            data = json.loads(json_line)
        except json.decoder.JSONDecodeError:
            # sys.exit with a string prints it to stderr and exits with status 1.
            sys.exit(_input_error_report(line_number))

        # This is the call that breaks the output stream when silent=False.
        data["value"] = indexer.run(data["value"])
        if not data["value"]:
            sys.stderr.write('ERROR OUPUT EMPTY !! \n\n')

        sys.stdout.write(json.dumps(data) + '\n')


if __name__ == "__main__":
    if _PROFILE:
        import cProfile
        import pstats

        cProfile.runctx("plac.call(main)", globals(), locals(), "Profile.prof")
        s = pstats.Stats("Profile.prof")
        s.strip_dirs().sort_stats("time").print_stats()
    else:
        plac.call(main)