# Newer
# Older
# web-services / kos2vec / indexer.py
# @schneist schneist on 10 Jul 2023 2 KB new version
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
    Line-oriented indexer built on CSOClassifier.

    Reads one JSON object per line from standard input, replaces its
    "value" field with the result of ``CSOClassifier.run`` on that field,
    and writes the object back to standard output as one JSON line.
"""
__authors__ = "Stephane Schneider"
__contact__ = "stephane.schneider@inis.fr"
from cso_classifier import CSOClassifier
import sys
import json
import os
import plac
# EZ_MASTER version of the memory indexer
# Number of workers.
# NOTE(review): main() defines its own local worker value and does not read
# this constant — confirm whether it is still needed.
workers="8"
# Indexer settings
# Additional parameters:
#   (i)   workers,                => number of parallel processes
#   (ii)  modules,                => indexing module to run: "syntactic", "semantic", "both"
#   (iii) enhancement,            => relevant super-areas: "first", "all", or "no".
#   (iv)  explanation,            => chunks of text that allowed the classifier to infer a given topic.
#   (v)   delete_outliers,        => removing erroneous topics that were conceptually distant from the others.
#   (vi)  fast_classification,    => determines whether the semantic module should use the full model or the cached one.
#   (vii) silent.                 => controls whether progress is printed to the console.


# Standard input is read line by line; each line is one JSON object ({}).
@plac.annotations(
    config=(
        "Name of the config",
        "positional",
        None,
        str,
    ),
)
def main(config):
    """Classify every JSON line read from stdin and echo it to stdout.

    Each input line must be a JSON object with a "value" entry. That entry
    is replaced by the result of ``indexer.run(...)`` and the whole object
    is written back to standard output as a single JSON line.

    Args:
        config: Name of the configuration, forwarded to ``CSOClassifier``
            as ``configFile`` (supplied through the .ini file).

    Raises:
        SystemExit: On the first line that is not valid JSON. The exit
            argument is a JSON-formatted error message including the
            1-based line number; it is printed to stderr and the process
            exits with status 1.
    """
    # Kept as the string "8", exactly as the original passed it.
    # NOTE(review): confirm whether the classifier expects an int here.
    worker = "8"

    indexer = CSOClassifier(
        modules="both",
        enhancement="first",
        explanation=False,
        delete_outliers=False,  # must stay False, otherwise an error occurs
        # silent=False leads to:
        #   <Error: [unpack] <SyntaxError: Unexpected token O in JSON at position 0>>
        # presumably because progress output ends up in the JSON stream.
        silent=True,
        fast_classification=True,  # does not work under EZS otherwise
        configFile=config,  # passed as an argument by the .ini
        workers=worker,
    )

    # enumerate() gives the 1-based line number used in the error message;
    # the original counter was never incremented.
    for compteur, json_line in enumerate(sys.stdin, start=1):
        # JSON deserialisation; only the parsing call is guarded.
        try:
            data = json.loads(json_line)
        except json.JSONDecodeError:
            # Doubled braces are literal braces; the single {} receives the
            # line number. The original left ".format(...)" inside the
            # string, so the message was never actually formatted.
            mess = (
                '[{{"code" : "1","message":"Input format problem line :{} : '
                'String could not be converted to JSON"}}]'
            ).format(compteur)
            # sys.exit with a string prints it to stderr and exits with 1.
            sys.exit(mess)

        # This is the call affected by the silent=False problem noted above.
        data["value"] = indexer.run(data["value"])
        if not data["value"]:
            sys.stderr.write('ERROR OUPUT EMPTY !! \n')

        sys.stdout.write(json.dumps(data) + '\n')
    
        
if __name__ == "__main__":
    # Developer switch: set to True to run main() under cProfile and print
    # the timing statistics. Disabled for normal operation.
    enable_profiling = False

    if not enable_profiling:
        # Normal path: plac parses the command line and invokes main().
        plac.call(main)
    else:
        import cProfile
        import pstats

        # Profile the same entry point, dumping raw data to Profile.prof.
        cProfile.runctx("plac.call(main)", globals(), locals(), "Profile.prof")
        # Report with directory prefixes stripped, sorted by internal time.
        pstats.Stats("Profile.prof").strip_dirs().sort_stats("time").print_stats()