Newer
Older
web-services / kos2vec / indexer.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
    .py

"""
__authors__ = "Stephane Schneider"
__contact__ = "stephane.schneider@inis.fr"

import json
import logging
import os
import sys

from cso_classifier import CSOClassifier

#   Version EZ_MASTER  de l indexeur memoire

# Number of workers handed to the classifier.
# NOTE(review): stored as an integer because the classifier documents
# `workers` as an integer parameter; the previous value was the string "8".
workers = 8

# Indexer configuration.
# Additional parameters:
#   (i)   workers,                => number of processes run in parallel
#   (ii)  modules,                => indexing module to run: "syntactic", "semantic", "both"
#   (iii) enhancement,            => relevant super-areas: "first", "all", or "no"
#   (iv)  explanation,            => chunks of text that allowed the classifier to infer a given topic
#   (v)   delete_outliers,        => removing erroneous topics that were conceptually distant from the others
#   (vi)  fast_classification,    => determines whether the semantic module should use the full model or the cached one
#   (vii) silent.                 => controls whether progress is printed in the console
indexer = CSOClassifier(
    modules="both",
    enhancement="first",
    explanation=True,
    # Kept at False: the original author notes that the indexer errors out
    # otherwise.
    delete_outliers=False,
    silent=True,
    fast_classification=True,
    workers=workers,
)

# Read standard input line by line; each line is expected to hold exactly one
# JSON object ({...}). The object's "value" field is replaced by the result of
# indexer.run() and the whole object is written back to standard output as a
# single JSON line.
for line_number, json_line in enumerate(sys.stdin, start=1):
    # Deserialize the JSON document; only the parsing step is guarded so that
    # errors raised by the indexer itself are not mistaken for format errors.
    try:
        data = json.loads(json_line)
    except json.JSONDecodeError:
        # Report the 1-based number of the offending line and stop with a
        # non-zero exit status.
        logging.error(
            "Input format problem line :%s : String could not be converted to JSON",
            line_number,
        )
        # sys.exit is used instead of the `exit` helper, which is only
        # injected by the `site` module and is meant for interactive use.
        sys.exit(1)

    data["value"] = indexer.run(data["value"])

    sys.stdout.write(json.dumps(data))
    sys.stdout.write("\n")