# web-services / ner-tagger / v1 / astro / find-astro.py
# -*- coding: utf-8 -*-

import sys
import json
from flair.models import SequenceTagger
from flair.data import Sentence
from unidecode import unidecode  
import logging

# Point the first handler of the 'flair' logger at stderr. This script writes
# its JSON results to stdout, so presumably this keeps library log output from
# mixing with them. Assumes the 'flair' logger already has at least one handler
# installed once the flair imports above have run (handlers[0] would raise
# IndexError otherwise).
logging.getLogger('flair').handlers[0].stream = sys.stderr

def data_normalization(sentence):
    """Return a lowercased copy of *sentence*.

    The input string is not modified; ``str.lower`` always builds a new
    string.
    """
    return sentence.lower()
tagger = SequenceTagger.load("model.pt")

# Output title for every NER label this script reports, in output order.
# Accented titles go through unidecode so the emitted keys are plain ASCII.
# The table is built once here instead of once per input line.
# NOTE(review): some titles look misspelled ('Satue hypotétique',
# 'Astèroïdes'); they are kept unchanged because they are emitted as output
# keys and consumers may match on them.
LABEL_TITLES = {
    "PL": unidecode('Planète'),
    "TNQ": unidecode('Trou noirs, quasars et apparentés'),
    "SNAT": 'Satellite naturel',
    "OA": 'Objets artificiels',
    "SSO": unidecode('Système solaire'),
    "EB": unidecode('Étoiles binaires (et pulsars)'),
    "ET": unidecode('Étoiles'),
    "NRA": unidecode('Nébuleuse et région apparentés'),
    "CST": 'Constellations',
    "GAL": 'Galaxies et amas de galaxie',
    "AST": unidecode('Astèroïdes'),
    "ST": unidecode('Satue hypotétique'),
    "AS": 'amas stellaires',
    "SN": 'supernovas',
    "XPL": unidecode('exoplanètes'),
    "SR": 'sursaut radio, source radio, autres sursauts',
}

# Each stdin line is one JSON object whose 'value' field holds the text to
# tag. The same object is written back to stdout, one per line, with 'value'
# replaced by a {title: [entity texts]} mapping.
for line in sys.stdin:
    # A blank line (e.g. a trailing newline at the end of the stream) is not
    # a record; json.loads would raise on it.
    if not line.strip():
        continue

    data = json.loads(line)
    text = data['value']

    # One list of distinct entity texts per known label, in first-seen order.
    found = {label: [] for label in LABEL_TITLES}

    normalized = data_normalization(text)
    # Naive sentence split on "."; the period is re-appended to each piece.
    # Empty pieces (there is always one after a trailing period) are dropped
    # rather than sent to the tagger as a lone ".".
    sentences = [
        Sentence(piece + ".")
        for piece in normalized.split(".")
        if piece.strip()
    ]
    if sentences:
        tagger.predict(sentences)

    for sentence in sentences:
        for entity in sentence.get_spans('ner'):
            label_value = entity.labels[0].value
            mentions = found.get(label_value)
            if mentions is None:
                # Label missing from LABEL_TITLES: it has no output title, so
                # skip it instead of failing with KeyError.
                continue
            if entity.text not in mentions:
                mentions.append(entity.text)

    # Only labels with at least one entity are reported.
    data['value'] = {
        LABEL_TITLES[label]: mentions
        for label, mentions in found.items()
        if mentions
    }
    sys.stdout.write(json.dumps(data))
    sys.stdout.write('\n')