# -*- coding: utf-8 -*-
import sys
import json
from flair.models import SequenceTagger
from flair.data import Sentence
from unidecode import unidecode
import logging
# Redirect the stream of the first handler attached to the 'flair' logger to
# stderr. This script writes its JSON results to stdout, so this presumably
# keeps flair's log messages out of the result stream.
# NOTE(review): assumes flair has already installed at least one handler on
# this logger at import time; an empty handler list would raise IndexError.
logging.getLogger('flair').handlers[0].stream = sys.stderr
def data_normalization(sentence):
    """Return a lower-cased copy of *sentence*.

    The input string is left untouched; only the returned value is
    normalized.
    """
    return sentence.lower()
# Load the trained NER model once; it is reused for every input line.
tagger = SequenceTagger.load("model.pt")

# Maps each NER label handled by this script to the key under which its
# entities are reported in the output. Insertion order defines the key order
# of the emitted dictionary. Keys containing accented characters are passed
# through unidecode, exactly as before; the key strings themselves are part of
# the output format and are intentionally left unchanged.
_LABEL_TO_KEY = {
    "PL": unidecode('Planète'),
    "TNQ": unidecode('Trou noirs, quasars et apparentés'),
    "SNAT": 'Satellite naturel',
    "OA": 'Objets artificiels',
    "SSO": unidecode('Système solaire'),
    "EB": unidecode('Étoiles binaires (et pulsars)'),
    "ET": unidecode('Étoiles'),
    "NRA": unidecode('Nébuleuse et région apparentés'),
    "CST": 'Constellations',
    "GAL": 'Galaxies et amas de galaxie',
    "AST": unidecode('Astèroïdes'),
    "ST": unidecode('Satue hypotétique'),
    "AS": 'amas stellaires',
    "SN": 'supernovas',
    "XPL": unidecode('exoplanètes'),
    "SR": 'sursaut radio, source radio, autres sursauts',
}


def _extract_entities(text):
    """Tag *text* and group the distinct entity texts by output key.

    The text is normalized with ``data_normalization``, split into sentences
    on full stops, and run through the module-level ``tagger``.

    Args:
        text: Raw text taken from the ``value`` field of an input record.

    Returns:
        A dict with one entry per key of ``_LABEL_TO_KEY`` (in that order),
        each holding the list of distinct entity texts found for that label,
        in order of first appearance. Labels that are not listed in
        ``_LABEL_TO_KEY`` are ignored.
    """
    found = {key: [] for key in _LABEL_TO_KEY.values()}

    normalized = data_normalization(text)
    # Naive segmentation on full stops. Blank fragments (e.g. the empty piece
    # after a final ".") are skipped so that no "."-only sentence is sent to
    # the tagger.
    sentences = [
        Sentence(fragment + ".")
        for fragment in normalized.split(".")
        if fragment.strip()
    ]
    if not sentences:
        return found

    tagger.predict(sentences)
    for sentence in sentences:
        for entity in sentence.get_spans('ner'):
            key = _LABEL_TO_KEY.get(entity.labels[0].value)
            if key is None:
                continue
            bucket = found[key]
            # Keep only the first occurrence of each entity text.
            if entity.text not in bucket:
                bucket.append(entity.text)
    return found


# Process the input as JSON lines: each record's 'value' text is replaced by
# the dictionary of entities found in it, and the record is written back out
# as one JSON line.
for line in sys.stdin:
    # Tolerate blank lines (such as a trailing newline) instead of failing
    # inside json.loads.
    if not line.strip():
        continue
    data = json.loads(line)
    # NOTE(review): the original carried a "add unidecode" reminder here;
    # presumably it referred to the entity texts themselves - confirm before
    # changing the output.
    data['value'] = _extract_entities(data['value'])
    sys.stdout.write(json.dumps(data))
    sys.stdout.write('\n')