# -*- coding: utf-8 -*-
"""Tag astronomical named entities in JSON lines read from stdin.

Each non-blank input line must be a JSON object with a ``value`` key holding
the text to analyse.  The text is lower-cased, split into sentences on ``.``
and run through the flair sequence tagger loaded from ``model.pt``.  The
``value`` key is then replaced by a dictionary mapping each category name to
the list of distinct entity texts found for it (in order of first
appearance), and the object is written to stdout as one JSON line.
"""

import json
import logging
import sys

from flair.data import Sentence
from flair.models import SequenceTagger
from unidecode import unidecode

# Ordered (tagger label, output key) pairs.  The order is the order of the
# keys in the emitted JSON object.  Keys are computed once here instead of on
# every input line; the literals (including their spelling) are part of the
# output format and must not be altered.
CATEGORIES = (
    ("PL", unidecode('Planète')),
    ("TNQ", unidecode('Trou noirs, quasars et apparentés')),
    ("SNAT", 'Satellite naturel'),
    ("OA", 'Objets artificiels'),
    ("SSO", unidecode('Système solaire')),
    ("EB", unidecode('Étoiles binaires (et pulsars)')),
    ("ET", unidecode('Étoiles')),
    ("NRA", unidecode('Nébuleuse et région apparentés')),
    ("CST", 'Constellations'),
    ("GAL", 'Galaxies et amas de galaxie'),
    ("AST", unidecode('Astèroïdes')),
    ("ST", unidecode('Satue hypotétique')),
    ("AS", 'amas stellaires'),
    ("SN", 'supernovas'),
    ("XPL", unidecode('exoplanètes')),
    ("SR", 'sursaut radio, source radio, autres sursauts'),
)


def data_normalization(sentence):
    """Return a lower-cased copy of *sentence*."""
    return sentence.lower()


def _split_sentences(text):
    """Split *text* on ``.`` and return one flair ``Sentence`` per segment.

    Segments that are empty or whitespace-only (the trailing piece after a
    final dot, or the pieces between consecutive dots) are dropped so the
    tagger is not fed sentences consisting of a lone ``.``.
    """
    return [
        Sentence(segment + ".")
        for segment in text.split(".")
        if segment.strip()
    ]


def _collect_entities(sentences):
    """Group the distinct NER span texts of *sentences* by output key.

    Returns a dict with one entry per pair in ``CATEGORIES`` (in that order);
    each value is the list of unique span texts carrying that label, in order
    of first appearance.  Spans with a label not listed in ``CATEGORIES`` are
    ignored.
    """
    found = {label: [] for label, _ in CATEGORIES}
    for sentence in sentences:
        for entity in sentence.get_spans('ner'):
            bucket = found.get(entity.labels[0].value)
            if bucket is not None and entity.text not in bucket:
                bucket.append(entity.text)
    return {key: found[label] for label, key in CATEGORIES}


def main():
    """Read JSON lines from stdin, tag them and write JSON lines to stdout."""
    # Send flair's log output to stderr; stdout carries the JSON results.
    # Guarded because indexing an empty handler list would raise IndexError.
    flair_handlers = logging.getLogger('flair').handlers
    if flair_handlers:
        flair_handlers[0].stream = sys.stderr

    tagger = SequenceTagger.load("model.pt")

    for line in sys.stdin:
        if not line.strip():
            # A blank line is not a JSON document; skip it instead of
            # crashing in json.loads.
            continue
        data = json.loads(line)
        sentences = _split_sentences(data_normalization(data['value']))
        if sentences:
            tagger.predict(sentences)
        # NOTE(review): the original code carried the reminder
        # "add unidecode" here; presumably it was meant for the entity
        # texts, which are emitted as-is -- confirm before changing.
        data['value'] = _collect_entities(sentences)
        sys.stdout.write(json.dumps(data) + '\n')


if __name__ == "__main__":
    main()