diff --git a/ner-tagger/v1/astro/find-astro.py b/ner-tagger/v1/astro/find-astro.py
new file mode 100755
index 0000000..f63c249
--- /dev/null
+++ b/ner-tagger/v1/astro/find-astro.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import sys
+import json
+from flair.models import SequenceTagger
+from flair.data import Sentence
+from unidecode import unidecode
+import logging
+
+logging.getLogger('flair').handlers[0].stream = sys.stderr
+
+def data_normalization(sentence):
+    cpy_sentence = sentence.lower()
+    return cpy_sentence
+tagger = SequenceTagger.load("model.pt")
+
+for line in sys.stdin:
+    data = json.loads(line)
+    text = data['value']
+    PL = []
+    TNQ = []
+    SNAT = []
+    OA = []
+    SSO = []
+    EB = []
+    ET = []
+    NRA = []
+    CST = []
+    GAL = []
+    AST = []
+    ST = []
+    AS = []
+    SN = []
+    XPL = []
+    SR = []
+    sent = data_normalization(text)
+    sentS = sent.split(".")
+    sentences = [Sentence(sentS[i] + ".") for i in range(len(sentS))]
+    tagger.predict(sentences)
+    label_lists = {"PL": PL, "TNQ": TNQ, "SNAT": SNAT, "OA": OA, "SSO": SSO, "EB": EB, "ET": ET, "NRA": NRA, "CST": CST, "GAL": GAL, "AST": AST, "ST": ST, "AS": AS, "SN": SN, "XPL": XPL, "SR": SR}
+    for sentence in sentences:
+        for entity in sentence.get_spans('ner'):
+            label_value = entity.labels[0].value
+            if entity.text not in label_lists.get(label_value, []):
+                label_lists[label_value].append(entity.text)
+
+    returnDic = {unidecode('Planète'): PL, unidecode('Trou noirs, quasars et apparentés'): TNQ, 'Satellite naturel': SNAT, 'Objets artificiels': OA, unidecode('Système solaire'): SSO, unidecode('Étoiles binaires (et pulsars)'): EB, unidecode('Étoiles'): ET, unidecode('Nébuleuse et région apparentés'): NRA, 'Constellations': CST, 'Galaxies et amas de galaxie': GAL, unidecode('Astèroïdes'): AST, unidecode('Satue hypotétique'): ST, 'amas stellaires': AS, 'supernovas': SN, unidecode('exoplanètes'): XPL, 'sursaut radio, source radio, autres sursauts': SR}
+    # add unidecode
+    data['value'] = {id: value for id, value in returnDic.items() if value != []}
+    sys.stdout.write(json.dumps(data))
+    sys.stdout.write('\n')
\ No newline at end of file
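For context, find-astro.py is a line-oriented filter: it reads one JSON object per stdin line, lowercases the text in value, splits it into sentences, tags them with the Flair model loaded from model.pt, and writes the object back with value replaced by a dict of non-empty category lists (accented keys passed through unidecode); every other field is passed through unchanged. Below is a minimal sketch of that contract, assuming it is run from the ner-tagger/ directory with model.pt resolvable there; the sample record (the id field) and the output shown in the final comment are illustrative assumptions only, since the actual entities depend on the model:

import json
import subprocess

# Hypothetical input record: find-astro.py only requires 'value';
# any other field (here 'id') is passed through unchanged.
record = {"id": "doc-1", "value": "Jupiter est une planète du Système solaire."}

# Pipe one JSON line into the script, exactly as the ezs pipeline does.
proc = subprocess.run(
    ["./v1/astro/find-astro.py"],
    input=json.dumps(record) + "\n",
    capture_output=True,
    text=True,
)
print(proc.stdout)
# Plausible output shape (illustrative, model-dependent):
# {"id": "doc-1", "value": {"Planete": ["jupiter"]}}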
diff --git a/ner-tagger/v1/astro/tagger.ini b/ner-tagger/v1/astro/tagger.ini
new file mode 100644
index 0000000..03b61cc
--- /dev/null
+++ b/ner-tagger/v1/astro/tagger.ini
@@ -0,0 +1,36 @@
+# OpenAPI Documentation - JSON format (dot notation)
+mimeType = application/json
+
+post.description = Reconnaît des entités nommées en astrophysique
+post.responses.default.description = Reconnaissance d'entités nommées en astrophysique.
+post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream
+post.summary = Recherche d'entités en astrophysique
+post.requestBody.required = true
+post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream
+post.parameters.0.in = query
+post.parameters.0.name = indent
+post.parameters.0.schema.type = boolean
+post.parameters.0.description = Indent the JSON result or not
+
+# Examples
+
+
+[use]
+# exec
+plugin = @ezs/spawn
+# JSONParse
+plugin = @ezs/basics
+
+[JSONParse]
+separator = *
+
+[expand]
+path = value
+size = 100
+
+[expand/exec]
+# command must be executable!
+command = ./v1/astro/find-astro.py
+
+[dump]
+indent = env('indent', false)
\ No newline at end of file
diff --git a/ner-tagger/v1/astroTagger/find-astro.py b/ner-tagger/v1/astroTagger/find-astro.py
deleted file mode 100644
index f63c249..0000000
--- a/ner-tagger/v1/astroTagger/find-astro.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import sys
-import json
-from flair.models import SequenceTagger
-from flair.data import Sentence
-from unidecode import unidecode
-import logging
-
-logging.getLogger('flair').handlers[0].stream = sys.stderr
-
-def data_normalization(sentence):
-    cpy_sentence = sentence.lower()
-    return cpy_sentence
-tagger = SequenceTagger.load("model.pt")
-
-for line in sys.stdin:
-    data = json.loads(line)
-    text = data['value']
-    PL = []
-    TNQ = []
-    SNAT = []
-    OA = []
-    SSO = []
-    EB = []
-    ET = []
-    NRA = []
-    CST = []
-    GAL = []
-    AST = []
-    ST = []
-    AS = []
-    SN = []
-    XPL = []
-    SR = []
-    sent = data_normalization(text)
-    sentS = sent.split(".")
-    sentences = [Sentence(sentS[i] + ".") for i in range(len(sentS))]
-    tagger.predict(sentences)
-    label_lists = {"PL": PL, "TNQ": TNQ, "SNAT": SNAT, "OA": OA, "SSO": SSO, "EB": EB, "ET": ET, "NRA": NRA, "CST": CST, "GAL": GAL, "AST": AST, "ST": ST, "AS": AS, "SN": SN, "XPL": XPL, "SR": SR}
-    for sentence in sentences:
-        for entity in sentence.get_spans('ner'):
-            label_value = entity.labels[0].value
-            if entity.text not in label_lists.get(label_value, []):
-                label_lists[label_value].append(entity.text)
-
-    returnDic = {unidecode('Planète'): PL, unidecode('Trou noirs, quasars et apparentés'): TNQ, 'Satellite naturel': SNAT, 'Objets artificiels': OA, unidecode('Système solaire'): SSO, unidecode('Étoiles binaires (et pulsars)'): EB, unidecode('Étoiles'): ET, unidecode('Nébuleuse et région apparentés'): NRA, 'Constellations': CST, 'Galaxies et amas de galaxie': GAL, unidecode('Astèroïdes'): AST, unidecode('Satue hypotétique'): ST, 'amas stellaires': AS, 'supernovas': SN, unidecode('exoplanètes'): XPL, 'sursaut radio, source radio, autres sursauts': SR}
-    # ajouter unidecode
-    data['value'] = {id: value for id, value in returnDic.items() if value != []}
-    sys.stdout.write(json.dumps(data))
-    sys.stdout.write('\n')
\ No newline at end of file
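Taken together with tagger.ini above, the ezs pipeline works as follows: [JSONParse] with separator = * streams each element of the posted JSON array, [expand] pulls out the value field in batches of up to 100 records, [expand/exec] pipes each batch item as a JSON line through ./v1/astro/find-astro.py (which is why the script rewrites only value and passes other fields through), and [dump] serializes the result, pretty-printed when the indent query parameter is true. A hedged client sketch follows: the host, port, and mount path are assumptions, since this diff does not configure the HTTP server, and only the value field is assumed required in each record:

import json
from urllib.request import Request, urlopen

# Assumed endpoint: host, port, and route are not defined in this diff;
# adjust to wherever the ezs server mounts v1/astro/tagger.ini.
URL = "http://localhost:31976/v1/astro/tagger?indent=true"

# The request body is a JSON array of records, each carrying a 'value' field.
payload = [{"value": "Jupiter est une planète du Système solaire."}]

req = Request(
    URL,
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urlopen(req) as response:
    print(response.read().decode("utf-8"))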