diff --git a/astro-ner/examples.http b/astro-ner/examples.http new file mode 100644 index 0000000..ef69d8f --- /dev/null +++ b/astro-ner/examples.http @@ -0,0 +1,18 @@ +# Ces exemples peuvent être exécutés directement dans VSCode, en utilisant l'extension REST Client (humao.rest-client) +# Décommenter/commenter les lignes voulues pour tester localement +# @baseUrl=http://localhost:31976 +# @baseUrl=http://astro-ner.tdmservices.intra.inist.fr/ +@baseUrl=https://astro-ner.services.istex.fr/ + +### +# @name v1AstroNer +# Reconnait des entités nommées en astrophysique +POST {{baseUrl}}/v1/tagger?indent=true HTTP/1.1 +Content-Type: application/json + +[ + { + "id": 1, + "value": "V643 Orionis is a binary star system located in the Orion constellation, offering valuable insightsinto stellar evolution. The Orion constellation is simple" + } +] diff --git a/astro-ner/swagger.json b/astro-ner/swagger.json new file mode 100644 index 0000000..19d879a --- /dev/null +++ b/astro-ner/swagger.json @@ -0,0 +1,32 @@ +{ + "info": { + "title": "astro-ner - Reconnaissance d'entités nommées en astrophysique", + "summary": "Reconnait des entités nommées en astrophysique", + "version": "0.0.0", + "termsOfService": "https://objectif-tdm.inist.fr/", + "contact": { + "name": "Inist-CNRS", + "url": "https://www.inist.fr/nous-contacter/" + } + }, + "servers": [ + { + "x-comment": "Will be automatically completed by the ezs server." 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Tag astrophysics named entities in a stream of JSON lines.

Each line read from stdin is a JSON object whose ``value`` field holds a text.
The text is lower-cased, split on ".", and tagged with the flair model stored
in ``model.pt``. The object is written back to stdout, one JSON document per
line, with ``value`` replaced by a mapping from category name to the list of
distinct entity texts found for that category. Categories for which nothing
was found are omitted.
"""

import json
import logging
import sys

from flair.data import Sentence
from flair.models import SequenceTagger
from unidecode import unidecode

# Send flair's log output to stderr; stdout carries the JSON results.
logging.getLogger('flair').handlers[0].stream = sys.stderr

# Model label -> category name used as key in the output.
# Accented names go through unidecode so that the emitted keys are plain ASCII.
# The insertion order is the order of the keys in the output object.
LABEL_NAMES = {
    "PL": unidecode('Planète'),
    "TNQ": unidecode('Trou noirs, quasars et apparentés'),
    "SNAT": 'Satellite naturel',
    "OA": 'Objets artificiels',
    "SSO": unidecode('Système solaire'),
    "EB": unidecode('Étoiles binaires (et pulsars)'),
    "ET": unidecode('Étoiles'),
    "NRA": unidecode('Nébuleuse et région apparentés'),
    "CST": 'Constellations',
    "GAL": 'Galaxies et amas de galaxie',
    "AST": unidecode('Astèroïdes'),
    "ST": unidecode('Satue hypotétique'),
    "AS": 'amas stellaires',
    "SN": 'supernovas',
    "XPL": unidecode('exoplanètes'),
    "SR": 'sursaut radio, source radio, autres sursauts',
}


def data_normalization(sentence):
    """Return *sentence* converted to lower case."""
    return sentence.lower()


def split_sentences(text):
    """Split *text* on "." and wrap every non-blank piece in a flair Sentence.

    The "." removed by the split is appended again to each piece. Blank
    pieces (typically the one following the final ".") are dropped so the
    model is not asked to tag a sentence consisting only of ".".
    """
    return [Sentence(piece + ".") for piece in text.split(".") if piece.strip()]


def collect_entities(sentences):
    """Group the distinct entity texts of tagged *sentences* by model label.

    Returns a dict with one (possibly empty) list per label of LABEL_NAMES,
    in LABEL_NAMES order; within a list, texts keep their order of first
    appearance.
    """
    found = {label: [] for label in LABEL_NAMES}
    for sentence in sentences:
        for entity in sentence.get_spans('ner'):
            texts = found.get(entity.labels[0].value)
            if texts is None:
                # Label without a category name: ignore the entity instead of
                # failing with a KeyError and aborting the whole stream.
                continue
            if entity.text not in texts:
                texts.append(entity.text)
    return found


def main():
    """Tag every JSON line of stdin and write the results to stdout."""
    tagger = SequenceTagger.load("model.pt")

    for line in sys.stdin:
        data = json.loads(line)
        sentences = split_sentences(data_normalization(data['value']))
        if sentences:
            tagger.predict(sentences)
        found = collect_entities(sentences)
        # Only categories with at least one entity appear in the result.
        data['value'] = {
            LABEL_NAMES[label]: texts
            for label, texts in found.items()
            if texts
        }
        sys.stdout.write(json.dumps(data))
        sys.stdout.write('\n')


if __name__ == "__main__":
    main()
+command = ./v1/find-astro.py + +[dump] +indent = env('indent', false) \ No newline at end of file diff --git a/ner-tagger/v1/astroTagger/find-astro.py b/ner-tagger/v1/astroTagger/find-astro.py deleted file mode 100644 index 539ecc7..0000000 --- a/ner-tagger/v1/astroTagger/find-astro.py +++ /dev/null @@ -1,97 +0,0 @@ -# -*- coding: utf-8 -*- - -import sys -import json -from flair.models import SequenceTagger -from flair.data import Sentence -from unidecode import unidecode -import logging - -logging.getLogger('flair').handlers[0].stream = sys.stderr - -def data_normalization(sentence): - cpy_sentence = sentence.lower() - return cpy_sentence -tagger = SequenceTagger.load("model.pt") - -for line in sys.stdin: - data = json.loads(line) - text=data['value'] - PL = [] - TNQ = [] - SNAT = [] - OA = [] - SSO = [] - EB = [] - ET = [] - NRA = [] - CST = [] - GAL = [] - AST = [] - ST = [] - AS = [] - SN = [] - XPL = [] - SR = [] - sent = data_normalization(text) - sentS = sent.split(".") - sentences = [Sentence(sentS[i]+".") for i in range(len(sentS))] - tagger.predict(sentences) - for sentence in sentences: - for entity in sentence.get_spans('ner'): - if (entity.labels[0].value == "PL"): - if entity.text not in PL: - PL.append(entity.text) - if (entity.labels[0].value == "TNQ"): - if entity.text not in TNQ: - TNQ.append(entity.text) - if (entity.labels[0].value == "SNAT"): - if entity.text not in SNAT: - SNAT.append(entity.text) - if (entity.labels[0].value == "OA"): - if entity.text not in OA: - OA.append(entity.text) - if (entity.labels[0].value == "SSO"): - if entity.text not in SSO: - SSO.append(entity.text) - if (entity.labels[0].value == "EB"): - if entity.text not in EB: - EB.append(entity.text) - if (entity.labels[0].value == "ET"): - if entity.text not in ET: - ET.append(entity.text) - if (entity.labels[0].value == "NRA"): - if entity.text not in NRA: - NRA.append(entity.text) - if (entity.labels[0].value == "CST"): - if entity.text not in CST: - 
CST.append(entity.text) - if (entity.labels[0].value == "GAL"): - if entity.text not in GAL: - GAL.append(entity.text) - if (entity.labels[0].value == "AST"): - if entity.text not in AST: - AST.append(entity.text) - if (entity.labels[0].value == "ST"): - if entity.text not in ST: - ST.append(entity.text) - if (entity.labels[0].value == "AS"): - if entity.text not in AS: - AS.append(entity.text) - if (entity.labels[0].value == "SN"): - if entity.text not in SN: - SN.append(entity.text) - if (entity.labels[0].value == "XPL"): - if entity.text not in XPL: - XPL.append(entity.text) - if (entity.labels[0].value == "SR"): - if entity.text not in SR: - SR.append(entity.text) - - - - returnDic = {unidecode('Planète'):PL,unidecode('Trou noirs, quasars et apparentés'):TNQ,'Satellite naturel':SNAT,'Objets artificiels':OA,unidecode('Système solaire') :SSO,unidecode('Étoiles binaires (et pulsars)'):EB,unidecode('Étoiles'):ET,unidecode('Nébuleuse et région apparentés'):NRA,'Constellations':CST,'Galaxies et amas de galaxie':GAL,unidecode('Astèroïdes'):AST,unidecode('Satue hypotétique'):ST,'amas stellaires':AS,'supernovas':SN,unidecode('exoplanètes'):XPL,'sursaut radio, source radio, autres sursauts':SR} - # ajouter unidecode - data['value'] = returnDic - sys.stdout.write(json.dumps(data)) - sys.stdout.write('\n') \ No newline at end of file