diff --git a/fund-ner/README.md b/fund-ner/README.md index 1eab109..300fb82 100644 --- a/fund-ner/README.md +++ b/fund-ner/README.md @@ -1,5 +1,7 @@ # fund-ner +**Service migré vers https://github.com/Inist-CNRS/web-services** + Cette instance propose un outil de détection de financeurs. ## Configuration diff --git a/fund-ner/swagger.json b/fund-ner/swagger.json index d374e1e..cf105b0 100644 --- a/fund-ner/swagger.json +++ b/fund-ner/swagger.json @@ -1,33 +1,33 @@ { - "openapi": "3.0.0", - "info": { - "title": "fund-ner - Detection de financeurs", - "summary": "Detection de financeurs", - "version": "1.0.1", - "termsOfService": "https://services.istex.fr/", - "contact": { - "name": "Inist-CNRS", - "url": "https://www.inist.fr/nous-contacter/" - } + "openapi": "3.0.0", + "info": { + "title": "funder-ner - Détection de financeurs", + "description": "Détecte des financeurs dans un article en anglais et renvoie la liste des financeurs repérés.", + "version": "1.0.3", + "termsOfService": "https://services.istex.fr/", + "contact": { + "name": "Inist-CNRS", + "url": "https://www.inist.fr/nous-contacter/" + } + }, + "servers": [ + { + "x-comment": "Will be automatically completed by the ezs server." }, - "servers": [ - { - "x-comment": "Will be automatically completed by the ezs server." - }, - { - "url": "http://vptdmservices.intra.inist.fr:49243/", - "description": "Latest version for production", - "x-profil": "Standard" + { + "url": "http://vptdmservices.intra.inist.fr:49279/", + "description": "Latest version for production", + "x-profil": "Standard" + } + ], + "tags": [ + { + "name": "funder-ner", + "description": "Détection de financeurs", + "externalDocs": { + "description": "Plus de documentation", + "url": "https://github.com/inist-cnrs/web-services/tree/main/services/funder-ner" } - ], - "tags": [ - { - "name": "fund-ner", - "description": "Detection de financeurs", - "externalDocs": { - "description": "Plus de documentation", - "url": "https://gitbucket.inist.fr/tdm/web-services/fund-ner/README.md" - } - } - ] - } \ No newline at end of file + } + ] +} \ No newline at end of file diff --git a/fund-ner/v1/best-model.pt.dvc b/fund-ner/v1/best-model.pt.dvc deleted file mode 100644 index a514b78..0000000 --- a/fund-ner/v1/best-model.pt.dvc +++ /dev/null @@ -1,5 +0,0 @@ -outs: -- md5: fdcb80f28184d1fb839f0575a9cffd39 - size: 419082833 - hash: md5 - path: best-model.pt diff --git a/fund-ner/v1/normalize.py b/fund-ner/v1/normalize.py deleted file mode 100644 index b4391a2..0000000 --- a/fund-ner/v1/normalize.py +++ /dev/null @@ -1,37 +0,0 @@ -def normalize(words): - L = [] - for word in words: - save = word - finalFile = [] - end = ['.',',',";",":","»",")","’"] - start = ["«","(",".","‘"] - middle = ["l'","d'","j'","L'","D'","J'","l’","d’","j’","L’","D’","J’"] - queue = [] - - File = word.split() - for Word in File: - word = Word - for execp in start: - if word.startswith(execp): - finalFile.append(word[0]) - word = word[1:] - for execp in middle: - if word.startswith(execp): - finalFile.append(word[:2]) - word = word[2:] - for execp in end: - if word.endswith(execp): - queue.insert(0,word[-1]) - word = word[:-1] - - finalFile.append(word) - for i in queue: - finalFile.append(i) - queue = [] - - if finalFile == ["a"]: - #print("word = ",save) - pass - - L.append(" ".join(finalFile)) - return L \ No newline at end of file diff --git a/fund-ner/v1/tagger.ini b/fund-ner/v1/tagger.ini deleted file mode 100644 index 7f761f3..0000000 --- a/fund-ner/v1/tagger.ini +++ /dev/null @@ -1,41 +0,0 @@ -# OpenAPI Documentation - JSON format (dot notation) -mimeType = application/json -post.description = Trouve des financeurs dans un texte -post.responses.default.description = Renvoie un Json id/value, où value est une liste de financeurs -post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.summary = Recherche de financeurs -post.requestBody.required = true -post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.parameters.0.in = query -post.parameters.0.name = indent -post.parameters.0.schema.type = boolean -post.parameters.0.description = Indent or not the JSON Result - -# Examples - -post.requestBody.content.application/json.example.0.id = 1 -post.requestBody.content.application/json.example.0.value = This study was funded by the CNRS and INIST. -post.responses.default.content.application/json.example.0.id = 1 -post.responses.default.content.application/json.example.0.value.0 = CNRS -post.responses.default.content.application/json.example.0.value.1 = INIST - - -[use] -plugin = @ezs/spawn -plugin = @ezs/basics -plugin = @ezs/storage -plugin = @ezs/analytics -[JSONParse] -separator = * -[expand] -path = env('path', 'value') -size = 100 -# in production mode, uncomment the following line -# cache = boost -[expand/exec] -# command should be executable ! -command = ./v1/tagger.py - -#command = ./expand.py -[dump] -indent = env('indent', false) diff --git a/fund-ner/v1/tagger.py b/fund-ner/v1/tagger.py deleted file mode 100644 index bdd493c..0000000 --- a/fund-ner/v1/tagger.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/opt/bitnami/python/bin/python3.7 - -from flair.data import Sentence -from flair.models import SequenceTagger -from normalize import normalize -import json -import sys -import logging - -logging.getLogger("flair").handlers[0].stream = sys.stderr -tagger = SequenceTagger.load('./v1/best-model.pt') -for line in sys.stdin: - data = json.loads(line) - lSent = normalize([data["value"]])[0].split() - sentence = Sentence() - for token in lSent: - sentence.add_token(token) - tagger.predict(sentence) - data["value"] = str([entity.text for entity in sentence.get_spans('ner')]) - sys.stdout.write(json.dumps(data)) - sys.stdout.write('\n') diff --git a/ner-tagger/README.md b/ner-tagger/README.md index c05d88a..47eedd8 100755 --- a/ner-tagger/README.md +++ b/ner-tagger/README.md @@ -1,5 +1,7 @@ # ner-tagger +**Service migré vers https://github.com/Inist-CNRS/web-services** + ## geoTagger Cette instance propose un outil de détéction d'entités nommées dans des textes anglais. diff --git a/ner-tagger/requirements.txt b/ner-tagger/requirements.txt deleted file mode 100755 index ef2b307..0000000 --- a/ner-tagger/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -flair==0.10 -certifi==2019.11.28 - diff --git a/ner-tagger/swagger.json b/ner-tagger/swagger.json index 637b249..6e6556d 100755 --- a/ner-tagger/swagger.json +++ b/ner-tagger/swagger.json @@ -1,33 +1,33 @@ { - "openapi": "3.1.0", - "info": { - "title": "ner-tagger", - "summary": "Cette instance propose un outil de détéction d'entités nommées dans des textes anglais.", - "version": "0.0.0", - "termsOfService": "https://services.istex.fr/", - "contact": { - "name": "Inist-CNRS", - "url": "https://www.inist.fr/nous-contacter/" - } - }, - "servers": [ - { - "x-comment": "Will be automatically completed by the ezs server." - }, - { - "url": "http://vptdmservices.intra.inist.fr:49221/", - "description": "Latest version for production", - "x-profil": "Standard" - } - ], - "tags": [ - { - "name": "geoTagger", - "description": "Détection d'entités géographiques", - "externalDocs": { - "description": "Plus de documentation", - "url": "https://gitbucket.inist.fr/tdm/web-services/tree/master/geo-tagger" - } - } - ] -} + "openapi": "3.0.0", + "info": { + "title": "ner-tagger - Détection d'entités nommées dans des textes", + "description": "Détecter des entités géographiques dans des textes anglais et d'organismes scolaires, de localisation dans des bulletins administratifs de l'instruction publique", + "version": "1.0.7", + "termsOfService": "https://services.istex.fr/", + "contact": { + "name": "Inist-CNRS", + "url": "https://www.inist.fr/nous-contacter/" + } + }, + "servers": [ + { + "x-comment": "Will be automatically completed by the ezs server." + }, + { + "url": "http://vptdmservices.intra.inist.fr:49280/", + "description": "Latest version for production", + "x-profil": "Standard" + } + ], + "tags": [ + { + "name": "ner-tagger", + "description": "Détection d'entités nommées dans des textes", + "externalDocs": { + "description": "Plus de documentation", + "url": "https://github.com/inist-cnrs/web-services/tree/main/services/ner-tagger" + } + } + ] +} \ No newline at end of file diff --git a/ner-tagger/v1/geoTagger/geoTagger.ini b/ner-tagger/v1/geoTagger/geoTagger.ini deleted file mode 100755 index 340baa6..0000000 --- a/ner-tagger/v1/geoTagger/geoTagger.ini +++ /dev/null @@ -1,37 +0,0 @@ -# OpenAPI Documentation - JSON format (dot notation) -mimeType = application/json - -post.description = Détecte les entités géographiques d'un texte en anglais -post.responses.default.description = Renvoie un Json composé d'`id`, `value` avec `value` la liste des entités géographiques trouvées -post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.summary = Détection d'entité géographique -post.requestBody.required = true -post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.parameters.0.in = query -post.parameters.0.name = path -post.parameters.0.schema.type = string -post.parameters.0.description = The path in each object to enrich with an Python script -post.parameters.1.in = query -post.parameters.1.name = indent -post.parameters.1.schema.type = boolean -post.parameters.1.description = Indent or not the JSON Result - -[use] -plugin = @ezs/spawn -plugin = @ezs/basics -plugin = @ezs/storage -plugin = @ezs/analytics - -[JSONParse] -separator = * - -[expand] -path = env('path', 'value') -size = 100 - -[expand/exec] -# command should be executable ! -command = ./v1/geoTagger/geoTagger.py - -[dump] -indent = env('indent', false) \ No newline at end of file diff --git a/ner-tagger/v1/geoTagger/geoTagger.py b/ner-tagger/v1/geoTagger/geoTagger.py deleted file mode 100755 index 2bd70f1..0000000 --- a/ner-tagger/v1/geoTagger/geoTagger.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/opt/bitnami/python/bin/python3.7 - -import sys -import json - -from flair.models import SequenceTagger -from flair.data import Sentence - -import logging -logging.getLogger('flair').handlers[0].stream = sys.stderr - -tagger = SequenceTagger.load("flair/ner-english") - -for line in sys.stdin: - data = json.loads(line) - text=data['value'] - sent= text.split(".") - sentences = [Sentence(sent[i]+".") for i in range(len(sent))] - tagger.predict(sentences) - geo = [] - - for sentence in sentences: - for entity in sentence.get_spans('ner'): - if entity.tag == "LOC": - geo.append(entity.text) - data['value'] = geo - sys.stdout.write(json.dumps(data)) - sys.stdout.write('\n') diff --git a/ner-tagger/v1/perseeBaipTagger/model.pt b/ner-tagger/v1/perseeBaipTagger/model.pt deleted file mode 100755 index 10cdfbf..0000000 --- a/ner-tagger/v1/perseeBaipTagger/model.pt +++ /dev/null Binary files differ diff --git a/ner-tagger/v1/perseeBaipTagger/tagger.ini b/ner-tagger/v1/perseeBaipTagger/tagger.ini deleted file mode 100755 index e61adc0..0000000 --- a/ner-tagger/v1/perseeBaipTagger/tagger.ini +++ /dev/null @@ -1,37 +0,0 @@ -# OpenAPI Documentation - JSON format (dot notation) -mimeType = application/json - -post.description = Détecte les organismes scolaires et localisations dans les baip -post.responses.default.description = Renvoie un Json composé d'`id`, `value` avec `value` un dictionnaire contenant les organismes scolaires et localisations extraites. -post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.summary = Détection dans les baip -post.requestBody.required = true -post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.parameters.0.in = query -post.parameters.0.name = path -post.parameters.0.schema.type = string -post.parameters.0.description = The path in each object to enrich with an Python script -post.parameters.1.in = query -post.parameters.1.name = indent -post.parameters.1.schema.type = boolean -post.parameters.1.description = Indent or not the JSON Result - -[use] -plugin = @ezs/spawn -plugin = @ezs/basics -plugin = @ezs/storage -plugin = @ezs/analytics - -[JSONParse] -separator = * - -[expand] -path = env('path', 'value') -size = 100 - -[expand/exec] -# command should be executable ! -command = ./v1/perseeBaipTagger/tagger.py - -[dump] -indent = env('indent', false) \ No newline at end of file diff --git a/ner-tagger/v1/perseeBaipTagger/tagger.py b/ner-tagger/v1/perseeBaipTagger/tagger.py deleted file mode 100755 index 2623632..0000000 --- a/ner-tagger/v1/perseeBaipTagger/tagger.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/opt/bitnami/python/bin/python3.7 - -import sys -import json -import re - -from flair.models import SequenceTagger -from flair.data import Sentence - -import logging - -logging.getLogger('flair').handlers[0].stream = sys.stderr - - -def data_normalization(dic,sentence): - cpy_sentence = sentence - - cpy_sentence = re.sub(r'\bArt. \w*\b', '', cpy_sentence) - cpy_sentence = re.sub(r'\.\.\.*', '', cpy_sentence) - - for key in dic: - cpy_sentence = cpy_sentence.replace(key,dic[key]) - - return cpy_sentence - - -tagger = SequenceTagger.load("./v1/perseeBaipTagger/model.pt") - - -error_dic = {} -error_dic["\n"] = " " -error_dic["¬ "] = "" -error_dic["l'"] = "l' " -error_dic["d'"] = "d' " -error_dic["1° "] = "" -error_dic["2° "] = "" -error_dic["3° "] = "" -error_dic["Art. "] = "" -error_dic[", A"] = ", a" - -uniqueOrg = ["collège","lycée","académie","faculté","école","ecole","établissement","institut"] -trans = ["nomination","affectation","concession","érection","suppression","transformation"] - -for line in sys.stdin: - data = json.loads(line) - text=data['value'] - - locL = [] - orgL = [] - basicOrg = [] - operation = [] - - sent = data_normalization(error_dic,text) - - sentS = sent.split(".") - sentences = [Sentence(sentS[i]+".") for i in range(len(sentS))] - - tagger.predict(sentences) - - for word in sent.lower().split(" "): - for transfo in trans: - if word.startswith(transfo): - if transfo not in operation: - operation.append(transfo) - break - - for sentence in sentences: - for entity in sentence.get_spans('ner'): - if (entity.labels[0].value == "LOC"): - if entity.text not in locL: - locL.append(entity.text) - if entity.labels[0].value == "ORG": - org = entity.text.split(" ") - if len(org[-1]) > 2: - for borg in uniqueOrg: - if entity.text.lower().startswith(borg): - basicOrg.append(borg) - if entity.text not in orgL: - orgL.append(entity.text) - if len(org)>1: - for k in ["à","de","l'","d'","'","du","la"]: - if org[-2] == k: - if org[-1] not in locL: - locL.append(org[-1]) - break - - returnDic = {"loc":locL,"org":orgL,"basicOrg":basicOrg,"operation":operation} - data['value'] = returnDic - sys.stdout.write(json.dumps(data)) - sys.stdout.write('\n') \ No newline at end of file diff --git a/nlp-tools2/README.md b/nlp-tools2/README.md index 87b1634..1b56143 100644 --- a/nlp-tools2/README.md +++ b/nlp-tools2/README.md @@ -1,5 +1,7 @@ # nlp-tools2 +**Service migré vers https://github.com/Inist-CNRS/web-services** + Cette instance propose des outils de traitement de la langue diff --git a/nlp-tools2/requirements.txt b/nlp-tools2/requirements.txt deleted file mode 100644 index ed440b4..0000000 --- a/nlp-tools2/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -pycld3 -spacy==2.3.5 -en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz diff --git a/nlp-tools2/swagger.json b/nlp-tools2/swagger.json index e6246ba..b885d50 100644 --- a/nlp-tools2/swagger.json +++ b/nlp-tools2/swagger.json @@ -1,33 +1,33 @@ { - "openapi": "3.1.0", - "info": { - "title": "nlp-tools2 - Outils de NLP", - "summary": "Cette instance propose des outils de traitement de la langue", - "version": "2.0.3", - "termsOfService": "https://services.istex.fr/", - "contact": { - "name": "Inist-CNRS", - "url": "https://www.inist.fr/nous-contacter/" - } - }, - "servers": [ - { - "x-comment": "Will be automatically completed by the ezs server." - }, - { - "url": "http://vptdmservices.intra.inist.fr:49212/", - "description": "Latest version for production", - "x-profil": "Standard" - } - ], - "tags": [ - { - "name": "NLP", - "description": "Traitement en langage naturel", - "externalDocs": { - "description": "Plus de documentation", - "url": "https://gitbucket.inist.fr/tdm/web-services/tree/master/nlp-tools2" - } - } - ] -} + "openapi": "3.0.0", + "info": { + "title": "nlp-tools2 - Outils de NLP", + "description": "Outils de traitement de la langue", + "version": "2.0.4", + "termsOfService": "https://services.istex.fr/", + "contact": { + "name": "Inist-CNRS", + "url": "https://www.inist.fr/nous-contacter/" + } + }, + "servers": [ + { + "x-comment": "Will be automatically completed by the ezs server." + }, + { + "url": "http://vptdmservices.intra.inist.fr:49281/", + "description": "Latest version for production", + "x-profil": "Standard" + } + ], + "tags": [ + { + "name": "NLP", + "description": "Outils de NLP", + "externalDocs": { + "description": "Plus de documentation", + "url": "https://github.com/inist-cnrs/web-services/tree/main/services/nlp-tools2" + } + } + ] +} \ No newline at end of file diff --git a/nlp-tools2/v1/detect-lang.ini b/nlp-tools2/v1/detect-lang.ini deleted file mode 100644 index 278f8f6..0000000 --- a/nlp-tools2/v1/detect-lang.ini +++ /dev/null @@ -1,62 +0,0 @@ -# OpenAPI Documentation - JSON format (dot notation) -mimeType = application/json -post.operationId = post-v1-detect-lang -post.description = Détecte la langue d'un texte et renvoie un code langue sur 2 caractères -post.responses.default.description = Renvoie un Json composé d'`id`, `value` avec `value` un code langue sur 2 caractères -post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.summary = Détection de langue -post.tags.0 = NLP -post.requestBody.required = true -post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.parameters.0.in = query -post.parameters.0.name = path -post.parameters.0.schema.type = string -post.parameters.0.description = The path in each object to enrich with an Python script -post.parameters.1.in = query -post.parameters.1.name = indent -post.parameters.1.schema.type = boolean -post.parameters.1.description = Indent or not the JSON Result - -# Examples -post.requestBody.content.application/json.example.0.id = 1 -post.requestBody.content.application/json.example.0.value = The COVID-19 pandemic, also known as the coronavirus pandemic, is an ongoing global pandemic of coronavirus disease 2019 (COVID-19) caused by severe acute respiratory syndrome coronavirus2 (SARS-CoV-2). It was first identified in December 2019 in Wuhan, China. The World Health Organization declared the outbreak a Public Health Emergency of International Concern on 20 January 2020, and later a pandemic on 11 March 2020. As of 2 April 2021, more than 129 million cases have been confirmed, with more than 2.82 million deaths attributed to COVID-19, making it one of the deadliest pandemics in history. -post.requestBody.content.application/json.example.1.id = 2 -post.requestBody.content.application/json.example.1.value = In the southern French Massif Central, the Montagne Noire axial zone is a NE-SW elongated granite-migmatite dome emplaced within Visean south-verging recumbent folds and intruded by syn- to late-migmatization granitoids. The tectonic setting of this dome is still disputed, thus several models have been proposed. In order to better understand the emplacement mechanism of this dome, petrofabric and Anisotropy of Magnetic Susceptibility (AMS) studies have been carried out. In the granites and migmatites that form the dome core, magmatic texture and to a lesser extent weak solid-state texture are dominant. As a paramagnetic mineral, biotite is the main carrier of the magnetic susceptibility. On the basis of 135 AMS sites, the magnetic fabrics appear as independent of the lithology but related to the dome architecture. Coupling our results with previous structural and geochronological studies, allows us to propose a new emplacement model. Between 340-325 Ma, the Palaeozoic series underwent a compressional deformation represented by nappes and recumbent folds involving the thermal event leading to partial melting. Until ~325-310 Ma, the dome emplacement was assisted by diapiric processes. An extensional event took place at 300 Ma, after the emplacement of the late to post-migmatitic granitic plutons. In the northeast side of the dome, a brittle normal-dextral faulting controlled the opening of the Graissessac coal-basin. -post.requestBody.content.application/json.example.2.id = 3 -post.requestBody.content.application/json.example.2.value = La pandémie de Covid-19 est une pandémie d'une maladie infectieuse émergente, appelée la maladie à coronavirus 2019 ou Covid-19, provoquée par le coronavirus SARS-CoV-2, apparue à Wuhan le 16 novembre 20193, dans la province de Hubei (en Chine centrale), avant de se propager dans le monde. L'Organisation mondiale de la santé (OMS) alerte dans un premier temps la République populaire de Chine et ses autres états membres, puis prononce l'état d'urgence de santé publique de portée internationale le 30 janvier 2020. -post.requestBody.content.application/json.example.3.id = 4 -post.requestBody.content.application/json.example.3.value = Au dernier recensement de 2018, la commune comptait 46 513 habitants appelés les Carcassonnais. Carcassonne est la ville principale de la Carcassonne Agglo 111 452 habitants (2016), de l'aire urbaine de Carcassonne 99 448 habitants (2017)1 et de son unité urbaine qui compte 48 633 habitants (2017). Occupée depuis le Néolithique, Carcassonne se trouve dans la plaine de l'Aude entre deux grands axes de circulation reliant l'Atlantique à la mer Méditerranée et le Massif central aux Pyrénées. La ville est connue pour la Cité de Carcassonne, ensemble architectural médiéval restauré par Viollet-le-Duc au xixe siècle et inscrit au patrimoine mondial de l'UNESCO depuis 1997. -post.requestBody.content.application/json.example.4.id = 5 -post.requestBody.content.application/json.example.4.value = Par rapport à la période écoulée, le fait d'avoir appris après coup que des circulaires imposaient de manière retroactive le retrait de jours de congés pour des personnes qui s'étaient mises en ASA pour cause de garde d'enfants m'a semblé particulièrement injuste et m'a mis vraiment en colère. J'aurais eu besoin de soutien à ce niveau là de la part du CNRS, car faire l'école à la maison était un travail à temps plein aussi nécessaire à la nation que mon travail au CNRS.Par rapport au satisfaction, j'ai trouvé que le télétravail me convenait bien. -post.responses.default.content.application/json.example.0.id = 1 -post.responses.default.content.application/json.example.0.value = en -post.responses.default.content.application/json.example.1.id = 2 -post.responses.default.content.application/json.example.1.value = en -post.responses.default.content.application/json.example.2.id = 3 -post.responses.default.content.application/json.example.2.value = fr -post.responses.default.content.application/json.example.3.id = 4 -post.responses.default.content.application/json.example.3.value = fr -post.responses.default.content.application/json.example.4.id = 5 -post.responses.default.content.application/json.example.4.value = fr - -[use] -plugin = @ezs/spawn -plugin = @ezs/basics -plugin = @ezs/storage -plugin = @ezs/analytics - -[JSONParse] -separator = * - -[expand] -path = env('path', 'value') -size = 100 -# in production mode, uncomment the following line -# cache = boost - -[expand/exec] -# command should be executable ! -command = ./v1/detect_lang.py - -[dump] -indent = env('indent', false) diff --git a/nlp-tools2/v1/detect_lang.py b/nlp-tools2/v1/detect_lang.py deleted file mode 100644 index ba4a1a1..0000000 --- a/nlp-tools2/v1/detect_lang.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Thu Jun 17 15:18:10 2021 - -@author: cuxac - -A partir d'un texte (pas d'un mot) affiche le code langue du texte (2 caractères) si ça probabilité est supérieure au seuil de 0.85 - -""" - -import json - -import sys - -import cld3 - - -#fin=json.loads(json.dumps([{ -#"id":1, -#"value":"The COVID-19 pandemic, also known as the coronavirus pandemic, is an ongoing global pandemic of coronavirus disease 2019 (COVID-19) caused by severe acute respiratory syndrome coronavirus2 (SARS-CoV-2). It was first identified in December 2019 in Wuhan, China. The World Health Organization declared the outbreak a Public Health Emergency of International Concern on 20 January 2020, and later a pandemic on 11 March 2020. As of 2 April 2021, more than 129 million cases have been confirmed, with more than 2.82 million deaths attributed to COVID-19, making it one of the deadliest pandemics in history." -#}, -#{"id":2, -#"value":"In the southern French Massif Central, the Montagne Noire axial zone is a NE-SW elongated granite-migmatite dome emplaced within Visean south-verging recumbent folds and intruded by syn- to late-migmatization granitoids. The tectonic setting of this dome is still disputed, thus several models have been proposed. In order to better understand the emplacement mechanism of this dome, petrofabric and Anisotropy of Magnetic Susceptibility (AMS) studies have been carried out. In the granites and migmatites that form the dome core, magmatic texture and to a lesser extent weak solid-state texture are dominant. As a paramagnetic mineral, biotite is the main carrier of the magnetic susceptibility. On the basis of 135 AMS sites, the magnetic fabrics appear as independent of the lithology but related to the dome architecture. Coupling our results with previous structural and geochronological studies, allows us to propose a new emplacement model. Between 340-325 Ma, the Palaeozoic series underwent a compressional deformation represented by nappes and recumbent folds involving the thermal event leading to partial melting. Until ~325-310 Ma, the dome emplacement was assisted by diapiric processes. An extensional event took place at 300 Ma, after the emplacement of the late to post-migmatitic granitic plutons. In the northeast side of the dome, a brittle normal-dextral faulting controlled the opening of the Graissessac coal-basin." -#}, -#{"id":3, -# "value":"La pandémie de Covid-19 est une pandémie d'une maladie infectieuse émergente, appelée la maladie à coronavirus 2019 ou Covid-19, provoquée par le coronavirus SARS-CoV-2, apparue à Wuhan le 16 novembre 20193, dans la province de Hubei (en Chine centrale), avant de se propager dans le monde. L'Organisation mondiale de la santé (OMS) alerte dans un premier temps la République populaire de Chine et ses autres états membres, puis prononce l'état d'urgence de santé publique de portée internationale le 30 janvier 2020."} -#, -#{"id":4, -# "value":"Au dernier recensement de 2018, la commune comptait 46 513 habitants appelés les Carcassonnais. Carcassonne est la ville principale de la Carcassonne Agglo 111 452 habitants (2016), de l'aire urbaine de Carcassonne 99 448 habitants (2017)1 et de son unité urbaine qui compte 48 633 habitants (2017). Occupée depuis le Néolithique, Carcassonne se trouve dans la plaine de l'Aude entre deux grands axes de circulation reliant l'Atlantique à la mer Méditerranée et le Massif central aux Pyrénées. La ville est connue pour la Cité de Carcassonne, ensemble architectural médiéval restauré par Viollet-le-Duc au xixe siècle et inscrit au patrimoine mondial de l'UNESCO depuis 1997."} -#, -#{"id":5, -#"value":"Par rapport à la période écoulée, le fait d'avoir appris après coup que des circulaires imposaient de manière retroactive le retrait de jours de congés pour des personnes qui s'étaient mises en ASA pour cause de garde d'enfants m'a semblé particulièrement injuste et m'a mis vraiment en colère. J'aurais eu besoin de soutien à ce niveau là de la part du CNRS, car faire l'école à la maison était un travail à temps plein aussi nécessaire à la nation que mon travail au CNRS.Par rapport au satisfaction, j'ai trouvé que le télétravail me convenait bien." -#} -#])) - - -for line in sys.stdin: - data=json.loads(line) - - text=data['value'] - if cld3.get_language(text).probability>0.85: - - data['value']=cld3.get_language(text).language#,round(cld3.get_language(text).probability,2) - - else: - data['value']='unknown' - - - - - sys.stdout.write(json.dumps(data)) - sys.stdout.write('\n') - diff --git a/nlp-tools2/v1/lemma.ini b/nlp-tools2/v1/lemma.ini deleted file mode 100644 index 7fe8a7c..0000000 --- a/nlp-tools2/v1/lemma.ini +++ /dev/null @@ -1,57 +0,0 @@ -# OpenAPI Documentation - JSON format (dot notation) -mimeType = application/json -post.operationId = post-v1-lemma -post.description = Lemmatise un texte en anglais -post.responses.default.description = Renvoie un Json composé de`id`, `value` avec `value` un texte anglais lemmatisé en minuscule -post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.summary = Lemmatisation de textes en anglais -post.tags.0 = NLP -post.requestBody.required = true -post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.parameters.0.in = query -post.parameters.0.name = path -post.parameters.0.schema.type = string -post.parameters.0.description = The path in each object to enrich with an Python script -post.parameters.1.in = query -post.parameters.1.name = indent -post.parameters.1.schema.type = boolean -post.parameters.1.description = Indent or not the JSON Result - -# Examples -post.requestBody.content.application/json.example.0.id = 1 -post.requestBody.content.application/json.example.0.value.0 = rocks -post.requestBody.content.application/json.example.0.value.1 = are -post.requestBody.content.application/json.example.0.value.2 = images analysis -post.requestBody.content.application/json.example.1.id = 2 -post.requestBody.content.application/json.example.1.value = Computers -post.requestBody.content.application/json.example.2.id = 3 -post.requestBody.content.application/json.example.2.value = As of 2 April 2021, more than 129 million cases have been confirmed, with more than 2.82 million deaths attributed to COVID-19, making it one of the deadliest pandemics in history. -post.responses.default.content.application/json.example.0.id = 1 -post.responses.default.content.application/json.example.0.value.0 = rock -post.responses.default.content.application/json.example.0.value.1 = be -post.responses.default.content.application/json.example.0.value.2 = image analysis -post.responses.default.content.application/json.example.1.id = 2 -post.responses.default.content.application/json.example.1.value = computer -post.responses.default.content.application/json.example.2.id = 3 -post.responses.default.content.application/json.example.2.value = as of 2 April 2021 , more than 129 million case have be confirm , with more than 2.82 million death attribute to COVID-19 , make -PRON- one of the deadly pandemic in history . - -[use] -plugin = @ezs/spawn -plugin = @ezs/basics -plugin = @ezs/storage -plugin = @ezs/analytics - -[JSONParse] - -[expand] -path = env('path', 'value') -size = 100 -# in production mode, uncomment the following line -# cache = boost - -[expand/exec] -# command should be executable ! -command = ./v1/ws_lemmatization.py - -[dump] -indent = env('indent', false) diff --git a/nlp-tools2/v1/ws_lemmatization.py b/nlp-tools2/v1/ws_lemmatization.py deleted file mode 100644 index 999b17c..0000000 --- a/nlp-tools2/v1/ws_lemmatization.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Thu Sep 29 16:57:23 2022 - -@author: cuxac -""" -import json -import sys -import spacy -nlp=spacy.load('en_core_web_sm', disable = ['parser','ner']) - - -for line in sys.stdin: - data=json.loads(line) - i=data['value'] - if type(i)==str and (len(i.split(' '))==1 and len(i.split('-'))==1 and len(i.split('/'))==1): - i=i.replace("*"," ").strip() - data["value"]=nlp(i)[0].lemma_ - elif type(i)==list: - ll=list() - for j in i: - j=j.replace('*',' ').strip() - if len(j.split())==1: - ll.append(nlp(j)[0].lemma_) - else: - ll.append(' '.join([w.lemma_ for w in nlp(j)])) - - data['value']=ll - else: - i=i.replace('*',' ').strip() - sent=' '.join([w.lemma_ for w in nlp(i)]) - - data['value']=sent - - sys.stdout.write(json.dumps(data)) - sys.stdout.write('\n')