diff --git a/NLP_tools-EZmaster/README.md b/NLP_tools-EZmaster/README.md index 79b9fff..bfc79df 100644 --- a/NLP_tools-EZmaster/README.md +++ b/NLP_tools-EZmaster/README.md @@ -6,8 +6,8 @@ Liste des traitements NLP disponibles : -* Stemming (stemmer) -* Etiquettage en partie du discours (POStagger) +* Stemming (stemmer), français et anglais +* Étiquetage en partie du discours (POStagger), français et anglais * Reconnaissance de termes contrôlés (termMatcher) * Reconnaissance d'entités nommées (ner) * Chunking nominal (NPchunker) @@ -23,7 +23,7 @@ #### Test d'intégation EZmaster like ``` cd public -sed -e '1d; $d' ../data/data.json | sed 's/\,$/ /g' | python3 analyze.py stemmer -o doc -ini conf_EZ.ini -log +sed -e '1d; $d' ../data/data_en.json | sed 's/\,$/ /g' | python3 analyze.py stemmer -o doc -lang en -log analyze.log ``` #### Test solliciation Web Service EZmaster @@ -33,12 +33,15 @@ Syntaxe des URLs : -https://nlp-tools-1.services.inist.fr/v1/en/{engine}/analyze?output={val} - -* paramètres -{engine} = nom pipeline de traitement à appliquer : -               stemmer ,ner , postagger, npchunker, npchunkerdp, termmatcher -{output} = format du résulat (doc , json) +``` +https://nlp-tools-1.services.inist.fr/v1/{langue}/{engine}/analyze?output={val} +``` +* paramètres : +**{langue}** = la langue à analyser           [en , fr] +**{engine}** = nom pipeline de traitement à appliquer : + *anglais* :           [stemmer ,ner , postagger, npchunker, npchunkerdp, termmatcher] + *français* :          [stemmer , postagger] + **{output}** = format du résultat           [doc , json]                  doc = le resultat est reinseré dans le document                  json = le produit de l'analyse au frmat json diff --git a/NLP_tools-EZmaster/public/analyze.py b/NLP_tools-EZmaster/public/analyze.py index 6e34f18..a20c279 100644 --- a/NLP_tools-EZmaster/public/analyze.py +++ b/NLP_tools-EZmaster/public/analyze.py @@ -2,7 +2,7 @@ from nlptools import * from nlptools.tools import * 
from nlptools.resources import * - +from nlptools.run import full_run import os import time import logging @@ -11,29 +11,30 @@ import plac # exemple : -# sed -e '1d; $d' ../data/data2.json | sed 's/\,$/ /g' | python3 analyze.py stemmer -o doc -ini conf_EZ.ini -log -# analyze.log +# sed -e '1d; $d' ../data/data.json | sed 's/\,$/ /g' | python3 analyze.py stemmer -o doc -lang fr -log analyze.log @plac.annotations( analyzer=( - "Name oh the analyzer [default stemmer porter]", - "positional", - None, - str, - ["stemmer", "termMatcher", "ner", "NPchunker", "POStagger", "gazetteer","NPchunkerDP"], + "Name oh the NLPpipe ", + "positional", + None, + str, + ["stemmer", "termMatcher", "ner", "NPchunker", "POStagger", "gazetteer", "NPchunkerDP", "lefff_tagger"] ), + language=("language", "option", "lang", str, ["fr", "en"]), output=("Format result ", "option", "o", str, ["doc", "json"]), + param=("initialisation param in json", "option", "param", str), init_file=("initialisation file [default config.ini]", "option", "ini", str), log=("log file", "option", "log", str) ) +def main(analyzer, init_file, log, language, param, output="doc"): -def main( init_file, log, analyzer="stemmer", output="doc"): - - field = 1 # indique le nombre de champs tsv/csv/txt des fichiers du corpus compteur = 0 - pipe = full_run(analyzer, init_file, output ) # doc = corpus traité sur la sortie standard - + # init d un pipe nlptools + pipe = full_run(analyzer, language, init_file, param, output) + # doc = corpus traité sur la sortie standard + # init_file = fichier de param par defaut logging.basicConfig(filename=log, level=logging.DEBUG) t1 = time.time() @@ -58,25 +59,6 @@ t2 = time.time() logging.info("TRACE::Executing times %.3f " % (t2 - t1)) -# class runner -class full_run: - - def __init__(self, analyzer, init_file, output): - - self.location = os.path.realpath( - os.path.join(os.getcwd(), os.path.dirname("doc")) - ) - self.parsers = [ - exec_spacy_pipe(analyzer, init_file, output), - ] - - def 
pipe_analyse(self, text): - - # execution du pipe - for parser in self.parsers: - text = parser(text) - return text - if __name__ == "__main__": diff --git a/NLP_tools-EZmaster/public/requirements.txt b/NLP_tools-EZmaster/public/requirements.txt index 5275edc..5237c7a 100644 --- a/NLP_tools-EZmaster/public/requirements.txt +++ b/NLP_tools-EZmaster/public/requirements.txt @@ -1 +1 @@ -git+http://vxgit.intra.inist.fr:60000/git/RichText/NLP_tools.git@v1.4#egg=nlptools +git+http://vxgit.intra.inist.fr:60000/git/RichText/NLP_tools.git@v1.5#egg=nlptools diff --git a/NLP_tools-EZmaster/public/v1/en/ner/analyze.ini b/NLP_tools-EZmaster/public/v1/en/ner/analyze.ini index 5efac41..2978918 100644 --- a/NLP_tools-EZmaster/public/v1/en/ner/analyze.ini +++ b/NLP_tools-EZmaster/public/v1/en/ner/analyze.ini @@ -34,6 +34,7 @@ args = ner args = fix('-o') args = env('output','doc') +args = fix('-lang','en').join(' ') [dump] indent = env('indent', false) diff --git a/NLP_tools-EZmaster/public/v1/en/npchunker/analyze.ini b/NLP_tools-EZmaster/public/v1/en/npchunker/analyze.ini index 968fe9d..334c734 100644 --- a/NLP_tools-EZmaster/public/v1/en/npchunker/analyze.ini +++ b/NLP_tools-EZmaster/public/v1/en/npchunker/analyze.ini @@ -34,6 +34,7 @@ args = NPchunker args = fix('-o') args = env('output','doc') +args = fix('-lang','en').join(' ') [dump] indent = env('indent', false) diff --git a/NLP_tools-EZmaster/public/v1/en/npchunkerdp/analyze.ini b/NLP_tools-EZmaster/public/v1/en/npchunkerdp/analyze.ini index a59e22b..85a9c96 100644 --- a/NLP_tools-EZmaster/public/v1/en/npchunkerdp/analyze.ini +++ b/NLP_tools-EZmaster/public/v1/en/npchunkerdp/analyze.ini @@ -34,6 +34,7 @@ args = NPchunkerDP args = fix('-o') args = env('output','doc') +args = fix('-lang','en').join(' ') [dump] indent = env('indent', false) diff --git a/NLP_tools-EZmaster/public/v1/en/postagger/analyze.ini b/NLP_tools-EZmaster/public/v1/en/postagger/analyze.ini index 9af1259..919c28e 100644 --- 
a/NLP_tools-EZmaster/public/v1/en/postagger/analyze.ini +++ b/NLP_tools-EZmaster/public/v1/en/postagger/analyze.ini @@ -34,6 +34,7 @@ args = POStagger args = fix('-o') args = env('output','doc') +args = fix('-lang','en').join(' ') [dump] indent = env('indent', false) diff --git a/NLP_tools-EZmaster/public/v1/en/stemmer/analyze.ini b/NLP_tools-EZmaster/public/v1/en/stemmer/analyze.ini index 6102403..aaf047e 100644 --- a/NLP_tools-EZmaster/public/v1/en/stemmer/analyze.ini +++ b/NLP_tools-EZmaster/public/v1/en/stemmer/analyze.ini @@ -34,6 +34,7 @@ args = stemmer args = fix('-o') args = env('output','doc') +args = fix('-lang','en').join(' ') [dump] indent = env('indent', false) diff --git a/NLP_tools-EZmaster/public/v1/en/termmatcher/analyze.ini b/NLP_tools-EZmaster/public/v1/en/termmatcher/analyze.ini index 4062994..9df6c8f 100644 --- a/NLP_tools-EZmaster/public/v1/en/termmatcher/analyze.ini +++ b/NLP_tools-EZmaster/public/v1/en/termmatcher/analyze.ini @@ -34,6 +34,7 @@ args = termMatcher args = fix('-o') args = env('output','doc') +args = fix('-lang','en').join(' ') [dump] indent = env('indent', false)