diff --git a/geo-tagger/v1/geoTagger.ini b/geo-tagger/v1/geoTagger.ini deleted file mode 100644 index 6fbe33d..0000000 --- a/geo-tagger/v1/geoTagger.ini +++ /dev/null @@ -1,37 +0,0 @@ -# OpenAPI Documentation - JSON format (dot notation) -mimeType = application/json - -post.description = Détecte les entités géographiques d'un texte en anglais -post.responses.default.description = Renvoie un Json composé d'`id`, `value` avec `value` la liste des entités géographiques trouvées -post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.summary = Détection d'entité géographique -post.requestBody.required = true -post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.parameters.0.in = query -post.parameters.0.name = path -post.parameters.0.schema.type = string -post.parameters.0.description = The path in each object to enrich with an Python script -post.parameters.1.in = query -post.parameters.1.name = indent -post.parameters.1.schema.type = boolean -post.parameters.1.description = Indent or not the JSON Result - -[use] -plugin = @ezs/spawn -plugin = @ezs/basics -plugin = @ezs/storage -plugin = @ezs/analytics - -[JSONParse] -separator = * - -[expand] -path = env('path', 'value') -size = 100 - -[expand/exec] -# command should be executable ! 
-command = ./v1/geoTagger.py - -[dump] -indent = env('indent', false) \ No newline at end of file diff --git a/geo-tagger/v1/geoTagger.py b/geo-tagger/v1/geoTagger.py deleted file mode 100644 index bbe75f7..0000000 --- a/geo-tagger/v1/geoTagger.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/opt/bitnami/python/bin/python3.7 - -import sys,os -import json - -from flair.models import SequenceTagger -from flair.data import Sentence - -import logging -logging.getLogger('flair').handlers[0].stream = sys.stderr - -tagger = SequenceTagger.load("flair/ner-english") - -for line in sys.stdin: - data = json.loads(line) - text=data['value'] - text = Sentence(text) - tagger.predict(text) - geo = [] - for entity in text.get_spans('ner'): - if entity.tag == "LOC": - geo.append(entity.text) - data['value'] = geo - sys.stdout.write(json.dumps(data)) - sys.stdout.write('\n') diff --git a/geo-tagger/v1/geoTagger/geoTagger.ini b/geo-tagger/v1/geoTagger/geoTagger.ini new file mode 100644 index 0000000..340baa6 --- /dev/null +++ b/geo-tagger/v1/geoTagger/geoTagger.ini @@ -0,0 +1,37 @@ +# OpenAPI Documentation - JSON format (dot notation) +mimeType = application/json + +post.description = Détecte les entités géographiques d'un texte en anglais +post.responses.default.description = Renvoie un Json composé d'`id`, `value` avec `value` la liste des entités géographiques trouvées +post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream +post.summary = Détection d'entité géographique +post.requestBody.required = true +post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream +post.parameters.0.in = query +post.parameters.0.name = path +post.parameters.0.schema.type = string +post.parameters.0.description = The path in each object to enrich with a Python script +post.parameters.1.in = query +post.parameters.1.name = indent +post.parameters.1.schema.type = boolean +post.parameters.1.description = Indent or not the JSON Result + +[use] 
+plugin = @ezs/spawn +plugin = @ezs/basics +plugin = @ezs/storage +plugin = @ezs/analytics + +[JSONParse] +separator = * + +[expand] +path = env('path', 'value') +size = 100 + +[expand/exec] +# The command file must be executable! +command = ./v1/geoTagger/geoTagger.py + +[dump] +indent = env('indent', false) \ No newline at end of file diff --git a/geo-tagger/v1/geoTagger/geoTagger.py b/geo-tagger/v1/geoTagger/geoTagger.py new file mode 100644 index 0000000..e4ab9b5 --- /dev/null +++ b/geo-tagger/v1/geoTagger/geoTagger.py @@ -0,0 +1,25 @@ +#!/opt/bitnami/python/bin/python3.7 + +import sys +import json + +from flair.models import SequenceTagger +from flair.data import Sentence + +import logging +logging.getLogger('flair').handlers[0].stream = sys.stderr + +tagger = SequenceTagger.load("flair/ner-english") + +for line in sys.stdin: + data = json.loads(line) + text=data['value'] + text = Sentence(text) + tagger.predict(text) + geo = [] + for entity in text.get_spans('ner'): + if entity.tag == "LOC": + geo.append(entity.text) + data['value'] = geo + sys.stdout.write(json.dumps(data)) + sys.stdout.write('\n')