diff --git a/terms-extraction/examples.http b/terms-extraction/examples.http index 846eb0f..2c70a43 100644 --- a/terms-extraction/examples.http +++ b/terms-extraction/examples.http @@ -38,6 +38,39 @@ ] ### +# Extraction des termes de deux textes en anglais, en gardant les nombres +POST {{baseUrl}}/v1/teeft/with-numbers/en?indent=true HTTP/1.1 +Content-Type: application/json + +[ + { + "id": "MPES-ERM_ER2023_000737", + "value": "Flow control based 5 MW wind turbine enhanced energy production for hydrogen generation cost reduction" + }, + { + "id": "MPES-ERM_ER2023_001916", + "value": "Study on the Motion Characteristics of 10 MW Superconducting Floating Offshore Wind Turbine Considering 2nd Order Wave Effect" + } +] +### + +# Extraction des termes de deux textes en français, en gardant les nombres +POST {{baseUrl}}/v1/teeft/with-numbers/fr?indent=true HTTP/1.1 +Content-Type: application/json + +[ + { + "id": "MPES-ERM_ER2023_000737", + "value": "Production d'énergie améliorée par une turbine éolienne de 5 MW basée sur le contrôle du flux pour la réduction des coûts de production d'hydrogène" + }, + { + "id": "MPES-ERM_ER2023_001916", + "value": "Étude des caractéristiques de mouvement d'une éolienne offshore flottante supraconductrice de 10 MW en tenant compte de l'effet de vague du deuxième ordre" + } +] +### + + # Normalisation des termes de deux textes POST {{baseUrl}}/v1/tools/keywords-clean?indent=true HTTP/1.1 Content-Type: application/json diff --git a/terms-extraction/v1/teeft/with-numbers/en.ini b/terms-extraction/v1/teeft/with-numbers/en.ini new file mode 100644 index 0000000..9d3e77e --- /dev/null +++ b/terms-extraction/v1/teeft/with-numbers/en.ini @@ -0,0 +1,136 @@ +# OpenAPI Documentation - JSON format (dot notation) +mimeType = application/json + +post.operationId = post-v1-teeft-with-numbers-en +post.requestBody.content.application/json.example.0.id = 1 +post.requestBody.content.application/json.example.0.value = Flow control based 5 MW wind turbine 
enhanced energy production for hydrogen generation cost reduction +post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream +post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream +post.requestBody.required = true +post.responses.default.content.application/json.example.0.id = 1 +post.responses.default.content.application/json.example.0.value.0 = teeft +post.responses.default.content.application/json.example.0.value.1 = teeft algorithm +post.responses.default.content.application/json.example.0.value.2 = english-written text +post.responses.default.content.application/json.example.0.value.3 = specificity value +post.responses.default.content.application/json.example.0.value.4 = specific terms +post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream +post.responses.default.description = Termes extraits du texte envoyé +post.summary = Extrait des termes du texte en anglais en utilisant Teeft prenant en compte les nombres +post.tags.0: terms-extraction +post.parameters.0.description = Nombre maximal de termes à récupérer +post.parameters.0.in = query +post.parameters.0.name = nb +post.parameters.0.required = false +post.parameters.0.schema.type = number +post.parameters.1.description = Indenter le JSON résultant +post.parameters.1.in = query +post.parameters.1.name = indent +post.parameters.1.required = false +post.parameters.1.schema.type = boolean + +[use] +plugin = @ezs/basics +plugin = @ezs/teeft +plugin = @ezs/strings + +[JSONParse] +separator = * + +[encode] +path = value +from = 1 +to = one +from = 2 +to = two +from = 3 +to = three +from = 4 +to = four +from = 5 +to = five +from = 6 +to = six +from = 7 +to = seven +from = 8 +to = eight +from = 9 +to = nine +from = 0 +to = zero +prefix = inf +suffix = sup + +[replace] +path = content +value = get('value') + +path = path +value = get('id', 'n/a') + +[TeeftToLowerCase] +path = content + +[TeeftSentenceTokenize] 
+[TeeftTokenize] + +[TeeftNaturalTag] +lang = en + +[TeeftExtractTerms] +lang = en + +[TeeftFilterTags] +lang = en + +[TeeftRemoveNumbers] +[TeeftRemoveShortTerms] +[TeeftRemoveLongTerms] +[TeeftRemoveWeirdTerms] +[debug] +[TeeftStopWords] +lang = en + +[TeeftSumUpFrequencies] +[TeeftSpecificity] +lang = en +sort = true + +[TeeftFilterMonoFreq] + +[replace] +path = id +value = get('path') + +path = value +value = get('terms').map('term').slice(0, env("nb", 5)) + +[map] +path = value + +[map/decode] +from = 1 +to = one +from = 2 +to = two +from = 3 +to = three +from = 4 +to = four +from = 5 +to = five +from = 6 +to = six +from = 7 +to = seven +from = 8 +to = eight +from = 9 +to = nine +from = 0 +to = zero +prefix = inf +suffix = sup + +[dump] +indent = env('indent', false) diff --git a/terms-extraction/v1/teeft/with-numbers/fr.ini b/terms-extraction/v1/teeft/with-numbers/fr.ini new file mode 100644 index 0000000..024e0c3 --- /dev/null +++ b/terms-extraction/v1/teeft/with-numbers/fr.ini @@ -0,0 +1,135 @@ +# OpenAPI Documentation - JSON format (dot notation) +mimeType = application/json + +post.operationId = post-v1-teeft-with-numbers-fr +post.requestBody.content.application/json.example.0.id = 1 +post.requestBody.content.application/json.example.0.value = Pour faciliter l’accès aux techniques de fouille de données notamment pour les non spécialistes, le service TDM de l’Inist-CNRS développe des web services autour du traitement de l’information scientifique et technique. Ces services peuvent être appelés en ligne de commande ou au sein de LODEX, outil libre de visualisation. La démonstration montre comment, à partir des informations présentes dans une notice bibliographique et plus particulièrement à partir d’une adresse d’auteur, l’identifiant RNSR (Répertoire national des structures de recherche) est attribué automatiquement au document initial et comment cette nouvelle donnée est exploitée au sein de LODEX. 
Ainsi, programme ou algorithme développé par des enseignants chercheurs pourrait être adapté pour devenir un web service et être utilisé par le plus grand nombre. +post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream +post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream +post.requestBody.required = true +post.responses.default.content.application/json.example.0.id = 1 +post.responses.default.content.application/json.example.0.value.0 = service tdm +post.responses.default.content.application/json.example.0.value.1 = web services +post.responses.default.content.application/json.example.0.value.2 = information scientifique +post.responses.default.content.application/json.example.0.value.3 = outil libre +post.responses.default.content.application/json.example.0.value.4 = informations présentes +post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream +post.responses.default.description = Termes extraits du texte envoyé +post.summary = Extrait des termes du texte en français en utilisant Teeft prenant en compte les nombres +post.tags.0: terms-extraction +post.parameters.0.description = Nombre maximal de termes à récupérer +post.parameters.0.in = query +post.parameters.0.name = nb +post.parameters.0.required = false +post.parameters.0.schema.type = number +post.parameters.1.description = Indenter le JSON résultant +post.parameters.1.in = query +post.parameters.1.name = indent +post.parameters.1.required = false +post.parameters.1.schema.type = boolean + +[use] +plugin = @ezs/basics +plugin = @ezs/teeft +plugin = @ezs/strings + +[JSONParse] +separator = * + +[encode] +path = value +from = 1 +to = one +from = 2 +to = two +from = 3 +to = three +from = 4 +to = four +from = 5 +to = five +from = 6 +to = six +from = 7 +to = seven +from = 8 +to = eight +from = 9 +to = nine +from = 0 +to = zero +prefix = inf +suffix = sup + +[replace] +path = content +value = get('value') + 
+path = path +value = get('id', 'n/a') + +[TeeftToLowerCase] +path = content + +[TeeftSentenceTokenize] +[TeeftTokenize] + +[TeeftNaturalTag] +lang = fr + +[TeeftExtractTerms] +lang = fr + +[TeeftFilterTags] +lang = fr + +[TeeftRemoveNumbers] +[TeeftRemoveShortTerms] +[TeeftRemoveLongTerms] +[TeeftRemoveWeirdTerms] +[TeeftStopWords] +lang = fr + +[TeeftSumUpFrequencies] +[TeeftSpecificity] +lang = fr +sort = true + +[TeeftFilterMonoFreq] + +[replace] +path = id +value = get('path') + +path = value +value = get('terms').map('term').slice(0, env("nb", 5)) + +[map] +path = value + +[map/decode] +from = 1 +to = one +from = 2 +to = two +from = 3 +to = three +from = 4 +to = four +from = 5 +to = five +from = 6 +to = six +from = 7 +to = seven +from = 8 +to = eight +from = 9 +to = nine +from = 0 +to = zero +prefix = inf +suffix = sup + +[dump] +indent = env('indent', false)