diff --git a/biblio-ref/swagger.json b/biblio-ref/swagger.json index f7993cc..a421174 100644 --- a/biblio-ref/swagger.json +++ b/biblio-ref/swagger.json @@ -15,7 +15,7 @@ "x-comment": "Will be automatically completed by the ezs server." }, { - "url": "http://vptdmservices.intra.inist.fr:49251/", + "url": "http://vptdmservices.intra.inist.fr:49252/", "description": "Latest version for production", "x-profil": "Standard" } diff --git a/bin/publish.sh b/bin/publish.sh index 5516a19..71fba4c 100755 --- a/bin/publish.sh +++ b/bin/publish.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -ROOT="$(dirname $0)/.." +ROOT="$(dirname "$0")/.." SCHEME="https:" allItems="" @@ -16,34 +16,34 @@ fi SWAGGER_DIRECTORY=$(dirname "${SWAGGER_FILE}") - SWAGGER_CONTENT=$() NAME=$(basename "${SWAGGER_DIRECTORY}") - TITLE=$(cat "${SWAGGER_FILE}"|jq .info.title) + TITLE=$(jq .info.title < "${SWAGGER_FILE}") if [ "${TITLE:-null}" = "null" ] then logger -s "${NAME} - ERROR: swagger has no title !" return 2 fi - SUMMARY=$(cat "${SWAGGER_FILE}"|jq .info.summary) - if [ "${SUMMARY:-null}" = "null" ] + SUMMARY=$(jq .info.summary < "${SWAGGER_FILE}") + DESCRIPTION=$(jq .info.description < "${SWAGGER_FILE}") + if [ "${SUMMARY:-null}" = "null" ] && [ "${DESCRIPTION:-null}" = "null" ] then - logger -s "${NAME} - ERROR: swagger has no summary !" + logger -s "${NAME} - ERROR: swagger has no summary nor description!" return 3 fi - SERVERS=$(cat "${SWAGGER_FILE}"|jq -r ".servers") + SERVERS=$(jq -r ".servers" < "${SWAGGER_FILE}") if [ "${SERVERS:-null}" = "null" ] then logger -s "${NAME} - ERROR: swagger has no servers!" return 4 fi - URL=$(cat "${SWAGGER_FILE}"|jq -r ".servers[] | select(.\"x-profil\").url"|sed -e "s/^/ - url: /") + URL=$(jq -r ".servers[] | select(.\"x-profil\").url" < "${SWAGGER_FILE}"|sed -e "s/^/ - url: /") if [ "${URL:-null}" = "null" ] then logger -s "${NAME} - ERROR: swagger has no url !" 
return 5 fi # First profil will be used for all url - PROFIL=$(cat "${SWAGGER_FILE}"|jq -r ".servers[] | select(.\"x-profil\").\"x-profil\""|head -n 1) + PROFIL=$(jq -r ".servers[] | select(.\"x-profil\").\"x-profil\"" < "${SWAGGER_FILE}"|head -n 1) case ${PROFIL} in Standard | Deprecated | Reserved | Administrator ) echo -n "${NAME} - ${PROFIL} - " @@ -53,8 +53,8 @@ return 6 ;; esac - CURL_OUTFILE=$(tempfile) - cat < ${CURL_OUTFILE} + CURL_OUTFILE=$(mktemp) + cat < "${CURL_OUTFILE}" http: routers: Router-${NAME}: @@ -71,29 +71,29 @@ ${URL} EOF - HTTP_CODE=$(cat ${CURL_OUTFILE}; rm ${CURL_OUTFILE}) + HTTP_CODE=$(cat "${CURL_OUTFILE}"; rm "${CURL_OUTFILE}") echo -n "${HTTP_CODE} - " - CURL_OUTFILE=$(tempfile) - cat < ${CURL_OUTFILE} + CURL_OUTFILE=$(mktemp) + cat < "${CURL_OUTFILE}" - job_name: '${NAME}' scrape_interval: 10s scheme: https static_configs: - targets: ['${NAME}.services.istex.fr'] EOF - HTTP_CODE=$(cat ${CURL_OUTFILE}; rm ${CURL_OUTFILE}) + HTTP_CODE=$(cat "${CURL_OUTFILE}"; rm "${CURL_OUTFILE}") echo "${HTTP_CODE}" allItems+="{ url: \"${SCHEME}//${NAME}.services.istex.fr\", name: ${TITLE} }," return 0 } -FILES=$(ls ${ROOT}/*/swagger.json) +FILES=$(ls "${ROOT}"/*/swagger.json) echo -n "Login: " -read login +read -r login echo -n "Password: " -read -s passw +read -rs passw echo " " for swagger in ${FILES} @@ -103,8 +103,8 @@ echo -n "open-api - Swagger - " -CURL_OUTFILE=$(tempfile) -cat < ${CURL_OUTFILE} +CURL_OUTFILE=$(mktemp) +cat < "${CURL_OUTFILE}" window.onload = function() { // @@ -128,5 +128,5 @@ // }; EOF -HTTP_CODE=$(cat ${CURL_OUTFILE}; rm ${CURL_OUTFILE}) +HTTP_CODE=$(cat "${CURL_OUTFILE}"; rm "${CURL_OUTFILE}") echo "${HTTP_CODE}" diff --git a/data-termsuite/README.md b/data-termsuite/README.md new file mode 100644 index 0000000..6a06d19 --- /dev/null +++ b/data-termsuite/README.md @@ -0,0 +1,10 @@ +# ws-data-termsuite@2.0.0 + +Web service TermSuite + +Extrait des termes d'un corpus. 
+C'est un service web asynchrone, à l'instar de +[data-computer](../data-computer/). + +> Le code source de ce service est hébergé [sur +> GitHub](https://github.com/Inist-CNRS/web-services/tree/main/services/data-termsuite) diff --git a/data-termsuite/examples.http b/data-termsuite/examples.http new file mode 100644 index 0000000..3b52a58 --- /dev/null +++ b/data-termsuite/examples.http @@ -0,0 +1,43 @@ +# These examples can be used directly in VSCode, using HTTPYac extension (anweber.vscode-httpyac) + +# Décommenter/commenter les lignes voulues pour tester localement +@host=http://localhost:31976 +# @host=https://data-termsuite.services.istex.fr + +# Pour voir les webhooks, aller sur https://webhook.site/#!/view/2caab8b5-fc96-4d7a-bb94-bdda20977830 + +### +# @name v1en +# Extraction d'une terminologie sur un corpus en anglais (à la manière de data-computer) +POST {{host}}/v1/en?nb=10 HTTP/1.1 +Content-Type: application/x-gzip +X-Webhook-Success: https://webhook.site/2caab8b5-fc96-4d7a-bb94-bdda20977830 +X-Webhook-Failure: https://webhook.site/2caab8b5-fc96-4d7a-bb94-bdda20977830 + +< ./examples/22-txt-en.tar.gz + +### +# @name v1fr +# Extraction d'une terminologie sur un corpus en français (à la manière de data-computer) +POST {{host}}/v1/fr?nb=10 HTTP/1.1 +Content-Type: application/x-gzip +X-Webhook-Success: https://webhook.site/2caab8b5-fc96-4d7a-bb94-bdda20977830 +X-Webhook-Failure: https://webhook.site/2caab8b5-fc96-4d7a-bb94-bdda20977830 + +< ./examples/8-txt-fr.tar.gz + +### +# @name v1retrieveJson +# Récupération de terminologie sur un corpus en anglais (après traitement via v1en) +# ATTENTION: pour que ça marche, modifier la valeur de value avec celle retournée par v1en ou v1fr +POST {{host}}/v1/retrieve-json HTTP/1.1 +Content-Type: application/json +[{"value": "EkqfSn4S2"}] + +### +# @name v1retrieveCsv +# Récupération de terminologie sur un corpus en anglais (après traitement via v1en) +# ATTENTION: pour que ça marche, modifier la valeur de value avec celle 
retournée par v1en ou v1fr +POST {{host}}/v1/retrieve-csv HTTP/1.1 +Content-Type: application/json +[{"value": "EkqfSn4S2"}] diff --git a/data-termsuite/swagger.json b/data-termsuite/swagger.json new file mode 100644 index 0000000..a697e0c --- /dev/null +++ b/data-termsuite/swagger.json @@ -0,0 +1,33 @@ +{ + "openapi": "3.0.0", + "info": { + "title": "data-termsuite - Web service TermSuite", + "description": "Extraction terminologique d'un corpus via [TermSuite](https://termsuite.github.io/).\n\nComme ces services s'appliquent à un corpus entier, la réponse finale n'est pas\ndonnée immédiatement après l'appel. \nAu contraire, un service comme [`/v1/en`](#data-termsuite/post-v1-en) renverra une réponse JSON contenant un champ `value` donnant l'identifiant du traitement en cours. \nEt comme on lui passe aussi l'URL d'un *webhook*, cela lui permet, une fois le\ntraitement terminé, de signaler qu'on peut dès lors utiliser le service de\nrécupération (les routes qui commencent par `/v1/retrieve`). 
\n\nExemple: traitement d'un corpus de textes en anglais\n\n### Préparation du corpus\n\nOn crée une archive `.tar.gz` de fichiers `.txt`.\n\nSi on a un répertoire `corpus` contenant ces fichiers `.txt`:\n\n```txt\ncorpus\n├── W2BVWkiVT.txt\n├── W2CeZqyNR.txt\n├── W77S4YQqx.txt\n├── W8kkWKySy.txt\n├── WcKPMhj3p.txt\n├── WG5aHJqba.txt\n├── Wh3itHprz.txt\n├── WhW6tZ6NH.txt\n├── WjS3eZyG4.txt\n├── Wk6YCLbzZ.txt\n├── WmiHPaEdf.txt\n├── WmJYZipzE.txt\n├── Wn8KqZXeX.txt\n├── WPpTXDTJB.txt\n├── WpRjkUwwB.txt\n├── WtCWN5q5Y.txt\n├── WtJ4NNWhq.txt\n├── WTxTnPGxt.txt\n├── WwzTseBX6.txt\n├── WXer3K9QE.txt\n├── Wymfn7YTm.txt\n└── WzXkqs4zt.txt\n```\n\nLa commande Linux suivante crée le fichier `corpus.tar.gz` conforme à ce qui est\nattendu par le service web.\n\n```bash\ntar czf corpus.tar.gz corpus\n```\n\n### Appel du traitement\n\nIci, il faut donner un [*webhook*](https://fr.wikipedia.org/wiki/Webhook) pour\nconnaître l'instant où on peut (ou non) récupérer le résultat.\n\n```bash\ncurl -X 'POST' \n 'http://data-termsuite.services.istex.fr/v1/en?indent=true&nb=10' \n -H 'accept: application/json' \n -H 'X-Webhook-Success: https://webhook.site/2caab8b5-fc96-4d7a-bb94-bdda20977830' \n -H 'X-Webhook-Failure: https://webhook.site/2caab8b5-fc96-4d7a-bb94-bdda20977830' \n -H 'Content-Type: application/x-gzip' \n --data-binary '@corpus.tar.gz'\n```\n\nL'appel précédent renvoie une réponse du type:\n\n```json\n[{\n \"id\": \"termsuite-en\",\n \"value\": \"qiCVLyh5p\"\n}]\n```\n\nEt le serveur du *webhook* recevra, une fois le traitement terminé, un JSON\ncontenant un champ `identifier` (la même valeur que `value` dans la réponse au\n`curl`), et un champ `state` qui devrait être `ready`. 
\nIci c'est `qiCVLyh5p`.\n\n### Récupération du résultat\n\nEnfin, on peut demander la réponse via\n[`/v1/retrieve-csv`](#data-termsuite/post-v1-retrieve-csv), en n'oubliant pas\nd'adapter `value`:\n\n```bash\ncurl -X 'POST' \n 'http://localhost:31976/v1/retrieve-csv' \n -H 'accept: text/csv' \n -H 'Content-Type: text/csv' \n -d '[\n {\n \"value\": \"qiCVLyh5p\"\n }\n]'\n```\n\nqui donnera ce résultat\n\n```csv\n\"key\",\"freq\"\n\"n: sediment\",\"10\"\n\"nn: proto-paratethys sea\",\"9\"\n\"a: glacial\",\"7\"\n\"n: mmes\",\"7\"\n\"a: tropical\",\"7\"\n\"n: precipitation\",\"7\"\n\"n: genus\",\"7\"\n\"n: obliquiloculata\",\"7\"\n\"n: telescopus\",\"6\"\n\"nn: clay mineral\",\"6\"\n```", + "version": "2.0.0", + "termsOfService": "https://services.istex.fr/", + "contact": { + "name": "Inist-CNRS", + "url": "https://www.inist.fr/nous-contacter/" + } + }, + "servers": [ + { + "x-comment": "Will be automatically completed by the ezs server." + }, + { + "url": "http://vptdmjobs.intra.inist.fr:49162/", + "description": "Latest version for production", + "x-profil": "Standard" + } + ], + "tags": [ + { + "name": "data-termsuite", + "description": "Web service TermSuite", + "externalDocs": { + "description": "Plus de documentation", + "url": "https://github.com/inist-cnrs/web-services/tree/main/services/data-termsuite" + } + } + ] +} \ No newline at end of file diff --git a/data-termsuite/tests.hurl b/data-termsuite/tests.hurl new file mode 100644 index 0000000..53e477a --- /dev/null +++ b/data-termsuite/tests.hurl @@ -0,0 +1,147 @@ +# WARNING: This file was not generated, but manually written. 
+# DON'T OVERWRITE IT +# Use it to test: +# npx hurl --test --variable host="http://localhost:31976" tests.hurl +# or (from root of the repo) +# npm run test:local data-termsuite + +############################################################################ +# Test v1/en + +POST {{host}}/v1/en?nb=10 +content-type: application/x-gzip +X-Webhook-Success: https://webhook.site/2caab8b5-fc96-4d7a-bb94-bdda20977830 +X-Webhook-Failure: https://webhook.site/2caab8b5-fc96-4d7a-bb94-bdda20977830 +file,./examples/22-txt-en.tar.gz; + +HTTP 200 +# Capture the computing token +[Captures] +computing_token: jsonpath "$[0].value" +[Asserts] +variable "computing_token" exists + +# There should be a waiting time, representing the time taken to process data. +# Fortunately, as the data is sparse, and the computing time is small, +# the need is small. +# In normal use cases, a webhook is called when the processing is finished. +# That query gives the process identifier. +# Next, you can call the retrieve URL by putting the identifier in the value field. + +# Version 4.1.0 of hurl added a delay option, whose value is in milliseconds. 
+# https://hurl.dev/blog/2023/09/24/announcing-hurl-4.1.0.html#add-delay-between-requests + +POST {{host}}/v1/retrieve-json?indent=true +content-type: application/json +[Options] +delay: 10000 +``` +[ + { + "value":"{{computing_token}}" + } +] +``` + +HTTP 200 +Content-Type: application/json +[{ + "key": "n: sediment", + "freq": 10 +}, +{ + "key": "nn: proto-paratethys sea", + "freq": 9 +}, +{ + "key": "a: glacial", + "freq": 7 +}, +{ + "key": "n: mmes", + "freq": 7 +}, +{ + "key": "a: tropical", + "freq": 7 +}, +{ + "key": "n: precipitation", + "freq": 7 +}, +{ + "key": "n: genus", + "freq": 7 +}, +{ + "key": "n: obliquiloculata", + "freq": 7 +}, +{ + "key": "n: telescopus", + "freq": 6 +}, +{ + "key": "nn: clay mineral", + "freq": 6 +}] + +############################################################################ +# Test v1/fr +POST {{host}}/v1/fr?nb=5 +content-type: application/x-gzip +X-Webhook-Success: https://webhook.site/2caab8b5-fc96-4d7a-bb94-bdda20977830 +X-Webhook-Failure: https://webhook.site/2caab8b5-fc96-4d7a-bb94-bdda20977830 +file,./examples/8-txt-fr.tar.gz; + +HTTP 200 +# Capture the computing token +[Captures] +fr_computing_token: jsonpath "$[0].value" +[Asserts] +variable "fr_computing_token" exists + +# There should be a waiting time, representing the time taken to process data. +# Fortunately, as the data is sparse, and the computing time is small, +# the need is small. +# In normal use cases, a webhook is called when the processing is finished. +# That query gives the process identifier. +# Next, you can call the retrieve URL by putting the identifier in the value field. + +# Version 4.1.0 of hurl added a delay option, whose value is in milliseconds. 
+# https://hurl.dev/blog/2023/09/24/announcing-hurl-4.1.0.html#add-delay-between-requests + +POST {{host}}/v1/retrieve-json?indent=true +content-type: application/json +[Options] +delay: 10000 +``` +[ + { + "value":"{{fr_computing_token}}" + } +] +``` + +HTTP 200 +Content-Type: application/json +[{ + "key": "a: thermique", + "freq": 12 +}, +{ + "key": "n: fission", + "freq": 12 +}, +{ + "key": "n: biomasse", + "freq": 10 +}, +{ + "key": "na: énergie éolien", + "freq": 7 +}, +{ + "key": "a: électrique", + "freq": 7 +}] diff --git a/data-wrapper/swagger.json b/data-wrapper/swagger.json index fa88430..37b1a5a 100644 --- a/data-wrapper/swagger.json +++ b/data-wrapper/swagger.json @@ -2,8 +2,8 @@ "openapi": "3.1.0", "info": { "title": "data-wrapper - Conversions en fichier corpus compressé", - "summary": "Les fichiers corpus compressés sont compatibles avec tous traitements TDM dédiés aux corpus (webservices asynchrones)", - "version": "1.3.1", + "summary": "Les fichiers corpus compressés sont compatibles avec tous les traitements TDM dédiés aux corpus (services web asynchrones)", + "version": "1.3.3", "termsOfService": "https://services.istex.fr/", "contact": { "name": "Inist-CNRS", @@ -15,13 +15,9 @@ "x-comment": "Will be automatically completed by the ezs server." }, { - "url": "http://vptdmjobs.intra.inist.fr:49161/", + "url": "http://vptdmjobs.intra.inist.fr:49163/", "description": "Latest version for production", "x-profil": "Standard" - }, - { - "url": "http://vitdmservices.intra.inist.fr:49301/", - "description": "Next version for production" } ], "tags": [ @@ -30,7 +26,7 @@ "description": "Conversions en fichier corpus compressé", "externalDocs": { "description": "Plus de documentation", - "url": "https://gitbucket.inist.fr/tdm/web-services/tree/master/data-wrapper" + "url": "https://github.com/inist-cnrs/web-services/tree/main/services/data-wrapper" } } ]