diff --git a/conditor-dumps/.gitignore b/conditor-dumps/.gitignore index 640c5aa..c8fb751 100644 --- a/conditor-dumps/.gitignore +++ b/conditor-dumps/.gitignore @@ -1,2 +1,3 @@ 02-download/*.json -03-create-fields/*.json \ No newline at end of file +03-create-fields/*.json +04-enrich/*.json \ No newline at end of file diff --git a/conditor-dumps/04-enrich.ini b/conditor-dumps/04-enrich.ini new file mode 100644 index 0000000..265b2f5 --- /dev/null +++ b/conditor-dumps/04-enrich.ini @@ -0,0 +1,76 @@ +append = dump?indent=true + +[use] +# JSONParse URLConnect +plugin = basics + +[JSONParse] + +# Prépare la structure à envoyer au web service v1/rnsr/info +[assign] +path = ws.rnsr +value = get("authors") \ + .flatMap("affiliations") \ + .map("address").uniq() \ + .map((address, id) => ({ \ + id, \ + value: { \ + year: _.get(self,"ApilPublicationDate"), \ + address \ + } \ + })) + +[map] +path = ws.rnsr + +[map/expand] +path = value +size = 100 + +[map/expand/URLConnect] +url = https://affiliations-tools.services.inist.fr/v1/rnsr/info +timeout = 1000 +noerror = true + +# Récupère les informations +[assign] +path = IsCnrs +value = get("ws.rnsr").map("value").flatten() \ + .map("etabAssoc").flatten() \ + .map("etab.sigle") \ + .filter(sigle => sigle === "CNRS") \ + .thru(arr => Boolean(arr.length)) + +path = LaboSigle +value = get("ws.rnsr").map("value").flatten().map("sigle") + +path = LaboIntitule +value = get("ws.rnsr").map("value").flatten().map("intitule") + +path = Rnsr +value = get("ws.rnsr").map("value").flatten().map("num_nat_struct") + +# Récupère les instituts du CNRS à partir des RNSR + +[assign] +path = InstitutCnrs +value = get("Rnsr").map((value, id) => ({id, value})) + +[map] +path = InstitutCnrs + +[map/expand] +path = value +size = 100 + +[map/expand/URLConnect] +url = https://mapping-tools.services.inist.fr/v1/rnsr/instituts-cnrs/json +timeout = 1000 +error = true + +[assign] +path = InstitutCnrs +value = get("InstitutCnrs").map("value") + +[exchange] +value = omit("ws") diff --git a/conditor-dumps/04-enrich/.gitkeep b/conditor-dumps/04-enrich/.gitkeep new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/conditor-dumps/04-enrich/.gitkeep diff --git a/conditor-dumps/Makefile b/conditor-dumps/Makefile index 694121e..319da78 100644 --- a/conditor-dumps/Makefile +++ b/conditor-dumps/Makefile @@ -1,6 +1,9 @@ # cible : dépendance -02-download/inist.json: 01-query/inist.txt - npx ezs 02-download.ini < 01-query/inist.txt > 02-download/inist.json +04-enrich/inist.json: 03-create-fields/inist.json + npx ezs 04-enrich.ini < 03-create-fields/inist.json > 04-enrich/inist.json 03-create-fields/inist.json: 02-download/inist.json - npx ezs 03-create-fields.ini < 02-download/inist.json > 03-create-fields/inist.json \ No newline at end of file + npx ezs 03-create-fields.ini < 02-download/inist.json > 03-create-fields/inist.json + +02-download/inist.json: 01-query/inist.txt + npx ezs 02-download.ini < 01-query/inist.txt > 02-download/inist.json \ No newline at end of file diff --git a/conditor-dumps/README.md b/conditor-dumps/README.md index e12a9dd..aa87570 100644 --- a/conditor-dumps/README.md +++ b/conditor-dumps/README.md @@ -1,17 +1,42 @@ # Conditor-dumps -Téléchargement de données via une requête sur l'api Corhal. -Création d'un champ `ApilPublicationDate`. +## Étapes + +### 01-query + +Dépôt d'un fichier `inist.txt` (requête) dans le répertoire `01-query`. + +### 02-download + +Téléchargement de données via la requête sur l'api Corhal. + +### 03-create-fields + +Création d'un champ `ApilPublicationDate` à partir de données existantes dans le json (`publicationDate` et `electronicPublicationDate`). + +### 04-enrich + +Appel de deux web services ([informations RNSR](https://openapi.services.inist.fr/?urls.primaryName=affiliations-tools%20-%20Structuration%20%26%20enrichissements%20d%27affiliations#/affiliations/post-v1-rnsr-info) et [instituts CNRS](https://objectif-tdm.inist.fr/2022/03/29/attribution-de-noms-dinstituts-cnrs-a-partir-didentifiants-rnsr/)) pour créer les champs suivants : + +- `IsCnrs` +- `LaboSigle` +- `LaboIntitule` +- `Rnsr` +- `InstitutCnrs` + +## Configuration + +S'assurer d'avoir les versions suivantes : ```json "packages": [ - "@ezs/core@2.1.2", - "@ezs/basics@1.22.4", - "@ezs/conditor@2.9.0" + "@ezs/core@2.1.9", + "@ezs/basics@1.22.6", + "@ezs/conditor@2.10.1" ] ``` -Configuration complète : +Reste de la configuration : ```json { @@ -20,12 +45,12 @@ "EZS_VERBOSE": false }, "files" : { - "zip": "https://gitbucket.inist.fr/tdm/web-dumps/archive/conditor-dumps/conditor-dumps@1.0.0.zip" + "zip": "https://gitbucket.inist.fr/tdm/web-dumps/archive/conditor-dumps/conditor-dumps@1.2.0.zip" }, "tasks": [ { "CronRule": "1 0 * * *", - "Target": "03-create-fields/inist.json", + "Target": "04-enrich/inist.json", "RunOnStartup": true } ]