diff --git a/conditor-dumps-config.json b/conditor-dumps-config.json index 6261b5c..0c06cd2 100644 --- a/conditor-dumps-config.json +++ b/conditor-dumps-config.json @@ -4,7 +4,7 @@ "EZS_VERBOSE": false }, "files": { - "zip": "https://gitbucket.inist.fr/tdm/web-dumps/archive/conditor-dumps/conditor-dumps@1.4.0.zip" + "zip": "https://gitbucket.inist.fr/tdm/web-dumps/archive/conditor-dumps/conditor-dumps@1.5.0.zip" }, "tasks": [ { diff --git a/conditor-dumps/.gitignore b/conditor-dumps/.gitignore index 639bf77..68bbe88 100644 --- a/conditor-dumps/.gitignore +++ b/conditor-dumps/.gitignore @@ -2,4 +2,8 @@ 03-create-fields/*.json 04-enrich/*.json 05-future-loader/*.json +02-download/*.jsonl +03-create-fields/*.jsonl +04-enrich/*.jsonl +05-future-loader/*.jsonl package.json diff --git a/conditor-dumps/02-download.ini b/conditor-dumps/02-download.ini index a08cf62..a3fdae8 100644 --- a/conditor-dumps/02-download.ini +++ b/conditor-dumps/02-download.ini @@ -1,5 +1,5 @@ # npx ezs 02-download.ini -append = dump?indent=true +append = pack [use] plugin = basics diff --git a/conditor-dumps/03-create-fields.ini b/conditor-dumps/03-create-fields.ini index 4590963..46b312d 100644 --- a/conditor-dumps/03-create-fields.ini +++ b/conditor-dumps/03-create-fields.ini @@ -1,10 +1,7 @@ # npx ezs 03-create-fields.ini -append = dump?indent=true +append = pack -[use] -plugin = basics - -[JSONParse] +[unpack] [assign] # Récupère electronicPublicationDate et publicationDate diff --git a/conditor-dumps/04-enrich.ini b/conditor-dumps/04-enrich.ini index 53684b4..b471c33 100644 --- a/conditor-dumps/04-enrich.ini +++ b/conditor-dumps/04-enrich.ini @@ -1,10 +1,10 @@ -append = dump?indent=true +append = pack [use] -# JSONParse URLConnect +# URLConnect plugin = basics -[JSONParse] +[unpack] # Prépare la structure à envoyer au web service v1/rnsr/info [assign] diff --git a/conditor-dumps/05-future-loader.ini b/conditor-dumps/05-future-loader.ini index 653c0c2..1832b41 100644 --- a/conditor-dumps/05-future-loader.ini +++ b/conditor-dumps/05-future-loader.ini @@ -1,11 +1,8 @@ # npx ezs 05-future-loader.ini # Sera déplacé dans le loader spécifique Conditor-métrie -append = dump?indent=true +append = pack -[use] -plugin = basics - -[JSONParse] +[unpack] [assign] path = ApilCollation diff --git a/conditor-dumps/Makefile b/conditor-dumps/Makefile index fe1f257..830cdde 100644 --- a/conditor-dumps/Makefile +++ b/conditor-dumps/Makefile @@ -4,10 +4,10 @@ .DELETE_ON_ERROR: # To prevent deleting intermediate files (for controls) -#.PRECIOUS: 02-download/%.json 03-create-fields/%.json +#.PRECIOUS: 02-download/%.jsonl 03-create-fields/%.jsonl SOURCE_FILES := $(wildcard 01-query/*.txt) -TARGET_FILES := $(patsubst 01-query/%.txt, 04-enrich/%.json, $(SOURCE_FILES)) +TARGET_FILES := $(patsubst 01-query/%.txt, 05-future-loader/%.jsonl, $(SOURCE_FILES)) watch: ## Automatically build files when they change while true; do \ @@ -20,14 +20,14 @@ echo $(TARGET_FILES) # cible : dépendance -05-future-loader/%.json: 04-enrich/%.json +05-future-loader/%.jsonl: 04-enrich/%.jsonl npx ezs 05-future-loader.ini < $< > $@ -04-enrich/%.json: 03-create-fields/%.json +04-enrich/%.jsonl: 03-create-fields/%.jsonl npx ezs 04-enrich.ini < $< > $@ -03-create-fields/%.json: 02-download/%.json +03-create-fields/%.jsonl: 02-download/%.jsonl npx ezs 03-create-fields.ini < $< > $@ -02-download/%.json: 01-query/%.txt +02-download/%.jsonl: 01-query/%.txt npx ezs 02-download.ini < $< > $@ \ No newline at end of file diff --git a/conditor-dumps/README.md b/conditor-dumps/README.md index 74f5e91..94d8e02 100644 --- a/conditor-dumps/README.md +++ b/conditor-dumps/README.md @@ -1,5 +1,11 @@ # Conditor-dumps +Collecte, restructuration et enrichissement de données +[Conditor](https://corhal-api.inist.fr/api-docs/). + +Les fichiers fournis sont au format JSON Lines (`.jsonl`), ce qui veut dire que +le *loader* utilisé pour LODEX doit être adapté. + ## Étapes ### 01-query @@ -39,7 +45,7 @@ > ⚠️ **Attention**: utilisez au moins la version 1.5 de `lodex-crontab`. -S'assurer d'avoir les versions suivantes : +S'assurer d'avoir les versions suivantes : ```json "packages": [ @@ -58,7 +64,7 @@ "EZS_VERBOSE": false }, "files" : { - "zip": "https://gitbucket.inist.fr/tdm/web-dumps/archive/conditor-dumps/conditor-dumps@1.4.0.zip" + "zip": "https://gitbucket.inist.fr/tdm/web-dumps/archive/conditor-dumps/conditor-dumps@1.5.0.zip" }, "tasks": [ {