Patch summary (text before the first "diff --git" line is ignored by git apply / patch):
  * Makefile: replace the placeholder `fetch-api-istex` target with `harvest`, which
    writes the harvester output to ./data/records/<yyyy-mm-dd>_istex_stats.jsonl.
    The three recipe lines are chained with `&& \` because make runs every recipe
    line in its own shell; without the chaining, `file_name` would be empty when the
    redirection is evaluated.
  * schema/schema.json: add the `istex.host.genres` record type.
  * script/harvester/harvester.sh: add a shebang, fix the `--data-urlencore` typo,
    and add/rename several facet requests.
NOTE(review): indentation of context lines was reconstructed from a whitespace-collapsed
copy of this patch - confirm against the target files (or apply with --ignore-whitespace).

diff --git a/Makefile b/Makefile
index 568a776..80ae3fe 100644
--- a/Makefile
+++ b/Makefile
@@ -26,8 +26,11 @@
 #
 
 # Récupérer les données statistiques depuis l'API Istex
-fetch-api-istex:
-	echo "Récupération des données depuis l'API Istex"
+harvest:
+	timestamp=$$(date +%Y-%m-%d) && \
+	file_name="$${timestamp}_istex_stats.jsonl" && \
+	./script/harvester/harvester.sh > "./data/records/$$file_name"
+
 
 # Récupérer les données de l'instance loaded corpus pour créer un mapping identifiant corpus => ARK
 fetch-loaded-corpus:
@@ -79,9 +82,9 @@
 		-H "Authorization: Bearer $$(make -s lodex-auth)" \
 		-H 'Accept: application/json' \
 
-.PHONY: setup fetch-api-istex fetch-loaded-corpus lodex-clean-json-keys \
+.PHONY: setup harvest fetch-loaded-corpus lodex-clean-json-keys \
 	mapping-corpus-ark merge-loaded-corpus-data export-all \
 	export-excel export-lodex lodex-auth \
 	lodex-backup-template lodex-backup-data backup \
 	lodex-delete-instance-data lodex-import-template lodex-import-data \
-	publish-lodex
\ No newline at end of file
+	publish-lodex
diff --git a/schema/schema.json b/schema/schema.json
index 877cd19..b2a43f2 100644
--- a/schema/schema.json
+++ b/schema/schema.json
@@ -140,6 +140,34 @@
         "additionalProperties": false
       },
       {
+        "title": "istex.host.genres",
+        "description": "Nombre de documents pour un host.genre spécifique d'ISTEX",
+        "type": "object",
+        "properties": {
+          "type": {
+            "type": "string",
+            "enum": ["istex.host.genres"],
+            "description": "Type de l'élément"
+          },
+          "date": {
+            "type": "string",
+            "format": "date",
+            "pattern": "^\\d{4}-\\d{2}-\\d{2}$",
+            "description": "Date de la mesure au format yyyy-mm-dd"
+          },
+          "genre": {
+            "type": "string",
+            "description": "Genre des documents"
+          },
+          "nb": {
+            "type": "integer",
+            "description": "Nombre de documents dans le genre"
+          }
+        },
+        "required": ["type", "date", "genre", "nb"],
+        "additionalProperties": false
+      },
+      {
         "title": "istex.genres",
         "description": "Nombre de documents pour un genre spécifique d'ISTEX",
         "type": "object",
@@ -430,4 +458,4 @@
       }
     ]
   }
-}
\ No newline at end of file
+}
diff --git a/script/harvester/harvester.sh b/script/harvester/harvester.sh
index 8738f64..3acd1bb 100755
--- a/script/harvester/harvester.sh
+++ b/script/harvester/harvester.sh
@@ -1,3 +1,5 @@
+#!/usr/bin/env bash
+
 # URL de base de l'API ISTEX
 ISTEX_API=https://api.istex.fr/document
 
@@ -9,7 +11,7 @@
   response=$(curl --silent --get \
     --data-urlencode "q=$query" \
     --data-urlencode "facet=$facet[*]" \
-    --data-urlencore "size=0" \
+    --data-urlencode "size=0" \
     $ISTEX_API \
   )
 
@@ -22,7 +24,7 @@
   # Appel à l'API ISTEX avec curl, en spécifiant la requête et la facette
   response=$(curl --silent --get \
     --data-urlencode "q=$query" \
-    --data-urlencore "size=0" \
+    --data-urlencode "size=0" \
     $ISTEX_API \
   )
 
@@ -137,24 +139,26 @@
 }
 
 handle_request "*" "corpusName" "" "corpus.nb_doc"
-handle_request "*" "language" "" "istex.languages"
 handle_request "*" "accessCondition.value" "" "istex.access_condition"
+handle_request "*" "language" "" "istex.languages"
 handle_request "*" "enrichments.type" "" "istex.enrichments"
-handle_request "*" "host.genre" "" "istex.genre"
+handle_request "*" "host.genre" "" "istex.host.genres"
+handle_request "*" "genre" "" "istex.genres"
 handle_request "*" "categories.inist" "" "istex.categories.inist"
+handle_request "*" "categories.wos" "" "istex.categories.wos"
 handle_request "*" "categories.scopus" "" "istex.categories.scopus"
 handle_request "*" "categories.scienceMetrix" "" "istex.categories.scienceMetrix"
-handle_request "*" "categories.wos" "" "istex.categories.wos"
 
 handle_request "*" "corpusName" "accessCondition.value" "corpus.access_condition"
 handle_request "*" "corpusName" "enrichments.type" "corpus.enrichment"
-handle_request "*" "corpusName" "host.genre" "corpus.genre"
+handle_request "*" "corpusName" "language enrichments.type" "corpus.enrichment.lang"
+handle_request "*" "corpusName" "language" "corpus.language"
+handle_request "*" "corpusName" "host.genre" "corpus.host.genre"
 handle_request "*" "corpusName" "categories.inist" "corpus.category.inist"
 handle_request "*" "corpusName" "categories.scopus" "corpus.category.scopus"
 handle_request "*" "corpusName" "categories.scienceMetrix" "corpus.category.scienceMetrix"
 handle_request "*" "corpusName" "categories.wos" "corpus.category.wos"
 handle_request "*" "corpusName" "genre" "corpus.genre"
-handle_request "*" "corpusName" "language language" "corpus.enrichment.lang"
 
 fetch_total "*" | type "istex.nb_doc" | stamp
 fetch_total "enrichments.type.raw:*" | type "istex.nb_doc_enrichis" | stamp