diff --git a/biblio-tools/v1/crossref/prefixes/expand.ini b/biblio-tools/v1/crossref/prefixes/expand.ini index 28b13d5..e293be1 100644 --- a/biblio-tools/v1/crossref/prefixes/expand.ini +++ b/biblio-tools/v1/crossref/prefixes/expand.ini @@ -26,8 +26,7 @@ # Flow configuration [JSONParse] -legacy = false -separator = $ +separator = * [expand] path = value diff --git a/biblio-tools/v1/crossref/works/expand.ini b/biblio-tools/v1/crossref/works/expand.ini index 8643800..6d873d9 100644 --- a/biblio-tools/v1/crossref/works/expand.ini +++ b/biblio-tools/v1/crossref/works/expand.ini @@ -27,52 +27,55 @@ # Flow configuration [JSONParse] -legacy = false -separator = $ +separator = * -[bufferize] +[expand] +path = value +size = 10 +cacheName = post-v1-crossref-works-expand -[combine] +[expand/bufferize] + +[expand/combine] path = env('path', 'value') primer = get('bufferID') prepend = buffers append = transit -cacheName = post-v1-crossref-works-expand -[combine/exchange] +[expand/combine/exchange] value = get('value') -[combine/group] +[expand/combine/group] size = 100 -[combine/replace] +[expand/combine/replace] path = rows value = fix(self.length) path = filter value = self().filter(x => x.match(/^10.\d{4,9}\/[^\s]+$/i)).map(x => 'doi:'.concat(x)).join(',') # see https://github.com/CrossRef/rest-api-doc#api-overview -[combine/URLStream] +[expand/combine/URLStream] url = https://api.crossref.org/works path = .items.* timeout = 30000 ; Simplification de la structure à postriori (le faire à priori était également possible) -[combine/exchange] +[expand/combine/exchange] value = self().omit(['indexed', 'reference']) -[combine/OBJFlatten] +[expand/combine/OBJFlatten] -[combine/replace] +[expand/combine/replace] path = id value = get('DOI') path = value value = self() # On nettoie l'objet en supprimant les champs temporaires -[exchange] +[expand/exchange] value = self().omit('bufferID') [assign] diff --git a/biblio-tools/v1/inspirehep/works/expand.ini 
b/biblio-tools/v1/inspirehep/works/expand.ini index 1603f88..8db514a 100644 --- a/biblio-tools/v1/inspirehep/works/expand.ini +++ b/biblio-tools/v1/inspirehep/works/expand.ini @@ -27,8 +27,7 @@ # Flow configuration [JSONParse] -legacy = false -separator = $ +separator = * [expand] path = value @@ -38,17 +37,16 @@ url = get('value').prepend('https://inspirehep.net/api/doi/') json = true target = fix('value') -timeout = 5000 +timeout = 3000 +retries = 2 noerror = true -; Suppression de certains champs +; Simplification de la structure de la notice [expand/exchange] value = omit(['value.metadata.references']) -; Simplification de la structure de la notice [expand/OBJFlatten] - [expand/replace] path = id value = get('id') @@ -71,11 +69,17 @@ [expand/expand/aggregate] -# On marque les réponses qui n'ont pas abouties -[swing] -test = has('value.id') -reverse = true +; On supprime uniquement le résultat des documents non trouvés +; pour les conserver sans les mettre dans le cache +[expand/swing] +test = get('value.id').isEmpty() +[expand/swing/exchange] +value = self().omit('value') +; Pour les documents sans résultat +; On donne une valeur par défaut +[swing] +test = get('value.id').isEmpty() [swing/assign] path = value value = fix('n/a') diff --git a/biblio-tools/v1/istex/works/expand.ini b/biblio-tools/v1/istex/works/expand.ini index 7eb75ce..808fe5a 100644 --- a/biblio-tools/v1/istex/works/expand.ini +++ b/biblio-tools/v1/istex/works/expand.ini @@ -27,8 +27,7 @@ # Flow configuration [JSONParse] -legacy = false -separator = $ +separator = * [bufferize] diff --git a/biblio-tools/v1/unpaywall/corhal.ini b/biblio-tools/v1/unpaywall/corhal.ini index 56d284b..66a82fa 100644 --- a/biblio-tools/v1/unpaywall/corhal.ini +++ b/biblio-tools/v1/unpaywall/corhal.ini @@ -26,8 +26,7 @@ # Flow configuration [JSONParse] -legacy = false -separator = $ +separator = * [env] path = size diff --git a/biblio-tools/v1/unpaywall/expand.ini b/biblio-tools/v1/unpaywall/expand.ini index 
f3b5975..6ef6d5d 100644 --- a/biblio-tools/v1/unpaywall/expand.ini +++ b/biblio-tools/v1/unpaywall/expand.ini @@ -26,8 +26,7 @@ # Flow configuration [JSONParse] -legacy = false -separator = $ +separator = * [env] path = size diff --git a/biblio-tools/v1/unpaywall/is_oa.ini b/biblio-tools/v1/unpaywall/is_oa.ini index 8c32033..dbbaa77 100644 --- a/biblio-tools/v1/unpaywall/is_oa.ini +++ b/biblio-tools/v1/unpaywall/is_oa.ini @@ -26,8 +26,7 @@ # Flow configuration [JSONParse] -legacy = false -separator = $ +separator = * [env] path = size diff --git a/biblio-tools/v1/wos/works/expand.ini b/biblio-tools/v1/wos/works/expand.ini index f60f04f..62b3f74 100644 --- a/biblio-tools/v1/wos/works/expand.ini +++ b/biblio-tools/v1/wos/works/expand.ini @@ -26,8 +26,7 @@ # Flow configuration [JSONParse] -legacy = false -separator = $ +separator = * [env] path = size @@ -43,32 +42,29 @@ path = value value = get('value').toLower() -# Pour traiter par lot on groupe puis on utilise spawn qui crée un buffer par lot -[group] +[expand] +path = value size = env('size') - -[spawn] -[spawn/ungroup] -[spawn/bufferize] +cacheName = post-v1-wos-expand +[expand/bufferize] # On réalise un mapping dynamique sur le champ value (et on sauvegarde les correspondances trouvées) -[spawn/combine] +[expand/combine] path = value default = n/a primer = get('bufferID') prepend = buffers append = transit -cacheName = post-v1-wos-expand -[spawn/combine/group] +[expand/combine/group] size = env('size') -[spawn/combine/replace] +[expand/combine/replace] path = dois value = self().map('value').filter(Boolean).map(x => JSON.stringify(x)).join(' OR ') # On crée un objet contenant tous les valeurs nécessaire pour créer une requete -[spawn/combine/replace] +[expand/combine/replace] path = usrQuery value = fix('DO=(', self.dois ,')').join('') path = databaseId @@ -78,33 +74,50 @@ path = firstRecord value = 1 -[spawn/combine/URLStream] +[expand/combine/URLStream] url = https://wos-api.clarivate.com/api/wos/ header = 
env('token').prepend('X-ApiKey:') path = Data.Records.records.REC.* timeout = 5000 noerror = true -[spawn/combine/OBJFlatten] +[expand/combine/OBJFlatten] -[spawn/combine/replace] +[expand/combine/replace] path = id value = get('dynamic_data/cluster_related/identifiers/identifier').find({ type: 'xref_doi'}).get('value').toLower() path = value value = self() -# On supprime les réponses qui n'ont pas abouties ( -[spawn/combine/drop] -path = id +# On supprime de la table de mapping, les réponses qui n'ont pas abouti +[expand/combine/remove] +test = get('id').isEmpty() # On nettoie l'objet en supprimant les champs temporaires -[spawn/exchange] +[expand/exchange] value = self().omit('bufferID') -[assign] +[expand/assign] path = value -value = get('value.value', 'n/a') +value = get('value.value') + +; On supprime uniquement le résultat des documents non trouvés +; pour les conserver sans les mettre dans le cache +[expand/swing] +test = get('value.UID').isEmpty() +[expand/swing/exchange] +value = self().omit('value') + + +; Pour les documents sans résultat +; On donne une valeur par défaut +[swing] +test = get('value.UID').isEmpty() +[swing/assign] +path = value +value = fix('n/a') + [dump] indent = env('indent') diff --git a/mapping-tools/v1/halAuthorId/idRef/json.ini b/mapping-tools/v1/halAuthorId/idRef/json.ini index 8f27c95..00de3fc 100644 --- a/mapping-tools/v1/halAuthorId/idRef/json.ini +++ b/mapping-tools/v1/halAuthorId/idRef/json.ini @@ -25,12 +25,15 @@ path = value value = get("value").split('/').last().replace(/^[^\d]*(?=[\d])/,"https://data.archives-ouvertes.fr/author/") -[combine] +[expand] +path = value +cacheName = post-v1-halauthorid-idref-json + +[expand/combine] path = value default = n/a primer = ./halAuthorId_idRef.tsv file = ./halauthorid2idref.ini -persistent = true [assign] path = value diff --git a/mapping-tools/v1/homogenize/documentType/json.ini b/mapping-tools/v1/homogenize/documentType/json.ini index c29f069..ab571c3 100644 --- 
a/mapping-tools/v1/homogenize/documentType/json.ini +++ b/mapping-tools/v1/homogenize/documentType/json.ini @@ -19,12 +19,16 @@ plugin = analytics [JSONParse] -[combine] + +[expand] +path = value +cacheName = post-v1-homogenize-document-type-json + +[expand/combine] path = value default = n/a primer = ./typeDocOriginal2typeDocHomog.tsv file = ./homogenize-document-type.ini -persistent = true [assign] path = value diff --git a/mapping-tools/v1/homogenize/publisher/json.ini b/mapping-tools/v1/homogenize/publisher/json.ini index 5f8e5c3..2844011 100644 --- a/mapping-tools/v1/homogenize/publisher/json.ini +++ b/mapping-tools/v1/homogenize/publisher/json.ini @@ -19,12 +19,16 @@ plugin = analytics [JSONParse] -[combine] + +[expand] +path = value +cacheName = post-v1-homogenize-publisher-json + +[expand/combine] path = value default = n/a primer = ./publisherOriginal2publisherHomogenize.tsv file = ./homogenize-publisher.ini -persistent = true [assign] path = value diff --git a/mapping-tools/v1/homogenize/source/json.ini b/mapping-tools/v1/homogenize/source/json.ini index 81d44c3..0850e0a 100644 --- a/mapping-tools/v1/homogenize/source/json.ini +++ b/mapping-tools/v1/homogenize/source/json.ini @@ -19,12 +19,16 @@ plugin = analytics [JSONParse] -[combine] + +[expand] +path = value +cacheName = post-v1-homogenize-source-json + +[expand/combine] path = value default = n/a primer = ./sourceOriginal2sourceConditorWoS.tsv file = ./homogenize-source.ini -persistent = true [assign] path = value diff --git a/mapping-tools/v1/idRef/orcid/json.ini b/mapping-tools/v1/idRef/orcid/json.ini index 9ac0765..6fdc14b 100644 --- a/mapping-tools/v1/idRef/orcid/json.ini +++ b/mapping-tools/v1/idRef/orcid/json.ini @@ -21,12 +21,15 @@ path = value value = get("value").replace(/[^\d]*/, "http://www.idref.fr/").replace(/[^\d]*$/, "/id") -[combine] +[expand] +path = value +cacheName = post-v1-idref-orcid-json + +[expand/combine] path = value default = n/a primer = ./idRef_orcId.tsv file = 
./idref2orcid.ini -persistent = true [assign] path = value diff --git a/mapping-tools/v1/inspire-category/meta-category/json.ini b/mapping-tools/v1/inspire-category/meta-category/json.ini index fd4b63b..e8fda9b 100644 --- a/mapping-tools/v1/inspire-category/meta-category/json.ini +++ b/mapping-tools/v1/inspire-category/meta-category/json.ini @@ -26,12 +26,15 @@ [JSONParse] # Homogenize ID: url, string and number to an url -[combine] +[expand] +path = value +cacheName = post-v1-inspire-category-meta-category-json + +[expand/combine] path = value default = n/a primer = ./regroupement-inspire-categories.tsv file = ./regroupement-inspire-categories.ini -persistent = true [assign] path = value diff --git a/mapping-tools/v1/inspire-labos/in2p3-labos/json.ini b/mapping-tools/v1/inspire-labos/in2p3-labos/json.ini index 3b65a13..e9f6fdb 100644 --- a/mapping-tools/v1/inspire-labos/in2p3-labos/json.ini +++ b/mapping-tools/v1/inspire-labos/in2p3-labos/json.ini @@ -26,12 +26,15 @@ [JSONParse] # Homogenize ID: url, string and number to an url -[combine] +[expand] +path = value +cacheName = post-v1-inspire-labos-in2p3-labos-json + +[expand/combine] path = value default = n/a primer = ./regroupement-inspire-labos.tsv file = ./regroupement-inspire-labos.ini -persistent = true [assign] path = value diff --git a/mapping-tools/v1/rnsr/instituts-cnrs/json.ini b/mapping-tools/v1/rnsr/instituts-cnrs/json.ini index 87493ef..8731e34 100644 --- a/mapping-tools/v1/rnsr/instituts-cnrs/json.ini +++ b/mapping-tools/v1/rnsr/instituts-cnrs/json.ini @@ -15,12 +15,16 @@ plugin = analytics [JSONParse] -[combine] + +[expand] +path = value +cacheName = post-v1-rnsr-instituts-cnrs-json + +[expand/combine] path = value default = n/a primer = ./rnsrl2institutCnrs.tsv file = ./instituts-cnrs.ini -persistent = true [assign] path = value