diff --git a/biblio-tools/v1/crossref/works/expand.ini b/biblio-tools/v1/crossref/works/expand.ini index 8643800..6d873d9 100644 --- a/biblio-tools/v1/crossref/works/expand.ini +++ b/biblio-tools/v1/crossref/works/expand.ini @@ -27,52 +27,55 @@ # Flow configuration [JSONParse] -legacy = false -separator = $ +separator = * -[bufferize] +[expand] +path = value +size = 10 +cacheName = post-v1-crossref-works-expand -[combine] +[expand/bufferize] + +[expand/combine] path = env('path', 'value') primer = get('bufferID') prepend = buffers append = transit -cacheName = post-v1-crossref-works-expand -[combine/exchange] +[expand/combine/exchange] value = get('value') -[combine/group] +[expand/combine/group] size = 100 -[combine/replace] +[expand/combine/replace] path = rows value = fix(self.length) path = filter value = self().filter(x => x.match(/^10.\d{4,9}\/[^\s]+$/i)).map(x => 'doi:'.concat(x)).join(',') # see https://github.com/CrossRef/rest-api-doc#api-overview -[combine/URLStream] +[expand/combine/URLStream] url = https://api.crossref.org/works path = .items.* timeout = 30000 ; Simplification de la structure à postriori (le faire à priori était également possible) -[combine/exchange] +[expand/combine/exchange] value = self().omit(['indexed', 'reference']) -[combine/OBJFlatten] +[expand/combine/OBJFlatten] -[combine/replace] +[expand/combine/replace] path = id value = get('DOI') path = value value = self() # On nettoie l'objet en supprimant les champs temporaires -[exchange] +[expand/exchange] value = self().omit('bufferID') [assign] diff --git a/biblio-tools/v1/inspirehep/works/expand.ini b/biblio-tools/v1/inspirehep/works/expand.ini index 1603f88..8db514a 100644 --- a/biblio-tools/v1/inspirehep/works/expand.ini +++ b/biblio-tools/v1/inspirehep/works/expand.ini @@ -27,8 +27,7 @@ # Flow configuration [JSONParse] -legacy = false -separator = $ +separator = * [expand] path = value @@ -38,17 +37,16 @@ url = get('value').prepend('https://inspirehep.net/api/doi/') json = true target = fix('value') -timeout = 5000 +timeout = 3000 +retries = 2 noerror = true -; Suppression de certains champs +; Simplification de la structure de la notice [expand/exchange] value = omit(['value.metadata.references']) -; Simplification de la structure de la notice [expand/OBJFlatten] - [expand/replace] path = id value = get('id') @@ -71,11 +69,17 @@ [expand/expand/aggregate] -# On marque les réponses qui n'ont pas abouties -[swing] -test = has('value.id') -reverse = true +; On supprime uniqument le résulat des documents non trouvés pour +; pour les conserver sans les mettre dans le cache +[expand/swing] +test = get('value.id').isEmpty() +[expand/swing/exchange] +value = self().omit('value') +; Pour les documents sans résulat +; On donne une valeur par défaut +[swing] +test = get('value.id').isEmpty() [swing/assign] path = value value = fix('n/a') diff --git a/biblio-tools/v1/wos/works/expand.ini b/biblio-tools/v1/wos/works/expand.ini index 82bbf02..62b3f74 100644 --- a/biblio-tools/v1/wos/works/expand.ini +++ b/biblio-tools/v1/wos/works/expand.ini @@ -42,32 +42,29 @@ path = value value = get('value').toLower() -# Pour traiter par lot on groupe puis on utilise spawn qui crée un buffer par lot -[group] +[expand] +path = value size = env('size') - -[spawn] -[spawn/ungroup] -[spawn/bufferize] +cacheName = post-v1-wos-expand +[expand/bufferize] # On réalise un mapping dynamique sur le champ value (et on sauvegarde les correspondances trouvées) -[spawn/combine] +[expand/combine] path = value default = n/a primer = get('bufferID') prepend = buffers append = transit -cacheName = post-v1-wos-expand -[spawn/combine/group] +[expand/combine/group] size = env('size') -[spawn/combine/replace] +[expand/combine/replace] path = dois value = self().map('value').filter(Boolean).map(x => JSON.stringify(x)).join(' OR ') # On crée un objet contenant tous les valeurs nécessaire pour créer une requete -[spawn/combine/replace] +[expand/combine/replace] path = usrQuery value = fix('DO=(', self.dois ,')').join('') path = databaseId @@ -77,33 +74,50 @@ path = firstRecord value = 1 -[spawn/combine/URLStream] +[expand/combine/URLStream] url = https://wos-api.clarivate.com/api/wos/ header = env('token').prepend('X-ApiKey:') path = Data.Records.records.REC.* timeout = 5000 noerror = true -[spawn/combine/OBJFlatten] +[expand/combine/OBJFlatten] -[spawn/combine/replace] +[expand/combine/replace] path = id value = get('dynamic_data/cluster_related/identifiers/identifier').find({ type: 'xref_doi'}).get('value').toLower() path = value value = self() -# On supprime les réponses qui n'ont pas abouties ( -[spawn/combine/drop] -path = id +# On supprime de la table de mapping, les réponses qui n'ont pas abouties +[expand/combine/remove] +test = get('id').isEmpty() # On nettoie l'objet en supprimant les champs temporaires -[spawn/exchange] +[expand/exchange] value = self().omit('bufferID') -[assign] +[expand/assign] path = value -value = get('value.value', 'n/a') +value = get('value.value') + +; On supprime uniqument le résulat des documents non trouvés pour +; pour les conserver sans les mettre dans le cache +[expand/swing] +test = get('value.UID').isEmpty() +[expand/swing/exchange] +value = self().omit('value') + + +; Pour les documents sans résulat +; On donne une valeur par défaut +[swing] +test = get('value.UID').isEmpty() +[swing/assign] +path = value +value = fix('n/a') + [dump] indent = env('indent')