; web-dumps / conditor-dumps / 03-enrich.ini
; Script-level key set before any section.
; NOTE(review): presumably asks ezs to append a `pack` statement at the end of
; the pipeline — confirm against the ezs documentation.
append = pack
; Load the ezs plugins whose statements are used in the sections below.
[use]
; Original note: URLConnect (presumably a reminder of which plugin provides it — confirm)
plugin = basics
plugin = analytics
; Pipeline-wide environment values, read later through env("<path>").
[env]
; number2labelDR: lookup table from a two-digit code to a regional delegation
; (DR) label. Codes 03 and 19 map to the DR01 and DR16 labels respectively.
path = number2labelDR
value = fix({"01": "DR01 Ile-de-France Villejuif","02": "DR02 Paris-Centre","03": "DR01 Ile-de-France Villejuif","04": "DR04 Ile-de-France Gif-sur-Yvette","05": "DR05 Ile-de-France Meudon","06": "DR06 Centre Est","07": "DR07 Rhône Auvergne","08": "DR08 Centre Limousin Poitou Charente","10": "DR10 Alsace","11": "DR11 Alpes","12": "DR12 Provence et Corse","20": "DR20 Côte d'Azur","13": "DR13 Occitanie Est","14": "DR14 Occitanie Ouest","15": "DR15 Aquitaine","16": "DR16 Paris-Normandie","17": "DR17 Bretagne et Pays de la Loire","18": "DR18 Hauts-de-France","19": "DR16 Paris-Normandie"})
; weekNumber: whole days elapsed since 1 January of the current year,
; divided by 7 and rounded up.
path = weekNumber
value = thru(() => new Date()).thru(currentDate => Math.floor((currentDate - (new Date(currentDate.getFullYear(), 0, 1)))/(24 * 60 * 60 * 1000))).thru(days => Math.ceil(days / 7))
; dayNumber: whole days elapsed since day 0 of January of the current year
; (86400000 ms per day).
path = dayNumber
value = thru(() => Math.floor((Date.now() - new Date(new Date().getFullYear(), 0, 0)) / 86400000))
; Selects how long cached results are kept:
; - env("weekNumber"): keep cached data for one week,
; - env("dayNumber"): keep cached data for one day,
; - forever: keep cached data forever
; The salt is used as a prefix of every cacheName in this script.
; NOTE(review): the salt carries no year, so the same cache names come back
; every year — confirm that caches are purged in between.
[env]
path = cacheSalt
value = env("weekNumber")

; Entry point of the pipeline.
; NOTE(review): [unpack] presumably decodes the packed input records — confirm.
[unpack]

; Prepare the HAL lookup key: the DOI when the record has no RNSR code yet,
; an empty string otherwise.
[assign]
path = wsHal
value = get("codesRnsr").thru(rnsrCodes => (_.isEmpty(rnsrCodes) ? self.doi : ""))

; Run the HAL web service only for records whose lookup key is not empty.
[swing]
test = get("wsHal").isEmpty()
reverse = true

[swing/expand]
path = wsHal
size = 100

[swing/expand/URLConnect]
url = https://biblio-tools.services.istex.fr/v2/hal/works/expand
timeout = 90007
noerror = true

; Collect the RNSR identifiers found in the HAL response: entries of type
; "structures" are replaced by their `org`, then only laboratories carrying an
; `idno` array are kept, and the `_t` of each idno of type "RNSR" is returned.
[assign]
path = retrieveHalRnsr
value = get("wsHal.text.back.listOrg").flatMap(entry => (entry && entry.type === "structures" ? entry.org : entry)).filter(lab => lab && lab.type === "laboratory" && Array.isArray(lab.idno)).flatMap(lab => lab.idno).filter(id => id.type === "RNSR").map(id => id._t)

; Merge the record's own RNSR codes with those retrieved from HAL, dropping
; empty values.
[assign]
path = codesRnsr
value = fix(self.codesRnsr, self.retrieveHalRnsr).flatten().compact()

; Fetch Loterre 2XK information.
; Build one work item per RNSR code: the code is copied into itemcodesRnsr,
; codeRNSR and institut, next to its index and the record's publicationYear.
[assign]
path = ws.loterre2xk
value = get("codesRnsr").castArray().map((itemcodesRnsr, indice) => ({indice, itemcodesRnsr, codeRNSR: itemcodesRnsr, institut: itemcodesRnsr, publicationDate: self.publicationYear }))

; Apply the nested [expand/...] statements below to ws.loterre2xk.
[expand]
path = ws.loterre2xk

; NOTE(review): presumably emits one object per array item — confirm.
[expand/exploding]

; Resolve value.itemcodesRnsr through the Loterre 2XK identify service; the
; cache name is prefixed with the cache salt.
[expand/expand]
path = value.itemcodesRnsr
size = 100
cacheName = fix(`${env("cacheSalt")}-04-2xk-identify`)

[expand/expand/URLConnect]
url = https://loterre-resolvers.services.istex.fr/v1/2XK/identify
timeout = 120000
noerror = true

; Suffix value.institut with "|<publicationDate>" before the next lookup.
[expand/assign]
path = value.institut
value = get("value.institut").append(`|${self.value.publicationDate}`)

; Process value.institut with the sub-script 03.1-enrich-rnsrByYear.ini.
[expand/expand]
path = value.institut
size = 100
file = ./03.1-enrich-rnsrByYear.ini
cacheName = fix(`${env("cacheSalt")}-04-rnsr-year-instituts-cnrs`)

; Derive display fields from the 2XK result.
[expand/assign]
; French preferred label, "n/a" when absent.
path = value.label
value = get("value.itemcodesRnsr.prefLabel@fr", "n/a")

; Same label, NFKD-decomposed, stripped of combining marks (U+0300-U+036F)
; and upper-cased.
path = value.labelNormalized
value = get("value.itemcodesRnsr.prefLabel@fr", "n/a").thru(item => String(item).normalize("NFKD").replace(/[\u0300-\u036f]/g, "").toUpperCase())

; Regional delegation labels looked up in env("number2labelDR"); a code missing
; from the table yields "unknow <code>".
path = value.dr
value = get("value.itemcodesRnsr.delegationRegionale_dep").castArray().compact().map(n => _.get(env("number2labelDR"), n, `unknow ${n}` ))
; NOTE(review): presumably regroups the exploded items — confirm.
[expand/aggregate]

; Copy the per-laboratory results of ws.loterre2xk into top-level fields.
[assign]
path = codesRnsr
value = get("ws.loterre2xk").castArray().map("codeRNSR")

path = laboIntitule
value = get("ws.loterre2xk").castArray().map("label")

path = delegationsRegionales
value = get("ws.loterre2xk").castArray().flatMap("dr")

path = institutCnrs
value = get("ws.loterre2xk").castArray().map("institut")

; If there is at least one institute, there is at least one CNRS affiliation
path = isCnrs
value = get("ws.loterre2xk").castArray().flatMap("institut").size().thru(size => size > 0 ? "Oui" : "Non")
; Disabled debug statement listing the fields assigned above.
; [debug]
; path = codesRnsr
; path = laboIntitule
; path = delegationsRegionales
; path = institutCnrs
; path = isCnrs
; Use the DOI as the OpenAlex lookup key.
[assign]
path = wsOpenalex
value = get("doi")

; Query the OpenAlex web service only when a lookup key is present.
[swing]
test = get("wsOpenalex").isEmpty()
reverse = true

[swing/expand]
path = wsOpenalex
size = 100

[swing/expand/URLConnect]
url = https://biblio-tools.services.istex.fr/v1/openalex/works/expand
timeout = 90007
noerror = true

; "Oui" only when OpenAlex explicitly reports is_retracted === true.
[assign]
path = isRetracted
value = get("wsOpenalex.is_retracted").thru(retracted => (retracted === true ? "Oui" : "Non"))

; Institutes flagged "(P)": each institutCnrs entry is split on ";", the
; entries containing "(P)" are kept and the " (P)" marker is removed.
[assign]
path = institutsPrincipaux
value = get("institutCnrs").flatMap(institutes => institutes.flatMap(entry => entry.split(";")).filter(name => name.includes("(P)")).map(name => name.replace(" (P)", "")))

; Institutes flagged "(S)": same processing with the "(S)" marker.
[assign]
path = institutsSecondaires
value = get("institutCnrs").flatMap(institutes => institutes.flatMap(entry => entry.split(";")).filter(name => name.includes("(S)")).map(name => name.replace(" (S)", "")))

; Unpaywall OA status, capitalized; "OA - Inconnu" when the status is missing.
[assign]
path = oaStatusUnpaywall
value = get("enrichments.openAccess.unpaywall.oaStatus", "OA - Inconnu").thru(oaStatus => (oaStatus === "OA - Inconnu" ? oaStatus : _.capitalize(oaStatus)))

; Host type label built from the distinct hostType values of the Unpaywall
; locations: two distinct values give "Commun", none gives "OA - Non".
[assign]
path = hostTypeUnpaywall
value = get("enrichments.openAccess.unpaywall.oaLocations").map("hostType").uniq().map(hostType => (hostType === "repository" ? "Archive seule" : hostType === "publisher" ? "Editeur seul" : hostType === "OA - Inconnu" ? "Type d'accès inconnu" : hostType)).thru(labels => (_.size(labels) === 2 ? "Commun" : _.size(labels) === 0 ? "OA - Non" : labels)).toString()

; Three-state OA flag derived from the Unpaywall isOa boolean.
[assign]
path = isOaUnpaywall
value = get("enrichments.openAccess.unpaywall.isOa").thru(isOa => (isOa === true ? "OA - Oui" : isOa === false ? "OA - Non" : "OA - Inconnu"))
; Open Access host-type data adjusted from the fulltext field when HAL is present.
; Turn unknown 'hostType' data into "repository" when there is no DOI but HAL
; appears in 'fulltextUrl': "repository" is appended when fulltextUrl includes
; "hal", and an "OA - Inconnu" entry immediately followed by "repository" is dropped.
[assign]
path = oaLocationsUnpaywallHal
value = get("enrichments.openAccess.unpaywall.oaLocations").map("hostType").concat(self.fulltextUrl && self.fulltextUrl.includes("hal") ? ["repository"] : []).filter((value, index, collection) => !(value === "OA - Inconnu" && collection[index + 1] === "repository")).uniq()
; Turn unknown data into "green" when there is no DOI but "repository" is
; present in 'oaLocationsUnpaywallHal': the statuses "OA - Inconnu", "OA - Non"
; and "closed" become "green"; the first resulting value is capitalized.
[assign]
path = oaStatusUnpaywallHal
value = get("enrichments.openAccess.unpaywall.oaStatus","OA - Inconnu").concat(self.oaLocationsUnpaywallHal.includes("repository") ? "repository" : []).reduce((acc, val, index, arr) => {return (["OA - Inconnu", "OA - Non", "closed"].includes(val) && arr.includes("repository"))? ["green"]: acc.concat(val)}, []).head().capitalize().replace("Oa - inconnu","OA - Inconnu")
; Turn unknown data into "OA - Oui" when there is no DOI but "green" is present
; in 'oaStatusUnpaywallHal': "OA - Inconnu" is kept, "Closed" gives "OA - Non",
; anything else gives "OA - Oui".
[assign]
path = isOaUnpaywallHal
value=get("oaStatusUnpaywallHal").thru(status=>status ==="OA - Inconnu" ? status : status ==="Closed" ? "OA - Non" : "OA - Oui" )

; Host type label built from oaLocationsUnpaywallHal: two values give "Commun",
; none gives "OA - Non".
[assign]
path = hostTypeUnpaywallHal
value = get("oaLocationsUnpaywallHal").map(host => host === "repository" ? "Archive seule" : host === "publisher" ? "Editeur seul" : host ==="OA - Inconnu" ? "Type d'accès inconnu" :host).thru(arr=>_.size(arr)===2 ? "Commun" : _.size(arr)===0 ? "OA - Non" : arr).toString()

; OA status combining OpenAlex and Unpaywall: "Diamond" when OpenAlex reports
; "diamond", otherwise the capitalized Unpaywall status, and "OA - Inconnu"
; when that status is empty.
; The Unpaywall status is read with _.get so that a record without the
; enrichments.openAccess.unpaywall branch yields "OA - Inconnu" instead of
; raising a TypeError.
[assign]
path = oaStatusUnpaywallOpenalex
value = get("wsOpenalex.open_access/oa_status").thru(status => status === "diamond" ? "Diamond" : _.capitalize(_.get(self, "enrichments.openAccess.unpaywall.oaStatus"))).replace(/^$/, "OA - Inconnu")
; New field combining the OpenAlex status with 'oaStatusUnpaywallHal':
; "Diamond" when OpenAlex reports "diamond", otherwise the value of
; oaStatusUnpaywallHal.
[assign]
path = oaStatusUnpaywallOpenalexHal
value = get("wsOpenalex.open_access/oa_status").thru(status => status ==="diamond" ? "Diamond" : self.oaStatusUnpaywallHal)
; Specific transformations producing values compatible with Vega-Lite charts.
; Both fields have the shape "OA=<flag>;TypeAcces=<type>":
; - <flag> is kept only when it contains "Oui", otherwise it becomes "null";
; - <type> is "Editeur" for "Commun" / "Editeur seul", and "null" for
;   "OA - Non", "Archive seule" and "OA - Inconnu".
; The flag is read from the derived isOaUnpaywall label ("OA - Oui", ...): the
; raw enrichments.openAccess.unpaywall.isOa value is a boolean, which never
; contains "Oui" and therefore always produced "OA=null".
; NOTE(review): TypeAcces still comes from hostTypeUnpaywallHal, as before —
; confirm whether hostTypeUnpaywall was intended for this non-HAL variant.
[assign]
path = graphSourceEditeurIsOa
value = get("isOaUnpaywall").replace(/^((?!Oui).)*$/,"null").prepend("OA=").append((";TypeAcces="+self.hostTypeUnpaywallHal).replace(/Commun|Editeur seul/g,"Editeur").replace(/OA - Non|Archive seule|OA - Inconnu/g,"null"))
; Same field built from the HAL-adjusted OA flag.
[assign]
path = graphSourceEditeurIsOaHal
value = get("isOaUnpaywallHal").replace(/^((?!Oui).)*$/,"null").prepend("OA=").append((";TypeAcces="+self.hostTypeUnpaywallHal).replace(/Commun|Editeur seul/g,"Editeur").replace(/OA - Non|Archive seule|OA - Inconnu/g,"null"))
; Determine the order of origin of the records that make up the Conditor record:
; in sourceUidChain, everything from a "$" up to the next "!" is reduced to "!",
; then the chain is split on "!" and empty parts are dropped.
[assign]
path = sourceOfMergedRecords
value = get("sourceUidChain").replace(/\$.*?!/g,"!").split("!").compact()

; Fall back to the Unpaywall full texts when the 'fulltextUrl' field (coming
; from Conditor) is empty: the URLs of the locations whose hostType is
; "repository" and whose isBest is true are used instead.
; The locations are read with _.get / _.filter so that a record without the
; enrichments.openAccess.unpaywall.oaLocations branch yields an empty list
; instead of raising a TypeError.
[assign]
path = fulltextUrl
value = get("fulltextUrl").castArray().compact().thru(fulltxt => _.isEmpty(fulltxt) ? _.filter(_.get(self, "enrichments.openAccess.unpaywall.oaLocations"), item => item && item.hostType === "repository" && item.isBest === true).map(item => item.url) : fulltxt)

; Harmonize document types.
; Start from the trimmed originalGenre.
[assign]
path = documentType
value = get("originalGenre").trim()
; Process documentType with the sub-script 03.2-enrich-docType.ini; the cache
; name is prefixed with the cache salt.
[expand]
path = documentType
size = 100
file = ./03.2-enrich-docType.ini
cacheName = fix(`${env("cacheSalt")}-04-homogenize-document-type-json`)
; If the web service returns "n/a"
[swing]
test = get("documentType").isEqual("n/a")
; overwrite it with the value of "originalGenre"
[swing/assign]
path = documentType
value = get("originalGenre").trim()

; Harmonize sources.
; Use host.title, falling back to host.conference.name, trimmed.
[assign]
path = source
value = get("host.title",_.get(self,"host.conference.name")).trim()
; Original note: when the 'host.title' and 'host.conference.name' fields are
; not empty. Process source with the sub-script 03.3-enrich-source.ini.
[expand]
path = source
size = 100
file = ./03.3-enrich-source.ini
cacheName = fix(`${env("cacheSalt")}-04-homogenize-source-json`)
; When the result is "n/a", rebuild the source from the original fields,
; upper-cased; "N/A" or an empty string becomes "NON RENSEIGNE".
[swing]
test = get("source").isEqual("n/a")
[swing/assign]
path = source
value = get("host.title",_.get(self,"host.conference.name")).trim().toUpper().thru(source => source === "N/A" || source === "" ? "NON RENSEIGNE" : source)

; Harmonize publishers.
; Extract the DOI registrant prefix: the first "/"-separated segment starting
; with "10." followed by a digit. The dot is escaped and the first match is
; taken so that a suffix such as "10_2017_25" (as in 10.1007/10_2017_25) is
; not mistaken for the prefix.
[assign]
path = ws.racineDoiPublisher
value = get("doi").split("/").filter(i => i.match(/^10\.\d/)).head()
; Query Crossref only when a prefix was found.
[swing]
test = get("ws.racineDoiPublisher").isEmpty()
reverse = true
[swing/expand]
path = ws.racineDoiPublisher
size = 1
cacheName = fix(`${env("cacheSalt")}-04-api-crossref-prefixes-expand`)
[swing/expand/URLFetch]
target = value
url = fix("https://api.crossref.org/prefixes/").append(self.value)
json = true
timeout = 60000
noerror = true
retries = 2
; Keep only message.name from the Crossref response ("n/a" when absent).
; The section name is lower-cased to match the enclosing [swing/expand]
; sections, as every other swing section in this script.
[swing/expand/assign]
path = value
value = get("value.message.name", "n/a")

; In a temporary field, take host.publisher when present, otherwise the value
; obtained from the DOI prefix lookup.
[assign]
path = tmp.publisher
value = get("host.publisher",_.get(self,"ws.racineDoiPublisher"))
; Harmonize the publisher: start from the trimmed temporary value ("n/a" when
; absent) and process it with the sub-script 03.4-enrich-publisher.ini.
[assign]
path = publisher
value = get("tmp.publisher","n/a").trim()
[expand]
path = publisher
size = 100
file = ./03.4-enrich-publisher.ini
cacheName = fix(`${env("cacheSalt")}-04-homogenize-publisher-json`)
; If host.publisher exists and the publisher is "n/a",
[swing]
test = has("host.publisher")
test = get("publisher").isEqual("n/a")
; overwrite it with the value of host.publisher
[swing/assign]
path = publisher
value = get("host.publisher")

; Upper-case the publisher; names starting with "SPRINGER" or containing
; "NATURE PUBLISHING GROUP" become "SPRINGER NATURE", and "N/A" becomes
; "NON RENSEIGNE".
[assign]
path = publisher
value = get("publisher").toUpper().thru(pub => pub.match(/^SPRINGER.*$|.*NATURE PUBLISHING GROUP.*/i) ? "SPRINGER NATURE" : pub === "N/A" ? "NON RENSEIGNE" : pub)

; Country enrichment.
; Build the list of distinct affiliation addresses of all authors, each one
; wrapped as {id, value} where id is its index in the list.
[assign]
path = ws.libpostal
value = get("authors").flatMap("affiliations").map("address").uniq().map((address, id) => ({id,value: address}))
; Apply the nested [map/...] statements to each item of ws.libpostal.
[map]
path = ws.libpostal
; Parse each address with the addresses/parse web service; the cache name is
; prefixed with the cache salt.
[map/expand]
path = value
size = 100
cacheName = fix(`${env("cacheSalt")}-04-address-expand`)
[map/expand/URLConnect]
url = https://affiliations-tools.services.istex.fr/v1/addresses/parse
timeout = 90007
noerror = true
; Store value.id under value.value.address and clean the country: every
; non-word character is replaced by a space, then the result is trimmed.
[map/expand/assign]
path = value.value.address
value = get("value.id")
path = value.value.country
value = get("value.value.country").replace(/\W/g, " ").trim()
; Replace value by its inner value object.
[map/expand/assign]
path = value
value = get("value.value")
; Resolve the country through the Loterre 9SD identify service.
; NOTE(review): unlike the other URLConnect sections of this script, this one
; sets no `noerror` — confirm that this is intended.
[map/expand/expand]
path = value.country
size = 10
cacheName = fix(`${env("cacheSalt")}-04-country-expand`)
[map/expand/expand/URLConnect]
url = https://loterre-resolvers.services.istex.fr/v1/9SD/identify
timeout = 90008
; Replace each mapped item by its value.
[map/exchange]
value = get("value")
; TODO: when a `state` field is present, the address is in the United States (United States of America)
; Distinct, non-empty country codes and English country labels taken from the
; `country` object of each ws.libpostal item.
[assign]
path = codeISO
value = get("ws.libpostal").castArray().filter(Boolean).map(n => n.country?.cartographyCode).uniq().filter(Boolean)
path = countries
value = get("ws.libpostal").castArray().filter(Boolean).map(n => n.country?.["prefLabel@en"]).uniq().filter(Boolean)
; Remove unwanted fields: the working fields created by this script and the
; source fields listed below are dropped from the output record.
[exchange]
value = omit(["wsHal","retrieveHalRnsr","tmp","ws","originalGenre","localRef","pii","host","wsOpenalex","enrichments","oaLocationsUnpaywallHal"])