# web-dumps / conditor-dumps / 04-enrich.ini
# Pipeline-level setting: the `pack` statement is appended to the pipeline
# (presumably to re-serialise what [unpack] below deserialises — confirm
# against the ezs documentation).
append = pack

[use]
# The basics plugin is loaded for the URLConnect statement used below.
plugin = basics

# First statement of the pipeline; it takes no parameter here.
[unpack]

# Original note (translated): when the RNSR are not provided in
# authors.*.affiliations.*.rnsr, the web service is used, which puts them at
# the same level in wsRnsr.
# NOTE(review): the statements below read ApilRnsr and write ws.loterre2xk;
# neither `rnsr` nor `wsRnsr` appears in them — this note may be stale.
# Iterate over every author...
[map]
path = authors

# ...and over every affiliation of that author.
[map/map]
path = affiliations

# Fetch the Loterre 2XK information: wrap each RNSR of the affiliation in an
# {id, value} object, where id is its index in ApilRnsr.
[map/map/assign]
path = ws.loterre2xk
value = get("ApilRnsr").map((rnsr,id) => ({id, value: rnsr}))

# Iterate over the {id, value} objects stored in ws.loterre2xk.
[map/map/map]
path = ws.loterre2xk

# Submit the `value` field to the web service declared below
# (presumably `size` is the batch size — confirm).
[map/map/map/expand]
path = value
size = 100


# Loterre 2XK resolver. timeout is presumably expressed in milliseconds and
# noerror presumably keeps the pipeline running when a request fails — confirm.
[map/map/map/expand/URLConnect]
url = https://loterre-resolvers.services.inist.fr/v1/2XK/expand
timeout = 120000
noerror = true

# Extract one laboratory label per entry of ws.loterre2xk.
# The elements of ws.loterre2xk are in the same order as ApilRnsr, and there
# are as many of them.
# `?.` guards against an entry whose `value` is null or undefined: such an
# entry yields "n/a" instead of throwing and aborting the whole record.
[map/map/assign]
path = ApilWsLaboIntitule
value = get("ws.loterre2xk") \
        .map((orga) => orga.value?.["skos$prefLabel/$t"] ?? "n/a")

; [assign]
; path = ApilWsIsCnrs
; value = get("ws.rnsr").map("value").flatten() \
;         .map("etabAssoc").flatten() \
;         .map("etab.sigle") \
;         .filter(sigle => sigle === "CNRS") \
;         .thru(arr => Boolean(arr.length))

; path = ApilWsLaboSigle
; value = get("ws.rnsr").map("value").flatten().map("sigle")

; path = ApilWsLaboIntitule
; value = get("ws.rnsr").map("value").flatten().map("intitule")

; [assign]
; path = tmp.authorsAffiliationsRnsr
; value = get("authors") \
;     .map("affiliations") \
;     .map(affiliations => affiliations.map( \
;         affiliation => affiliation.rnsr) \
;     )

[assign]
# Concatenate and deduplicate all the RNSR found on the affiliations of all
# the authors.
path = ApilRnsr
value = get("authors").flatMap("affiliations").flatMap("ApilRnsr").uniq()

# Strip the accents (NFKD decomposition, then removal of the combining marks
# U+0300 to U+036F) and upper-case each label.
# NOTE(review): in the visible part of this script ApilWsLaboIntitule is
# assigned on each affiliation, not at the root of the document; unless it is
# also set at the root elsewhere, this get() finds nothing here — confirm.
path = tmp.ApilWsLaboIntitule
value = get("ApilWsLaboIntitule") \
        .map(str => str \
            .normalize("NFKD").replace(/[\u0300-\u036f]/g, "") \
            .toUpperCase() \
        )

# Pair each laboratory acronym with the normalised label found at the same
# index, drop whichever part is missing, and join what remains with " - ".
# NOTE(review): in the visible part of this script ApilWsLaboSigle is only
# assigned in commented-out code — confirm it is still provided.
[assign]
path = ApilWsSigleLaboIntitule
value = get("ApilWsLaboSigle") \
        .map((acronym, index) => \
            [acronym, _.get(self, "tmp.ApilWsLaboIntitule")[index]] \
                .filter(Boolean) \
                .join(" - ") \
        )

# Fetch the CNRS institutes from the RNSR: start by wrapping every RNSR in an
# {id, value} object, id being its position in ApilRnsr.
[assign]
path = ApilWsInstitutCnrs
value = get("ApilRnsr").map((rnsr, index) => ({id: index, value: rnsr}))

# Iterate over the elements of ApilWsInstitutCnrs.
[map]
path = ApilWsInstitutCnrs

# Submit the `value` field of each element to the web service declared below
# (presumably `size` is the batch size and `cacheName` names the cache used
# for the answers — confirm).
[map/expand]
path = value
size = 100
cacheName = 04-rnsr-instituts-cnrs-json

# RNSR to CNRS institutes mapping service. timeout is presumably expressed in
# milliseconds; noerror presumably keeps the pipeline running when a request
# fails — confirm.
[map/expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/rnsr/instituts-cnrs/json
timeout = 90002
noerror = true

# Keep only the `value` of each element, discarding the empty ones and the
# "n/a" placeholders.
[assign]
path = ApilWsInstitutCnrs
value = get("ApilWsInstitutCnrs") \
        .map("value") \
        .reject(institut => !institut || institut === "n/a")

# At least one institute means at least one CNRS affiliation.
[assign]
path = ApilWsIsCnrs
value = get("ApilWsInstitutCnrs").thru(instituts => instituts.length > 0)

################################################################

# Homogenise the document types: start from the raw "originalGenre"...
[assign]
path = ApilWsTypeDoc
value = get("originalGenre")

# ...and submit it to the web service declared below (presumably `size` is
# the batch size and `cacheName` names the cache — confirm).
[expand]
path = ApilWsTypeDoc
size = 100
cacheName = 04-homogenize-document-type-json

# Document-type homogenisation service.
[expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/homogenize/documentType/json
timeout = 90003
noerror = true

# When the web service answered "n/a"...
[swing]
test = get("ApilWsTypeDoc").isEqual("n/a")

# ...overwrite it with the value of "originalGenre".
[swing/assign]
path = ApilWsTypeDoc
value = get("originalGenre")

# Homogenise the sources: take host.title, falling back to
# host.conference.name when host.title is undefined.
[assign]
path = ApilWsSource
value = get("host.title",_.get(self,"host.conference.name"))

# Only query the web service when ApilWsSource is not empty, i.e. when
# 'host.title' or 'host.conference.name' provided a value.
# isEmpty() with reverse is used instead of has(): has() only checks that
# the key exists, so it also lets empty or undefined values through.
[swing]
test = get("ApilWsSource").isEmpty()
reverse = true

# Submit ApilWsSource to the web service declared below (presumably `size`
# is the batch size and `cacheName` names the cache — confirm).
[swing/expand]
path = ApilWsSource
size = 100
cacheName = 04-homogenize-source-json

# Source homogenisation service.
[swing/expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/homogenize/source/json
timeout = 90004
noerror = true

# When the "ApilWsSource" value returned by the web service is "n/a"...
[swing]
test = get("ApilWsSource").isEqual("n/a")

# ...restore the original value (host.title, else host.conference.name).
[swing/assign]
path = ApilWsSource
value = get("host.title",_.get(self,"host.conference.name"))

# Publisher processing.
# When host.publisher is missing (has() combined with reverse)...
[swing]
test = has("host.publisher")
reverse = true

# ...start from the DOI...
[swing/assign]
path = ws.ApilRacineDoiPublisher
value = get("doi")

# ...and submit it to the web service declared below (presumably `size` is
# the batch size and `cacheName` names the cache — confirm).
[swing/expand]
path = ws.ApilRacineDoiPublisher
size = 100
cacheName = 04-crossref-prefixes-expand

# Crossref prefixes service.
# NOTE(review): this is the only URLConnect of the visible script that sets
# `retries` — confirm this is intentional.
[swing/expand/URLConnect]
url = https://biblio-tools.services.inist.fr/v1/crossref/prefixes/expand
timeout = 90005
noerror = true
retries = 1

# In a temporary field, take host.publisher when it is defined, otherwise
# the value held in ws.ApilRacineDoiPublisher (DOI web service).
[assign]
path = tmp.ApilWsPublisher
value = get("host.publisher",_.get(self,"ws.ApilRacineDoiPublisher"))

# Homogenise the publisher, starting from the temporary field.
[assign]
path = ApilWsPublisher
value = get("tmp.ApilWsPublisher")

# Submit ApilWsPublisher to the web service declared below (presumably
# `size` is the batch size and `cacheName` names the cache — confirm).
[expand]
path = ApilWsPublisher
size = 100
cacheName = 04-homogenize-publisher-json

# Publisher homogenisation service.
[expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/homogenize/publisher/json
timeout = 90006
noerror = true

# When host.publisher exists and ApilWsPublisher is "n/a" (two `test` lines;
# presumably both must hold, as this sentence states — confirm),
[swing]
test = has("host.publisher")
test = get("ApilWsPublisher").isEqual("n/a")

# overwrite it with the value of host.publisher.
[swing/assign]
path = ApilWsPublisher
value = get("host.publisher")

# Country enrichment: collect the distinct affiliation addresses of all the
# authors and wrap each one in an {id, value} object (id is its index).
[assign]
path = ws.libpostal
value = get("authors") \
        .flatMap("affiliations") \
        .map("address") \
        .uniq() \
        .map((addr, index) => ({id: index, value: addr}))
# Iterate over the elements of ws.libpostal.
[map]
path = ws.libpostal

# Submit the `value` field of each element to the web service declared below
# (presumably `size` is the batch size and `cacheName` names the cache —
# confirm).
[map/expand]
path = value
size = 100
cacheName = 04-address-expand

# Affiliation address expansion service.
[map/expand/URLConnect]
url = https://affiliations-tools.services.inist.fr/v1/expand
timeout = 90007
noerror = true

# Distinct country codes and distinct English country labels read from the
# elements of ws.libpostal; elements without a country yield undefined.
[assign]
path = ws.cartographyCodes
value = get("ws.libpostal") \
        .map(entry => entry.value?.country?.cartographyCode) \
        .uniq()

path = ws.prefLabels
value = get("ws.libpostal") \
        .map(entry => entry.value?.country?.["prefLabel@en"]) \
        .uniq()

# Copy both working lists into their final fields.
[assign]
path = ApilWsCodeISO
value = get("ws.cartographyCodes")

path = ApilWsCountry
value = get("ws.prefLabels")

# Remove the unwanted fields, including the tmp and ws working fields.
[exchange]
value = omit(['business','origins','technical','tmp','ws'])
# Disabled variant below: it omits the same fields except tmp and ws.
; value = omit(['business','origins','technical'])