# web-dumps / conditor-dumps / 04-enrich.ini
# Pipeline-level setting naming `pack` as the statement to append.
# NOTE(review): presumably appended at the end of the pipeline as the
# counterpart of the [unpack] below - confirm.
append = pack

[use]
# Plugin loaded for URLConnect
plugin = basics

# NOTE(review): presumably turns the incoming serialised stream back into objects - confirm.
[unpack]

# Builds the structure sent to the v1/rnsr/info web service:
# one {id, value: {year, address}} entry per distinct affiliation address,
# where id is the position of the address in the de-duplicated list.
[assign]
path = ws.rnsr
value = get("authors").flatMap("affiliations").map("address").uniq() \
        .map((addr, idx) => ({ \
            id: idx, \
            value: { year: _.get(self, "ApilPublicationDate"), address: addr } \
        }))

# NOTE(review): [map] presumably applies the nested statements to every
# element of the array found at this path - confirm.
[map]
path = ws.rnsr

# Targets the "value" field of each element (the {year, address} object built above).
# NOTE(review): size is presumably the batch size and cacheName the cache identifier - confirm.
[map/expand]
path = value
size = 100
cacheName = 04-rnsr-info

# Web service queried for the RNSR information.
# NOTE(review): timeout is presumably in milliseconds, and noerror presumably
# keeps a failed request from aborting the pipeline - confirm.
[map/expand/URLConnect]
url = https://affiliations-tools.services.inist.fr/v1/rnsr/info
timeout = 90001
noerror = true

# Retrieves the information from the RNSR results stored in ws.rnsr
[assign]
# true when at least one associated institution carries the acronym "CNRS"
path = ApilWsIsCnrs
value = get("ws.rnsr") \
        .flatMap("value") \
        .flatMap("etabAssoc") \
        .map("etab.sigle") \
        .filter(acronym => acronym === "CNRS") \
        .thru(matches => Boolean(matches.length))

# "sigle" of every returned structure
path = ApilWsLaboSigle
value = get("ws.rnsr").flatMap("value").map("sigle")

# "intitule" of every returned structure
path = ApilWsLaboIntitule
value = get("ws.rnsr").flatMap("value").map("intitule")

# RNSR identifiers coming from the web service
path = tmp.Rnsr1
value = get("ws.rnsr").flatMap("value").map("num_nat_struct")

# RNSR identifiers already present on the source affiliations
path = tmp.Rnsr2
value = get("authors").flatMap("affiliations").flatMap("rnsr")

[assign]
# Concatenates the two RNSR fields (web service results, then source
# affiliations) and removes duplicates
path = ApilWsRnsr
value = get("tmp.Rnsr1").concat(_.get(self,"tmp.Rnsr2")).uniq()

# Removes accents and upper-cases each laboratory name.
# An entry without a name (null/undefined "intitule") becomes "" instead of
# raising a TypeError on .normalize(); the array keeps its length, so the
# index-based pairing with ApilWsLaboSigle stays aligned.
path = tmp.ApilWsLaboIntitule
value = get("ApilWsLaboIntitule") \
        .map(str => String(str ?? "") \
            .normalize("NFKD").replace(/[\u0300-\u036f]/g, "") \
            .toUpperCase() \
        )

# Builds one "sigle - intitule" label per laboratory: each acronym is paired
# with the normalised name found at the same index, and empty parts are
# dropped before joining.
[assign]
path = ApilWsSigleLaboIntitule
value = get("ApilWsLaboSigle") \
        .map((acronym, idx) => \
            [acronym, _.get(self, "tmp.ApilWsLaboIntitule")[idx]] \
                .filter(Boolean) \
                .join(" - ") \
        )

# Retrieves the CNRS institutes from the RNSR identifiers

# Wraps each RNSR identifier as {id, value}, id being its position in the list
[assign]
path = ApilWsInstitutCnrs
value = get("ApilWsRnsr").map((rnsr, idx) => ({ id: idx, value: rnsr }))

# NOTE(review): [map] presumably applies the nested statements to every
# element of the array found at this path - confirm.
[map]
path = ApilWsInstitutCnrs

# Targets the "value" field of each element (an RNSR identifier).
# NOTE(review): size is presumably the batch size and cacheName the cache identifier - confirm.
[map/expand]
path = value
size = 100
cacheName = 04-rnsr-instituts-cnrs-json

# Web service queried for the RNSR -> CNRS institutes mapping.
# NOTE(review): timeout presumably in milliseconds; noerror presumably
# tolerates failed requests - confirm.
[map/expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/rnsr/instituts-cnrs/json
timeout = 90002
noerror = true

# Keeps only the "value" of each entry, dropping the {id, value} wrapper
[assign]
path = ApilWsInstitutCnrs
value = get("ApilWsInstitutCnrs").map("value")

# Homogenises the document types
# ApilWsTypeDoc starts as a copy of "originalGenre"
[assign]
path = ApilWsTypeDoc
value = get("originalGenre")

# Targets ApilWsTypeDoc for the web service call declared below.
# NOTE(review): size is presumably the batch size and cacheName the cache identifier - confirm.
[expand]
path = ApilWsTypeDoc
size = 100
cacheName = 04-homogenize-document-type-json

# NOTE(review): timeout presumably in milliseconds; noerror presumably
# tolerates failed requests - confirm.
[expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/homogenize/documentType/json
timeout = 90003
noerror = true

# If the web service returns "n/a"
[swing]
test = get("ApilWsTypeDoc").isEqual("n/a")

# Overwrite it with the value of "originalGenre"
[swing/assign]
path = ApilWsTypeDoc
value = get("originalGenre")

# Homogenises the sources
# Takes "host.title", falling back to "host.conference.name" when the title is undefined
[assign]
path = ApilWsSource
value = get("host.title",_.get(self,"host.conference.name"))

# If the 'host.title' and 'host.conference.name' fields are not empty
# NOTE(review): has() only checks that the path exists - confirm it is false
# when both source fields are missing.
[swing]
test = has("ApilWsSource")

# NOTE(review): size is presumably the batch size and cacheName the cache identifier - confirm.
[swing/expand]
path = ApilWsSource
size = 100
cacheName = 04-homogenize-source-json

# NOTE(review): timeout presumably in milliseconds; noerror presumably
# tolerates failed requests - confirm.
[swing/expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/homogenize/source/json
timeout = 90004
noerror = true

# If the "ApilWsSource" field coming back from the web service is "n/a"
[swing]
test = get("ApilWsSource").isEqual("n/a")

# Restore the original value (same expression as the initial assignment)
[swing/assign]
path = ApilWsSource
value = get("host.title",_.get(self,"host.conference.name"))

# Publisher processing
# If the host.publisher field is empty (has() test, reversed)
[swing]
test = has("host.publisher")
reverse = true

# Seeds the working field with the record's DOI
[swing/assign]
path = ws.ApilRacineDoiPublisher
value = get("doi")

# Targets that field for the Crossref prefixes web service declared below.
# NOTE(review): size is presumably the batch size and cacheName the cache identifier - confirm.
[swing/expand]
path = ws.ApilRacineDoiPublisher
size = 100
cacheName = 04-crossref-prefixes-expand

# NOTE(review): timeout presumably in milliseconds; noerror presumably
# tolerates failed requests; retries presumably caps the number of attempts - confirm.
[swing/expand/URLConnect]
url = https://biblio-tools.services.inist.fr/v1/crossref/prefixes/expand
timeout = 90005
noerror = true
retries = 1

# In a temporary field, take the host.publisher value when present,
# otherwise the one obtained from the DOI web service.
[assign]
path = tmp.ApilWsPublisher
value = get("host.publisher",_.get(self,"ws.ApilRacineDoiPublisher"))

# Homogenises the publisher
[assign]
path = ApilWsPublisher
value = get("tmp.ApilWsPublisher")

# NOTE(review): size is presumably the batch size and cacheName the cache identifier - confirm.
[expand]
path = ApilWsPublisher
size = 100
cacheName = 04-homogenize-publisher-json

# NOTE(review): timeout presumably in milliseconds; noerror presumably
# tolerates failed requests - confirm.
[expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/homogenize/publisher/json
timeout = 90006
noerror = true

# If host.publisher exists and ApilWsPublisher equals "n/a",
# NOTE(review): relies on both "test" entries being combined with a logical AND - confirm.
[swing]
test = has("host.publisher")
test = get("ApilWsPublisher").isEqual("n/a")

# Overwrite it with the value of host.publisher
[swing/assign]
path = ApilWsPublisher
value = get("host.publisher")

# Country enrichment
# One {id, value: address} entry per distinct affiliation address,
# id being the position of the address in the de-duplicated list.
[assign]
path = ws.libpostal
value = get("authors").flatMap("affiliations").map("address").uniq() \
        .map((addr, idx) => ({ id: idx, value: addr }))

# NOTE(review): [map] presumably applies the nested statements to every
# element of the array found at this path - confirm.
[map]
path = ws.libpostal

# Targets the "value" field (the address) of each element
[map/expand]
path = value
size = 100
cacheName = 04-address-expand

# Web service queried to expand each address
[map/expand/URLConnect]
url = https://affiliations-tools.services.inist.fr/v1/expand
timeout = 90007
noerror = true

# Collects the distinct country codes and English country labels found in the
# expanded addresses. Optional chaining covers both "value" and "country", so
# an entry whose value is null/undefined (e.g. an affiliation without address)
# yields undefined instead of raising a TypeError.
[assign]
path = ws.cartographyCodes
value = get("ws.libpostal").map(n => n.value?.country?.cartographyCode).uniq()

path = ws.prefLabels
value = get("ws.libpostal").map(n => n.value?.country?.["prefLabel@en"]).uniq()

# Copies the collected codes and labels to their final field names
[assign]
path = ApilWsCodeISO
value = get("ws.cartographyCodes")

path = ApilWsCountry
value = get("ws.prefLabels")

# Removes the unwanted fields: the record is replaced by itself minus the
# listed top-level keys (including the "tmp" and "ws" working fields).
[exchange]
value = omit(['business','origins','technical','tmp','ws'])