# web-dumps / conditor-dumps / 04-enrich.ini
# Pipeline-level setting declared before any section.
# NOTE(review): presumably asks the runner to append a `pack` statement to this script — confirm.
append = pack

[use]
# Plugins loaded for the statements of this script.
# The original note singles out URLConnect, which is used several times further down.
plugin = basics
plugin = analytics

[unpack]

# Original author's note (translated): when the RNSR identifiers are not provided in
# authors.*.affiliations.*.rnsr, the web service is used and puts them at the same level in wsRnsr.
# NOTE(review): the statements below read ApilAllRnsr and write ws.loterre2xk — confirm this note is still current.
#
# For each affiliation of each author, resolve the identifiers found in
# ApilAllRnsr through the Loterre 2XK web service and keep the French labels.
[map]
path = authors

[map/map]
path = affiliations

# Copy the identifiers into a working field
[map/map/assign]
path = ws.loterre2xk
value = get("ApilAllRnsr")

# Run the working field through the sub-pipeline below
[map/map/expand]
path = ws.loterre2xk

[map/map/expand/exploding]

[map/map/expand/expand]
path = value
size = 100
cacheName = 04-2xk-expand

[map/map/expand/expand/URLConnect]
url = https://loterre-resolvers.services.inist.fr/v1/2XK/identify
timeout = 120000
noerror = true

[map/map/expand/aggregate]

# Extract the labels from the resolved entries
[map/map/assign]
# Original author's note: ws.loterre2xk holds as many elements as ApilAllRnsr,
# in the same order.
# Falsy entries and plain strings are dropped; a missing French label becomes "n/a".
path = ApilWsLaboIntitule
value = get("ws.loterre2xk").castArray() \
        .filter(entry => Boolean(entry) && typeof entry !== 'string') \
        .map(concept => concept["prefLabel@fr"] ?? "n/a")

[assign]

# Disabled: laboratory acronyms (ApilWsLaboSigle) are not computed here.
; path = ApilWsLaboSigle
; value = get("ws.rnsr").map("value").flatten().map("sigle")

# Collect the labels of every affiliation of every author, without duplicates.
path = ApilWsLaboIntitule
value = get("authors") \
        .flatMap("affiliations") \
        .flatMap("ApilWsLaboIntitule") \
        .uniq()

[assign]
# Accent-free, upper-case copy of the labels, kept in a temporary field
path = tmp.ApilWsLaboIntitule
value = get("ApilWsLaboIntitule").castArray().filter(Boolean) \
    .map(label => label.normalize("NFKD").replace(/[\u0300-\u036f]/g, "").toUpperCase())

[assign]
# Pair each acronym with the normalized label found at the same index,
# joined as "ACRONYM - LABEL" (the label part is skipped when missing).
# NOTE(review): ApilWsLaboSigle is not assigned anywhere in the visible part of
# this script — confirm it is provided upstream, otherwise the result is always empty.
path = ApilWsSigleLaboIntitule
value = get("ApilWsLaboSigle").castArray().filter(Boolean) \
        .map((acronym, index) => \
            [acronym, _.get(self, "tmp.ApilWsLaboIntitule")[index]] \
                .filter(Boolean) \
                .join(" - "))

# Retrieve the CNRS institutes from the RNSR identifiers.
# First step: build one { id, value } entry per identifier, where value is
# "<RNSR>|<ApilPublicationDate>".

[assign]
path = ApilWsInstitutCnrs
value = get("ApilAllRnsr").castArray().filter(Boolean) \
        .map((rnsr, index) => ({ \
            id: index, \
            value: `${rnsr}|${self.ApilPublicationDate}` \
        }))

# Send the value of each entry to the rnsr-year/instituts-cnrs web service
[map]
path = ApilWsInstitutCnrs

[map/expand]
path = value
size = 100
cacheName = 04-rnsr-year-instituts-cnrs

[map/expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/rnsr-year/instituts-cnrs
timeout = 90002
noerror = true

# Keep only the resolved values, dropping empty answers and "n/a"
[assign]
path = ApilWsInstitutCnrs
value = get("ApilWsInstitutCnrs").castArray().filter(Boolean) \
        .map("value") \
        .filter(label => Boolean(label) && label !== "n/a")

# At least one institute means at least one CNRS affiliation
[assign]
path = ApilWsIsCnrs
value = get("ApilWsInstitutCnrs").thru(instituts => instituts.length > 0)

################################################################

# Homogenize document types: start from the trimmed "originalGenre"
[assign]
path = ApilWsTypeDoc
value = get("originalGenre").trim()

# Submit that value to the homogenize/documentType web service
[expand]
path = ApilWsTypeDoc
size = 100
cacheName = 04-homogenize-document-type-json

[expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/homogenize/documentType/json
timeout = 90003
noerror = true

# When the web service answers "n/a"
[swing]
test = get("ApilWsTypeDoc").isEqual("n/a")

# overwrite it with the value of "originalGenre"
[swing/assign]
path = ApilWsTypeDoc
value = get("originalGenre").trim()

# Homogenize sources: take "host.title", falling back to "host.conference.name", trimmed
[assign]
path = ApilWsSource
value = get("host.title",_.get(self,"host.conference.name")).trim()

# Original author's note: applies when 'host.title' and 'host.conference.name' are not empty
# NOTE(review): no explicit emptiness test is visible here — confirm how empty values are handled.
[expand]
path = ApilWsSource
size = 100
cacheName = 04-homogenize-source-json

[expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/homogenize/source/json
timeout = 90004
noerror = true

# When the "ApilWsSource" field returned by the web service is "n/a"
[swing]
test = get("ApilWsSource").isEqual("n/a")

# restore the original title (or conference name)
[swing/assign]
path = ApilWsSource
value = get("host.title",_.get(self,"host.conference.name")).trim()

# Publisher handling
# Extract the DOI prefix (the "10.<registrant>" segment) from the "doi" field.
# The dot is escaped so that only segments really starting with "10." qualify:
# a suffix such as "10_2007_093" (as in 10.1007/10_2007_093) must not be taken for the prefix.
[assign]
path = ws.ApilRacineDoiPublisher
value = get("doi").split('/').filter(i => i.match(/^10\./)).pop()

# Look the DOI prefix up in the Crossref prefixes API
[expand]
path = ws.ApilRacineDoiPublisher
size = 1
cacheName = 04-api-crossref-prefixes-expand

# Fetch https://api.crossref.org/prefixes/<prefix> as JSON into "value"
[expand/URLFetch]
target = value
url = fix('https://api.crossref.org/prefixes/').append(self.value)
json = true
timeout = 60000
noerror = true
retries = 2

# Keep only message.name from the response, or "n/a" when it is missing
[expand/assign]
path = value
value = get('value.message.name', 'n/a')

# In a temporary field, take host.publisher when present, otherwise the value obtained from the DOI web service.
[assign]
path = tmp.ApilWsPublisher
value = get("host.publisher",_.get(self,"ws.ApilRacineDoiPublisher"))

# Homogenize the publisher: start from the trimmed temporary value
[assign]
path = ApilWsPublisher
value = get("tmp.ApilWsPublisher").trim()

# Submit it to the homogenize/publisher web service
[expand]
path = ApilWsPublisher
size = 100
cacheName = 04-homogenize-publisher-json

[expand/URLConnect]
url = https://mapping-tools.services.inist.fr/v1/homogenize/publisher/json
timeout = 90006
noerror = true

# When host.publisher exists and ApilWsPublisher is "n/a",
[swing]
test = has("host.publisher")
test = get("ApilWsPublisher").isEqual("n/a")

# overwrite it with the value of host.publisher, trimmed like the value that was
# submitted to the web service (and like the document-type and source fallbacks)
[swing/assign]
path = ApilWsPublisher
value = get("host.publisher").trim()

# Country enrichment
# Build one { id, value } entry per distinct affiliation address
[assign]
path = ws.libpostal
value = get("authors").flatMap("affiliations").map("address").uniq() \
        .map((address, index) => ({ id: index, value: address }))

# For each address entry, send its value to the addresses/parse web service
[map]
path = ws.libpostal

[map/expand]
path = value
size = 100
cacheName = 04-address-expand

[map/expand/URLConnect]
url = https://affiliations-tools.services.inist.fr/v1/addresses/parse
timeout = 90007
noerror = true

# Reshape the answer of the address parser.
# NOTE(review): presumably value.id echoes the submitted address — confirm against the web service.
[map/expand/assign]
path = value.value.address
value = get('value.id')

# Replace punctuation with spaces before the country lookup.
# Unicode-aware class: letters (with combining marks), digits and "_" are kept,
# so accented country names are no longer broken apart as they were with the ASCII-only \W.
path = value.value.country
value = get('value.value.country').replace(/[^\p{L}\p{M}\p{N}_]/gu, ' ').trim()

# Keep only the inner object
[map/expand/assign]
path = value
value = get('value.value')

# Resolve the country name through the Loterre 9SD web service
[map/expand/expand]
path = value.country
size = 10
cacheName = 04-country-expand

# noerror is set like on every other web service call of this script, so that
# a failed country lookup does not abort the whole enrichment
[map/expand/expand/URLConnect]
url = https://loterre-resolvers.services.inist.fr/v1/9SD/identify
timeout = 90008
noerror = true

# Replace each { id, value } entry with its value
[map/exchange]
value = get('value')

# TODO: when a "state" field is present, the address is in the United States of America

[assign]
# Distinct, non-empty cartography codes of the resolved countries
path = ApilWsCodeISO
value = get("ws.libpostal").castArray().filter(Boolean) \
        .map(parsed => parsed.country?.cartographyCode) \
        .filter(Boolean).uniq()

# Distinct, non-empty English labels of the resolved countries
path = ApilWsCountry
value = get("ws.libpostal").castArray().filter(Boolean) \
        .map(parsed => parsed.country?.["prefLabel@en"]) \
        .filter(Boolean).uniq()

# Remove unwanted fields: the temporary "tmp" field is dropped from the record
[exchange]
value = omit(['tmp'])