; web-dumps / doiwos-dumps / 01-harvest-from-doi.ini
; Usage: WOS_API_KEY=<your key> npx ezs 01-harvest-from-doi.ini < dois.txt

; Inspiré de https://gitbucket.inist.fr/tdm/web-services/blob/master/biblio-tools/v1/wos/works/expand.ini

; Load the plugins whose statements this script relies on.
[use]
plugin = @ezs/basics
plugin = @ezs/analytics

# Flow configuration
# Parse the raw text input (dois.txt, see Usage) into separate string items.
# NOTE(review): no separator is given, so this relies on the statement's
# default; presumably one item per line - confirm.
[TXTParse]

; Variables shared by the statements below (read back with env('name')).
[env]
; Number of DOIs batched by [group] into a single query.
; Maximum 90 (theoretically 100, but one DOI may return several records).
; Sometimes a "Request Header Fields Too Large" error occurs, probably
; because the URL is too long (too many overly long DOIs).
path = size
value = 50

; Passed to [dump] as its indent option.
path = indent
value = true

; API key taken from WOS_API_KEY; sent by [URLStream] in the X-ApiKey header.
path = token
value = env('WOS_API_KEY')

# Clean up DOIs
# Each input string is normalised and stored in the "value" field.
# The steps are applied in the order listed (one .replace() per step):
# - lower case
# - remove BOM
# - from CRLF to LF (remove the carriage return)
# - remove - at the end of a DOI
# - replace middle point with normal point
# - replace en dash with normal hyphen-minus
# - remove soft hyphen (discretionary hyphen)
# - remove "http" part of a DOI (everything up to "doi/" or "doi.org/")
# - remove "doi:" part of a DOI
# - remove URL following a DOI
# - remove duplicates of a DOI
# - remove PMID following DOI
# - remove part preceding a DOI
# - remove blanks
# - remove ;subjmeta part of a DOI
# - remove double quotes from DOIs
[replace]
path = value
value = toLower().replace(/^\uFEFF/, "").replace(/\r/,"").replace(/-$/, "").replace(/\u00B7/g, ".").replace(/\u2013/g, "-").replace(/\u00AD/g, "").replace(/^.*doi(\.org)?\/(.+)/g, "$2").replace(/^.*doi[: ]+10\./g, "10.").replace(/^(10\.\d+\/.+\S) https?:\/\/.+/, "$1").replace(/^(10\.\d+\/\S+) \1.*/, "$1").replace(/^(10\.\d+\/\S+) pmid.+/, "$1").replace(/^.* (10\.\d+\/.+)/, "$1").replace(/ /g, "").replace(/;subjmeta=\d+$/,"").replace(/"+/g, "")

; Batch the cleaned DOIs: each output item is a list of up to env('size')
; {value} objects, i.e. one batch per API request.
[group]
size = env('size')

; Turn a batch into a single "dois" string: take each item's value, drop
; empty ones, wrap each DOI in double quotes (JSON.stringify) and join them
; with " OR ".
[replace]
path = dois
value = self().map('value').filter(Boolean).map(x => JSON.stringify(x)).join(' OR ')

; Build the parameters of the Web of Science query for one batch of DOIs.
[replace]
; Search on the DO (DOI) field: DO=("doi1" OR "doi2" OR ...)
path = usrQuery
value = fix('DO=(', self.dois ,')').join('')

path = databaseId
value = WOK

; Ask for the API maximum (100) instead of env('size'): one DOI may return
; several records, so a batch of N DOIs can match more than N records and
; the surplus would otherwise be silently dropped.
path = count
value = 100

path = firstRecord
value = 1

; FR = Full Record
path = optionView
value = FR

# Rate limiting: at most 1 request per second reaches the API.
[throttle]
bySecond = 1

; [debug]

; Query the Web of Science API for each batch.
; - header: the API key from env('token'), prefixed with "X-ApiKey:".
; - path: selects the records under Data.Records.records.REC in the response;
;   NOTE(review): presumably each matched element becomes one output item.
; - timeout: presumably in milliseconds (50 s) - confirm.
; - noerror / retries: NOTE(review): presumably a failed request is retried
;   once and then ignored instead of stopping the flow - confirm.
[URLStream]
url = https://wos-api.clarivate.com/api/wos/
header = env('token').prepend('X-ApiKey:')
path = Data.Records.records.REC.*
timeout = 50000
noerror = true
retries = 1

; Write out the harvested records; indentation is controlled by the
; "indent" variable set in [env].
[dump]
indent = env('indent')