; Usage: npx ezs harvest-from-doi.ini < dois.txt
; Inspired by https://gitbucket.inist.fr/tdm/web-services/blob/master/biblio-tools/v1/wos/works/expand.ini
;
; Reads one DOI per input line, normalises each DOI, batches them, and sends
; one "DO=(...)" query per batch to the Web of Science API. The records found
; under Data.Records.records.REC are dumped on the output.

[use]
plugin = @ezs/basics
plugin = @ezs/analytics

# Flow configuration
[TXTParse]

[env]
; Batch size: at most 90 (theoretically 100, but a single DOI may return
; several records).
; A "Request Header Fields Too Large" error is sometimes returned, probably
; because the URL gets too long (too many DOIs that are too long).
path = size
value = 50

; Forwarded to the final [dump] statement.
path = indent
value = true

; API key, read from the WOS_API_KEY environment variable.
path = token
value = env('WOS_API_KEY')

# Clean up DOIs
# - lower case
# - remove BOM
# - from CRLF to LF
# - remove - at the end of a DOI
# - replace middle point with normal point
# - replace en dash with normal hyphen-minus
# - remove soft hyphen (discretionary hyphen)
# - remove "http" part of a DOI
# - remove "doi:" part of a DOI
# - remove URL following a DOI
# - remove duplicates of a DOI
# - remove PMID following DOI
# - remove part preceding a DOI
# - remove blanks
# - remove ;subjmeta part of a DOI
# - remove double quotes from DOIs
; The replacements are applied in the order listed above; keep the whole
; expression on a single line.
[replace]
path = value
value = toLower().replace(/^\uFEFF/, "").replace(/\r/,"").replace(/-$/, "").replace(/\u00B7/g, ".").replace(/\u2013/g, "-").replace(/\u00AD/g, "").replace(/^.*doi(\.org)?\/(.+)/g, "$2").replace(/^.*doi[: ]+10\./g, "10.").replace(/^(10\.\d+\/.+\S) https?:\/\/.+/, "$1").replace(/^(10\.\d+\/\S+) \1.*/, "$1").replace(/^(10\.\d+\/\S+) pmid.+/, "$1").replace(/^.* (10\.\d+\/.+)/, "$1").replace(/ /g, "").replace(/;subjmeta=\d+$/,"").replace(/"+/g, "")

; Batch the cleaned DOIs so that each API request carries several of them.
; NOTE(review): confirm that [group] honours `size`; the @ezs/core
; documentation names this parameter `length` - TODO verify.
[group]
size = env('size')

; Build the DOI list of the batch: drop empty values, quote each DOI
; (JSON.stringify) and join them with " OR ".
[replace]
path = dois
value = self().map('value').filter(Boolean).map(x => JSON.stringify(x)).join(' OR ')

; Build the query parameters of the request.
[replace]
path = usrQuery
value = fix('DO=(', self.dois ,')').join('')

path = databaseId
value = WOK

; Requested record count per query: same value as the batch size.
path = count
value = env('size')

path = firstRecord
value = 1

; FR = Full Record
path = optionView
value = FR

# Maximum 1 request per second
[throttle]
bySecond = 1

; [debug]

[URLStream]
url = https://wos-api.clarivate.com/api/wos/
; The API key is sent in the X-ApiKey request header.
header = env('token').prepend('X-ApiKey:')
; Emit every item found under Data.Records.records.REC in the response.
path = Data.Records.records.REC.*
timeout = 50000
noerror = true
retries = 1

[dump]
indent = env('indent')