Patch notes (free text ahead of the first file header; git apply and patch(1) skip it).
02-download.ini: adds "step = 20", joins several multi-line "value" expressions onto one line, and replaces the per-author countries/cities/full_addresses Sets with an "addresses" array of { full_address, country, city, organizations } objects, where organizations keeps the "content" of the entries whose "pref" is "Y".
03-enrich.ini: adds an [assign] that builds OAInfo (is_oa, oa_status, host_types) from the Unpaywall field, then an [exchange] that omits Unpaywall. OAInfo reads that field through optional chaining so a record whose Unpaywall value is undefined or null gets the "inconnu" defaults and an empty host_types array rather than a TypeError.
diff --git a/wos-dumps/02-download.ini b/wos-dumps/02-download.ini index dd3b0ce..7cd5932 100644 --- a/wos-dumps/02-download.ini +++ b/wos-dumps/02-download.ini @@ -28,6 +28,7 @@ retries = 5 timeout = 120000 token = env('WOS_API_KEY') +step = 20 [ungroup] @@ -49,8 +50,7 @@ return acc}, { DOI: "", ISSN: "", eISSN: "", eISBN: "", ArticleNumber: "", PMID: "" })) path = DocumentType -value = get("static_data.summary.doctypes") \ - .thru(obj => [].concat(obj || []).map(i => i?.doctype || null)) +value = get("static_data.summary.doctypes").thru(obj => [].concat(obj || []).map(i => i?.doctype || null)) path = Titles value = get("static_data.summary.titles.title") \ @@ -93,25 +93,26 @@ authorsData[key] = { \ wos_standard: author.wos_standard || "Unknown", \ full_name: author.preferred_name?.full_name || author.full_name || "Unknown", \ - countries: new Set(), \ - cities: new Set(), \ - full_addresses: new Set() \ + addresses: [] \ }; \ } \ - authorsData[key].countries.add(addr.address_spec?.country || ""); \ - authorsData[key].cities.add(addr.address_spec?.city || ""); \ - authorsData[key].full_addresses.add(addr.address_spec?.full_address || ""); \ + let full_address = addr.address_spec?.full_address || "Unknown address"; \ + let country = addr.address_spec?.country || "Unknown country"; \ + let city = addr.address_spec?.city || "Unknown city"; \ + let organizations = _.castArray(addr.address_spec?.organizations?.organization || []) \ + .filter(org => org.pref === "Y") \ + .map(org => org.content); \ + authorsData[key].addresses.push({ full_address, country, city, organizations }); \ }); \ }); \ return Object.values(authorsData).map(a => ({ \ wos_standard: a.wos_standard, \ full_name: a.full_name, \ - country: [...a.countries], \ - city: [...a.cities], \ - full_address: [...a.full_addresses] \ + addresses: a.addresses \ })); \ }) + path = ReprintAddresses value = get("static_data.fullrecord_metadata.reprint_addresses.address_name", []) \ .thru(reprints => 
[].concat(reprints || []).map(addr => { \ @@ -163,12 +164,10 @@ .thru(langs => [].concat(langs || []).map(i => i.content || "")) path = NormalizedDocumentType -value = get("static_data.fullrecord_metadata.normalized_doctypes.doctype", []) \ - .thru(docs => [].concat(docs || []).map(i => i || "")) +value = get("static_data.fullrecord_metadata.normalized_doctypes.doctype", []).thru(docs => [].concat(docs || []).map(i => i || "")) path = Abstract value = get("static_data.fullrecord_metadata.abstracts.abstract.abstract_text.p","") path = SDG -value = get("dynamic_data.citation_related.SDG.sdg_category", []) \ - .castArray().map("content") \ No newline at end of file +value = get("dynamic_data.citation_related.SDG.sdg_category", []).castArray().map("content") \ No newline at end of file diff --git a/wos-dumps/03-enrich.ini b/wos-dumps/03-enrich.ini index 5be34be..a77ab43 100644 --- a/wos-dumps/03-enrich.ini +++ b/wos-dumps/03-enrich.ini @@ -38,4 +38,14 @@ url = https://biblio-tools.services.istex.fr/v2/unpaywall/works/expand timeout = 3600000 noerror = false -retries = 5 \ No newline at end of file +retries = 5 + +[assign] +path = OAInfo +value = get("Unpaywall").thru(data => ({\ +is_oa: _.isBoolean(data?.is_oa) ? data.is_oa : "inconnu", \ +oa_status: data?.oa_status || "inconnu", \ +host_types: _.map(data?.oa_locations, 'host_type')})) + +[exchange] +value = omit(["Unpaywall"])