diff --git a/wos-dumps/02-download.ini b/wos-dumps/02-download.ini
index 7cd5932..8a54374 100644
--- a/wos-dumps/02-download.ini
+++ b/wos-dumps/02-download.ini
@@ -28,7 +28,7 @@
retries = 5
timeout = 120000
token = env('WOS_API_KEY')
-step = 20
+step = 100
[ungroup]
@@ -50,7 +50,7 @@
return acc}, { DOI: "", ISSN: "", eISSN: "", eISBN: "", ArticleNumber: "", PMID: "" }))
path = DocumentType
-value = get("static_data.summary.doctypes").thru(obj => [].concat(obj || []).map(i => i?.doctype || null))
+value = get("static_data.summary.doctypes").thru(obj => [].concat(obj || []).map(i => i?.doctype || null)).flatten()
path = Titles
value = get("static_data.summary.titles.title") \
@@ -59,8 +59,7 @@
path = Publisher
value = get("static_data.summary.publishers.publisher.names.name") \
- .thru(pub => ({ Publisher: pub?.full_name || "", \
- PublisherUnifiedName: (pub?.unified_name || "").toUpperCase() }))
+ .thru(pub => _.toUpper(pub?.unified_name || pub?.full_name || ""))
path = PubInfo
value = get("static_data.summary.pub_info") \
@@ -93,6 +92,7 @@
authorsData[key] = { \
wos_standard: author.wos_standard || "Unknown", \
full_name: author.preferred_name?.full_name || author.full_name || "Unknown", \
+ full_name_deburred: _.deburr(author.preferred_name?.full_name || author.full_name || "Unknown"), \
addresses: [] \
}; \
} \
@@ -105,28 +105,36 @@
authorsData[key].addresses.push({ full_address, country, city, organizations }); \
}); \
}); \
- return Object.values(authorsData).map(a => ({ \
- wos_standard: a.wos_standard, \
- full_name: a.full_name, \
- addresses: a.addresses \
- })); \
+ return Object.values(authorsData); \
})
path = ReprintAddresses
value = get("static_data.fullrecord_metadata.reprint_addresses.address_name", []) \
- .thru(reprints => [].concat(reprints || []).map(addr => { \
- let authors = [].concat(addr.names?.name || []).filter(a => a.role === "author"); \
- let wos_standard = authors.length > 0 ? authors.map(a => a.wos_standard || "Unknown") : ["Unknown"]; \
- let full_name = authors.length > 0 ? authors.map(a => a.full_name || "Unknown") : ["Unknown"]; \
- return { \
- wos_standard: wos_standard, \
- full_name: full_name, \
- country: addr.address_spec?.country || "", \
- city: addr.address_spec?.city || "", \
- full_address: addr.address_spec?.full_address || "" \
- }; \
- }))
+ .thru(reprints => { \
+ let authorsData = {}; \
+ [].concat(reprints || []).forEach(addr => { \
+ let author = addr.names?.name || {}; \
+ let key = author.wos_standard || author.full_name || "Unknown"; \
+ if (!authorsData[key]) { \
+ authorsData[key] = { \
+ wos_standard: author.wos_standard || "Unknown", \
+ full_name: author.full_name || "Unknown", \
+ full_name_deburred: _.deburr(author.full_name || "Unknown"), \
+ addresses: [] \
+ }; \
+ } \
+ authorsData[key].addresses.push({ \
+ full_address: addr.address_spec?.full_address || "Unknown address", \
+ country: addr.address_spec?.country || "Unknown country", \
+ city: addr.address_spec?.city || "Unknown city", \
+ organizations: _.castArray(addr.address_spec?.organizations?.organization || []) \
+ .filter(org => org.pref === "Y") \
+ .map(org => org.content) \
+ }); \
+ }); \
+ return Object.values(authorsData); \
+ })
path = KeywordsPlus
value = get("static_data.item.keywords_plus.keyword", []) \
@@ -156,16 +164,14 @@
path = Categories
value = get("static_data.fullrecord_metadata.category_info.subjects.subject", []) \
- .thru(subjects => ({ ResearchAreas: subjects.find(i => i.ascatype === "extended")?.content || "", \
- WebofScienceCategories: subjects.find(i => i.ascatype === "traditional")?.content || "" }))
+ .thru(subjects => ({ \
+ ResearchAreas: _.castArray(subjects).filter(i => i.ascatype === "extended").map(i => i.content), \
+ WebofScienceCategories: _.castArray(subjects).filter(i => i.ascatype === "traditional").map(i => i.content)}))
path = NormalizedLanguage
value = get("static_data.fullrecord_metadata.normalized_languages.language", []) \
.thru(langs => [].concat(langs || []).map(i => i.content || ""))
-path = NormalizedDocumentType
-value = get("static_data.fullrecord_metadata.normalized_doctypes.doctype", []).thru(docs => [].concat(docs || []).map(i => i || ""))
-
path = Abstract
value = get("static_data.fullrecord_metadata.abstracts.abstract.abstract_text.p","")
diff --git a/wos-dumps/03-enrich.ini b/wos-dumps/03-enrich.ini
index a77ab43..a4dbcf7 100644
--- a/wos-dumps/03-enrich.ini
+++ b/wos-dumps/03-enrich.ini
@@ -5,6 +5,11 @@
plugin = analytics
[unpack]
+
+[env]
+path = dictionary
+value = fix({"PEOPLES R CHINA": "CHINA","USA": "UNITED STATES","ENGLAND": "UNITED KINGDOM","WALES": "UNITED KINGDOM","SCOTLAND": "UNITED KINGDOM","BOSNIA & HERCEG": "BOSNIA & HERZEGOVINA","COTE IVOIRE": "COTE D’IVOIRE","NORTH IRELAND": "UNITED KINGDOM","DEM REP CONGO": "CONGO - KINSHASA","REP CONGO": "CONGO - BRAZZAVILLE","CZECH REPUBLIC": "CZECHIA","DOMINICAN REP": "DOMINICAN REPUBLIC","MYANMAR": "MYANMAR (BURMA)","CENT AFR REPUBL": "CENTRAL AFRICAN REPUBLIC","EQUAT GUINEA": "EQUATORIAL GUINEA","TRINIDAD TOBAGO": "TRINIDAD & TOBAGO","BRITISH VIRGIN ISL": "BRITISH VIRGIN ISLANDS","PAPUA N GUINEA": "PAPUA NEW GUINEA","U ARAB EMIRATES": "UNITED ARAB EMIRATES","MACEDONIA": "NORTH MACEDONIA","VATICAN": "VATICAN CITY","TURKEY": "TURKIYE","SWAZILAND": "ESWATINI","ST KITTS & NEVI": "ST. KITTS & NEVIS","ST HELENA": "ST. HELENA","SAO TOME & PRIN": "SAO TOME & PRINCIPE","PALESTINE": "PALESTINIAN TERRITORIES","GUINEA BISSAU": "GUINEA-BISSAU","FALKLAND ISLAND": "FALKLAND ISLANDS","ANTIGUA & BARBU": "ANTIGUA & BARBUDA"})
+
[assign]
path = RevueVolume
value = fix(`${self.Titles.Source}${self.PubInfo.Volume ? ' Volume:'+self.PubInfo.Volume :''}${self.PubInfo.Issue ? ' Issue:'+self.PubInfo.Issue :''}${self.Identifiers.ArticleNumber ? ' Article Number:'+self.Identifiers.ArticleNumber :''}${self.PubInfo.BeginningPage && self.PubInfo.EndingPage ? ' Pages:'+self.PubInfo.BeginningPage+'-'+self.PubInfo.EndingPage :''} Published:${self.PubInfo.YearPublished}`)
@@ -47,5 +52,98 @@
oa_status: data.oa_status || "inconnu", \
host_types: _.map(data.oa_locations, 'host_type') || []}))
+[assign]
+path = AuthorsWithAddress
+value = get("AuthorsWithAddress").map(author => ({ \
+ ...author, \
+ addresses: author.addresses.map(addr => ({ \
+ ...addr, \
+ country: _.get(env("dictionary"), _.toUpper(_.deburr(addr.country || "")), _.toUpper(_.deburr(addr.country || ""))) \
+ })) \
+}))
+
+[assign]
+path = ReprintAddresses
+value = get("ReprintAddresses").map(author => ({ \
+ ...author, \
+ addresses: author.addresses.map(addr => ({ \
+ ...addr, \
+ country: _.get(env("dictionary"), _.toUpper(_.deburr(addr.country || "")), _.toUpper(_.deburr(addr.country || ""))) \
+ })) \
+}))
+
+
+[assign]
+path = CountriesToEnrich
+value = pick(["AuthorsWithAddress", "ReprintAddresses"]).values().flatten().map("addresses").flatten().map("country").compact().uniq()
+[expand]
+path = CountriesToEnrich
+size = 100
+file = ./03.3-enrich-Countries.ini
+
+[assign]
+path = AuthorsWithAddress
+value = get("AuthorsWithAddress").map(author => ({ \
+ ...author, \
+ addresses: author.addresses.map(addr => ({ \
+ ...addr, \
+ ...(_.find(self.CountriesToEnrich, e => e.pays === addr.country) || { \
+ iso2: "N/A", \
+ iso3: "N/A" \
+ }) \
+ })) \
+}))
+
+[assign]
+path = ReprintAddresses
+value = get("ReprintAddresses").map(author => ({ \
+ ...author, \
+ addresses: author.addresses.map(addr => ({ \
+ ...addr, \
+ ...(_.find(self.CountriesToEnrich, e => e.pays === addr.country) || { \
+ iso2: "N/A", \
+ iso3: "N/A" \
+ }) \
+ })) \
+}))
+
+[assign]
+path = OrganizationISO3Pairs
+value = pick(["AuthorsWithAddress", "ReprintAddresses"]) \
+ .thru(data => _.flatMap(data, entries => \
+ _.flatMap(entries || [], entry => \
+ _.flatMap(entry.addresses || [], addr => \
+ (addr.organizations || ["Unknown"]).map(org => `${addr.iso3} / ${org}`))))).uniq()
+
+[assign]
+path = AuthorsNamesOnly
+value = get("Authors").map("full_name_deburred")
+
+[assign]
+path = AuthorsHasAddress
+value = get("AuthorsWithAddress").map("full_name_deburred")
+
+[assign]
+path = AuthorsToPut
+value = get("AuthorsNamesOnly").xor(self.AuthorsHasAddress).map(auth =>`${auth}: Unknown` )
+
+[assign]
+path = AuthorsWithAddressesHtml
+value = get("AuthorsWithAddress") \
+ .groupBy("full_name") \
+ .map((entries, author) => ({ \
+ author: `${author}:`, \
+ addresses: entries.flatMap(e => e.addresses.map(a => _.get(a, 'full_address', 'Unknown address'))).join("
- ")})) \
+ .map(e => `${e.author} ${e.addresses}`).concat(self.AuthorsToPut)
+
+[assign]
+path = ReprintAddressesHtml
+value = get("ReprintAddresses") \
+ .map(entry => ({ \
+ author: `${entry.full_name}:`, \
+ addresses: entry.addresses.map(a => _.get(a, 'full_address', 'Unknown address')).join("
- ") \
+ })) \
+ .map(e => `${e.author} ${e.addresses}`)
+
[exchange]
-value = omit(["Unpaywall"])
+value = omit(["Unpaywall","AuthorsNamesOnly","AuthorsHasAddress","AuthorsToPut"])
diff --git a/wos-dumps/03.3-enrich-Countries.ini b/wos-dumps/03.3-enrich-Countries.ini
new file mode 100644
index 0000000..2ee2937
--- /dev/null
+++ b/wos-dumps/03.3-enrich-Countries.ini
@@ -0,0 +1,37 @@
+# Création d'une variable d'environnement qui stocke la date au format ISO
+[env]
+path = date
+value = thru(d => (new Date()).toISOString().split("T")[0])
+# Sert à traiter individuellement chaque valeur de la liste (chaque code ici)
+[map]
+path = value
+# Création d'une clé temporaire pour stocker la valeur actuelle
+[map/replace]
+path = tmpkey
+value = self()
+
+[map/combine]
+path = tmpkey
+# URL vers un fichier CSV accessible via internet
+primer = http://mapping-tables.daf.intra.inist.fr/tablePaysIso2&3.tsv
+
+# Création d'un fichier temporaire local avec la date définie dans [env] (facultatif, évite de télécharger le fichier plusieurs fois)
+cacheName = env("date").prepend("WosCountries")
+default = "n/a"
+
+[map/combine/URLStream]
+path = false
+
+[map/combine/CSVParse]
+separator = fix("\t")
+
+[map/combine/CSVObject]
+# Une fois le TSV parsé en objets JSON, la valeur de "From" est transformée en clé et celle de "To" en valeur ({"From": "code1", "To": "Intitulé1"}). Puis on transforme l'objet avec "intitule" comme clé.
+[map/combine/assign]
+path = id
+value = get('FROM')
+path = value
+value = get('TO').split().zipObject(["intitule"]).invert()
+# Enfin on extrait uniquement la valeur de "intitule" après le mapping
+[map/exchange]
+value = get('tmpkey.value.intitule').split(";").thru(arr => ({ pays: arr[0] || "Unknown", iso2: arr[1] || "Unknown", iso3: arr[2] || "Unknown" }))
\ No newline at end of file
diff --git a/wos-dumps/README.md b/wos-dumps/README.md
index 316f0c2..3e1da3f 100644
--- a/wos-dumps/README.md
+++ b/wos-dumps/README.md
@@ -25,18 +25,18 @@
- `Identifiers` => `DOI`, `ISSN`, `eISSN`, `eISBN`, `ArticleNumber` & `PMID`
- `DocumentType` => Les types de document
- `Titles` => `DocumentTitle` & `Source`
+- `Publisher` => Récupère le nom normalisé (`unified_name`) s'il existe, sinon `full_name`
- `PubInfo` => `Volume`, `Issue`, `BeginningPage`, `EndingPage`, `YearPublished` & `EarlyAccessYear`
- `Conference` => `ConferenceDate`, `ConferenceTitle`, `ConferenceCity` & `ConferenceState`
- `Authors` => Pour chaque auteur `wos_standard`, `full_name_deburred` & `full_name`
-- `AuthorsWithAddress` => Pour chaque auteur `wos_standard`, `full_name`, `country`, `city` & `full_address`
-- `ReprintAddresses` => Pour chaque auteur `wos_standard`, `full_name`, `country`, `city` & `full_address`
+- `AuthorsWithAddress` => Pour chaque auteur `wos_standard`, `full_name`, `full_name_deburred` & `addresses`. Cette clé contient `full_address`, `country`, `city` & `organizations`.
+- `ReprintAddresses` => Pour chaque auteur `wos_standard`, `full_name`, `full_name_deburred` & `addresses`. Cette clé contient `full_address`, `country`, `city` & `organizations`.
- `KeywordsPlus` => Les mots-clés WoS
- `AuthorKeywords` => Les mots-clés d'auteurs
- `CitationCountWOS` => Le nombre de citations
- `CitationTopics` => `Macro`, `Meso` & `Micro`
- `Categories` => `ResearchAreas` & `WebofScienceCategories`
- `NormalizedLanguage` => La/les langue(s) du document
-- `NormalizedDocumentType` => Les types de document normalisés
- `Abstract` => Le résumé du document
- `SDG` => Les Sustainable Development Goals
@@ -46,7 +46,11 @@
- `RevueVolume` => Template string qui concatène, si les données existent, les éléments suivants dans une chaîne du type : `Source` `Volume:` `Issue:` `Article Number:` `Pages:`(`BeginningPage EndingPage`) `Published:`
- `DisciplinesESI` => enrichissement par sous-flux. Le fichier `03.1-enrich-ESI.ini` récupère les disciplines ESI à partir de `WebofScienceCategories`
- `JCRSubjectCategory` => enrichissement par sous-flux. Le fichier `03.2-enrich-JCR.ini` récupère les données relatives à la notoriété d'une revue à partir de `Source`
-- `Unpaywall` interroge le web-service Unpaywall uniquement si le document possède un DOI (évite les interrogations inutiles)
+- `OAInfo` interroge le web-service Unpaywall uniquement si le document possède un DOI (évite les interrogations inutiles). Récupère `is_oa`, `oa_status` & `host_types`
+- `AuthorsWithAddress` & `ReprintAddresses` sont enrichis avec `pays`, `iso2` & `iso3`. Dans un 1er temps `country` est soumis à un dictionnaire afin de convertir les quelques pays pour lesquels le WoS donne une verbalisation "non standard". Les pays sont ensuite enrichis par sous-flux via le fichier `03.3-enrich-Countries.ini` qui récupère `pays`, `iso2` & `iso3`. On utilise ensuite .find() qui cherche les objets où `pays` correspond à `country` et, lorsque c'est le cas, fusionne les données. Ce qui permet d'ajouter correctement `iso2` et `iso3` à chaque auteur dans `AuthorsWithAddress` & `ReprintAddresses`.
+- `OrganizationISO3Pairs` => associe les organismes avec leur(s) pays.
+- `AuthorsWithAddressesHtml` & `ReprintAddressesHtml` => Pour un affichage clair dans Lodex en format liste à puces on met chaque nom d'auteur entre balises `b`, puis on ajoute son adresse. S'il en a plusieurs, elles sont séparées par une balise `br` et un `-` pour créer un effet de sous-puce dans la liste.
+
### 04-report