; web-dumps / wos-dumps / 03-enrich.ini
; Script-level setting, declared before any section.
; NOTE(review): presumably asks ezs to append a [pack] statement at the end of this
; pipeline, mirroring the [unpack] statement that opens it — confirm against the ezs docs.
append = pack
; Plugins that provide the statements used below.
[use]
; URLConnect (used by the Unpaywall step) — presumably provided by the basics plugin; confirm.
plugin = basics
plugin = analytics
; First statement of the pipeline; takes no parameters here.
; NOTE(review): presumably decodes the packed input stream into one object per record — confirm.
[unpack]


; Country-name dictionary, stored in the environment under "dictionary" and read later
; through env("dictionary") when address countries are normalised.
; Keys are upper-case source spellings (e.g. "PEOPLES R CHINA"); values are the canonical
; names they are replaced with. Several UK constituent countries map to "UNITED KINGDOM".
; NOTE(review): "COTE D’IVOIRE" uses a typographic apostrophe (’) — confirm that this is
; the spelling expected by the downstream country enrichment.
[env]
path = dictionary
value = fix({"PEOPLES R CHINA": "CHINA","USA": "UNITED STATES","ENGLAND": "UNITED KINGDOM","WALES": "UNITED KINGDOM","SCOTLAND": "UNITED KINGDOM","BOSNIA & HERCEG": "BOSNIA & HERZEGOVINA","COTE IVOIRE": "COTE D’IVOIRE","NORTH IRELAND": "UNITED KINGDOM","DEM REP CONGO": "CONGO - KINSHASA","REP CONGO": "CONGO - BRAZZAVILLE","CZECH REPUBLIC": "CZECHIA","DOMINICAN REP": "DOMINICAN REPUBLIC","MYANMAR": "MYANMAR (BURMA)","CENT AFR REPUBL": "CENTRAL AFRICAN REPUBLIC","EQUAT GUINEA": "EQUATORIAL GUINEA","TRINIDAD TOBAGO": "TRINIDAD & TOBAGO","BRITISH VIRGIN ISL": "BRITISH VIRGIN ISLANDS","PAPUA N GUINEA": "PAPUA NEW GUINEA","U ARAB EMIRATES": "UNITED ARAB EMIRATES","MACEDONIA": "NORTH MACEDONIA","VATICAN": "VATICAN CITY","TURKEY": "TURKIYE","SWAZILAND": "ESWATINI","ST KITTS & NEVI": "ST. KITTS & NEVIS","ST HELENA": "ST. HELENA","SAO TOME & PRIN": "SAO TOME & PRINCIPE","PALESTINE": "PALESTINIAN TERRITORIES","GUINEA BISSAU": "GUINEA-BISSAU","FALKLAND ISLAND": "FALKLAND ISLANDS","ANTIGUA & BARBU": "ANTIGUA & BARBUDA"})

; Builds the RevueVolume citation string:
;   "<Titles.Source>[ Volume:<v>][ Issue:<i>][ Article Number:<n>][ Pages:<begin>-<end>] Published:<year>"
; Volume, Issue and Article Number are added only when truthy; Pages is added only when
; both BeginningPage and EndingPage are truthy.
; NOTE(review): Titles, PubInfo and Identifiers are dereferenced without guards, and a
; missing YearPublished renders as "Published:undefined" — confirm these fields are always present.
[assign]
path = RevueVolume
value = fix(`${self.Titles.Source}${self.PubInfo.Volume ? ' Volume:'+self.PubInfo.Volume :''}${self.PubInfo.Issue ? ' Issue:'+self.PubInfo.Issue :''}${self.Identifiers.ArticleNumber ? ' Article Number:'+self.Identifiers.ArticleNumber :''}${self.PubInfo.BeginningPage && self.PubInfo.EndingPage ? ' Pages:'+self.PubInfo.BeginningPage+'-'+self.PubInfo.EndingPage :''} Published:${self.PubInfo.YearPublished}`)

; Seed DisciplinesESI with the record's Web of Science categories.
[assign]
path = DisciplinesESI
value = get("Categories.WebofScienceCategories")
; Enrich DisciplinesESI through the sub-script 03.1-enrich-ESI.ini.
; NOTE(review): size = 100 is presumably the batch size handed to the sub-script — confirm.
[expand]
path = DisciplinesESI
size = 100
file = ./03.1-enrich-ESI.ini

; Seed JCRSubjectCategory with the source (journal) title.
[assign]
path = JCRSubjectCategory
value = get("Titles.Source")
; Enrich JCRSubjectCategory through the sub-script 03.2-enrich-JCR.ini.
; NOTE(review): size = 100 is presumably the batch size handed to the sub-script — confirm.
[expand]
path = JCRSubjectCategory
size = 100
file = ./03.2-enrich-JCR.ini

; Seed Unpaywall with the record's DOI; it stays undefined when the record has no DOI.
[assign]
path = Unpaywall
value = get("Identifiers.DOI")
; Conditional branch on the Unpaywall field.
; NOTE(review): with reverse = true the nested statements presumably run only when
; Unpaywall is NOT empty, i.e. when a DOI exists — confirm against the ezs [swing] docs.
[swing]
test = get("Unpaywall").isEmpty()
reverse = true
; Enrich the Unpaywall field (the DOI) through the nested statement below.
[swing/expand]
path = Unpaywall
size = 100
; Remote Unpaywall enrichment service.
; NOTE(review): timeout is presumably in milliseconds (3600000 = 1 hour) — confirm.
[swing/expand/URLConnect]
url = https://biblio-tools.services.istex.fr/v2/unpaywall/works/expand
timeout = 3600000
noerror = false
retries = 5

; Summarise the Unpaywall answer into OAInfo:
;   is_oa      - the boolean returned by Unpaywall, otherwise "inconnu"
;   oa_status  - the returned status, otherwise "inconnu"
;   host_types - host_type of every entry of oa_locations ([] when there is none)
; Fields are read with _.get() because Unpaywall is undefined for records without a DOI
; (the enrichment branch is skipped), and a direct data.is_oa access would throw.
; _.map() already returns [] for a missing collection, so no extra fallback is needed.
[assign]
path = OAInfo
value = get("Unpaywall").thru(data => ({ \
    is_oa: _.isBoolean(_.get(data, "is_oa")) ? data.is_oa : "inconnu", \
    oa_status: _.get(data, "oa_status") || "inconnu", \
    host_types: _.map(_.get(data, "oa_locations"), "host_type") \
}))

; Normalise the country of every author address: the raw value is deburred and
; upper-cased, then replaced by the canonical name found in env("dictionary");
; when the dictionary has no entry, the normalised value itself is kept.
; _.map() is applied to author.addresses so that an author without an addresses
; array yields [] instead of throwing.
; The normalised name is computed once, and passed to _.get() as an array path so it is
; always treated as a literal key (a string path would be split on "." or "[").
[assign]
path = AuthorsWithAddress
value = get("AuthorsWithAddress").map(author => ({ \
    ...author, \
    addresses: _.map(author.addresses, addr => ({ \
        ...addr, \
        country: _.thru(_.toUpper(_.deburr(addr.country || "")), name => _.get(env("dictionary"), [name], name)) \
    })) \
}))

; Normalise the country of every reprint address: the raw value is deburred and
; upper-cased, then replaced by the canonical name found in env("dictionary");
; when the dictionary has no entry, the normalised value itself is kept.
; _.map() is applied to author.addresses so that an entry without an addresses
; array yields [] instead of throwing.
; The normalised name is computed once, and passed to _.get() as an array path so it is
; always treated as a literal key (a string path would be split on "." or "[").
[assign]
path = ReprintAddresses
value = get("ReprintAddresses").map(author => ({ \
    ...author, \
    addresses: _.map(author.addresses, addr => ({ \
        ...addr, \
        country: _.thru(_.toUpper(_.deburr(addr.country || "")), name => _.get(env("dictionary"), [name], name)) \
    })) \
}))


; Collect the distinct, non-empty country values found in the addresses of both
; AuthorsWithAddress and ReprintAddresses.
[assign]
path = CountriesToEnrich
value = pick(["AuthorsWithAddress", "ReprintAddresses"]).values().flatten().map("addresses").flatten().map("country").compact().uniq()
; Enrich the country list through the sub-script 03.3-enrich-Countries.ini.
; NOTE(review): later statements read `pays`, `iso2` and `iso3` on each enriched entry,
; so the sub-script presumably returns objects carrying those fields — confirm.
[expand]
path = CountriesToEnrich
size = 100
file = ./03.3-enrich-Countries.ini

; Merge the enriched country data into every author address: the CountriesToEnrich entry
; whose `pays` equals the address country is copied onto the address; when no entry
; matches, iso2 and iso3 are both set to "N/A".
[assign]
path = AuthorsWithAddress
value = get("AuthorsWithAddress").map(person => _.assign({}, person, { \
    addresses: person.addresses.map(place => _.assign( \
        {}, \
        place, \
        _.find(self.CountriesToEnrich, entry => entry.pays === place.country) || { iso2: "N/A", iso3: "N/A" } \
    )) \
}))

; Merge the enriched country data into every reprint address: the CountriesToEnrich entry
; whose `pays` equals the address country is copied onto the address; when no entry
; matches, iso2 and iso3 are both set to "N/A".
[assign]
path = ReprintAddresses
value = get("ReprintAddresses").map(person => _.assign({}, person, { \
    addresses: person.addresses.map(place => _.assign( \
        {}, \
        place, \
        _.find(self.CountriesToEnrich, entry => entry.pays === place.country) || { iso2: "N/A", iso3: "N/A" } \
    )) \
}))

; Build the distinct list of "<iso3> / <organization>" strings over every address of
; AuthorsWithAddress and ReprintAddresses. Missing entry lists and missing addresses are
; treated as empty; an address without `organizations` contributes "<iso3> / Unknown".
; NOTE(review): an address whose `organizations` is an empty array contributes no pair at
; all (the "Unknown" fallback only applies to a falsy value) — confirm this is intended.
[assign]
path = OrganizationISO3Pairs
value = pick(["AuthorsWithAddress", "ReprintAddresses"]) \
    .thru(data => _.flatMap(data, entries => \
        _.flatMap(entries || [], entry => \
            _.flatMap(entry.addresses || [], addr => \
                (addr.organizations || ["Unknown"]).map(org => `${addr.iso3} / ${org}`))))).uniq()

; Working field: full_name_deburred of every entry of Authors.
[assign]
path = AuthorsNamesOnly
value = get("Authors").map("full_name_deburred")

; Working field: full_name_deburred of every entry of AuthorsWithAddress.
[assign]
path = AuthorsHasAddress
value = get("AuthorsWithAddress").map("full_name_deburred")

; Working field: authors listed in AuthorsNamesOnly that have no entry in
; AuthorsHasAddress, each rendered as "<name>: Unknown".
; difference() is used rather than xor(): a symmetric difference would also return names
; present only in AuthorsHasAddress, labelling authors that do have an address as Unknown.
; uniq() keeps the result free of duplicates.
[assign]
path = AuthorsToPut
value = get("AuthorsNamesOnly") \
  .difference(self.AuthorsHasAddress) \
  .uniq() \
  .map(auth => `${auth}: Unknown`)

; Display strings for authors: AuthorsWithAddress entries are grouped by full_name and each
; group becomes "<full_name>: <address> || <address> ...", where an address without
; full_address is shown as "Unknown address". The AuthorsToPut strings are appended last.
[assign]
path = AuthorsWithAddressesDisplay
value = get("AuthorsWithAddress") \
  .groupBy("full_name") \
  .map((records, name) => `${name}: ${records.flatMap(rec => rec.addresses.map(place => _.get(place, 'full_address', 'Unknown address'))).join(" || ")}`) \
  .concat(self.AuthorsToPut)

; Display strings for reprint addresses: each ReprintAddresses entry becomes
; "<full_name>: <address> || <address> ...", where an address without full_address is
; shown as "Unknown address".
[assign]
path = ReprintAddressesDisplay
value = get("ReprintAddresses") \
  .map(contact => `${contact.full_name}: ${contact.addresses.map(place => _.get(place, 'full_address', 'Unknown address')).join(" || ")}`)

; Drop the working fields that were only needed to build the values above
; (raw Unpaywall answer and the three author-name helper lists); every other field is kept.
[exchange]
value = omit(["Unpaywall","AuthorsNamesOnly","AuthorsHasAddress","AuthorsToPut"])