# giec-wos / extract-fields.ini
# Take a WoS file from the data directory and extract a few fields from each record.
# Usage:
# npx ezs -e extract-fields.ini | jq

# Load the "basics" plugin.
# NOTE(review): presumably this is what provides the JSONParse, FILELoad and
# FILESave statements used below - confirm.
[use]
plugin = basics

# Seed the pipeline with the name of the input file (looked up in the
# directory given to [FILELoad] below).
[exchange]
value = [ "corpus_WoS_vol1-50.json" ]

# NOTE(review): presumably turns the value emitted by [exchange] into one
# chunk per file name - confirm.
[JSONParse]

# Load the content of each named file, looked up in the "data" directory.
[FILELoad]
location = data

# Parse the loaded file content as JSON so the following statement receives
# records as objects.
# NOTE(review): presumably one object per WoS record - confirm.
[JSONParse]

# Replace each WoS record with a simplified object. Every path/value pair
# below defines one output field: "path" is the field name and "value" is a
# lodash-style chain evaluated against the input record.
# Multi-valued fields use get(<path>, []).castArray() so that a missing value
# gives an empty list and a single (non-array) value gives a one-item list.
[replace]
# Record identifier.
path = uri
value = get("UID")

# Titles of type "item", joined with "|".
path = title
value = get('static_data.summary.titles.title').filter(title => title.type === "item").map(title => title.content).join('|')

path = abstract
value = get('static_data.fullrecord_metadata.abstracts.abstract.abstract_text.p')

path = publication_year
value = get('static_data.summary.pub_info.pubyear')

# Titles of type "source", joined with "|".
path = source
value = get('static_data.summary.titles.title').filter(title => title.type === "source").map(title => title.content).join('|')

# TODO: also add the addresses from reprint_addresses (same level as addresses).
# Full address of every address_name entry.
path = affiliations
value = get('static_data.fullrecord_metadata.addresses.address_name', []).castArray().map(name => name.address_spec.full_address)

# Distinct countries of the address_name entries.
path = countries
value = get('static_data.fullrecord_metadata.addresses.address_name', []).castArray().map("address_spec.country").uniq()

path = keywords
value = get('static_data.item.keywords_plus.keyword')

# Distinct "content" values of the subjects. castArray() keeps a record with
# a single subject object from being mapped property by property.
path = subjects
value = get('static_data.fullrecord_metadata.category_info.subjects.subject', []).castArray().map('content').uniq()

# The [] default avoids producing [undefined] when the record has none.
path = subheadings
value = get('static_data.fullrecord_metadata.category_info.subheadings.subheading', []).castArray().uniq()

# The [] default avoids producing [undefined] when the record has none.
path = headings
value = get('static_data.fullrecord_metadata.category_info.headings.heading', []).castArray().uniq()

# Serialize the simplified records, with indentation enabled, before they
# are saved.
# NOTE(review): presumably the output is a single JSON array - confirm.
[dump]
indent = true

# Save the serialized output as "corpus-simple-50.json" in the "data"
# directory.
[FILESave]
location = data
identifier = corpus-simple-50.json

# Serialize whatever [FILESave] emits, with indentation enabled, so it can be
# piped to jq (see the usage line at the top of the file).
# NOTE(review): presumably a description of the written file - confirm.
[dump]
indent = true