# Extract a few fields from a WoS file.
#
# Usage:
#   npx ezs extract-fields.ini < corpus_WoS_vol1.json | jq
#
# Every `value` below is a lodash-style chain applied to the current record
# (the record itself is reachable as `self`, lodash as `_`).

[use]
plugin = basics

[JSONParse]

# Step 1: stash the full address strings in temporary `tmp.*` fields so that
# the [replace] step can merge them into `affiliations`.
# The `[]` default + castArray() accepts a missing value, a single object or
# an array of address_name entries.
[assign]
path = tmp.reprint_addresses
value = get('static_data.fullrecord_metadata.reprint_addresses.address_name', []).castArray().map(name => name.address_spec.full_address)

path = tmp.addresses
value = get('static_data.fullrecord_metadata.addresses.address_name', []).castArray().map(name => name.address_spec.full_address)

# Step 2: build the output record from the fields listed below.
# NOTE(review): lookups that are filtered/mapped on element properties go
# through castArray().compact() first, on the assumption (suggested by the
# castArray() calls already used on neighbouring paths) that a lone child may
# arrive as a single object instead of a one-element array - confirm against
# the corpus. Arrays and missing values behave exactly as before.
[replace]
path = uri
value = get("UID")

# Only identifiers typed "doi", de-duplicated.
path = doi
value = get("dynamic_data.cluster_related.identifiers.identifier").castArray().compact().filter(i => i.type === "doi").map("value").uniq()

# Names with role "author"; entries without a first_name are dropped.
path = authors
value = get("static_data.summary.names.name").castArray().compact().filter(person => person.role === "author").map(author => author.first_name ? author.first_name + ", " + author.last_name : null).compact()

# Title(s) typed "item", joined with '|'.
path = title
value = get('static_data.summary.titles.title').castArray().compact().filter(title => title.type === "item").map(title => title.content).join('|')

# Strip markup tags and the first "Key Points" occurrence from each paragraph,
# then join the paragraphs with " ; ".
path = abstract
value = get('static_data.fullrecord_metadata.abstracts.abstract.abstract_text.p', []).castArray().map(s => String(s).replace(/<[^>]*>/g, "").replace("Key Points", "")).join(" ; ")

path = publication_year
value = get('static_data.summary.pub_info.pubyear')

# Title(s) typed "source", joined with '|'.
path = source
value = get('static_data.summary.titles.title').castArray().compact().filter(title => title.type === "source").map(title => title.content).join('|')

# Merge both temporary address lists, drop empty entries, de-duplicate.
# compact() replaces the former remove(null), which only produced the same
# result by returning the elements it removed.
path = affiliations
value = get('tmp.addresses', []).concat(_.get(self, 'tmp.reprint_addresses')).compact().uniq()

path = countries
value = get('static_data.fullrecord_metadata.addresses.address_name', []).castArray().map("address_spec.country").uniq()

path = document_type
value = get('static_data.summary.doctypes.doctype')

path = language
value = get('static_data.fullrecord_metadata.languages.language.content')

path = keywords
value = get('static_data.item.keywords_plus.keyword')

path = subjects
value = get('static_data.fullrecord_metadata.category_info.subjects.subject').castArray().compact().map('content').uniq()

path = subheadings
value = get('static_data.fullrecord_metadata.category_info.subheadings.subheading').castArray().uniq()

path = headings
value = get('static_data.fullrecord_metadata.category_info.headings.heading').castArray().uniq()

path = fund_text
value = get('static_data.fullrecord_metadata.fund_ack.fund_text.p')

# One { ids, agency } object per grant. The inner chains are left unwrapped
# (no .value()), as in the original expression.
path = grants
value = get('static_data.fullrecord_metadata.fund_ack.grants.grant').castArray().map(grant => ({ \
    ids:_.chain(grant).get('grant_ids').castArray().map('grant_id').compact().flatten(), \
    agency: _.chain(grant).get('grant_agency_names').castArray().map('content').compact() \
}))

path = publisher
value = get('static_data.summary.publishers.publisher.names.name.unified_name')

[dump]
indent = true