diff --git a/wos-dumps/.gitignore b/wos-dumps/.gitignore
new file mode 100644
index 0000000..0566458
--- /dev/null
+++ b/wos-dumps/.gitignore
@@ -0,0 +1,3 @@
+02-download/
+03-refine/
+04-export/
diff --git a/wos-dumps/01-query/wos-2018-france.txt b/wos-dumps/01-query/wos-2018-france.txt
new file mode 100644
index 0000000..88171e0
--- /dev/null
+++ b/wos-dumps/01-query/wos-2018-france.txt
@@ -0,0 +1 @@
+DO=(10.1039/c8ta04267e OR 10.1016/j.biortech.2018.07.125)
diff --git a/wos-dumps/01-query/wos-2019-france.txt b/wos-dumps/01-query/wos-2019-france.txt
new file mode 100644
index 0000000..88171e0
--- /dev/null
+++ b/wos-dumps/01-query/wos-2019-france.txt
@@ -0,0 +1 @@
+DO=(10.1039/c8ta04267e OR 10.1016/j.biortech.2018.07.125)
diff --git a/wos-dumps/01-query/wos-2020-france.txt b/wos-dumps/01-query/wos-2020-france.txt
new file mode 100644
index 0000000..88171e0
--- /dev/null
+++ b/wos-dumps/01-query/wos-2020-france.txt
@@ -0,0 +1 @@
+DO=(10.1039/c8ta04267e OR 10.1016/j.biortech.2018.07.125)
diff --git a/wos-dumps/01-query/wos-2021-france.txt b/wos-dumps/01-query/wos-2021-france.txt
new file mode 100644
index 0000000..88171e0
--- /dev/null
+++ b/wos-dumps/01-query/wos-2021-france.txt
@@ -0,0 +1 @@
+DO=(10.1039/c8ta04267e OR 10.1016/j.biortech.2018.07.125)
diff --git a/wos-dumps/01-query/wos-2022-france.txt b/wos-dumps/01-query/wos-2022-france.txt
new file mode 100644
index 0000000..88171e0
--- /dev/null
+++ b/wos-dumps/01-query/wos-2022-france.txt
@@ -0,0 +1 @@
+DO=(10.1039/c8ta04267e OR 10.1016/j.biortech.2018.07.125)
diff --git a/wos-dumps/02-download.ini b/wos-dumps/02-download.ini
new file mode 100644
index 0000000..6d38e86
--- /dev/null
+++ b/wos-dumps/02-download.ini
@@ -0,0 +1,28 @@
+append = pack
+
+[use]
+plugin = basics
+plugin = conditor
+
+[TXTConcat]
+
+[replace]
+path = usrQuery
+value = self().trim()
+
+path = databaseId
+value = WOK
+
+path = optionView
+value = FS
+
+path = sortField
+value = LD+D
+
+[WOSFetch]
+url = https://wos-api.clarivate.com/api/wos
+retries = 5
+timeout = 120000
+token = env('WOS_API_KEY')
+
+[ungroup]
diff --git a/wos-dumps/03-refine.ini b/wos-dumps/03-refine.ini
new file mode 100644
index 0000000..1262ef5
--- /dev/null
+++ b/wos-dumps/03-refine.ini
@@ -0,0 +1,52 @@
+prepend = unpack
+append = pack
+
+[use]
+plugin = basics
+plugin = conditor
+plugin = lodex
+
+[assign]
+path = tmp.reprint_addresses
+value = get('static_data.fullrecord_metadata.reprint_addresses.address_name', []).castArray().map(name => name.address_spec.full_address)
+
+path = tmp.addresses
+value = get('static_data.fullrecord_metadata.addresses.address_name', []).castArray().map(name => name.address_spec.full_address)
+
+[replace]
+path = uri
+value = get("UID")
+
+path = doi
+value = get('dynamic_data.cluster_related.identifiers.identifier').find({ type: 'doi'}).get('value').toLower()
+
+path = title
+value = get('static_data.summary.titles.title').filter(title => title.type === "item").map(title => title.content).join('|')
+
+path = abstract
+value = get('static_data.fullrecord_metadata.abstracts.abstract.abstract_text.p', []).castArray().map(s => s.replace(/<[^>]*>/g, "").replace("Key Points", "")).join(" ; ")
+
+path = publication_year
+value = get('static_data.summary.pub_info.pubyear')
+
+path = source
+value = get('static_data.summary.titles.title').filter(title => title.type === "source").map(title => title.content).join('|')
+
+path = affiliations
+value = get('tmp.addresses', []).concat(_.get(self, 'tmp.reprint_addresses')).remove(null).uniq()
+
+path = countries
+value = get('static_data.fullrecord_metadata.addresses.address_name', []).castArray().map("address_spec.country", []).uniq()
+
+path = keywords
+value = get('static_data.item.keywords_plus.keyword')
+
+path = subjects
+value = get('static_data.fullrecord_metadata.category_info.subjects.subject').map('content').uniq()
+
+path = subheadings
+value = get('static_data.fullrecord_metadata.category_info.subheadings.subheading').castArray().uniq()
+
+path = headings
+value = get('static_data.fullrecord_metadata.category_info.headings.heading').castArray().uniq()
+
diff --git a/wos-dumps/04-export.ini b/wos-dumps/04-export.ini
new file mode 100644
index 0000000..137f233
--- /dev/null
+++ b/wos-dumps/04-export.ini
@@ -0,0 +1 @@
+[transit]
diff --git a/wos-dumps/Makefile b/wos-dumps/Makefile
new file mode 100644
index 0000000..ffd3e93
--- /dev/null
+++ b/wos-dumps/Makefile
@@ -0,0 +1,71 @@
+# Build pipeline for the Web of Science dumps:
+#   01-query/*.txt -> 02-download -> 03-refine -> 04-export/*.jsonl
+# Every step pipes its input through ezs with the matching .ini script.
+
+# The aliases below are declared before `all`, so name the default goal
+# explicitly: a bare `make` must build every export, not only the first alias.
+.DEFAULT_GOAL := all
+
+# To set specific directory for each version
+ifeq ($(strip $(Startup)),)
+# Startup is unset or empty: use the month name printed by `date +%B`
+VERSION_DIR := $(shell date +%B)
+else
+# Startup is set: use no version sub-directory
+VERSION_DIR := .
+endif
+
+# To set ezs parameters
+EZSFLAGS := -v --param VERSION_DIR="$(VERSION_DIR)"
+
+# To set the location and the extension of source files (queries)
+INPUT_DIR := 01-query
+INPUT_EXT := txt
+
+# To set the location and the extension of result files
+OUTPUT_DIR := 04-export/$(VERSION_DIR)
+OUTPUT_EXT := jsonl
+
+# To set custom aliases (short names for the exported files)
+wos-2018-france: $(OUTPUT_DIR)/wos-2018-france.$(OUTPUT_EXT)
+wos-2019-france: $(OUTPUT_DIR)/wos-2019-france.$(OUTPUT_EXT)
+wos-2020-france: $(OUTPUT_DIR)/wos-2020-france.$(OUTPUT_EXT)
+wos-2021-france: $(OUTPUT_DIR)/wos-2021-france.$(OUTPUT_EXT)
+wos-2022-france: $(OUTPUT_DIR)/wos-2022-france.$(OUTPUT_EXT)
+
+# To generate all files from source directory to target directory
+SOURCE_FILES := $(wildcard $(INPUT_DIR)/*.$(INPUT_EXT))
+TARGET_FILES := $(patsubst $(INPUT_DIR)/%.$(INPUT_EXT),$(OUTPUT_DIR)/%.$(OUTPUT_EXT),$(SOURCE_FILES))
+all: $(TARGET_FILES)
+
+# Each recipe writes to $@.tmp and renames it only on success, so a failed
+# or interrupted ezs run never leaves a truncated file that looks up to date.
+
+# Step 2: to download all results
+02-download/$(VERSION_DIR)/%.jsonl: $(INPUT_DIR)/%.$(INPUT_EXT)
+	@mkdir -p $(@D)
+	ezs $(EZSFLAGS) 02-download.ini < $< > $@.tmp
+	mv -f $@.tmp $@
+
+# Step 3: to refine and select only chosen fields
+03-refine/$(VERSION_DIR)/%.jsonl: 02-download/$(VERSION_DIR)/%.jsonl
+	@mkdir -p $(@D)
+	ezs $(EZSFLAGS) 03-refine.ini < $< > $@.tmp
+	mv -f $@.tmp $@
+
+# Step 4: to create an export file
+$(OUTPUT_DIR)/%.$(OUTPUT_EXT): 03-refine/$(VERSION_DIR)/%.jsonl
+	@mkdir -p $(@D)
+	ezs $(EZSFLAGS) 04-export.ini < $< > $@.tmp
+	mv -f $@.tmp $@
+
+# To delete all generated export files
+clean:
+	@rm -f $(TARGET_FILES)
+	@echo "Files deleted!"
+
+# To prevent deleting the intermediate download files
+.PRECIOUS: 02-download/$(VERSION_DIR)/%.jsonl
+
+# To ignore non-file targets
+.PHONY: all clean wos-2018-france wos-2019-france wos-2020-france wos-2021-france wos-2022-france