Patch summary (patch tools skip any text that precedes the first diff header).

  * wos-dumps/03-enrich.ini (new file): a [transit] section followed by an
    empty line.
  * wos-dumps/Makefile: removes the VERSION_DIR / gzip layout and the help,
    list, view, clean, clean-cache and drain targets, and chains three pattern
    rules instead:
      01-query/%.txt -> 02-download/%.jsonl -> 03-enrich/%.jsonl -> 04-report/%.log

Review amendments made to the added Makefile lines:
  * NPROCS is assigned with ':=' so the grep runs once at parse time.
  * .DEFAULT_GOAL and .PHONY are declared again; the patch dropped both while
    keeping the 'all' and 'watch' targets, which left 'watch' as the first
    rule and therefore the default goal.
  * The French comment above the 04-report rule is translated to English.
  The hunk header counts are updated accordingly; the post-image hash on the
  Makefile "index" line predates these amendments.

NOTE(review): this patch was rebuilt from a copy whose line breaks had been
flattened. Recipe tabs, the indentation inside the 'watch' loop and the
positions of blank context lines are reconstructed (they match the original
-1,73 / +1,37 counts) -- confirm against the original commit before applying.

diff --git a/wos-dumps/03-enrich.ini b/wos-dumps/03-enrich.ini
new file mode 100644
index 0000000..3daddc0
--- /dev/null
+++ b/wos-dumps/03-enrich.ini
@@ -0,0 +1,2 @@
+[transit]
+
diff --git a/wos-dumps/Makefile b/wos-dumps/Makefile
index 949fa20..886e0d7 100644
--- a/wos-dumps/Makefile
+++ b/wos-dumps/Makefile
@@ -1,73 +1,44 @@
-# To set specific directory for each crontab run
-ifeq ($(strip $(Startup)),)
-# use the the day of the week
-#VERSION_DIR := $(shell date +%A)
-# use the month
-VERSION_DIR := $(shell date +%B)
-else
-# use no version
-VERSION_DIR := .
-endif
+# Expanded once with := so the grep is not re-run on every reference to NPROCS.
+NPROCS := $(shell grep -c 'processor' /proc/cpuinfo)
+MAKEFLAGS += --keep-going -j$(NPROCS) --max-load=$(NPROCS)
+# This ensures the next time you run Make, it’ll properly re-run the failed
+# rule, and guards against broken files.
+# See https://tech.davis-hansson.com/p/make/#change-some-make-defaults
+.DELETE_ON_ERROR:
 
-# To set ezs parameters
-EZSFLAGS := --param VERSION_DIR="$(VERSION_DIR)"
-# To set global parameters
-ROOT_DIR := $(shell pwd)
-# To set the location and the extension of sources files (queries)
-INPUT_DIR := 01-query
-INPUT_EXT := txt
+# To prevent deleting intermediate files (for controls)
+.PRECIOUS: 02-download/%.jsonl 03-enrich/%.jsonl
+
+# `watch` is the first rule in this file; without an explicit default goal a
+# bare `make` would start the endless watch loop instead of building.
+.DEFAULT_GOAL := all
+# Neither name is a file on disk, so both must always be considered out of date.
+.PHONY: all watch
 
-# To set the location and the extension of results files
-OUTPUT_DIR := 02-download/$(VERSION_DIR)
-OUTPUT_EXT := jsonl.gz
-
-# To generate all files from source directory to target directory
-SOURCE_FILES := $(wildcard $(INPUT_DIR)/*.$(INPUT_EXT))
-TARGET_FILES := $(patsubst $(INPUT_DIR)/%.$(INPUT_EXT), $(OUTPUT_DIR)/%.$(OUTPUT_EXT), $(SOURCE_FILES))
-
-# Phony Rules
-help:
-	@grep -P '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
-
-list: ## list all target files
-	@ls -1 $(TARGET_FILES)
-
-view: ## To view current generated files
-	find $(OUTPUT_DIR)
-
-clean-cache: ## To delete enrichment cache
-	@rm -v -f $(TMPDIR)/memory
-
-clean: ## To delete current generated files
-	@rm -v -f $(TARGET_FILES)
-
-drain: ## To delete all directories
-	@rm -v -Rf 02-download
+SOURCE_FILES := $(wildcard 01-query/*.txt)
+TARGET_FILES := $(patsubst 01-query/%.txt, 04-report/%.log, $(SOURCE_FILES))
 
 watch: ## Automatically build files when they change
 	while true; do \
-		inotifywait -qr -e modify -e create -e delete -e move --exclude '/\.' $(INPUT_DIR); \
-		make clean-cache; \
-		make all; \
+		inotifywait -qr -e modify -e create -e delete -e move --exclude '/\.' 01-query; \
+		$(MAKE) all; \
 	done
 
 # Rules
 all: $(TARGET_FILES) ## Build all files
+	echo $(TARGET_FILES)
 
-%: $(OUTPUT_DIR)/%.$(OUTPUT_EXT)
-	@echo "$<"
-
-02-download/$(VERSION_DIR)/%.jsonl: 01-query/%.txt
+# target: prerequisite
+04-report/%.log: 03-enrich/%.jsonl
 	mkdir -p $(@D)
-	time ezs $(EZSFLAGS) 02-download.ini < $< > $@.crdownload
+	wc -l $< > $@
+
+03-enrich/%.jsonl: 02-download/%.jsonl
+	mkdir -p $(@D)
+	time npx ezs 03-enrich.ini < $< > $@.crdownload
 	mv $@.crdownload $@
-	ls -lhag $@ |sed -re 's/^[^ ]* //' >> "$(subst .jsonl,.log,$@)"
 
-02-download/$(VERSION_DIR)/%.jsonl.gz: 02-download/$(VERSION_DIR)/%.jsonl
-	gzip -f $<
-
-# To prevent deleting intermediate files (for controls)
-.PRECIOUS: 02-download/$(VERSION_DIR)/%.json
-
-.DEFAULT_GOAL := help
-.PHONY: clean drain view help list all clean-cache watch
+02-download/%.jsonl: 01-query/%.txt
+	mkdir -p $(@D)
+	time npx ezs 02-download.ini < $< > $@.crdownload
+	mv $@.crdownload $@