Newer
Older
web-dumps / apil-dumps / Makefile
@Nicolas Thouvenin Nicolas Thouvenin on 23 Nov 2022 2 KB refactor: use makefile
# Aliases: one umbrella target per name prefix, expanding to its yearly targets (2016 to 2022)
corhal-ins2i: $(addprefix corhal-ins2i-,2016 2017 2018 2019 2020 2021 2022)
corhal-insu: $(addprefix corhal-insu-,2016 2017 2018 2019 2020 2021 2022)

# Choose the sub-directory that separates one version of the generated files from another
ifneq ($(strip $(Startup)),)
# Startup is set to a non-blank value: no versioning, work in place
VERSION_DIR := .
else
# Startup is unset or blank: one directory per month (full month name printed by date)
VERSION_DIR := $(shell date +%B)
# Alternative: one directory per day of the week
#VERSION_DIR := $(shell date +%A)
endif

# Parameters handed to every ezs invocation
EZSFLAGS := --param VERSION_DIR="$(VERSION_DIR)"
# Global parameters: working directory at the time the Makefile is parsed
ROOT_DIR := $(shell pwd)
# Source files (queries): extension and location
INPUT_EXT := txt
INPUT_DIR := 01-query

# Result files: extension and location (inside the current version directory)
OUTPUT_EXT := txt
OUTPUT_DIR := 05-result/$(VERSION_DIR)

# Every query file found in the input directory
SOURCE_FILES := $(wildcard $(INPUT_DIR)/*.$(INPUT_EXT))
# The result file expected for each query file.
# No space after the commas: make keeps leading blanks in every function
# argument but the first, so they would end up inside the replacement text.
TARGET_FILES := $(patsubst $(INPUT_DIR)/%.$(INPUT_EXT),$(OUTPUT_DIR)/%.$(OUTPUT_EXT),$(SOURCE_FILES))

# Phony Rules
# Print every target documented with a trailing "## description", sorted by name.
# grep -E is used instead of grep -P because PCRE matching is not available in
# busybox or BSD grep; the lazy quantifier is not needed to select whole lines.
help: ## Show this help
	@grep -E '^[a-zA-Z0-9_-]+:.*## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

# List the expected result files with ls (files not generated yet are reported
# as missing by ls itself). When there is no query file, ls would get no
# operand and list the current directory, so that case is reported explicitly.
list: ## list all target files
	@$(if $(strip $(TARGET_FILES)),ls -1 $(TARGET_FILES),echo "No target file: nothing matches $(INPUT_DIR)/*.$(INPUT_EXT)" >&2)

# Walk the current output directory and print every path found below it
view: ## To view current generated files
	find $(OUTPUT_DIR) -print

# Remove the file named "memory" from the TMPDIR directory
# NOTE(review): if TMPDIR is unset the path becomes /memory — confirm TMPDIR is always defined
clean-cache: ## To delete enrichment cache
	@rm -fv $(TMPDIR)/memory

# Remove the result files of the current version directory (verbose, never fails on missing files)
clean: ## To delete current generated files
	@rm -fv $(TARGET_FILES)

# Recursively remove the four generated directory trees, whatever the version directory
drain: ## To delete all directories
	@rm -Rfv 02-download 03-enrichment 04-refine 05-result

# Rules
# Aggregate target: depends on every file listed in TARGET_FILES and has no recipe of its own
all: $(TARGET_FILES) ## Build all files

# Match-anything rule: "make NAME" depends on $(OUTPUT_DIR)/NAME.$(OUTPUT_EXT).
# That result file is first brought up to date as a prerequisite; the recipe
# then removes it and re-runs make on it, so the result file is regenerated
# on every invocation. The first recipe line fails if the file is not a
# regular file at that point.
%: $(OUTPUT_DIR)/%.$(OUTPUT_EXT)
	[ -f $< ] && rm $<
	$(MAKE) $<

# Stage 02: run the 02-download.ini ezs script with a query file on stdin;
# stdout becomes the .jsonl target.
# The output is written to a temporary file and renamed only once ezs has
# succeeded, so a failed or interrupted run never leaves a truncated target
# that make would consider up to date.
02-download/$(VERSION_DIR)/%.jsonl: $(INPUT_DIR)/%.$(INPUT_EXT)
	mkdir -p $(@D)
	time ezs $(EZSFLAGS) 02-download.ini < $< > $@.tmp
	mv -f $@.tmp $@

# Stage 03: run the 03-enrichment.ini ezs script with the downloaded .jsonl on
# stdin; stdout becomes the .jsonl target.
# The output is written to a temporary file and renamed only once ezs has
# succeeded, so a failed or interrupted run never leaves a truncated target
# that make would consider up to date.
03-enrichment/$(VERSION_DIR)/%.jsonl: 02-download/$(VERSION_DIR)/%.jsonl
	mkdir -p $(@D)
	time ezs $(EZSFLAGS) 03-enrichment.ini < $< > $@.tmp
	mv -f $@.tmp $@

# Stage 04: run the 04-refine.ini ezs script with the enriched .jsonl on
# stdin; stdout becomes the .jsonl target.
# The output is written to a temporary file and renamed only once ezs has
# succeeded, so a failed or interrupted run never leaves a truncated target
# that make would consider up to date.
04-refine/$(VERSION_DIR)/%.jsonl: 03-enrichment/$(VERSION_DIR)/%.jsonl
	mkdir -p $(@D)
	time ezs $(EZSFLAGS) 04-refine.ini < $< > $@.tmp
	mv -f $@.tmp $@

# Stage 05: split the refined .jsonl into chunks of 10000 lines stored in a
# directory named after the file, then write the "wc -l" report of those
# chunks (absolute paths) into the .txt target.
#   TMP1 = absolute path of the refined .jsonl
#   TMP2 = its name without directory and extension (also the chunk prefix)
#   TMP3 = directory receiving the chunks
# WARNING: split --additional-suffix is unavailable in busybox, so the .jsonl
# extension is added afterwards with find.
# Chunks left by a previous run are removed first: otherwise a shorter input
# would leave stale chunks behind and they would be counted in the report.
# The chunk list is sorted because find returns entries in no particular order.
# The report is written to a temporary file and renamed only on success, so a
# failed run never leaves a truncated target that make would consider up to date.
05-result/$(VERSION_DIR)/%.txt: 04-refine/$(VERSION_DIR)/%.jsonl
	$(eval TMP1 = $(realpath $<))
	$(eval TMP2 = $(basename $(notdir $<)))
	$(eval TMP3 = $(@D)/$(TMP2))
	mkdir -p $(TMP3)
	find $(TMP3) -type f \( -name "$(TMP2)_??" -o -name "$(TMP2)_??.jsonl" \) -exec rm -f {} \;
	cd $(TMP3) && split -l 10000 $(TMP1) "$(TMP2)_"
	find $(TMP3) -type f -name "$(TMP2)_??" -exec mv {} {}.jsonl \;
	find $(TMP3) -type f -name "$(TMP2)_??.jsonl" -exec readlink -f {} \; | sort | xargs wc -l > $@.tmp
	mv -f $@.tmp $@

# Keep the downloaded .jsonl files instead of letting make delete them as
# intermediate files, so that they can be checked afterwards.
# The enrichment pattern on the last line is currently disabled.
.PRECIOUS: 02-download/$(VERSION_DIR)/%.jsonl
#03-enrichment/$(VERSION_DIR)/%.jsonl

# Running make without any goal prints the help
.DEFAULT_GOAL := help
# Targets that are commands rather than files
.PHONY: all clean clean-cache drain help list view