# DVC pipeline: map addresses to geographic areas, list the distinct areas of
# each data set, then build an `areas/` directory tree from the training set.
stages:
  # One stage instance per entry of the `prepare` parameter. Each entry must
  # provide an `input` and an `output` path.
  # NOTE(review): `prepare` is not defined in this file; presumably it comes
  # from params.yaml -- confirm.
  prepare:
    foreach: ${prepare}
    do:
      desc: Assign a geographical area to each address
      # Both paths are quoted so that a path containing spaces is still passed
      # to the shell as a single word.
      cmd: node bin/address-to-area.mjs "${item.input}" > "${item.output}"
      deps:
        # Input example: data/addresses-40-cnrs-rnsr-big-classes-<set>.txt
        - ${item.input}
        - data/netscity-ville-aire-uniq.tsv
        - bin/address-to-area.mjs
        - libs/geo.mjs
      outs:
        # Output example: data/area-address-<set>.tsv
        - ${item.output}

  # For each data set, keep the first tab-separated column of the
  # area/address table and write its distinct values, sorted.
  # NOTE(review): data/area-address-<set>.tsv is presumably produced by the
  # `prepare` stage above -- confirm against the `prepare` parameter.
  extract-areas:
    foreach:
      - train
      - test
    do:
      desc: Extract geographic areas
      cmd: cat data/area-address-${item}.tsv| cut -f1 | sort -u > data/areas-${item}.txt
      deps:
        - data/area-address-${item}.tsv
      outs:
        - data/areas-${item}.txt

  # Rebuild the `areas/` tree from the training areas, then run the helper
  # scripts and finally process the test addresses.
  split:
    desc: Split the addresses into the tree of areas
    cmd:
      # Start from a clean tree on every run.
      - rm -rf areas
      # Create one directory per line of the area list. Reading line by line
      # (instead of piping to xargs) keeps names containing spaces, quotes or
      # backslashes intact, since each line becomes exactly one argument.
      - while IFS= read -r area; do mkdir -p "areas/$area"; done < data/areas-train.txt
      - bash bin/split-addresses.sh
      - bash bin/gather-little-areas.sh
      # Executed directly (not through `node`), so the script must be
      # executable and carry its own shebang line.
      - bin/assign-to-areas.mjs data/area-address-test.tsv
    deps:
      - data/areas-train.txt
      - data/area-address-train.tsv
      - bin/split-addresses.sh
      - bin/gather-little-areas.sh
      - bin/assign-to-areas.mjs
      - data/area-address-test.tsv
    outs:
      - areas