# DVC pipeline: assign addresses to geographic areas, split them into a
# per-area directory tree, train one model per area, then evaluate.
#
# Stage order as implied by the deps/outs declared below:
#   prepare -> extract-areas -> split -> train -> evaluate
#
# NOTE: `cmd` strings are kept exactly as they were; editing them would make
# DVC consider the stages changed.
stages:
  # One stage per entry of the `prepare` templating value. Each entry must
  # provide `input` and `output` fields (both are read below).
  prepare:
    foreach: ${prepare}
    do:
      desc: Assign a geographical area to each address
      cmd: node bin/address-to-area.mjs "${item.input}" > ${item.output}
      deps:
        # Presumably resolves to a file such as
        # data/addresses-40-cnrs-rnsr-big-classes-<item>.txt -- confirm
        # against the definition of `prepare`.
        - ${item.input}
        - data/netscity-ville-aire-uniq.tsv
        - bin/address-to-area.mjs
        - libs/geo.mjs
      outs:
        # Presumably resolves to data/area-address-<item>.tsv, which is the
        # path the extract-areas and split stages depend on -- confirm.
        - ${item.output}

  # For each of the train and test sets: keep the first tab-separated column
  # of the area/address table, deduplicated and sorted.
  extract-areas:
    foreach:
      - train
      - test
    do:
      desc: Extract geographic areas
      cmd: cat data/area-address-${item}.tsv| cut -f1 | sort -u > data/areas-${item}.txt
      deps:
        - data/area-address-${item}.tsv
      outs:
        - data/areas-${item}.txt

  # Rebuild the `areas` directory tree from scratch: one directory per line of
  # data/areas-train.txt, then run the split / gather / assign scripts.
  split:
    desc: Split the adresses into the tree of areas
    cmd:
      - rm -rf areas
      - cat data/areas-train.txt | sed -e 's|^|areas/|' | xargs mkdir -p
      - bash bin/split-addresses.sh
      - bash bin/gather-little-areas.sh
      # NOTE(review): invoked directly (no `node` prefix, unlike the prepare
      # stage), so this presumably relies on a shebang and the executable bit
      # -- confirm.
      - bin/assign-to-areas.mjs data/area-address-test.tsv
    deps:
      - data/areas-train.txt
      - data/area-address-train.tsv
      - bin/split-addresses.sh
      - bin/gather-little-areas.sh
      - bin/assign-to-areas.mjs
      - data/area-address-test.tsv
    outs:
      - areas

  # Train the per-area models into `models`. The ${train.*} values are
  # templating references (presumably defined in params.yaml -- confirm).
  train:
    desc: Train a model in each area
    cmd:
      - mkdir -p models
      - bin/train-areas.sh ${train.fasttext.duration} "${train.areas}" ${train.fasttext.modelsize}
    deps:
      - bin/fasttext
      - bin/train-areas.sh
      - areas
      - data/areas-train.txt
      - data/areas-test.txt
    outs:
      - models

  # Run the evaluation script against `models`; declares metrics.json and
  # precision.json as (uncached) metrics and plot outputs.
  evaluate:
    desc: Evaluate the models trained in each area
    cmd:
      - bin/evaluate-areas.sh
    deps:
      - models
      - bin/evaluate-areas.sh
    metrics:
      - metrics.json:
          # English: "Statistics on the precision obtained per geographic area"
          desc: Statistiques sur les précisions obtenues par aire géographique
          cache: false
    plots:
      - precision.json:
          cache: false
          y: precision
          template: scatter
          # English: "Precision per geographic area"; axes "Area" / "Precision"
          title: Précision par aire géographique
          x_label: Aire
          y_label: Précision