stages:
prepare:
  # One stage instance is generated per entry of the `prepare` template
  # variable (defined outside this file, presumably in params.yaml).
  # Each entry must provide the `input` and `output` fields used below.
  foreach: ${prepare}
  do:
    desc: Assign a geographical area to each address
    # Both paths are quoted so that a path containing spaces or glob
    # characters reaches the shell as a single word.
    cmd: node bin/address-to-area.mjs "${item.input}" > "${item.output}"
    deps:
      # Original note on the expected form of this path:
      # data/addresses-40-cnrs-rnsr-big-classes-${item}.txt
      - ${item.input}
      - data/netscity-ville-aire-uniq.tsv
      - bin/address-to-area.mjs
      - libs/geo.mjs
    outs:
      # Original note on the expected form of this path:
      # data/area-address-${item}.tsv
      - ${item.output}
extract-areas:
  # One stage instance per data split listed here.
  foreach:
    - train
    - test
  do:
    desc: Extract geographic areas
    # Keep the first tab-separated field of every line and deduplicate the
    # result. cut reads the file itself, so no extra `cat` process is needed.
    cmd: cut -f1 data/area-address-${item}.tsv | sort -u > data/areas-${item}.txt
    deps:
      - data/area-address-${item}.tsv
    outs:
      - data/areas-${item}.txt
split:
  desc: Split the addresses into the tree of areas
  # The commands below are run one after the other.
  cmd:
    # Start from a clean tree.
    - rm -rf areas
    # Prefix every line of the training area list with "areas/" and create
    # the resulting directories.
    - sed -e 's|^|areas/|' data/areas-train.txt | xargs mkdir -p
    - bash bin/split-addresses.sh
    - bash bin/gather-little-areas.sh
    # Invoked through node explicitly (as the prepare stage does for its
    # own .mjs script) so the step does not depend on the file being
    # executable or carrying a shebang line.
    - node bin/assign-to-areas.mjs data/area-address-test.tsv
  deps:
    - data/areas-train.txt
    - data/area-address-train.tsv
    - bin/split-addresses.sh
    - bin/gather-little-areas.sh
    - bin/assign-to-areas.mjs
    - data/area-address-test.tsv
  outs:
    - areas
train:
  desc: Train a model in each area
  # Inputs: the fasttext binary, the training script, the areas tree and
  # both area lists.
  deps:
    - bin/fasttext
    - bin/train-areas.sh
    - areas
    - data/areas-train.txt
    - data/areas-test.txt
  # Everything the stage produces lives in the models directory.
  outs:
    - models
  cmd:
    # Make sure the output directory exists before training starts.
    - mkdir -p models
    # The training script receives three values taken from the `train`
    # template variable group. The folded scalar joins the two lines with
    # a single space, yielding one command line.
    - >-
      bin/train-areas.sh ${train.fasttext.duration} "${train.areas}"
      ${train.fasttext.modelsize}
evaluate:
  # Description added so that every stage of the pipeline carries one.
  desc: Evaluate the models trained for each area
  cmd:
    # Receives the same `train.areas` template value as the train stage.
    - bin/evaluate-areas.sh "${train.areas}"
  deps:
    - models
    - bin/evaluate-areas.sh
  metrics:
    - metrics.json:
        # French for: statistics on the precision obtained per
        # geographic area.
        desc: Statistiques sur les précisions obtenues par aire géographique
        # Kept out of the DVC cache.
        cache: false
  plots:
    - precision.json:
        cache: false
        # Field of precision.json plotted on the y axis.
        y: precision
        template: simple  # simple or scatter
        # Title and axis labels are user-facing French text
        # ("precision per geographic area", "area", "precision").
        title: Précision par aire géographique
        x_label: Aire
        y_label: Précision