# rnsr-geo-ml-dvc / dvc.yaml
stages:
  # Stage "prepare": runs bin/address-to-area.mjs with Node.js and captures its
  # standard output in data/area-address.tsv.
  prepare:
    desc: Assign a geographical area to each address
    # The script is invoked without arguments; its stdout becomes the output file.
    cmd: node bin/address-to-area.mjs > data/area-address.tsv
    # Files whose content change invalidates the stage: two data files, the
    # script itself and libs/geo.mjs (presumably imported by the script -
    # confirm).
    deps:
    - data/addresses-40-cnrs-rnsr-big-classes-train.txt
    - data/netscity-ville-aire-uniq.tsv
    - bin/address-to-area.mjs
    - libs/geo.mjs
    # Parameter key tracked by DVC; a change of its value also invalidates the
    # stage.
    # NOTE(review): cmd does not reference this parameter, so the script
    # presumably reads it from the params file itself - confirm.
    params:
    - prepare.input
    # Must match the redirection target used in cmd.
    outs:
    - data/area-address.tsv
  # Stage "extract-areas": builds the de-duplicated list of areas from the
  # first tab-separated column of data/area-address.tsv.
  extract-areas:
    desc: Extract geographic areas
    # cut reads the file directly, so no extra cat process is needed.
    # LC_ALL=C pins sort to byte-wise comparison: the order of the lines and
    # the de-duplication done by -u no longer depend on the locale of the
    # machine running the pipeline, which keeps the tracked output
    # reproducible and prevents distinct accented names from being merged.
    cmd: cut -f1 data/area-address.tsv | LC_ALL=C sort -u > data/areas.txt
    deps:
    - data/area-address.tsv
    outs:
    - data/areas.txt
  # Stage "create-tree": creates one directory under areas/ for every line of
  # data/areas.txt.
  create-tree:
    desc: Create the tree of areas
    cmd:
    # Start from a clean tree so directories of areas that no longer exist
    # do not survive a re-run.
    - rm -rf areas
    # Create the root explicitly so the declared output exists even when
    # data/areas.txt is empty.
    - mkdir -p areas
    # Read the list line by line instead of piping it through xargs: xargs
    # splits its input on blanks and interprets quotes and backslashes, so an
    # area name containing a space or an apostrophe produced wrong directories
    # or aborted the command. The quoted path keeps each line intact.
    # Keep $area without braces: DVC treats a dollar sign followed by braces
    # in cmd as template interpolation.
    - 'while IFS= read -r area; do mkdir -p "areas/$area"; done < data/areas.txt'
    deps:
    - data/areas.txt
    outs:
    - areas # Unfortunately, does not take into account the subdirectories