Newer
Older
rnsr-geo-ml-dvc / dvc.lock
schema: '2.0'
stages:
  prepare:
    cmd: node bin/address-to-area.mjs > data/area-address.tsv
    deps:
    - path: bin/address-to-area.mjs
      md5: 586c0f15529591e46a6b22e2a05770d5
      size: 942
    - path: data/addresses-40-cnrs-rnsr-big-classes-train.txt
      md5: 612c9731294d230f5fd4ed8c10d67468
      size: 3694626
    - path: data/netscity-ville-aire-uniq.tsv
      md5: 85e405b8cc452b953e8dfd12dae5d8d6
      size: 341275
    - path: libs/geo.mjs
      md5: af499b4463e041aeb5f4ca58af486565
      size: 2443
    params:
      params.yaml:
        prepare.input: data/addresses-40-cnrs-rnsr-big-classes-train.txt
    outs:
    - path: data/area-address.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
  extract-areas:
    cmd: cat data/area-address.tsv| cut -f1 | sort -u > data/areas.txt
    deps:
    - path: data/area-address.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
    outs:
    - path: data/areas.txt
      md5: 6102ce8099fd61bccb6eb606b66ef260
      size: 964
  create-tree:
    cmd:
    - rm -rf areas
    - cat data/areas.txt | sed -e 's|^|areas/|' | xargs mkdir -p
    deps:
    - path: data/areas.txt
      md5: 6102ce8099fd61bccb6eb606b66ef260
      size: 964
    outs:
    - path: areas
      md5: d751713988987e9331980363e24189ce.dir
      size: 0
      nfiles: 0
  clean-tree:
    cmd: rm -rf areas