# rnsr-geo-ml-dvc / dvc.lock
# Lock-file header. `schema` is quoted so it stays the string '2.0' rather than
# a float. `stages` maps each pipeline stage name to the command, dependencies,
# parameters and outputs recorded for it, each with an md5 and a size.
schema: '2.0'
stages:
  # Stage `prepare`: runs bin/address-to-area.mjs under node and redirects its
  # standard output to data/area-address.tsv.
  # NOTE(review): the md5/size recorded here for bin/address-to-area.mjs
  # (586c0f15..., 942 bytes) differ from those recorded by `prepare@train` and
  # `prepare@test` (1b7878a5..., 886 bytes), so this entry was locked against a
  # different revision of the script. It may be a stale entry - confirm
  # against dvc.yaml.
  prepare:
    cmd: node bin/address-to-area.mjs > data/area-address.tsv
    deps:
    - path: bin/address-to-area.mjs
      md5: 586c0f15529591e46a6b22e2a05770d5
      size: 942
    - path: data/addresses-40-cnrs-rnsr-big-classes-train.txt
      md5: 612c9731294d230f5fd4ed8c10d67468
      size: 3694626
    - path: data/netscity-ville-aire-uniq.tsv
      md5: 85e405b8cc452b953e8dfd12dae5d8d6
      size: 341275
    - path: libs/geo.mjs
      md5: af499b4463e041aeb5f4ca58af486565
      size: 2443
    params:
      params.yaml:
        # Recorded value of the `prepare.input` parameter; it names the same
        # file as the second dependency above. The command line itself does
        # not pass it, so the script presumably reads it on its own (verify).
        prepare.input: data/addresses-40-cnrs-rnsr-big-classes-train.txt
    outs:
    # Same md5 and size as the data/area-address-train.tsv output recorded by
    # `prepare@train`.
    - path: data/area-address.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
  # Stage `extract-areas`: keeps the first field of every line of
  # data/area-address.tsv (`cut -f1`, tab-delimited by default), sorts the
  # values and removes duplicates (`sort -u`), and writes the result to
  # data/areas.txt.
  extract-areas:
    cmd: cat data/area-address.tsv| cut -f1 | sort -u > data/areas.txt
    deps:
    # md5/size match the output recorded by the `prepare` stage.
    - path: data/area-address.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
    outs:
    - path: data/areas.txt
      md5: 6102ce8099fd61bccb6eb606b66ef260
      size: 964
  # Stage `create-tree`: deletes the `areas` directory, then recreates it with
  # one sub-directory per line of data/areas.txt (sed prefixes every line with
  # "areas/", xargs hands the results to `mkdir -p`).
  # NOTE(review): xargs splits its input on whitespace by default, so an area
  # name containing a space would yield several directories - confirm that
  # area names never contain blanks.
  # NOTE(review): the `split` stage in this file also records `areas` as its
  # output; check whether both stages still exist in dvc.yaml.
  create-tree:
    cmd:
    - rm -rf areas
    - cat data/areas.txt | sed -e 's|^|areas/|' | xargs mkdir -p
    deps:
    - path: data/areas.txt
      md5: 6102ce8099fd61bccb6eb606b66ef260
      size: 964
    outs:
    # Recorded as a directory (md5 ends in `.dir`) holding no files, which is
    # consistent with the commands above creating only empty directories.
    - path: areas
      md5: d751713988987e9331980363e24189ce.dir
      size: 0
      nfiles: 0
  # Stage `clean-tree`: removes the `areas` directory. No dependencies or
  # outputs are recorded for it.
  clean-tree:
    cmd: rm -rf areas
  # Stage `split`: rebuilds the `areas` directory tree from
  # data/areas-train.txt (one sub-directory per line), then runs
  # bin/split-addresses.sh followed by bin/gather-little-areas.sh.
  # The two scripts are not visible here; presumably they fill and then
  # regroup the per-area directories - verify in the scripts themselves.
  split:
    cmd:
    - rm -rf areas
    - cat data/areas-train.txt | sed -e 's|^|areas/|' | xargs mkdir -p
    - bash bin/split-addresses.sh
    - bash bin/gather-little-areas.sh
    deps:
    - path: bin/gather-little-areas.sh
      md5: f8363cdb0c36496eac9fcb89f367bffd
      size: 305
    - path: bin/split-addresses.sh
      md5: e64b86f9e4c88ef778e19a7423e59b0b
      size: 176
    # Output of `prepare@train` (same md5/size). It is not named on any
    # command line above, so it is presumably read by one of the scripts.
    - path: data/area-address-train.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
    # Output of `extract-areas@train` (same md5/size).
    - path: data/areas-train.txt
      md5: 6102ce8099fd61bccb6eb606b66ef260
      size: 964
    outs:
    # Directory output (md5 ends in `.dir`): 48 files, 3694627 bytes in total.
    - path: areas
      md5: 9be0cb2ec76928e1443e2d4679c014c3.dir
      size: 3694627
      nfiles: 48
  # Stage `prepare@train`: runs bin/address-to-area.mjs under node with the
  # training address file as its argument and redirects standard output to
  # data/area-address-train.tsv.
  # The `@train` suffix looks like a DVC foreach-generated stage name
  # (`<stage>@<item>`) - confirm in dvc.yaml.
  # The `cmd` value below is one plain scalar continued on a second line; the
  # line break folds into a single space.
  prepare@train:
    cmd: node bin/address-to-area.mjs  "data/addresses-40-cnrs-rnsr-big-classes-train.txt"
      >  data/area-address-train.tsv
    deps:
    - path: bin/address-to-area.mjs
      md5: 1b7878a5dda36bc9b67b49778aac702a
      size: 886
    - path: data/addresses-40-cnrs-rnsr-big-classes-train.txt
      md5: 612c9731294d230f5fd4ed8c10d67468
      size: 3694626
    - path: data/netscity-ville-aire-uniq.tsv
      md5: 85e405b8cc452b953e8dfd12dae5d8d6
      size: 341275
    - path: libs/geo.mjs
      md5: af499b4463e041aeb5f4ca58af486565
      size: 2443
    outs:
    - path: data/area-address-train.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
  # Stage `extract-areas@train`: keeps the first tab-delimited field of every
  # line of data/area-address-train.tsv (`cut -f1`), sorts and de-duplicates
  # the values (`sort -u`), and writes them to data/areas-train.txt.
  extract-areas@train:
    cmd: cat data/area-address-train.tsv| cut -f1 | sort -u > data/areas-train.txt
    deps:
    # md5/size match the output recorded by `prepare@train`.
    - path: data/area-address-train.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
    outs:
    - path: data/areas-train.txt
      md5: 6102ce8099fd61bccb6eb606b66ef260
      size: 964
  # Stage `prepare@test`: runs bin/address-to-area.mjs under node with the test
  # address file as its argument and redirects standard output to
  # data/area-address-test.tsv.
  # The script, data/netscity-ville-aire-uniq.tsv and libs/geo.mjs carry the
  # same md5/size as in `prepare@train`; only the input address file differs.
  # The `cmd` value below is one plain scalar continued on a second line; the
  # line break folds into a single space.
  prepare@test:
    cmd: node bin/address-to-area.mjs  "data/addresses-40-cnrs-rnsr-big-classes-test.txt"
      >  data/area-address-test.tsv
    deps:
    - path: bin/address-to-area.mjs
      md5: 1b7878a5dda36bc9b67b49778aac702a
      size: 886
    - path: data/addresses-40-cnrs-rnsr-big-classes-test.txt
      md5: 88402e9e874960f0b6f7eeb8e3c306d4
      size: 3410283
    - path: data/netscity-ville-aire-uniq.tsv
      md5: 85e405b8cc452b953e8dfd12dae5d8d6
      size: 341275
    - path: libs/geo.mjs
      md5: af499b4463e041aeb5f4ca58af486565
      size: 2443
    outs:
    - path: data/area-address-test.tsv
      md5: 72ff16c01ed5e62d7ec3e2daafc4f257
      size: 3700326
  # Stage `extract-areas@test`: keeps the first tab-delimited field of every
  # line of data/area-address-test.tsv (`cut -f1`), sorts and de-duplicates
  # the values (`sort -u`), and writes them to data/areas-test.txt.
  extract-areas@test:
    cmd: cat data/area-address-test.tsv| cut -f1 | sort -u > data/areas-test.txt
    deps:
    # md5/size match the output recorded by `prepare@test`.
    - path: data/area-address-test.tsv
      md5: 72ff16c01ed5e62d7ec3e2daafc4f257
      size: 3700326
    outs:
    # md5 and size differ from data/areas-train.txt (962 vs 964 bytes), so the
    # test and train area lists are not identical.
    - path: data/areas-test.txt
      md5: b4464b712659fc2b0ebfaece2ba6b695
      size: 962