# NOTE(review): this looks like a DVC lock file (schema 2.0), i.e. the recorded
# state of each pipeline stage: the exact command, plus md5/size of every
# dependency and output. It is presumably machine-written -- hand edits and
# these comments may be lost when the tool rewrites the file; confirm.
schema: '2.0'
stages:
  # Maps addresses to areas: stdout of the node script is redirected to
  # data/area-address.tsv.
  # NOTE(review): the md5/size recorded here for bin/address-to-area.mjs differ
  # from those recorded under prepare@train / prepare@test, and the command
  # takes no input argument -- presumably a stale entry from before the stage
  # was parameterised; verify against dvc.yaml.
  prepare:
    cmd: node bin/address-to-area.mjs > data/area-address.tsv
    deps:
    - path: bin/address-to-area.mjs
      md5: 586c0f15529591e46a6b22e2a05770d5
      size: 942
    - path: data/addresses-40-cnrs-rnsr-big-classes-train.txt
      md5: 612c9731294d230f5fd4ed8c10d67468
      size: 3694626
    - path: data/netscity-ville-aire-uniq.tsv
      md5: 85e405b8cc452b953e8dfd12dae5d8d6
      size: 341275
    - path: libs/geo.mjs
      md5: af499b4463e041aeb5f4ca58af486565
      size: 2443
    # Parameter value recorded from params.yaml for this stage.
    params:
      params.yaml:
        prepare.input: data/addresses-40-cnrs-rnsr-big-classes-train.txt
    outs:
    - path: data/area-address.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
  # Keeps the first tab-separated column of data/area-address.tsv, sorted and
  # de-duplicated, and writes it to data/areas.txt.
  extract-areas:
    cmd: cat data/area-address.tsv| cut -f1 | sort -u > data/areas.txt
    deps:
    - path: data/area-address.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
    outs:
    - path: data/areas.txt
      md5: 6102ce8099fd61bccb6eb606b66ef260
      size: 964
  # Recreates the areas/ directory: removes it, then creates one directory per
  # line of data/areas.txt (each line is prefixed with "areas/").
  # The recorded output is an empty directory tree (size 0, nfiles 0).
  # NOTE(review): the `split` stage below also records `areas` as an output;
  # confirm that both stages are still declared in the pipeline definition.
  create-tree:
    cmd:
    - rm -rf areas
    - cat data/areas.txt | sed -e 's|^|areas/|' | xargs mkdir -p
    deps:
    - path: data/areas.txt
      md5: 6102ce8099fd61bccb6eb606b66ef260
      size: 964
    outs:
    - path: areas
      md5: d751713988987e9331980363e24189ce.dir
      size: 0
      nfiles: 0
  # Deletes the areas/ directory. No dependencies or outputs are recorded.
  clean-tree:
    cmd: rm -rf areas
  # Rebuilds areas/ from data/areas-train.txt, then runs the two shell scripts
  # in order. The recorded result is a directory of 48 files.
  # What the scripts do is not visible here; they are tracked as dependencies
  # only.
  split:
    cmd:
    - rm -rf areas
    - cat data/areas-train.txt | sed -e 's|^|areas/|' | xargs mkdir -p
    - bash bin/split-addresses.sh
    - bash bin/gather-little-areas.sh
    deps:
    - path: bin/gather-little-areas.sh
      md5: f8363cdb0c36496eac9fcb89f367bffd
      size: 305
    - path: bin/split-addresses.sh
      md5: e64b86f9e4c88ef778e19a7423e59b0b
      size: 176
    - path: data/area-address-train.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
    - path: data/areas-train.txt
      md5: 6102ce8099fd61bccb6eb606b66ef260
      size: 964
    outs:
    - path: areas
      md5: 9be0cb2ec76928e1443e2d4679c014c3.dir
      size: 3694627
      nfiles: 48
  # "train" variant of prepare: the input file is passed as an argument and the
  # output goes to data/area-address-train.tsv.
  # NOTE(review): the `@train` / `@test` suffixes look like stages generated
  # from a foreach definition -- confirm in dvc.yaml.
  # The recorded output md5/size equal those of data/area-address.tsv above.
  prepare@train:
    cmd: node bin/address-to-area.mjs "data/addresses-40-cnrs-rnsr-big-classes-train.txt" > data/area-address-train.tsv
    deps:
    - path: bin/address-to-area.mjs
      md5: 1b7878a5dda36bc9b67b49778aac702a
      size: 886
    - path: data/addresses-40-cnrs-rnsr-big-classes-train.txt
      md5: 612c9731294d230f5fd4ed8c10d67468
      size: 3694626
    - path: data/netscity-ville-aire-uniq.tsv
      md5: 85e405b8cc452b953e8dfd12dae5d8d6
      size: 341275
    - path: libs/geo.mjs
      md5: af499b4463e041aeb5f4ca58af486565
      size: 2443
    outs:
    - path: data/area-address-train.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
  # "train" variant of extract-areas: unique first-column values of
  # data/area-address-train.tsv, written to data/areas-train.txt.
  extract-areas@train:
    cmd: cat data/area-address-train.tsv| cut -f1 | sort -u > data/areas-train.txt
    deps:
    - path: data/area-address-train.tsv
      md5: 179a8eb09f2ddf72dd9aa2add136fb87
      size: 4010846
    outs:
    - path: data/areas-train.txt
      md5: 6102ce8099fd61bccb6eb606b66ef260
      size: 964
  # "test" variant of prepare: same script and lookup files as prepare@train,
  # applied to the test address file.
  prepare@test:
    cmd: node bin/address-to-area.mjs "data/addresses-40-cnrs-rnsr-big-classes-test.txt" > data/area-address-test.tsv
    deps:
    - path: bin/address-to-area.mjs
      md5: 1b7878a5dda36bc9b67b49778aac702a
      size: 886
    - path: data/addresses-40-cnrs-rnsr-big-classes-test.txt
      md5: 88402e9e874960f0b6f7eeb8e3c306d4
      size: 3410283
    - path: data/netscity-ville-aire-uniq.tsv
      md5: 85e405b8cc452b953e8dfd12dae5d8d6
      size: 341275
    - path: libs/geo.mjs
      md5: af499b4463e041aeb5f4ca58af486565
      size: 2443
    outs:
    - path: data/area-address-test.tsv
      md5: 72ff16c01ed5e62d7ec3e2daafc4f257
      size: 3700326
  # "test" variant of extract-areas: unique first-column values of
  # data/area-address-test.tsv, written to data/areas-test.txt.
  extract-areas@test:
    cmd: cat data/area-address-test.tsv| cut -f1 | sort -u > data/areas-test.txt
    deps:
    - path: data/area-address-test.tsv
      md5: 72ff16c01ed5e62d7ec3e2daafc4f257
      size: 3700326
    outs:
    - path: data/areas-test.txt
      md5: b4464b712659fc2b0ebfaece2ba6b695
      size: 962