diff --git a/bin/split-addresses.sh b/bin/split-addresses.sh index 933dbbe..72d0ca8 100755 --- a/bin/split-addresses.sh +++ b/bin/split-addresses.sh @@ -2,5 +2,6 @@ for area in $(cat data/areas-train.txt) do - grep -E "^${area}\s" data/area-address-train.tsv > "areas/${area}/addresses-train.txt" + grep -E "^${area}\s" data/area-address-train.tsv | \ + cut -f2 > "areas/${area}/addresses-train.txt" done diff --git a/dvc.lock b/dvc.lock index 9240cdc..6bb5b0a 100644 --- a/dvc.lock +++ b/dvc.lock @@ -58,8 +58,8 @@ md5: f8363cdb0c36496eac9fcb89f367bffd size: 305 - path: bin/split-addresses.sh - md5: a673fe80ebde2c236ede30c4ed57ce8c - size: 160 + md5: e64b86f9e4c88ef778e19a7423e59b0b + size: 176 - path: data/area-address-train.tsv md5: 179a8eb09f2ddf72dd9aa2add136fb87 size: 4010846 @@ -68,8 +68,8 @@ size: 964 outs: - path: areas - md5: 880febe8c0d19ec9ae40a29f5bf026ba.dir - size: 4010846 + md5: 9be0cb2ec76928e1443e2d4679c014c3.dir + size: 3694627 nfiles: 48 prepare@train: cmd: node bin/address-to-area.mjs "data/addresses-40-cnrs-rnsr-big-classes-train.txt"