diff --git a/bin/train-areas.sh b/bin/train-areas.sh index af8f4c2..b0c2651 100755 --- a/bin/train-areas.sh +++ b/bin/train-areas.sh @@ -2,14 +2,17 @@ duration=${1:-60} # seconds areas=${2:-*} # * or ANGERS +size=${3:-2M} for areaPath in areas/${areas} do area="${areaPath#areas/}" + echo "${area} ------------" bin/fasttext supervised \ -input "areas/${area}/addresses-train.txt" \ -output "models/${area}" \ -autotune-validation \ "areas/${area}/addresses-test.txt" \ - -autotune-duration ${duration} + -autotune-duration ${duration} \ + -autotune-modelsize ${size} done diff --git a/dvc.lock b/dvc.lock index 3e49e29..c4ea715 100644 --- a/dvc.lock +++ b/dvc.lock @@ -154,7 +154,7 @@ train: cmd: - mkdir -p models - - bin/train-areas.sh 30 "*" + - bin/train-areas.sh 30 "*" 2M deps: - path: areas md5: 025c79e8e821b743ea5f9a2820c8d9f5.dir @@ -163,11 +163,17 @@ - path: bin/fasttext md5: 2fbc2c71ba6e474327503d21206ec9b8 size: 462216 + - path: bin/train-areas.sh + md5: b51c7308c8fca67995e77f6fd6237639 + size: 464 + - path: data/areas-test.txt + md5: b4464b712659fc2b0ebfaece2ba6b695 + size: 962 - path: data/areas-train.txt md5: 6102ce8099fd61bccb6eb606b66ef260 size: 964 outs: - path: models - md5: 6a3d10abfdd5f2c02620e816c71e0c2c.dir - size: 18803735951 - nfiles: 94 + md5: 4bc30dee7cb60406116610ceb54f066a.dir + size: 102068604 + nfiles: 96 diff --git a/dvc.yaml b/dvc.yaml index 8e871a4..9cc1b62 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -43,11 +43,12 @@ desc: Train a model in each area cmd: - mkdir -p models - - bin/train-areas.sh ${train.fasttext.duration} "${train.areas}" + - bin/train-areas.sh ${train.fasttext.duration} "${train.areas}" ${train.fasttext.modelsize} deps: - bin/fasttext - # - bin/train-areas.sh + - bin/train-areas.sh - areas - data/areas-train.txt + - data/areas-test.txt outs: - models diff --git a/params.yaml b/params.yaml index cfefe04..b4bfa67 100644 --- a/params.yaml +++ b/params.yaml @@ -8,4 +8,5 @@ train: fasttext: duration: 30 # seconds + modelsize: 2M areas: "*" # "*" or any of ANGERS, ALBI, ...