diff --git a/README.md b/README.md index 3415803..f00fa83 100644 --- a/README.md +++ b/README.md @@ -26,12 +26,12 @@ Generic analysis ```bash docker-compose up -d -docker exec -t sisyphe_go_go_1 go run . -n corpusName -p corpusPath -o outputPath +docker exec -t sisyphe-go_go_1 go run . -n corpusName -p corpusPath -o outputPath ``` Detailed analysis ```bash -docker exec -t sisyphe_go_go_1 go run . -n corpusName -c corpusResourcesPath -p corpusPath -o outputPath +docker exec -t sisyphe-go_go_1 go run . -n corpusName -c corpusResourcesPath -p corpusPath -o outputPath ``` Example: diff --git a/xpath.sh b/xpath.sh index f4ed557..200562b 100755 --- a/xpath.sh +++ b/xpath.sh @@ -22,13 +22,15 @@ exit fi +cpus=`cat /proc/cpuinfo | grep -i "^processor" | wc -l` + if [ "$attValue" = "attval" ] then # Generation xpaths.csv avec les valeurs des attributs echo "Generation xpaths.csv avec les valeurs des attributs" - find $corpusPath -name *.xml | parallel --silent xmlstarlet el -v {} 2>/dev/null \; | LC_ALL=C sort --temporary-directory=${outPath} --compress-program=gzip --buffer-size=2G --parallel=4 | LC_ALL=C uniq -c > $outPath/xpaths.csv + find $corpusPath -name *.xml | parallel --silent xmlstarlet el -v {} 2>/dev/null \; | LC_ALL=C sort --temporary-directory=${outPath} --compress-program=gzip --buffer-size=50% --parallel=$cpus | LC_ALL=C uniq -c > $outPath/xpaths.csv else # Generation xpaths.csv sans les valeurs des attributs echo "Generation xpaths.csv sans les valeurs des attributs" - find $corpusPath -name *.xml | parallel --silent xmlstarlet el -a {} 2>/dev/null \; | LC_ALL=C sort --temporary-directory=${outPath} --compress-program=gzip --buffer-size=2G --parallel=4 | LC_ALL=C uniq -c > $outPath/xpaths.csv + find $corpusPath -name *.xml | parallel --silent xmlstarlet el -a {} 2>/dev/null \; | LC_ALL=C sort --temporary-directory=${outPath} --compress-program=gzip --buffer-size=50% --parallel=$cpus | LC_ALL=C uniq -c > $outPath/xpaths.csv fi