#!/bin/sh
#
# Generate xpaths.csv for an XML corpus.
#
# Every *.xml file found under the corpus directory is passed to
# `xmlstarlet el`, which prints the element paths of the document. The paths
# of all files are sorted and counted, and the result (one "count path" line
# per distinct path, as produced by `uniq -c`) is written to
# <outPath>/xpaths.csv.
#
# Arguments:
#   $1  corpusPath  full path of the corpus
#   $2  outPath     directory receiving xpaths.csv (also used by sort for its
#                   temporary files)
#   $3  attValue    "attval" to include attribute values in the paths; any
#                   other value (conventionally "noattval") lists attributes
#                   without their values
#
# Exit status: 1 when the argument count is wrong; otherwise the status of
# the generation pipeline.
#
# Requires: find, GNU parallel, xmlstarlet, GNU sort (--parallel,
# --compress-program), uniq.

# Debugging: uncomment the next line to trace every command.
#set -x

# Validate the argument count first; usage goes to stderr and the script
# exits non-zero so that callers can detect the misuse.
if [ "$#" -ne 3 ]
then
    {
        echo ""
        echo "Generation xpaths.csv"
        echo ""
        echo "Script Usage : $0 \$corpusPath \$outPath \$attValue"
        echo " \$corpusPath = full path of the corpus"
        echo " \$outPath = xpaths.csv file directory path"
        echo " \$attValue = with or without attribute value [noattval/attval]"
        echo ""
        echo "Exemple : $0 /work/elsevier/elsevier-2019-livraison-2020-09-15 /applis/panist/home/sisyphe_out/1660119726-elsevier2019-2022-08-10-generique noattval"
        echo ""
    } >&2
    exit 1
fi

corpusPath=$1
outPath=$2
attValue=$3

# Number of processors, used to size the parallel sort. Fall back to 1 when
# /proc/cpuinfo is unreadable or lists no processor, so that sort never
# receives an empty or zero --parallel value.
cpus=$(grep -ci "^processor" /proc/cpuinfo 2>/dev/null)
if [ "${cpus:-0}" -gt 0 ] 2>/dev/null
then
    :
else
    cpus=1
fi

# Select the xmlstarlet "el" flag: -v emits attributes with their values,
# -a emits attributes without values.
case "$attValue" in
    attval)
        # Generate xpaths.csv with the attribute values
        echo "Generation xpaths.csv avec les valeurs des attributs"
        elOption=-v
        ;;
    *)
        # Generate xpaths.csv without the attribute values
        echo "Generation xpaths.csv sans les valeurs des attributs"
        elOption=-a
        ;;
esac

# The '*.xml' pattern is quoted so that find, not the shell, interprets it.
# stderr of the parallel/xmlstarlet stage is deliberately discarded (the
# original script did the same) so that per-file error messages do not flood
# the terminal. LC_ALL=C gives a byte-wise, locale-independent ordering for
# both sort and uniq.
find "$corpusPath" -name '*.xml' \
    | parallel --silent xmlstarlet el "$elOption" {} 2>/dev/null \
    | LC_ALL=C sort --temporary-directory="$outPath" --compress-program=gzip --buffer-size=50% --parallel="$cpus" \
    | LC_ALL=C uniq -c > "$outPath/xpaths.csv"