#!/bin/sh
#
# Generate xpaths.csv: every distinct line printed by `xmlstarlet el -v`
# (element paths with their attribute values) for all *.xml entries found
# under a corpus directory, each prefixed with its occurrence count as
# produced by `uniq -c`.
#
# Arguments:
#   $1  corpusPath  directory searched recursively for *.xml
#   $2  outPath     existing directory that receives xpaths.csv; sort also
#                   places its temporary files there
#
# Exit status: 1 on wrong usage or invalid directory, otherwise the status of
# the final pipeline.
#
# Relies on find -print0, parallel (-0, --silent, {} placeholder), xmlstarlet
# and sort options such as --compress-program / --parallel.

# Debugging
#set -x

# Print the help text on stderr.
usage() {
  printf '%s\n' \
    "" \
    "Generation xpaths.csv" \
    "" \
    "Script Usage : $0 \$corpusPath \$outPath" \
    " \$corpusPath = full path of the corpus" \
    " \$outPath = xpaths.csv file directory path" \
    "" \
    "Exemple : $0 /work/elsevier/elsevier-2019-livraison-2020-09-15 /applis/panist/home/sisyphe_out/1660119726-elsevier2019-2022-08-10-generique" \
    "" >&2
}

# Exactly two arguments are required; fail with a non-zero status so that
# callers can detect the misuse.
if [ "$#" -ne 2 ]; then
  usage
  exit 1
fi

corpusPath=$1
outPath=$2

# Fail early instead of silently producing an empty xpaths.csv.
if [ ! -d "$corpusPath" ]; then
  printf '%s: corpus directory not found: %s\n' "$0" "$corpusPath" >&2
  exit 1
fi
if [ ! -d "$outPath" ]; then
  printf '%s: output directory not found: %s\n' "$0" "$outPath" >&2
  exit 1
fi

# Generate xpaths.csv including the attribute values
echo "Generation xpaths.csv avec les valeurs des attributs"

# - '*.xml' is quoted so that find, not the shell, interprets the pattern.
# - NUL-delimited names keep paths with spaces or newlines intact.
# - stderr of parallel (and of the xmlstarlet jobs it runs) is discarded on
#   purpose, as in the original pipeline; presumably to hide parse errors from
#   malformed XML files.
# - LC_ALL=C gives byte-wise ordering so that sort and uniq agree on equality.
find "$corpusPath" -name '*.xml' -print0 \
  | parallel -0 --silent xmlstarlet el -v {} 2>/dev/null \
  | LC_ALL=C sort \
      --temporary-directory="$outPath" \
      --compress-program=gzip \
      --buffer-size=2G \
      --parallel=4 \
  | LC_ALL=C uniq -c > "$outPath/xpaths.csv"