Add a -noattval flag that generates xpaths.csv without attribute values.

main.go forwards the choice to xpath.sh as a third argument ("attval" or
"noattval"); xpath.sh maps that argument onto the xmlstarlet "el" option.
Note: the "index" blob hashes below are carried over from the original patch
and were not recomputed for the revised hunks.

diff --git a/README.md b/README.md
index e1dd663..3415803 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,7 @@
     -noanalyze Disable analysis
     -noindex Disable indexation
     -noxpath Disable xpaths.csv file generation
+    -noattval xpaths.csv without attribute value
 
 ### How it works ?
 
diff --git a/main.go b/main.go
index 56317d4..2245501 100644
--- a/main.go
+++ b/main.go
@@ -39,6 +39,7 @@
 var noIndexation = flag.Bool("noindex", false, "Disable indexation after process")
 var noAnalyze = flag.Bool("noanalyze", false, "Disable analysis")
 var noXpath = flag.Bool("noxpath", false, "Disable xpaths.csv file generation")
+var noAttValue = flag.Bool("noattval", false, "Without attribute value")
 
 // regex
 var regexMime = regexp.MustCompile(`(.*); charset=(.*)`)
@@ -220,7 +221,12 @@
 	// Generation xpaths.csv
 	if !*noXpath {
 		log.Println(color.InBlue("Run xpath process"))
-		result, err := exec.Command("/bin/bash", "xpath.sh", *corpusPath, logPath).CombinedOutput()
+		// xpath.sh takes the mode as its third argument: "attval" keeps
+		// attribute values in xpaths.csv, "noattval" leaves them out.
+		attValueMode := "attval"
+		if *noAttValue {
+			attValueMode = "noattval"
+		}
+		result, err := exec.Command("/bin/bash", "xpath.sh", *corpusPath, logPath, attValueMode).CombinedOutput()
 		if err != nil {
 			fmt.Println(color.InRed("Error xpath.sh"))
 		}
diff --git a/xpath.sh b/xpath.sh
index 01d9365..f4ed557 100755
--- a/xpath.sh
+++ b/xpath.sh
@@ -5,8 +5,9 @@
 
 corpusPath=$1
 outPath=$2
+attValue=$3
 
-if [ "$#" -ne 2 ]
+if [ "$#" -ne 3 ]
 then
 	echo ""
 	echo "Generation xpaths.csv"
@@ -14,12 +15,24 @@
-	echo "Script Usage : $0 \$corpusPath \$outPath"
+	echo "Script Usage : $0 \$corpusPath \$outPath \$attValue"
 	echo " \$corpusPath = full path of the corpus"
 	echo " \$outPath = xpaths.csv file directory path"
+	echo " \$attValue = with or without attribute value [noattval/attval]"
 	echo ""
-	echo "Exemple : $0 /work/elsevier/elsevier-2019-livraison-2020-09-15 /applis/panist/home/sisyphe_out/1660119726-elsevier2019-2022-08-10-generique"
+	echo "Exemple : $0 /work/elsevier/elsevier-2019-livraison-2020-09-15 /applis/panist/home/sisyphe_out/1660119726-elsevier2019-2022-08-10-generique noattval"
 	echo ""
-	exit
+	exit 1
 fi
 
-# Generation xpaths.csv avec les valeurs des attributs
-echo "Generation xpaths.csv avec les valeurs des attributs"
-find $corpusPath -name *.xml | parallel --silent xmlstarlet el -v {} 2>/dev/null \; | LC_ALL=C sort --temporary-directory=${outPath} --compress-program=gzip --buffer-size=2G --parallel=4 | LC_ALL=C uniq -c > $outPath/xpaths.csv
+# Select the xmlstarlet "el" option from the attValue argument:
+#   attval        -> -v (attributes printed with their values)
+#   anything else -> -a (attributes printed without their values)
+if [ "$attValue" = "attval" ]
+then
+	elOption="-v"
+	echo "Generation xpaths.csv avec les valeurs des attributs"
+else
+	elOption="-a"
+	echo "Generation xpaths.csv sans les valeurs des attributs"
+fi
+
+# Single pipeline for both modes; the glob and the paths are quoted so the shell neither expands nor splits them.
+find "$corpusPath" -name '*.xml' | parallel --silent xmlstarlet el "$elOption" {} 2>/dev/null \; | LC_ALL=C sort --temporary-directory="${outPath}" --compress-program=gzip --buffer-size=2G --parallel=4 | LC_ALL=C uniq -c > "$outPath/xpaths.csv"