diff --git a/indexCorpus.sh b/indexCorpus.sh
index f1d537e..791439d 100755
--- a/indexCorpus.sh
+++ b/indexCorpus.sh
@@ -101,10 +101,6 @@
         echo "Unexpected file name : "$fileName" instead of "$filePattern
         exit
     fi
-    # Generation xpaths.csv avec les valeurs des attributs
-    echo ""
-    echo "Generation xpaths.csv avec les valeurs des attributs"
-    find $corpusPath -name *.xml | parallel --silent xmlstarlet el -v {} 2>/dev/null \; | sort | uniq -c > $outPath/xpaths.csv
 else
     echo "File is missing"
     exit
diff --git a/main.go b/main.go
index 98f446d..e9ca216 100644
--- a/main.go
+++ b/main.go
@@ -37,6 +37,8 @@
 var configurationFolder = flag.String("c", "", "Configuration folder path")
 var withWordCount = flag.Bool("w", false, "Enable word count")
 var noIndexation = flag.Bool("noindex", false, "Disable indexation after process")
+var noAnalyze = flag.Bool("noanalyze", false, "Disable analysis")
+var noXpath = flag.Bool("noxpath", false, "Disable xpaths.csv file generation")
 
 // regex
 var regexMime = regexp.MustCompile(`(.*); charset=(.*)`)
@@ -186,18 +188,22 @@
 	// init logger and params
 	InitProcess()
 
-	if *configurationFolder != "" {
-		InitDetailledAnalyze()
-	}
+	// Analyze the corpus unless -noanalyze was given.
+	if !*noAnalyze {
 
-	wg.Add(1)
-	GetAllFiles(*corpusPath)
-	wg.Wait()
-	close(queueForConcurrent)
-	elapsed := time.Since(start)
-	fmt.Println("")
-	log.Println(color.Green + "End of program with " + strconv.Itoa(numberFiles) + " files processed" + color.Reset)
-	log.Printf("Total time %s", elapsed)
+		if *configurationFolder != "" {
+			InitDetailledAnalyze()
+		}
+
+		wg.Add(1)
+		GetAllFiles(*corpusPath)
+		wg.Wait()
+		close(queueForConcurrent)
+		elapsed := time.Since(start)
+		fmt.Println("")
+		log.Println(color.Green + "End of program with " + strconv.Itoa(numberFiles) + " files processed" + color.Reset)
+		log.Printf("Total time %s", elapsed)
+	}
 
 	// after process index analyze file
 	if !*noIndexation {
@@ -208,5 +214,16 @@
 		}
 		fmt.Println(string(result))
 	}
-	fmt.Println(color.InGreen("Done!"))
+
+	// Generate xpaths.csv with xpath.sh unless -noxpath was given.
+	if !*noXpath {
+		log.Println(color.InBlue("Run xpath process"))
+		result, err := exec.Command("/bin/bash", "xpath.sh", *corpusPath, logPath).CombinedOutput()
+		if err != nil {
+			fmt.Println(color.InRed("Error xpath.sh"), err)
+		}
+		fmt.Println(string(result))
+	}
+
+	fmt.Println(color.InGreen("End"))
 }
diff --git a/xpath.sh b/xpath.sh
new file mode 100755
index 0000000..f15032d
--- /dev/null
+++ b/xpath.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+#
+# Generate xpaths.csv: run "xmlstarlet el -v" on every *.xml file found under
+# the corpus directory, then sort the output and count how many times each
+# distinct line occurs.
+#
+# Usage: xpath.sh <corpusPath> <outPath>
+
+# Debugging
+#set -x
+
+corpusPath=$1
+outPath=$2
+
+if [ "$#" -ne 2 ]; then
+    echo ""
+    echo "Generation xpaths.csv"
+    echo ""
+    echo "Script Usage : $0 \$corpusPath \$outPath"
+    echo " \$corpusPath = full path of the corpus"
+    echo " \$outPath = xpaths.csv file directory path"
+    echo ""
+    echo "Exemple : $0 /work/elsevier/elsevier-2019-livraison-2020-09-15 /applis/panist/home/sisyphe_out/1660119726-elsevier2019-2022-08-10-generique"
+    echo ""
+    # Non-zero status so the caller can tell that nothing was generated.
+    exit 1
+fi
+
+# Generate xpaths.csv with the attribute values.
+# The glob is quoted so find receives the pattern itself rather than the *.xml
+# files of the current directory; the paths are quoted so spaces are preserved.
+echo "Generation xpaths.csv avec les valeurs des attributs"
+find "$corpusPath" -name '*.xml' | parallel --silent xmlstarlet el -v {} 2>/dev/null \; | sort | uniq -c > "$outPath/xpaths.csv"