diff --git a/README.md b/README.md index f645d43..a6c3c46 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ ### Requirements -Tested with Golang 1.17 +Tested with Golang 1.18 Works on Linux/OSX/Windows @@ -57,7 +57,12 @@ Just run `go test` +For cover +`go test -cover` + ### Modules -- XML - Usage of poppler function (`pdftotext` and `pdfinfo`) +- PDF + Usage of poppler lib (`pdftotext` and `pdfinfo`) +- XML + Usage of xml lib (`xmlstarlet` and `xmllint`) diff --git a/main.go b/main.go index da36f95..2b90464 100644 --- a/main.go +++ b/main.go @@ -63,10 +63,6 @@ return filepath.SkipDir } if file.Mode().IsRegular() { - // count number files processed - if numberFiles%10 == 0 { - fmt.Printf("\rFiles processed: %d", numberFiles) - } mtype, err := mimetype.DetectFile(path) extension := filepath.Ext(path) groupMime := regexMime.FindStringSubmatch(mtype.String()) @@ -107,7 +103,7 @@ go ProcessXML(&fileData, logger) } else { logger.Info("") - numberFiles++ + UpdateCounter() } } else { if fileData.mimetype == "application/xml" || fileData.mimetype == "text/xml" { diff --git a/pdf.go b/pdf.go index aecf5ab..103546a 100644 --- a/pdf.go +++ b/pdf.go @@ -38,7 +38,7 @@ }) } logger.Info("") - numberFiles++ + UpdateCounter() } // return Message with metadata info diff --git a/util.go b/util.go index 58cc8b9..a19a046 100644 --- a/util.go +++ b/util.go @@ -1,6 +1,9 @@ package main -import "strings" +import ( + "fmt" + "strings" +) func GetStringWithSuffixInList(s []string, e string) string { for _, a := range s { @@ -17,3 +20,11 @@ } return "" } + +func UpdateCounter() { + numberFiles++ + // count number files processed + if numberFiles%10 == 0 { + fmt.Printf("\rFiles processed: %d", numberFiles) + } +} diff --git a/xml.go b/xml.go index e340699..9ad2d14 100644 --- a/xml.go +++ b/xml.go @@ -31,13 +31,11 @@ xmlInfo, logger := CheckIfXmlIsWellFormed(message.path, logger) if *configurationFolder != "" && len(xmlInfo.data) > 0 { - detailledInfo := DetailledAnalysis{} - detailledInfo, logger = CheckXMLValidation(message.path, xmlInfo, logger) - ProcessXpath(xmlInfo, detailledInfo, logger) - } else { - logger.Info("") + _, logger = CheckXMLValidation(message.path, xmlInfo, logger) + ProcessXpath(xmlInfo, logger) } - numberFiles++ + logger.Info("") + UpdateCounter() } func CheckIfXmlIsWellFormed(xmlPath string, logger *logrus.Entry) (XMLInfo, *logrus.Entry) { @@ -118,7 +116,9 @@ linesMessages := regexLineValidation.FindAllStringSubmatch(resultError, -1) if errorsMessages != nil && linesMessages != nil { for i := 0; i < len(errorsMessages); i++ { - listError = append(listError, ErrorXML{Message: strings.NewReplacer(`"`, "").Replace(errorsMessages[i][1]), Line: linesMessages[i][1], File: file}) + if errorsMessages[i] != nil || linesMessages[i] != nil { + listError = append(listError, ErrorXML{Message: strings.NewReplacer(`"`, "").Replace(errorsMessages[i][1]), Line: linesMessages[i][1], File: file}) + } } } return listError @@ -220,62 +220,65 @@ return xmlDetailled, logger } -func ProcessXpath(xmlInfo XMLInfo, detailledInfo DetailledAnalysis, logger *logrus.Entry) json.RawMessage { +func ProcessXpath(xmlInfo XMLInfo, logger *logrus.Entry) DetailledAnalysis { doc, err := xmlquery.Parse(strings.NewReader(xmlInfo.data)) + detailledInfo := DetailledAnalysis{} - tmpXpath := "{" - if err == nil { - for _, field := range configDetailledAnalyze.XML.XPATH { - previousXpath := make(map[string]string) - for _, xpathContent := range field.Xpath { - goodXPath, hasQuote := FormatXpathByType(field.Type, xpathContent) - expr, _ := xpath.Compile(goodXPath) - result := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)) - text := fmt.Sprintf("%v", result) - // not process xpath if previous is good - if previousXpath[xpathContent] == "" && len(text) > 0 { - previousXpath[xpathContent] = xpathContent - // if regex add substring isValid - if field.Regex != "" { - filterText := regexp.MustCompile(field.Regex).FindString(text) - if len(filterText) > 0 && filterText != "0" { - tmpXpath += `"` + field.Name + `IsValid":` + "true" + `,` - } else { - tmpXpath += `"` + field.Name + `IsValid":` + "false" + `,` + if len(configDetailledAnalyze.XML.XPATH) > 0 { + tmpXpath := "{" + if err == nil { + for _, field := range configDetailledAnalyze.XML.XPATH { + previousXpath := make(map[string]string) + for _, xpathContent := range field.Xpath { + goodXPath, hasQuote := FormatXpathByType(field.Type, xpathContent) + expr, _ := xpath.Compile(goodXPath) + result := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)) + text := fmt.Sprintf("%v", result) + // not process xpath if previous is good + if previousXpath[xpathContent] == "" && len(text) > 0 { + previousXpath[xpathContent] = xpathContent + // if regex add element if isValid + if field.Regex != "" { + filterText := regexp.MustCompile(field.Regex).FindString(text) + if len(filterText) > 0 && filterText != "0" { + tmpXpath += `"` + field.Name + `IsValid":` + "true" + `,` + } else if filterText != "0" { + tmpXpath += `"` + field.Name + `IsValid":` + "false" + `,` + } } - } - // if fieldType = Count authorize 0 value - if field.Type == "Count" { - tmpXpath += `"` + field.Name + `":` + text + `,` - } else if text != "0" { - // for string - if hasQuote { - tmpXpath += `"` + field.Name + `":"` + strings.NewReplacer("\n", "").Replace(text) + `",` - } else { // for boolean and number + + // if fieldType = Count authorize 0 value + if field.Type == "Count" { tmpXpath += `"` + field.Name + `":` + text + `,` + } else if text != "0" { + // for string + if hasQuote { + tmpXpath += `"` + field.Name + `":"` + strings.NewReplacer("\n", "").Replace(text) + `",` + } else { // for boolean and number + tmpXpath += `"` + field.Name + `":` + text + `,` + } } } } + previousXpath = nil } - previousXpath = nil } + + tmpXpath = strings.TrimSuffix(tmpXpath, ",") + tmpXpath += "}" + + finalXpath := []byte(tmpXpath) + jsonData := XpathStructure{} + json.Unmarshal(finalXpath, &jsonData) + + detailledInfo.xpath = finalXpath + + logger = logger.WithFields(logrus.Fields{ + "xpath": detailledInfo.xpath, + }) } - tmpXpath = strings.TrimSuffix(tmpXpath, ",") - tmpXpath += "}" - - finalXpath := []byte(tmpXpath) - jsonData := XpathStructure{} - json.Unmarshal(finalXpath, &jsonData) - - detailledInfo.xpath = finalXpath - - logger = logger.WithFields(logrus.Fields{ - "xpath": detailledInfo.xpath, - }) - logger.Info("") - - return finalXpath + return detailledInfo } func FormatXpathByType(fieldType string, xpath string) (string, bool) { diff --git a/xml_test.go b/xml_test.go index 204fce2..167b589 100644 --- a/xml_test.go +++ b/xml_test.go @@ -1,8 +1,11 @@ package main import ( + "bytes" + "io/ioutil" "testing" + "github.com/goccy/go-json" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" ) @@ -83,12 +86,12 @@ func TestXPath(t *testing.T) { xmlData.path = "./example/xml/xml-for-xpath.xml" + jsonFile, _ := ioutil.ReadFile("./example/sisyphe-conf.json") + json.Unmarshal(bytes.TrimPrefix(jsonFile, []byte("\xef\xbb\xbf")), &configDetailledAnalyze) resultWellFormed, _ := CheckIfXmlIsWellFormed(xmlData.path, contextLogger) assert.Equal(t, resultWellFormed.doctype.Sysid, "JATS-journalpublishing1.dtd", "Doctype is valid") assert.Equal(t, resultWellFormed.isWellFormed, true, "XML is well formed") assert.Equal(t, resultWellFormed.wellFormedErrors, ErrorXML{}, "Return empty if xml is not well formed") - resultValidation, _ := CheckXMLValidation(xmlData.path, XMLInfo{}, contextLogger) - assert.Equal(t, resultValidation.isValidAgainstDTD, false, "XML must be valid according to the DTD") - assert.Equal(t, len(resultValidation.validationsErrors), 0, "Return empty if dtd is valid") - //jsonResult := ProcessXpath(resultWellFormed, contextLogger) + resultXpath := ProcessXpath(resultWellFormed, contextLogger) + assert.Equal(t, len(resultXpath.xpath), 659, "xpath is present") }