// isValidXML reports whether the file at path can be opened, read in full,
// and parsed by encoding/xml.
//
// It returns (true, nil) when all three steps succeed. Otherwise it returns
// false together with the error from the step that failed (open, read, or
// parse), so callers can log why the file was rejected.
func isValidXML(path string) (bool, error) {
	xmlFile, err := os.Open(path)
	if err != nil {
		return false, err
	}
	// Release the file handle once the content has been read and checked.
	defer xmlFile.Close()

	byteValue, err := ioutil.ReadAll(xmlFile)
	if err != nil {
		return false, err
	}

	// The decoded value is discarded; only the parse outcome matters here.
	// The Unmarshal error must be captured in its own variable: the outer err
	// is known to be nil at this point, so returning it would hide the reason
	// the document was rejected.
	if err := xml.Unmarshal(byteValue, new(interface{})); err != nil {
		return false, err
	}
	return true, nil
}