package main import ( "math" "os/exec" "strconv" "strings" ) type MetaResult struct { meta map[string]string err error } func processPDF(message *LogMessage) { // queue for read pdf canal <- struct{}{} defer func() { <-canal }() defer wg.Done() metadata, err := getMetadata(message.path) if err == nil { message.pdf.Author = metadata["Author"] message.pdf.Creator = metadata["Creator"] message.pdf.CreationDate = metadata["CreationDate"] message.pdf.pdfFormatVersion = metadata["PDF version"] numberPages, _ := strconv.Atoi(metadata["Pages"]) message.pdf.pdfPageTotal = numberPages } else { message.pdf.pdfError = err.Error() } if *withWordCount == true { pdfWordCount := getNumberWords(message.path) message.pdf.pdfWordCount = pdfWordCount message.pdf.pdfWordByPage = int(math.Floor(float64(pdfWordCount) / float64(message.pdf.pdfPageTotal))) } writeLog(message) } func getMetadata(path string) (map[string]string, error) { metaResult := MetaResult{meta: make(map[string]string)} metaStr, err := exec.Command("pdfinfo", path).Output() // Parse meta output for _, line := range strings.Split(string(metaStr), "\n") { if parts := strings.SplitN(line, ":", 2); len(parts) > 1 { metaResult.meta[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1]) } } return metaResult.meta, err } func getNumberWords(path string) int { text, _ := exec.Command("pdftotext", "-q", "-nopgbrk", "-enc", "UTF-8", "-eol", "unix", path, "-").Output() pdfWordCount := len(strings.Fields(string(text))) return pdfWordCount }