import json
import re
import sys

from global_functions import *
# Markers passed to get_caracs_between to locate the publication-charges text.
CHARGES_START = "PUBLICATION CHARGES"
CHARGES_END = "PAGE CHARGES"


def display_name(path):
    """Return the value stored in the ``nom_complet`` column for *path*.

    The second "/"-separated component is used (``"dir/file.pdf"`` gives
    ``"file.pdf"``). A path without any "/" is returned unchanged instead
    of raising ``IndexError``.
    """
    # NOTE(review): for deeper paths ("a/b/c.pdf") this yields "b", not the
    # file name. Kept for backward compatibility -- confirm whether the base
    # name was intended.
    parts = path.split("/")
    return parts[1] if len(parts) > 1 else path


def collapse_whitespace(text):
    """Replace every run of two or more whitespace characters by one space."""
    # NOTE(review): a second pass that collapsed "\n\n+" into "\n" used to
    # wrap this call, but it could never match because newline runs are
    # already turned into a space here. If paragraph breaks were meant to
    # survive, the newline pass has to run first -- confirm the intent.
    return re.sub(r"\s\s+", " ", text)


def build_row(filename, full_text):
    """Build the JSON-serialisable row describing one processed document.

    Args:
        filename: path of the document as received on stdin.
        full_text: text returned by ``ocr_pdf`` for that document.

    Returns:
        A dict with the keys ``nom_complet``, ``doi``, ``unverified_doi``,
        ``item_editor`` and ``full_text``.
    """
    # DOI: a candidate is moved to the "doi" column only once verify_doi
    # accepts it; otherwise it stays in "unverified_doi" as returned.
    unverified_doi = find_doi(full_text)
    doi = ""
    if unverified_doi and verify_doi(unverified_doi):
        doi, unverified_doi = unverified_doi, ""

    # Article: whatever get_caracs_between returns for the two markers,
    # flattened onto one line and re-prefixed with the start marker when
    # it is not empty.
    item_editor = get_caracs_between(
        full_text, CHARGES_START, CHARGES_END
    ).replace("\n", "")
    if item_editor:
        item_editor = CHARGES_START + item_editor

    return {
        "nom_complet": display_name(filename),
        "doi": doi,
        "unverified_doi": unverified_doi,
        "item_editor": item_editor,
        "full_text": collapse_whitespace(full_text),
    }


def main():
    """Read a document path from stdin and write its row to stdout as JSON."""
    filename = sys.stdin.read().strip()
    if not filename:
        # Fail with a clear message rather than an opaque downstream error.
        sys.exit("expected the path of a PDF file on stdin")
    full_text = ocr_pdf(filename)
    sys.stdout.write(json.dumps(build_row(filename, full_text)))


if __name__ == "__main__":
    main()