diff --git a/v1/dictionaries/code_fournisseur2fournisseur.json b/v1/dictionaries/code_fournisseur2fournisseur.json index f6f751f..ad3c166 100644 --- a/v1/dictionaries/code_fournisseur2fournisseur.json +++ b/v1/dictionaries/code_fournisseur2fournisseur.json @@ -13,7 +13,6 @@ "0001062310": 12, "0001021888": 13, "0001222959": 14, - "0001234181": 15, "0001226137": 16, "0001021626": 17, "0001247066": 18, diff --git a/v1/editorsFunctions/0001097617.py b/v1/editorsFunctions/0001097617.py index 42886d1..786d0d7 100644 --- a/v1/editorsFunctions/0001097617.py +++ b/v1/editorsFunctions/0001097617.py @@ -19,9 +19,9 @@ unverified_doi="" # Article -item_editor = get_caracs_between(full_text,"","").replace("\n"," ") +item_editor = get_caracs_between(full_text,"Publication","Title:").replace("\n"," ") if item_editor != "": - item_editor = "Title: " + item_editor + item_editor = "Publication + Title: " + item_editor new_row = {'nom_complet': filename.split("/")[1], 'doi': doi, 'unverified_doi':unverified_doi ,"item_editor":item_editor,'full_text': re.sub("\n\n+","\n",re.sub("\s\s+"," ",full_text))} diff --git a/v1/editorsFunctions/0001234181.py b/v1/editorsFunctions/0001234181.py deleted file mode 100644 index 42886d1..0000000 --- a/v1/editorsFunctions/0001234181.py +++ /dev/null @@ -1,28 +0,0 @@ -from global_functions import * -import sys -import json - - -filename = sys.stdin.read().strip() -full_text = ocr_pdf(filename) - - -## Extract infos -# DOI -unverified_doi = find_doi(full_text) -doi = "" -#only if a doi is found -if unverified_doi: - if verify_doi(unverified_doi): - #If DOI is found, write it in "doi" column instead of 'unverified_doi' column. - doi=unverified_doi - unverified_doi="" - -# Article -item_editor = get_caracs_between(full_text,"","").replace("\n"," ") -if item_editor != "": - item_editor = "Title: " + item_editor - - -new_row = {'nom_complet': filename.split("/")[1], 'doi': doi, 'unverified_doi':unverified_doi ,"item_editor":item_editor,'full_text': re.sub("\n\n+","\n",re.sub("\s\s+"," ",full_text))} -sys.stdout.write((json.dumps(new_row))) \ No newline at end of file