# Newer
# Older
# APC-bills / v1 / apc.py
import json
import logging
import subprocess
from pathlib import Path

import pandas as pd

from editorsFunctions.global_functions import *


# Directory (relative to the working directory) whose entries are the bills
# to process.
path_bills = "factures-2019-2023"

# Lookup table whose keys are the editor codes that have a dedicated parser
# script. Presumably maps a supplier code to the supplier name -- only the
# keys are used in this file.
# The encoding is given explicitly so the file is decoded the same way on
# every platform instead of depending on the locale's default encoding.
with open('./v1/dictionaries/code_fournisseur2fournisseur.json', 'r', encoding='utf-8') as f_in:
    id_editor2editor = json.load(f_in)


def execute_script(script_name, file_name):
    """Run a parser script in a child process and return its JSON output.

    The script is launched with ``python3``; ``file_name`` is written to the
    child's standard input and the child's standard output is decoded as
    JSON.

    Args:
        script_name: Path of the Python script to run.
        file_name: Text sent to the script on stdin (callers pass the path
            of the bill to parse).

    Returns:
        The decoded JSON value, or ``None`` when the script exits with a
        non-zero status or its output is not valid JSON. Both failure cases
        are reported as warnings through the ``logging`` module so that a
        skipped bill leaves a trace.
    """
    result = subprocess.run(
        ['python3', script_name],
        input=file_name,
        capture_output=True,
        text=True,
    )
    logger = logging.getLogger(__name__)

    if result.returncode != 0:
        logger.warning(
            "%s failed on %r (exit status %s): %s",
            script_name, file_name, result.returncode, result.stderr.strip(),
        )
        return None

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        logger.warning(
            "%s produced invalid JSON for %r: %s",
            script_name, file_name, exc,
        )
        return None

# Empty frame that fixes the leading column order of the result; any extra
# keys returned by a parser script are appended after these columns.
df_res = pd.DataFrame(columns=["nom_complet","doi","unverified_doi","item_editor","full_text"])

# One single-row frame per successfully parsed bill. They are concatenated
# once after the loop instead of re-copying the accumulated frame on every
# iteration.
frames = [df_res]

files = Path('./%s/' % path_bills).glob('*')
for file in files:
    # The editor code is the second "_"-separated field of the file NAME
    # (not of the whole path, so an underscore in a directory name cannot
    # shift the field). Names without an underscore have no code.
    name_parts = file.name.split("_")
    code_editor = name_parts[1] if len(name_parts) > 1 else None

    # Known editors have a dedicated script; everything else goes through
    # the generic one.
    if code_editor in id_editor2editor:
        script_name = f"v1/editorsFunctions/{code_editor}.py"
    else:
        script_name = "v1/editorsFunctions/autres.py"

    data = execute_script(script_name, str(file))
    if data is not None:
        frames.append(pd.DataFrame([data]))

df_res = pd.concat(frames, ignore_index=True)

df_res.to_csv('res_facture.csv', index=False)