#!/usr/bin/python3
import json
import sys
import re
import urllib.parse
import requests
from ratelimit import limits, RateLimitException
from backoff import on_exception, expo
# Filtrage par mot clé pour ne garder que l'essentiel
def filter(affiliation):
    """Classify an affiliation by keyword, or return it normalized.

    The affiliation is lower-cased, its commas are removed, and the result
    is split on single spaces.  Tokens are examined from left to right and
    the first one that is a known keyword decides the outcome.

    Args:
        affiliation: Raw affiliation string.

    Returns:
        ``"private"`` or ``"public"`` when a keyword is found; otherwise the
        normalized affiliation (lower-cased, commas removed).
    """
    normalized = affiliation.lower().replace(",", "")
    status_by_keyword = {
        "sas": "private",
        "sarl": "private",
        "sa": "private",
        "private": "private",
        "edf": "private",
        "orange": "private",
        "univ": "public",
        "hop": "public",
        "uar": "public",
        "umr": "public",
        "cea": "public",
        "cnrs": "public",
    }
    for token in normalized.split(" "):
        status = status_by_keyword.get(token)
        if status is not None:
            return status
    return normalized
# requête pour récupérer le fichier json
def request_abbreviation(url, timeout=10):
    """Download the JSON document at ``url`` and return it decoded.

    Args:
        url: Address of the JSON resource to fetch.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On connection failure, on
            timeout, or when the server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of decoding an error page as if it
    # were the expected document.
    response.raise_for_status()
    return response.json()
# Normaliser les abréviations
def expand_abbreviations(affiliation, dict):
    """Replace known abbreviations in an affiliation with their long form.

    The affiliation is lower-cased and split on single spaces.  Each token
    is looked up in ``dict`` after its commas and periods are removed.  A
    token that is found is replaced by the mapped value, followed by a
    comma if the token contained one; any other token is kept unchanged.

    Args:
        affiliation: Raw affiliation string.
        dict: Mapping from abbreviation to expanded form.

    Returns:
        The tokens joined back together with single spaces.
    """
    expanded = []
    for token in affiliation.lower().split(" "):
        key = token.replace(",", "").replace(".", "")
        if key not in dict:
            expanded.append(token)
            continue
        # Keep the comma so later comma-based splitting still works.
        trailing = "," if "," in token else ""
        expanded.append(dict[key] + trailing)
    return " ".join(expanded)
# découpage des adresses en plusieurs parties
def name_enterprise(affiliation):
    """Return the lower-cased part of ``affiliation`` before its first comma.

    When the affiliation contains no comma, the whole lower-cased string is
    returned.
    """
    head, _separator, _rest = affiliation.lower().partition(",")
    return head
# Repérer le département dans l'affiliation
def num_dept(affiliation):
    """Extract a two-digit department number from an affiliation.

    Searches for the first occurrence of ``f-`` followed by five digits and
    returns the first two of those digits.  The ``f`` is matched in lower
    case only.

    Args:
        affiliation: Affiliation string to search.

    Returns:
        The two digits as a string, or ``None`` when the pattern is absent.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    match = re.search(r"f-(\d{2})\d{3}", affiliation)
    if match is None:
        return None
    return match.group(1)
# requêtage de l'API pour les données filtrées
@on_exception(expo, RateLimitException, max_time=1)
@limits(calls=7, period=1)
def request(name, dept, timeout=10):
    """Search the recherche-entreprises API for ``name``.

    The call is wrapped by ``limits(calls=7, period=1)`` and retried by
    ``on_exception`` with exponential backoff on ``RateLimitException``.

    Args:
        name: Free-text query; it is percent-encoded before being placed in
            the URL.
        dept: Department code appended as the ``departement`` parameter, or
            a falsy value to search without that filter.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded JSON body of the response.
    """
    url = "https://recherche-entreprises.api.gouv.fr/search?q=" + urllib.parse.quote(name)
    if dept:
        url += "&departement=" + dept
    # The HTTP status is deliberately not checked here: whatever JSON body
    # the server returns is passed to the caller unchanged.
    response = requests.get(
        url,
        headers={'Accept': 'application/json'},
        timeout=timeout,
    )
    return response.json()
# gérer les réponses de l'API
def is_private_public(information):
    """Decide whether an API answer describes a public or private body.

    Args:
        information: Decoded JSON answer, expected to be a dict with a
            ``"results"`` list whose items may carry a ``"complements"``
            dict holding an ``"est_service_public"`` flag.

    Returns:
        ``"Informations manquantes"`` when the answer is not a dict or has
        no results; ``"public"`` when at least one result is flagged as a
        public service; ``"private"`` otherwise.
    """
    # An answer without usable results (None, error payload, empty list)
    # cannot be classified.
    if not isinstance(information, dict) or not information.get('results'):
        return "Informations manquantes"

    # Collect the "est_service_public" flag of every result that has one.
    est_service_public_list = []
    for result in information['results']:
        complements = result.get('complements') if isinstance(result, dict) else None
        # "complements" may be missing or null; treat it as having no flag
        # instead of failing on None.get(...).
        if not isinstance(complements, dict):
            continue
        est_service_public = complements.get('est_service_public')
        if est_service_public is not None:
            est_service_public_list.append(est_service_public)

    if True in est_service_public_list:
        return "public"
    return "private"
# return est_service_public_list
def public_or_private(affiliation, my_dict):
    """Build the "organisme / statut" summary line for one affiliation.

    A keyword check is tried first.  If it yields ``"private"`` or
    ``"public"``, the organisation name is the lower-cased text before the
    first comma of the raw affiliation.  Otherwise abbreviations are
    expanded, the name and department are extracted from the expanded text,
    and the status is derived from the remote search answer.

    Args:
        affiliation: Raw affiliation string.
        my_dict: Abbreviation mapping passed to ``expand_abbreviations``.

    Returns:
        A string of the form ``"organisme: <name>, statut: <status>"``.
    """
    status = filter(affiliation)
    if status not in ("private", "public"):
        # No decisive keyword: fall back to the remote lookup.
        expanded = expand_abbreviations(affiliation, my_dict)
        organisation = name_enterprise(expanded)
        status = is_private_public(request(organisation, num_dept(expanded)))
    else:
        organisation = affiliation.lower().partition(",")[0]
    return "organisme: " + organisation + ", statut: " + status
def main():
    """Classify every JSON record read from standard input.

    The abbreviation table is fetched once.  Each input line is then parsed
    as a JSON object, its ``"value"`` entry is replaced by the result of
    ``public_or_private``, and the object is written to standard output as
    one JSON line.
    """
    abbreviations = request_abbreviation("http://mapping-tables.daf.intra.inist.fr/affiliations-tools-corporate.json")
    for raw_line in sys.stdin:
        record = json.loads(raw_line)
        record["value"] = public_or_private(record["value"], abbreviations)
        sys.stdout.write(json.dumps(record) + "\n")
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()