diff --git a/hospital-affiliations/aff_hosp.py b/hospital-affiliations/aff_hosp.py index 8c6f614..f280ccc 100755 --- a/hospital-affiliations/aff_hosp.py +++ b/hospital-affiliations/aff_hosp.py @@ -50,29 +50,33 @@ "icans","paul strauss","paul str","paoli calmettes","inst j paoli i calmettes","claudius regaud","claudius rigaud","inst univ canc toulouse oncopole", "inst univ cancerol oncopole","iuct oncopole","ctr lutte contre canc","ico canc","icm, montpellier canc inst","univ inst canc toulouse oncopole","rothschild"] - hospital = "" + hospital = "N.C" affiliation = affiliation.lower() for acronym in acronyms: - if acronym in affiliation : - # standarize original dataframe - affiliations_dataframe["contains_acronyms"] = affiliations_dataframe["Affiliation"].apply(is_hospital_affiliation,list=acronyms) - affiliations_dataframe["standardized_city"] = affiliations_dataframe["Ville_canonique_Dpt"].apply(remove_department_numbers_and_convert_acronyms).apply(remove_accents) - if len(affiliations_dataframe[affiliations_dataframe['contains_acronyms'] == True]) > 0: - # create new dataframe only with affiliations that contain acronyms - dataframe_with_acronyms = affiliations_dataframe[affiliations_dataframe['contains_acronyms'] == True] - dataframe_with_acronyms["city_in_affiliations"] = dataframe_with_acronyms['standardized_city'].apply(is_city_in_affiliation,affiliation=affiliation) - if len(dataframe_with_acronyms[dataframe_with_acronyms["city_in_affiliations"] == True]) != 0: - # create new dataframe only with affiliations that contain acronyms and cities - dataframe_with_acronyms_and_cities = dataframe_with_acronyms[dataframe_with_acronyms["city_in_affiliations"] == True] - dataframe_with_acronyms_and_cities["ratio"] = dataframe_with_acronyms_and_cities["Affiliation"].apply(affiliations_match_ratio,second_affiliation=affiliation) - hospital = dataframe_with_acronyms_and_cities["Orga NonCnrs Acorriger"][dataframe_with_acronyms_and_cities["ratio"].idxmax()] - else: - hospital = "N.C" - else: - hospital = "N.C" - break - if hospital == "": - hospital = "N.C" + if acronym not in affiliation : + continue + + # standarize original dataframe + affiliations_dataframe["contains_acronyms"] = affiliations_dataframe["Affiliation"].apply(is_hospital_affiliation,list=acronyms) + affiliations_dataframe["standardized_city"] = affiliations_dataframe["Ville_canonique_Dpt"].apply(remove_department_numbers_and_convert_acronyms).apply(remove_accents) + + if len(affiliations_dataframe[affiliations_dataframe['contains_acronyms'] == True]) == 0: + continue + + # create new dataframe only with affiliations that contain acronyms + dataframe_with_acronyms = affiliations_dataframe[affiliations_dataframe['contains_acronyms'] == True] + dataframe_with_acronyms["city_in_affiliations"] = dataframe_with_acronyms['standardized_city'].apply(is_city_in_affiliation,affiliation=affiliation) + + if len(dataframe_with_acronyms[dataframe_with_acronyms["city_in_affiliations"] == True]) == 0: + continue + + # create new dataframe only with affiliations that contain acronyms and cities + dataframe_with_acronyms_and_cities = dataframe_with_acronyms[dataframe_with_acronyms["city_in_affiliations"] == True] + dataframe_with_acronyms_and_cities["ratio"] = dataframe_with_acronyms_and_cities["Affiliation"].apply(affiliations_match_ratio,second_affiliation=affiliation) + hospital = dataframe_with_acronyms_and_cities["Orga NonCnrs Acorriger"][dataframe_with_acronyms_and_cities["ratio"].idxmax()] + + break + return hospital for line in sys.stdin: diff --git a/hospital-affiliations/input_data.txt b/hospital-affiliations/input_data.txt index 9a74acb..2a7b0e1 100644 --- a/hospital-affiliations/input_data.txt +++ b/hospital-affiliations/input_data.txt @@ -1,5 +1,5 @@ {"id" :"3","value": "Hop La Pitie Salpetriere, AP HP, Serv Med Interne, Ctr Natl Reference Histiocytoses, Paris, France"} -{"id" :"4","value": "Hop Necker Enfants Malad, Gen Pediat Dept, Paris, France"} +{"id" :"4","value": "UMR CNRS 7021, Illkirch Graffenstaden, France"} {"id" :"5","value": "Bergonie Inst, Dept Radiat Oncol, Bordeaux, France"} {"id" :"6","value": "CHU Besancon, Serv Neurol, 2 Blvd Fleming, F-25030 Besancon, France"} {"id" :"7","value": "CHU St Etienne, Ave Albert Raimond, F-42055 St Etienne, France"} \ No newline at end of file