#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Guess a gender label for first names streamed as JSON lines on stdin.

Each input line is a JSON object with a "value" field holding a name. The
same object is written back to stdout with "value" replaced by the label
returned by :func:`detector`.
"""

import json
import pickle
import re
import sys

from unidecode import unidecode

# Codes stored in the lookup table, mapped to the labels this script emits.
_GENDER_LABELS = {
    "M": "male",
    "1M": "mostly_male",
    "?M": "mostly_male",
    "F": "female",
    "1F": "mostly_female",
    "?F": "mostly_female",
    "?": "andy",
}

# Two or more leading single letters, each followed by separators
# (e.g. "j. r. smith"): initials only, so there is no first name to look up.
_INITIALS_RE = re.compile(r"^([a-z][. '-]+){2,}[a-z]*")

# Name tokens are separated by runs of whitespace and/or hyphens.
_TOKEN_SPLIT_RE = re.compile(r"[\s-]+")


def detector(name, my_dict):
    """Return the gender label associated with a first name.

    Args:
        name: The raw name string. It is lower-cased and transliterated to
            ASCII before any lookup.
        my_dict: Mapping from normalized first name to one of the codes
            "M", "1M", "?M", "F", "1F", "?F" or "?". Keys are presumably
            lower-case ASCII -- verify against the code that builds the
            pickle.

    Returns:
        "male", "mostly_male", "female", "mostly_female" or "andy" when a
        candidate key is found with a recognized code; "unknown" when no
        candidate key yields a recognized code; "name error" when the name
        looks like a sequence of initials or starts with a one-character
        token followed by other tokens.
    """
    # unidecode may emit capitals and trailing spaces even for lower-case
    # input (e.g. CJK text), so normalize again after transliterating.
    name = unidecode(name.lower()).lower().strip()

    if _INITIALS_RE.match(name):
        return "name error"

    name = name.replace(".", "").replace("- ", "-")
    tokens = _TOKEN_SPLIT_RE.split(name)
    first = tokens[0]

    if len(tokens) == 1:
        # Simple first name, e.g. "bob".
        candidates = [first]
    elif len(first) > 1:
        second = tokens[1]
        if len(second) > 1:
            # Compound first name, e.g. "jean-christophe" or "erin nicole":
            # try the full compound in its three spellings, then fall back
            # to the first token alone.
            candidates = [
                first + second,
                first + "-" + second,
                first + " " + second,
                first,
            ]
        else:
            # First name followed by an initial, e.g. "james a".
            candidates = [first]
    else:
        # A one-character first token followed by more tokens.
        return "name error"

    for key in candidates:
        label = _GENDER_LABELS.get(my_dict.get(key))
        if label is not None:
            return label
    return "unknown"


def main():
    """Label every JSON line read from stdin and write it to stdout."""
    # NOTE(review): pickle.load can execute arbitrary code; this file must
    # come from a trusted source.
    with open('./v1/first-name/name_gender.pickle', 'rb') as handle:
        my_dict = pickle.load(handle)

    for line in sys.stdin:
        if not line.strip():
            # Skip blank lines instead of failing in json.loads.
            continue
        data = json.loads(line)
        texte = data["value"]
        data["value"] = detector(unidecode(texte), my_dict)
        sys.stdout.write(json.dumps(data))
        sys.stdout.write("\n")


if __name__ == "__main__":
    main()