Newer
Older
web-services / authors-tools / v1 / first-name / gender.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pickle
import sys
import json
import re
from unidecode import unidecode


# Gender codes stored in the name dictionary, mapped to the labels returned
# to callers. "andy" is the label used for the ambiguous code "?".
_GENDER_LABELS = {
    "M": "male",
    "1M": "mostly_male",
    "?M": "mostly_male",
    "F": "female",
    "1F": "mostly_female",
    "?F": "mostly_female",
    "?": "andy",
}

# Inputs made of initials ("J. P.", "J.-P. X") are rejected up front.
_INITIALS_RE = re.compile(r"^([A-Z][. '-]+){2,}[A-Z]*")
# Inputs starting with an all-uppercase token ("JP", "A.") are rejected too.
_UPPERCASE_TOKEN_RE = re.compile(r"\b[A-Z]+\b\.?")
# Tokens are separated by runs of whitespace and/or hyphens.
_TOKEN_SEPARATOR_RE = re.compile(r"[\s-]+")


def _gender_label(key, my_dict):
    """Return the label for ``key``, or None if absent or its code is unknown."""
    if key in my_dict:
        return _GENDER_LABELS.get(my_dict[key])
    return None


def detector(name, my_dict):
    """Guess the gender associated with a first name.

    Args:
        name: The first name as typed (may be compound, e.g. "Jean-Christophe"
            or "Erin Nicole", or followed by an initial, e.g. "James A").
        my_dict: Mapping from normalized (lowercase, ASCII) first names to a
            gender code: "M", "1M", "?M", "F", "1F", "?F" or "?".

    Returns:
        One of "male", "mostly_male", "female", "mostly_female", "andy",
        "unknown" (name not found or code not recognized) or "name error"
        (the input looks like initials / an all-uppercase token, or its first
        token is a single character followed by other tokens).
    """
    if _INITIALS_RE.match(name) or _UPPERCASE_TOKEN_RE.match(name):
        return "name error"

    # Normalize: lowercase ASCII, no dots, "jean- paul" -> "jean-paul".
    normalized = unidecode(name.lower()).replace(".", "").replace("- ", "-")
    # Strip so that surrounding whitespace does not create an empty first
    # token (which would otherwise be reported as "name error").
    tokens = _TOKEN_SEPARATOR_RE.split(normalized.strip())
    first = tokens[0]

    # Simple first name, e.g. "Bob".
    if len(tokens) == 1:
        return _gender_label(first, my_dict) or "unknown"

    # Several tokens: the first one must be a real word, not a lone letter.
    if len(first) <= 1:
        return "name error"

    second = tokens[1]
    if len(second) > 1:
        # Compound first name ("Jean-Christophe", "Erin Nicole"): look for the
        # whole compound under its possible spellings, then fall back to the
        # first part alone.
        candidates = (
            first + second,
            first + "-" + second,
            first + " " + second,
            first,
        )
    else:
        # First name followed by an initial or an empty token ("James A"):
        # only the first token is looked up.
        candidates = (first,)

    for key in candidates:
        label = _gender_label(key, my_dict)
        if label is not None:
            return label
    return "unknown"


def main():
    """Filter JSON lines from stdin to stdout, replacing "value" by a gender.

    Loads the pickled name dictionary once, then for every input line parses
    the JSON object, runs ``detector`` on the ASCII-transliterated "value"
    field, stores the result back in "value" and writes the object as one
    JSON line.
    """
    with open('./v1/first-name/name_gender.pickle', 'rb') as pickle_file:
        gender_by_name = pickle.load(pickle_file)

    emit = sys.stdout.write
    for raw_line in sys.stdin:
        record = json.loads(raw_line)
        first_name = record["value"]
        record["value"] = detector(unidecode(first_name), gender_by_name)
        emit(json.dumps(record) + "\n")

# Run the stdin/stdout filter only when executed as a script, not on import.
if __name__ == "__main__":
    main()