# Patch summary for geo-tagger/v1/perseeBaipTagger/tagger.py (two hunks).
#
# Hunk 1 (old line 52 onward): instead of wrapping the whole normalized text
#   `sent` in one `Sentence` and calling `tagger.predict(sentence)`, the text
#   is split on "." into `sentS`, each fragment gets "." appended and is
#   wrapped in its own `Sentence`, and `tagger.predict` is called once with
#   the resulting list `sentences`.
#   The `for word in sent.lower().split(" ")` loop is context only: it still
#   walks the whole text, not the per-sentence fragments.
#
# Hunk 2 (old line 61 onward): the entity loop over `sentence.get_spans('ner')`
#   is placed under a new `for sentence in sentences:` loop. The statements of
#   the loop body are textually unchanged between the removed and added sides:
#     - "LOC" entities: text appended to `locL` if not already present.
#     - "ORG" entities: when the last space-separated token is longer than 2
#       characters, matching `uniqueOrg` prefixes go to `basicOrg`, the text
#       goes to `orgL` if absent, and the last token goes to `locL` when the
#       token before it is one of "à", "de", "l'", "d'", "'", "du", "la".
#   `returnDic` / `data['value']` are trailing context, not modified.
#
# NOTE(review): `sent.split(".")` returns a trailing "" when `sent` ends with
#   ".", so the last element becomes `Sentence(".")`; an empty `sent` likewise
#   yields a single `Sentence(".")`. Confirm the tagger tolerates these or
#   filter empty fragments before building `sentences`.
# NOTE(review): "." is appended to every fragment, including a final fragment
#   that had no terminating "." in the input, and the split also cuts at dots
#   that are not sentence ends (abbreviations, decimals) -- TODO confirm this
#   is acceptable for the NER results.
# NOTE(review): `[Sentence(sentS[i]+".") for i in range(len(sentS))]` can
#   iterate `sentS` directly; the index is not otherwise used.
# NOTE(review): `Sentence` / `tagger` look like flair objects, but their
#   definitions are outside this patch -- verify that `predict` accepts a list.
# NOTE(review): in this copy the patch's line breaks and indentation appear
#   collapsed into single spaces; the nesting of the Python statements cannot
#   be read from it, and it likely will not apply as-is.
diff --git a/geo-tagger/v1/perseeBaipTagger/tagger.py b/geo-tagger/v1/perseeBaipTagger/tagger.py index ff68ff5..bd5292e 100755 --- a/geo-tagger/v1/perseeBaipTagger/tagger.py +++ b/geo-tagger/v1/perseeBaipTagger/tagger.py @@ -52,8 +52,11 @@ sent = data_normalization(error_dic,text) - sentence = Sentence(sent) - tagger.predict(sentence) + sentS = sent.split(".") + sentences = [Sentence(sentS[i]+".") for i in range(len(sentS))] + + tagger.predict(sentences) + for word in sent.lower().split(" "): for transfo in trans: if word.startswith(transfo): @@ -61,24 +64,25 @@ operation.append(transfo) break - for entity in sentence.get_spans('ner'): - if (entity.labels[0].value == "LOC"): - if entity.text not in locL: - locL.append(entity.text) - if entity.labels[0].value == "ORG": - org = entity.text.split(" ") - if len(org[-1]) > 2: - for borg in uniqueOrg: - if entity.text.lower().startswith(borg): - basicOrg.append(borg) - if entity.text not in orgL: - orgL.append(entity.text) - if len(org)>1: - for k in ["à","de","l'","d'","'","du","la"]: - if org[-2] == k: - if org[-1] not in locL: - locL.append(org[-1]) - break + for sentence in sentences: + for entity in sentence.get_spans('ner'): + if (entity.labels[0].value == "LOC"): + if entity.text not in locL: + locL.append(entity.text) + if entity.labels[0].value == "ORG": + org = entity.text.split(" ") + if len(org[-1]) > 2: + for borg in uniqueOrg: + if entity.text.lower().startswith(borg): + basicOrg.append(borg) + if entity.text not in orgL: + orgL.append(entity.text) + if len(org)>1: + for k in ["à","de","l'","d'","'","du","la"]: + if org[-2] == k: + if org[-1] not in locL: + locL.append(org[-1]) + break returnDic = {"loc":locL,"org":orgL,"basicOrg":basicOrg,"operation":operation} data['value'] = returnDic