## Max topic
def max_topic(dico):
    """Return the single highest-weighted topic of a topic mapping.

    Args:
        dico: Mapping of topic name (e.g. ``"topic_1"``) to a dict holding
            at least a ``"weight"`` entry. The weight is parsed with
            ``float()``, so it may be a number or its string form (the
            caller stores it as ``str(topic_weight)``).

    Returns:
        A one-entry dict ``{best_topic_name: dico[best_topic_name]}`` for
        the topic with the greatest weight, or an empty dict when ``dico``
        is empty. On ties the first topic in iteration order wins.

    Note:
        The caller stores this result under ``line["value"]["best_topic"]``
        next to the full mapping under ``line["value"]["topics"]``.
    """
    # Guard: an empty mapping has no best topic. The previous version fell
    # through to ``dico[{}]`` and raised ``TypeError: unhashable type``.
    if not dico:
        return {}

    # ``max`` keeps the first maximal element, matching the original strict
    # ``>`` comparison, and still selects a topic when every weight is <= 0
    # (the old ``best_proba = 0`` seed could never be beaten in that case).
    best_topic = max(dico, key=lambda topic: float(dico[topic]["weight"]))
    return {best_topic: dico[best_topic]}