diff --git a/data-computer/v1/lda.py b/data-computer/v1/lda.py index 213be00..0e99a1b 100755 --- a/data-computer/v1/lda.py +++ b/data-computer/v1/lda.py @@ -44,7 +44,7 @@ return ["n/a"] return tokens -## Max topic +# Max topic def max_topic(dico): best_topic = {} best_proba = 0 @@ -55,6 +55,24 @@ best_topic = topic return {best_topic:dico[best_topic]} +# # Max coherence +# For a corpus, return the optimal number of topics +# def how_many_topics(corpus,dictionary,texts): +# best_t = 2 +# best_coherence = 0 +# for t in range(3, 21): +# lda_model = models.LdaModel(corpus, id2word=dictionary, num_topics=t,iterations=200,alpha="symmetric", eta = "auto",minimum_probability=0.1) +# corpus_lda = lda_model[corpus] + +# cm = models.coherencemodel.CoherenceModel(model=lda_model, texts=texts, corpus=corpus_lda, coherence='c_v') +# coherence = cm.get_coherence() +# print(t,coherence) +# if coherence > best_coherence: +# best_t = t +# best_coherence = coherence +# return best_t + + # WS # Datas all_data = []