diff --git a/data-computer/v1/lda.py b/data-computer/v1/lda.py index 0e99a1b..84ca117 100755 --- a/data-computer/v1/lda.py +++ b/data-computer/v1/lda.py @@ -55,22 +55,6 @@ best_topic = topic return {best_topic:dico[best_topic]} -# # Max coherence -# For a corpus, return the optimal number of topic -# def how_many_topics(corpus,dictionary,texts): -# best_t = 2 -# best_coherence = 0 -# for t in range(3, 21): -# lda_model = models.LdaModel(corpus, id2word=dictionary, num_topics=t,iterations=200,alpha="symmetric", eta = "auto",minimum_probability=0.1) -# corpus_lda = lda_model[corpus] - -# cm = models.coherencemodel.CoherenceModel(model=lda_model, texts=texts, corpus=corpus_lda, coherence='c_v') -# coherence = cm.get_coherence() -# print(t,coherence) -# if coherence > best_coherence: -# best_t = t -# best_coherence = coherence -# return best_t # WS