diff --git a/data-computer/v1/lda.py b/data-computer/v1/lda.py index 916b5e3..9e467ab 100755 --- a/data-computer/v1/lda.py +++ b/data-computer/v1/lda.py @@ -5,7 +5,6 @@ from gensim import corpora, models import unicodedata import string -import math # params num_topics = 5 # Number of topics @@ -55,7 +54,6 @@ return tokens - # WS # Datas all_data = [] @@ -74,9 +72,8 @@ # extract infos for line in all_data: - idx = line["id"] doc = line["value"] - doc_bow = dictionary.doc2bow(texts[int(idx)-1]) + doc_bow = dictionary.doc2bow(tokenize(uniformize(line["value"]))) topics = lda_model[doc_bow] topic_info = {} for topic_id, topic_weight in topics: