diff --git a/data-computer/v1/lda.py b/data-computer/v1/lda.py index e52e4f6..66c4535 100755 --- a/data-computer/v1/lda.py +++ b/data-computer/v1/lda.py @@ -94,7 +94,7 @@ else: index_without_value.append(i) dictionary = corpora.Dictionary(texts) # Create a tf dictionary, but replace text by an id : [ [(id_token,numb_token),...] , [....] ]. The list represent docs of corpus -dictionary.filter_extremes(no_below=3,no_above=0.5) +dictionary.filter_extremes(no_below=3,no_above=0.8) corpus = [dictionary.doc2bow(text) for text in texts] try: