diff --git a/data-computer/v1/lda.py b/data-computer/v1/lda.py
index ca3081c..b694728 100755
--- a/data-computer/v1/lda.py
+++ b/data-computer/v1/lda.py
@@ -5,6 +5,7 @@
 from gensim import corpora, models
 import unicodedata
 import string
+import re
 
 # params
 num_topics = 5  # Number of topics
@@ -37,10 +38,15 @@
 
 stopwords = stopwords_lists
 
+def remove_accents(text):
+    """Return *text* NFD-normalized with code points U+0300-U+036F removed."""
+    decomposed = unicodedata.normalize("NFD", text)
+    return re.sub("[\u0300-\u036f]", "", decomposed)
+
 
 def uniformize(text):
     # del accents
-    text = ''.join(char for char in unicodedata.normalize('NFD', text) if unicodedata.category(char) != 'Mn')
+    text = remove_accents(text)
 
     # remove punctuation except " ' "
     punctuation = ''.join(char for char in string.punctuation if char != "'")
@@ -50,7 +56,7 @@
 
 
 def tokenize(text):
-    tokens = [word for word in text.split() if word not in stopwords]
+    tokens = [w for w in text.replace("'", " ").split() if len(w) > 2 and w not in stopwords]
     return tokens