diff --git a/data-computer/requirements.txt b/data-computer/requirements.txt
index 0bc7d8e..915444b 100755
--- a/data-computer/requirements.txt
+++ b/data-computer/requirements.txt
@@ -1 +1,3 @@
 gensim==4.3.2
+spacy==3.6.1
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz
diff --git a/data-computer/v1/lda.py b/data-computer/v1/lda.py
index 85e207e..e8aedc2 100755
--- a/data-computer/v1/lda.py
+++ b/data-computer/v1/lda.py
@@ -6,11 +6,14 @@
 import unicodedata
 import string
 import re
+import spacy
 
 # params
 num_topics = 5 # Number of topics
 num_iterations=100 # "epochs" ====> shall it depends of number of docs ? number of topics ?
 
+nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])
+
 #stopwords
 with open('./v1/stopwords/en.json','r') as f_in:
     stopwords =json.load(f_in)
@@ -31,6 +34,11 @@
     return text.lower()
 
+#lemmatize
+def lemmatize(text):
+    doc = nlp(text)
+    return " ".join([token.lemma_ for token in doc])
+
 #tokenize
 def tokenize(text):
     tokens = [word for word in text.replace("'"," ").split() if word not in stopwords and len(word)>2]
 
@@ -46,7 +54,7 @@
 
 # training LDA
-texts = [tokenize(uniformize(line["value"])) for line in all_data]
+texts = [tokenize(lemmatize(uniformize(line["value"]))) for line in all_data]
 dictionary = corpora.Dictionary(texts)
 # Create a tf dictionary, but replace text by an id : [ [(id_token,numb_token),...] , [....] ]. The list represent docs of corpus
 corpus = [dictionary.doc2bow(text) for text in texts]
 
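Reviewer note: below is a minimal, self-contained sketch of the new uniformize -> lemmatize -> tokenize chain, for trying the change outside of lda.py. It assumes spacy 3.6.x and the en_core_web_sm model pinned in requirements.txt are installed; the stopword set and the uniformize body here are simplified stand-ins for the real ones in ./v1/stopwords/en.json and lda.py.

import spacy

# Parser and NER are disabled, as in lda.py; the tagger and lemmatizer stay
# enabled, which is what the rule-based lemmatization needs.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

stopwords = {"the", "and", "were"}  # stand-in for ./v1/stopwords/en.json

def uniformize(text):
    # simplified stand-in for lda.py's uniformize
    return text.lower()

def lemmatize(text):
    doc = nlp(text)
    return " ".join(token.lemma_ for token in doc)

def tokenize(text):
    # same filter as lda.py: drop stopwords and tokens shorter than 3 characters
    return [w for w in text.replace("'", " ").split() if w not in stopwords and len(w) > 2]

print(tokenize(lemmatize(uniformize("The cats were chasing mice"))))
# typically ['cat', 'chase', 'mouse']

Because lemmatization now runs before tokenization, inflected variants ("cats"/"cat", "chasing"/"chase") collapse onto a single gensim dictionary id, which shrinks the vocabulary the LDA model has to fit. Note that spaCy 3.x cannot load v2.x model packages, so the en_core_web_sm pin in requirements.txt has to stay on the same 3.6.x line as spacy itself.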