diff --git a/data-computer/README.md b/data-computer/README.md index c90bb0e..12e4f1b 100644 --- a/data-computer/README.md +++ b/data-computer/README.md @@ -157,95 +157,97 @@ ### v1/lda -Créer à partir de l'ensemble des documents un champ "lda" constitué de 5 topics. Chaque topic contient un champ "word", qui est composé une liste de 10 mots qui sont les plus caractéristiques du topic, ainsi que d'un champ "weight" qui donne la probabilité que le document soit classé dans le topic. Le texte doit être en anglais ou en français. +Créer à partir de l'ensemble des documents un champ "lda" constitué de 5 topics. Chaque topic contient un champ "words", qui est composé d'une liste de 10 mots qui sont les plus caractéristiques du topic, ainsi que d'un champ "weight" qui donne la probabilité que le document soit classé dans le topic. Le texte doit être en anglais. Par exemple, pour un document pris dans un ensemble de document (l'id "85" est totalement arbitraire) ```json -{"id": 85, "value": "L'anglais est souvent enseign\u00e9 comme langue seconde dans de nombreux pays \u00e0 travers le monde."} +{ + "id": 85, + "value": "During my culinary adventure through the bustling markets of Marrakech, where the scent of exotic spices hung in the air and vendors beckoned with colorful displays of fruits and textiles, I savored tagines, couscous, and mint tea, discovering the rich tapestry of Moroccan flavors." 
+} ``` On obtiendra : ```json { - "id": 85, - "value": "L'anglais est souvent enseign\u00e9 comme langue seconde dans de nombreux pays \u00e0 travers le monde.", + "value": "During my culinary adventure through the bustling markets of Marrakech, where the scent of exotic spices hung in the air and vendors beckoned with colorful displays of fruits and textiles, I savored tagines, couscous, and mint tea, discovering the rich tapestry of Moroccan flavors.", "lda": { "topic_1": { "words": [ - "plus", - "nombreux", - "nombre", - "grand", - "philosophie", - "concept", - "physique", - "second", - "fondamentale", - "relativite" + "sky", + "tranquil", + "yellow", + "solace", + "symphony", + "leave", + "bird", + "taxi", + "cityscape", + "provide" ], - "weight": "0.018401673" + "weight": "0.0133591" }, "topic_2": { "words": [ - "revolution", - "concept", - "francaise", - "philosophie", - "comme", - "revolutionne", - "empereur", - "tels", - "stoicisme", - "vertu" + "bustling", + "air", + "savor", + "tapestry", + "rich", + "adventure", + "tea", + "discover", + "flavor", + "hang" ], - "weight": "0.01821572" + "weight": "0.94660753" }, "topic_3": { "words": [ - "etatsunis", - "physique", - "philosophie", - "temps", - "principe", - "existence", - "histoire", - "siecle", - "trous", - "generale" + "street", + "air", + "cottage", + "quaint", + "melodic", + "seaside", + "water", + "shore", + "collect", + "sandy" ], - "weight": "0.01818413" + "weight": "0.013361818" }, "topic_4": { "words": [ - "nombreux", - "postulat", - "mathematiques", - "domaines", - "algebre", - "anglaise", - "connu", - "theoreme", - "devenu", - "science" + "forest", + "atmosphere", + "leave", + "filter", + "tale", + "tower", + "create", + "floor", + "enchant", + "shadow" ], - "weight": "0.018459676" + "weight": "0.013335978" }, "topic_5": { "words": [ - "anglais", - "production", - "connu", - "langue", - "litterature", - "nombreux", - "monde", - "revolutionne", - "travers", - "siecle" + "mystery", + "sky", + 
"embark", + "ponder", + "gaze", + "overwhelming", + "light", + "mountaintop", + "night", + "universe" ], - "weight": "0.92673886" + "weight": "0.013335522" } } }