from sklearn.feature_extraction.text import CountVectorizer
# Build a bag-of-words vectorizer with scikit-learn's default settings.
vectorizer = CountVectorizer()

# Three toy "emails" used as the corpus.
string1 = "hi Katie the self driving car will be late Best Sebastian"
# Adjacent literals inside parentheses are concatenated, so the long sentence
# can span two source lines without an unterminated-string syntax error.
string2 = (
    "Hi Sebastian the machine learning class will be great great "
    "great Best Katie"
)
string3 = "Hi Katie the machine learning class will be most excellent"
email_list = [string1, string2, string3]

# fit() learns the vocabulary and returns the vectorizer itself, so its
# result is not the bag of words and is deliberately not stored.
vectorizer.fit(email_list)

# transform() produces the sparse document-term count matrix: one row per
# email, one column per vocabulary word. Printing it lists each non-zero
# entry as "(email index, word index)  count".
bag_of_words = vectorizer.transform(email_list)
>>> print(bag_of_words)
  (0, 0)        1
  (0, 1)        1
  (0, 2)        1
  (0, 4)        1
  (0, 7)        1
  (0, 8)        1
  (0, 9)        1
  (0, 13)       1
  (0, 14)       1
  (0, 15)       1
  (0, 16)       1
  (1, 0)        1
  (1, 1)        1
  (1, 3)        1
  (1, 6)        3
  (1, 7)        1
  (1, 8)        1
  (1, 10)       1
  (1, 11)       1
  (1, 13)       1
  (1, 15)       1
  (1, 16)       1
  (2, 0)        1
  (2, 3)        1
  (2, 5)        1
  (2, 7)        1
  (2, 8)        1
  (2, 10)       1
  (2, 11)       1
  (2, 12)       1
  (2, 15)       1
  (2, 16)       1
10.1 - sklearn - CountVectorizer (Bag of words)

results matching ""

No results matching ""