"""
This is the code to accompany the Lesson 2 (SVM) mini-project.
Use an SVM to identify emails from the Enron corpus by their authors:
Sara has label 0
Chris has label 1
"""
import sys
from time import time

from sklearn import svm
from sklearn.metrics import accuracy_score

# email_preprocess is imported from ../tools/, so that directory must be on
# the module search path before the import below runs.
sys.path.append("../tools/")
from email_preprocess import preprocess

# preprocess() hands back four values, unpacked here as the training/testing
# features and the matching author labels.
features_train, features_test, labels_train, labels_test = preprocess()

# RBF-kernel support vector classifier with C = 10000.
clf = svm.SVC(C=10000.0, kernel='rbf')

# NOTE: every print below passes exactly ONE argument. That form produces the
# same text under Python 2 (print statement) and Python 3 (print function),
# whereas print("a", b) prints a tuple repr under Python 2.

# Time the fit on its own so it can be compared with prediction time.
t0 = time()
clf.fit(features_train, labels_train)
print("training time: %s s" % round(time() - t0, 3))

# Time prediction over the whole test set.
t1 = time()
pred = clf.predict(features_test)
print("predicting time: %s s" % round(time() - t1, 3))

# accuracy_score is documented as (y_true, y_pred); the value is the same
# either way, but the documented order keeps the call unambiguous.
accuracy = accuracy_score(labels_test, pred)
print(accuracy)

# Predicted label for three individual test emails.
for idx in (10, 26, 50):
    print("pred[%d]: %s" % (idx, pred[idx]))

# Sum of the predicted labels. With the 0/1 labelling described in the module
# docstring (Chris = 1) this is the number of test emails predicted as Chris.
print(sum(pred))