# Models: build a feature list, define candidate estimators, and report
# 10-fold shuffle-split ROC-AUC for each.
# NOTE(review): `data`, GaussianNB, linear_model, DecisionTreeClassifier,
# ShuffleSplit and cross_val_score come from imports outside this chunk.

# Predictors = every column except the target 'Resolved'.
features = list(data.columns.values)
features.remove('Resolved')
y_train = data['Resolved']

# Candidate models. The linear regressors (Ridge/Lasso/BayesianRidge) are
# scored with roc_auc via their continuous predictions, which AUC accepts
# as a ranking — presumably intentional; confirm 'Resolved' is binary.
clf1 = GaussianNB()
clf2 = linear_model.Ridge(alpha=.3)
clf3 = linear_model.Lasso(alpha=0.6)
clf4 = linear_model.BayesianRidge()
clf5 = DecisionTreeClassifier(max_depth=10, min_samples_split=100000, random_state=0)

# One CV splitter shared by all models: identical folds (random_state=0)
# make the AUC scores directly comparable across estimators.
cv = ShuffleSplit(n_splits=10, test_size=0.1, random_state=0)

# Evaluate each model with the same data/splits and print mean AUC
# with a ±2*std spread, followed by the per-fold scores.
for model_name, clf in [
    ('GaussianNB', clf1),
    ('Ridge', clf2),
    ('Lasso', clf3),
    ('BayesianRidge', clf4),
    ('DecisionTreeClassifier', clf5),
]:
    scores = cross_val_score(clf, data[features], y_train, cv=cv,
                             scoring='roc_auc', n_jobs=-1)
    print("AUC %s: %0.2f+%0.2f" % (model_name, scores.mean(), scores.std() * 2))
    print(scores)