"""Tune a gradient-boosting classifier's ``max_depth`` and print predictions.

Reads ``trn.dat`` and ``tst.dat`` (svmlight format), holds out 20% of the
training data for validation, picks the ``max_depth`` with the best
validation accuracy, refits on the full training data and writes
``index,prediction`` rows to stdout.
"""

from sklearn.datasets import load_svmlight_files
from sklearn.ensemble import GradientBoostingClassifier

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # scikit-learn < 0.18 only ships the old module; it was deprecated in
    # 0.18 and removed in 0.20, so the modern location is tried first.
    from sklearn.cross_validation import train_test_split

# One seed for both the split and the models so runs are reproducible.
SEED = 131071
CANDIDATE_MAX_DEPTHS = (3, 6, 12)


def make_gbm(max_depth):
    """Return an unfitted classifier with the shared hyper-parameters.

    Only ``max_depth`` varies between the search models and the final
    model; everything else is fixed here so the two cannot drift apart.
    """
    return GradientBoostingClassifier(
        subsample=0.8,
        min_samples_leaf=5,
        max_depth=max_depth,
        random_state=SEED,
    )


# Loading both files in one call makes them share the same feature count.
trn_val_X, trn_val_y, tst_X, tst_y = load_svmlight_files(("trn.dat", "tst.dat"))

trn_X, val_X, trn_y, val_y = train_test_split(
    trn_val_X, trn_val_y, test_size=0.2, random_state=SEED
)

# The original densified inputs before score/predict; that is kept here.
# NOTE(review): presumably a workaround for releases that reject sparse
# input at predict time -- confirm whether it is still needed.
# Converted once instead of on every loop iteration.
val_X_dense = val_X.toarray()

bestAccuracy = 0.0
bestMaxDepth = None
for max_depth in CANDIDATE_MAX_DEPTHS:
    gbm = make_gbm(max_depth)
    gbm.fit(trn_X, trn_y)
    accuracy = gbm.score(val_X_dense, val_y)
    # Always accept the first candidate so that bestMaxDepth is a valid
    # depth even when every candidate scores 0.0; after that only a strict
    # improvement wins, so ties keep the earlier (shallower) depth.
    if bestMaxDepth is None or accuracy > bestAccuracy:
        bestAccuracy = accuracy
        bestMaxDepth = max_depth

# Refit on training + validation data with the selected depth.
final = make_gbm(bestMaxDepth)
final.fit(trn_val_X, trn_val_y)

print("index,prediction")
for index, prediction in enumerate(final.predict(tst_X.toarray())):
    print("{},{}".format(index, int(prediction)))