In [1]:
#install XGBoost:
#!conda install -y -c conda-forge xgboost
In [2]:
import datetime as dt
import numpy as np
import pandas as pd 
import xgboost as xgb
import random
import time
from numpy import genfromtxt
In [3]:
#download files
#bucket = 'XXX'
#prefix = 'XXX/XXX'
#import boto3
#import re
#from sagemaker import get_execution_role
#role = get_execution_role()
#s3 = boto3.client('s3')
#s3.download_file(bucket, 'features.csv', 'features.csv')

# Load the pre-computed feature matrix. Expects 'features.csv' in the working
# directory — obtain it via the commented-out SageMaker/S3 snippet above
# (bucket/prefix redacted as 'XXX').
data=pd.read_csv('features.csv')
In [4]:
# Exclude the most recent posts (Pid > 57516454 ≈ the last 15 days — presumably
# because they have not had enough time to be resolved; TODO confirm cutoff),
# then drop the identifier columns, which are not features.
# Rebinding `data` instead of `inplace=True` keeps the cell free of hidden-state
# mutation (re-running after a kernel restart behaves predictably).
# `~(Pid > N)` rather than `Pid <= N` preserves the original NaN handling:
# rows with missing Pid are kept, exactly as drop-by-index did.
data = data.loc[~(data.Pid > 57516454)].drop(columns=['Pid', 'Row'])
In [5]:
#wo new features
# Ablation run: "wo new features" = train WITHOUT the newly engineered feature
# groups (text-structure, owner-quality aggregates, question age/score, and
# badge-history columns), keeping only the original baseline features.
# Rebind rather than mutate in place so the cell does not rely on hidden
# kernel state across re-runs.
data = data.drop(
    columns=['CodeSnippetsLen','Paragraphs','isQuestion','HasError','HasQuote','HasList','LOC',
             'min_quality', 'max_expert_ratio', 'max_quality', 'max_problem_rate', 'avg_quality',
             'QuestionAge','OwnerQuestionsScore','OwnerAnswersScore',
             'HadYearling', 'HadNiceAnswer', 'HadNiceQuestion','HadStudent',
             'HadAnalytical', 'HadEnthusiast', 'HadCommentator', 'HadAutobiographer', 'HadCurious',
             'HadPopularQuestion', 'HadFamousQuestion', 'HadNotableQuestion', 'HadGoodAnswer', 'HadGreatAnswer',
             'HadNecromancer', 'HadScholar', 'HadCustodian', 'HadEditor', 'HadCritics', 'HadSupporter',
             'HadTumbleweed', 'HadInformed','HadTeacher','BodySentenceCount'])
In [6]:
# Cross-validated XGBoost: predict the binary 'Resolved' target from all
# remaining columns, reporting train/test AUC per boosting round.
features = list(data.columns.values)
features.remove('Resolved')  # 'Resolved' is the label, not a feature

params = {
    "objective": "binary:logistic",   # binary target, probabilistic output
    "booster" : "gbtree",
    "eval_metric": "auc",
    "eta": .56,                       # learning rate (high — paired with only 35 rounds)
    "tree_method": 'exact',
    "max_depth": 20,
    "subsample": 1,
    "colsample_bytree": 0.5,
    "verbosity": 0,                   # replaces deprecated "silent": 1
    "min_child_weight": 1,
    "nthread": -1,                    # native-API name; "n_jobs" belongs to the sklearn wrapper
    "gamma": 15,                      # strong min-loss-reduction regularization
    "num_parallel_tree": 8,           # 8 parallel trees per round (boosted random forest)
}

# missing=-1: the feature file encodes missing values as -1
dtrain = xgb.DMatrix(data[features], data['Resolved'], missing=-1)

# 35 boosting rounds, 10-fold CV; prints AUC mean/std every round
gbm = xgb.cv(params, dtrain, 35, nfold=10,  verbose_eval=1)
print(gbm)
[0]	train-auc:0.632947+0.000446445	test-auc:0.631741+0.00245129
[1]	train-auc:0.640257+0.00157999	test-auc:0.63879+0.00283166
[2]	train-auc:0.643962+0.00212885	test-auc:0.642063+0.00257002
[3]	train-auc:0.647367+0.0018214	test-auc:0.645142+0.00282956
[4]	train-auc:0.649802+0.00135646	test-auc:0.647382+0.00260818
[5]	train-auc:0.652321+0.000992182	test-auc:0.649843+0.0027966
[6]	train-auc:0.654177+0.000669948	test-auc:0.651486+0.00228102
[7]	train-auc:0.655677+0.000627948	test-auc:0.652827+0.00252023
[8]	train-auc:0.656943+0.00084723	test-auc:0.653767+0.00236524
[9]	train-auc:0.657909+0.000582084	test-auc:0.654579+0.00223769
[10]	train-auc:0.658735+0.000517257	test-auc:0.655266+0.00224739
[11]	train-auc:0.659369+0.000540523	test-auc:0.655786+0.00216296
[12]	train-auc:0.659809+0.000614789	test-auc:0.656126+0.00217825
[13]	train-auc:0.660241+0.000548698	test-auc:0.656426+0.00225837
[14]	train-auc:0.660681+0.000521016	test-auc:0.656768+0.00235627
[15]	train-auc:0.661073+0.000596349	test-auc:0.657051+0.00236232
[16]	train-auc:0.661355+0.000627901	test-auc:0.657246+0.00229869
[17]	train-auc:0.661661+0.000456705	test-auc:0.657495+0.0023405
[18]	train-auc:0.661839+0.000487984	test-auc:0.657624+0.0022869
[19]	train-auc:0.662064+0.000516929	test-auc:0.657818+0.00220027
[20]	train-auc:0.66228+0.000585246	test-auc:0.658032+0.00216729
[21]	train-auc:0.662477+0.000563099	test-auc:0.658201+0.00211635
[22]	train-auc:0.662658+0.000590827	test-auc:0.658345+0.00214925
[23]	train-auc:0.66283+0.000577932	test-auc:0.658508+0.00211215
[24]	train-auc:0.662975+0.000498489	test-auc:0.658614+0.00212715
[25]	train-auc:0.66306+0.000486107	test-auc:0.658702+0.00218288
[26]	train-auc:0.663142+0.000530879	test-auc:0.658765+0.00215586
[27]	train-auc:0.663185+0.000543899	test-auc:0.658788+0.00214399
[28]	train-auc:0.663236+0.00053457	test-auc:0.658828+0.00216283
[29]	train-auc:0.663384+0.000516907	test-auc:0.658975+0.00221738
[30]	train-auc:0.663486+0.000503675	test-auc:0.659052+0.002239
[31]	train-auc:0.66354+0.00049271	test-auc:0.659103+0.00225601
[32]	train-auc:0.663652+0.000526483	test-auc:0.65915+0.00219712
[33]	train-auc:0.663764+0.000526997	test-auc:0.659223+0.00221684
[34]	train-auc:0.663805+0.000531902	test-auc:0.65926+0.00224692
    train-auc-mean  train-auc-std  test-auc-mean  test-auc-std
0         0.632947       0.000446       0.631741      0.002451
1         0.640257       0.001580       0.638790      0.002832
2         0.643962       0.002129       0.642063      0.002570
3         0.647367       0.001821       0.645142      0.002830
4         0.649802       0.001356       0.647382      0.002608
5         0.652321       0.000992       0.649843      0.002797
6         0.654177       0.000670       0.651486      0.002281
7         0.655677       0.000628       0.652827      0.002520
8         0.656943       0.000847       0.653767      0.002365
9         0.657909       0.000582       0.654579      0.002238
10        0.658735       0.000517       0.655266      0.002247
11        0.659369       0.000541       0.655786      0.002163
12        0.659809       0.000615       0.656126      0.002178
13        0.660241       0.000549       0.656426      0.002258
14        0.660681       0.000521       0.656768      0.002356
15        0.661073       0.000596       0.657051      0.002362
16        0.661355       0.000628       0.657246      0.002299
17        0.661661       0.000457       0.657495      0.002341
18        0.661839       0.000488       0.657624      0.002287
19        0.662064       0.000517       0.657818      0.002200
20        0.662280       0.000585       0.658032      0.002167
21        0.662477       0.000563       0.658201      0.002116
22        0.662658       0.000591       0.658345      0.002149
23        0.662830       0.000578       0.658508      0.002112
24        0.662975       0.000498       0.658614      0.002127
25        0.663060       0.000486       0.658702      0.002183
26        0.663142       0.000531       0.658765      0.002156
27        0.663185       0.000544       0.658788      0.002144
28        0.663236       0.000535       0.658828      0.002163
29        0.663384       0.000517       0.658975      0.002217
30        0.663486       0.000504       0.659052      0.002239
31        0.663540       0.000493       0.659103      0.002256
32        0.663652       0.000526       0.659150      0.002197
33        0.663764       0.000527       0.659223      0.002217
34        0.663805       0.000532       0.659260      0.002247
In [ ]: