# In [ ]:  (Jupyter cell marker left over from notebook export — commented out so the file is valid Python)
#An API to get entered data on the web page and return our model's result
import csv
import sys
import os
import pickle
import xgboost as xgb
import numpy as np
import pandas as pd

# Map each tag name to its statistics row loaded from tags.csv.
# Keyed on column 1 (the tag name); the value is the remaining columns
# (count, ..., followers, ..., expert_ratio, problem_rate, quality,
# popularity — indices used by the aggregation loop below).
ma = {}
tags_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'tags.csv')
with open(tags_path, mode='r') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader, None)  # skip the header row instead of counting lines
    for row in csv_reader:
        ma[row[1]] = row[2:]
# The second CLI argument carries every input as one comma-separated string;
# its final field is itself a '+'-separated list of tag names.
raw_input_string = sys.argv[2]
args = raw_input_string.split(',')
tags = args[-1].split('+')

# Running aggregates over the question's tags.
# NOTE(review): 10000000 is a "plus infinity" sentinel for the minima — if no
# tag contributes data, the min_* features keep this value.  The model was
# presumably trained with the same sentinel, so it is kept as-is.
min_followers = min_count = min_popularity = 10000000
min_expert_ratio = min_quality = min_problem_rate = 10000000
max_followers = max_count = max_popularity = 0.0
max_expert_ratio = max_quality = max_problem_rate = 0.0
sum_followers = sum_count = sum_popularity = 0.0
sum_expert_ratio = sum_quality = sum_problem_rate = 0.0

# Aggregate per-tag statistics across all tags attached to the question.
# Columns of ma[tag]: 0=count, 3=followers, 6=expert_ratio, 7=problem_rate,
# 8=quality, 9=popularity (other columns unused here).
# Fix: the original second guard tested `ma[tag] != ''`, which is always true
# for a list — removed; membership was also tested twice.
i = 0  # number of tags that actually contributed data
for tag in tags:
    # Skip empty/unknown tags and rows whose followers column is missing.
    if tag == '' or tag not in ma:
        continue
    row = ma[tag]
    if row[3] == 'NULL' or row[3] == '':
        continue
    i += 1
    count = float(row[0])
    followers = float(row[3])
    expert_ratio = float(row[6])
    problem_rate = float(row[7])
    quality = float(row[8])
    popularity = float(row[9])

    sum_followers += followers
    sum_count += count
    sum_popularity += popularity
    sum_expert_ratio += expert_ratio
    sum_quality += quality
    sum_problem_rate += problem_rate

    # Running extrema via the builtins instead of twelve if-statements.
    max_count = max(max_count, count)
    max_expert_ratio = max(max_expert_ratio, expert_ratio)
    max_followers = max(max_followers, followers)
    max_popularity = max(max_popularity, popularity)
    max_problem_rate = max(max_problem_rate, problem_rate)
    max_quality = max(max_quality, quality)
    min_count = min(min_count, count)
    min_expert_ratio = min(min_expert_ratio, expert_ratio)
    min_followers = min(min_followers, followers)
    min_popularity = min(min_popularity, popularity)
    min_problem_rate = min(min_problem_rate, problem_rate)
    min_quality = min(min_quality, quality)
# Every field except the trailing tag string is numeric: cast them in place.
numeric_count = len(args) - 1
args[:numeric_count] = [float(value) for value in args[:numeric_count]]

# Model input = the numeric CLI features followed by the tag-derived aggregates.
new_args = args[:numeric_count] + [min_quality, max_expert_ratio, max_quality, max_problem_rate, sum_quality]
# Column names expected by the trained model, in training order.
header = [
    # Question-text structure features
    'WordCountBody', 'WordCountTitle', 'HasWhQuestion', 'isQuestion',
    'HasError', 'CodeSnippets', 'HasList', 'HasQuote', 'Paragraphs',
    'CodeSnippetsLen', 'isCapital', 'isBodyCapital', 'Links', 'LOC',
    'BodyAVGSentence', 'BodyAVGWord', 'BodySentenceCount', 'TitleAVGWord',
    'TagCount', 'SOExperience',
    # Owner badge features
    'HadYearling', 'HadNiceAnswer', 'HadNiceQuestion', 'HadInformed',
    'HadStudent', 'HadAnalytical', 'HadEnthusiast', 'HadCommentator',
    'HadAutobiographer', 'HadCurious', 'HadPopularQuestion',
    'HadFamousQuestion', 'HadNotableQuestion', 'HadGoodAnswer',
    'HadGreatAnswer', 'HadNecromancer', 'HadScholar', 'HadCustodian',
    'HadEditor', 'HadCritics', 'HadSupporter', 'HadTumbleweed', 'HadTeacher',
    # Timing and owner-activity features
    'AskingWeekDay', 'AskingHour', 'QuestionAge', 'OwnerQuestions',
    'OwnerAnswers', 'OwnerAcceptedQuestions', 'OwnerAcceptedAnswers',
    'OwnerAnswersScore', 'OwnerQuestionsScore',
    # Tag-derived aggregate features (computed above)
    'min_quality', 'max_expert_ratio', 'max_quality', 'max_problem_rate',
    'avg_quality',
]
# Assemble a one-row DataFrame keyed by feature name, in the model's column
# order (IndexError here means new_args is shorter than header — a bug upstream).
dataf = {col: [new_args[pos]] for pos, col in enumerate(header)}
df = pd.DataFrame(data=dataf)
# Removed `pd.feature_names = header`: it merely set a stray attribute on the
# pandas module object and was never read anywhere — a no-op.
# Load the trained XGBoost booster and print the prediction for our single row.
# Fix: the original `open()` result was never closed — use a context manager.
# NOTE(review): pickle.load is only safe on trusted files; a.pickle.dat ships
# with the app, but never point this at user-supplied data.
model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'a.pickle.dat')
with open(model_path, "rb") as model_file:
    loaded_model = pickle.load(model_file)
test_prediction = loaded_model.predict(
    xgb.DMatrix(df[header]),
    ntree_limit=loaded_model.best_iteration + 1,
)
print(test_prediction[0])