# CLI helper for the web API: reads the feature vector entered on the web page from argv and prints the model's prediction.
import csv
import sys
import os
import pickle
import xgboost as xgb
import numpy as np
import pandas as pd
# Map each tag name (column 1 of tags.csv) to its remaining statistics
# columns (row[2:], kept as raw strings; parsed to float later).
ma = {}
with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'tags.csv'), mode='r') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader, None)  # skip the header row (replaces the manual line_count flag)
    for row in csv_reader:
        ma[row[1]] = row[2:]
# argv[2] carries the whole input record: a comma-separated list of numeric
# features whose final field is a '+'-joined list of question tags.
inputs = sys.argv[2]
args = inputs.split(",")
tags = args[-1].split("+")
# Seeds for the per-tag running statistics.
# Mins start at +inf so the first real observation always replaces the seed;
# the original sentinel of 10000000 silently undercounted mins for tags whose
# raw values (e.g. follower counts) exceed ten million.
min_followers = float('inf')
min_count = float('inf')
min_popularity = float('inf')
min_expert_ratio = float('inf')
min_quality = float('inf')
min_problem_rate = float('inf')
# Maxes seeded at 0.0 as in the original (assumes all statistics are
# non-negative — negative values would be missed; confirm against tags.csv).
max_followers = 0.0
max_count = 0.0
max_popularity = 0.0
max_expert_ratio = 0.0
max_quality = 0.0
max_problem_rate = 0.0
sum_followers = 0.0
sum_count = 0.0
sum_popularity = 0.0
sum_expert_ratio = 0.0
sum_quality = 0.0
sum_problem_rate = 0.0
# Number of tags that contributed valid statistics.
i = 0
# Aggregate min/max/sum statistics over the question's valid tags.
for tag in tags:
    # Skip empty tags, unknown tags, and rows whose follower column (index 3)
    # is missing ('NULL' or empty string). This folds together the original
    # two guards; the old `ma[tag] != ''` test compared a list to a string and
    # was always True (dead), and `tag in ma` was re-checked redundantly.
    if tag == '' or tag not in ma or ma[tag][3] in ('NULL', ''):
        continue
    i += 1
    count = float(ma[tag][0])
    followers = float(ma[tag][3])
    expert_ratio = float(ma[tag][6])
    problem_rate = float(ma[tag][7])
    quality = float(ma[tag][8])
    popularity = float(ma[tag][9])
    sum_followers += followers
    sum_count += count
    sum_popularity += popularity
    sum_expert_ratio += expert_ratio
    sum_quality += quality
    sum_problem_rate += problem_rate
    # Builtin min()/max() replace the hand-rolled comparison chains.
    max_count = max(max_count, count)
    max_expert_ratio = max(max_expert_ratio, expert_ratio)
    max_followers = max(max_followers, followers)
    max_popularity = max(max_popularity, popularity)
    max_problem_rate = max(max_problem_rate, problem_rate)
    max_quality = max(max_quality, quality)
    min_count = min(min_count, count)
    min_expert_ratio = min(min_expert_ratio, expert_ratio)
    min_followers = min(min_followers, followers)
    min_popularity = min(min_popularity, popularity)
    min_problem_rate = min(min_problem_rate, problem_rate)
    min_quality = min(min_quality, quality)
# Convert the numeric feature fields (everything except the trailing
# '+'-joined tag field) to float, in place.
for idx in range(len(args) - 1):
    args[idx] = float(args[idx])
# Append the engineered tag-aggregate features to the raw features.
# NOTE(review): the last feature is labelled 'avg_quality' in the header but
# is fed sum_quality (never divided by the tag count) — presumably the model
# was trained with this value; confirm before "fixing" it.
new_args = args[:-1] + [min_quality, max_expert_ratio, max_quality, max_problem_rate, sum_quality]
# Feature names, in the exact order the model expects.
header=['WordCountBody', 'WordCountTitle', 'HasWhQuestion', 'isQuestion', 'HasError', 'CodeSnippets', 'HasList', 'HasQuote', 'Paragraphs', 'CodeSnippetsLen', 'isCapital', 'isBodyCapital', 'Links', 'LOC', 'BodyAVGSentence', 'BodyAVGWord', 'BodySentenceCount', 'TitleAVGWord', 'TagCount', 'SOExperience', 'HadYearling', 'HadNiceAnswer', 'HadNiceQuestion', 'HadInformed', 'HadStudent', 'HadAnalytical', 'HadEnthusiast', 'HadCommentator', 'HadAutobiographer', 'HadCurious', 'HadPopularQuestion', 'HadFamousQuestion', 'HadNotableQuestion', 'HadGoodAnswer', 'HadGreatAnswer', 'HadNecromancer', 'HadScholar', 'HadCustodian', 'HadEditor', 'HadCritics', 'HadSupporter', 'HadTumbleweed', 'HadTeacher', 'AskingWeekDay', 'AskingHour', 'QuestionAge', 'OwnerQuestions', 'OwnerAnswers', 'OwnerAcceptedQuestions', 'OwnerAcceptedAnswers', 'OwnerAnswersScore', 'OwnerQuestionsScore', 'min_quality', 'max_expert_ratio', 'max_quality', 'max_problem_rate', 'avg_quality']
# One-row frame: each feature name maps to a single-element column.
# (Replaces the manual index counter; header and new_args are expected to be
# the same length.)
dataf = {h: [v] for h, v in zip(header, new_args)}
df = pd.DataFrame(data=dataf)
# Load the trained XGBoost booster stored next to this script.
# SECURITY: pickle.load executes arbitrary code from the file — only ever
# load a model file shipped with this application, never user-supplied data.
# (The original also opened the file without closing it and assigned
# pd.feature_names on the pandas *module* — a no-op nothing reads; both removed.)
model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'a.pickle.dat')
with open(model_path, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
# NOTE(review): ntree_limit is deprecated in xgboost >= 1.4 (superseded by
# iteration_range) — kept as-is for compatibility with the pinned version.
test_prediction = loaded_model.predict(xgb.DMatrix(df[header]), ntree_limit=loaded_model.best_iteration + 1)
print(test_prediction[0])