Train XGBoost Model with Pandas Input
作者:XD / 发表: 2022年9月28日 03:58 / 更新: 2022年9月28日 03:58 / 编程笔记 / 阅读量:1205
Train XGBoost Model with Pandas Input
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import classification_report
train=pd.read_csv('./train.csv')
test=pd.read_csv('./test.csv')
info=pd.read_csv('info.csv')
print(info.head()) # column name
print(info.shape)
new_info = info.drop_duplicates(subset=['id']) # remove duplicate row with same id
train2=pd.merge(train, new_info[['id', 'number']], how='left', on='id').fillna(0) # merge table horizontally
train_y=train2['result']
train_x=train2.drop(columns=['uaid','result','others'])
test_id = test['id']
test_y=test['result']
test_x=test.drop(columns=['uaid','result','others'])
model = xgb.XGBClassifier()
model.fit(train_x, train_y)
train_predict_y = model.predict(train_x)
print(classification_report(train_y, train_predict_y))
result=model.predict_proba(test_x)
result=pd.concat([test_y,pd.DataFrame(result)],axis=1)
result.to_csv('./test_result.csv')