import pandas as pd # 数据科学计算工具
import numpy as np # 数值计算工具
import matplotlib.pyplot as plt # 可视化
import seaborn as sns # matplotlib的高级API
from sklearn.model_selection import StratifiedKFold #交叉验证
from sklearn.model_selection import GridSearchCV #网格搜索
from sklearn.model_selection import train_test_split #将数据集分开成训练集和测试集
from xgboost import XGBClassifier
# lgb_train = lgb.Dataset(x_train, y_train)
# lgb_eval = lgb.Dataset(x_test, y_test, reference=lgb_train)
# Hyper-parameter grid handed to GridSearchCV below.
# max_depth: tree depth — deeper trees fit more but risk overfitting.
# num_leaves: LightGBM grows trees leaf-wise, so model complexity is tuned
# through num_leaves rather than max_depth. Rough relation:
# num_leaves ≈ 2**max_depth; keep it below 2**max_depth to avoid overfitting.
# This is the single most important knob for accuracy.
parameters = {
    'max_depth': [3, 5, 7],            # same candidates as range(3, 8, 2)
    'num_leaves': [50, 80, 110, 140],  # same candidates as range(50, 170, 30)
}
# Classification alternative, kept for reference:
#gbm = lgb.LGBMClassifier(objective='binary',num_leaves=31,learning_rate=0.05,n_estimators=20)
# NOTE(review): `lgb` is never imported in this file — the imports above bring in
# xgboost's XGBClassifier (itself unused) instead of `import lightgbm as lgb`,
# so this statement raises NameError as written. TODO: add the lightgbm import.
# NOTE(review): 'binary_logloss,auc' are binary-classification metrics, which is
# inconsistent with objective='regression' — confirm whether the task is really
# regression or binary classification.
gbm = lgb.LGBMRegressor(objective = 'regression', # regression objective
metric = 'binary_logloss,auc', # evaluation metrics (see review note above)
learning_rate = 0.1,
feature_fraction = 0.7, # fraction of features sampled when building each tree
min_child_samples=21,
min_child_weight=0.001,
bagging_fraction = 1,
bagging_freq = 2,
reg_alpha = 0.001, # L1 regularization
reg_lambda = 8, # L2 regularization
cat_smooth = 0,
num_iterations = 200,
verbose=-1 ## <0: fatal only, =0: errors (warnings), >0: info
)
# Exhaustive grid search over `parameters` with 3-fold CV, selecting by ROC AUC.
# NOTE(review): scoring='roc_auc' assumes y_train holds binary labels whose
# predicted scores can be ranked — confirm this matches the regression setup above.
gsearch = GridSearchCV(gbm, param_grid=parameters, scoring='roc_auc', cv=3)
# Fix: pass the target unmodified. The original called y_train.astype('str'),
# but LGBMRegressor requires a numeric target (lightgbm raises on string y)
# and roc_auc scoring likewise needs numeric label encoding.
gsearch.fit(x_train, y_train)
print('参数的最佳取值:{0}'.format(gsearch.best_params_))  # best parameter combination found
print('最佳模型得分:{0}'.format(gsearch.best_score_))  # CV score of the best combination
print(gsearch.cv_results_['mean_test_score'])  # mean CV score per grid point
print(gsearch.cv_results_['params'])  # the grid point corresponding to each score