import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from ordinalgbt.lgb import LGBMOrdinal
# NOTE: captured notebook output from a run in an environment where the
# `lightgbm` package was not installed; kept here for reference only.
# ---------------------------------------------------------------------------
# ModuleNotFoundError Traceback (most recent call last)
# Cell In[1], line 3
# 1 import numpy as np
# 2 import pandas as pd
# ----> 3 from lightgbm import LGBMClassifier
# 4 from sklearn.datasets import load_diabetes
# 5 from sklearn.linear_model import LinearRegression, LogisticRegression
# ModuleNotFoundError: No module named 'lightgbm'
# Load the diabetes data set and keep the features in a labelled DataFrame.
data = load_diabetes()
X = pd.DataFrame(data["data"], columns=data["feature_names"])
y = data["target"]

# Discretise the continuous target into `nq` quantile bins.
nq = 10
# Interior cut points are the 1/nq, ..., (nq-1)/nq quantiles of y; the two
# outer edges sit one unit beyond the observed range so that the smallest and
# largest observations still fall inside the first and last bins.
inner_probs = np.arange(0, 1, 1 / nq)[1:]
thresholds = np.concatenate(
    ([y.min() - 1], np.quantile(y, inner_probs), [y.max() + 1])
)
bin_labels = [f"q{k}" for k in range(1, nq + 1)]
yq = pd.cut(x=y, bins=thresholds, right=True, labels=bin_labels)
# Integer codes of the ordered bins (0 for the lowest bin).
yord = yq.astype("category").codes
# Repeatedly split the data, fit three classifiers on the ordinal target and
# record each model's mean absolute error (in units of bin index) on the
# held-out set.
# NOTE(review): `nsim` and `holder` were used without being defined anywhere
# before this loop. The repetition count below is a placeholder -- confirm the
# intended value.
nsim = 10
holder = []  # one single-row DataFrame of MAEs per repetition
for ii in range(nsim):
    # Do a train/test split (80/20), stratified on the ordinal label. The loop
    # index is used as the seed so every repetition is reproducible.
    ytrain, ytest, Xtrain, Xtest = train_test_split(
        yord, X, stratify=yord, test_size=0.2, random_state=ii
    )

    # Ordinal model
    mdl_ord = LGBMOrdinal()
    mdl_ord.fit(Xtrain, ytrain)

    # Multinomial LGBM model
    mdl_class = LGBMClassifier()
    mdl_class.fit(Xtrain, ytrain)

    # Multinomial Regression model
    mdl_multi = LogisticRegression(penalty='l2', solver='lbfgs', max_iter=1000)
    mdl_multi.fit(Xtrain, ytrain)

    # Make predictions
    yhat_ord = mdl_ord.predict(Xtest)
    yhat_multi = mdl_multi.predict(Xtest)
    yhat_class = mdl_class.predict(Xtest)

    # Get MAE: average absolute distance between predicted and true bin index.
    mae_class = np.abs(yhat_class - ytest).mean()
    mae_multi = np.abs(yhat_multi - ytest).mean()
    mae_ord = np.abs(yhat_ord - ytest).mean()

    # The dictionary keys are the short model names mapped to display labels
    # when the results are summarised.
    holder.append(
        pd.DataFrame(
            {'ord': mae_ord, 'multi': mae_multi, 'class': mae_class},
            index=[ii],
        )
    )
# Average each model's MAE over all repetitions and print a labelled summary.
mean_mae = pd.concat(holder).mean(axis=0)
# reset_index() turns the model keys into an 'index' column and leaves the
# averaged values in a column named 0; give both descriptive names.
df_mae = mean_mae.reset_index().rename(columns={'index': 'mdl', 0: 'MAE'})
# Replace the short model keys with human-readable labels.
di_lbls = {
    'ord': 'LGBMOrdinal',
    'multi': 'SKlearn Multinomial',
    'class': 'LGBMClassifier',
}
df_mae['mdl'] = df_mae['mdl'].map(di_lbls)
print(np.round(df_mae, 1))