Data Science / Government-Funded Training Course
Day30 - Code(2)
jujukwakkwak
2022. 1. 24. 17:16
In [1]:
from pycaret.classification import *
In [2]:
import pandas as pd
Getting the Data
In [5]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
In [6]:
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
enc.fit(train['type'])
train['type'] = enc.transform(train['type'])
test['type'] = enc.transform(test['type'])
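The key point in the cell above is that the encoder is fitted on the train column only, and the same fitted mapping is reused on test, so both sets share one label scheme. A minimal illustration with hypothetical `'red'`/`'white'` values (the actual contents of the `type` column are assumed here):

```python
from sklearn.preprocessing import LabelEncoder

# Hypothetical 'type' values; the real CSVs are assumed to hold wine-type strings.
train_type = ['white', 'red', 'white', 'white']
test_type = ['red', 'white']

enc = LabelEncoder()
enc.fit(train_type)                    # learn the mapping from train only
print(list(enc.classes_))              # ['red', 'white'] -> red=0, white=1
print(list(enc.transform(test_type)))  # [0, 1] via the same mapping
```

Note that `transform` raises an error if test contains a category never seen during `fit`, which is why fitting on train first only works when train covers every category.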
In [9]:
data = train.drop(['index'], axis=1)
Setting up Environment in PyCaret
In [10]:
setup_model = setup(data = data, target = 'quality', session_id=123)
| | Description | Value |
|---|---|---|
0 | session_id | 123 |
1 | Target | quality |
2 | Target Type | Multiclass |
3 | Label Encoded | None |
4 | Original Data | (5497, 13) |
5 | Missing Values | 0 |
6 | Numeric Features | 12 |
7 | Categorical Features | 0 |
8 | Ordinal Features | 0 |
9 | High Cardinality Features | 0 |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (3847, 12) |
12 | Transformed Test Set | (1650, 12) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | 0 |
19 | Log Experiment | 0 |
20 | Experiment Name | clf-default-name |
21 | USI | 05b0 |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | 0 |
30 | Normalize Method | None |
31 | Transformation | 0 |
32 | Transformation Method | None |
33 | PCA | 0 |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | 0 |
37 | Combine Rare Levels | 0 |
38 | Rare Level Threshold | None |
39 | Numeric Binning | 0 |
40 | Remove Outliers | 0 |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | 0 |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | 1 |
45 | Clustering | 0 |
46 | Clustering Iteration | None |
47 | Polynomial Features | 0 |
48 | Polynomial Degree | None |
49 | Trignometry Features | 0 |
50 | Polynomial Threshold | None |
51 | Group Features | 0 |
52 | Feature Selection | 0 |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | 0 |
56 | Feature Ratio | 0 |
57 | Interaction Threshold | None |
58 | Fix Imbalance | 0 |
59 | Fix Imbalance Method | SMOTE |
Comparing All Models
In [11]:
best_model = compare_models()
| | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) |
|---|---|---|---|---|---|---|---|---|---|
rf | Random Forest Classifier | 0.6561 | 0.3325 | 0.3527 | 0.6601 | 0.6389 | 0.4552 | 0.4618 | 0.3510 |
et | Extra Trees Classifier | 0.6553 | 0.3366 | 0.3595 | 0.6606 | 0.6378 | 0.4520 | 0.4597 | 0.2470 |
lightgbm | Light Gradient Boosting Machine | 0.6337 | 0.3243 | 0.3465 | 0.6234 | 0.6194 | 0.4281 | 0.4312 | 0.4070 |
gbc | Gradient Boosting Classifier | 0.5864 | 0.3045 | 0.3116 | 0.5813 | 0.5692 | 0.3451 | 0.3512 | 1.9210 |
dt | Decision Tree Classifier | 0.5701 | 0.2728 | 0.3472 | 0.5724 | 0.5700 | 0.3596 | 0.3602 | 0.0210 |
lda | Linear Discriminant Analysis | 0.5456 | 0.2840 | 0.2825 | 0.5268 | 0.5183 | 0.2678 | 0.2762 | 0.0130 |
lr | Logistic Regression | 0.5448 | 0.2798 | 0.2347 | 0.5049 | 0.4922 | 0.2438 | 0.2613 | 1.6330 |
ridge | Ridge Classifier | 0.5420 | 0.0000 | 0.2183 | 0.4460 | 0.4713 | 0.2318 | 0.2522 | 0.0090 |
qda | Quadratic Discriminant Analysis | 0.4861 | 0.2768 | 0.3142 | 0.5035 | 0.4871 | 0.2529 | 0.2562 | 0.0150 |
knn | K Neighbors Classifier | 0.4513 | 0.2503 | 0.2163 | 0.4291 | 0.4354 | 0.1421 | 0.1432 | 0.0330 |
dummy | Dummy Classifier | 0.4463 | 0.2000 | 0.1571 | 0.1992 | 0.2755 | 0.0000 | 0.0000 | 0.0130 |
ada | Ada Boost Classifier | 0.4045 | 0.2198 | 0.1971 | 0.4192 | 0.3873 | 0.1207 | 0.1267 | 0.1110 |
nb | Naive Bayes | 0.3967 | 0.2531 | 0.2377 | 0.4353 | 0.4124 | 0.1303 | 0.1319 | 0.0080 |
svm | SVM - Linear Kernel | 0.2806 | 0.0000 | 0.1759 | 0.3805 | 0.2070 | 0.0588 | 0.0961 | 0.0730 |
Create a Model
In [12]:
rf = create_model('rf')
| Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
|---|---|---|---|---|---|---|---|
0 | 0.6753 | 0.0000 | 0.3875 | 0.6706 | 0.6613 | 0.4914 | 0.4948 |
1 | 0.6701 | 0.0000 | 0.3966 | 0.6824 | 0.6547 | 0.4797 | 0.4846 |
2 | 0.5948 | 0.0000 | 0.3450 | 0.6008 | 0.5824 | 0.3634 | 0.3665 |
3 | 0.6961 | 0.8616 | 0.3333 | 0.7105 | 0.6773 | 0.5178 | 0.5246 |
4 | 0.6519 | 0.8325 | 0.3265 | 0.6633 | 0.6347 | 0.4483 | 0.4544 |
5 | 0.6338 | 0.7985 | 0.3199 | 0.6658 | 0.6183 | 0.4102 | 0.4233 |
6 | 0.6494 | 0.8321 | 0.3163 | 0.6345 | 0.6295 | 0.4426 | 0.4514 |
7 | 0.6615 | 0.0000 | 0.3543 | 0.6382 | 0.6356 | 0.4639 | 0.4699 |
8 | 0.6667 | 0.0000 | 0.3750 | 0.6696 | 0.6490 | 0.4716 | 0.4784 |
9 | 0.6615 | 0.0000 | 0.3722 | 0.6649 | 0.6459 | 0.4627 | 0.4702 |
Mean | 0.6561 | 0.3325 | 0.3527 | 0.6601 | 0.6389 | 0.4552 | 0.4618 |
SD | 0.0258 | 0.4074 | 0.0275 | 0.0282 | 0.0247 | 0.0411 | 0.0409 |
Tune a Model
In [13]:
tuned_rf = tune_model(rf)
| Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
|---|---|---|---|---|---|---|---|
0 | 0.5870 | 0.0000 | 0.2832 | 0.5369 | 0.5579 | 0.3542 | 0.3576 |
1 | 0.5169 | 0.0000 | 0.2470 | 0.4812 | 0.4884 | 0.2280 | 0.2325 |
2 | 0.4753 | 0.0000 | 0.2174 | 0.4334 | 0.4402 | 0.1478 | 0.1528 |
3 | 0.5143 | 0.7023 | 0.2021 | 0.4788 | 0.4766 | 0.2110 | 0.2184 |
4 | 0.5273 | 0.6945 | 0.2256 | 0.4984 | 0.5053 | 0.2496 | 0.2539 |
5 | 0.5013 | 0.6453 | 0.1981 | 0.4541 | 0.4690 | 0.1935 | 0.1989 |
6 | 0.5325 | 0.6987 | 0.2175 | 0.4956 | 0.5024 | 0.2546 | 0.2598 |
7 | 0.5286 | 0.0000 | 0.2600 | 0.4928 | 0.5082 | 0.2622 | 0.2640 |
8 | 0.5312 | 0.0000 | 0.2451 | 0.4992 | 0.4842 | 0.2252 | 0.2409 |
9 | 0.5443 | 0.0000 | 0.2661 | 0.5055 | 0.5221 | 0.2813 | 0.2839 |
Mean | 0.5259 | 0.2741 | 0.2362 | 0.4876 | 0.4954 | 0.2407 | 0.2463 |
SD | 0.0274 | 0.3360 | 0.0270 | 0.0270 | 0.0303 | 0.0523 | 0.0513 |
Plot a Model
AUC Plot
In [14]:
plot_model(tuned_rf, plot = 'auc')
Feature Importance Plot
In [20]:
plot_model(tuned_rf, plot='feature')
Confusion Matrix
In [15]:
plot_model(tuned_rf, plot = 'confusion_matrix')
In [21]:
evaluate_model(tuned_rf)
Predict on test / hold-out Sample
In [16]:
predict_model(tuned_rf);
| | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
|---|---|---|---|---|---|---|---|---|
0 | Random Forest Classifier | 0.5236 | 0.6994 | 0.2163 | 0.5065 | 0.4916 | 0.2379 | 0.2488 |
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_13304/783714694.py in <module>
----> 1 predict_model(tuned_rf);

~\anaconda3\envs\pycaret\lib\site-packages\pycaret\classification.py in predict_model(estimator, data, probability_threshold, encoded_labels, raw_score, drift_report, round, verbose)
   2023     """
   2024
-> 2025     return pycaret.internal.tabular.predict_model(
   2026         estimator=estimator,
   2027         data=data,

~\anaconda3\envs\pycaret\lib\site-packages\pycaret\internal\tabular.py in predict_model(estimator, data, probability_threshold, encoded_labels, drift_report, raw_score, round, verbose, ml_usecase, display)
   8868             pred = pred.astype(int)
   8869             if not raw_score:
-> 8870                 score = [s[pred[i]] for i, s in enumerate(score)]
   8871             try:
   8872                 score = pd.DataFrame(score)

~\anaconda3\envs\pycaret\lib\site-packages\pycaret\internal\tabular.py in <listcomp>(.0)
   8868             pred = pred.astype(int)
   8869             if not raw_score:
-> 8870                 score = [s[pred[i]] for i, s in enumerate(score)]
   8871             try:
   8872                 score = pd.DataFrame(score)

IndexError: index 7 is out of bounds for axis 0 with size 7
Finalize Model for Deployment
In [17]:
final_rf = finalize_model(tuned_rf)
Predict on unseen data
In [19]:
# test_x = test.drop(['index'], axis=1)
unseen_predictions = predict_model(final_rf, data=test)
unseen_predictions.head()
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_13304/1103807159.py in <module>
      1 # test_x = test.drop(['index'], axis=1)
----> 2 unseen_predictions = predict_model(final_rf, data=test)
      3 unseen_predictions.head()

~\anaconda3\envs\pycaret\lib\site-packages\pycaret\classification.py in predict_model(estimator, data, probability_threshold, encoded_labels, raw_score, drift_report, round, verbose)
   2023     """
   2024
-> 2025     return pycaret.internal.tabular.predict_model(
   2026         estimator=estimator,
   2027         data=data,

~\anaconda3\envs\pycaret\lib\site-packages\pycaret\internal\tabular.py in predict_model(estimator, data, probability_threshold, encoded_labels, drift_report, raw_score, round, verbose, ml_usecase, display)
   8868             pred = pred.astype(int)
   8869             if not raw_score:
-> 8870                 score = [s[pred[i]] for i, s in enumerate(score)]
   8871             try:
   8872                 score = pd.DataFrame(score)

~\anaconda3\envs\pycaret\lib\site-packages\pycaret\internal\tabular.py in <listcomp>(.0)
   8868             pred = pred.astype(int)
   8869             if not raw_score:
-> 8870                 score = [s[pred[i]] for i, s in enumerate(score)]
   8871             try:
   8872                 score = pd.DataFrame(score)

IndexError: index 7 is out of bounds for axis 0 with size 7
-> The IndexError was raised because an index went out of range. However, I don't know how to fix it yet.
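Reading the failing line `score = [s[pred[i]] for i, s in enumerate(score)]`, the predicted label itself is being used as a positional index into a row of 7 class scores (valid positions 0..6), so a label like 7 or 9 overflows. A commonly reported workaround for this PyCaret behavior is to remap the target to a contiguous 0-based range before calling `setup()`. A minimal sketch of that remapping, assuming the wine `quality` labels span a non-zero-based range such as 3..9 (not verified against this exact PyCaret version):

```python
import pandas as pd

# Hypothetical stand-in for the real train DataFrame's 'quality' column.
train = pd.DataFrame({'quality': [5, 6, 7, 4, 8, 3, 9, 5]})

# Workaround sketch: remap labels to 0..n_classes-1 before passing data to setup().
labels = sorted(train['quality'].unique())
to_zero_based = {lab: i for i, lab in enumerate(labels)}
from_zero_based = {i: lab for lab, i in to_zero_based.items()}

train['quality'] = train['quality'].map(to_zero_based)
print(sorted(train['quality'].unique()))  # [0, 1, 2, 3, 4, 5, 6]
# After predict_model, map predictions back with from_zero_based.
```

With labels in 0..6, a predicted class can never exceed the size of the score array; the inverse dictionary restores the original quality values for the submission file.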