# pandas supplies `read_csv`; it is imported here because the visible script
# uses `pd` without importing it (a repeated import is a harmless no-op).
import pandas as pd

# Load the dataset from the working directory.
# NOTE(review): the column names below look like dummy/one-hot encoded
# categoricals plus a few numeric fields -- confirm against the step that
# produced "preprocessed.csv".
preprocessed = pd.read_csv("preprocessed.csv")

# Name of the target column.
y_var = 'Churn_Yes'

# Names of the columns used as model inputs.
X_var = [
    'SeniorCitizen_1', 'tenure',
    'MonthlyCharges', 'TotalCharges', 'gender_Male',
    'Partner_Yes', 'Dependents_Yes', 'PhoneService_Yes',
    'MultipleLines_No phone service', 'MultipleLines_Yes',
    'InternetService_Fiber optic', 'InternetService_No',
    'OnlineSecurity_No internet service', 'OnlineSecurity_Yes',
    'OnlineBackup_No internet service', 'OnlineBackup_Yes',
    'DeviceProtection_No internet service', 'DeviceProtection_Yes',
    'TechSupport_No internet service', 'TechSupport_Yes',
    'StreamingTV_No internet service', 'StreamingTV_Yes',
    'StreamingMovies_No internet service', 'StreamingMovies_Yes',
    'Contract_One year', 'Contract_Two year', 'PaperlessBilling_Yes',
    'PaymentMethod_Credit card (automatic)', 'PaymentMethod_Electronic check',
    'PaymentMethod_Mailed check',
]

# Define X (model features) and y (target variable).
# Selecting with a list of names raises KeyError if any column is missing.
X = preprocessed[X_var]
y = preprocessed[y_var]
# First algorithm: L2-regularised logistic regression (C=10).
from sklearn.linear_model import LogisticRegression

# `fit` returns the estimator itself, so construction and training are chained.
lr = LogisticRegression(C=10, penalty='l2').fit(X, y)

# Predictions are made on the same rows the model was trained on.
lr_predictions = lr.predict(X)
print(lr_predictions)
# Second algorithm: k-nearest neighbours with 4 distance-weighted neighbours
# and the Euclidean metric.
from sklearn.neighbors import KNeighborsClassifier

# `fit` returns the estimator itself, so construction and training are chained.
kn_classifier = KNeighborsClassifier(
    weights='distance',
    metric='euclidean',
    n_neighbors=4,
).fit(X, y)

# Predictions are made on the same rows the model was trained on.
kn_predictions = kn_classifier.predict(X)
print(kn_predictions)
# Third algorithm: decision tree limited to depth 5 that needs at least
# 10 samples in a node before splitting it.
from sklearn.tree import DecisionTreeClassifier

# `fit` returns the estimator itself, so construction and training are chained.
dt_classifier = DecisionTreeClassifier(
    min_samples_split=10,
    max_depth=5,
).fit(X, y)

# Predictions are made on the same rows the model was trained on.
dt_predictions = dt_classifier.predict(X)
print(dt_predictions)
# Ensemble: stack the three classifiers above under a logistic-regression
# meta-learner.
from sklearn.ensemble import StackingClassifier

# (name, estimator) pairs handed to the stacking classifier as base learners.
estimators = [('lr', lr), ('knn', kn_classifier), ('dt', dt_classifier)]

# The meta-learner uses LogisticRegression's default settings.
meta_learner = LogisticRegression()
clf = StackingClassifier(
    final_estimator=meta_learner,
    estimators=estimators,
).fit(X, y)

# Predict on the training rows, then print the true labels for comparison.
ensemble_predictions = clf.predict(X)
print(ensemble_predictions)
print(y)