import pandas as pd
import numpy as np
# Load the diabetes dataset from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='diabetes.csv')
# Preview the first five rows to sanity-check the column names and values.
data.head(n=5)
| | Pregnancies | Glucose | BloodPressure | SkinThickness | Insulin | BMI | DiabetesPedigreeFunction | Age | Outcome |
---|---|---|---|---|---|---|---|---|---|
0 | 6 | 148 | 72 | 35 | 0 | 33.6 | 0.627 | 50 | 1 |
1 | 1 | 85 | 66 | 29 | 0 | 26.6 | 0.351 | 31 | 0 |
2 | 8 | 183 | 64 | 0 | 0 | 23.3 | 0.672 | 32 | 1 |
3 | 1 | 89 | 66 | 23 | 94 | 28.1 | 0.167 | 21 | 0 |
4 | 0 | 137 | 40 | 35 | 168 | 43.1 | 2.288 | 33 | 1 |
# Count missing (NaN/None) entries in every column.
# NOTE(review): the preview above shows 0 for Insulin and SkinThickness in
# some rows; if zeros are used as a missing-value placeholder, this check
# will not detect them -- confirm against the dataset description.
data.isna().sum()
Pregnancies 0 Glucose 0 BloodPressure 0 SkinThickness 0 Insulin 0 BMI 0 DiabetesPedigreeFunction 0 Age 0 Outcome 0 dtype: int64
# Column 8 holds the target label (Outcome); the eight columns before it
# are the input features.
y = data.iloc[:, 8]
X = data.iloc[:, :8]
0 1 1 0 2 1 3 0 4 1 .. 763 0 764 0 765 0 766 1 767 0 Name: Outcome, Length: 768, dtype: int64
from sklearn.model_selection import train_test_split
# `normalize` is no longer used below; the import is kept in case other
# cells still rely on the name.
from sklearn.preprocessing import StandardScaler, normalize

# Hold out 33% of the rows for testing. A fixed random_state makes the split
# (and therefore the reported accuracies) reproducible, and stratify=y keeps
# the class ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0, stratify=y
)

# Scale each feature to zero mean and unit variance. The previous
# normalize(X, norm='l2') call rescaled every *row* to unit length, which
# mixes unrelated features (e.g. Glucose and Age) within a sample instead of
# putting the columns on a comparable scale. The scaler is fitted on the
# training rows only so that no test-set statistics leak into training.
# Note: X itself is left unscaled; only X_train / X_test are transformed
# (both become NumPy arrays, as they were before this change).
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Number of training labels (sanity check on the split size).
y_train.shape
(514,)
# Build a logistic-regression classifier and fit it on the training split.
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(random_state=0)
# fit() returns the estimator itself, so clf still refers to the same model.
clf = clf.fit(X_train, y_train)
# Predict class labels for the training and test splits with the fitted model.
training_prediction, test_prediction = clf.predict(X_train), clf.predict(X_test)
from sklearn.metrics import accuracy_score

# Accuracy = fraction of samples whose predicted label matches the true one.
# Report the training figure first, then the held-out test figure, so the
# gap between the two is easy to see.
training_accuracy = accuracy_score(y_true=y_train, y_pred=training_prediction)
print(f"Training accuracy = {training_accuracy}")

test_accuracy = accuracy_score(y_true=y_test, y_pred=test_prediction)
print(f"Test accuracy = {test_accuracy}")
Training accuracy = 0.669260700389105 Test accuracy = 0.6023622047244095