import pandas as pd 
import numpy as np
# Load the diabetes dataset from the CSV file in the working directory.
data = pd.read_csv('diabetes.csv')

# Preview the first five rows (shown when run interactively).
data.head()

# Count missing (NaN) entries per column.
data.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


# Feature matrix: the first eight columns, selected by position.
X = data.iloc[:, :8]

# Target vector: the ninth column (position 8).
y = data.iloc[:, 8]

0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Outcome, Length: 768, dtype: int64


# `normalize` is no longer used here; the import is kept in case other cells rely on it.
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.model_selection import train_test_split

# Split BEFORE scaling so the scaler's statistics come from the training rows
# only and nothing about the test set leaks into preprocessing.
# random_state makes the split reproducible; stratify=y keeps the class ratio
# the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0, stratify=y
)

# Standardise each feature column (zero mean, unit variance).
# The previous `normalize(X, norm='l2')` rescaled each *row* to unit length,
# which mixes unrelated features within a sample instead of putting the
# features on a comparable scale.
# NOTE: `X` itself is left unscaled; only X_train / X_test are transformed.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Sanity check: number of training labels.
y_train.shape

(514,)


from sklearn.linear_model import LogisticRegression

# Build the classifier with a fixed seed, then fit it on the training split.
clf = LogisticRegression(random_state=0)
clf.fit(X_train, y_train)

# Predicted labels for the rows the model was trained on and for the held-out rows.
training_prediction = clf.predict(X_train)
test_prediction = clf.predict(X_test)


from sklearn.metrics import accuracy_score

# Fraction of correctly predicted labels on each split.
training_accuracy = accuracy_score(y_true=y_train, y_pred=training_prediction)
test_accuracy = accuracy_score(y_true=y_test, y_pred=test_prediction)

# Report both scores.
print(f"Training accuracy = {training_accuracy}")
print(f"Test accuracy = {test_accuracy}")

Training accuracy = 0.669260700389105
Test accuracy = 0.6023622047244095

	Pregnancies	Glucose	BloodPressure	SkinThickness	Insulin	BMI	DiabetesPedigreeFunction	Age	Outcome
0	6	148	72	35	0	33.6	0.627	50	1
1	1	85	66	29	0	26.6	0.351	31	0
2	8	183	64	0	0	23.3	0.672	32	1
3	1	89	66	23	94	28.1	0.167	21	0
4	0	137	40	35	168	43.1	2.288	33	1

Importing Libraries and Loading Data

Column Descriptions

Normalizing the data

Making the train test split

Training and Predicting using the model

Checking the accuracy