In
[1]:
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
In [2]:
df = pd.read_csv("insurance_data.csv")
df.head()
Out[2]: age bought_insurance
0 22 0
1 25 0
2 47 1
3 52 0
4 46 1
In [3]:
plt.scatter(df.age,df.bought_insurance,marker='+',color='red')
<matplotlib.collections.PathCollection at 0x254e2c52fd0>
Out[3]:
In [4]:
from sklearn.model_selection import train_test_split
In [5]:
X_train, X_test, y_train, y_test = train_test_split(df[['age']],df.bought_insurance,train_
In [6]:
X_test
Out[6]: age
3 52
24 50
17 58
16 25
6 55
23 45
Loading [MathJax]/jax/output/CommonHTML/fonts/TeX/fontdata.js
In [10]: X_train
Out[10]: age
1 25
4 46
11 28
15 55
22 40
20 21
13 29
0 22
5 56
8 62
26 23
19 18
2 47
21 26
25 54
12 27
18 19
14 49
9 61
10 18
7 60
In [7]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
In [8]:
model.fit(X_train, y_train)
LogisticRegression()
Out[8]:
In [11]:
model.predict(X_test)
array([1, 1, 1, 0, 1, 1], dtype=int64)
Out[11]:
In [12]:
X_test
Out[12]: age
3 52
24 50
17 58
Loading [MathJax]/jax/output/CommonHTML/fonts/TeX/fontdata.js
age
16 25
6 55
23 45
In [15]:
model.coef_
array([[0.61332404]])
Out[15]:
In [16]:
model.intercept_
array([-21.53221288])
Out[16]:
In [17]:
import math
def sigmoid(x):
return 1 / (1 + math.exp(-x))
In [18]:
def prediction_function(age):
z = 0.042 * age - 1.53 # 0.04150133 ~ 0.042 and -1.52726963 ~ -1.53
y = sigmoid(z)
return y
In [19]:
age = 35
prediction_function(age)
0.4850044983805899
Out[19]:
In [20]:
###0.485 is less than 0.5 which means person with 35 age will not buy insurance
In [21]:
age = 43
prediction_function(age)
0.568565299077705
Out[21]:
In [22]:
### 0.5685 is more than 0.5 which means person with 43 will buy the insurance
In [ ]:
####THANK YOU###
Loading [MathJax]/jax/output/CommonHTML/fonts/TeX/fontdata.js