# Dataset: https://www.kaggle.com/datasets/mirichoi0218/insurance
import numpy as np
import pandas as pd
import tensorflow as tf
def mean_norm(df_input):  #@save
    """Column-wise z-score normalization: (x - mean) / std."""
    return df_input.apply(lambda x: (x - x.mean()) / x.std(), axis=0)

def de_mean_norm(result, df_input: pd.DataFrame):  #@save
    """Invert the z-score normalization using the original column statistics."""
    return result * df_input.std() + df_input.mean()
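As a quick sanity check, the two helpers should invert each other. A minimal sketch (the demo frame here is hypothetical, not part of the original notebook):

# Hypothetical round-trip check: de_mean_norm should undo mean_norm.
demo = pd.DataFrame({'a': [1.0, 2.0, 3.0], 'b': [10.0, 20.0, 30.0]})
restored = de_mean_norm(mean_norm(demo), demo)
print(np.allclose(restored, demo))  # expected: True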
insuranceData = pd.read_csv("../data/insurance.csv")
insuranceData.head(10)
   age     sex     bmi  children smoker     region      charges
0   19  female  27.900         0    yes  southwest  16884.92400
1   18    male  33.770         1     no  southeast   1725.55230
2   28    male  33.000         3     no  southeast   4449.46200
3   33    male  22.705         0     no  northwest  21984.47061
4   32    male  28.880         0     no  northwest   3866.85520
5   31  female  25.740         0     no  southeast   3756.62160
6   46  female  33.440         1     no  southeast   8240.58960
7   37  female  27.740         3     no  northwest   7281.50560
8   37    male  29.830         2     no  northeast   6406.41070
9   60  female  25.840         0     no  northwest  28923.13692
# Encode sex as an integer: female -> 0, male -> 1
sex_mapping = {'female': 0, 'male': 1}
insuranceData['sex'] = insuranceData['sex'].map(sex_mapping)
insuranceData.head(10)
   age  sex     bmi  children smoker     region      charges
0   19    0  27.900         0    yes  southwest  16884.92400
1   18    1  33.770         1     no  southeast   1725.55230
2   28    1  33.000         3     no  southeast   4449.46200
3   33    1  22.705         0     no  northwest  21984.47061
4   32    1  28.880         0     no  northwest   3866.85520
5   31    0  25.740         0     no  southeast   3756.62160
6   46    0  33.440         1     no  southeast   8240.58960
7   37    0  27.740         3     no  northwest   7281.50560
8   37    1  29.830         2     no  northeast   6406.41070
9   60    0  25.840         0     no  northwest  28923.13692
# insuranceData['region'].value_counts()
# Encode the four regions as integers
region_mapping = {'southeast': 0, 'southwest': 1, 'northwest': 2, 'northeast': 3}
insuranceData['region'] = insuranceData['region'].map(region_mapping)
insuranceData.head(10)
   age  sex     bmi  children smoker  region      charges
0   19    0  27.900         0    yes       1  16884.92400
1   18    1  33.770         1     no       0   1725.55230
2   28    1  33.000         3     no       0   4449.46200
3   33    1  22.705         0     no       2  21984.47061
4   32    1  28.880         0     no       2   3866.85520
5   31    0  25.740         0     no       0   3756.62160
6   46    0  33.440         1     no       0   8240.58960
7   37    0  27.740         3     no       2   7281.50560
8   37    1  29.830         2     no       3   6406.41070
9   60    0  25.840         0     no       2  28923.13692
# insuranceData['smoker'].value_counts()
# Encode smoker as an integer: yes -> 1, no -> 0
smoker_mapping = {'yes': 1, 'no': 0}
insuranceData['smoker'] = insuranceData['smoker'].map(smoker_mapping)
insuranceData.head(100)
    age  sex     bmi  children  smoker  region      charges
0    19    0  27.900         0       1       1  16884.92400
1    18    1  33.770         1       0       0   1725.55230
2    28    1  33.000         3       0       0   4449.46200
3    33    1  22.705         0       0       2  21984.47061
4    32    1  28.880         0       0       2   3866.85520
..  ...  ...     ...       ...     ...     ...          ...
95   28    0  37.620         1       0       0   3766.88380
96   54    0  30.800         3       0       1  12105.32000
97   55    1  38.280         0       0       0  10226.28420
98   56    1  19.950         0       1       3  22412.64850
99   38    1  19.300         0       1       1  15820.69900

100 rows × 7 columns
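Mapping region onto 0-3 imposes an arbitrary ordering on an unordered category. If that matters for the model, a minimal alternative sketch (assuming the same CSV path; not part of the original notebook) is one-hot encoding with pd.get_dummies:

# Hypothetical alternative: one-hot encode 'region' instead of ordinal codes.
raw = pd.read_csv('../data/insurance.csv')
one_hot = pd.get_dummies(raw, columns=['region'], prefix='region')
print(one_hot.columns.tolist())  # four region_* indicator columns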
rData = mean_norm(insuranceData)
# rData = insuranceData
# print(rData)
Y = rData['charges']               # normalized target
x = rData.drop(columns='charges')  # normalized features
X = tf.convert_to_tensor(x)
print('charges: ', Y)
y = tf.convert_to_tensor(Y)
print(X, y)
charges: 0 0.298472
1 -0.953333
2 -0.728402
3 0.719574
4 -0.776512
...
1333 -0.220468
1334 -0.913661
1335 -0.961237
1336 -0.930014
1337 1.310563
Name: charges, Length: 1338, dtype: float64
tf.Tensor(
[[-1.4382265 -1.010141 -0.45315057 -0.90827406 1.9698501 -0.40272369]
[-1.50940108 0.98922092 0.50943062 -0.07873775 -0.50727343 -1.2875255 ]
[-0.7976553 0.98922092 0.38316358 1.58033487 -0.50727343 -1.2875255 ]
...
[-1.50940108 -1.010141 1.01449877 -0.90827406 -0.50727343 -1.2875255 ]
[-1.29587735 -1.010141 -0.79751522 -0.90827406 -0.50727343 -0.40272369]
[ 1.55110577 -1.010141 -0.26129026 -0.90827406 1.9698501 0.48207812]], shape=(1338, 6), dtype=float64) tf.Tensor(
[ 0.2984722 -0.95333272 -0.72840232 ... -0.96123683 -0.93001377
1.31056344], shape=(1338,), dtype=float64)
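One caveat: mean_norm computed its statistics over the entire dataset, so the validation rows that fit() later splits off have already influenced the normalization. A minimal leakage-free sketch (hypothetical variable names, not the original notebook's approach) fits the statistics on the training portion only:

# Hypothetical leakage-free variant: statistics from the training split only.
n_train = int(len(insuranceData) * 0.8)
train_stats = insuranceData.iloc[:n_train]
normed_all = (insuranceData - train_stats.mean()) / train_stats.std()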
net = tf.keras.Sequential()
net.add(tf.keras.layers.Dense(units=1, input_dim=6))
initializer = tf.initializers.RandomNormal(stddev=0.1)
net.add(tf.keras.layers.Dense(1, kernel_initializer=initializer))
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
# Note: SparseCategoricalAccuracy is a classification metric; it stays at 0
# for this regression task and only mirrors the training log below.
net.compile(optimizer=opt, loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
history = net.fit(X, y, batch_size=1000, epochs=500,
                  validation_split=0.2, callbacks=[], shuffle=True)
net.summary()
Epoch 1/500
2/2 [==============================] - 0s 78ms/step - loss: 0.2575 - sparse_categorical_accuracy: 0.0000e+00 - val_loss: 0.2666 - val_sparse_categorical_accuracy: 0.0000e+00
Epoch 2/500
2/2 [==============================] - 0s 56ms/step - loss: 0.2573 - sparse_categorical_accuracy: 0.0000e+00 - val_loss: 0.2663 - val_sparse_categorical_accuracy: 0.0000e+00
Epoch 3/500
2/2 [==============================] - 0s 47ms/step - loss: 0.2570 - sparse_categorical_accuracy: 0.0000e+00 - val_loss: 0.2661 - val_sparse_categorical_accuracy: 0.0000e+00
Epoch 4/500
2/2 [==============================] - 0s 56ms/step - loss: 0.2568 - sparse_categorical_accuracy: 0.0000e+00 - val_loss: 0.2659 - val_sparse_categorical_accuracy: 0.0000e+00
Epoch 5/500
2/2 [==============================] - 0s 30ms/step - loss: 0.2566 - sparse_categorical_accuracy: 0.0000e+00 - val_loss: 0.2656 - val_sparse_categorical_accuracy: 0.0000e+00
Epoch 6/500
2/2 [==============================] - 0s 53ms/step - loss: 0.2565 - sparse_categorical_accuracy: 0.0000e+00 - val_loss: 0.2653 - val_sparse_categorical_accuracy: 0.0000e+00
...
2/2 [==============================] - 0s 53ms/step - loss: 0.2484 - sparse_categorical_accuracy: 0.0000e+00 - val_loss: 0.2521 - val_sparse_categorical_accuracy: 0.0000e+00
Epoch 499/500
2/2 [==============================] - 0s 25ms/step - loss: 0.2484 - sparse_categorical_accuracy: 0.0000e+00 - val_loss: 0.2520 - val_sparse_categorical_accuracy: 0.0000e+00
Epoch 500/500
2/2 [==============================] - 0s 54ms/step - loss: 0.2484 - sparse_categorical_accuracy: 0.0000e+00 - val_loss: 0.2520 - val_sparse_categorical_accuracy: 0.0000e+00
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 1) 7
dense_1 (Dense) (None, 1) 2
=================================================================
Total params: 9
Trainable params: 9
Non-trainable params: 0
_________________________________________________________________
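Both Dense layers have no activation, so the stacked pair collapses algebraically into a single affine map; the second layer adds parameters but no expressive power. A minimal equivalent sketch (a suggested simplification, not the original notebook's code) with one layer and a regression-appropriate metric:

# Hypothetical simplification: a single linear layer with MAE as the metric.
linear_net = tf.keras.Sequential([
    tf.keras.layers.Dense(1, input_dim=6)  # 6 weights + 1 bias = 7 params
])
linear_net.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                   loss=tf.keras.losses.MeanSquaredError(),
                   metrics=[tf.keras.metrics.MeanAbsoluteError()])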
import matplotlib.pyplot as plt

print(history.history.keys())
plt.plot(history.history["loss"], label="Training Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
# plt.plot(history.history["sparse_categorical_accuracy"], label="sparse_categorical_accuracy")
# plt.plot(history.history["val_sparse_categorical_accuracy"], label="val_sparse_categorical_accuracy")
plt.legend()
plt.show()
dict_keys(['loss', 'sparse_categorical_accuracy', 'val_loss', 'val_sparse_categorical_accuracy'])
# Note: these are raw (un-normalized) feature values, but the network was
# trained on z-score-normalized inputs -- see the corrected sketch at the end.
pY = net.predict([[19, 0, 27.900, 0, 1, 1]])
array([[11.344333]], dtype=float32)
oriY = insuranceData['charges']
oriY.describe()
count     1338.000000
mean     13270.422265
std      12110.011237
min       1121.873900
25%       4740.287150
50%       9382.033000
75%      16639.912515
max      63770.428010
Name: charges, dtype: float64
# De-normalize the prediction back to the original charges scale
de_mean_norm(pY, oriY)
array([[150650.42]], dtype=float32)
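This de-normalized prediction is implausible: it exceeds the dataset's maximum charge of about 63770. The cause is the predict() call above, which fed raw feature values to a network trained on z-score-normalized inputs. A minimal corrected sketch (hypothetical variable names, not part of the original notebook) normalizes the query with the feature statistics first and de-normalizes only the output:

# Hypothetical corrected pipeline: normalize the query, then invert only
# the target normalization on the prediction.
features = insuranceData.drop(columns='charges')
query = pd.DataFrame([[19, 0, 27.900, 0, 1, 1]], columns=features.columns)
query_norm = (query - features.mean()) / features.std()
pred_norm = net.predict(tf.convert_to_tensor(query_norm))
print(de_mean_norm(pred_norm, oriY))  # back on the original dollar scale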