import tensorflow as tf
import numpy as np
from matplotlib.pyplot import plot
import pandas as pd
# Read the train and test splits of the "dont-call-me-turkey" dataset into
# DataFrames; both JSON files sit side by side in the same data directory.
train_data, test_data = (
    pd.read_json(json_path)
    for json_path in (
        "../data/dont-call-me-turkey/train.json",
        "../data/dont-call-me-turkey/test.json",
    )
)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1195 entries, 0 to 1194
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 audio_embedding 1195 non-null object
1 is_turkey 1195 non-null int64
2 vid_id 1195 non-null object
3 end_time_seconds_youtube_clip 1195 non-null int64
4 start_time_seconds_youtube_clip 1195 non-null int64
dtypes: int64(3), object(2)
memory usage: 46.8+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1195 entries, 0 to 1194
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 audio_embedding 1195 non-null object
1 is_turkey 1195 non-null int64
2 vid_id 1195 non-null object
3 end_time_seconds_youtube_clip 1195 non-null int64
4 start_time_seconds_youtube_clip 1195 non-null int64
dtypes: int64(3), object(2)
memory usage: 46.8+ KB
# Build a fixed-length feature array: every clip's embedding sequence is
# zero-padded at the end (padding='post') or truncated so that it holds exactly
# 10 frames. The dtype is requested explicitly because pad_sequences defaults
# to int32, which would silently truncate any fractional embedding values.
train_X = tf.keras.preprocessing.sequence.pad_sequences(
    train_data['audio_embedding'], maxlen=10, padding='post', dtype='float32'
)

# One-hot encode the is_turkey label (one column per class). A float dtype is
# requested explicitly: the get_dummies default is bool on pandas >= 2.0 and
# uint8 on older releases, so without it the dtype of the label tensor would
# depend on the installed pandas version.
labels = train_data['is_turkey']
labels = pd.get_dummies(labels, dummy_na=False, dtype='float32')
labels.shape, train_X.shape

# Convert both arrays to tensors for training; the trailing expression shows
# their shapes when run as a notebook cell.
train_X_tf, labels_tf = tf.convert_to_tensor(train_X), tf.convert_to_tensor(labels)
train_X_tf.shape, labels_tf.shape
(TensorShape([1195, 10, 128]), TensorShape([1195, 2]))
# Feed-forward classifier over the padded (10, 128) embedding sequences,
# assembled one layer at a time.
net = tf.keras.Sequential()
# On a 3-D input this Dense layer acts on the last axis only, mapping each
# frame's 128 features to 10 units, i.e. (10, 128) -> (10, 10) per clip.
net.add(tf.keras.layers.Dense(10, activation="relu", input_shape=(10, 128)))
# Collapse the per-frame outputs into a single 100-wide vector per clip.
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dropout(0.4))
net.add(tf.keras.layers.Dense(128, activation="tanh"))
net.add(tf.keras.layers.Dropout(0.2))
net.add(tf.keras.layers.Dense(32, activation="relu"))
net.add(tf.keras.layers.Dropout(0.2))
# Two softmax outputs, one per one-hot label column.
net.add(tf.keras.layers.Dense(2, activation="softmax"))

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
# NOTE(review): Huber is a regression loss; categorical cross-entropy is the
# conventional pairing for a softmax output with one-hot labels. Confirm that
# this choice is deliberate before changing it, since it alters training.
net.compile(
    optimizer=opt,
    loss=tf.keras.losses.huber,
    metrics=['accuracy'],
)

# NOTE(review): validation_split holds out the last 20% of the samples as
# given, before shuffling -- verify the rows are not ordered by label.
history = net.fit(
    train_X_tf,
    labels_tf,
    batch_size=20,
    epochs=100,
    validation_split=0.2,
    callbacks=[],
    shuffle=True,
)
net.summary()
Epoch 1/100
48/48 [==============================] - 1s 4ms/step - loss: 0.1469 - accuracy: 0.5910 - val_loss: 0.0767 - val_accuracy: 0.7950
Epoch 2/100
48/48 [==============================] - 0s 2ms/step - loss: 0.1101 - accuracy: 0.6695 - val_loss: 0.0751 - val_accuracy: 0.7866
Epoch 3/100
48/48 [==============================] - 0s 2ms/step - loss: 0.0950 - accuracy: 0.7333 - val_loss: 0.0550 - val_accuracy: 0.8368
Epoch 4/100
48/48 [==============================] - 0s 2ms/step - loss: 0.0904 - accuracy: 0.7636 - val_loss: 0.0501 - val_accuracy: 0.8536
Epoch 5/100
48/48 [==============================] - 0s 2ms/step - loss: 0.0798 - accuracy: 0.7939 - val_loss: 0.0415 - val_accuracy: 0.8954
Epoch 6/100
48/48 [==============================] - 0s 2ms/step - loss: 0.0730 - accuracy: 0.8044 - val_loss: 0.0452 - val_accuracy: 0.8703
Epoch 99/100
48/48 [==============================] - 0s 2ms/step - loss: 0.0250 - accuracy: 0.9372 - val_loss: 0.0285 - val_accuracy: 0.9205
Epoch 100/100
48/48 [==============================] - 0s 2ms/step - loss: 0.0255 - accuracy: 0.9383 - val_loss: 0.0192 - val_accuracy: 0.9540
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_13 (Dense) (None, 10, 10) 1290
flatten_3 (Flatten) (None, 100) 0
dropout_7 (Dropout) (None, 100) 0
dense_14 (Dense) (None, 128) 12928
dropout_8 (Dropout) (None, 128) 0
dense_15 (Dense) (None, 32) 4128
dropout_9 (Dropout) (None, 32) 0
dense_16 (Dense) (None, 2) 66
=================================================================
Total params: 18,412
Trainable params: 18,412
Non-trainable params: 0
_________________________________________________________________
import matplotlib.pyplot as plt

# List the metrics Keras recorded during fit(), then draw the per-epoch
# training and validation loss on the same axes for comparison.
print(history.history.keys())
for metric_key, curve_label in (
    ("loss", "Training Loss"),
    ("val_loss", "val_loss"),
):
    plt.plot(history.history[metric_key], label=curve_label)
plt.legend()
plt.show()
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])