{ "cells": [ { "cell_type": "code", "execution_count": 10, "id": "9f49724a-a1cc-4973-8919-68d31c6186f1", "metadata": {}, "outputs": [], "source": [ "# imports\n", "\n", "import numpy as np\n", "import tensorflow as tf\n", "from tensorflow.keras import layers, models\n", "from tensorflow.keras.utils import to_categorical\n", "import scipy\n", "import pandas as pd\n", "from obspy import read\n", "from datetime import datetime, timedelta\n", "import matplotlib.pyplot as plt\n", "import os\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 11, "id": "8a233e36-5810-4cfb-b6d1-452acf432793", "metadata": {}, "outputs": [], "source": [ "# three types of fake data for testing\n", "\n", "# def signal1(A, t):\n", "# return A * np.sin(30*t) * np.exp(-t) + np.random.randn(t.shape[0])\n", "\n", "# def signal2(A, t):\n", "# return A * np.sin(30.5*t) * np.exp(-t)+ np.random.randn(t.shape[0])\n", "\n", "# def signal3(A, t):\n", "# return A * np.sin(31*t) * np.exp(-t) + np.random.randn(t.shape[0])" ] }, { "cell_type": "code", "execution_count": 12, "id": "f9d9086a-538e-4410-afd3-333093722ff1", "metadata": {}, "outputs": [], "source": [ "# tests\n", "A = 2\n", "# t = np.linspace(0.0001, 5, 40000)\n", "# s1 = signal1(A, t)\n", "# s2 = signal2(A, t)\n", "# s3 = signal3(A, t)" ] }, { "cell_type": "code", "execution_count": 13, "id": "d7510935-dd2c-4484-96b5-eefec9837f33", "metadata": {}, "outputs": [], "source": [ "# plot fn for convenience\n", "def plot(x, y, title):\n", " plt.figure()\n", " plt.plot(x, y, linewidth=0.7, c='blue')\n", " plt.title(title)\n", " plt.savefig(title + '.png')\n", " plt.show()\n", "\n", "# plot(t, s1, 'Example1')\n", "# plot(t, s2, 'Example2')\n", "# plot(t, s3, 'Example3')\n" ] }, { "cell_type": "code", "execution_count": 14, "id": "3cfb129b-0ce8-4790-94ae-cb31fa3e0ec4", "metadata": {}, "outputs": [], "source": [ "# # Number of samples and length of each signal\n", "# n_samples = 200\n", "# signal_length = 40000\n", "\n", "# # Initialize arrays\n", "# X_data = np.zeros((n_samples, signal_length))\n", "# y_labels = np.zeros(n_samples)\n", "\n", "# # time array\n", "# t = np.linspace(0, 1, signal_length)\n", "\n", "# # Generate samples\n", "# for i in range(n_samples):\n", "# if i < n_samples // 3:\n", "# X_data[i] = signal1(A, t)\n", "# y_labels[i] = 0\n", "# elif i < 2 * n_samples // 3:\n", "# X_data[i] = signal2(A, t)\n", "# y_labels[i] = 1\n", "# else:\n", "# X_data[i] = signal3(A, t)\n", "# y_labels[i] = 2\n", "\n", "# # One-hot encode the labels\n", "# y_labels_one_hot = to_categorical(y_labels, num_classes=3)" ] }, { "cell_type": "code", "execution_count": 47, "id": "f09275a3-2673-4d61-9903-8719b9e1b161", "metadata": {}, "outputs": [], "source": [ "test_filename = 'xa.s12.00.mhz.1970-03-25HR00_evid00003_trimmed_7000_sec'\n", "\n", "data_directory = './'\n", "mseed_file = f'{data_directory}{test_filename}.mseed'\n", "st = read(mseed_file)\n", "st\n", "\n", "tr = st.traces[0].copy()\n", "tr_times = tr.times()\n", "tr_data = tr.data\n", "\n", "# plot(tr_times, tr_data, 'Mseed Example')\n", "\n", "# print(tr_times.shape)\n", "\n", "def read_all_mseed_files(data_directory, target_length=None):\n", " # List all files in the directory with \".mseed\" extension\n", " mseed_files = [f for f in os.listdir(data_directory) if f.endswith('.mseed')]\n", " \n", " data_matrix = []\n", " \n", " # Loop through all the mseed files and extract time and data series\n", " for filename in mseed_files:\n", " st = read(os.path.join(data_directory, filename))\n", " tr = 
    "        tr_data = tr.data\n",
    "\n",
    "        if target_length is None:\n",
    "            target_length = len(tr_data)  # Set target length to the first trace's length\n",
    "\n",
    "        # Pad or trim the data to the target length\n",
    "        if len(tr_data) < target_length:\n",
    "            # Pad with zeros if shorter\n",
    "            tr_data = np.pad(tr_data, (0, target_length - len(tr_data)), mode='constant')\n",
    "        else:\n",
    "            # Trim if longer\n",
    "            tr_data = tr_data[:target_length]\n",
    "\n",
    "        data_matrix.append(tr_data)\n",
    "\n",
    "    # Convert the list to a numpy matrix\n",
    "    data_matrix = np.array(data_matrix)\n",
    "\n",
    "    return data_matrix\n",
    "\n",
    "# fixed trace length; matches the model input shape used below\n",
    "X_data = read_all_mseed_files(data_directory, 46376)\n",
    "\n",
    "# plot(tr_times, data[3], 'Example 3')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "47c0ecea-e0fa-4ebf-99db-f6c8e77a72e0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 0., 0.])"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.preprocessing import OneHotEncoder\n",
    "\n",
    "# read csv\n",
    "df = pd.read_csv('catalog.csv', header=None, names=['data'], skiprows=1)\n",
    "\n",
    "# split cells and keep the last column as the event-type label\n",
    "df['label'] = df['data'].apply(lambda x: x.split(',')[-1].strip())\n",
    "\n",
    "# One-hot encode the labels\n",
    "encoder = OneHotEncoder(sparse_output=False)\n",
    "y_label = encoder.fit_transform(df['label'].values.reshape(-1, 1))\n",
    "\n",
    "y_label[4]\n",
    "\n",
    "# one-hot format: [deep, impact, shallow]"
   ]
  },
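  {
   "cell_type": "markdown",
   "id": "b1a2c3d4-0000-4000-8000-000000000001",
   "metadata": {},
   "source": [
    "Quick sanity check before building the model: `OneHotEncoder` orders its output columns by the sorted unique label strings read from `catalog.csv`, which is what the `[deep, impact, shallow]` comment above relies on. The cell below is a minimal sketch that assumes `X_data`, `df`, `encoder`, and `y_label` are defined as in the cells above; it prints the class name behind each one-hot column and checks that the number of waveforms matches the number of labels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1a2c3d4-0000-4000-8000-000000000002",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check (assumes df, encoder, y_label, X_data from the cells above).\n",
    "# OneHotEncoder sorts categories alphabetically, so encoder.categories_[0]\n",
    "# gives the class name behind each one-hot column of y_label.\n",
    "print('classes in one-hot column order:', list(encoder.categories_[0]))\n",
    "print('label counts:', df['label'].value_counts().to_dict())\n",
    "print('X_data shape:', X_data.shape, '| y_label shape:', y_label.shape)\n",
    "assert X_data.shape[0] == y_label.shape[0], 'waveforms and labels are misaligned'"
   ]
  },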
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "092a184e-a79f-462a-8017-741471a05ae9",
   "metadata": {},
   "outputs": [
    {
     "data": {
"text/plain": [
"\u001b[1mModel: \"functional_2\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ input_layer_2 (InputLayer) │ (None, 46376) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_8 (Dense) │ (None, 256) │ 11,872,512 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_9 (Dense) │ (None, 128) │ 32,896 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_10 (Dense) │ (None, 64) │ 8,256 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_11 (Dense) │ (None, 3) │ 195 │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
"\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ input_layer_2 (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m46376\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_8 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m11,872,512\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_9 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m32,896\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_10 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m8,256\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_11 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m3\u001b[0m) │ \u001b[38;5;34m195\u001b[0m │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Total params: 11,913,859 (45.45 MB)\n", "\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m11,913,859\u001b[0m (45.45 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
      "text/plain": [
       "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m11,913,859\u001b[0m (45.45 MB)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "input_shape = (46376,)\n",
    "\n",
    "# Build the classifier model\n",
    "def build_classifier(input_shape):\n",
    "    inputs = layers.Input(shape=input_shape)\n",
    "    x = layers.Dense(256, activation='relu')(inputs)\n",
    "    x = layers.Dense(128, activation='relu')(x)\n",
    "    x = layers.Dense(64, activation='relu')(x)\n",
    "\n",
    "    # Output layer: 3 classes, matching the one-hot labels\n",
    "    outputs = layers.Dense(3, activation='softmax')(x)\n",
    "\n",
    "    model = models.Model(inputs, outputs)\n",
    "    return model\n",
    "\n",
    "classifier = build_classifier(input_shape)\n",
    "\n",
    "# Compile the model\n",
    "classifier.compile(optimizer='adam',\n",
    "                   loss='categorical_crossentropy',\n",
    "                   metrics=['CategoricalAccuracy'])\n",
    "\n",
    "# Display model\n",
    "classifier.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "ec34ffe0-d4ed-484d-b82a-c3270083f082",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 374ms/step - CategoricalAccuracy: 0.3743 - loss: 1.0963 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0815\n",
      "Epoch 2/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 100ms/step - CategoricalAccuracy: 0.8576 - loss: 1.0794 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0604\n",
      "Epoch 3/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 99ms/step - CategoricalAccuracy: 0.8160 - loss: 1.0603 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0354\n",
      "Epoch 4/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8576 - loss: 1.0334 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0057\n",
      "Epoch 5/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 103ms/step - CategoricalAccuracy: 0.8264 - loss: 1.0080 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.9703\n",
      "Epoch 6/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8264 - loss: 0.9747 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.9285\n",
      "Epoch 7/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 106ms/step - CategoricalAccuracy: 0.8264 - loss: 0.9361 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.8798\n",
      "Epoch 8/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8368 - loss: 0.8870 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.8243\n",
      "Epoch 9/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8368 - loss: 0.8350 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.7626\n",
      "Epoch 10/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 102ms/step - CategoricalAccuracy: 0.8368 - loss: 0.7783 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.6969\n"
     ]
    },
    {
     "data": {
      "text/plain": [ "