{ "cells": [ { "cell_type": "code", "execution_count": 10, "id": "9f49724a-a1cc-4973-8919-68d31c6186f1", "metadata": {}, "outputs": [], "source": [ "# imports\n", "\n", "import numpy as np\n", "import tensorflow as tf\n", "from tensorflow.keras import layers, models\n", "from tensorflow.keras.utils import to_categorical\n", "import scipy\n", "import pandas as pd\n", "from obspy import read\n", "from datetime import datetime, timedelta\n", "import matplotlib.pyplot as plt\n", "import os\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 11, "id": "8a233e36-5810-4cfb-b6d1-452acf432793", "metadata": {}, "outputs": [], "source": [ "# three types of fake data for testing\n", "\n", "# def signal1(A, t):\n", "# return A * np.sin(30*t) * np.exp(-t) + np.random.randn(t.shape[0])\n", "\n", "# def signal2(A, t):\n", "# return A * np.sin(30.5*t) * np.exp(-t)+ np.random.randn(t.shape[0])\n", "\n", "# def signal3(A, t):\n", "# return A * np.sin(31*t) * np.exp(-t) + np.random.randn(t.shape[0])" ] }, { "cell_type": "code", "execution_count": 12, "id": "f9d9086a-538e-4410-afd3-333093722ff1", "metadata": {}, "outputs": [], "source": [ "# tests\n", "A = 2\n", "# t = np.linspace(0.0001, 5, 40000)\n", "# s1 = signal1(A, t)\n", "# s2 = signal2(A, t)\n", "# s3 = signal3(A, t)" ] }, { "cell_type": "code", "execution_count": 13, "id": "d7510935-dd2c-4484-96b5-eefec9837f33", "metadata": {}, "outputs": [], "source": [ "# plot fn for convenience\n", "def plot(x, y, title):\n", " plt.figure()\n", " plt.plot(x, y, linewidth=0.7, c='blue')\n", " plt.title(title)\n", " plt.savefig(title + '.png')\n", " plt.show()\n", "\n", "# plot(t, s1, 'Example1')\n", "# plot(t, s2, 'Example2')\n", "# plot(t, s3, 'Example3')\n" ] }, { "cell_type": "code", "execution_count": 14, "id": "3cfb129b-0ce8-4790-94ae-cb31fa3e0ec4", "metadata": {}, "outputs": [], "source": [ "# # Number of samples and length of each signal\n", "# n_samples = 200\n", "# signal_length = 40000\n", "\n", "# # Initialize arrays\n", "# X_data = np.zeros((n_samples, signal_length))\n", "# y_labels = np.zeros(n_samples)\n", "\n", "# # time array\n", "# t = np.linspace(0, 1, signal_length)\n", "\n", "# # Generate samples\n", "# for i in range(n_samples):\n", "# if i < n_samples // 3:\n", "# X_data[i] = signal1(A, t)\n", "# y_labels[i] = 0\n", "# elif i < 2 * n_samples // 3:\n", "# X_data[i] = signal2(A, t)\n", "# y_labels[i] = 1\n", "# else:\n", "# X_data[i] = signal3(A, t)\n", "# y_labels[i] = 2\n", "\n", "# # One-hot encode the labels\n", "# y_labels_one_hot = to_categorical(y_labels, num_classes=3)" ] }, { "cell_type": "code", "execution_count": 47, "id": "f09275a3-2673-4d61-9903-8719b9e1b161", "metadata": {}, "outputs": [], "source": [ "test_filename = 'xa.s12.00.mhz.1970-03-25HR00_evid00003_trimmed_7000_sec'\n", "\n", "data_directory = './'\n", "mseed_file = f'{data_directory}{test_filename}.mseed'\n", "st = read(mseed_file)\n", "st\n", "\n", "tr = st.traces[0].copy()\n", "tr_times = tr.times()\n", "tr_data = tr.data\n", "\n", "# plot(tr_times, tr_data, 'Mseed Example')\n", "\n", "# print(tr_times.shape)\n", "\n", "def read_all_mseed_files(data_directory, target_length=None):\n", " # List all files in the directory with \".mseed\" extension\n", " mseed_files = [f for f in os.listdir(data_directory) if f.endswith('.mseed')]\n", " \n", " data_matrix = []\n", " \n", " # Loop through all the mseed files and extract time and data series\n", " for filename in mseed_files:\n", " st = read(os.path.join(data_directory, filename))\n", " tr = st.traces[0].copy() \n", " tr_data = tr.data \n", " \n", " if target_length is None:\n", " target_length = len(tr_data) # Set target length to the first trace's length\n", " \n", " # Pad or trim the data to the target length\n", " if len(tr_data) < target_length:\n", " # Pad with zeros if shorter\n", " tr_data = np.pad(tr_data, (0, target_length - len(tr_data)), mode='constant')\n", " else:\n", " # Trim if longer\n", " tr_data = tr_data[:target_length]\n", " \n", " data_matrix.append(tr_data)\n", " \n", " # Convert the list to a numpy matrix\n", " data_matrix = np.array(data_matrix)\n", " \n", " return data_matrix\n", "\n", "X_data = read_all_mseed_files(data_directory, 46376)\n", "\n", "# plot(tr_times, data[3], 'Example 3')" ] }, { "cell_type": "code", "execution_count": 48, "id": "47c0ecea-e0fa-4ebf-99db-f6c8e77a72e0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1., 0., 0.])" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "from sklearn.preprocessing import OneHotEncoder\n", "\n", "# read csv\n", "df = pd.read_csv('catalog.csv', header=None, names=['data'], skiprows=1)\n", "\n", "# split cells\n", "df['label'] = df['data'].apply(lambda x: x.split(',')[-1].strip())\n", "\n", "# Step 3: One-hot encode the labels\n", "encoder = OneHotEncoder(sparse_output=False)\n", "y_label = encoder.fit_transform(df['label'].values.reshape(-1, 1))\n", "\n", "\n", "\n", "y_label[4]\n", "\n", "# one-hot format: [deep, impact, shallow]" ] }, { "cell_type": "code", "execution_count": 49, "id": "092a184e-a79f-462a-8017-741471a05ae9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Model: \"functional_2\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"functional_2\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ input_layer_2 (InputLayer)      │ (None, 46376)          │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_8 (Dense)                 │ (None, 256)            │    11,872,512 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_9 (Dense)                 │ (None, 128)            │        32,896 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_10 (Dense)                │ (None, 64)             │         8,256 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_11 (Dense)                │ (None, 3)              │           195 │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", "│ input_layer_2 (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m46376\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_8 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m11,872,512\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_9 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m32,896\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_10 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m8,256\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_11 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m3\u001b[0m) │ \u001b[38;5;34m195\u001b[0m │\n", "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 11,913,859 (45.45 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m11,913,859\u001b[0m (45.45 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 11,913,859 (45.45 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m11,913,859\u001b[0m (45.45 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 0 (0.00 B)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "input_shape = (46376,)\n", "\n", "# Build the classifier model\n", "def build_classifier(input_shape):\n", " inputs = layers.Input(shape=input_shape)\n", " x = layers.Dense(256, activation='relu')(inputs)\n", " x = layers.Dense(128, activation='relu')(x)\n", " x = layers.Dense(64, activation='relu')(x)\n", " \n", " # Output layer (one hot encoding)\n", " outputs = layers.Dense(3, activation='softmax')(x)\n", " \n", " model = models.Model(inputs, outputs)\n", " return model\n", "\n", "classifier = build_classifier(input_shape)\n", "\n", "# Compile the model\n", "classifier.compile(optimizer='adam', \n", " loss='categorical_crossentropy', \n", " metrics=['CategoricalAccuracy'])\n", "\n", "# Display model\n", "classifier.summary()" ] }, { "cell_type": "code", "execution_count": 50, "id": "ec34ffe0-d4ed-484d-b82a-c3270083f082", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 374ms/step - CategoricalAccuracy: 0.3743 - loss: 1.0963 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0815\n", "Epoch 2/10\n", "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 100ms/step - CategoricalAccuracy: 0.8576 - loss: 1.0794 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0604\n", "Epoch 3/10\n", "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 99ms/step - CategoricalAccuracy: 0.8160 - loss: 1.0603 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0354\n", "Epoch 4/10\n", "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8576 - loss: 1.0334 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0057\n", "Epoch 5/10\n", "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 103ms/step - CategoricalAccuracy: 0.8264 - loss: 1.0080 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.9703\n", "Epoch 6/10\n", "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8264 - loss: 0.9747 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.9285\n", "Epoch 7/10\n", "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 106ms/step - CategoricalAccuracy: 0.8264 - loss: 0.9361 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.8798\n", "Epoch 8/10\n", "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8368 - loss: 0.8870 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.8243\n", "Epoch 9/10\n", "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8368 - loss: 0.8350 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.7626\n", "Epoch 10/10\n", "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 102ms/step - CategoricalAccuracy: 0.8368 - loss: 0.7783 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.6969\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# training\n", "classifier.fit(X_data, y_label, epochs=10, batch_size=32, validation_split=0.2)" ] }, { "cell_type": "code", "execution_count": 62, "id": "2bd36515-2777-4d49-82ec-b50f1e0904a0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 20ms/step\n", "[[0.23078983 0.55988955 0.20932065]]\n" ] } ], "source": [ "# test\n", "\n", "test = X_data[8]\n", "test = test.reshape(1, -1)\n", "\n", "# Now call predict\n", "prediction = classifier.predict(test)\n", "\n", "print(prediction)" ] }, { "cell_type": "code", "execution_count": null, "id": "931bb66b-7171-4630-9753-d9ff7fa3cb78", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "077d77a2-c3bc-4606-b4bb-f420ff123817", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "711329a8-0b67-4afe-b455-3f8a296a622e", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.5" } }, "nbformat": 4, "nbformat_minor": 5 }