SeismStart/preprocessed/lunar/data/Classifier.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "9f49724a-a1cc-4973-8919-68d31c6186f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras import layers, models\n",
    "from tensorflow.keras.utils import to_categorical\n",
    "import scipy\n",
    "import pandas as pd\n",
    "from obspy import read\n",
    "from datetime import datetime, timedelta\n",
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "8a233e36-5810-4cfb-b6d1-452acf432793",
   "metadata": {},
   "outputs": [],
   "source": [
    "# three types of fake data for testing\n",
    "\n",
    "# def signal1(A, t):\n",
    "#     return A * np.sin(30*t) * np.exp(-t) + np.random.randn(t.shape[0])\n",
    "\n",
    "# def signal2(A, t):\n",
    "#     return A * np.sin(30.5*t) * np.exp(-t)+ np.random.randn(t.shape[0])\n",
    "\n",
    "# def signal3(A, t):\n",
    "#     return A * np.sin(31*t) * np.exp(-t) + np.random.randn(t.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "f9d9086a-538e-4410-afd3-333093722ff1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# tests\n",
    "A = 2\n",
    "# t = np.linspace(0.0001, 5, 40000)\n",
    "# s1 = signal1(A, t)\n",
    "# s2 = signal2(A, t)\n",
    "# s3 = signal3(A, t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "d7510935-dd2c-4484-96b5-eefec9837f33",
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot fn for convenience\n",
    "def plot(x, y, title):\n",
    "    plt.figure()\n",
    "    plt.plot(x, y, linewidth=0.7, c='blue')\n",
    "    plt.title(title)\n",
    "    plt.savefig(title + '.png')\n",
    "    plt.show()\n",
    "\n",
    "# plot(t, s1, 'Example1')\n",
    "# plot(t, s2, 'Example2')\n",
    "# plot(t, s3, 'Example3')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "3cfb129b-0ce8-4790-94ae-cb31fa3e0ec4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Number of samples and length of each signal\n",
    "# n_samples = 200\n",
    "# signal_length = 40000\n",
    "\n",
    "# # Initialize arrays\n",
    "# X_data = np.zeros((n_samples, signal_length))\n",
    "# y_labels = np.zeros(n_samples)\n",
    "\n",
    "# # time array\n",
    "# t = np.linspace(0, 1, signal_length)\n",
    "\n",
    "# # Generate samples\n",
    "# for i in range(n_samples):\n",
    "#     if i < n_samples // 3:\n",
    "#         X_data[i] = signal1(A, t)\n",
    "#         y_labels[i] = 0\n",
    "#     elif i < 2 * n_samples // 3:\n",
    "#         X_data[i] = signal2(A, t)\n",
    "#         y_labels[i] = 1\n",
    "#     else:\n",
    "#         X_data[i] = signal3(A, t)\n",
    "#         y_labels[i] = 2\n",
    "\n",
    "# # One-hot encode the labels\n",
    "# y_labels_one_hot = to_categorical(y_labels, num_classes=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "f09275a3-2673-4d61-9903-8719b9e1b161",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_filename = 'xa.s12.00.mhz.1970-03-25HR00_evid00003_trimmed_7000_sec'\n",
    "\n",
    "data_directory = './'\n",
    "mseed_file = f'{data_directory}{test_filename}.mseed'\n",
    "st = read(mseed_file)\n",
    "st\n",
    "\n",
    "tr = st.traces[0].copy()\n",
    "tr_times = tr.times()\n",
    "tr_data = tr.data\n",
    "\n",
    "# plot(tr_times, tr_data, 'Mseed Example')\n",
    "\n",
    "# print(tr_times.shape)\n",
    "\n",
    "def read_all_mseed_files(data_directory, target_length=None):\n",
    "    # List all files in the directory with \".mseed\" extension\n",
    "    mseed_files = [f for f in os.listdir(data_directory) if f.endswith('.mseed')]\n",
    "    \n",
    "    data_matrix = []\n",
    "    \n",
    "    # Loop through all the mseed files and extract time and data series\n",
    "    for filename in mseed_files:\n",
    "        st = read(os.path.join(data_directory, filename))\n",
    "        tr = st.traces[0].copy()  \n",
    "        tr_data = tr.data  \n",
    "        \n",
    "        if target_length is None:\n",
    "            target_length = len(tr_data)  # Set target length to the first trace's length\n",
    "        \n",
    "        # Pad or trim the data to the target length\n",
    "        if len(tr_data) < target_length:\n",
    "            # Pad with zeros if shorter\n",
    "            tr_data = np.pad(tr_data, (0, target_length - len(tr_data)), mode='constant')\n",
    "        else:\n",
    "            # Trim if longer\n",
    "            tr_data = tr_data[:target_length]\n",
    "        \n",
    "        data_matrix.append(tr_data)\n",
    "    \n",
    "    # Convert the list to a numpy matrix\n",
    "    data_matrix = np.array(data_matrix)\n",
    "    \n",
    "    return data_matrix\n",
    "\n",
    "X_data = read_all_mseed_files(data_directory, 46376)\n",
    "\n",
    "# plot(tr_times, data[3], 'Example 3')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "47c0ecea-e0fa-4ebf-99db-f6c8e77a72e0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 0., 0.])"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "\n",
    "# read csv\n",
    "df = pd.read_csv('catalog.csv', header=None, names=['data'], skiprows=1)\n",
    "\n",
    "# split cells\n",
    "df['label'] = df['data'].apply(lambda x: x.split(',')[-1].strip())\n",
    "\n",
    "# Step 3: One-hot encode the labels\n",
    "encoder = OneHotEncoder(sparse_output=False)\n",
    "y_label = encoder.fit_transform(df['label'].values.reshape(-1, 1))\n",
    "\n",
    "\n",
    "\n",
    "y_label[4]\n",
    "\n",
    "# one-hot format: [deep, impact, shallow]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "092a184e-a79f-462a-8017-741471a05ae9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional_2\"</span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1mModel: \"functional_2\"\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃<span style=\"font-weight: bold\"> Layer (type)                    </span>┃<span style=\"font-weight: bold\"> Output Shape           </span>┃<span style=\"font-weight: bold\">       Param # </span>┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ input_layer_2 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>)      │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">46376</span>)          │             <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_8 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>)                 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>)            │    <span style=\"color: #00af00; text-decoration-color: #00af00\">11,872,512</span> │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_9 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>)                 │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>)            │        <span style=\"color: #00af00; text-decoration-color: #00af00\">32,896</span> │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_10 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>)                │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>)             │         <span style=\"color: #00af00; text-decoration-color: #00af00\">8,256</span> │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_11 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>)                │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">3</span>)              │           <span style=\"color: #00af00; text-decoration-color: #00af00\">195</span> │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "</pre>\n"
      ],
      "text/plain": [
       "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃\u001b[1m \u001b[0m\u001b[1mLayer (type)                   \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape          \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m      Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ input_layer_2 (\u001b[38;5;33mInputLayer\u001b[0m)      │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m46376\u001b[0m)          │             \u001b[38;5;34m0\u001b[0m │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_8 (\u001b[38;5;33mDense\u001b[0m)                 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m)            │    \u001b[38;5;34m11,872,512\u001b[0m │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_9 (\u001b[38;5;33mDense\u001b[0m)                 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m)            │        \u001b[38;5;34m32,896\u001b[0m │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_10 (\u001b[38;5;33mDense\u001b[0m)                │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m)             │         \u001b[38;5;34m8,256\u001b[0m │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_11 (\u001b[38;5;33mDense\u001b[0m)                │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m3\u001b[0m)              │           \u001b[38;5;34m195\u001b[0m │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">11,913,859</span> (45.45 MB)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m11,913,859\u001b[0m (45.45 MB)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">11,913,859</span> (45.45 MB)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m11,913,859\u001b[0m (45.45 MB)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "input_shape = (46376,)\n",
    "\n",
    "# Build the classifier model\n",
    "def build_classifier(input_shape):\n",
    "    inputs = layers.Input(shape=input_shape)\n",
    "    x = layers.Dense(256, activation='relu')(inputs)\n",
    "    x = layers.Dense(128, activation='relu')(x)\n",
    "    x = layers.Dense(64, activation='relu')(x)\n",
    "    \n",
    "    # Output layer (one hot encoding)\n",
    "    outputs = layers.Dense(3, activation='softmax')(x)\n",
    "    \n",
    "    model = models.Model(inputs, outputs)\n",
    "    return model\n",
    "\n",
    "classifier = build_classifier(input_shape)\n",
    "\n",
    "# Compile the model\n",
    "classifier.compile(optimizer='adam', \n",
    "                   loss='categorical_crossentropy', \n",
    "                   metrics=['CategoricalAccuracy'])\n",
    "\n",
    "# Display model\n",
    "classifier.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "ec34ffe0-d4ed-484d-b82a-c3270083f082",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 374ms/step - CategoricalAccuracy: 0.3743 - loss: 1.0963 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0815\n",
      "Epoch 2/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 100ms/step - CategoricalAccuracy: 0.8576 - loss: 1.0794 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0604\n",
      "Epoch 3/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 99ms/step - CategoricalAccuracy: 0.8160 - loss: 1.0603 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0354\n",
      "Epoch 4/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8576 - loss: 1.0334 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0057\n",
      "Epoch 5/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 103ms/step - CategoricalAccuracy: 0.8264 - loss: 1.0080 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.9703\n",
      "Epoch 6/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8264 - loss: 0.9747 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.9285\n",
      "Epoch 7/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 106ms/step - CategoricalAccuracy: 0.8264 - loss: 0.9361 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.8798\n",
      "Epoch 8/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8368 - loss: 0.8870 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.8243\n",
      "Epoch 9/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8368 - loss: 0.8350 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.7626\n",
      "Epoch 10/10\n",
      "\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 102ms/step - CategoricalAccuracy: 0.8368 - loss: 0.7783 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.6969\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.src.callbacks.history.History at 0x179aad80c50>"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# training\n",
    "classifier.fit(X_data, y_label, epochs=10, batch_size=32, validation_split=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "2bd36515-2777-4d49-82ec-b50f1e0904a0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 20ms/step\n",
      "[[0.23078983 0.55988955 0.20932065]]\n"
     ]
    }
   ],
   "source": [
    "# test\n",
    "\n",
    "test = X_data[8]\n",
    "test = test.reshape(1, -1)\n",
    "\n",
    "# Now call predict\n",
    "prediction = classifier.predict(test)\n",
    "\n",
    "print(prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "931bb66b-7171-4630-9753-d9ff7fa3cb78",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "077d77a2-c3bc-4606-b4bb-f420ff123817",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "711329a8-0b67-4afe-b455-3f8a296a622e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}