Files
SeismStart/preprocessed/lunar/data/Classifier.ipynb

453 lines
21 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"id": "9f49724a-a1cc-4973-8919-68d31c6186f1",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"from tensorflow.keras import layers, models\n",
"from tensorflow.keras.utils import to_categorical\n",
"import scipy\n",
"import pandas as pd\n",
"from obspy import read\n",
"from datetime import datetime, timedelta\n",
"import matplotlib.pyplot as plt\n",
"import os\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8a233e36-5810-4cfb-b6d1-452acf432793",
"metadata": {},
"outputs": [],
"source": [
"# three types of fake data for testing\n",
"\n",
"# def signal1(A, t):\n",
"# return A * np.sin(30*t) * np.exp(-t) + np.random.randn(t.shape[0])\n",
"\n",
"# def signal2(A, t):\n",
"# return A * np.sin(30.5*t) * np.exp(-t)+ np.random.randn(t.shape[0])\n",
"\n",
"# def signal3(A, t):\n",
"# return A * np.sin(31*t) * np.exp(-t) + np.random.randn(t.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f9d9086a-538e-4410-afd3-333093722ff1",
"metadata": {},
"outputs": [],
"source": [
"# tests\n",
"# Amplitude argument for the synthetic signalN(A, t) generators. Those generators and\n",
"# every call that uses A are commented out in this notebook, so A is currently unused.\n",
"A = 2\n",
"# t = np.linspace(0.0001, 5, 40000)\n",
"# s1 = signal1(A, t)\n",
"# s2 = signal2(A, t)\n",
"# s3 = signal3(A, t)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "d7510935-dd2c-4484-96b5-eefec9837f33",
"metadata": {},
"outputs": [],
"source": [
"# plot fn for convenience\n",
"def plot(x, y, title):\n",
"    \"\"\"Plot y against x as a thin blue line, save it to title + '.png', and show it.\n",
"\n",
"    Args:\n",
"        x: Values for the horizontal axis.\n",
"        y: Values for the vertical axis.\n",
"        title: Figure title; also the stem of the PNG file name, so the image is\n",
"            written relative to the current working directory.\n",
"    \"\"\"\n",
"    # Explicit figure/axes objects instead of the implicit pyplot state machine.\n",
"    fig, ax = plt.subplots()\n",
"    ax.plot(x, y, linewidth=0.7, c='blue')\n",
"    ax.set_title(title)\n",
"    fig.savefig(title + '.png')\n",
"    plt.show()\n",
"    # Release the figure so repeated calls do not accumulate open figures.\n",
"    plt.close(fig)\n",
"\n",
"# plot(t, s1, 'Example1')\n",
"# plot(t, s2, 'Example2')\n",
"# plot(t, s3, 'Example3')\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "3cfb129b-0ce8-4790-94ae-cb31fa3e0ec4",
"metadata": {},
"outputs": [],
"source": [
"# # Number of samples and length of each signal\n",
"# n_samples = 200\n",
"# signal_length = 40000\n",
"\n",
"# # Initialize arrays\n",
"# X_data = np.zeros((n_samples, signal_length))\n",
"# y_labels = np.zeros(n_samples)\n",
"\n",
"# # time array\n",
"# t = np.linspace(0, 1, signal_length)\n",
"\n",
"# # Generate samples\n",
"# for i in range(n_samples):\n",
"# if i < n_samples // 3:\n",
"# X_data[i] = signal1(A, t)\n",
"# y_labels[i] = 0\n",
"# elif i < 2 * n_samples // 3:\n",
"# X_data[i] = signal2(A, t)\n",
"# y_labels[i] = 1\n",
"# else:\n",
"# X_data[i] = signal3(A, t)\n",
"# y_labels[i] = 2\n",
"\n",
"# # One-hot encode the labels\n",
"# y_labels_one_hot = to_categorical(y_labels, num_classes=3)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "f09275a3-2673-4d61-9903-8719b9e1b161",
"metadata": {},
"outputs": [],
"source": [
"# Load one example MiniSEED file and pull out its first trace for inspection.\n",
"test_filename = 'xa.s12.00.mhz.1970-03-25HR00_evid00003_trimmed_7000_sec'\n",
"\n",
"data_directory = './'\n",
"# os.path.join keeps the path valid even when data_directory has no trailing separator.\n",
"mseed_file = os.path.join(data_directory, test_filename + '.mseed')\n",
"st = read(mseed_file)\n",
"\n",
"# Work on a copy of the first trace so the stream read above is left untouched.\n",
"tr = st.traces[0].copy()\n",
"tr_times = tr.times()  # per-sample time axis returned by ObsPy\n",
"tr_data = tr.data      # sample values of the trace\n",
"\n",
"# plot(tr_times, tr_data, 'Mseed Example')\n",
"\n",
"# print(tr_times.shape)\n",
"\n",
"def read_all_mseed_files(data_directory, target_length=None):\n",
"    \"\"\"Load the first trace of every '.mseed' file in a directory into one array.\n",
"\n",
"    Each trace is zero-padded at the end, or truncated, to target_length samples so\n",
"    that all rows have the same length.\n",
"\n",
"    Args:\n",
"        data_directory: Directory scanned (non-recursively) for '.mseed' files.\n",
"        target_length: Number of samples per row. If None, the length of the first\n",
"            trace that is read is used for every row.\n",
"\n",
"    Returns:\n",
"        numpy array with one row per file, in sorted file-name order. The array is\n",
"        empty when the directory contains no '.mseed' files.\n",
"    \"\"\"\n",
"    # os.listdir returns names in arbitrary order; sort so row i always refers to the\n",
"    # same file. NOTE(review): labels are paired with these rows by position elsewhere\n",
"    # in the notebook -- confirm catalog.csv lists events in the same file order.\n",
"    mseed_files = sorted(f for f in os.listdir(data_directory) if f.endswith('.mseed'))\n",
"\n",
"    data_matrix = []\n",
"    for filename in mseed_files:\n",
"        st = read(os.path.join(data_directory, filename))\n",
"        # Only the first trace of each stream is used.\n",
"        tr_data = st.traces[0].data\n",
"\n",
"        if target_length is None:\n",
"            # Lock every row to the length of the first trace encountered.\n",
"            target_length = len(tr_data)\n",
"\n",
"        n_missing = target_length - len(tr_data)\n",
"        if n_missing > 0:\n",
"            # Too short: append zeros up to the target length.\n",
"            tr_data = np.pad(tr_data, (0, n_missing), mode='constant')\n",
"        else:\n",
"            # Long enough: keep only the first target_length samples.\n",
"            tr_data = tr_data[:target_length]\n",
"\n",
"        data_matrix.append(tr_data)\n",
"\n",
"    # Stack the equal-length rows; np.array copies, so no row aliases a stream buffer.\n",
"    return np.array(data_matrix)\n",
"\n",
"# Every trace is padded or trimmed to this many samples.\n",
"# NOTE(review): 46376 presumably is the sample count of the 7000-second trimmed traces\n",
"# (7000 s at 6.625 Hz, plus one sample) -- confirm against the data.\n",
"TARGET_LENGTH = 46376\n",
"X_data = read_all_mseed_files(data_directory, TARGET_LENGTH)\n",
"\n",
"# plot(tr_times, X_data[3], 'Example 3')"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "47c0ecea-e0fa-4ebf-99db-f6c8e77a72e0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1., 0., 0.])"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"\n",
"def _last_field(text):\n",
"    \"\"\"Return whatever follows the final comma in text, stripped of whitespace.\"\"\"\n",
"    return text.split(',')[-1].strip()\n",
"\n",
"\n",
"# Load the catalog, skipping its first line, into a frame with one column named 'data'.\n",
"df = pd.read_csv('catalog.csv', header=None, names=['data'], skiprows=1)\n",
"\n",
"# The class label is the last comma-separated field of each 'data' entry.\n",
"df['label'] = df['data'].apply(_last_field)\n",
"\n",
"# One-hot encode the labels as a dense array, one row per catalog entry.\n",
"encoder = OneHotEncoder(sparse_output=False)\n",
"label_column = df['label'].values.reshape(-1, 1)\n",
"y_label = encoder.fit_transform(label_column)\n",
"\n",
"# one-hot format: [deep, impact, shallow]\n",
"y_label[4]"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "092a184e-a79f-462a-8017-741471a05ae9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional_2\"</span>\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1mModel: \"functional_2\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ input_layer_2 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">46376</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_8 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">11,872,512</span> │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_9 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">32,896</span> │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_10 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">8,256</span> │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_11 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">3</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">195</span> │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
"</pre>\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ input_layer_2 (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m46376\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_8 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m11,872,512\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_9 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m32,896\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_10 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m8,256\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_11 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m3\u001b[0m) │ \u001b[38;5;34m195\u001b[0m │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">11,913,859</span> (45.45 MB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m11,913,859\u001b[0m (45.45 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">11,913,859</span> (45.45 MB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m11,913,859\u001b[0m (45.45 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Take the per-sample shape from the loaded data so the model input cannot drift from\n",
"# the length the traces were padded/trimmed to.\n",
"input_shape = X_data.shape[1:]\n",
"\n",
"# Build the classifier model\n",
"def build_classifier(input_shape, num_classes=3, hidden_units=(256, 128, 64)):\n",
"    \"\"\"Build an uncompiled fully connected softmax classifier.\n",
"\n",
"    Args:\n",
"        input_shape: Shape of one input sample, excluding the batch dimension.\n",
"        num_classes: Number of output classes (width of the softmax layer).\n",
"        hidden_units: Widths of the ReLU hidden layers, applied in order.\n",
"\n",
"    Returns:\n",
"        A Keras Model mapping inputs of input_shape to num_classes probabilities.\n",
"    \"\"\"\n",
"    inputs = layers.Input(shape=input_shape)\n",
"\n",
"    # Stack of dense ReLU layers.\n",
"    x = inputs\n",
"    for units in hidden_units:\n",
"        x = layers.Dense(units, activation='relu')(x)\n",
"\n",
"    # Softmax output: one probability per class, matching one-hot encoded targets.\n",
"    outputs = layers.Dense(num_classes, activation='softmax')(x)\n",
"\n",
"    return models.Model(inputs, outputs)\n",
"\n",
"classifier = build_classifier(input_shape)\n",
"\n",
"# Training configuration: Adam optimizer, categorical cross-entropy loss for the\n",
"# one-hot targets, and categorical accuracy as the reported metric.\n",
"compile_options = {\n",
"    'optimizer': 'adam',\n",
"    'loss': 'categorical_crossentropy',\n",
"    'metrics': ['CategoricalAccuracy'],\n",
"}\n",
"classifier.compile(**compile_options)\n",
"\n",
"# Print the layer-by-layer summary of the model.\n",
"classifier.summary()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "ec34ffe0-d4ed-484d-b82a-c3270083f082",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/10\n",
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 374ms/step - CategoricalAccuracy: 0.3743 - loss: 1.0963 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0815\n",
"Epoch 2/10\n",
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 100ms/step - CategoricalAccuracy: 0.8576 - loss: 1.0794 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0604\n",
"Epoch 3/10\n",
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 99ms/step - CategoricalAccuracy: 0.8160 - loss: 1.0603 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0354\n",
"Epoch 4/10\n",
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8576 - loss: 1.0334 - val_CategoricalAccuracy: 0.8750 - val_loss: 1.0057\n",
"Epoch 5/10\n",
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 103ms/step - CategoricalAccuracy: 0.8264 - loss: 1.0080 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.9703\n",
"Epoch 6/10\n",
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8264 - loss: 0.9747 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.9285\n",
"Epoch 7/10\n",
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 106ms/step - CategoricalAccuracy: 0.8264 - loss: 0.9361 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.8798\n",
"Epoch 8/10\n",
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8368 - loss: 0.8870 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.8243\n",
"Epoch 9/10\n",
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 101ms/step - CategoricalAccuracy: 0.8368 - loss: 0.8350 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.7626\n",
"Epoch 10/10\n",
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 102ms/step - CategoricalAccuracy: 0.8368 - loss: 0.7783 - val_CategoricalAccuracy: 0.8750 - val_loss: 0.6969\n"
]
},
{
"data": {
"text/plain": [
"<keras.src.callbacks.history.History at 0x179aad80c50>"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# training\n",
"# Rows of X_data and y_label are paired by position, so their counts must match.\n",
"assert len(X_data) == len(y_label), (\n",
"    f'{len(X_data)} traces but {len(y_label)} labels'\n",
")\n",
"\n",
"# NOTE: Keras takes the validation_split samples from the end of the arrays, before any\n",
"# shuffling, so the validation set is the last 20% of rows in file order.\n",
"# Keep the History object (per-epoch metrics) instead of only echoing its repr.\n",
"history = classifier.fit(X_data, y_label, epochs=10, batch_size=32, validation_split=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "2bd36515-2777-4d49-82ec-b50f1e0904a0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 20ms/step\n",
"[[0.23078983 0.55988955 0.20932065]]\n"
]
}
],
"source": [
"# test\n",
"\n",
"# Take trace 8 and give it a leading batch axis of size 1 before predicting.\n",
"test = X_data[8].reshape(1, -1)\n",
"\n",
"# Class probabilities for that single trace.\n",
"prediction = classifier.predict(test)\n",
"\n",
"print(prediction)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "931bb66b-7171-4630-9753-d9ff7fa3cb78",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "077d77a2-c3bc-4606-b4bb-f420ff123817",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "711329a8-0b67-4afe-b455-3f8a296a622e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}