mirror of
https://github.com/Derisis13/SeismStart.git
synced 2025-12-06 19:42:49 +01:00
49 lines
2.1 KiB
Python
49 lines
2.1 KiB
Python
"""Cut out relevant samples from the training set"""
|
|
import pandas as pd
|
|
from obspy import UTCDateTime, read
|
|
from datetime import datetime, timedelta
|
|
import os
|
|
from pathlib import Path
|
|
from tqdm import tqdm
|
|
|
|
|
|
# Input locations: the lunar training catalog and the raw S12 Grade A waveforms
CAT_LUNAR_DIR = './space_apps_2024_seismic_detection/data/lunar/training/catalogs/'
CAT_LUNAR_FILE = f'{CAT_LUNAR_DIR}apollo12_catalog_GradeA_final.csv'
# The catalog is parsed once, at import time, and reused by the script below
CAT_LUNAR = pd.read_csv(CAT_LUNAR_FILE)
LUNAR_DATA_DIR = './space_apps_2024_seismic_detection/data/lunar/training/data/S12_GradeA/'
# Output location for the trimmed recordings
PREPROCESSED_LUNAR_DIR = './preprocessed/lunar/data/'
|
|
|
|
def from_mseed(test_filename: str, data_directory: str, arrival_time: datetime):
    """Load a miniSEED recording and locate an arrival relative to its start.

    Args:
        test_filename: Base name of the recording, without the ``.mseed``
            extension.
        data_directory: Directory that holds the recording. A trailing path
            separator is accepted but no longer required.
        arrival_time: Absolute arrival time; it must be subtractable from the
            ``datetime`` of the first trace's start time.

    Returns:
        A ``(st, arrival)`` tuple: the stream returned by ``read`` and the
        arrival expressed in seconds after the start of the first trace.

    Raises:
        Whatever ``read`` raises is propagated unchanged; the script's main
        loop relies on ``FileNotFoundError`` reaching it for missing files.
    """
    # Join the path properly so a directory without a trailing slash still works
    mseed_file = os.path.join(data_directory, f'{test_filename}.mseed')
    st = read(mseed_file)

    # Only the header of the first trace is needed, so read it in place
    # instead of copying the whole trace (and its samples) first
    starttime = st.traces[0].stats.starttime.datetime

    # Arrival as an offset in seconds from the beginning of the recording
    arrival = (arrival_time - starttime).total_seconds()
    return st, arrival
|
|
|
|
if __name__ == "__main__":
    # Make output dir if not present
    Path(PREPROCESSED_LUNAR_DIR).mkdir(parents=True, exist_ok=True)

    # Length of the window kept after each arrival; built once, not per row
    window = timedelta(seconds=7000)

    # Iterate over all lunar samples and extract arrival:arrival + 7000 sec of samples.
    # iterrows() is the supported way to walk the rows, and passing total= lets
    # tqdm show real progress instead of a bare counter.
    for _, row in tqdm(CAT_LUNAR.iterrows(), total=len(CAT_LUNAR)):
        arrival_time = datetime.strptime(
            row['time_abs(%Y-%m-%dT%H:%M:%S.%f)'], '%Y-%m-%dT%H:%M:%S.%f'
        )
        test_filename = row.filename
        try:
            st, _ = from_mseed(test_filename, LUNAR_DATA_DIR, arrival_time)
        except FileNotFoundError:
            # Because csv is faulty: retry with the alternative file name
            # (HR02 in place of HR00). A second miss is left to propagate.
            test_filename = test_filename.replace('HR00', 'HR02')
            st, _ = from_mseed(test_filename, LUNAR_DATA_DIR, arrival_time)

        # The stream was read just for this row and is not reused afterwards,
        # so it is trimmed in place rather than copied first
        st.trim(UTCDateTime(arrival_time), UTCDateTime(arrival_time + window))

        fout_name = test_filename + "_trimmed_7000_sec.mseed"
        st.write(os.path.join(PREPROCESSED_LUNAR_DIR, fout_name), format="mseed")
|
|
|