"""Cut out relevant samples from the training set""" import pandas as pd from obspy import UTCDateTime, read from datetime import datetime, timedelta import os from pathlib import Path from tqdm import tqdm # Define directories for use CAT_LUNAR_DIR = './space_apps_2024_seismic_detection/data/lunar/training/catalogs/' CAT_LUNAR_FILE = CAT_LUNAR_DIR + 'apollo12_catalog_GradeA_final.csv' CAT_LUNAR = pd.read_csv(CAT_LUNAR_FILE) LUNAR_DATA_DIR = './space_apps_2024_seismic_detection/data/lunar/training/data/S12_GradeA/' PREPROCESSED_LUNAR_DIR = './preprocessed/lunar/data/' def from_mseed(test_filename:str, data_directory:str, arrival_time:datetime): mseed_file = f'{data_directory}{test_filename}.mseed' st = read(mseed_file) # This is how you get the data and the time, which is in seconds tr = st.traces[0].copy() # Start time of trace (another way to get the relative arrival time using datetime) starttime = tr.stats.starttime.datetime arrival = (arrival_time - starttime).total_seconds() return st, arrival if __name__ == "__main__": # Make output dir if not present Path(PREPROCESSED_LUNAR_DIR).mkdir(parents=True, exist_ok=True) # Iterate over all lunar samples and extract arrival:arrival + 7000 sec of samples for row in tqdm(CAT_LUNAR.iloc): arrival_time = datetime.strptime(row['time_abs(%Y-%m-%dT%H:%M:%S.%f)'],'%Y-%m-%dT%H:%M:%S.%f') test_filename = row.filename try: st, arrival = from_mseed(test_filename, LUNAR_DATA_DIR, arrival_time) except FileNotFoundError: # Because csv is faulty... test_filename = test_filename.replace('HR00', 'HR02') st, arrival = from_mseed(test_filename, LUNAR_DATA_DIR, arrival_time) stream_out = st.copy() utc_arrival = UTCDateTime(arrival_time) endtime = UTCDateTime(arrival_time + timedelta(seconds=7000)) stream_out.trim(utc_arrival, endtime) fout_name = test_filename + "_trimmed_7000_sec.mseed" stream_out.write(os.path.join(PREPROCESSED_LUNAR_DIR, fout_name), format="mseed")