Compare commits

...

5 Commits

Author SHA1 Message Date
e52ee45261 An initial and unfinished attempt at creating an RNN model for the problem
Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
2025-05-20 15:31:17 -04:00
c3524eda21 A brief look at pulling data for only extreme events
Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
2025-05-20 15:30:52 -04:00
6c2247974c A notebook for pulling seismic event data, cleaning it and saving it
Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
2025-05-20 15:30:26 -04:00
fc03e01629 Exploring Extreme Value Theory (EVT) techniques
Looking into how it may help with reducing to our problem

Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
2025-05-20 15:27:06 -04:00
6038fdef50 Initial files to configure data querying and live websocket connections
Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
2025-05-20 15:24:35 -04:00
6 changed files with 2836 additions and 0 deletions

338
datapolling.ipynb Normal file
View File

@ -0,0 +1,338 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 18,
"id": "e06a5cf6",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import obspy"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "434a13eb",
"metadata": {},
"outputs": [],
"source": [
"url = \"www.seismicportal.eu/fdsnws/event/1/query?limit=10&start=2020-01-01&end=2022-01-01&format=json\""
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "e72fb8f7",
"metadata": {},
"outputs": [],
"source": [
"def geturl(url):\n",
"    \"\"\"Perform an HTTPS GET on a scheme-less address.\n",
"\n",
"    Args:\n",
"        url: Host, path and query string without the leading ``https://``.\n",
"\n",
"    Returns:\n",
"        dict holding the HTTP status code under 'status' and the response\n",
"        body text under 'content'.\n",
"    \"\"\"\n",
"    target = \"https://\" + url\n",
"    response = requests.get(target, timeout=15)\n",
"    outcome = {}\n",
"    outcome['status'] = response.status_code\n",
"    outcome['content'] = response.text\n",
"    return outcome"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "19355da2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"type\":\"FeatureCollection\",\"metadata\":{\"count\":10},\"features\":[{\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": [\n",
" 122.38,\n",
" -8.11,\n",
" -10.0\n",
" ]\n",
" },\n",
" \"type\": \"Feature\",\n",
" \"id\": \"20220101_0000155\",\n",
" \"properties\": {\n",
" \"lastupdate\": \"2022-01-02T06:11:00.0Z\",\n",
" \"magtype\": \"m\",\n",
" \"evtype\": \"ke\",\n",
" \"lon\": 122.38,\n",
" \"auth\": \"DJA\",\n",
" \"lat\": -8.11,\n",
" \"depth\": 10.0,\n",
" \"unid\": \"20220101_0000155\",\n",
" \"mag\": 2.9,\n",
" \"time\": \"2022-01-01T23:50:06.0Z\",\n",
" \"source_id\": \"1083157\",\n",
" \"source_catalog\": \"EMSC-RTS\",\n",
" \"flynn_region\": \"FLORES REGION, INDONESIA\"\n",
" }\n",
"},{\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": [\n",
" 124.09,\n",
" -8.84,\n",
" -65.0\n",
" ]\n",
" },\n",
" \"type\": \"Feature\",\n",
" \"id\": \"20220101_0000153\",\n",
" \"properties\": {\n",
" \"lastupdate\": \"2022-01-02T06:11:00.0Z\",\n",
" \"magtype\": \"m\",\n",
" \"evtype\": \"ke\",\n",
" \"lon\": 124.09,\n",
" \"auth\": \"DJA\",\n",
" \"lat\": -8.84,\n",
" \"depth\": 65.0,\n",
" \"unid\": \"20220101_0000153\",\n",
" \"mag\": 3.3,\n",
" \"time\": \"2022-01-01T23:48:18.0Z\",\n",
" \"source_id\": \"1083154\",\n",
" \"source_catalog\": \"EMSC-RTS\",\n",
" \"flynn_region\": \"KEPULAUAN ALOR, INDONESIA\"\n",
" }\n",
"},{\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": [\n",
" -173.92,\n",
" -21.37,\n",
" -10.0\n",
" ]\n",
" },\n",
" \"type\": \"Feature\",\n",
" \"id\": \"20220101_0000137\",\n",
" \"properties\": {\n",
" \"lastupdate\": \"2022-01-02T08:20:00.0Z\",\n",
" \"magtype\": \"mb\",\n",
" \"evtype\": \"ke\",\n",
" \"lon\": -173.92,\n",
" \"auth\": \"EMSC\",\n",
" \"lat\": -21.37,\n",
" \"depth\": 10.0,\n",
" \"unid\": \"20220101_0000137\",\n",
" \"mag\": 4.9,\n",
" \"time\": \"2022-01-01T23:47:39.0Z\",\n",
" \"source_id\": \"1083084\",\n",
" \"source_catalog\": \"EMSC-RTS\",\n",
" \"flynn_region\": \"TONGA\"\n",
" }\n",
"},{\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": [\n",
" -66.64,\n",
" -23.73,\n",
" -233.0\n",
" ]\n",
" },\n",
" \"type\": \"Feature\",\n",
" \"id\": \"20220101_0000136\",\n",
" \"properties\": {\n",
" \"lastupdate\": \"2022-01-01T23:56:00.0Z\",\n",
" \"magtype\": \"m\",\n",
" \"evtype\": \"ke\",\n",
" \"lon\": -66.64,\n",
" \"auth\": \"NSNA\",\n",
" \"lat\": -23.73,\n",
" \"depth\": 233.0,\n",
" \"unid\": \"20220101_0000136\",\n",
" \"mag\": 3.1,\n",
" \"time\": \"2022-01-01T23:45:36.0Z\",\n",
" \"source_id\": \"1083085\",\n",
" \"source_catalog\": \"EMSC-RTS\",\n",
" \"flynn_region\": \"JUJUY, ARGENTINA\"\n",
" }\n",
"},{\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": [\n",
" -155.4,\n",
" 19.2,\n",
" -32.0\n",
" ]\n",
" },\n",
" \"type\": \"Feature\",\n",
" \"id\": \"20220101_0000133\",\n",
" \"properties\": {\n",
" \"lastupdate\": \"2022-01-01T23:45:00.0Z\",\n",
" \"magtype\": \"md\",\n",
" \"evtype\": \"ke\",\n",
" \"lon\": -155.4,\n",
" \"auth\": \"NEIR\",\n",
" \"lat\": 19.2,\n",
" \"depth\": 32.0,\n",
" \"unid\": \"20220101_0000133\",\n",
" \"mag\": 2.2,\n",
" \"time\": \"2022-01-01T23:42:34.8Z\",\n",
" \"source_id\": \"1083079\",\n",
" \"source_catalog\": \"EMSC-RTS\",\n",
" \"flynn_region\": \"ISLAND OF HAWAII, HAWAII\"\n",
" }\n",
"},{\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": [\n",
" -16.21,\n",
" 28.09,\n",
" -8.0\n",
" ]\n",
" },\n",
" \"type\": \"Feature\",\n",
" \"id\": \"20220101_0000202\",\n",
" \"properties\": {\n",
" \"lastupdate\": \"2022-01-02T20:45:00.0Z\",\n",
" \"magtype\": \"ml\",\n",
" \"evtype\": \"ke\",\n",
" \"lon\": -16.21,\n",
" \"auth\": \"MDD\",\n",
" \"lat\": 28.09,\n",
" \"depth\": 8.0,\n",
" \"unid\": \"20220101_0000202\",\n",
" \"mag\": 1.8,\n",
" \"time\": \"2022-01-01T23:40:04.8Z\",\n",
" \"source_id\": \"1083360\",\n",
" \"source_catalog\": \"EMSC-RTS\",\n",
" \"flynn_region\": \"CANARY ISLANDS, SPAIN REGION\"\n",
" }\n",
"},{\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": [\n",
" -69.31,\n",
" 18.08,\n",
" -10.0\n",
" ]\n",
" },\n",
" \"type\": \"Feature\",\n",
" \"id\": \"20220101_0000134\",\n",
" \"properties\": {\n",
" \"lastupdate\": \"2022-01-01T23:51:00.0Z\",\n",
" \"magtype\": \"m\",\n",
" \"evtype\": \"ke\",\n",
" \"lon\": -69.31,\n",
" \"auth\": \"UASD\",\n",
" \"lat\": 18.08,\n",
" \"depth\": 10.0,\n",
" \"unid\": \"20220101_0000134\",\n",
" \"mag\": 3.1,\n",
" \"time\": \"2022-01-01T23:25:21.0Z\",\n",
" \"source_id\": \"1083082\",\n",
" \"source_catalog\": \"EMSC-RTS\",\n",
" \"flynn_region\": \"DOMINICAN REPUBLIC REGION\"\n",
" }\n",
"},{\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": [\n",
" -74.06,\n",
" 18.95,\n",
" -10.0\n",
" ]\n",
" },\n",
" \"type\": \"Feature\",\n",
" \"id\": \"20220101_0000132\",\n",
" \"properties\": {\n",
" \"lastupdate\": \"2022-01-01T23:36:00.0Z\",\n",
" \"magtype\": \"m\",\n",
" \"evtype\": \"ke\",\n",
" \"lon\": -74.06,\n",
" \"auth\": \"UASD\",\n",
" \"lat\": 18.95,\n",
" \"depth\": 10.0,\n",
" \"unid\": \"20220101_0000132\",\n",
" \"mag\": 3.1,\n",
" \"time\": \"2022-01-01T23:19:03.0Z\",\n",
" \"source_id\": \"1083078\",\n",
" \"source_catalog\": \"EMSC-RTS\",\n",
" \"flynn_region\": \"HAITI REGION\"\n",
" }\n",
"},{\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": [\n",
" -98.05,\n",
" 16.31,\n",
" -8.0\n",
" ]\n",
" },\n",
" \"type\": \"Feature\",\n",
" \"id\": \"20220101_0000141\",\n",
" \"properties\": {\n",
" \"lastupdate\": \"2022-01-02T02:01:00.0Z\",\n",
" \"magtype\": \"m\",\n",
" \"evtype\": \"ke\",\n",
" \"lon\": -98.05,\n",
" \"auth\": \"UNM\",\n",
" \"lat\": 16.31,\n",
" \"depth\": 8.0,\n",
" \"unid\": \"20220101_0000141\",\n",
" \"mag\": 3.3,\n",
" \"time\": \"2022-01-01T23:00:27.0Z\",\n",
" \"source_id\": \"1083098\",\n",
" \"source_catalog\": \"EMSC-RTS\",\n",
" \"flynn_region\": \"OAXACA, MEXICO\"\n",
" }\n",
"},{\n",
" \"geometry\": {\n",
" \"type\": \"Point\",\n",
" \"coordinates\": [\n",
" -68.86,\n",
" -21.1,\n",
" -109.0\n",
" ]\n",
" },\n",
" \"type\": \"Feature\",\n",
" \"id\": \"20220101_0000131\",\n",
" \"properties\": {\n",
" \"lastupdate\": \"2022-01-01T23:08:00.0Z\",\n",
" \"magtype\": \"m\",\n",
" \"evtype\": \"ke\",\n",
" \"lon\": -68.86,\n",
" \"auth\": \"GUC\",\n",
" \"lat\": -21.1,\n",
" \"depth\": 109.0,\n",
" \"unid\": \"20220101_0000131\",\n",
" \"mag\": 2.7,\n",
" \"time\": \"2022-01-01T22:57:18.0Z\",\n",
" \"source_id\": \"1083075\",\n",
" \"source_catalog\": \"EMSC-RTS\",\n",
" \"flynn_region\": \"TARAPACA, CHILE\"\n",
" }\n",
"}]}\n"
]
}
],
"source": [
"res = geturl(url)\n",
"print(res['content'])\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "sideprojects",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

54
datawebsocket.py Normal file
View File

@ -0,0 +1,54 @@
from __future__ import unicode_literals
from tornado.websocket import websocket_connect
from tornado.ioloop import IOLoop
from tornado import gen
import logging
import json
import sys
# WebSocket endpoint that launch_client() connects to.
echo_uri = 'wss://www.seismicportal.eu/standing_order/websocket'
# Passed to websocket_connect() as ping_interval (Tornado interprets it in seconds).
PING_INTERVAL = 15
# You can modify this function to run custom processing on each message.
def myprocessing(message):
    """Decode one WebSocket message and log a one-line summary of the event.

    Args:
        message: JSON text (str or bytes) with an 'action' entry and the event
            fields under data -> properties.

    Returns:
        None. Failures are logged and never raised, so one bad message cannot
        stop the caller's receive loop.
    """
    try:
        data = json.loads(message)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-text input.
        logging.exception("Unable to parse json message")
        return

    # The raw payload is only useful while debugging; at INFO level it would
    # duplicate the summary line below on the same stream.
    logging.debug("Raw message: %s", data)

    try:
        # Build a fresh dict rather than mutating the decoded payload.
        info = dict(data['data']['properties'], action=data['action'])
        logging.info('>>>> {action:7} event from {auth:7}, unid:{unid}, T0:{time}, Mag:{mag}, Region: {flynn_region}'.format(**info))
    except Exception:
        # Valid JSON, but a field the summary needs is missing or mistyped.
        logging.exception("Unexpected message structure")
@gen.coroutine
def listen(ws):
    """Feed every message read from ws to myprocessing() until the socket closes.

    read_message() yielding None is treated as the end of the stream; at that
    point "close" is logged and the coroutine finishes.
    """
    incoming = yield ws.read_message()
    while incoming is not None:
        myprocessing(incoming)
        incoming = yield ws.read_message()
    logging.info("close")
@gen.coroutine
def launch_client():
    """Connect to echo_uri and process messages until the connection ends.

    Connection failures are logged and end the coroutine. Once connected, the
    coroutine waits on listen() so that an error raised while receiving is
    logged here instead of being lost in an unobserved Future, and the socket
    is closed on every exit path.
    """
    try:
        logging.info("Open WebSocket connection to %s", echo_uri)
        ws = yield websocket_connect(echo_uri, ping_interval=PING_INTERVAL)
    except Exception:
        logging.exception("connection error")
        return

    logging.info("Waiting for messages...")
    try:
        # Yielding keeps this coroutine alive for the lifetime of the listener;
        # calling listen(ws) without yield would discard its result and errors.
        yield listen(ws)
    except Exception:
        logging.exception("listener stopped unexpectedly")
    finally:
        ws.close()
if __name__ == '__main__':
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    ioloop = IOLoop.current()
    # spawn_callback starts the coroutine on the loop and logs any exception it
    # raises; a bare launch_client() call would leave its Future unobserved.
    ioloop.spawn_callback(launch_client)
    try:
        ioloop.start()
    except KeyboardInterrupt:
        logging.info("Close WebSocket")
        ioloop.stop()

522
evt_testing.ipynb Normal file

File diff suppressed because one or more lines are too long

402
extreme_data.ipynb Normal file
View File

@ -0,0 +1,402 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 13,
"id": "bd523899",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import scipy.stats as stats\n",
"from scipy.stats import genextreme\n",
"from scipy.stats import genpareto\n",
"import requests\n",
"import json\n",
"\n",
"from datetime import date\n",
"from datetime import datetime\n",
"from datetime import timedelta\n",
"import pytz\n",
"\n",
"import os.path"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "c63a1b94",
"metadata": {},
"outputs": [],
"source": [
"start_date = \"2024-01-01\""
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "903ed374",
"metadata": {},
"outputs": [],
"source": [
"def geturl(url):\n",
"    \"\"\"GET ``https://`` + url and summarise the response.\n",
"\n",
"    Returns a dict with 'status' (the HTTP status code) and 'content'\n",
"    (the response body as text).\n",
"    \"\"\"\n",
"    reply = requests.get(\"https://\" + url, timeout=15)\n",
"    status, content = reply.status_code, reply.text\n",
"    return {'status': status, 'content': content}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "89e1979c",
"metadata": {},
"outputs": [],
"source": [
"def _fdsn_timestamp(moment):\n",
"    \"\"\"Render an aware datetime as a naive-UTC ISO 8601 string for the query.\n",
"\n",
"    An offset such as '+00:00' is avoided because an unescaped '+' in a query\n",
"    string is commonly decoded as a space.\n",
"    \"\"\"\n",
"    return moment.astimezone(pytz.utc).replace(tzinfo=None).isoformat()\n",
"\n",
"\n",
"def retrieveEvents(start, end, limit=20000, retry_limit=5, minmag=0):\n",
"    \"\"\"Download all events between two dates from the seismicportal.eu event service.\n",
"\n",
"    The service is queried in time-ascending pages of at most `limit` events.\n",
"    Each following page starts at the timestamp of the last event received, so\n",
"    the boundary event comes back twice; events are de-duplicated on 'unid'.\n",
"\n",
"    Args:\n",
"        start: First day to fetch (datetime.date, taken as 00:00 UTC).\n",
"        end: End of the range (datetime.date, taken as 00:00 UTC).\n",
"        limit: Maximum number of events requested per page.\n",
"        retry_limit: Total number of failed requests tolerated before giving up.\n",
"        minmag: Minimum magnitude passed to the service.\n",
"\n",
"    Returns:\n",
"        List of the events' 'properties' dicts in the order received. When the\n",
"        retry budget is exhausted the list is partial and a RuntimeWarning is\n",
"        issued.\n",
"    \"\"\"\n",
"    import warnings\n",
"\n",
"    events = []\n",
"    seen_ids = set()\n",
"    moving_start = datetime.fromordinal(start.toordinal()).replace(tzinfo=pytz.utc)\n",
"    end = datetime.fromordinal(end.toordinal()).replace(tzinfo=pytz.utc)\n",
"    end_stamp = _fdsn_timestamp(end)\n",
"    failures = 0\n",
"\n",
"    while moving_start <= end and failures < retry_limit:\n",
"        url = (\n",
"            'www.seismicportal.eu/fdsnws/event/1/query?orderby=time-asc'\n",
"            '&limit={limit}&start={startdate}&end={enddate}&format=json&minmag={minmag}'\n",
"        ).format(limit=limit, startdate=_fdsn_timestamp(moving_start),\n",
"                 enddate=end_stamp, minmag=minmag)\n",
"        try:\n",
"            res = geturl(url)\n",
"        except requests.RequestException:\n",
"            # Timeouts and connection errors count against the retry budget\n",
"            # instead of aborting the whole download.\n",
"            failures += 1\n",
"            continue\n",
"\n",
"        if res['status'] == 204:\n",
"            # FDSN services answer 204 (No Content) when nothing matches;\n",
"            # that is the end of the data, not a failure worth retrying.\n",
"            break\n",
"        if res['status'] != 200:\n",
"            failures += 1\n",
"            continue\n",
"\n",
"        page = [feature['properties'] for feature in json.loads(res['content'])['features']]\n",
"\n",
"        fresh = []\n",
"        for event in page:\n",
"            key = event.get('unid')\n",
"            if key is not None:\n",
"                if key in seen_ids:\n",
"                    continue\n",
"                seen_ids.add(key)\n",
"            fresh.append(event)\n",
"        events.extend(fresh)\n",
"\n",
"        # A page that is not full is the last one. A page with nothing new\n",
"        # means the cursor cannot advance, so stop rather than loop forever.\n",
"        if len(page) != limit or not fresh:\n",
"            break\n",
"\n",
"        # The Python 3.11 kernel used here parses the trailing 'Z' as UTC.\n",
"        moving_start = datetime.fromisoformat(page[-1]['time'])\n",
"        if moving_start.tzinfo is None:\n",
"            moving_start = moving_start.replace(tzinfo=pytz.utc)\n",
"\n",
"    if failures and failures >= retry_limit:\n",
"        warnings.warn(\n",
"            'retrieveEvents gave up after {} failed requests; the result is incomplete'.format(failures),\n",
"            RuntimeWarning,\n",
"        )\n",
"    return events\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "8bccb27c",
"metadata": {},
"outputs": [],
"source": [
"data = retrieveEvents(date.fromisoformat(start_date), date.today(), minmag=2)\n",
"df = pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "902b6b1e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>source_id</th>\n",
" <th>source_catalog</th>\n",
" <th>lastupdate</th>\n",
" <th>time</th>\n",
" <th>flynn_region</th>\n",
" <th>lat</th>\n",
" <th>lon</th>\n",
" <th>depth</th>\n",
" <th>evtype</th>\n",
" <th>auth</th>\n",
" <th>mag</th>\n",
" <th>magtype</th>\n",
" <th>unid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1600054</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2024-01-01T00:02:51.439437Z</td>\n",
" <td>2024-01-01T00:00:29.5Z</td>\n",
" <td>CRETE, GREECE</td>\n",
" <td>35.1400</td>\n",
" <td>24.1200</td>\n",
" <td>10.0</td>\n",
" <td>ke</td>\n",
" <td>THE</td>\n",
" <td>2.3</td>\n",
" <td>ml</td>\n",
" <td>20240101_0000001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1600055</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2024-01-01T00:14:14.3925Z</td>\n",
" <td>2024-01-01T00:03:15.0Z</td>\n",
" <td>SULAWESI, INDONESIA</td>\n",
" <td>-1.3000</td>\n",
" <td>120.5100</td>\n",
" <td>10.0</td>\n",
" <td>ke</td>\n",
" <td>BMKG</td>\n",
" <td>3.1</td>\n",
" <td>m</td>\n",
" <td>20240101_0000002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1600058</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2024-01-01T00:24:28.774809Z</td>\n",
" <td>2024-01-01T00:03:15.14Z</td>\n",
" <td>PUERTO RICO</td>\n",
" <td>18.4087</td>\n",
" <td>-66.4270</td>\n",
" <td>105.2</td>\n",
" <td>ke</td>\n",
" <td>PR</td>\n",
" <td>3.2</td>\n",
" <td>md</td>\n",
" <td>20240101_0000004</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1600056</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2024-01-01T00:14:58.984143Z</td>\n",
" <td>2024-01-01T00:05:28.0Z</td>\n",
" <td>COLOMBIA-ECUADOR BORDER REGION</td>\n",
" <td>0.1100</td>\n",
" <td>-78.9400</td>\n",
" <td>54.0</td>\n",
" <td>ke</td>\n",
" <td>QUI</td>\n",
" <td>3.5</td>\n",
" <td>m</td>\n",
" <td>20240101_0000003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1600057</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2024-01-02T08:04:45.107234Z</td>\n",
" <td>2024-01-01T00:10:05.6Z</td>\n",
" <td>NORWEGIAN SEA</td>\n",
" <td>72.2450</td>\n",
" <td>1.8470</td>\n",
" <td>6.1</td>\n",
" <td>ke</td>\n",
" <td>BER</td>\n",
" <td>3.7</td>\n",
" <td>mw</td>\n",
" <td>20240101_0000408</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126136</th>\n",
" <td>1809807</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2025-05-19T21:34:26.878673Z</td>\n",
" <td>2025-05-18T23:24:13.0Z</td>\n",
" <td>SAN JUAN, ARGENTINA</td>\n",
" <td>-31.6300</td>\n",
" <td>-70.3800</td>\n",
" <td>138.0</td>\n",
" <td>ke</td>\n",
" <td>CSN</td>\n",
" <td>3.4</td>\n",
" <td>ml</td>\n",
" <td>20250518_0000270</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126137</th>\n",
" <td>1809808</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2025-05-18T23:38:19.408959Z</td>\n",
" <td>2025-05-18T23:27:24.13Z</td>\n",
" <td>SOUTHERN ITALY</td>\n",
" <td>39.0343</td>\n",
" <td>16.4318</td>\n",
" <td>9.8</td>\n",
" <td>ke</td>\n",
" <td>INGV</td>\n",
" <td>2.4</td>\n",
" <td>ml</td>\n",
" <td>20250518_0000271</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126138</th>\n",
" <td>1809813</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2025-05-19T00:14:20.838747Z</td>\n",
" <td>2025-05-18T23:52:33.0Z</td>\n",
" <td>SOUTHWEST OF SUMATRA, INDONESIA</td>\n",
" <td>-7.8200</td>\n",
" <td>103.8600</td>\n",
" <td>10.0</td>\n",
" <td>ke</td>\n",
" <td>BMKG</td>\n",
" <td>3.5</td>\n",
" <td>m</td>\n",
" <td>20250518_0000275</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126139</th>\n",
" <td>1809809</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2025-05-19T06:33:38.794931Z</td>\n",
" <td>2025-05-18T23:58:01.56Z</td>\n",
" <td>OFF COAST OF TARAPACA, CHILE</td>\n",
" <td>-18.5159</td>\n",
" <td>-71.3039</td>\n",
" <td>25.0</td>\n",
" <td>ke</td>\n",
" <td>EMSC</td>\n",
" <td>4.2</td>\n",
" <td>mb</td>\n",
" <td>20250518_0000273</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126140</th>\n",
" <td>1809814</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2025-05-19T06:33:56.681166Z</td>\n",
" <td>2025-05-18T23:58:56.73Z</td>\n",
" <td>WESTERN TURKEY</td>\n",
" <td>37.8953</td>\n",
" <td>27.6327</td>\n",
" <td>11.4</td>\n",
" <td>ke</td>\n",
" <td>EMSC</td>\n",
" <td>2.4</td>\n",
" <td>ml</td>\n",
" <td>20250518_0000287</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>126141 rows × 13 columns</p>\n",
"</div>"
],
"text/plain": [
" source_id source_catalog lastupdate \\\n",
"0 1600054 EMSC-RTS 2024-01-01T00:02:51.439437Z \n",
"1 1600055 EMSC-RTS 2024-01-01T00:14:14.3925Z \n",
"2 1600058 EMSC-RTS 2024-01-01T00:24:28.774809Z \n",
"3 1600056 EMSC-RTS 2024-01-01T00:14:58.984143Z \n",
"4 1600057 EMSC-RTS 2024-01-02T08:04:45.107234Z \n",
"... ... ... ... \n",
"126136 1809807 EMSC-RTS 2025-05-19T21:34:26.878673Z \n",
"126137 1809808 EMSC-RTS 2025-05-18T23:38:19.408959Z \n",
"126138 1809813 EMSC-RTS 2025-05-19T00:14:20.838747Z \n",
"126139 1809809 EMSC-RTS 2025-05-19T06:33:38.794931Z \n",
"126140 1809814 EMSC-RTS 2025-05-19T06:33:56.681166Z \n",
"\n",
" time flynn_region lat \\\n",
"0 2024-01-01T00:00:29.5Z CRETE, GREECE 35.1400 \n",
"1 2024-01-01T00:03:15.0Z SULAWESI, INDONESIA -1.3000 \n",
"2 2024-01-01T00:03:15.14Z PUERTO RICO 18.4087 \n",
"3 2024-01-01T00:05:28.0Z COLOMBIA-ECUADOR BORDER REGION 0.1100 \n",
"4 2024-01-01T00:10:05.6Z NORWEGIAN SEA 72.2450 \n",
"... ... ... ... \n",
"126136 2025-05-18T23:24:13.0Z SAN JUAN, ARGENTINA -31.6300 \n",
"126137 2025-05-18T23:27:24.13Z SOUTHERN ITALY 39.0343 \n",
"126138 2025-05-18T23:52:33.0Z SOUTHWEST OF SUMATRA, INDONESIA -7.8200 \n",
"126139 2025-05-18T23:58:01.56Z OFF COAST OF TARAPACA, CHILE -18.5159 \n",
"126140 2025-05-18T23:58:56.73Z WESTERN TURKEY 37.8953 \n",
"\n",
" lon depth evtype auth mag magtype unid \n",
"0 24.1200 10.0 ke THE 2.3 ml 20240101_0000001 \n",
"1 120.5100 10.0 ke BMKG 3.1 m 20240101_0000002 \n",
"2 -66.4270 105.2 ke PR 3.2 md 20240101_0000004 \n",
"3 -78.9400 54.0 ke QUI 3.5 m 20240101_0000003 \n",
"4 1.8470 6.1 ke BER 3.7 mw 20240101_0000408 \n",
"... ... ... ... ... ... ... ... \n",
"126136 -70.3800 138.0 ke CSN 3.4 ml 20250518_0000270 \n",
"126137 16.4318 9.8 ke INGV 2.4 ml 20250518_0000271 \n",
"126138 103.8600 10.0 ke BMKG 3.5 m 20250518_0000275 \n",
"126139 -71.3039 25.0 ke EMSC 4.2 mb 20250518_0000273 \n",
"126140 27.6327 11.4 ke EMSC 2.4 ml 20250518_0000287 \n",
"\n",
"[126141 rows x 13 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "sideprojects",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

440
model_training.ipynb Normal file
View File

@ -0,0 +1,440 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 33,
"id": "b4cc996f",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import scipy.stats as stats\n",
"from scipy.stats import genextreme\n",
"from scipy.stats import genpareto\n",
"import requests\n",
"import json\n",
"\n",
"# from datetime import date\n",
"# from datetime import datetime\n",
"# from datetime import timedelta\n",
"# import pytz\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"import lightning as L\n",
"\n",
"import os\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "b0fe3fe6",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('reduced_data.csv')\n",
"# df['time'] = pd.to_datetime(df['time'], format='ISO8601')\n",
"\n",
"# df['lastupdate'] = pd.to_datetime(df['lastupdate'], format='ISO8601')"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "083c39ae",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>bin</th>\n",
" <th>time_to_next_event</th>\n",
" <th>time</th>\n",
" <th>lat</th>\n",
" <th>lon</th>\n",
" <th>depth</th>\n",
" <th>mag</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>27</td>\n",
" <td>24</td>\n",
" <td>1.577837e+09</td>\n",
" <td>19.2200</td>\n",
" <td>-67.1300</td>\n",
" <td>12.0</td>\n",
" <td>2.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>30</td>\n",
" <td>77</td>\n",
" <td>1.577837e+09</td>\n",
" <td>-2.7400</td>\n",
" <td>127.9000</td>\n",
" <td>20.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25</td>\n",
" <td>425</td>\n",
" <td>1.577837e+09</td>\n",
" <td>19.0800</td>\n",
" <td>-67.0900</td>\n",
" <td>6.0</td>\n",
" <td>2.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>31</td>\n",
" <td>39</td>\n",
" <td>1.577837e+09</td>\n",
" <td>19.1900</td>\n",
" <td>-67.8400</td>\n",
" <td>28.0</td>\n",
" <td>3.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>35</td>\n",
" <td>69</td>\n",
" <td>1.577837e+09</td>\n",
" <td>-25.6400</td>\n",
" <td>-70.5200</td>\n",
" <td>53.0</td>\n",
" <td>3.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>548087</th>\n",
" <td>40</td>\n",
" <td>326</td>\n",
" <td>1.747745e+09</td>\n",
" <td>-4.3300</td>\n",
" <td>132.9700</td>\n",
" <td>10.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>548088</th>\n",
" <td>40</td>\n",
" <td>401</td>\n",
" <td>1.747745e+09</td>\n",
" <td>-30.1300</td>\n",
" <td>-69.4600</td>\n",
" <td>10.0</td>\n",
" <td>4.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>548089</th>\n",
" <td>13</td>\n",
" <td>75</td>\n",
" <td>1.747745e+09</td>\n",
" <td>38.9889</td>\n",
" <td>27.9292</td>\n",
" <td>8.9</td>\n",
" <td>1.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>548090</th>\n",
" <td>31</td>\n",
" <td>35</td>\n",
" <td>1.747746e+09</td>\n",
" <td>-8.0000</td>\n",
" <td>107.0500</td>\n",
" <td>16.0</td>\n",
" <td>3.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>548091</th>\n",
" <td>47</td>\n",
" <td>448</td>\n",
" <td>1.747746e+09</td>\n",
" <td>-23.3231</td>\n",
" <td>-179.9220</td>\n",
" <td>540.0</td>\n",
" <td>4.7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>548092 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" bin time_to_next_event time lat lon depth mag\n",
"0 27 24 1.577837e+09 19.2200 -67.1300 12.0 2.8\n",
"1 30 77 1.577837e+09 -2.7400 127.9000 20.0 3.0\n",
"2 25 425 1.577837e+09 19.0800 -67.0900 6.0 2.5\n",
"3 31 39 1.577837e+09 19.1900 -67.8400 28.0 3.1\n",
"4 35 69 1.577837e+09 -25.6400 -70.5200 53.0 3.5\n",
"... ... ... ... ... ... ... ...\n",
"548087 40 326 1.747745e+09 -4.3300 132.9700 10.0 4.0\n",
"548088 40 401 1.747745e+09 -30.1300 -69.4600 10.0 4.1\n",
"548089 13 75 1.747745e+09 38.9889 27.9292 8.9 1.4\n",
"548090 31 35 1.747746e+09 -8.0000 107.0500 16.0 3.1\n",
"548091 47 448 1.747746e+09 -23.3231 -179.9220 540.0 4.7\n",
"\n",
"[548092 rows x 7 columns]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "967ec29d",
"metadata": {},
"outputs": [],
"source": [
"# # Set threshold at the 95th percentile\n",
"# # threshold = np.percentile(data, 95)\n",
"# threshold = 6 ## the actual threshold value\n",
"# data = df['mag']\n",
"# extremes = data[data > threshold]\n",
"# print(f\"Threshold: {threshold}, Number of extremes: {len(extremes)}\")\n",
"\n",
"# # Visualize the threshold and extremes\n",
"# plt.hist(data, bins=30, edgecolor='k', alpha=0.7, label='Data')\n",
"# plt.axvline(threshold, color='red', linestyle='--', label='Threshold')\n",
"# plt.title('Threshold for POT')\n",
"# plt.legend()\n",
"# plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "77c900ad",
"metadata": {},
"outputs": [],
"source": [
"# from scipy.stats import genpareto\n",
"\n",
"# # Fit a GPD to the extremes\n",
"# gpd_params = genpareto.fit(extremes - threshold) # Subtract threshold for GPD fit\n",
"# print(f\"GPD Parameters: Shape={gpd_params[0]}, Location={gpd_params[1]}, Scale={gpd_params[2]}\")\n",
"\n",
"# # Visualize the GPD fit\n",
"# x = np.linspace(min(extremes), max(extremes), 100)\n",
"# pdf = genpareto.pdf(x - threshold, *gpd_params)\n",
"# plt.hist(extremes, bins=10, density=True, alpha=0.7, label='Data')\n",
"# plt.plot(x, pdf, label='GPD Fit', color='blue')\n",
"# plt.title('GPD Fit to Extremes')\n",
"# plt.legend()\n",
"# plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0c623e1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/l9/cmw34wr13t3cp2_91pq103100000gn/T/ipykernel_1457/1713124793.py:1: RuntimeWarning: divide by zero encountered in divide\n",
" inv_mag_frequency = 1/np.bincount(df['bin'].to_numpy())\n"
]
}
],
"source": [
"# inv_mag_frequency = 1/np.bincount(df['bin'].to_numpy())/\n",
"# print(inv_mag_frequency)\n",
"# inv_mag_frequency[inv_mag_frequency == np.inf] = 2\n",
"# print(inv_mag_frequency)\n",
"# print(sum(mag_frequency))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00b89e53",
"metadata": {},
"outputs": [],
"source": [
"class MultiRNN(nn.Module):\n",
"    \"\"\"GRU encoder followed by a classification head and a regression head.\n",
"\n",
"    Args:\n",
"        input_size: Number of features per time step.\n",
"        hidden_size: Width of the GRU hidden state.\n",
"        class_count: Number of classes scored by the classification head.\n",
"        output_size: Number of values produced by the regression head.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, input_size, hidden_size, class_count, output_size):\n",
"        super().__init__()\n",
"        self.rnn = nn.GRU(input_size, hidden_size, batch_first=True)\n",
"        self.classify = nn.Linear(hidden_size, class_count)\n",
"        self.regress = nn.Linear(hidden_size, output_size)\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Encode a batch of sequences and apply both heads to the last time step.\n",
"\n",
"        Args:\n",
"            x: Tensor of shape (batch, sequence, input_size).\n",
"\n",
"        Returns:\n",
"            (class_logits, regression) in training mode. In eval mode the\n",
"            logits are replaced by the argmax class indices.\n",
"        \"\"\"\n",
"        # nn.GRU starts from a zero hidden state when none is given, so there is\n",
"        # no need to build h0 by hand (the old code sized it from the notebook\n",
"        # global hidden_size, which broke any model built with another width).\n",
"        out, _ = self.rnn(x)\n",
"        last_step = out[:, -1]\n",
"        classes = self.classify(last_step)\n",
"        regresses = self.regress(last_step)\n",
"\n",
"        if self.training:\n",
"            return classes, regresses\n",
"        return torch.argmax(classes, dim=-1), regresses\n",
"\n",
"\n",
"input_size = 5\n",
"hidden_size = 20\n",
"class_count = 100\n",
"regressor_output_size = 3\n",
"# model = MultiRNN(input_size, hidden_size, class_count, regressor_output_size)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a35aa9f2",
"metadata": {},
"outputs": [],
"source": [
"class MutliRNNLightning(L.LightningModule):\n",
"    \"\"\"Lightning wrapper that trains MultiRNN on a joint classification + regression loss.\"\"\"\n",
"\n",
"    def __init__(self, input_size, hidden_size, class_count, output_size):\n",
"        super().__init__()\n",
"        # Pass the constructor argument through; the notebook global\n",
"        # regressor_output_size must not leak into the model definition.\n",
"        self.model = MultiRNN(input_size, hidden_size, class_count, output_size)\n",
"\n",
"    def forward(self, x):\n",
"        # Call the module (not .forward) so that module hooks still run.\n",
"        return self.model(x)\n",
"\n",
"    def training_step(self, batch, batch_idx):\n",
"        \"\"\"Return the summed cross-entropy and MSE loss for one batch.\n",
"\n",
"        The batch must unpack as (x, y_class, y_regress).\n",
"        NOTE(review): assumes y_class holds integer class indices and y_regress\n",
"        has the same shape as the regression head output - confirm once the\n",
"        DataLoader exists.\n",
"        \"\"\"\n",
"        x, y_class, y_regress = batch\n",
"        # In training mode MultiRNN returns (logits, regression).\n",
"        class_logits, regression = self(x)\n",
"\n",
"        loss_class = nn.functional.cross_entropy(class_logits, y_class)\n",
"        loss_regress = nn.functional.mse_loss(regression, y_regress)\n",
"        loss = loss_class + loss_regress\n",
"        acc = (class_logits.argmax(dim=-1) == y_class).float().mean()\n",
"\n",
"        self.log('train_loss_class', loss_class)\n",
"        self.log('train_loss_regress', loss_regress)\n",
"        self.log('train_loss', loss)\n",
"        self.log('train_acc', acc)\n",
"        return loss\n",
"\n",
"    def configure_optimizers(self):\n",
"        optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)\n",
"        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(\n",
"            optimizer, mode='min', factor=0.1, patience=5\n",
"        )\n",
"\n",
"        return {\n",
"            'optimizer': optimizer,\n",
"            'lr_scheduler': {\n",
"                'scheduler': scheduler,\n",
"                # No validation loop logs 'val_loss' yet, so monitor the\n",
"                # metric that training_step actually logs.\n",
"                'monitor': 'train_loss',\n",
"            },\n",
"        }\n",
"\n",
"\n",
"# Correctly spelled alias; the original (misspelled) class name keeps working.\n",
"MultiRNNLightning = MutliRNNLightning"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b49f9d92",
"metadata": {},
"outputs": [],
"source": [
"sequence_length = 1000\n",
"feature_columns = ['time', 'lat', 'lon', 'depth', 'mag']\n",
"\n",
"# Inverse-frequency class weights so that rare magnitude bins are not drowned\n",
"# out by common ones. Bins with no samples get the fixed weight 2 instead of\n",
"# the inf that a plain 1/count produces.\n",
"bin_counts = np.bincount(df['bin'].to_numpy(), minlength=class_count)\n",
"assert len(bin_counts) == class_count, \"df['bin'] contains labels >= class_count\"\n",
"inv_mag_frequency = np.where(bin_counts > 0, 1.0 / np.maximum(bin_counts, 1), 2.0)\n",
"\n",
"# criterion = nn.MSELoss()\n",
"# The weights are cast to float32 so they match the dtype of the model's logits.\n",
"class_loss = nn.CrossEntropyLoss(weight=torch.from_numpy(inv_mag_frequency).to(torch.float))\n",
"regressor_loss = nn.MSELoss()\n",
"# optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
"# Build the model here so the cell runs on a fresh kernel.\n",
"model = MultiRNN(input_size, hidden_size, class_count, regressor_output_size)\n",
"\n",
"# Two consecutive windows of rows stacked into a (2, sequence_length, 5) batch;\n",
"# the second window comes first, as in the original cell.\n",
"first_window = torch.tensor(df.iloc[:sequence_length][feature_columns].values).to(torch.float)\n",
"second_window = torch.tensor(df.iloc[sequence_length:sequence_length*2][feature_columns].values).to(torch.float)\n",
"X = torch.stack((second_window, first_window))\n",
"# print(X.shape)\n",
"model.train()\n",
"outputs = model(X)\n",
"# print(outputs[0].shape)\n",
"# print(outputs[1].shape)\n",
"\n",
"# model.eval()\n",
"# outputs = model(X)\n",
"# print(outputs[0].shape)\n",
"# print(outputs[1].shape)\n",
"\n",
"# num_epochs = 100\n",
"# for epoch in range(num_epochs):\n",
"#     model.train()\n",
"#     outputs = model(X.unsqueeze(2))\n",
"#     loss = criterion(outputs, y.unsqueeze(2))\n",
"\n",
"#     optimizer.zero_grad()\n",
"#     loss.backward()\n",
"#     optimizer.step()\n",
"\n",
"#     if (epoch + 1) % 10 == 0:\n",
"#         print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "sideprojects",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

1080
on_data.ipynb Normal file

File diff suppressed because one or more lines are too long