A brief look at pulling data for only extreme events

Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
This commit is contained in:
Ethan Wellenreiter 2025-05-20 15:30:32 -04:00
parent 6c2247974c
commit c3524eda21

402
extreme_data.ipynb Normal file
View File

@ -0,0 +1,402 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 13,
"id": "bd523899",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import scipy.stats as stats\n",
"from scipy.stats import genextreme\n",
"from scipy.stats import genpareto\n",
"import requests\n",
"import json\n",
"\n",
"from datetime import date\n",
"from datetime import datetime\n",
"from datetime import timedelta\n",
"import pytz\n",
"\n",
"import os.path"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "c63a1b94",
"metadata": {},
"outputs": [],
"source": [
"start_date = \"2024-01-01\""
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "903ed374",
"metadata": {},
"outputs": [],
"source": [
"def geturl(url):\n",
" res = requests.get(\"https://\"+url, timeout=15)\n",
" return {'status': res.status_code,\n",
" 'content': res.text}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "89e1979c",
"metadata": {},
"outputs": [],
"source": [
"def retrieveEvents(start, end, limit=20000, retry_limit=5, minmag=0):\n",
" events = []\n",
" moving_start = datetime.fromordinal(start.toordinal()).replace(tzinfo=pytz.utc)\n",
" end = datetime.fromordinal(end.toordinal()).replace(tzinfo=pytz.utc)\n",
" failures = 0\n",
" while moving_start <= end and failures < retry_limit:\n",
" # print(moving_start, end)\n",
" url = \"www.seismicportal.eu/fdsnws/event/1/query?orderby=time-asc&limit={limit}&start={startdate}&end={enddate}&format=json&minmag={minmag}\".format(limit=limit, startdate=moving_start.isoformat(), enddate=end.isoformat(), minmag=minmag)\n",
" # print(url)\n",
" res = geturl(url)\n",
" # print(res['status'])\n",
" if res['status'] != 200:\n",
" failures += 1\n",
" continue\n",
" content = res['content']\n",
" json_parser = json.loads(content)\n",
" temp_events = [event['properties'] for event in json_parser['features']]\n",
"\n",
" if len(temp_events) == 0:\n",
" # print(\"ending\")\n",
" break\n",
"\n",
" # temp_events = sorted(temp_events, key=lambda d: d['time'])\n",
"\n",
" if len(temp_events) == limit:\n",
" moving_start = datetime.fromisoformat(temp_events[-1]['time'])\n",
" else:\n",
" moving_start = end + timedelta(hours=1)\n",
" # print(\"ending here:\", moving_start)\n",
" events.extend(temp_events)\n",
" # print(\"hi\")\n",
" # return pd.DataFrame(events)\n",
" return events\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "8bccb27c",
"metadata": {},
"outputs": [],
"source": [
"data = retrieveEvents(date.fromisoformat(start_date), date.today(), minmag=2)\n",
"df = pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "902b6b1e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>source_id</th>\n",
" <th>source_catalog</th>\n",
" <th>lastupdate</th>\n",
" <th>time</th>\n",
" <th>flynn_region</th>\n",
" <th>lat</th>\n",
" <th>lon</th>\n",
" <th>depth</th>\n",
" <th>evtype</th>\n",
" <th>auth</th>\n",
" <th>mag</th>\n",
" <th>magtype</th>\n",
" <th>unid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1600054</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2024-01-01T00:02:51.439437Z</td>\n",
" <td>2024-01-01T00:00:29.5Z</td>\n",
" <td>CRETE, GREECE</td>\n",
" <td>35.1400</td>\n",
" <td>24.1200</td>\n",
" <td>10.0</td>\n",
" <td>ke</td>\n",
" <td>THE</td>\n",
" <td>2.3</td>\n",
" <td>ml</td>\n",
" <td>20240101_0000001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1600055</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2024-01-01T00:14:14.3925Z</td>\n",
" <td>2024-01-01T00:03:15.0Z</td>\n",
" <td>SULAWESI, INDONESIA</td>\n",
" <td>-1.3000</td>\n",
" <td>120.5100</td>\n",
" <td>10.0</td>\n",
" <td>ke</td>\n",
" <td>BMKG</td>\n",
" <td>3.1</td>\n",
" <td>m</td>\n",
" <td>20240101_0000002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1600058</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2024-01-01T00:24:28.774809Z</td>\n",
" <td>2024-01-01T00:03:15.14Z</td>\n",
" <td>PUERTO RICO</td>\n",
" <td>18.4087</td>\n",
" <td>-66.4270</td>\n",
" <td>105.2</td>\n",
" <td>ke</td>\n",
" <td>PR</td>\n",
" <td>3.2</td>\n",
" <td>md</td>\n",
" <td>20240101_0000004</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1600056</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2024-01-01T00:14:58.984143Z</td>\n",
" <td>2024-01-01T00:05:28.0Z</td>\n",
" <td>COLOMBIA-ECUADOR BORDER REGION</td>\n",
" <td>0.1100</td>\n",
" <td>-78.9400</td>\n",
" <td>54.0</td>\n",
" <td>ke</td>\n",
" <td>QUI</td>\n",
" <td>3.5</td>\n",
" <td>m</td>\n",
" <td>20240101_0000003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1600057</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2024-01-02T08:04:45.107234Z</td>\n",
" <td>2024-01-01T00:10:05.6Z</td>\n",
" <td>NORWEGIAN SEA</td>\n",
" <td>72.2450</td>\n",
" <td>1.8470</td>\n",
" <td>6.1</td>\n",
" <td>ke</td>\n",
" <td>BER</td>\n",
" <td>3.7</td>\n",
" <td>mw</td>\n",
" <td>20240101_0000408</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126136</th>\n",
" <td>1809807</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2025-05-19T21:34:26.878673Z</td>\n",
" <td>2025-05-18T23:24:13.0Z</td>\n",
" <td>SAN JUAN, ARGENTINA</td>\n",
" <td>-31.6300</td>\n",
" <td>-70.3800</td>\n",
" <td>138.0</td>\n",
" <td>ke</td>\n",
" <td>CSN</td>\n",
" <td>3.4</td>\n",
" <td>ml</td>\n",
" <td>20250518_0000270</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126137</th>\n",
" <td>1809808</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2025-05-18T23:38:19.408959Z</td>\n",
" <td>2025-05-18T23:27:24.13Z</td>\n",
" <td>SOUTHERN ITALY</td>\n",
" <td>39.0343</td>\n",
" <td>16.4318</td>\n",
" <td>9.8</td>\n",
" <td>ke</td>\n",
" <td>INGV</td>\n",
" <td>2.4</td>\n",
" <td>ml</td>\n",
" <td>20250518_0000271</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126138</th>\n",
" <td>1809813</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2025-05-19T00:14:20.838747Z</td>\n",
" <td>2025-05-18T23:52:33.0Z</td>\n",
" <td>SOUTHWEST OF SUMATRA, INDONESIA</td>\n",
" <td>-7.8200</td>\n",
" <td>103.8600</td>\n",
" <td>10.0</td>\n",
" <td>ke</td>\n",
" <td>BMKG</td>\n",
" <td>3.5</td>\n",
" <td>m</td>\n",
" <td>20250518_0000275</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126139</th>\n",
" <td>1809809</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2025-05-19T06:33:38.794931Z</td>\n",
" <td>2025-05-18T23:58:01.56Z</td>\n",
" <td>OFF COAST OF TARAPACA, CHILE</td>\n",
" <td>-18.5159</td>\n",
" <td>-71.3039</td>\n",
" <td>25.0</td>\n",
" <td>ke</td>\n",
" <td>EMSC</td>\n",
" <td>4.2</td>\n",
" <td>mb</td>\n",
" <td>20250518_0000273</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126140</th>\n",
" <td>1809814</td>\n",
" <td>EMSC-RTS</td>\n",
" <td>2025-05-19T06:33:56.681166Z</td>\n",
" <td>2025-05-18T23:58:56.73Z</td>\n",
" <td>WESTERN TURKEY</td>\n",
" <td>37.8953</td>\n",
" <td>27.6327</td>\n",
" <td>11.4</td>\n",
" <td>ke</td>\n",
" <td>EMSC</td>\n",
" <td>2.4</td>\n",
" <td>ml</td>\n",
" <td>20250518_0000287</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>126141 rows × 13 columns</p>\n",
"</div>"
],
"text/plain": [
" source_id source_catalog lastupdate \\\n",
"0 1600054 EMSC-RTS 2024-01-01T00:02:51.439437Z \n",
"1 1600055 EMSC-RTS 2024-01-01T00:14:14.3925Z \n",
"2 1600058 EMSC-RTS 2024-01-01T00:24:28.774809Z \n",
"3 1600056 EMSC-RTS 2024-01-01T00:14:58.984143Z \n",
"4 1600057 EMSC-RTS 2024-01-02T08:04:45.107234Z \n",
"... ... ... ... \n",
"126136 1809807 EMSC-RTS 2025-05-19T21:34:26.878673Z \n",
"126137 1809808 EMSC-RTS 2025-05-18T23:38:19.408959Z \n",
"126138 1809813 EMSC-RTS 2025-05-19T00:14:20.838747Z \n",
"126139 1809809 EMSC-RTS 2025-05-19T06:33:38.794931Z \n",
"126140 1809814 EMSC-RTS 2025-05-19T06:33:56.681166Z \n",
"\n",
" time flynn_region lat \\\n",
"0 2024-01-01T00:00:29.5Z CRETE, GREECE 35.1400 \n",
"1 2024-01-01T00:03:15.0Z SULAWESI, INDONESIA -1.3000 \n",
"2 2024-01-01T00:03:15.14Z PUERTO RICO 18.4087 \n",
"3 2024-01-01T00:05:28.0Z COLOMBIA-ECUADOR BORDER REGION 0.1100 \n",
"4 2024-01-01T00:10:05.6Z NORWEGIAN SEA 72.2450 \n",
"... ... ... ... \n",
"126136 2025-05-18T23:24:13.0Z SAN JUAN, ARGENTINA -31.6300 \n",
"126137 2025-05-18T23:27:24.13Z SOUTHERN ITALY 39.0343 \n",
"126138 2025-05-18T23:52:33.0Z SOUTHWEST OF SUMATRA, INDONESIA -7.8200 \n",
"126139 2025-05-18T23:58:01.56Z OFF COAST OF TARAPACA, CHILE -18.5159 \n",
"126140 2025-05-18T23:58:56.73Z WESTERN TURKEY 37.8953 \n",
"\n",
" lon depth evtype auth mag magtype unid \n",
"0 24.1200 10.0 ke THE 2.3 ml 20240101_0000001 \n",
"1 120.5100 10.0 ke BMKG 3.1 m 20240101_0000002 \n",
"2 -66.4270 105.2 ke PR 3.2 md 20240101_0000004 \n",
"3 -78.9400 54.0 ke QUI 3.5 m 20240101_0000003 \n",
"4 1.8470 6.1 ke BER 3.7 mw 20240101_0000408 \n",
"... ... ... ... ... ... ... ... \n",
"126136 -70.3800 138.0 ke CSN 3.4 ml 20250518_0000270 \n",
"126137 16.4318 9.8 ke INGV 2.4 ml 20250518_0000271 \n",
"126138 103.8600 10.0 ke BMKG 3.5 m 20250518_0000275 \n",
"126139 -71.3039 25.0 ke EMSC 4.2 mb 20250518_0000273 \n",
"126140 27.6327 11.4 ke EMSC 2.4 ml 20250518_0000287 \n",
"\n",
"[126141 rows x 13 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "sideprojects",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}