A brief look at pulling data for only extreme events
Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
This commit is contained in:
parent
6c2247974c
commit
c3524eda21
402
extreme_data.ipynb
Normal file
402
extreme_data.ipynb
Normal file
@ -0,0 +1,402 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "bd523899",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import scipy.stats as stats\n",
|
||||
"from scipy.stats import genextreme\n",
|
||||
"from scipy.stats import genpareto\n",
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"from datetime import date\n",
|
||||
"from datetime import datetime\n",
|
||||
"from datetime import timedelta\n",
|
||||
"import pytz\n",
|
||||
"\n",
|
||||
"import os.path"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "c63a1b94",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"start_date = \"2024-01-01\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "903ed374",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def geturl(url):\n",
|
||||
" res = requests.get(\"https://\"+url, timeout=15)\n",
|
||||
" return {'status': res.status_code,\n",
|
||||
" 'content': res.text}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "89e1979c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def retrieveEvents(start, end, limit=20000, retry_limit=5, minmag=0):\n",
|
||||
" events = []\n",
|
||||
" moving_start = datetime.fromordinal(start.toordinal()).replace(tzinfo=pytz.utc)\n",
|
||||
" end = datetime.fromordinal(end.toordinal()).replace(tzinfo=pytz.utc)\n",
|
||||
" failures = 0\n",
|
||||
" while moving_start <= end and failures < retry_limit:\n",
|
||||
" # print(moving_start, end)\n",
|
||||
" url = \"www.seismicportal.eu/fdsnws/event/1/query?orderby=time-asc&limit={limit}&start={startdate}&end={enddate}&format=json&minmag={minmag}\".format(limit=limit, startdate=moving_start.isoformat(), enddate=end.isoformat(), minmag=minmag)\n",
|
||||
" # print(url)\n",
|
||||
" res = geturl(url)\n",
|
||||
" # print(res['status'])\n",
|
||||
" if res['status'] != 200:\n",
|
||||
" failures += 1\n",
|
||||
" continue\n",
|
||||
" content = res['content']\n",
|
||||
" json_parser = json.loads(content)\n",
|
||||
" temp_events = [event['properties'] for event in json_parser['features']]\n",
|
||||
"\n",
|
||||
" if len(temp_events) == 0:\n",
|
||||
" # print(\"ending\")\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
" # temp_events = sorted(temp_events, key=lambda d: d['time'])\n",
|
||||
"\n",
|
||||
" if len(temp_events) == limit:\n",
|
||||
" moving_start = datetime.fromisoformat(temp_events[-1]['time'])\n",
|
||||
" else:\n",
|
||||
" moving_start = end + timedelta(hours=1)\n",
|
||||
" # print(\"ending here:\", moving_start)\n",
|
||||
" events.extend(temp_events)\n",
|
||||
" # print(\"hi\")\n",
|
||||
" # return pd.DataFrame(events)\n",
|
||||
" return events\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "8bccb27c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = retrieveEvents(date.fromisoformat(start_date), date.today(), minmag=2)\n",
|
||||
"df = pd.DataFrame(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "902b6b1e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>source_id</th>\n",
|
||||
" <th>source_catalog</th>\n",
|
||||
" <th>lastupdate</th>\n",
|
||||
" <th>time</th>\n",
|
||||
" <th>flynn_region</th>\n",
|
||||
" <th>lat</th>\n",
|
||||
" <th>lon</th>\n",
|
||||
" <th>depth</th>\n",
|
||||
" <th>evtype</th>\n",
|
||||
" <th>auth</th>\n",
|
||||
" <th>mag</th>\n",
|
||||
" <th>magtype</th>\n",
|
||||
" <th>unid</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1600054</td>\n",
|
||||
" <td>EMSC-RTS</td>\n",
|
||||
" <td>2024-01-01T00:02:51.439437Z</td>\n",
|
||||
" <td>2024-01-01T00:00:29.5Z</td>\n",
|
||||
" <td>CRETE, GREECE</td>\n",
|
||||
" <td>35.1400</td>\n",
|
||||
" <td>24.1200</td>\n",
|
||||
" <td>10.0</td>\n",
|
||||
" <td>ke</td>\n",
|
||||
" <td>THE</td>\n",
|
||||
" <td>2.3</td>\n",
|
||||
" <td>ml</td>\n",
|
||||
" <td>20240101_0000001</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1600055</td>\n",
|
||||
" <td>EMSC-RTS</td>\n",
|
||||
" <td>2024-01-01T00:14:14.3925Z</td>\n",
|
||||
" <td>2024-01-01T00:03:15.0Z</td>\n",
|
||||
" <td>SULAWESI, INDONESIA</td>\n",
|
||||
" <td>-1.3000</td>\n",
|
||||
" <td>120.5100</td>\n",
|
||||
" <td>10.0</td>\n",
|
||||
" <td>ke</td>\n",
|
||||
" <td>BMKG</td>\n",
|
||||
" <td>3.1</td>\n",
|
||||
" <td>m</td>\n",
|
||||
" <td>20240101_0000002</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1600058</td>\n",
|
||||
" <td>EMSC-RTS</td>\n",
|
||||
" <td>2024-01-01T00:24:28.774809Z</td>\n",
|
||||
" <td>2024-01-01T00:03:15.14Z</td>\n",
|
||||
" <td>PUERTO RICO</td>\n",
|
||||
" <td>18.4087</td>\n",
|
||||
" <td>-66.4270</td>\n",
|
||||
" <td>105.2</td>\n",
|
||||
" <td>ke</td>\n",
|
||||
" <td>PR</td>\n",
|
||||
" <td>3.2</td>\n",
|
||||
" <td>md</td>\n",
|
||||
" <td>20240101_0000004</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1600056</td>\n",
|
||||
" <td>EMSC-RTS</td>\n",
|
||||
" <td>2024-01-01T00:14:58.984143Z</td>\n",
|
||||
" <td>2024-01-01T00:05:28.0Z</td>\n",
|
||||
" <td>COLOMBIA-ECUADOR BORDER REGION</td>\n",
|
||||
" <td>0.1100</td>\n",
|
||||
" <td>-78.9400</td>\n",
|
||||
" <td>54.0</td>\n",
|
||||
" <td>ke</td>\n",
|
||||
" <td>QUI</td>\n",
|
||||
" <td>3.5</td>\n",
|
||||
" <td>m</td>\n",
|
||||
" <td>20240101_0000003</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>1600057</td>\n",
|
||||
" <td>EMSC-RTS</td>\n",
|
||||
" <td>2024-01-02T08:04:45.107234Z</td>\n",
|
||||
" <td>2024-01-01T00:10:05.6Z</td>\n",
|
||||
" <td>NORWEGIAN SEA</td>\n",
|
||||
" <td>72.2450</td>\n",
|
||||
" <td>1.8470</td>\n",
|
||||
" <td>6.1</td>\n",
|
||||
" <td>ke</td>\n",
|
||||
" <td>BER</td>\n",
|
||||
" <td>3.7</td>\n",
|
||||
" <td>mw</td>\n",
|
||||
" <td>20240101_0000408</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>126136</th>\n",
|
||||
" <td>1809807</td>\n",
|
||||
" <td>EMSC-RTS</td>\n",
|
||||
" <td>2025-05-19T21:34:26.878673Z</td>\n",
|
||||
" <td>2025-05-18T23:24:13.0Z</td>\n",
|
||||
" <td>SAN JUAN, ARGENTINA</td>\n",
|
||||
" <td>-31.6300</td>\n",
|
||||
" <td>-70.3800</td>\n",
|
||||
" <td>138.0</td>\n",
|
||||
" <td>ke</td>\n",
|
||||
" <td>CSN</td>\n",
|
||||
" <td>3.4</td>\n",
|
||||
" <td>ml</td>\n",
|
||||
" <td>20250518_0000270</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>126137</th>\n",
|
||||
" <td>1809808</td>\n",
|
||||
" <td>EMSC-RTS</td>\n",
|
||||
" <td>2025-05-18T23:38:19.408959Z</td>\n",
|
||||
" <td>2025-05-18T23:27:24.13Z</td>\n",
|
||||
" <td>SOUTHERN ITALY</td>\n",
|
||||
" <td>39.0343</td>\n",
|
||||
" <td>16.4318</td>\n",
|
||||
" <td>9.8</td>\n",
|
||||
" <td>ke</td>\n",
|
||||
" <td>INGV</td>\n",
|
||||
" <td>2.4</td>\n",
|
||||
" <td>ml</td>\n",
|
||||
" <td>20250518_0000271</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>126138</th>\n",
|
||||
" <td>1809813</td>\n",
|
||||
" <td>EMSC-RTS</td>\n",
|
||||
" <td>2025-05-19T00:14:20.838747Z</td>\n",
|
||||
" <td>2025-05-18T23:52:33.0Z</td>\n",
|
||||
" <td>SOUTHWEST OF SUMATRA, INDONESIA</td>\n",
|
||||
" <td>-7.8200</td>\n",
|
||||
" <td>103.8600</td>\n",
|
||||
" <td>10.0</td>\n",
|
||||
" <td>ke</td>\n",
|
||||
" <td>BMKG</td>\n",
|
||||
" <td>3.5</td>\n",
|
||||
" <td>m</td>\n",
|
||||
" <td>20250518_0000275</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>126139</th>\n",
|
||||
" <td>1809809</td>\n",
|
||||
" <td>EMSC-RTS</td>\n",
|
||||
" <td>2025-05-19T06:33:38.794931Z</td>\n",
|
||||
" <td>2025-05-18T23:58:01.56Z</td>\n",
|
||||
" <td>OFF COAST OF TARAPACA, CHILE</td>\n",
|
||||
" <td>-18.5159</td>\n",
|
||||
" <td>-71.3039</td>\n",
|
||||
" <td>25.0</td>\n",
|
||||
" <td>ke</td>\n",
|
||||
" <td>EMSC</td>\n",
|
||||
" <td>4.2</td>\n",
|
||||
" <td>mb</td>\n",
|
||||
" <td>20250518_0000273</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>126140</th>\n",
|
||||
" <td>1809814</td>\n",
|
||||
" <td>EMSC-RTS</td>\n",
|
||||
" <td>2025-05-19T06:33:56.681166Z</td>\n",
|
||||
" <td>2025-05-18T23:58:56.73Z</td>\n",
|
||||
" <td>WESTERN TURKEY</td>\n",
|
||||
" <td>37.8953</td>\n",
|
||||
" <td>27.6327</td>\n",
|
||||
" <td>11.4</td>\n",
|
||||
" <td>ke</td>\n",
|
||||
" <td>EMSC</td>\n",
|
||||
" <td>2.4</td>\n",
|
||||
" <td>ml</td>\n",
|
||||
" <td>20250518_0000287</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>126141 rows × 13 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" source_id source_catalog lastupdate \\\n",
|
||||
"0 1600054 EMSC-RTS 2024-01-01T00:02:51.439437Z \n",
|
||||
"1 1600055 EMSC-RTS 2024-01-01T00:14:14.3925Z \n",
|
||||
"2 1600058 EMSC-RTS 2024-01-01T00:24:28.774809Z \n",
|
||||
"3 1600056 EMSC-RTS 2024-01-01T00:14:58.984143Z \n",
|
||||
"4 1600057 EMSC-RTS 2024-01-02T08:04:45.107234Z \n",
|
||||
"... ... ... ... \n",
|
||||
"126136 1809807 EMSC-RTS 2025-05-19T21:34:26.878673Z \n",
|
||||
"126137 1809808 EMSC-RTS 2025-05-18T23:38:19.408959Z \n",
|
||||
"126138 1809813 EMSC-RTS 2025-05-19T00:14:20.838747Z \n",
|
||||
"126139 1809809 EMSC-RTS 2025-05-19T06:33:38.794931Z \n",
|
||||
"126140 1809814 EMSC-RTS 2025-05-19T06:33:56.681166Z \n",
|
||||
"\n",
|
||||
" time flynn_region lat \\\n",
|
||||
"0 2024-01-01T00:00:29.5Z CRETE, GREECE 35.1400 \n",
|
||||
"1 2024-01-01T00:03:15.0Z SULAWESI, INDONESIA -1.3000 \n",
|
||||
"2 2024-01-01T00:03:15.14Z PUERTO RICO 18.4087 \n",
|
||||
"3 2024-01-01T00:05:28.0Z COLOMBIA-ECUADOR BORDER REGION 0.1100 \n",
|
||||
"4 2024-01-01T00:10:05.6Z NORWEGIAN SEA 72.2450 \n",
|
||||
"... ... ... ... \n",
|
||||
"126136 2025-05-18T23:24:13.0Z SAN JUAN, ARGENTINA -31.6300 \n",
|
||||
"126137 2025-05-18T23:27:24.13Z SOUTHERN ITALY 39.0343 \n",
|
||||
"126138 2025-05-18T23:52:33.0Z SOUTHWEST OF SUMATRA, INDONESIA -7.8200 \n",
|
||||
"126139 2025-05-18T23:58:01.56Z OFF COAST OF TARAPACA, CHILE -18.5159 \n",
|
||||
"126140 2025-05-18T23:58:56.73Z WESTERN TURKEY 37.8953 \n",
|
||||
"\n",
|
||||
" lon depth evtype auth mag magtype unid \n",
|
||||
"0 24.1200 10.0 ke THE 2.3 ml 20240101_0000001 \n",
|
||||
"1 120.5100 10.0 ke BMKG 3.1 m 20240101_0000002 \n",
|
||||
"2 -66.4270 105.2 ke PR 3.2 md 20240101_0000004 \n",
|
||||
"3 -78.9400 54.0 ke QUI 3.5 m 20240101_0000003 \n",
|
||||
"4 1.8470 6.1 ke BER 3.7 mw 20240101_0000408 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"126136 -70.3800 138.0 ke CSN 3.4 ml 20250518_0000270 \n",
|
||||
"126137 16.4318 9.8 ke INGV 2.4 ml 20250518_0000271 \n",
|
||||
"126138 103.8600 10.0 ke BMKG 3.5 m 20250518_0000275 \n",
|
||||
"126139 -71.3039 25.0 ke EMSC 4.2 mb 20250518_0000273 \n",
|
||||
"126140 27.6327 11.4 ke EMSC 2.4 ml 20250518_0000287 \n",
|
||||
"\n",
|
||||
"[126141 rows x 13 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "sideprojects",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user