I went little bit further with the answer by @Bart.
You are working with 3D data - time dimension and then latitude and longitude.
If one needs to calculate statistics in different regions, I would suggest making a 2D map with some integer like values for different regions with the same spatial size as the original input data.
Here is the updated code based on @Bart's initial solution to find the number occurrences of means of 2 different regions in some predefined range:
import numpy as np
#from numba import jit
#@jit(nopython=True, nogil=True)
def is_left(xp, yp, x0, y0, x1, y1):
"""
Check whether point (xp,yp) is left of line segment ((x0,y0) to (x1,y1))
returns: >0 if left of line, 0 if on line, <0 if right of line
"""
return (x1-x0) * (yp-y0) - (xp-x0) * (y1-y0)
#@jit(nopython=True, nogil=True)
def distance(x1, y1, x2, y2):
"""
Calculate Euclidean distance.
"""
return ((x1-x2)**2 + (y1-y2)**2)**0.5
#@jit(nopython=True, nogil=True)
def point_is_on_line(x, y, x1, y1, x2, y2):
"""
Check whether point it exactly on line
"""
d1 = distance(x, y, x1, y1)
d2 = distance(x, y, x2, y2)
d3 = distance(x1, y1, x2, y2)
eps = 1e-12
return np.abs((d1+d2)-d3) < eps
#@jit(nopython=True, nogil=True)
def is_inside(xp, yp, x_set, y_set, size):
"""
Given location (xp,yp) and set of line segments (x_set, y_set), determine
whether (xp,yp) is inside (or on) polygon.
"""
# First simple check on bounds
if (xp < x_set.min() or xp > x_set.max() or yp < y_set.min() or yp > y_set.max()):
return False
wn = 0
for i in range(size-1):
# Second check: see if point exactly on line segment:
if point_is_on_line(xp, yp, x_set[i], y_set[i], x_set[i+1], y_set[i+1]):
return False
#if (is_left(xp, yp, x_set[i], y_set[i], x_set[i+1], y_set[i+1]) == 0):
# return False
# Calculate winding number
if (y_set[i] <= yp):
if (y_set[i+1] > yp):
if (is_left(xp, yp, x_set[i], y_set[i], x_set[i+1], y_set[i+1]) > 0):
wn += 1
else:
if (y_set[i+1] <= yp):
if (is_left(xp, yp, x_set[i], y_set[i], x_set[i+1], y_set[i+1]) < 0):
wn -= 1
if (wn == 0):
return False
else:
return True
#@jit(nopython=True, nogil=True)
def get_mask(mask, x, y, poly_x, poly_y):
"""
Generate mask for grid points inside polygon
"""
for j in range(y.size):
for i in range(x.size):
if is_inside(lons[i], lats[j], poly_x, poly_y, poly_x.size):
mask[j,i] = True
return mask
if __name__ == '__main__':
import pandas as pd
import matplotlib.pyplot as plt
plt.close('all')
# --------------------------------------------------------------------------------------------------------------
# Dummy data.
lats = np.linspace(0, 20, 16)
lons = np.linspace(0, 16, 16)
time = np.linspace(0,365,365)
# --------------------------------------------------------------------------------------------------------------
# let us make some random yearly data of temperature:
np.random.seed(9)
data = 0+100*(np.random.random((time.size,lats.size,lons.size))-0.5) # this is really random data
temprange = [10,20]
# --------------------------------------------------------------------------------------------------------------
# let us have several areas:
# Bounding box.
poly_x = np.array([2, 12, 9, 6, 2])
poly_y = np.array([4, 8.5, 15, 14, 4])
# ---------------------------------------------------------------------------------------------------------------
# Define areas for calculating statistics, I will use values 1 and 2 for in polygon and outside of it, one define a lot of different:
regions = np.zeros((lats.size,lons.size))
mask = np.zeros((lats.size,lons.size), dtype=bool)
get_mask(mask, lons, lats, poly_x, poly_y)
regions[mask==True] = 1
regions[mask==False] = 2
# ---------------------------------------------------------------------------------------------------------------
# our "complicated" region map:
fig = plt.figure(figsize=(10,10));ax = fig.add_subplot(111);
p0 = ax.pcolormesh(lons,lats,regions);plt.colorbar(p0);
ax.set_title('Different regions')
plt.show()
# ---------------------------------------------------------------------------------------------------------------
# Let us find the number of days within some range:
statsout = {}
for regval in np.unique(regions):
regmeantemp = np.average(data[:,regions==regval],axis=(1)) # this is mean serie for the polygon
# -----------------------------------------------------------------------------------------------------------
fig = plt.figure(figsize=(10,10));ax = fig.add_subplot(111)
p0 = ax.plot(time,regmeantemp);
ax.set_xlabel('Time');ax.set_ylabel('Mean temperature')
ax.set_title('Mean inside basin set to value '+str(regval));
plt.show()
# -----------------------------------------------------------------------------------------------------------
# let us collect the occurrences of temperature in some pre-defined range:
statsout[regval] = np.sum((regmeantemp > temprange[0]) & (regmeantemp < temprange[1]))
# ----------------------------------------------------------------------------------------------------------------
print(statsout)
Hope this helps!