%%capture --no-display
!pip install cdsapi


import os
import cdsapi
from pathlib import Path
import matplotlib.pyplot as plt
import cartopy
import cartopy.crs as ccrs
import xarray as xr
import shutil
import pandas as pd
import numpy as np
import zipfile
from collections import OrderedDict
from matplotlib.lines import Line2D
import matplotlib.colors as colors
import warnings

# Suppress all Python warnings for the remainder of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used below - consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

# IPython magic (not plain Python); presumably selects the inline matplotlib
# backend so that figures are rendered in the notebook output - confirm.
%matplotlib inline


# Colour ramp "BrownBlue" used as the colormap of the soil moisture map.
# The anchor colours are written as 8-bit RGB triplets and scaled to the
# 0..1 range before the colormap is built from them.
_BROWN_BLUE_RGB = np.array(
    [
        [134, 80, 16],
        [164, 117, 13],
        [219, 190, 24],
        [250, 249, 156],
        [144, 202, 240],
        [4, 145, 251],
        [8, 83, 211],
        [13, 37, 161],
    ]
)
CM_SM = colors.LinearSegmentedColormap.from_list("BrownBlue", _BROWN_BLUE_RGB / 255.0)


# Endpoint and credentials handed to cdsapi.Client for the data request below.
# NOTE(review): "/api/v2" looks like the legacy CDS endpoint - confirm that it
# is still served (and matches the installed cdsapi version) before running.
URL = "https://cds.climate.copernicus.eu/api/v2"
# If you have a valid key, set it in the following line:
# (the value below is a placeholder; avoid committing a real key)
KEY = "#######################################"


# Directory that holds every file this notebook downloads or derives.
DATADIR = "./data_dir"
os.makedirs(DATADIR, exist_ok=True)

# Two files live in DATADIR: the zip file downloaded from the CDS, and the
# netCDF file which contains the merged contents of the monthly files.
download_zip_file, merged_netcdf_file = (
    os.path.join(DATADIR, fname)
    for fname in ("sm_monthly_passive_v202012.zip", "sm_monthly_passive_v202012.nc")
)


# Reuse an earlier download when the zip file is already on disk; otherwise
# make the data request via the cdsapi and store the result as the zip file.
if os.path.isfile(download_zip_file):
    print(f"Using previously downloaded file: {download_zip_file}")
else:
    sm_request = {
        "variable": "volumetric_surface_soil_moisture",
        "type_of_sensor": "passive",
        "time_aggregation": "month_average",  # required for examples in this notebook
        "year": list(map(str, range(1991, 2023))),
        "month": [str(m).zfill(2) for m in range(1, 13)],
        "day": "01",
        "area": [72, -11, 34, 40],
        "type_of_record": ["cdr", "icdr"],
        "version": ["v202012"],
        "format": "zip",
    }
    c = cdsapi.Client(url=URL, key=KEY)
    c.retrieve("satellite-soil-moisture", sm_request, download_zip_file)

Using previously downloaded file: ./data_dir/sm_monthly_passive_v202012.zip


# Build the merged netCDF stack once; later runs reuse the file on disk.
if not os.path.isfile(merged_netcdf_file):
    # Unzip the data. The dataset is split in monthly files.
    with zipfile.ZipFile(download_zip_file, "r") as zip_ref:
        filelist = [os.path.join(DATADIR, f) for f in zip_ref.namelist()]
        zip_ref.extractall(DATADIR)
    # Ensure the filelist is in the correct order:
    filelist = sorted(filelist)

    # Merge all unpacked files into one.
    # We do this in batches of 100 files to avoid issues with dask.
    # Stepping through the list by start offset never yields an empty batch,
    # even when the number of files is an exact multiple of 100.
    new_filelist = []
    for i, start in enumerate(range(0, len(filelist), 100)):
        temp_fname = os.path.join(DATADIR, f"temp_file_{i}.nc")
        # Close each batch once it is written so that its source files can be
        # deleted safely further down.
        with xr.open_mfdataset(filelist[start : start + 100]) as ds:
            ds.to_netcdf(temp_fname)
        new_filelist.append(temp_fname)

    with xr.open_mfdataset(new_filelist) as ds:
        ds.to_netcdf(merged_netcdf_file)

    # Delete the unpacked monthly files and the intermediate batch files
    for f in filelist + new_filelist:
        os.remove(f)

    print(f"Preprocessing done. Netcdf stack now available at: {merged_netcdf_file}")
else:
    print(f"No preprocessing required. Netcdf stack already available at: {merged_netcdf_file}")

No preprocessing required. Netcdf stack already available at: ./data_dir/sm_monthly_passive_v202012.nc


# Open the merged netCDF stack and keep the unit and valid range that are
# stored in the attributes of the soil moisture variable "sm".
DS = xr.open_mfdataset(merged_netcdf_file)
_sm_attrs = DS["sm"].attrs
SM_UNIT, SM_RANGE = _sm_attrs["units"], _sm_attrs["valid_range"]

DS

<xarray.Dataset>
Dimensions:     (lat: 152, lon: 204, time: 384)
Coordinates:
  * lat         (lat) float32 71.88 71.62 71.38 71.12 ... 34.62 34.38 34.12
  * lon         (lon) float32 -10.88 -10.62 -10.38 -10.12 ... 39.38 39.62 39.88
  * time        (time) datetime64[ns] 1991-01-01 1991-02-01 ... 2022-12-01
Data variables:
    sm          (time, lat, lon) float32 dask.array<chunksize=(384, 152, 204), meta=np.ndarray>
    sensor      (time, lat, lon) float32 dask.array<chunksize=(384, 152, 204), meta=np.ndarray>
    freqbandID  (time, lat, lon) float32 dask.array<chunksize=(384, 152, 204), meta=np.ndarray>
    nobs        (time, lat, lon) float32 dask.array<chunksize=(384, 152, 204), meta=np.ndarray>
Attributes: (12/40)
    title:                      C3S Surface Soil Moisture merged PASSIVE Product
    institution:                EODC (AUT); TU Wien (AUT); VanderSat B.V. (NL)
    contact:                    C3S_SM_Science@eodc.eu
    source:                     LPRMv6/SMMR/Nimbus 7 L3 Surface Soil Moisture...
    platform:                   Nimbus 7, DMSP, TRMM, AQUA, Coriolis, GCOM-W1...
    sensor:                     SMMR, SSM/I, TMI, AMSR-E, WindSat, AMSR2, SMO...
    ...                         ...
    id:                         C3S-SOILMOISTURE-L3S-SSMV-PASSIVE-MONTHLY-199...
    history:                    2021-03-29T13:46:57.630282 mean calculated
    date_created:               2021-03-29T13:46:57Z
    time_coverage_start:        1990-12-31T12:00:00Z
    time_coverage_end:          1991-01-31T12:00:00Z
    time_coverage_duration:     P1M

array([71.875, 71.625, 71.375, 71.125, 70.875, 70.625, 70.375, 70.125, 69.875,
       69.625, 69.375, 69.125, 68.875, 68.625, 68.375, 68.125, 67.875, 67.625,
       67.375, 67.125, 66.875, 66.625, 66.375, 66.125, 65.875, 65.625, 65.375,
       65.125, 64.875, 64.625, 64.375, 64.125, 63.875, 63.625, 63.375, 63.125,
       62.875, 62.625, 62.375, 62.125, 61.875, 61.625, 61.375, 61.125, 60.875,
       60.625, 60.375, 60.125, 59.875, 59.625, 59.375, 59.125, 58.875, 58.625,
       58.375, 58.125, 57.875, 57.625, 57.375, 57.125, 56.875, 56.625, 56.375,
       56.125, 55.875, 55.625, 55.375, 55.125, 54.875, 54.625, 54.375, 54.125,
       53.875, 53.625, 53.375, 53.125, 52.875, 52.625, 52.375, 52.125, 51.875,
       51.625, 51.375, 51.125, 50.875, 50.625, 50.375, 50.125, 49.875, 49.625,
       49.375, 49.125, 48.875, 48.625, 48.375, 48.125, 47.875, 47.625, 47.375,
       47.125, 46.875, 46.625, 46.375, 46.125, 45.875, 45.625, 45.375, 45.125,
       44.875, 44.625, 44.375, 44.125, 43.875, 43.625, 43.375, 43.125, 42.875,
       42.625, 42.375, 42.125, 41.875, 41.625, 41.375, 41.125, 40.875, 40.625,
       40.375, 40.125, 39.875, 39.625, 39.375, 39.125, 38.875, 38.625, 38.375,
       38.125, 37.875, 37.625, 37.375, 37.125, 36.875, 36.625, 36.375, 36.125,
       35.875, 35.625, 35.375, 35.125, 34.875, 34.625, 34.375, 34.125],
      dtype=float32)

array([-10.875, -10.625, -10.375, ...,  39.375,  39.625,  39.875],
      dtype=float32)

array(['1991-01-01T00:00:00.000000000', '1991-02-01T00:00:00.000000000',
       '1991-03-01T00:00:00.000000000', ..., '2022-10-01T00:00:00.000000000',
       '2022-11-01T00:00:00.000000000', '2022-12-01T00:00:00.000000000'],
      dtype='datetime64[ns]')

PandasIndex(Float64Index([71.875, 71.625, 71.375, 71.125, 70.875, 70.625, 70.375, 70.125,
              69.875, 69.625,
              ...
              36.375, 36.125, 35.875, 35.625, 35.375, 35.125, 34.875, 34.625,
              34.375, 34.125],
             dtype='float64', name='lat', length=152))

PandasIndex(Float64Index([-10.875, -10.625, -10.375, -10.125,  -9.875,  -9.625,  -9.375,
               -9.125,  -8.875,  -8.625,
              ...
               37.625,  37.875,  38.125,  38.375,  38.625,  38.875,  39.125,
               39.375,  39.625,  39.875],
             dtype='float64', name='lon', length=204))

PandasIndex(DatetimeIndex(['1991-01-01', '1991-02-01', '1991-03-01', '1991-04-01',
               '1991-05-01', '1991-06-01', '1991-07-01', '1991-08-01',
               '1991-09-01', '1991-10-01',
               ...
               '2022-03-01', '2022-04-01', '2022-05-01', '2022-06-01',
               '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01',
               '2022-11-01', '2022-12-01'],
              dtype='datetime64[ns]', name='time', length=384, freq=None))


# Study areas, keyed by name. Every value is a pair
#   ([min Lon., max Lon., min Lat., max Lat.], [Lon, Lat])
# i.e. the bounding box of the area followed by its focus point.
BBOXES = OrderedDict()
BBOXES["Balkans"] = ([16, 29, 36, 45], [24, 42])
BBOXES["Cental Europe"] = ([6, 22.5, 46, 51], [15, 49.5])
BBOXES["France"] = ([-4.8, 8.4, 42.3, 51], [4, 47])
BBOXES["Germany"] = ([6, 15, 47, 55], [9, 50])
BBOXES["Iberian Peninsula"] = ([-10, 3.4, 36, 44.4], [-5.4, 41.3])
BBOXES["Italy"] = ([7, 19.0, 36.7, 47.0], [12.8, 43.4])
BBOXES["S-UK & N-France"] = ([-5.65, 2.5, 48, 54], [0, 51])


# Name of the study area used by the examples that follow; any key of BBOXES
# is valid. Re-run this cell after choosing a different study area!
STUDY_AREA = "Germany"

print(f"Available study areas are: {list(BBOXES.keys())}")
print(f"The chosen study area for the following examples is: {STUDY_AREA}")

Available study areas are: ['Balkans', 'Cental Europe', 'France', 'Germany', 'Iberian Peninsula', 'Italy', 'S-UK & N-France']
The chosen study area for the following examples is: Germany


DATE = "2022-05-01"  # Time stamp to create plot for, you can choose a different date!


# Stop early when the selected study area or the selected date does not exist.
if not (STUDY_AREA in BBOXES):
    raise KeyError(f"Unknown STUDY_AREA: {STUDY_AREA}. Select one of {BBOXES.keys()}")
# All time stamps of the data set, formatted as YYYY-MM-DD strings
_dates = pd.to_datetime(DS["time"].values).strftime("%Y-%m-%d").tolist()
if not (DATE in _dates):
    raise KeyError(f"{DATE} not found in data. " f"Select one of {_dates}")

# Draw two maps for the chosen DATE: soil moisture (left) and, when the
# variable is available, the number of valid observations (right). The chosen
# study area is marked on the left map.

# Create an empty figure with 2 subplots
fig, axs = plt.subplots(1, 2, figsize=(17, 5), subplot_kw={"projection": ccrs.PlateCarree()})

# Extract and plot soil moisture image for chosen date using xarray.Dataset.sel and Dataset.plot:
p_sm = (
    DS["sm"]
    .sel(time=DATE)
    .plot(
        transform=ccrs.PlateCarree(),
        ax=axs[0],
        cmap=CM_SM,
        cbar_kwargs={"label": f"Soil Moisture [{SM_UNIT}]"},
    )
)
axs[0].set_title(f"{DATE} - Soil Moisture")

# Extract and plot 'nobs' image for chosen date:
if "nobs" in DS.variables:
    # Note: nobs is only available for monthly and 10-daily data
    p_obs = (
        DS["nobs"]
        .sel(time=DATE)
        .plot(
            transform=ccrs.PlateCarree(),
            ax=axs[1],
            vmax=31,
            vmin=0,
            cmap=plt.get_cmap("YlGnBu"),
            cbar_kwargs={"label": "Days with valid observations"},
        )
    )
    axs[1].set_title(f"{DATE} - Data coverage")
else:
    p_obs = None

bbox = BBOXES[STUDY_AREA][0]
point = BBOXES[STUDY_AREA][1]

# Add basemap features to every map that was actually drawn:
for p in [p_sm, p_obs]:
    if p is None:
        continue
    p.axes.add_feature(cartopy.feature.LAND, zorder=0, facecolor="gray")
    p.axes.coastlines()

# Add focus point of study area to first map:
axs[0].plot(
    [point[0]],
    [point[1]],
    color="red",
    marker="X",
    markersize=10,
    transform=ccrs.PlateCarree(),
)
# Add study area bounding box to first map (closed polygon through all 4 corners):
axs[0].plot(
    [bbox[0], bbox[0], bbox[1], bbox[1], bbox[0]],
    [bbox[2], bbox[3], bbox[3], bbox[2], bbox[2]],
    color="red",
    linewidth=3,
    transform=ccrs.PlateCarree(),
)

# Draw grid lines and labels for both maps (labels on the left and bottom only):
for ax in axs:
    gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, alpha=0.25)
    gl.top_labels, gl.right_labels = False, False

# Add study area legend. The focus point handle uses the same marker and size
# as the symbol plotted on the map so that both look alike.
axs[0].legend(
    handles=[
        Line2D([0], [0], color="r", lw=4, label=f"{STUDY_AREA} Study Area"),
        Line2D(
            [0],
            [0],
            lw=0,
            marker="X",
            markersize=10,
            color="r",
            label=f"{STUDY_AREA} Focus Point",
        ),
    ]
)

<matplotlib.legend.Legend at 0x7fc7315aac10>


BASELINE_YEARS = (1991, 2020)  # first and last year to consider for climatology, can be changed.


# Extract data at location at the 'focus point' of the chosen study area:
lon, lat = float(BBOXES[STUDY_AREA][1][0]), float(BBOXES[STUDY_AREA][1][1])
ts = DS["sm"].sel(lon=lon, lat=lat, method="nearest").to_pandas()
# Longitudes in this data set are signed (negative values lie in the west),
# so the number is labelled as degrees east.
location_label = f"Lon: {lon}° E, Lat: {lat}° N"

# Compute all metrics (climatology, abs. / rel. anomalies, z-scores).
# The climatology is the mean / std. dev. per calendar month over the baseline years:
clim_data = ts.loc[f"{BASELINE_YEARS[0]}-01-01":f"{BASELINE_YEARS[1]}-12-31"]
clim_std = pd.Series(clim_data.groupby(clim_data.index.month).std(), name="climatology_std")
clim_mean = pd.Series(clim_data.groupby(clim_data.index.month).mean(), name="climatology")

# Look up the climatology of the matching calendar month for every time stamp
months = ts.index.month
ts = ts.to_frame(name="sm")
ts["climatology"] = clim_mean.reindex(months).to_numpy()
ts["climatology_std"] = clim_std.reindex(months).to_numpy()
ts["abs_anomaly"] = ts["sm"] - ts["climatology"]
ts["rel_anomaly"] = (ts["sm"] - ts["climatology"]) / ts["climatology"] * 100
ts["z_score"] = (ts["sm"] - ts["climatology"]) / ts["climatology_std"]

# Generate five time series plots (absolute SM, climatology, all 3 anomaly metrics):
fig, axs = plt.subplots(5, 1, figsize=(10, 12))

ts["sm"].plot(
    ax=axs[0],
    title=f"Soil Moisture at focus point of `{STUDY_AREA}` study area ({location_label})",
    ylabel=f"SM $[{SM_UNIT}]$",
    xlabel="Time [year]",
)

# One faint line per baseline year. The months are taken from the data itself,
# so a year with fewer than 12 values can still be drawn.
for _year, g in clim_data.groupby(clim_data.index.year):
    axs[1].plot(g.index.month, g.values, alpha=0.2)

clim_mean.plot(
    ax=axs[1],
    color="blue",
    title=f"Soil Moisture Climatology at {location_label}",
    ylabel=f"SM $[{SM_UNIT}]$",
    label="mean",
)
# Raw strings keep the backslashes of the math text intact
clim_std.plot(ax=axs[1], label=r"std.dev. $\sigma$", xlabel="Time [month]")
axs[1].legend()

metrics = ["Absolute Anomalies", "Relative Anomalies", "Z-Scores"]
columns = ["abs_anomaly", "rel_anomaly", "z_score"]
ylabels = [f"Anomaly $[{SM_UNIT}]$", r"Anomaly $[\%]$", r"Z-score $[\sigma]$"]

# The anomaly metrics fill the remaining axes (2..4): values >= 0 in blue, values < 0 in red
for i, (metric, col, ylabel) in enumerate(zip(metrics, columns, ylabels), start=2):
    axs[i].axhline(0, color="k")
    axs[i].fill_between(ts[col].index, ts[col].values, where=ts[col].values >= 0, color="blue")
    axs[i].fill_between(ts[col].index, ts[col].values, where=ts[col].values < 0, color="red")
    axs[i].set_ylabel(ylabel)
    axs[i].set_xlabel("Time [year]")
    axs[i].set_title(f"Soil Moisture {metric} at {location_label}")

plt.tight_layout()


BASELINE_YEARS = (1991, 2020)


# Climatology of the whole image stack: restrict the soil moisture variable to
# the baseline years, then average all time steps that share a calendar month.
baseline_slice = slice(f"{BASELINE_YEARS[0]}-01-01", f"{BASELINE_YEARS[1]}-12-31")
_baseline_sm = DS["sm"].sel(time=baseline_slice)
CLIM = _baseline_sm.groupby("time.month").mean()

CLIM

<xarray.DataArray 'sm' (month: 12, lat: 152, lon: 204)>
dask.array<stack, shape=(12, 152, 204), dtype=float32, chunksize=(1, 152, 204), chunktype=numpy.ndarray>
Coordinates:
  * lat      (lat) float32 71.88 71.62 71.38 71.12 ... 34.88 34.62 34.38 34.12
  * lon      (lon) float32 -10.88 -10.62 -10.38 -10.12 ... 39.38 39.62 39.88
  * month    (month) int64 1 2 3 4 5 6 7 8 9 10 11 12
Attributes:
    dtype:            float32
    units:            m3 m-3
    valid_range:      [0. 1.]
    long_name:        Volumetric Soil Moisture
    _CoordinateAxes:  time lat lon

array([71.875, 71.625, 71.375, 71.125, 70.875, 70.625, 70.375, 70.125, 69.875,
       69.625, 69.375, 69.125, 68.875, 68.625, 68.375, 68.125, 67.875, 67.625,
       67.375, 67.125, 66.875, 66.625, 66.375, 66.125, 65.875, 65.625, 65.375,
       65.125, 64.875, 64.625, 64.375, 64.125, 63.875, 63.625, 63.375, 63.125,
       62.875, 62.625, 62.375, 62.125, 61.875, 61.625, 61.375, 61.125, 60.875,
       60.625, 60.375, 60.125, 59.875, 59.625, 59.375, 59.125, 58.875, 58.625,
       58.375, 58.125, 57.875, 57.625, 57.375, 57.125, 56.875, 56.625, 56.375,
       56.125, 55.875, 55.625, 55.375, 55.125, 54.875, 54.625, 54.375, 54.125,
       53.875, 53.625, 53.375, 53.125, 52.875, 52.625, 52.375, 52.125, 51.875,
       51.625, 51.375, 51.125, 50.875, 50.625, 50.375, 50.125, 49.875, 49.625,
       49.375, 49.125, 48.875, 48.625, 48.375, 48.125, 47.875, 47.625, 47.375,
       47.125, 46.875, 46.625, 46.375, 46.125, 45.875, 45.625, 45.375, 45.125,
       44.875, 44.625, 44.375, 44.125, 43.875, 43.625, 43.375, 43.125, 42.875,
       42.625, 42.375, 42.125, 41.875, 41.625, 41.375, 41.125, 40.875, 40.625,
       40.375, 40.125, 39.875, 39.625, 39.375, 39.125, 38.875, 38.625, 38.375,
       38.125, 37.875, 37.625, 37.375, 37.125, 36.875, 36.625, 36.375, 36.125,
       35.875, 35.625, 35.375, 35.125, 34.875, 34.625, 34.375, 34.125],
      dtype=float32)

array([-10.875, -10.625, -10.375, ...,  39.375,  39.625,  39.875],
      dtype=float32)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

PandasIndex(Float64Index([71.875, 71.625, 71.375, 71.125, 70.875, 70.625, 70.375, 70.125,
              69.875, 69.625,
              ...
              36.375, 36.125, 35.875, 35.625, 35.375, 35.125, 34.875, 34.625,
              34.375, 34.125],
             dtype='float64', name='lat', length=152))

PandasIndex(Float64Index([-10.875, -10.625, -10.375, -10.125,  -9.875,  -9.625,  -9.375,
               -9.125,  -8.875,  -8.625,
              ...
               37.625,  37.875,  38.125,  38.375,  38.625,  38.875,  39.125,
               39.375,  39.625,  39.875],
             dtype='float64', name='lon', length=204))

PandasIndex(Int64Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype='int64', name='month'))


# Subtract from every time step the climatological mean of its calendar month.
# Selecting CLIM by DS.time.dt.month yields one climatology image per time
# step; the "month" coordinate that comes along is then dropped.
# drop_vars replaces the deprecated DataArray.drop.
DS["sm_anomaly"] = DS["sm"] - CLIM.sel(month=DS.time.dt.month).drop_vars("month")


# Cut the anomalies to the bounding box of the chosen study area and average
# over both spatial dimensions, which leaves one value per time step.
_lon_min, _lon_max, _lat_min, _lat_max = BBOXES[STUDY_AREA][0]
subset = DS[["sm_anomaly"]].sel(
    lon=slice(_lon_min, _lon_max),
    lat=slice(_lat_max, _lat_min),  # slice runs from max. to min. latitude
)
MEAN_TS = subset.mean(dim=["lat", "lon"]).to_pandas()


YEAR = 2018  # choose a year to plot on the map


# Combine all data in the study area to an annual mean time series.
# Grouping by calendar year gives an index of plain years directly and does
# not depend on the resample alias "A", which newer pandas versions deprecate.
mean_ts_annual = MEAN_TS["sm_anomaly"].groupby(MEAN_TS.index.year).mean()

# Create a new figure with 2 subplots (1:2 width ratio) using matplotlib GridSpec
fig = plt.figure(figsize=(15, 4), constrained_layout=True)
gs = fig.add_gridspec(1, 3)
map_ax = fig.add_subplot(gs[0, 0], projection=ccrs.PlateCarree())
ts_ax = fig.add_subplot(gs[0, 1:])

# Select all anomalies for a year and compute the average, then create a map plot
DS["sm_anomaly"].sel(time=slice(f"{YEAR}-01-01", f"{YEAR}-12-31")).mean("time").plot(
    transform=ccrs.PlateCarree(),
    ax=map_ax,
    cmap=plt.get_cmap("RdBu"),
    cbar_kwargs={"label": f"Anomaly [{SM_UNIT}]"},
)

# Add features to map
map_ax.add_feature(cartopy.feature.LAND, zorder=0, facecolor="gray")
map_ax.coastlines()
map_ax.add_feature(cartopy.feature.BORDERS)
map_ax.set_title(f"{YEAR} - Soil Moisture Anomaly")

# Plot study area bounding box on map (closed polygon through all 4 corners)
bbox = BBOXES[STUDY_AREA][0]
map_ax.plot(
    [bbox[0], bbox[0], bbox[1], bbox[1], bbox[0]],
    [bbox[2], bbox[3], bbox[3], bbox[2], bbox[2]],
    color="red",
    linewidth=3,
    transform=ccrs.PlateCarree(),
)
# Add grid lines to map (labels on the left and bottom only)
gl = map_ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, alpha=0.25)
gl.right_labels, gl.top_labels = False, False

# Add time series of study area as bar plot to right subplot
bars = ts_ax.bar(mean_ts_annual.index, mean_ts_annual.values)
ts_ax.set_title(f"Annual conditions in `{STUDY_AREA}` study area")
ts_ax.set_xlabel("Year")
ts_ax.set_ylabel(f"Anomaly [{SM_UNIT}]")
ts_ax.axhline(y=0, color="black", linewidth=1)

# Colour the bars by sign (positive: blue, otherwise: red) and outline only
# the bar of the year that is shown on the map:
for year, value, bar in zip(mean_ts_annual.index, mean_ts_annual.values, bars.patches):
    bar.set_facecolor("blue" if value > 0 else "red")
    if year == YEAR:
        bar.set_edgecolor("k")
        bar.set_linewidth(3)

C3S Soil Moisture Data Access and Anomaly Analysis Notebook

Setup

1. About C3S Satellite Soil Moisture

Soil moisture from radiometer measurements (PASSIVE)

Soil moisture from scatterometer measurements (ACTIVE)

Merged product (COMBINED)

2. Data Access and Download

Creating a valid CDS data request for satellite soil moisture

Getting your CDS API Key

Unpacking & Loading Data with xarray

Study areas

Application 1: Data Visualization

Application 2: Time Series Extraction and Analysis

Application 3: Vectorized Anomaly Computation