Jupyter Book GitHub repo.


This notebook brings ARGO drifter data into the picture.

  • compare with RCA profilers

  • compare with ROMS

From Jessica Scheick and team: The ICESAT Icepyx repository has search/retrieval APIs for ARGO. See the documentation.ipynb notebook for installation and use notes.

Jessica contributes the following context:

  • The history of ARGO drifters features a transition from basic CTD instrumentation to a BGC ensemble

    • The latter is comparable with the shallow profiler ensemble

  • The Icepyx interface is able to search for and download data from both CTD and BGC drifters

    • Icepyx is a generic data interface with additional code focused on ARGO

    My experience comes from working with a team on creating a way to easily request ARGO data coincident with ICESat-2 data

    • Includes creating the scaffolding to readily add any other datasets you might want to grab…

    • …while relying on some of the same underlying space/time management infrastructure (preliminary documentation)…

    • …and taking care of properly formatting requests.

  • At this stage: The code works so I should be able to get you set up to use it

    • even though it might still change a bit

    • even though it isn’t merged into a packaged copy of the software yet.

  • Ultimately, the API selectively returns the argo parameters you’re interested in

    • (for the requested space and time and, if you request it, depth as well),

    • and we read it right in to a dataframe.

  • There is a data size request limit, but I haven’t pushed it

    • and we could probably pretty easily make a few smaller requests

  • Idea: Install/work from a branch of a GitHub repository

    • specifically the icepyx argo branch, which is where we’re building this functionality

    • Zoom to help with setting things up…

    • Get some code up and running

import os, sys, time, glob, warnings
from IPython.display import clear_output             # use inside loop with clear_output(wait = True) followed by print(i)
# Paths are resolved relative to the directory the notebook is started from.
# Large datasets reside outside the repository, two levels above this_dir.
this_dir = os.getcwd()
data_dir = f'{this_dir}/../../data'

from matplotlib import pyplot as plt
from matplotlib import colors as mplcolors
import numpy as np, pandas as pd, xarray as xr
from numpy import datetime64 as dt64, timedelta64 as td64

# convenience functions abbreviating 'datetime64' and so on
def doy(theDatetime):
    """Return the 1-based day of year for a numpy datetime64 value.

    The year is taken from the first four characters of the value's string
    form; any fractional day is truncated before the 1-based offset is added.
    """
    year_start = dt64(str(theDatetime)[0:4] + '-01-01')
    days_elapsed = (theDatetime - year_start) / td64(1, 'D')
    return int(days_elapsed) + 1
def dt64_from_doy(year, doy):
    """Return the datetime64 date for a year and a 1-based day of year.

    Note: inside this function the parameter `doy` shadows the module-level
    function of the same name.
    """
    january_first = dt64(str(year) + '-01-01')
    day_offset = td64(doy - 1, 'D')
    return january_first + day_offset
def day_of_month_to_string(d):
    """Return d as text, prefixed with '0' when d is 9 or less."""
    text = str(d)
    if d > 9:
        return text
    return '0' + text

# Report the major version of the Python interpreter running this notebook
print('\nJupyter Notebook running Python {}'.format(sys.version_info.major))
Jupyter Notebook running Python 3

Subsequent cells are copied from the argo notebook in the chlorophyll repo.

# Open the two ARGO profile files stored under data_dir/argo as xarray Datasets
argo1_ds = xr.open_dataset(data_dir + '/argo/argo_profiles1.nc')
argo2_ds = xr.open_dataset(data_dir + '/argo/argo_profiles2.nc')

# printing argo1_ds['LONGITUDE'] and likewise for argo2_ds shows the latter is close to Oregon Slope Base
# more to try printing:
# argo2_ds
# argo2_ds['JULD']
# argo2_ds['JULD'], argo2_ds['LONGITUDE'], argo2_ds['LATITUDE']

# Convert the TEMP variable of the second file to a pandas DataFrame
argo2_df = argo2_ds['TEMP'].to_dataframe()

# Create a figure with a single axes; nothing is drawn on it in this cell
fig,ax = plt.subplots()
KeyError                                  Traceback (most recent call last)
File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/file_manager.py:211, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    210 try:
--> 211     file = self._cache[self._key]
    212 except KeyError:

File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/lru_cache.py:56, in LRUCache.__getitem__(self, key)
     55 with self._lock:
---> 56     value = self._cache[key]
     57     self._cache.move_to_end(key)

KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/home/runner/work/oceanography/oceanography/data/argo/argo_profiles1.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), '0d01b2e7-466c-4f1c-b9c9-4c8a909a1d26']

During handling of the above exception, another exception occurred:

FileNotFoundError                         Traceback (most recent call last)
Cell In[2], line 1
----> 1 argo1_ds = xr.open_dataset(data_dir + '/argo/argo_profiles1.nc')
      2 argo2_ds = xr.open_dataset(data_dir + '/argo/argo_profiles2.nc')
      4 # printing argo1['LONGITUDE'] and for argo2 shows the latter is close to Oregon Slope Base
      5 # more to try printing:
      6 # argo2
      7 # argo2['JULD']
      8 # argo2['JULD'], argo2['LONGITUDE'], argo2['LATITUDE']

File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/api.py:686, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    674 decoders = _resolve_decoders_kwargs(
    675     decode_cf,
    676     open_backend_dataset_parameters=backend.open_dataset_parameters,
    682     decode_coords=decode_coords,
    683 )
    685 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 686 backend_ds = backend.open_dataset(
    687     filename_or_obj,
    688     drop_variables=drop_variables,
    689     **decoders,
    690     **kwargs,
    691 )
    692 ds = _dataset_from_backend_dataset(
    693     backend_ds,
    694     filename_or_obj,
    704     **kwargs,
    705 )
    706 return ds

File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:666, in NetCDF4BackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, auto_complex, lock, autoclose)
    644 def open_dataset(
    645     self,
    646     filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    663     autoclose=False,
    664 ) -> Dataset:
    665     filename_or_obj = _normalize_path(filename_or_obj)
--> 666     store = NetCDF4DataStore.open(
    667         filename_or_obj,
    668         mode=mode,
    669         format=format,
    670         group=group,
    671         clobber=clobber,
    672         diskless=diskless,
    673         persist=persist,
    674         auto_complex=auto_complex,
    675         lock=lock,
    676         autoclose=autoclose,
    677     )
    679     store_entrypoint = StoreBackendEntrypoint()
    680     with close_on_error(store):

File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:452, in NetCDF4DataStore.open(cls, filename, mode, format, group, clobber, diskless, persist, auto_complex, lock, lock_maker, autoclose)
    448     kwargs["auto_complex"] = auto_complex
    449 manager = CachingFileManager(
    450     netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
    451 )
--> 452 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)

File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:393, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose)
    391 self._group = group
    392 self._mode = mode
--> 393 self.format = self.ds.data_model
    394 self._filename = self.ds.filepath()
    395 self.is_remote = is_remote_uri(self._filename)

File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:461, in NetCDF4DataStore.ds(self)
    459 @property
    460 def ds(self):
--> 461     return self._acquire()

File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:455, in NetCDF4DataStore._acquire(self, needs_lock)
    454 def _acquire(self, needs_lock=True):
--> 455     with self._manager.acquire_context(needs_lock) as root:
    456         ds = _nc4_require_group(root, self._group, self._mode)
    457     return ds

File ~/micromamba/envs/geosmart-template/lib/python3.12/contextlib.py:137, in _GeneratorContextManager.__enter__(self)
    135 del self.args, self.kwds, self.func
    136 try:
--> 137     return next(self.gen)
    138 except StopIteration:
    139     raise RuntimeError("generator didn't yield") from None

File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/file_manager.py:199, in CachingFileManager.acquire_context(self, needs_lock)
    196 @contextlib.contextmanager
    197 def acquire_context(self, needs_lock=True):
    198     """Context manager for acquiring a file."""
--> 199     file, cached = self._acquire_with_cache_info(needs_lock)
    200     try:
    201         yield file

File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/file_manager.py:217, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    215     kwargs = kwargs.copy()
    216     kwargs["mode"] = self._mode
--> 217 file = self._opener(*self._args, **kwargs)
    218 if self._mode == "w":
    219     # ensure file doesn't get overridden when opened again
    220     self._mode = "a"

File src/netCDF4/_netCDF4.pyx:2521, in netCDF4._netCDF4.Dataset.__init__()

File src/netCDF4/_netCDF4.pyx:2158, in netCDF4._netCDF4._ensure_nc_success()

FileNotFoundError: [Errno 2] No such file or directory: '/home/runner/work/oceanography/oceanography/data/argo/argo_profiles1.nc'
# Plot three views of ARGO profile data: Vertical axis is depth, horizontal is salinity
#   a[0]: full range, a[1]: upper 200, a[2]: 200 to 1000 with a narrow salinity window.
#   The y-limits are given in descending order so the surface (0) is at the top.

f,a = plt.subplots(3)
f.set_size_inches(14,18)
a[0].set(ylim=(2100.,0.), xlim=(29.5,35.))
a[1].set(ylim=(200.,0.), xlim=(29.5,35.))
a[2].set(ylim=(1000.,200.), xlim=(33.9,34.4))

# One color per profile; the list is longer than the number of profiles plotted
c = ['brown', 'red', 'salmon', 'tomato', 'sandybrown', 'peru', 'darkorange', 'orange', 'gold',\
    'yellow', 'chartreuse', 'lightgreen', 'lime', 'aquamarine', 'teal', 'cyan', 'deepskyblue',\
    'dodgerblue', 'royalblue', 'navy', 'blue', 'mediumpurple', 'darkviolet', 'magenta', 'crimson']

for nProfile in range(18):
    # Build a single-profile dataset here so this cell does not depend on a
    # variable created by a later cell (which raised NameError when run in order).
    argo2_ds_psu = argo2_ds.PSAL_ADJUSTED[nProfile].to_dataset(name='psu')
    # PRES_ADJUSTED supplies the vertical axis (pressure standing in for depth;
    # NOTE(review): confirm units). Plain arrays are passed via .values because
    # xarray deprecates using a DataArray as the data of a (dims, data) tuple.
    argo2_ds_psu.coords['depth'] = ('depth', argo2_ds.PRES_ADJUSTED[nProfile].values)
    argo2_ds_psu['psu'] = ('depth', argo2_ds_psu.psu.values)
    # Same profile on all three panels; only the axis limits differ
    for panel in a:
        panel.plot(argo2_ds_psu.psu.values, argo2_ds_psu.depth.values, ',-', color=c[nProfile])
NameError                                 Traceback (most recent call last)
Cell In[2], line 3
      1 # Plot three views of ARGO profile data: Vertical axis is depth, horizontal is salinity
----> 3 f,a = plt.subplots(3)
      4 f.set_size_inches(14,18)
      5 a[0].set(ylim=(2100.,0.), xlim=(29.5,35.))

NameError: name 'plt' is not defined
# GLODAP source data: one NetCDF file per variable under data_dir/glodap.
# Each file name is built and then opened as an xarray Dataset.
glodapSalinityFnm    = data_dir + '/glodap/glodap_salinity.nc'
glodap_dsSal         = xr.open_mfdataset(glodapSalinityFnm, combine='by_coords')

glodapTemperatureFnm = data_dir + '/glodap/glodap_temperature.nc'
glodap_dsTemp        = xr.open_mfdataset(glodapTemperatureFnm, combine='by_coords')

glodapOxygenFnm      = data_dir + '/glodap/glodap_oxygen.nc'
glodap_dsO2          = xr.open_mfdataset(glodapOxygenFnm, combine='by_coords')
# Site coordinates (latitude, longitude):
#   Oregon Offshore        44.37415        -124.95648
#   Oregon Slope Base      44.52897        -125.38966
#   Axial Base             45.83049        -129.75326

# Oregon Slope Base expressed in glodap coordinates.
# The 360 degree bias is an idiosyncrasy of the glodap dataset.
osbLatGlodap = 44.52897
osbLonGlodap = -125.38966 + 360.

# Vertical profiles: nearest glodap grid cell to Oregon Slope Base for
# dissolved oxygen, temperature and salinity
glodap_daO2   = glodap_dsO2['oxygen'].sel(lat=osbLatGlodap, lon=osbLonGlodap, method='nearest')
glodap_daTemp = glodap_dsTemp['temperature'].sel(lat=osbLatGlodap, lon=osbLonGlodap, method='nearest')
glodap_daSal  = glodap_dsSal['salinity'].sel(lat=osbLatGlodap, lon=osbLonGlodap, method='nearest')

# Depth values that go with each of the profiles above
glodap_daO2D   = glodap_dsO2['Depth'].sel()
glodap_daTempD = glodap_dsTemp['Depth'].sel()
glodap_daSalD  = glodap_dsSal['Depth'].sel()

# Element-wise comparison of the first 33 salinity and temperature depth values
print(glodap_daSalD.values[0:33] == glodap_daTempD.values[0:33])


# Compare glodap profiles with ARGO profiles on three panels sharing a depth axis:
#   a[0] dissolved oxygen, a[1] temperature, a[2] salinity.
#   The y-limits are given in descending order so the surface (0) is at the top.
f,a = plt.subplots(3)
a[0].set(ylim=(3000.,0.), xlim=(0.,300.))
a[1].set(ylim=(3000.,0.), xlim=(0,12))
a[2].set(ylim=(3000.,0.), xlim=(31.,35.))

# One color per ARGO profile
c = ['brown', 'red', 'salmon', 'tomato', 'sandybrown', 'peru', 'darkorange', 'orange', 'gold',\
    'yellow', 'chartreuse', 'lightgreen', 'lime', 'aquamarine', 'teal', 'cyan', 'deepskyblue',\
    'dodgerblue', 'royalblue', 'navy', 'blue', 'mediumpurple', 'darkviolet', 'magenta', 'crimson']

# Earlier variant, kept for reference:
# argo2psu=argo2.PSAL_ADJUSTED[nProfile].to_dataset(name='psu')
# argo2psu.coords['depth'] = ('depth', argo2.PRES_ADJUSTED[nProfile])
# argo2psu['psu'] = (('depth'), argo2psu.psu)
# a[0].plot(daO2.values, daO2D.values, ',-', color=c[1])
# a[1].plot(daTempD.values, daTemp.values, ',-', color=c[13])
# a[2].plot(daSalD.values, daSal.values, ',-', color=c[20])

# glodap profiles: value on the horizontal axis, depth on the vertical axis
a[0].plot(glodap_daO2.values, glodap_daO2D.values)
a[1].plot(glodap_daTemp.values, glodap_daTempD.values)
a[2].plot(glodap_daSal.values, glodap_daSalD.values, 'd')

# Overlay the first two ARGO profiles on the temperature and salinity panels
for nProfile in range(2):

    # The depth coordinate comes from PRES_ADJUSTED for both datasets. The
    # temperature dataset previously took it from TEMP_ADJUSTED, which put
    # temperatures on the depth axis. Plain arrays are passed via .values because
    # xarray deprecates using a DataArray as the data of a (dims, data) tuple.
    argo2_ds_temp                 = argo2_ds.TEMP_ADJUSTED[nProfile].to_dataset(name='temp')
    argo2_ds_temp.coords['depth'] = ('depth', argo2_ds.PRES_ADJUSTED[nProfile].values)
    argo2_ds_temp['temp']         = ('depth', argo2_ds_temp.temp.values)

    argo2_ds_psu                  = argo2_ds.PSAL_ADJUSTED[nProfile].to_dataset(name='psu')
    argo2_ds_psu.coords['depth']  = ('depth', argo2_ds.PRES_ADJUSTED[nProfile].values)
    argo2_ds_psu['psu']           = ('depth', argo2_ds_psu.psu.values)

    a[1].scatter(argo2_ds_temp.temp.values, argo2_ds_temp.depth.values, s=100., color=c[nProfile])
    a[2].scatter(argo2_ds_psu.psu.values, argo2_ds_psu.depth.values, s=100., color=c[nProfile])
