ARGO#
This notebook brings ARGO drifter data into the picture.
compare with RCA profilers
compare with ROMS
From Jessica Scheick and team: The ICESAT Icepyx repository
has search/retrieval APIs for ARGO. See the documentation.ipynb
notebook for installation and use notes.
Jessica contributes the following context:
The history of ARGO drifters features a transition from basic CTD instrumentation to a BGC ensemble
The latter is comparable with the shallow profiler ensemble
The Icepyx interface is able to search for and download data from both CTD and BGC drifters
Icepyx is a generic data interface with additional code focused on ARGO
My experience comes from working with a team on creating a way to easily request ARGO data coincident with ICESat-2 data
Includes creating the scaffolding to readily add any other datasets you might want to grab…
…while relying on some of the same underlying space/time management infrastructure (preliminary documentation)…
…and taking care of properly formatting requests.
At this stage: The code works so I should be able to get you set up to use it
even though it might still change a bit
even though it isn’t merged into a packaged release of the software yet.
Ultimately, the API selectively returns the argo parameters you’re interested in
(and for the space, time, and if you request it depth as well),
and we read it right in to a dataframe.
There is a data size request limit, but I haven’t pushed it
and we could probably pretty easily make a few smaller requests
Idea: Install/work from a branch of a GitHub repository
specifically the icepyx argo branch, which is where we’re building this functionality
Zoom to help with setting things up…
Get some code up and running
# Notebook setup: stdlib utilities, plotting, and the numpy/pandas/xarray stack.
import os, sys, time, glob, warnings
from IPython.display import clear_output # use inside loop with clear_output(wait = True) followed by print(i)
warnings.filterwarnings('ignore')  # NOTE(review): suppresses ALL warnings, including deprecations — intentional?
this_dir = os.getcwd()
data_dir = this_dir + '/../../data' # large datasets reside outside the repository
from matplotlib import pyplot as plt
from matplotlib import colors as mplcolors
import numpy as np, pandas as pd, xarray as xr
from numpy import datetime64 as dt64, timedelta64 as td64
# convenience functions abbreviating 'datetime64' and so on
def doy(theDatetime):
    """Return the 1-based day-of-year for a numpy datetime64 value."""
    # Year start is recovered from the first four characters of the ISO string.
    year_start = dt64(str(theDatetime)[0:4] + '-01-01')
    days_elapsed = int((theDatetime - year_start) / td64(1, 'D'))
    return days_elapsed + 1
def dt64_from_doy(year, doy):
    """Inverse of doy(): the datetime64 date for a (year, day-of-year) pair."""
    january_first = dt64(str(year) + '-01-01')
    return january_first + td64(doy - 1, 'D')
def day_of_month_to_string(d):
    """Return day-of-month d (1-31) as a two-character zero-padded string, e.g. 3 -> '03'."""
    # f-string zero-padding replaces the manual str/concat branching.
    return f'{d:02d}'
# Record the interpreter major version for provenance when the notebook is re-run.
print(f'\nJupyter Notebook running Python {sys.version_info[0]}')
Jupyter Notebook running Python 3
The subsequent cells are copied from the ARGO notebook in the chlorophyll repository#
# Load two ARGO profile collections (NetCDF) staged under data_dir.
# NOTE(review): these files live outside the repository; the traceback below shows
# the cell fails with FileNotFoundError when the data directory is not staged.
argo1_ds = xr.open_dataset(data_dir + '/argo/argo_profiles1.nc')
argo2_ds = xr.open_dataset(data_dir + '/argo/argo_profiles2.nc')
# printing argo1['LONGITUDE'] and for argo2 shows the latter is close to Oregon Slope Base
# more to try printing:
# argo2
# argo2['JULD']
# argo2['JULD'], argo2['LONGITUDE'], argo2['LATITUDE']
argo2_df = argo2_ds['TEMP'].to_dataframe()  # flatten TEMP into a pandas DataFrame for a quick plot
fig,ax = plt.subplots()
fig.set_size_inches(16,6)
argo2_df['TEMP'].plot()
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/file_manager.py:211, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
210 try:
--> 211 file = self._cache[self._key]
212 except KeyError:
File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/lru_cache.py:56, in LRUCache.__getitem__(self, key)
55 with self._lock:
---> 56 value = self._cache[key]
57 self._cache.move_to_end(key)
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/home/runner/work/oceanography/oceanography/data/argo/argo_profiles1.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), '0d01b2e7-466c-4f1c-b9c9-4c8a909a1d26']
During handling of the above exception, another exception occurred:
FileNotFoundError Traceback (most recent call last)
Cell In[2], line 1
----> 1 argo1_ds = xr.open_dataset(data_dir + '/argo/argo_profiles1.nc')
2 argo2_ds = xr.open_dataset(data_dir + '/argo/argo_profiles2.nc')
4 # printing argo1['LONGITUDE'] and for argo2 shows the latter is close to Oregon Slope Base
5 # more to try printing:
6 # argo2
7 # argo2['JULD']
8 # argo2['JULD'], argo2['LONGITUDE'], argo2['LATITUDE']
File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/api.py:686, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
674 decoders = _resolve_decoders_kwargs(
675 decode_cf,
676 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
682 decode_coords=decode_coords,
683 )
685 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 686 backend_ds = backend.open_dataset(
687 filename_or_obj,
688 drop_variables=drop_variables,
689 **decoders,
690 **kwargs,
691 )
692 ds = _dataset_from_backend_dataset(
693 backend_ds,
694 filename_or_obj,
(...)
704 **kwargs,
705 )
706 return ds
File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:666, in NetCDF4BackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, auto_complex, lock, autoclose)
644 def open_dataset(
645 self,
646 filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
(...)
663 autoclose=False,
664 ) -> Dataset:
665 filename_or_obj = _normalize_path(filename_or_obj)
--> 666 store = NetCDF4DataStore.open(
667 filename_or_obj,
668 mode=mode,
669 format=format,
670 group=group,
671 clobber=clobber,
672 diskless=diskless,
673 persist=persist,
674 auto_complex=auto_complex,
675 lock=lock,
676 autoclose=autoclose,
677 )
679 store_entrypoint = StoreBackendEntrypoint()
680 with close_on_error(store):
File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:452, in NetCDF4DataStore.open(cls, filename, mode, format, group, clobber, diskless, persist, auto_complex, lock, lock_maker, autoclose)
448 kwargs["auto_complex"] = auto_complex
449 manager = CachingFileManager(
450 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
451 )
--> 452 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:393, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose)
391 self._group = group
392 self._mode = mode
--> 393 self.format = self.ds.data_model
394 self._filename = self.ds.filepath()
395 self.is_remote = is_remote_uri(self._filename)
File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:461, in NetCDF4DataStore.ds(self)
459 @property
460 def ds(self):
--> 461 return self._acquire()
File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:455, in NetCDF4DataStore._acquire(self, needs_lock)
454 def _acquire(self, needs_lock=True):
--> 455 with self._manager.acquire_context(needs_lock) as root:
456 ds = _nc4_require_group(root, self._group, self._mode)
457 return ds
File ~/micromamba/envs/geosmart-template/lib/python3.12/contextlib.py:137, in _GeneratorContextManager.__enter__(self)
135 del self.args, self.kwds, self.func
136 try:
--> 137 return next(self.gen)
138 except StopIteration:
139 raise RuntimeError("generator didn't yield") from None
File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/file_manager.py:199, in CachingFileManager.acquire_context(self, needs_lock)
196 @contextlib.contextmanager
197 def acquire_context(self, needs_lock=True):
198 """Context manager for acquiring a file."""
--> 199 file, cached = self._acquire_with_cache_info(needs_lock)
200 try:
201 yield file
File ~/micromamba/envs/geosmart-template/lib/python3.12/site-packages/xarray/backends/file_manager.py:217, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
215 kwargs = kwargs.copy()
216 kwargs["mode"] = self._mode
--> 217 file = self._opener(*self._args, **kwargs)
218 if self._mode == "w":
219 # ensure file doesn't get overridden when opened again
220 self._mode = "a"
File src/netCDF4/_netCDF4.pyx:2521, in netCDF4._netCDF4.Dataset.__init__()
File src/netCDF4/_netCDF4.pyx:2158, in netCDF4._netCDF4._ensure_nc_success()
FileNotFoundError: [Errno 2] No such file or directory: '/home/runner/work/oceanography/oceanography/data/argo/argo_profiles1.nc'
# Plot three views of ARGO profile data: vertical axis is depth, horizontal is salinity (PSU).
# All three panels draw the same traces; the axis limits select the region shown:
#   a[0] full water column, a[1] upper 200 m, a[2] 200-1000 m with a narrow salinity range.
f,a = plt.subplots(3)
f.set_size_inches(14,18)
a[0].set(ylim=(2100.,0.), xlim=(29.5,35.))
a[1].set(ylim=(200.,0.), xlim=(29.5,35.))
a[2].set(ylim=(1000.,200.), xlim=(33.9,34.4))
c = ['brown', 'red', 'salmon', 'tomato', 'sandybrown', 'peru', 'darkorange', 'orange', 'gold',\
     'yellow', 'chartreuse', 'lightgreen', 'lime', 'aquamarine', 'teal', 'cyan', 'deepskyblue',\
     'dodgerblue', 'royalblue', 'navy', 'blue', 'mediumpurple', 'darkviolet', 'magenta', 'crimson']
for nProfile in range(18):
    # Salinity profile nProfile, re-coordinated so adjusted pressure acts as depth.
    argo2_ds_psu=argo2_ds.PSAL_ADJUSTED[nProfile].to_dataset(name='psu')
    argo2_ds_psu.coords['depth'] = ('depth', argo2_ds.PRES_ADJUSTED[nProfile])
    argo2_ds_psu['psu'] = (('depth'), argo2_ds_psu.psu)
    # One loop replaces three identical plot calls on a[0], a[1], a[2].
    for axis in a:
        axis.plot(argo2_ds_psu.psu.values, argo2_ds_psu.depth.values, ',-', color=c[nProfile])
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[2], line 3
1 # Plot three views of ARGO profile data: Vertical axis is depth, horizontal is salinity
----> 3 f,a = plt.subplots(3)
4 f.set_size_inches(14,18)
5 a[0].set(ylim=(2100.,0.), xlim=(29.5,35.))
NameError: name 'plt' is not defined
# GLODAP climatology files (temperature, salinity, dissolved oxygen) staged under data_dir.
glodapTemperatureFnm = data_dir + '/glodap/glodap_temperature.nc'
glodapSalinityFnm = data_dir + '/glodap/glodap_salinity.nc'
glodapOxygenFnm = data_dir + '/glodap/glodap_oxygen.nc'
glodap_dsSal = xr.open_mfdataset(glodapSalinityFnm, combine='by_coords')
glodap_dsTemp = xr.open_mfdataset(glodapTemperatureFnm, combine='by_coords')
glodap_dsO2 = xr.open_mfdataset(glodapOxygenFnm, combine='by_coords')
# Reference site coordinates:
# Oregon Offshore 44.37415 -124.95648
# Oregon Slope Base 44.52897 -125.38966
# Axial Base 45.83049 -129.75326
# The 360 degree bias is an idiosyncrasy of the glodap dataset (longitude runs 0-360)
osbLatGlodap, osbLonGlodap = 44.52897, -125.38966 + 360.
# DataArrays: From glodap data for dissolved oxygen, temperature and salinity
# The first of each pair selects the vertical profile nearest Oregon Slope Base;
# the second is the depth coordinate for that profile.
# (The original code called .sel() with no arguments on the Depth arrays — a no-op; dropped.)
glodap_daO2 = glodap_dsO2.oxygen.sel(lon=osbLonGlodap, lat=osbLatGlodap, method='nearest')
glodap_daO2D = glodap_dsO2.Depth
glodap_daTemp = glodap_dsTemp.temperature.sel(lon=osbLonGlodap, lat=osbLatGlodap, method='nearest')
glodap_daTempD = glodap_dsTemp.Depth
glodap_daSal = glodap_dsSal.salinity.sel(lon=osbLonGlodap, lat=osbLatGlodap, method='nearest')
glodap_daSalD = glodap_dsSal.Depth
# Sanity check: salinity and temperature share the same depth levels.
print(glodap_daSalD[0:33].values == glodap_daTempD[0:33].values)
glodap_daSalD
%%time
# Force the (lazily-loaded, open_mfdataset-backed) salinity values to materialize;
# %%time reports how long the load takes.
glodap_daSal.values
# Three GLODAP profiles at Oregon Slope Base (oxygen, temperature, salinity vs depth),
# with the first two ARGO profiles overlaid on the temperature and salinity panels.
f,a = plt.subplots(3)
f.set_size_inches(14,27)
a[0].set(ylim=(3000.,0.), xlim=(0.,300.))   # dissolved oxygen
a[1].set(ylim=(3000.,0.), xlim=(0,12))      # temperature
a[2].set(ylim=(3000.,0.), xlim=(31.,35.))   # salinity
c = ['brown', 'red', 'salmon', 'tomato', 'sandybrown', 'peru', 'darkorange', 'orange', 'gold',\
     'yellow', 'chartreuse', 'lightgreen', 'lime', 'aquamarine', 'teal', 'cyan', 'deepskyblue',\
     'dodgerblue', 'royalblue', 'navy', 'blue', 'mediumpurple', 'darkviolet', 'magenta', 'crimson']
a[0].plot(glodap_daO2.values, glodap_daO2D.values)
a[1].plot(glodap_daTemp.values, glodap_daTempD.values)
a[2].plot(glodap_daSal.values, glodap_daSalD.values, 'd')
for nProfile in range(2):
    argo2_ds_temp = argo2_ds.TEMP_ADJUSTED[nProfile].to_dataset(name='temp')
    # Bug fix: the depth coordinate must come from adjusted PRESSURE, not temperature.
    # The original assigned TEMP_ADJUSTED as 'depth' (copy-paste slip) and then worked
    # around it by borrowing the salinity dataset's depth when plotting.
    argo2_ds_temp.coords['depth'] = ('depth', argo2_ds.PRES_ADJUSTED[nProfile])
    argo2_ds_temp['temp'] = (('depth'), argo2_ds_temp.temp)
    argo2_ds_psu = argo2_ds.PSAL_ADJUSTED[nProfile].to_dataset(name='psu')
    argo2_ds_psu.coords['depth'] = ('depth', argo2_ds.PRES_ADJUSTED[nProfile])
    argo2_ds_psu['psu'] = (('depth'), argo2_ds_psu.psu)
    # Temperature now plots against its own (correct) depth coordinate.
    a[1].scatter(argo2_ds_temp.temp.values, argo2_ds_temp.depth.values, s=100., color=c[nProfile])
    a[2].scatter(argo2_ds_psu.psu.values, argo2_ds_psu.depth.values, s=100., color=c[nProfile])
argo2_ds_psu