3.5 MODIS for fsCA#

3.5.1 Characteristics of MODIS#

Earth Science Data Type (ESDT): MOD10A1

Product Level: L3

Nominal Data Array Dimensions: 1200km by 1200km

Spatial Resolution: 500m

Temporal Resolution: day

Map Projection: Sinusoidal

3.5.2 Procedure#

This script is designed to download MODIS snow cover data from NASA servers, convert the downloaded HDF files to GeoTIFF format, and then merge these GeoTIFF tiles into a single file for each day within a specified date range.

import os
import subprocess
import threading
from datetime import datetime, timedelta
import requests
import earthaccess
from osgeo import gdal

os, subprocess, threading: Libraries for file operations, running shell commands, and multithreading.

datetime: Library for date and time manipulation.

requests: Library for making HTTP requests.

earthaccess: Library for accessing Earth data.

gdal: Library for geospatial data operations.

create an account in urs.earthdata.nasa.gov for earth access

start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 1, 31)
tile_list = ["h08v04", "h08v05", "h09v04", "h09v05", "h10v04", "h10v05", "h11v04", "h11v05", "h12v04", "h12v05", "h13v04", "h13v05", "h15v04", "h16v03", "h16v04"]
input_folder = f"{work_dir}/temp/"
output_folder = f"{work_dir}/output_folder/"
modis_day_wise = f"{work_dir}/final_output/"
os.makedirs(output_folder, exist_ok=True)
os.makedirs(modis_day_wise, exist_ok=True)

Defines the date range and a list of MODIS tiles.

Creates input, output, and final output directories if they don’t exist.

def download_tiles_and_merge(start_date, end_date):
    date_list = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]
    for i in date_list:
        current_date = i.strftime("%Y-%m-%d")
        target_output_tif = f'{modis_day_wise}/{current_date}__snow_cover.tif'
        
        if os.path.exists(target_output_tif):
            file_size_bytes = os.path.getsize(target_output_tif)
            print(f"file_size_bytes: {file_size_bytes}")
            print(f"The file {target_output_tif} exists. skip.")
        else:
            print(f"The file {target_output_tif} does not exist.")
            print("start to download files from NASA server to local")
            earthaccess.login(strategy="netrc")
            results = earthaccess.search_data(short_name="MOD10A1", cloud_hosted=True, bounding_box=(-124.77, 24.52, -66.95, 49.38), temporal=(current_date, current_date))
            earthaccess.download(results, input_folder)
            print("done with downloading, start to convert HDF to geotiff..")

            convert_all_hdf_in_folder(input_folder, output_folder)
            print("done with conversion, start to merge geotiff tiles to one tif per day..")

            merge_tifs(folder_path=output_folder, target_date = current_date, output_file=target_output_tif)
        #delete_files_in_folder(input_folder)  # cleanup
        #delete_files_in_folder(output_folder)  # cleanup
download_tiles_and_merge(start_date, end_date)
The file ../data/fsca/final_output//2023-01-01__snow_cover.tif does not exist.
start to download files from NASA server to local
done with downloading, start to convert HDF to geotiff..

3.5.3 Convert All HDF Files in Folder#

def convert_all_hdf_in_folder(folder_path, output_folder):
    file_lst = list()
    for file in os.listdir(folder_path):
        file_lst.append(file)
        if file.lower().endswith(".hdf"):
            hdf_file = os.path.join(folder_path, file)
            convert_hdf_to_geotiff(hdf_file, output_folder)
    return file_lst
  • iterates through all files in the specified folder.

  • Converts each HDF file to GeoTIFF format.

3.5.4 Convert HDF to GeoTIFF#

def convert_hdf_to_geotiff(hdf_file, output_folder):
  hdf_ds = gdal.Open(hdf_file, gdal.GA_ReadOnly)

  # Specific subdataset name you're interested in
  target_subdataset_name = "MOD_Grid_Snow_500m:NDSI_Snow_Cover"
  # Create a name for the output file based on the HDF file name and subdataset
  output_file_name = os.path.splitext(os.path.basename(hdf_file))[0] + ".tif"
  output_path = os.path.join(output_folder, output_file_name)

  if os.path.exists(output_path):
    pass
    #print(f"The file {output_path} exists. skip.")
  else:
    for subdataset in hdf_ds.GetSubDatasets():
      # Check if the subdataset is the one we want to convert
      if target_subdataset_name in subdataset[0]:
        ds = gdal.Open(subdataset[0], gdal.GA_ReadOnly)
        # Convert to GeoTIFF
        gdal.Translate(output_path, ds)
        ds = None
        break  # Exit the loop after converting the target subdataset

  hdf_ds = None
  • Opens the HDF file.

  • Identifies the specific subdataset (“NDSI_Snow_Cover”) to convert.

  • Converts the identified subdataset to GeoTIFF format and saves it to the output directory.

3.5.5 Merge GeoTIFF Files#

def merge_tifs(folder_path, target_date, output_file):
    julian_date = date_to_julian(target_date)
    print("target julian date", julian_date)
    tif_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.tif') and julian_date in f]
    if len(tif_files) == 0:
        print(f"uh-oh, didn't find HDFs for date {target_date}")
        print("generate a new csv file with empty values for each point")
        gdal_command = ['gdal_translate', '-b', '1', '-outsize', '100%', '100%', '-scale', '0', '255', '200', '200', f"{modis_day_wise}/fsca_template.tif", output_file]
        print("Running ", gdal_command)
        subprocess.run(gdal_command)
    else:
        gdal_command = ['gdalwarp', '-r', 'min', ] + tif_files + [f"{output_file}_500m.tif"]
        print("Running ", gdal_command)
        subprocess.run(gdal_command)
        gdal_command = ['gdalwarp', '-t_srs', 'EPSG:4326', '-tr', '0.036', '0.036', '-cutline', f'{work_dir}/template.shp', '-crop_to_cutline', '-overwrite', f"{output_file}_500m.tif", output_file]
        print("Running ", gdal_command)
        subprocess.run(gdal_command)
  • Merges GeoTIFF files for the specified date.

  • If no GeoTIFF files are found for the date, it creates an empty GeoTIFF using a template.

  • If files are found, it merges them using gdalwarp and reprojects them to EPSG:4326.

3.5.6 Utility Functions#

work_dir = '../data/fsca'
def date_to_julian(date_str):
    """
    Convert a date to Julian date.
    """
    date_object = datetime.strptime(date_str, "%Y-%m-%d")
    tt = date_object.timetuple()
    

    # Format the result as 'YYYYDDD'
    julian_format = str('%d%03d' % (tt.tm_year, tt.tm_yday))

    return julian_format
def list_files(directory):
    return [os.path.abspath(os.path.join(directory, f)) for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
def delete_files_in_folder(folder_path):
    if not os.path.exists(folder_path):
        print("Folder does not exist.")
        return

    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            else:
                print(f"Skipping {filename}, as it is not a file.")
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")

Deletes all files in the specified folder.