#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Mon Apr 20 09:34:21 2020 @author: smahanam """ from bs4 import BeautifulSoup import requests from netCDF4 import Dataset import numpy as np import numpy.ma as ma from datetime import datetime, timedelta import os import re, sys from pyhdf.SD import SD, SDC import shutil from itertools import product #----------------------------------------------------------# # BEGIN USER DEFINED VARIABLES # M6_NAME = 'MCD15A2H' FORWARD = True # Specify output grid resolution if REGRID IM = 43200 JM = 21600 OUTDIR = '/ldas/sarith/LAI/MODIS6/Python/' + M6_NAME + '.006/' MASKFILE= '/l_data/model_parameters/MODIS-Albedo/MCD12Q1.006/land_mask.nc4' # If you want to process a single MODIS date, define below # YYYYDOY - the format is YYYY + '/' + DOY (for e.g. 2016/065) # If you want to loop through: YYYYDOY='' (an empty string) YYYYDOY= "" # "2018/001" YEARS = [2020] XYLIM = [-24., -60., -12.5, -52.] # END USER DEFINED VARIABLES # #----------------------------------------------------------# # ---- Global Parameters NC = 86400 NR = 43200 DXY = np.double(360.)/NC N_MODIS = 2400 CWD = os.getcwd() MAPL_UNDEF = 255 MODIS_PATH = "https://ladsweb.modaps.eosdis.nasa.gov/opendap/hyrax/allData/6/" + M6_NAME + '/' if M6_NAME == 'MOD15A2H': firstdate = '20000222' if M6_NAME == 'MCD15A2H': firstdate = '20020708' M6_DIR ={'MCD15A2H':'MOTA', 'MOD15A2H':'MOLT','MYD15A2H':'MOLA'} MODIS_DOWN = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/6/" + M6_NAME + '/' DY = np.double(180.) / NR DX = np.double(360.) / NC lon_g = np.array([ -180. + DX/2. + DX*i for i in range(NC)],dtype=np.double) lat_g = np.array([DY*i + -90. + DY/2. for i in range(NR)],dtype=np.double) xsub_ind = np.array(np.where ((lon_g >= XYLIM[1]) & (lon_g <= XYLIM[3]))) xsub_ind = xsub_ind[0,:] ysub_ind = np.array(np.where ((lat_g >= XYLIM[0]) & (lat_g <= XYLIM[2]))) ysub_ind = ysub_ind[0,:] IM = xsub_ind.size JM = ysub_ind.size earth_radius = np.double(6371007.181) plane_width = np.double(1111950.) Xmin = np.double(-18.)*plane_width Ymax = np.double(9.)*plane_width limits = np.array(XYLIM)*(np.pi /np.double(180.)) limits[1] = limits[1]*earth_radius*np.cos(limits[0]) limits[3] = limits[3]*earth_radius*np.cos(limits[2]) limits[0] = limits[0]*earth_radius limits[2] = limits[2]*earth_radius HH1 = np.floor((limits[1] - Xmin)/plane_width).astype(int) -1 HH2 = np.floor((limits[3] - Xmin)/plane_width).astype(int) +1 VV2 = np.floor((Ymax - limits[0])/plane_width).astype(int) VV1 = np.floor((Ymax - limits[2])/plane_width).astype(int) VVS = ['v'+str(x).zfill(2) for x in range(VV1,VV2+1)] HHS = ['h'+str(x).zfill(2) for x in range(HH1,HH2+1)] tog = list(product(HHS, VVS)) HHVV = [x[0]+x[1] for x in tog] flabel= 'h' + str(HH1).zfill(2) + '-' + str(HH2).zfill(2) + 'v' + str(VV1).zfill(2) + '-' + str(VV2).zfill(2) print(HHVV) print(flabel) #command = 'module load nco/4.8.1' #os.system(command) os.chdir(CWD) #----------------------------------------------------------# # MODULE FUNCTIONS # #----------------------------------------------------------# class DriverFunctions (object): def create_netcdf (FILE_NAME,VAR_NAMES): import datetime ncFidOut = Dataset(FILE_NAME,'w',format='NETCDF4') LatDim = ncFidOut.createDimension('lat', JM) LonDim = ncFidOut.createDimension('lon', IM) ncFidOut.description = "MODIS " + M6_NAME + " @ " + str(DXY*3600) + ' arc-sec' ncFidOut.history = "Created on " + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " by Sarith Mahanama (sarith.p.mahanama@nasa.gov)" lonout = ncFidOut.createVariable('lon','f8',('lon',)) latout = ncFidOut.createVariable('lat','f8',('lat',)) for l in range (len(VAR_NAMES)): varout = ncFidOut.createVariable(VAR_NAMES[l], 'u1', ('lat','lon'), fill_value=MAPL_UNDEF) setattr(ncFidOut.variables[VAR_NAMES[l]],'missing_value' ,np.uint8(MAPL_UNDEF)) setattr(ncFidOut.variables[VAR_NAMES[l]],'fmissing_value',np.uint8(MAPL_UNDEF)) latout.units = 'degrees north' lonout.units = 'degrees east' lonout [:] = lon_g[xsub_ind] latout [:] = lat_g[ysub_ind] ncFidOut.close() del ncFidOut def nearest_cell (array, value): array = np.asarray(array) idx = (np.abs(array - value)).argmin() return idx def fill_gaps (data, fill_value=None, ocean=None): NX = min (data.shape) data = ma.masked_array(data,data==fill_value) odata= ma.masked_array(data,data==ocean) for zoom in range (1,NX//2): for direction in (-1,1): shift = direction * zoom if not np.any(data.mask): break for axis in (0,1): a_shifted = np.roll(data ,shift=shift,axis=axis) o_shifted = np.roll(odata,shift=shift,axis=axis) idx=~a_shifted.mask * data.mask*~o_shifted.mask data[idx]=a_shifted[idx] def regrid_to_coarse (data): global NC, NR, IM, JM JX = NR // JM IX = NC // IM temp = data.reshape((data.shape[0] // JX, JX, data.shape[1] // IX, IX)) return np.nanmean(temp, axis=(1,3)) def get_tag_list(url,label): page = requests.get(url) soup = BeautifulSoup(page.content, 'html.parser') allfiles = soup.find_all(label) thislist = [tag.text for tag in allfiles] return thislist class MAP_FROM_LATLON(object): # ---- stitch all hdf files together and construct global arrays on the sin-grid. # ---- employs forward mapping from the lat lon grid using a mask file. def __init__ (self, HDF_FILES, DATADIR, OUTFILE, lc_mask, x_index,y_index): lai_500 = np.full ((NR,NC),np.uint8(MAPL_UNDEF)) QC_500 = np.full ((NR,NC),np.uint8(MAPL_UNDEF)) FP_500 = np.full ((NR,NC),np.uint8(MAPL_UNDEF)) XC_500 = np.full ((NR,NC),np.uint8(MAPL_UNDEF)) print (OUTFILE) for f in range(len(HDF_FILES)): FILE_NAME = DATADIR + HDF_FILES[f] if os.path.isfile(FILE_NAME): print (FILE_NAME) H = int(HDF_FILES[f][HDF_FILES[f].find('h')+1:HDF_FILES[f].find('h')+3]) V = int(HDF_FILES[f][HDF_FILES[f].find('v')+1:HDF_FILES[f].find('v')+3]) i1 = H * N_MODIS j1 = V * N_MODIS hdf = SD(FILE_NAME, SDC.READ) DATAFIELD_NAME = 'Lai_500m' data2D = hdf.select(DATAFIELD_NAME) data = data2D[:,:].astype(np.int) lai_500 [j1: j1 + N_MODIS,i1: i1 + N_MODIS] = data hdf = SD(FILE_NAME, SDC.READ) DATAFIELD_NAME = 'FparLai_QC' data2D = hdf.select(DATAFIELD_NAME) data = data2D[:,:].astype(np.int) QC_500 [j1: j1 + N_MODIS,i1: i1 + N_MODIS] = data hdf = SD(FILE_NAME, SDC.READ) DATAFIELD_NAME = 'Fpar_500m' data2D = hdf.select(DATAFIELD_NAME) data = data2D[:,:].astype(np.int) FP_500 [j1: j1 + N_MODIS,i1: i1 + N_MODIS] = data hdf = SD(FILE_NAME, SDC.READ) DATAFIELD_NAME = 'FparExtra_QC' data2D = hdf.select(DATAFIELD_NAME) data = data2D[:,:].astype(np.int) XC_500 [j1: j1 + N_MODIS,i1: i1 + N_MODIS] = data hdf.end() del hdf else: print ('MISSING hdf FILE ') print (FILE_NAME) shutil.rmtree(CWD + '/download/') sys.exit() DriverFunctions.create_netcdf (OUTFILE,['Lai_500m','FparLai_QC','Fpar_500m','FparExtra_QC']) ixgrid = np.ix_(ysub_ind, xsub_ind) data_high = np.full((NR,NC),np.uint8(MAPL_UNDEF)) ncFidOut = Dataset(OUTFILE,mode='a') data_high.reshape(NC*NR)[lc_mask] = lai_500[y_index,x_index] ncFidOut.variables['Lai_500m' ][:] = data_high[ixgrid] data_high[:,:] = np.uint8(MAPL_UNDEF) data_high.reshape(NC*NR)[lc_mask] = QC_500[y_index,x_index] ncFidOut.variables['FparLai_QC'][:] = data_high[ixgrid] data_high[:,:] = np.uint8(MAPL_UNDEF) data_high.reshape(NC*NR)[lc_mask] = FP_500[y_index,x_index] ncFidOut.variables['Fpar_500m'][:] = data_high[ixgrid] data_high[:,:] = np.uint8(MAPL_UNDEF) data_high.reshape(NC*NR)[lc_mask] = XC_500[y_index,x_index] ncFidOut.variables['FparExtra_QC'][:] = data_high[ixgrid] ncFidOut.close() del lai_500 del QC_500 del FP_500 del XC_500 # del ncFidOut # del data_high if FORWARD: class GET_ARRAY_INDICES (object): # ---- read MODIS LAI 15A2H granule from downloaded .hdf file. # ---- employs inverse mapping from sinusoidal grid to lat/lon def __init__ (self): print ('Initialize array indices') LANDMASK = Dataset(MASKFILE) self.lc_mask = np.array(LANDMASK.variables['lc_mask'][:]) self.x_index = np.array(LANDMASK.variables['x_index'][:]) self.y_index = np.array(LANDMASK.variables['y_index'][:]) LANDMASK.close() #----------------------------------------------------------# # BEGIN PROCESSING MODIS 8-day COMPOSITES # #----------------------------------------------------------# # ---- single MODIS date if FORWARD: FWI = GET_ARRAY_INDICES() if YYYYDOY: year = YYYYDOY[0:-3] doy = YYYYDOY[5:] + '/' this_doy = int(doy[0:3]) date1 = datetime(int(year[0:4]),1,1) + timedelta (days=this_doy-1) files = DriverFunctions.get_tag_list(MODIS_PATH + year + doy,"span")[1:-1] # HVV,flabel = DriverFunctions.domain_setup DATADIR = CWD + '/download/' + year + '/' os.makedirs(DATADIR) os.chdir(DATADIR) for f in range(len(HHVV)): flab = "-A \'*" + HHVV[f] + "*.hdf\'" command = 'wget -e robots=off -m -np -R .html,.tmp -nH ' + flab + ' --cut-dirs=5 "' + MODIS_DOWN + year + doy +'" --header "Authorization: Bearer D4FAE892-9F8D-11EA-BB77-CA202F439EFB" -P .' os.system(command) os.chdir(CWD) DATADIR = DATADIR + doy OUTFILE = CWD + '/download/' + '/' + M6_NAME + '.006_' + year[0:-1] + doy[0:-1] + flabel + '.nc4' files = os.listdir(DATADIR) thisset = MAP_FROM_LATLON (files, DATADIR, OUTFILE, FWI.lc_mask, FWI.x_index,FWI.y_index) del thisset FINALFILE = OUTDIR + '/' + M6_NAME + '.006_' + year[0:-1] + doy[0:-1] + flabel + '.nc4' command = 'nccopy -d6 ' + OUTFILE + ' ' + FINALFILE os.system(command) shutil.rmtree(CWD + '/download/') sys.exit() # ---- loop through years years = DriverFunctions.get_tag_list(MODIS_PATH,"a")[1:-6] alldates = [] os.chdir(CWD) #HVV,flabel = DriverFunctions.domain_setup for year in YEARS: yyyy = str(year).zfill(4) + '/' doys = DriverFunctions.get_tag_list(MODIS_PATH + yyyy, "a")[1:-6] # ---- DOY loop for idx, doy in enumerate(doys): this_doy = int(doy[0:3]) date1 = datetime(year,1,1) + timedelta (days=this_doy-1) if this_doy < 361: date2 = date1 + timedelta (days=8) else: date2 = datetime(year+1,1,5) mday = date1 + (date2 - date1)/2 OUTFILE = OUTDIR + '/' + M6_NAME + '.006_' + yyyy[0:-1] + doy[0:-1] + '_' + flabel + '.nc4' print (OUTFILE) if not os.path.isfile(OUTFILE): files = DriverFunctions.get_tag_list(MODIS_PATH + yyyy + doy,"span")[1:-1] #DATADIR = OUTDIR + date1.strftime('%Y.%m.%d') + '/' os.chdir(CWD) DATADIR = CWD + '/download/' + yyyy os.makedirs(DATADIR) os.chdir(DATADIR) for f in range(len(HHVV)): flab = "-A \'*" + HHVV[f] + "*.hdf\'" command = 'wget -e robots=off -m -np -R .html,.tmp -nH ' + flab + ' --cut-dirs=5 "' + MODIS_DOWN + yyyy + doy +'" --header "Authorization: Bearer D4FAE892-9F8D-11EA-BB77-CA202F439EFB" -P .' os.system(command) os.chdir(CWD) DATADIR = DATADIR + doy # ---- Stitch MODIS granules TMPFILE = CWD + '/download/' + '/' + M6_NAME + '.006_' + yyyy[0:-1] + doy[0:-1] + '_' + flabel + '.nc4' files = os.listdir(DATADIR) thisset = MAP_FROM_LATLON (files, DATADIR, TMPFILE, FWI.lc_mask, FWI.x_index,FWI.y_index) del thisset command = 'nccopy -d6 ' + TMPFILE + ' ' + OUTFILE os.system(command) shutil.rmtree(CWD + '/download/') alldates.extend(doys)