'''
A module to store and treat data
================================
On the native grid, 1986-2020, daily data:
/data/products/GLOBAL_REANALYSES/C-GLORSv7/DAILY_MONTHLY
On the native grid, 1986-2020, monthly data:
/data/products/GLOBAL_REANALYSES/C-GLORSv7/MONTHLY
'''
import os
import numpy as np
import xarray as xr
import pandas as pd
import zapata.lib as lib
import netCDF4 as net
import docrep
# Use docrep to avoid duplicating docstrings. Contrary to its documentation,
# the delete operation works on only one parameter at a time.
d = docrep.DocstringProcessor()
# NCEP Reanalysis standard pressure levels
# 1000, 925, 850, 700, 600, 500, 400, 300, 250, 200, 150, 100, 70, 50, 30, 20, 10
# 1000 925 850 700 600, 500 400 300, 250, 200, 150, 100 50 10
#
# def read_month(dataset, vardir,var1,level,yy,mm,type,option,verbose=False):
# """
# A routine to read one month of data from various datasets.
# This routine will read data one month at a time from various data sets
# described in *DataGrid()*
# Parameters
# ----------
# dataset :
# Name of the dataset, ``ERA5``, ``GPCP``
# vardir :
# Path to the dataset
# var1 :
# Variable to extract
# level :
# Level of the Variable
# yy :
# Year
# mm :
# Month
# type :
# Type of data to read. Currently hardwired to ``npy``
# option :
# 'Celsius' For temperature Transform to Celsius
# verbose:
# Tons of Output
# Returns
# --------
# average :
# Monthly data.
# Examples
# --------
# >>> read_month('ERA5','.','Z','500',1979,12,'npy',[],verbose=verbose)
# >>> read_month('GPCP','.','TPREP','SURF',1979,12,'nc',[],verbose=verbose)
# >>> read_month('ERA5','.','T','850',1979,12,'npy',option=Celsius,verbose=verbose)
# """
# info=DataGrid()
# if dataset == 'ERA5':
# # def adddir(name,dir):
# # return dir +'/' + name.split('.')[0]+'.npy'
# fil1=lib.adddir(lib.makemm(var1,str(level),yy,mm),info[dataset]['place'])
# if verbose: print(fil1)
# if var1 == 'T' and option == 'Celsius':
# data1=np.load(fil1) - 273.16
# else:
# data1=np.load(fil1)
# elif dataset == 'GPCP':
# file = info[dataset]['place'] + '/gpcp_cdr_v23rB1_y' + str(yy) + '_m' + '{:02d}'.format(mm) + '.nc'
# data1 = net.Dataset(file).variables["precip"][:,:]
# else:
# Print(' Error in read_month, datset set as {}'.format(dataset))
# return data1
[docs]
def date_param():
    """
    Data bank to resolve month and season averaging information.

    Returns
    -------
    out : dict
        Maps every season or month key (``'DJF'``, ``'JAN'``, ``'ANN'``, ...)
        to a dictionary holding a ``'label'`` string equal to the key and a
        ``'month_index'`` list of calendar month numbers (1-12).
        The extra key ``'MONTHS'`` holds the list of three-letter month
        names in calendar order.

    Examples
    --------
    >>> index = date_param()
    >>> mon = index['DJF']['month_index']
    """
    months = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
              'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']

    # Season key -> calendar months. 'SON' is September-November; the
    # October-December set is available under 'OND'.
    seasons = {
        'DJF': [12, 1, 2],
        'JFM': [1, 2, 3],
        'AMJ': [4, 5, 6],
        'JAS': [7, 8, 9],
        'JJA': [6, 7, 8],
        'SON': [9, 10, 11],
        'OND': [10, 11, 12],
        'ANN': list(range(1, 13)),
    }

    # Building the entries from the key guarantees label == key everywhere.
    out = {key: {'label': key, 'month_index': index}
           for key, index in seasons.items()}
    for number, name in enumerate(months, start=1):
        out[name] = {'label': name, 'month_index': [number]}
    out['MONTHS'] = months
    return out
[docs]
def DataGrid(option=None):
    """
    Return a dictionary with information on the supported data sets.

    Currently these data sets are described:

    * ERA5 -- Subset of monthly data of ERA5
    * GPCP -- Monthly data of precipitation data set
    * OCRD -- C-GLORS reanalysis V7 daily 1986-2020
    * OCRM -- C-GLORS reanalysis V7 monthly 1986-2020

    Info can be retrieved as ``grid[dataset][var]['start']`` for the
    starting years. See source for full explanation of the content.

    Parameters
    ----------
    option : str, optional
        Matched case-insensitively:

        * 'verbose' -- also print the root directory used for local data
        * 'info' -- print description, location, host and source URL of
          every data set and return ``None``

    Returns
    -------
    grid : dict or None
        Dictionary keyed by data set name, or ``None`` when `option` is
        'info'.

    Examples
    --------
    >>> DataGrid('info')
    >>> dat = DataGrid('verbose')
    """
    # The docstring historically advertised capitalised option names while
    # the code compared against lowercase ones; accept both spellings.
    opt = option.lower() if isinstance(option, str) else option

    homedir = os.path.expanduser("~")
    if opt == 'verbose':
        print('Root Directory for Local Data ', homedir)

    pressure_levels = [10, 50, 100, 150, 200, 250, 300, 400,
                       500, 600, 700, 850, 925, 1000]

    # Each variable gets its own copy of the level list so that a caller
    # mutating one entry does not affect the others.
    U = {'level': list(pressure_levels),
         'start': 1979,
         'end': 2018,
         'label': 'U',
         'longname': 'Zonal Wind',
         'factor': 1,
         }
    V = {'level': list(pressure_levels),
         'start': 1979,
         'end': 2018,
         'label': 'V',
         'longname': 'Meridional Wind',
         'factor': 1,
         }
    T = {'level': list(pressure_levels),
         'start': 1979,
         'end': 2018,
         'cv': 4,
         'label': 'T',
         'longname': 'Temperature',
         'factor': 1,
         }
    W = {'level': list(pressure_levels),
         'start': 1979,
         'end': 2018,
         'cv': 0.01,
         'label': 'W',
         'longname': 'Vertical Velocity',
         'factor': 1,
         }
    Z = {'level': [200, 500],
         'start': 1979,
         'end': 2018,
         'cv': 0.01,
         'label': 'Z',
         'longname': 'Geopotential Height',
         'factor': 1,
         }
    tp = {'level': ['SURF'],
          'start': 1979,
          'end': 2018,
          'cv': 0.01,
          'label': 'TP',
          'longname': 'Precipitation',
          'factor': 60,
          }
    MSL = {'level': ['SURF'],
           'start': 1979,
           'end': 2018,
           'cv': 10,
           'label': 'MSL',
           'longname': 'Mean Sea Level Pressure',
           'factor': 1 / 100.,
           }
    SST = {'level': ['SURF'],
           'start': 1979,
           'end': 2018,
           'cv': 10,
           'label': 'SST',
           'longname': 'Sea Surface Temperature',
           'factor': 1,
           }
    # NOTE(review): the label is 'T', same as the temperature entry; this
    # looks like a copy-paste leftover -- confirm before changing it.
    THETA = {'level': list(pressure_levels),
             'start': 1979,
             'end': 2018,
             'cv': 4,
             'label': 'T',
             'longname': 'Potential Temperature',
             'factor': 1,
             }

    era5_lat = list(np.arange(-90, 90.1, 0.25))
    # The 360.1 upper bound yields one more longitude than 'nlon'.
    era5_lon = list(np.arange(0, 360.1, 0.25))
    dataera5 = {
        'nlat': 721,
        'nlon': 1440,
        'latvec': era5_lat,
        'lonvec': era5_lon,
        'latnp': np.asarray(era5_lat[::-1]),  # For plotting
        'lonnp': np.asarray(era5_lon),
        'clim': homedir + '/Dropbox (CMCC)/ERA5/CLIM',
        'place': homedir + '/Dropbox (CMCC)/ERA5/DATA/ERA5_MM',
        'host': 'local',
        'source_url': 'http://confluence.ecmwf.int/display/CKB/ERA5+data+documentation#ERA5datadocumentation-Parameterlistings',
        'desc': 'ERA5 Monthly Mean for U,V,T,W,SLP 1979-2018',
        'special_value': 9999.,
        'U': U,
        'T': T,
        'V': V,
        'W': W,
        'Z': Z,
        'tp': tp,
        'MSL': MSL,
        'SST': SST,
        'THETA': THETA,
    }

    precip_gpcp = {'level': 'SURF',
                   'start': 1979,
                   'end': 2018,
                   'label': 'precip',
                   'factor': 1,
                   'longname': 'Precipitation GPCP',
                   }
    gpcp_lat = list(np.arange(-88.75, 90.1, 2.5))
    datagpcp = {
        'nlat': 72,
        'nlon': 144,
        'latvec': gpcp_lat,
        'lonvec': list(np.arange(1.25, 360.1, 2.5)),
        'latnp': np.asarray(gpcp_lat[::-1]),  # For plotting
        'lonnp': np.asarray(list(np.arange(1.25, 360., 2.5))),
        'precip': precip_gpcp,
        'place': homedir + '/Dropbox (CMCC)/ERA5/DATA/GPCP/TPREP',
        'clim': homedir + '/Dropbox (CMCC)/ERA5/DATA/GPCP/TPREP',
        'desc': 'Precipitation from the GPCP Project',
        'source_url': 'http://gpcp.umd.edu/',
        'host': 'local',
    }

    ocean_monthly = {
        'start': 1986,
        'end': 2020,
        'place': '/data/products/GLOBAL_REANALYSES/C-GLORSv7/MONTHLY',
        'names': 'CMCC-CM2-HR4-pi_1m_<year><month>01_<year><month>31_grid_T.nc',
        'desc': 'Ocean Reanalysis V7 Monthly',
        'source_url': '',
        'host': 'DSS',
    }
    ocean_daily = {
        'start': 1986,
        'end': 2020,
        'place': '/data/products/GLOBAL_REANALYSES/C-GLORSv7/DAILY_MONTHLY',
        'names': 'CMCC-CM2-HR4-pi_1m_<year><month>01_<year><month>31_grid_T.nc',
        'desc': 'Ocean Reanalysis V7 Daily',
        'source_url': '',
        'host': 'DSS',
    }

    grid = {'ERA5': dataera5,
            'GPCP': datagpcp,
            'OCRD': ocean_daily,
            'OCRM': ocean_monthly,
            }

    if opt == 'info':
        for entry in grid.values():
            print(entry['desc'])
            print(entry['place'])
            print(entry['host'])
            print(entry['source_url'] + '\n')
        return None
    return grid
# def readvar_grid(region='globe',dataset='ERA5',var='Z',level='500',season='JAN',Celsius=False,verbose=False):
# """
# Read Variable from data sets
# Parameters
# ----------
# region :
# *globe* for global maps, or [east, west, north, south]
# for limited region, longitude 0-360
# dataset :
# name of data set
# var :
# variable name
# level :
# level, either a value or 'SURF' for surface fields
# season :
# Month ('JAN') or season (,'DJF') or annual 'ANN')
# Celsius :
# True/False for temperature transform to Celsius
# verbose :
# True/False -- tons of output
# Returns
# -------
# xdat : numpy
# array data
# nlon :
# Number of longitudes
# nlat :
# Number of Latitudes
# lat :
# Latitudes
# lon :
# Longitudes
# Examples
# --------
# >>> readvar_grid(region='globe',dataset='ERA5',var='Z',level='500',season='JAN',Celsius=False,verbose=False)
# >>> readvar_grid(region='globe',dataset='ERA5',var='SST',level='SURF',season='JAN',Celsius=True,verbose=False)
# """
# vardir = '.'
# dat=date_param()
# grid = DataGrid()
# nlat = grid[dataset]['nlat']
# nlon = grid[dataset]['nlon']
# lat = grid[dataset]['latnp']
# lon = grid[dataset]['lonnp']
# #Correct for longitude in ERA5
# if dataset =='ERA5':
# lon=lon[:-1]
# sv=None
# try:
# sv=grid[dataset]['special_value']
# if verbose: print(' Using Special Value ---->', sv)
# except:
# print(' Special Value not defined for dataset {}'.format(dataset))
# ys=grid[dataset][var]['start']
# ye=grid[dataset][var]['end']
# ys=1979
# ye=2018
# lname=grid[dataset][var]['longname']
# nyears= ye-ys
# years =[i for i in range(ys,ye+1)]
# factor=grid[dataset][var]['factor']
# xdat= np.zeros([nlat,nlon,nyears+1])
# for tim in years:
# itim =years.index(tim)
# dat=date_param()
# mon=dat[season]['month_index']
# if verbose:
# print(' Plotting ' + var + ' from dataset ' + dataset)
# print('Printing year ', tim)
# #
# if len(mon) > 1:
# if verbose: print('Mean on these months: {}'.format(mon))
# temp= np.zeros([nlat,nlon,len(mon)])
# for k in range(len(mon)):
# temp[:,:,k]=read_month(dataset,vardir,var,level,tim,mon[k],'npy',[],verbose=verbose)
# xdat[:,:,itim]=np.mean(temp,axis=2)
# else:
# xdat[:,:,itim]=read_month(dataset, vardir,var,level,tim,mon[0],'npy',[],verbose=verbose)
# return xdat,nlon, nlat,lat,lon,sv
# def read_xarray(dataset='ERA5',region='globe',var='Z',level='500',season='DJF',verbose=False):
# '''
# Read npy files from data and generates xarray.
# This a xarray implementation of read_var. It always grabs the global data.
# Parameters
# ----------
# dataset :
# Name of data set
# region:
# Select region
# * *globe*, Entire globe
# * [East, West, North, South], Specific Region
# var :
# variable name
# level :
# level, either a value or 'SURF' for surface fields
# season :
# Month ('JAN') or season (,'DJF') or annual 'ANN'), or 'ALL' for every year
# verbose:
# True/False -- Tons of Output
# Returns
# -------
# out : xarray
# array data
# '''
# if season != 'ALL':
# xdat,nlon, nlat,lat,lon,sv=readvar_grid(region='globe',dataset=dataset, \
# var=var,level=level,season=season,Celsius=False,verbose=verbose)
# times = pd.date_range('1979-01-01', periods=40,freq='YS')
# elif season == 'ALL':
# xdat,nlon, nlat,lat,lon,sv=readvar_year(region='globe',dataset=dataset, \
# var=var,level=level,period='all',Celsius=False,verbose=verbose)
# times = pd.date_range('1979-01-01', periods=480,freq='MS')
# out = xr.DataArray(xdat, coords=[lat, lon, times], dims=['lat','lon','time'])
# if sv:
# out=xr.where(out == sv, np.nan, out)
# if region != 'globe':
# out = out.sel(lon = slice(region[0],region[1]), lat = slice(region[2],region[3]))
# return out
# def read_dataset(dataset='ERA5',region='globe',var='Z',level='500',season='DJF',verbose=False):
# '''
# Similar to `read_xarray` but returns a ``xarray DataSet``
# '''
# out = read_xarray(dataset=dataset, region=region, \
# var=var,level=level,season=season,verbose=verbose)
# ds = xr.Dataset({var: out})
# return ds
# def readvar_year(region='globe',period='all',dataset='ERA5',var='Z',level='500',
# Celsius=False,verbose=False):
# """
# Read Variable from data banks, all month, no averaging
# Parameters
# ----------
# Region :
# 'globe' for global maps, or [east, west, north, south]
# for limited region, longitude 0-360
# dataset :
# name of data set
# var :
# variable name
# level :
# level, either a value or 'SURF' for surface fields
# period :
# Time period to be read
# * 'all' Every time level in databank
# * [start_year,end_year] period in those years
# Celsius :
# True/False for temperature transform to Celsius
# verbose :
# True/False -- tons of output
# Returns
# -------
# xdat : numpy
# array data
# nlon :
# Number of longitudes
# nlat :
# Number of Latitudes
# lat :
# Latitudes
# lon :
# Longitudes
# Examples
# --------
# >>> readvar_year(region='globe',dataset='ERA5',var='Z',level='500',season='JAN',Celsius=False,verbose=False)
# >>> readvar_year(region='globe',dataset='ERA5',var='SST',level='SURF',season='JAN',Celsius=True,verbose=False)
# """
# vardir = '.'
# dat=date_param()
# grid = DataGrid()
# nlat = grid[dataset]['nlat']
# nlon = grid[dataset]['nlon']
# lat = grid[dataset]['latnp']
# lon = grid[dataset]['lonnp']
# #Correct for longitude in ERA5
# if dataset =='ERA5':
# lon=lon[:-1]
# sv=None
# try:
# sv=grid[dataset]['special_value']
# if verbose: print(' Using Special Value ---->', sv)
# except:
# print(' Special Value not defined for dataset {}'.format(dataset))
# ys=grid[dataset][var]['start']
# ye=grid[dataset][var]['end']
# ys=1979
# ye=2018
# lname=grid[dataset][var]['longname']
# #Choose period
# if period =='all':
# nyears= ye-ys
# years =[i for i in range(ys,ye+1)]
# else:
# nyears= period[1]-period[0]
# years =[i for i in range(period[0],period[1]+1)]
# factor=grid[dataset][var]['factor']
# xdat= np.zeros([nlat,nlon,12*(nyears+1)])
# itim=0
# dat=date_param()
# mon=dat['ANN']['month_index']
# if verbose : print(' Reading ' + var + ' from databank ' + dataset)
# for tim in years:
# print('Reading year ', tim)
# for imon in mon:
# if verbose : print('Reading mon ', imon)
# xdat[:,:,itim]=read_month(dataset,vardir,var,level,tim,mon[imon-1],'npy',[],verbose=verbose)
# if verbose : print('Reading time ', itim)
# itim = itim + 1
# return xdat,nlon, nlat,lat,lon,sv
[docs]
@d.get_sections(base='read_era5', sections=['Parameters', 'Returns'])
@d.dedent
def read_era5(var, lev, period='JAN', epoch='AFT', loc=' ', averaging=True, verbose=False):
    '''
    This routine reads monthly data files from monthly ERA5,
    optionally combining the backward (1950-1979) and current analysis (1979-2019)

    Parameters
    ----------
    var:
        Variable selected:
            * u: U-velocity
            * v: V-velocity
            * t: Temperature
            * w: Vertical Velocity
            * q: Specific Humidity
            * sst: Sea Surface Temperature
            * msl: Mean Sea Level Pressure
            * mtnlwrf: mean_top_net_long_wave_radiation_flux
            * mtnswrf: mean_top_net_short_wave_radiation_flux
            * mslhf: mean_surface_latent_heat_flux
            * msshf: mean_surface_sensible_heat_flux
            * msnswrf: mean_surface_net_short_wave_radiation_flux
            * msnlwrf: mean_surface_net_long_wave_radiation_flux
            * tcw: total_column_water
            * t2m: 2m_Temperature
    lev:
        pressure level, given as a string because it is pasted into the file name,
        [10,50,100,150, 200,250,300,400, 500,600, 700,850,925, 1000]
    period:
        Month or season to be selected. For periods across years, i.e. 'DJF' the
        first and last years are dropped.
        Values are month or season labels
        'JFM','AMJ','JAS','OND','DJF','JJA'
        'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'
        'ANN' -- Entire time series, returned without averaging
    epoch:
        * 'BCK' -- Only backward period 1950-1978
        * 'AFT' -- Current period 1979 - 2019
        * 'ALL' -- Combine the two
        * 'V5' -- ERA5 V5 1940-2022
    loc:
        Location of the root directory for the data set
    averaging:
        True/False If averaged output is desired
    verbose:
        True/False If True print the time span of the data that was read

    Returns
    -------
    data:
        data in xarray format

    Raises
    ------
    ValueError
        If `epoch` is not one of 'BCK', 'AFT', 'ALL', 'V5'.
    '''
    # For var 'SST' the file name carries 'SST' in place of the level.
    # NOTE(review): the comparison is case-sensitive while the variable list
    # above is lowercase -- confirm which spelling the files really use.
    flev = 'SST' if var == 'SST' else lev
    stem = loc + '/' + var.upper() + '/' + var + '_' + flev + '_'

    # epoch -> (file suffixes in chronological order, last December on file)
    epochs = {
        'AFT': (['ER5AFT.nc'], "2019-12-01"),
        'BCK': (['ER5BCK.nc'], "1978-12-01"),
        'ALL': (['ER5BCK.nc', 'ER5AFT.nc'], "2019-12-01"),
        'V5': (['V5.nc'], "2022-12-01"),
    }
    if epoch not in epochs:
        # The original built an exception without raising it and then failed
        # later on an unbound variable; fail here with a clear message.
        raise ValueError(
            f'Wrong epoch choice {epoch!r} in `read_era5`, expected one of {sorted(epochs)}')

    suffixes, last_december = epochs[epoch]
    enddat = pd.to_datetime([last_december])[0]
    pieces = [xr.open_dataset(stem + suffix) for suffix in suffixes]
    dat = pieces[0] if len(pieces) == 1 else xr.concat(pieces, dim='time')

    if verbose:
        print(f'Selected data from {dat.time[0].data} to {dat.time[-1].data} \n')

    if period != 'ANN':
        perlab, fr = decode_period(period)
        dat = dat.sel(time=dat.time.dt.month.isin(perlab))
        # Adjust for period across years: drop the leading Jan/Feb and the
        # trailing December so that only complete DJF triplets remain.
        if period == 'DJF':
            dat = dat.where(dat.time > dat.time[1], drop=True).where(dat.time < enddat, drop=True)
        # Averaging needs the block length from `decode_period`, which is
        # only defined for a month or a season, hence not for 'ANN'.
        if averaging:
            dat = dat.coarsen(time=fr).mean()

    # Adjust names
    dat = dat.rename({'longitude': 'lon', 'latitude': 'lat'})
    return dat
[docs]
def decode_period(period):
    '''
    Decode a season or month label into calendar month numbers.

    Parameters
    ----------
    period : str
        One of the seasons 'JFM','AMJ','JAS','OND','DJF','JJA' or one of the
        months 'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT',
        'NOV','DEC'.

    Returns
    -------
    res : list of int or int
        For a season the list of its three month numbers, for a month the
        single month number (1-12).
    fr : int
        Number of months in the period: 3 for a season, 1 for a month.

    Raises
    ------
    ValueError
        If `period` is not a known season or month label.
    '''
    seasons = {
        'JFM': [1, 2, 3],
        'AMJ': [4, 5, 6],
        'JAS': [7, 8, 9],
        'OND': [10, 11, 12],
        'DJF': [12, 1, 2],
        'JJA': [6, 7, 8],
    }
    months = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
              'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']

    if period in seasons:
        return seasons[period], 3
    if period in months:
        return months.index(period) + 1, 1
    # Previously an unknown label fell through to an unbound local variable.
    raise ValueError(f'Unknown period {period!r} in `decode_period`')
[docs]
@d.dedent
def in_data(var, lev, dataset='ERA5', period='JAN', epoch='AFT', loc=' ', averaging=True, verbose=False, use_cache=True):
    '''
    This is a wrapper around `read_era5` and `read_ceres`, using a file
    cache to avoid slow readings.

    The cache is only used for the 'ERA5' data set and for periods other than
    'ANN'. Cached results are stored as NetCDF files in the directory
    ``DATA_CACHE`` below `loc`.

    Parameters
    ----------
    dataset:
        Data set to be read:
            * 'ERA5' -- Monthly mean Reanalysis
            * 'CERES' -- CERES Satellite Radiance Product
    %(read_era5.parameters)s
    use_cache:
        True/False If True read the result from the file cache when available,
        otherwise create the cache file

    Returns
    -------
    %(read_era5.returns)s

    Raises
    ------
    ValueError
        If `dataset` is neither 'ERA5' nor 'CERES'.
    '''
    readers = {'ERA5': read_era5, 'CERES': read_ceres}
    if dataset not in readers:
        # The original printed a message and then failed on an unbound
        # result variable; raise a meaningful error instead.
        raise ValueError(f' Wrong choice in `in_data`: dataset {dataset!r}')

    reader_kw = dict(period=period, epoch=epoch, loc=loc,
                     averaging=averaging, verbose=verbose)

    # Consider cache only for ERA5 and never for the entire time series.
    if not (use_cache and dataset == 'ERA5' and period != 'ANN'):
        return readers[dataset](var, lev, **reader_kw)

    if verbose:
        print('Using cache ..\n')

    # Form file names
    dash = '/'
    udl = '_'
    cdir = loc + dash + 'DATA_CACHE'
    cache_file = (cdir + dash + var.upper() + udl + lev + udl + epoch + udl
                  + period + udl + str(averaging) + '.nc')

    # Create cache directory, only now that it is actually needed.
    try:
        os.mkdir(cdir)
    except FileExistsError:
        if verbose:
            print("Directory " , cdir , " Already Exists")

    try:
        res = xr.open_dataset(cache_file)
    except (OSError, ValueError, RuntimeError):
        # No usable cache entry: read the data and store it for next time.
        if verbose:
            print(f'Creating file {cache_file}')
        res = read_era5(var, lev, **reader_kw)
        res.to_netcdf(cache_file)
    else:
        if verbose:
            print(f'Reading file {cache_file}')
    return res
# def in_zonal(var,avedim=['time','lon'],**kw):
# '''
# Read Zonally Averaged Sections
# Examples
# --------
# >>> in_zonal(var,period='DJF',epoch='AFT', loc = ' ',averaging=True)
# '''
# totlev = [ '10','50','100','150','200','250','300','400','500','600','700','850','925','1000']
# first_lev= totlev[0]
# print(f'Treating level {var} {first_lev}')
# if avedim:
# dd = in_data(var,first_lev,**kw).mean(dim=avedim)
# else:
# dd = in_data(var,first_lev,**kw)
# zon=dd.expand_dims('pressure').assign_coords(pressure=[float(totlev[0])])
# totlev.remove(first_lev)
# for l in totlev:
# print(f'Treating level {var} {l}')
# if avedim:
# dd = in_data(var,l,**kw).mean(dim=avedim).expand_dims('pressure').assign_coords(pressure=[float(l)])
# else:
# dd = in_data(var,l,**kw).expand_dims('pressure').assign_coords(pressure=[float(l)])
# zon=xr.concat([zon,dd],dim='pressure')
# return zon
[docs]
@d.get_sections(base='read_ceres', sections=['Parameters', 'Returns'])
@d.dedent
def read_ceres(var, lev, period='JAN', epoch='AFT', loc=' ', averaging=True, verbose=False):
    '''
    This routine reads monthly data from the CERES EBAF-TOA file
    ``CERES/CERES_EBAF-TOA_Ed4.1_Subset_200003-202012.nc`` below `loc`.

    Parameters
    ----------
    var:
        Variable selected (name, units, description):
            * toa_sw_all_mon, W/m2, TOA Shortwave Flux - All-Sky
            * toa_lw_all_mon, W/m2, TOA Longwave Flux - All-Sky
            * toa_net_all_mon, W/m2, TOA Net Flux - All-Sky
            * toa_sw_clr_c_mon, W/m2, TOA Shortwave Flux - Clear-Sky (for cloud-free areas of region)
            * toa_lw_clr_c_mon, W/m2, TOA Longwave Flux - Clear-Sky (for cloud-free areas of region)
            * toa_net_clr_c_mon, W/m2, TOA Net Flux - Clear-Sky (for cloud-free areas of region)
            * solar_mon, W/m2, Incoming Solar Flux
            * cldarea_total_daynight_mon, percent, Cloud Area Fraction - Daytime-and-Nighttime
            * cldpress_total_daynight_mon, hPa, Cloud Effective Pressure - Daytime-and-Nighttime
            * cldtemp_total_daynight_mon, K, Cloud Effective Temperature - Daytime-and-Nighttime
            * cldtau_total_day_mon, 1, Cloud Visible Optical Depth - Daytime
    lev:
        TOA (accepted for symmetry with `read_era5`, not used by this routine)
    period:
        Month or season to be selected. For periods across years, i.e. 'DJF' the
        first and last years are dropped.
        Values are month or season labels
        'JFM','AMJ','JAS','OND','DJF','JJA'
        'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'
        'ANN' -- Entire time series, returned without averaging
    epoch:
        * 'ALL' -- Entire data series. This is the only accepted value, so
          the default 'AFT' must be overridden by the caller.
    loc:
        Location of the root directory for the data set
    averaging:
        True/False If averaged output is desired
    verbose:
        True/False If True print the time span of the data that was read

    Returns
    -------
    data:
        data in xarray format

    Raises
    ------
    ValueError
        If `epoch` is not 'ALL'.
    '''
    if epoch != 'ALL':
        # The original built an exception without raising it and then failed
        # later on an unbound variable; fail here with a clear message.
        raise ValueError(f"Wrong epoch choice {epoch!r} in `read_ceres`, only 'ALL' is supported")

    # Form file names
    dash = '/'
    fil = loc + dash + 'CERES' + dash + 'CERES_EBAF-TOA_Ed4.1_Subset_200003-202012.nc'
    dat = xr.open_dataset(fil)
    enddat = pd.to_datetime(["2020-12-01"])[0]

    if verbose:
        print(f'Selected data from {dat.time[0].data} to {dat.time[-1].data} \n')

    if period != 'ANN':
        perlab, fr = decode_period(period)
        dat = dat.sel(time=dat.time.dt.month.isin(perlab))
        # Adjust for period across years: keep only months after March 2001
        # and before the last December on file.
        if period == 'DJF':
            dat = dat.where(dat.time > pd.to_datetime(["2001-03-01"])[0], drop=True).where(dat.time < enddat, drop=True)
        # Averaging needs the block length from `decode_period`, which is
        # only defined for a month or a season, hence not for 'ANN'.
        if averaging:
            dat = dat.coarsen(time=fr).mean()

    return dat[var]
[docs]
def select_time(dataset_dates, select=None, period=None):
    '''
    Function to create the time range for the data selection.

    A subperiod can be selected within the data set using `select` and `period`,
    `select` is the data set interval, `period` is the calendar choice within
    that interval. If `select` is None the entire monthly range given by
    `dataset_dates` is returned and `period` is ignored.

    Parameters
    ----------
    dataset_dates : list
        List with the starting and ending dates of the data set
    select : str
        Selection of the period within the data set given by `dataset_dates`
            * ERA5 -- 1940-2022
            * COBE -- 1891-2020
            * XXSEC -- 1900-2020
    period : str
        Period to be selected, required when `select` is given
            * JAN -- January
            * JUL -- July
            * ANN -- Annual

    Returns
    -------
    data_time : pandas.DatetimeIndex
        Month-start dates from ``dataset_dates[0]`` to ``dataset_dates[1]``.
    sel_time : pandas.DatetimeIndex
        Only returned, as second element of a tuple, when `select` is given:
        one date per year of the selected interval, on the first day of
        January ('JAN' and 'ANN') or July ('JUL').

    Raises
    ------
    ValueError
        If `select` or `period` is not one of the documented values.
    '''
    data_time = pd.date_range(start=dataset_dates[0], end=dataset_dates[1], freq='1MS')
    if select is None:
        return data_time

    # Data set interval as (first year, last year), both included.
    intervals = {'ERA5': (1940, 2022), 'COBE': (1891, 2020), 'XXSEC': (1900, 2020)}
    # Calendar month on which the yearly sequence is anchored.
    anchor_month = {'JAN': 1, 'JUL': 7, 'ANN': 1}

    if select not in intervals:
        raise ValueError(f'Wrong selection {select} in `select_time`')
    if period not in anchor_month:
        raise ValueError(f'Wrong period {period} in `select_time`')

    first_year, last_year = intervals[select]
    month = anchor_month[period]
    sel_time = pd.date_range(start=pd.Timestamp(year=first_year, month=month, day=1),
                             end=pd.Timestamp(year=last_year, month=month, day=1),
                             freq='12MS')
    return data_time, sel_time