|
| 1 | +# Utilities for processing observations/OutputVars |
| 2 | +# Used to generate observations and compute the observation map |
| 3 | +using ClimaAnalysis, ClimaCoupler |
| 4 | +using Dates, LinearAlgebra, Statistics |
| 5 | +import EnsembleKalmanProcesses as EKP |
| 6 | + |
| 7 | +# Workaround to read ql, qi from file nicely |
| 8 | +push!(ClimaAnalysis.Var.TIME_NAMES, "valid_time") |
| 9 | + |
| 10 | +# Constants |
| 11 | +const days_in_seconds = 86_400 |
| 12 | +const months = 31days_in_seconds |
| 13 | +const years = 365days_in_seconds |
| 14 | +const start_date = DateTime(2000, 3, 1) |
| 15 | +const first_year_start_date = DateTime(2000, 12, 1) |
| 16 | + |
| 17 | +include(joinpath(pkgdir(ClimaCoupler), "experiments/ClimaEarth/leaderboard/data_sources.jl")) |
| 18 | + |
| 19 | +# The ERA5 pressure range is not as large as the ClimaAtmos default pressure levels, |
| 20 | +# so we need to limit outputvars to the ERA5 dims |
| 21 | +function limit_pressure_dim_to_era5_range(diagnostic_var3d) |
| 22 | + @assert has_pressure(diagnostic_var3d) |
| 23 | + era5_pressure_min = 100.0 # Pa |
| 24 | + era5_pressure_max = 100_000.0 # Pa |
| 25 | + pfull_dims = diagnostic_var3d.dims[pressure_name(diagnostic_var3d)] |
| 26 | + left = minimum(filter(x -> x >= era5_pressure_min, pfull_dims)) |
| 27 | + right = maximum(filter(x -> x <= era5_pressure_max, pfull_dims)) |
| 28 | + # Window the diagnostic var to use the era5 pressure bounds |
| 29 | + return window(diagnostic_var3d, "pfull"; left, right) |
| 30 | +end |
| 31 | + |
| 32 | +""" |
| 33 | + get_all_output_vars(obs_dir, diagnostic_var2d, diagnostic_var3d) |
| 34 | +
|
| 35 | +Return a NamedTuple of OutputVars containing all initial coarse AMIP observations. |
| 36 | +Start date is set to `DateTime(2000, 3, 1)`. All OutputVars are resampled to the model diagnostic grid. |
| 37 | +""" |
| 38 | +function get_all_output_vars(obs_dir, diagnostic_var2d, diagnostic_var3d) |
| 39 | + diagnostic_var3d = limit_pressure_dim_to_era5_range(diagnostic_var3d) |
| 40 | + |
| 41 | + resample_2d(output_var) = resampled_as(output_var, diagnostic_var2d, dim_names = ["longitude", "latitude"]) |
| 42 | + resample_3d(output_var) = |
| 43 | + resampled_as(output_var, diagnostic_var3d, dim_names = ["longitude", "latitude", "pressure_level"]) |
| 44 | + resample(ov) = has_pressure(ov) ? resample_3d(ov) : resample_2d(ov) |
| 45 | + |
| 46 | + era5_outputvar(path) = OutputVar(path; new_start_date = start_date, shift_by = Dates.firstdayofmonth) |
| 47 | + |
| 48 | + rad_and_pr_obs_dict = get_obs_var_dict() |
| 49 | + |
| 50 | + # 2D Fields |
| 51 | + # TOA incoming shortwave radiation |
| 52 | + rsdt = resample(rad_and_pr_obs_dict["rsdt"](start_date)) |
| 53 | + # TOA outgoing long, shortwave radiation |
| 54 | + rlut = resample(rad_and_pr_obs_dict["rlut"](start_date)) |
| 55 | + rsut = resample(rad_and_pr_obs_dict["rsut"](start_date)) |
| 56 | + # TOA clearsky outgoing long, shortwave radiation |
| 57 | + rsutcs = resample(rad_and_pr_obs_dict["rsutcs"](start_date)) |
| 58 | + rlutcs = resample(rad_and_pr_obs_dict["rlutcs"](start_date)) |
| 59 | + |
| 60 | + # TOA net radiative flux |
| 61 | + net_rad = rlut + rsut - rsdt |
| 62 | + # For some reason we need to add the start date back in |
| 63 | + net_rad.attributes["start_date"] = string(start_date) |
| 64 | + |
| 65 | + # cloud radiative effect |
| 66 | + cre = rsut + rlut - rsutcs - rlutcs |
| 67 | + cre.attributes["start_date"] = string(start_date) |
| 68 | + |
| 69 | + # Precipitation |
| 70 | + pr = resample(rad_and_pr_obs_dict["pr"](start_date)) |
| 71 | + |
| 72 | + # Latent heat flux |
| 73 | + lhf = resample(era5_outputvar(joinpath(obs_dir, "era5_monthly_averages_surface_single_level_mslhf.nc"))) |
| 74 | + # Sensible heat flux |
| 75 | + shf = resample(era5_outputvar(joinpath(obs_dir, "era5_monthly_averages_surface_single_level_msshf.nc"))) |
| 76 | + shf = lhf + shf |
| 77 | + shf.attributes["start_date"] = string(start_date) |
| 78 | + # Surface temperature |
| 79 | + ts = resample(era5_outputvar(joinpath(obs_dir, "era5_monthly_avg_ts.nc"))) |
| 80 | + # 3D Fields |
| 81 | + # Air temperature |
| 82 | + ta = resample(era5_outputvar(joinpath(obs_dir, "era5_monthly_avg_pressure_level_t.nc"))) |
| 83 | + |
| 84 | + # relative humidity |
| 85 | + hur = resample(era5_outputvar(joinpath(obs_dir, "era5_monthly_avg_pressure_level_r.nc"))) |
| 86 | + # specific humidity |
| 87 | + hus = resample(era5_outputvar(joinpath(obs_dir, "era5_monthly_avg_pressure_level_q.nc"))) |
| 88 | + |
| 89 | + # # Cloud specific liquid water content |
| 90 | + # ql = era5_outputvar(joinpath(obs_dir, "era5_specific_cloud_liquid_water_content.nc")) |
| 91 | + # # Cloud specific ice water content |
| 92 | + # qi = era5_outputvar(joinpath(obs_dir, "era5_specific_cloud_ice_water_content.nc")) |
| 93 | + # foreach((ql, qi)) do var |
| 94 | + # # Convert from hPa to Pa in-place so we don't create more huge OutputVars |
| 95 | + # @assert var.dim_attributes[pressure_name(var)]["units"] == "hPa" |
| 96 | + # var.dims[pressure_name(var)] .*= 100.0 |
| 97 | + # set_dim_units!(var, pressure_name(var), "Pa") |
| 98 | + # end |
| 99 | + |
| 100 | + # ql = resample(reverse_dim(reverse_dim(ql, latitude_name(ql)), pressure_name(ql))) |
| 101 | + # qi = resample(reverse_dim(reverse_dim(qi, latitude_name(qi)), pressure_name(qi))) |
| 102 | + |
| 103 | + return (; rlut, rsut, rsutcs, rlutcs, pr, net_rad, cre, shf, ts, ta, hur, hus)#, ql, qi) |
| 104 | +end |
| 105 | + |
| 106 | +##### |
| 107 | +# Processing to create EKP.ObservationSeries |
| 108 | +##### |
| 109 | + |
| 110 | +to_datetime(start_date, time) = DateTime(start_date) + Second(time) |
| 111 | + |
| 112 | +get_monthly_averages(simdir, var_name) = get(simdir; short_name = var_name, reduction = "average", period = "1M") |
| 113 | + |
| 114 | +get_seasonal_averages(var) = average_time.(split_by_season_across_time(var)) |
| 115 | + |
| 116 | +function get_seasonal_averages(simdir, var_name) |
| 117 | + var = get(simdir; short_name = var_name, reduction = "average", period = "1M") |
| 118 | + get_seasonal_averages(var) |
| 119 | +end |
| 120 | + |
| 121 | +function get_yearly_averages(var) |
| 122 | + seasonal_avgs = get_seasonal_averages(var) |
| 123 | + nyears = fld(length(seasonal_avgs), 4) |
| 124 | + matrices = getproperty.(seasonal_avgs, :data) |
| 125 | + year_averaged_matrices = map(1:nyears) do i |
| 126 | + start_idx = (i - 1) * 4 + 1 |
| 127 | + end_idx = i * 4 |
| 128 | + group = matrices[start_idx:end_idx] |
| 129 | + |
| 130 | + # Compute the average matrix for this group |
| 131 | + averaged_matrix = sum(group) / 4 |
| 132 | + averaged_matrix |
| 133 | + end |
| 134 | + return year_averaged_matrices |
| 135 | +end |
| 136 | + |
| 137 | +# Given an outputvar, compute the standard deviation at each point for each season. |
| 138 | +function get_seasonal_stdev(output_var) |
| 139 | + all_seasonal_averages = get_seasonal_averages(output_var) |
| 140 | + all_seasonal_averages = downsample.(all_seasonal_averages, 3) |
| 141 | + seasonal_average_matrix = cat(all_seasonal_averages...; dims = 3) |
| 142 | + interannual_stdev = std(seasonal_average_matrix, dims = 3) |
| 143 | + # TODO: Add intraseasonal stdev? |
| 144 | + return dropdims(interannual_stdev; dims = 3) |
| 145 | +end |
| 146 | + |
| 147 | +# Given an outputvar, compute the covariance for each season. |
| 148 | +function get_seasonal_covariance(output_var) |
| 149 | + stdev = get_seasonal_stdev(output_var) |
| 150 | + return Diagonal(vec(stdev) .^ 2) |
| 151 | +end |
| 152 | + |
| 153 | +# Given a year, return the indices of that year within a seasonal array |
| 154 | +# Assume each year has 4 seasons and starts at index % 4 == 1 |
| 155 | +function get_year_indices(year) |
| 156 | + start_index = (year * 4) - 3 |
| 157 | + end_index = year * 4 |
| 158 | + return start_index:end_index |
| 159 | +end |
| 160 | + |
| 161 | +# Take in a vector of seasonal average OutputVars and a range or single number representing the years, |
| 162 | +# return a vector of all data within the year range |
| 163 | +function vectorize_nyears_of_seasonal_outputvars(vec_of_vars, year_range) |
| 164 | + # Generate indices for all specified years |
| 165 | + all_year_indices = vcat(get_year_indices.(year_range)...) |
| 166 | + result = vcat(vec.(getproperty.(vec_of_vars[all_year_indices], :data))...) |
| 167 | + return result |
| 168 | +end |
| 169 | + |
| 170 | +# Make an EKP.Observation of a single year of seasonal averages from an OutputVar |
| 171 | +function make_single_year_of_seasonal_observations(output_var, yr) |
| 172 | + seasonal_avgs = get_seasonal_averages(output_var) |
| 173 | + downsampled_seasonal_avg_arrays = downsample.(seasonal_avgs, 3) |
| 174 | + all_year_indices = vcat(get_year_indices.(yr)...) |
| 175 | + obs_vec = vcat(vec.(downsampled_seasonal_avg_arrays[all_year_indices])...) |
| 176 | + |
| 177 | + name = get(output_var.attributes, "CF_name", get(output_var.attributes, "long_name", "")) |
| 178 | + cov = get_seasonal_covariance(output_var) |
| 179 | + return EKP.Observation(obs_vec, Diagonal(repeat(cov.diag, 4)), "$(yr)_$name") |
| 180 | +end |
| 181 | + |
| 182 | +""" |
| 183 | + create_observation_vector(nt, yrs = 19) |
| 184 | +
|
| 185 | +""" |
| 186 | +function create_observation_vector(nt, yrs = 19) |
| 187 | + # Starting year is 2000-12 to 2001-11 |
| 188 | + t_start = Second(first_year_start_date - start_date).value |
| 189 | + rsut = window(nt.rsut, "time"; left = t_start) |
| 190 | + rlut = window(nt.rlut, "time"; left = t_start) |
| 191 | + rsutcs = window(nt.rsutcs, "time"; left = t_start) |
| 192 | + rlutcs = window(nt.rlutcs, "time"; left = t_start) |
| 193 | + cre = window(nt.cre, "time"; left = t_start) |
| 194 | + |
| 195 | + # Net radiation uses yearly averages, so we treat it differently |
| 196 | + net_rad = window(nt.net_rad, "time"; left = t_start) |> average_lat |> average_lon |
| 197 | + net_rad = get_yearly_averages(net_rad) |
| 198 | + net_rad_stdev = std(cat(net_rad..., dims = 3), dims = 3) |
| 199 | + net_rad_covariance = Diagonal(vec(net_rad_stdev) .^ 2) |
| 200 | + |
| 201 | + ts = window(nt.ts, "time"; left = t_start) |
| 202 | + pr = window(nt.pr, "time"; left = t_start) |
| 203 | + shf = window(nt.shf, "time"; left = t_start) |
| 204 | + |
| 205 | + ta = window(nt.ta, "time"; left = t_start) |
| 206 | + |
| 207 | + hur = window(nt.hur, "time"; left = t_start) |
| 208 | + hus = window(nt.hus, "time"; left = t_start) |
| 209 | + |
| 210 | + all_observations = map(1:yrs) do yr |
| 211 | + net_rad_obs = EKP.Observation(vec(net_rad[yr]), net_rad_covariance, "$(yr)_net_rad") |
| 212 | + |
| 213 | + rsut_obs = make_single_year_of_seasonal_observations(rsut, yr) |
| 214 | + rlut_obs = make_single_year_of_seasonal_observations(rlut, yr) |
| 215 | + rsutcs_obs = make_single_year_of_seasonal_observations(rsutcs, yr) |
| 216 | + rlutcs_obs = make_single_year_of_seasonal_observations(rlutcs, yr) |
| 217 | + cre_obs = make_single_year_of_seasonal_observations(cre, yr) |
| 218 | + pr_obs = make_single_year_of_seasonal_observations(pr, yr) |
| 219 | + # shf_obs = make_single_year_of_seasonal_observations(shf, yr) |
| 220 | + ts_obs = make_single_year_of_seasonal_observations(ts, yr) |
| 221 | + |
| 222 | + # ta_obs = make_single_year_of_seasonal_observations(ta, yr) |
| 223 | + # hur_obs = make_single_year_of_seasonal_observations(hur, yr) |
| 224 | + # hus_obs = make_single_year_of_seasonal_observations(hus, yr) |
| 225 | + EKP.combine_observations([net_rad_obs, rsut_obs, rlut_obs, cre_obs, pr_obs, ts_obs])#, ta_obs, hur_obs, hus_obs]) |
| 226 | + end |
| 227 | + |
| 228 | + return all_observations # NOT an EKP.ObservationSeries |
| 229 | +end |
| 230 | + |
| 231 | +downsample(var::ClimaAnalysis.OutputVar, n) = downsample(var.data, n) |
| 232 | + |
| 233 | +function downsample(arr::AbstractArray, n) |
| 234 | + if n < 1 |
| 235 | + error("Downsampling factor n must be at least 1.") |
| 236 | + end |
| 237 | + if ndims(arr) == 2 |
| 238 | + return arr[1:n:end, 1:n:end] |
| 239 | + elseif ndims(arr) == 3 |
| 240 | + return arr[1:n:end, 1:n:end, :] |
| 241 | + else |
| 242 | + error("Only 2D and 3D arrays are supported.") |
| 243 | + end |
| 244 | +end |
| 245 | + |
0 commit comments