diff --git a/examples/exp_fluxnet_hybrid/Project.toml b/examples/exp_fluxnet_hybrid/Project.toml index 0f8924d69..2d8857c70 100644 --- a/examples/exp_fluxnet_hybrid/Project.toml +++ b/examples/exp_fluxnet_hybrid/Project.toml @@ -1,9 +1,11 @@ [deps] CMAEvolutionStrategy = "8d3b24bd-414e-49e0-94fb-163cc3a3e411" +Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GCMAES = "4aa9d100-eb0f-11e8-15f1-25748831eb3b" -GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" +MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" Sindbad = "6686e6de-2010-4bf8-9178-b2bcc470766e" SindbadData = "772b809f-5329-4bfd-a2e9-cc4714ce84b1" SindbadML = "eb1d8004-2d42-4eef-b369-ed0645c101e8" @@ -12,14 +14,26 @@ SindbadOptimization = "64fff8bd-0c13-402c-aa96-b97d0bc3655c" SindbadSetup = "2b7f2987-8a1e-48b4-8a02-cc9e7c4eeb1c" SindbadTEM = "f6108451-10cb-42fa-b0c1-67671cf08f15" SindbadUtils = "a2ad09c8-73f9-4387-ada0-5c5a8c58f8ab" +SlurmClusterManager = "c82cd089-7bf7-41d7-976b-6b5d413cbe0a" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -[sources] -Sindbad = {path = "../.."} -SindbadData = {path = "../../lib/SindbadData"} -SindbadML = {path = "../../lib/SindbadML"} -SindbadMetrics = {path = "../../lib/SindbadMetrics"} -SindbadOptimization = {path = "../../lib/SindbadOptimization"} -SindbadSetup = {path = "../../lib/SindbadSetup"} -SindbadTEM = {path = "../../lib/SindbadTEM"} -SindbadUtils = {path = "../../lib/SindbadUtils"} +[sources.Sindbad] +path = "../../" + +[sources.SindbadData] +path = "../../lib/SindbadData" + +[sources.SindbadML] +path = "../../lib/SindbadML" + +[sources.SindbadMetrics] +path = "../../lib/SindbadMetrics" + +[sources.SindbadSetup] +path = "../../lib/SindbadSetup" + +[sources.SindbadTEM] +path = "../../lib/SindbadTEM" + +[sources.SindbadUtils] +path = "../../lib/SindbadUtils" diff --git a/examples/exp_fluxnet_hybrid/analysis/Project.toml b/examples/exp_fluxnet_hybrid/analysis/Project.toml index 744fd3cf6..cf7aed186 100644 --- a/examples/exp_fluxnet_hybrid/analysis/Project.toml +++ b/examples/exp_fluxnet_hybrid/analysis/Project.toml @@ -3,7 +3,6 @@ ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" -GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" GeoMakie = "db073c08-6b98-4ee5-b6a4-5efafb3259c6" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" @@ -13,5 +12,7 @@ NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9" Proj = "c94c279d-25a6-4763-9509-64d165bea63e" Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +SwarmMakie = "0b1c068e-6a84-4e66-8136-5c95cafa83ed" +TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99" diff --git a/examples/exp_fluxnet_hybrid/analysis/fold_analysis.jl b/examples/exp_fluxnet_hybrid/analysis/fold_analysis.jl index 2505a2659..320939b9b 100644 --- a/examples/exp_fluxnet_hybrid/analysis/fold_analysis.jl +++ b/examples/exp_fluxnet_hybrid/analysis/fold_analysis.jl @@ -1,3 +1,4 @@ +ENV["JULIA_NUM_PRECOMPILE_TASKS"] = "1" using CairoMakie using SwarmMakie using Flux @@ -5,28 +6,37 @@ using Statistics using TypedTables using JLD2 -path_ptmp = "/ptmp/lalonso/HybridOutput/" - +path_ptmp = "/ptmp/lalonso/HybridOutputALL/" +exp_name = "HyALL_ALL" +_nfold = 5 +nlayers = 3 +n_neurons = 32 +bs= 32 +nepochs=500 experiment = "$(exp_name)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_batch_size_$(bs)/checkpoint" +# experiment = "$(exp_name)_kσ_1.0_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(nepochs)epochs_batch_size_$(bs)/checkpoint" checkpoint_path = joinpath(path_ptmp, experiment) -losses = JLD2.load(joinpath(checkpoint_path, "checkpoint_epoch_500.jld2")) +losses = JLD2.load(joinpath(checkpoint_path, "checkpoint_epoch_100.jld2")) +# "HyALL_ALL_kσ_1.0_fold_5_nlayers_3_n_neurons_32_500epochs_batch_size_32" +# "HyALL_ALL_kσ_1.0_fold_5_nlayers_3_n_neurons_32_batch_size_32_500epochs" losses["loss_split_testing"] # Load the training history -function load_losses(exp_name, _nfold, nlayers, n_neurons, bs, nepochs) - path_ptmp = "/ptmp/lalonso/HybridOutput/" +function load_losses(exp_name, _nfold, nlayers, n_neurons, bs, nepochs, load_nepochs) + path_ptmp = "/ptmp/lalonso/HybridOutputALL/" if exp_name == "HyFixK_PFT" bs = bs * 123 end experiment = "$(exp_name)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_batch_size_$(bs)/checkpoint" + # experiment = "$(exp_name)_kσ_1.0_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(nepochs)epochs_batch_size_$(bs)/checkpoint" checkpoint_path = joinpath(path_ptmp, experiment) μtrain = Float32[] μval = Float32[] μtest = Float32[] - for epoch in 1:nepochs + for epoch in 1:load_nepochs losses = JLD2.load(joinpath(checkpoint_path, "checkpoint_epoch_$epoch.jld2")) push!(μtrain, mean(losses["loss_training"])) push!(μval, mean(losses["loss_validation"])) @@ -36,6 +46,8 @@ function load_losses(exp_name, _nfold, nlayers, n_neurons, bs, nepochs) return μtrain, μval, μtest end +mkpath(joinpath(@__DIR__, "figs")) + function plot_training_history(μtrain, μval, μtest) with_theme(theme_light()) do fig = Figure(; size = (600, 400)) @@ -45,62 +57,72 @@ function plot_training_history(μtrain, μval, μtest) lines!(ax, μtest, color = :olive, linewidth = 1.25, label = "test") # ylims!(ax, 3, 4) axislegend(ax, position = :rt) - save("history_losses_5.png", fig) + save(joinpath(@__DIR__, "figs/history_losses_5.png"), fig) end end # ? load n-fold history nepochs=500 _nfold = 5 -nlayers= 2 +nlayers= 3 n_neurons = 32 bs = 32 -exp_name = "HyFixK_ALL" +# exp_name = "HyFixK_ALL" +exp_name = "HyALL_ALL" -μtrain, μval, μtest = load_losses(exp_name, _nfold, nlayers, n_neurons, bs, 500) +μtrain, μval, μtest = load_losses(exp_name, _nfold, nlayers, n_neurons, bs, 500, 210) plot_training_history(μtrain, μval, μtest) -function plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) - with_theme(theme_light()) do - fig = Figure(; size = (400, 900)) - axs = [Axis(fig[row, 1]) for row in 1:5] - for (_nfold, ax) in enumerate(axs) - μtrain, μval, μtest = load_losses(exp_name, _nfold, nlayers, n_neurons, bs, nepochs) +function plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs; xpos = 185) + with_theme(theme_latexfonts()) do + fig = Figure(; size = (1200, 400), fontsize=24) + axs = [Axis(fig[row, col]; xgridstyle=:dash, ygridstyle=:dash, xlabel = "epoch", ylabel="loss") + for row in 1:2 for col in 1:3] + for (_nfold, ax) in enumerate(axs[1:5]) + μtrain, μval, μtest = load_losses(exp_name, _nfold, nlayers, n_neurons, bs, 500, nepochs) lines!(ax, μtrain, color = :dodgerblue, linewidth = 1.25, label = "training") lines!(ax, μval, color = :orangered, linewidth = 1.25, label = "validation") lines!(ax, μtest, color = :olive, linewidth = 1.25, label = "test") - ax.title = "fold $(_nfold)" + # ax.title = "fold $(_nfold)" + text!(ax, [xpos], [5.5], text="fold $(_nfold)", color = :grey25) end - axislegend(axs[1], position = :rt, nbanks=3) - hidexdecorations!.(axs[1:4], ticks=false, grid=false) + Legend(fig[2, 3], axs[1], tellwidth=false, tellheight=false, halign=0, + framewidth=0.25, patchcolor = (:white, 0.25) ) + # axislegend(axs[], position = :ct, nbanks=3, framewidth=0.25, patchcolor = (:white, 0.25)) + hidexdecorations!.(axs[1:3], ticks=false, grid=false) + hidespines!(axs[end]) + hidedecorations!(axs[end]) linkaxes!.(axs) - limits!.(axs, 0, nepochs, 2.7, 4.5) + limits!.(axs, 0, nepochs, 2.7, 6) rowgap!(fig.layout, 0) - save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_history.png", fig) + hidespines!.(axs) + save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_history.pdf", fig) end end # ? load n-fold history -nepochs=500 -_nfold = 5 -nlayers= 2 -n_neurons = 32 -bs = 32 -exp_name = "HyFixK_ALL" # HyFixK_PFT -plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) +# nepochs=500 +# _nfold = 5 +# nlayers= 2 +# n_neurons = 32 +# bs = 32 +# exp_name = "HyFixK_ALL" # HyFixK_PFT +# plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) -exp_name = "HyFixK_PFT" # -plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) +# exp_name = "HyFixK_PFT" # +# plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) exp_name = "HyALL_ALL" -nepochs = 150 -plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) +nepochs = 210 +plot_training_history_folds(exp_name, 2, n_neurons, bs, nepochs; xpos = 170) +plot_training_history_folds(exp_name, 3, n_neurons, bs, 180; xpos = 170) -exp_name = "HyALL_PFT" -nepochs = 100 -plot_training_history_folds(exp_name, 3, n_neurons, bs, nepochs) + +# exp_name = "HyALL_PFT" +# nepochs = 100 +# plot_training_history_folds(exp_name, 3, n_neurons, bs, nepochs) # do per variable @@ -147,8 +169,8 @@ split_tested = collect_folds() function plot_box_split(split_m) _constraints = ["gpp", "nee", "reco", "transpiration", "evapotranspiration", "agb", "ndvi"] _colors = ["#4CAF50", "#1565C0", "#D32F2F", "#00ACC1", "#00897B", "#8D6E63", "#CDDC39"] - with_theme() do - fig = Figure(; size = (1200, 300)) + with_theme(theme_latexfonts()) do + fig = Figure(; size = (1200, 300), fontsize=24) axs = [Axis(fig[1, i]) for i in 1:7] for (i, ax) in enumerate(axs) tmp_vec = filter(!isnan, split_m[:,i]) @@ -165,7 +187,7 @@ function plot_box_split(split_m) # hidexdecorations!.(axs, grid=false, label=false, ticklabels=false, ticks=true) hidexdecorations!.(axs, grid=false, label=false, ticklabels=true) hidespines!.(axs) - save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_box_test.png", fig) + save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_box_test.pdf", fig) end end @@ -175,8 +197,8 @@ plot_box_split(split_tested) function plot_violin_split(split_m) _constraints = ["gpp", "nee", "reco", "transpiration", "evapotranspiration", "agb", "ndvi"] _colors = ["#4CAF50", "#1565C0", "#D32F2F", "#00ACC1", "#00897B", "#8D6E63", "#CDDC39"] - with_theme() do - fig = Figure(; size = (1200, 300)) + with_theme(theme_latexfonts()) do + fig = Figure(; size = (1200, 300), fontsize=24) axs = [Axis(fig[1, i]) for i in 1:7] for (i, ax) in enumerate(axs) tmp_vec = filter(!isnan, split_m[:,i]) @@ -195,19 +217,50 @@ function plot_violin_split(split_m) # hidexdecorations!.(axs, grid=false, label=false, ticklabels=false, ticks=true) hidexdecorations!.(axs, grid=false, label=false, ticklabels=true) hidespines!.(axs) - save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_violin_test.png", fig) + save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_violin_test.pdf", fig) end end plot_violin_split(split_tested) +function plot_beeswarm_split(split_m) + _constraints = ["gpp", "nee", "reco", "transpiration", "evapotranspiration", "agb", "ndvi"] + _colors = ["#4CAF50", "#1565C0", "#D32F2F", "#00ACC1", "#00897B", "#8D6E63", "#CDDC39"] + with_theme(theme_latexfonts()) do + fig = Figure(; size = (1200, 300), fontsize=24) + axs = [Axis(fig[1, i]) for i in 1:7] + for (i, ax) in enumerate(axs) + tmp_vec = filter(!isnan, split_m[:,i]) + + beeswarm!(ax, fill(1, length(tmp_vec)), tmp_vec; color = (_colors[i], 0.65), markersize = 6) + # boxplot!(ax, fill(1, length(tmp_vec)), tmp_vec; width=0.35, strokecolor = :white, + # strokewidth=1.5, whiskercolor=:white, mediancolor=:white, + # color = :transparent) + ax.title= rich("$(_constraints[i])", color=:grey15, font=:bold) + ax.yticks=[0, 0.25, 0.5, 0.75, 1.0] + end + axs[1].ylabel = "Loss" + ylims!.(axs, 0, 1) + xlims!.(axs, 0.5, 1.5) + hideydecorations!.(axs[2:end], ticks=true, grid=false) + # hidexdecorations!.(axs, grid=false, label=false, ticklabels=false, ticks=true) + hidexdecorations!.(axs, grid=false, label=false, ticklabels=true) + hidespines!.(axs) + save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_beeswarm_test.pdf", fig) + end +end # ! Collect all experiments # "HyFixK_ALL", "HyFixK_PFT", "HyALL_ALL", "HyALL_PFT" -split_tested_HyALL_ALL = collect_folds("HyALL_ALL", 400, 32, 2, 32) -split_tested_HyALL_PFT = collect_folds("HyALL_PFT", 300, 32, 2, 32) -split_tested_HyFixK_ALL = collect_folds("HyFixK_ALL", 500, 32, 2, 32) -split_tested_HyFixK_PFT = collect_folds("HyFixK_PFT", 500, 32, 2, 32) +split_tested_HyALL_ALL = collect_folds("HyALL_ALL", 200, 32, 2, 32) +# split_tested_HyALL_PFT = collect_folds("HyALL_PFT", 300, 32, 2, 32) +# split_tested_HyFixK_ALL = collect_folds("HyFixK_ALL", 500, 32, 2, 32) +# split_tested_HyFixK_PFT = collect_folds("HyFixK_PFT", 500, 32, 2, 32) +plot_box_split(split_tested_HyALL_ALL) +plot_violin_split(split_tested_HyALL_ALL) +plot_beeswarm_split(split_tested_HyALL_ALL) + + function plot_box_split_all(split_m1, split_m2, split_m3, split_m4) _constraints = ["gpp", "nee", "reco", "transpiration", "evapotranspiration", "agb", "ndvi"] diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid.jl index 281eab616..ab58b0fbb 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid.jl @@ -1,3 +1,14 @@ +# activate project's environment and develop the package +using Pkg +Pkg.activate("examples/exp_fluxnet_hybrid") +Pkg.develop(path=pwd()) +Pkg.instantiate() + +# get data +# $examples/data> scp -r lalonso@ruthenia:/Net/Groups/BGI/work_5/scratch/lalonso/CovariatesFLUXNET_3.zarr . +# @examples/data> scp -r lalonso@ruthenia:/Net/Groups/BGI/work_4/scratch/lalonso/FLUXNET_v2023_12_1D.zarr . + +# start using the package using SindbadData using SindbadData.DimensionalData using SindbadData.AxisKeys @@ -7,7 +18,7 @@ using SindbadML using SindbadML.JLD2 using ProgressMeter using SindbadOptimization -include("load_covariates.jl") +# include("load_covariates.jl") # load folds # $nfold $nlayer $neuron $batchsize _nfold = 5 #Base.parse(Int, ARGS[1]) diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl new file mode 100644 index 000000000..1c0d79359 --- /dev/null +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl @@ -0,0 +1,235 @@ +# ENV["JULIA_NUM_PRECOMPILE_TASKS"] = "1" # ! due to raven's threads restrictions, this should NOT be used in production! +import Pkg +Pkg.activate(@__DIR__) + +using Distributed +using SlurmClusterManager # pkg> add https://github.com/lazarusA/SlurmClusterManager.jl.git#la/asynclaunch +addprocs(SlurmManager()) + +@everywhere begin + import Pkg + Pkg.activate(@__DIR__) + using SindbadUtils + using SindbadTEM + using SindbadSetup + using SindbadData + using SindbadData.DimensionalData + using SindbadData.AxisKeys + using SindbadData.YAXArrays + using SindbadML + using SindbadML.JLD2 + using ProgressMeter + using SindbadML.Zygote + using SindbadML.Sindbad +end + +using SindbadUtils +using SindbadTEM +using SindbadSetup +using SindbadData +using SindbadData.DimensionalData +using SindbadData.AxisKeys +using SindbadData.YAXArrays +using SindbadML +using SindbadML.JLD2 +using ProgressMeter +using SindbadML.Zygote +using SindbadML.Sindbad + +# # activate project's environment and develop the package +# using Pkg +# Pkg.activate("examples/exp_fluxnet_hybrid") +# # Pkg.develop(path=pwd()) +# Pkg.instantiate() +# # start using the package +# using SindbadUtils +# using SindbadTEM +# using SindbadSetup +# using SindbadData +# using SindbadData.DimensionalData +# using SindbadData.AxisKeys +# using SindbadData.YAXArrays +# using SindbadML +# using SindbadML.JLD2 +# using ProgressMeter +# using SindbadML.Zygote + + +# import AbstractDifferentiation as AD, Zygote + +# extra includes for covariate and activation functions +# include(joinpath(@__DIR__, "../../examples/exp_fluxnet_hybrid/load_covariates.jl")) +# include(joinpath(@__DIR__, "../../examples/exp_fluxnet_hybrid/test_activation_functions.jl")) + +# load folds # $nfold $nlayer $neuron $batchsize +_nfold = Base.parse(Int, ARGS[1]) # 5 +nlayers = Base.parse(Int, ARGS[2]) # 3 +n_neurons = Base.parse(Int, ARGS[3]) # 32 +batch_size = Base.parse(Int, ARGS[4]) # 32 +id_fold = Base.parse(Int, ARGS[5]) # 1 + +batch_seed = 123 * batch_size * 2 +n_epochs = 500 + +## paths +file_folds = load(joinpath(@__DIR__, "./sampling/nfolds_sites_indices_$(id_fold).jld2")) +path_experiment_json = "../exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json" + +# for remote node +# path_input = "$(getSindbadDataDepot())/FLUXNET_v2023_12_1D.zarr" +path_input = "/raven/u/lalonso/sindbad.jl/examples/data/FLUXNET_v2023_12_1D.zarr" +# path_covariates = "$(getSindbadDataDepot())/CovariatesFLUXNET_3.zarr" +path_covariates = "/raven/u/lalonso/sindbad.jl/examples/data/CovariatesFLUXNET_3.zarr" + +replace_info = Dict() + +replace_info = Dict( + "forcing.default_forcing.data_path" => path_input, + "optimization.observations.default_observation.data_path" => path_input, + # TODO: + + ); + +info = getExperimentInfo(path_experiment_json; replace_info=replace_info); + + +selected_models = info.models.forward; +parameter_scaling_type = info.optimization.run_options.parameter_scaling + +## parameters +tbl_params = info.optimization.parameter_table; +param_to_index = getParameterIndices(selected_models, tbl_params); + +## forcing and obs +forcing = getForcing(info); +observations = getObservation(info, forcing.helpers); + +## helpers +run_helpers = prepTEM(selected_models, forcing, observations, info); + +space_forcing = run_helpers.space_forcing; +space_observations = run_helpers.space_observation; +space_output = run_helpers.space_output; +space_spinup_forcing = run_helpers.space_spinup_forcing; +space_ind = run_helpers.space_ind; +land_init = run_helpers.loc_land; +loc_forcing_t = run_helpers.loc_forcing_t; + +space_cost_options = [prepCostOptions(loc_obs, info.optimization.cost_options) for loc_obs in space_observations]; +constraint_method = info.optimization.run_options.multi_constraint_method; + +tem_info = run_helpers.tem_info; +## do example site +## + +site_example_1 = space_ind[1][1]; +@time coreTEM!(selected_models, space_forcing[site_example_1], space_spinup_forcing[site_example_1], loc_forcing_t, space_output[site_example_1], land_init, tem_info) + +## + +## features +sites_forcing = forcing.data[1].site; # sites names + + +# ! selection and batching +# _nfold = 5 #Base.parse(Int, ARGS[1]) # select the fold +xtrain, xval, xtest = file_folds["unfold_training"][_nfold], file_folds["unfold_validation"][_nfold], file_folds["unfold_tests"][_nfold] + +# ? training +sites_training = sites_forcing[xtrain]; +indices_sites_training = siteNameToID.(sites_training, Ref(sites_forcing)); +# # ? validation +sites_validation = sites_forcing[xval]; +indices_sites_validation = siteNameToID.(sites_validation, Ref(sites_forcing)); +# # ? test +sites_testing = sites_forcing[xtest]; +indices_sites_testing = siteNameToID.(sites_testing, Ref(sites_forcing)); + +indices_sites_batch = indices_sites_training; + +xfeatures = loadCovariates(sites_forcing; kind="all", cube_path=joinpath(@__DIR__, path_covariates)); +@info "xfeatures: [$(minimum(xfeatures)), $(maximum(xfeatures))]" + +nor_names_order = xfeatures.features; +n_features = length(nor_names_order) + +## Build ML method +n_params = sum(tbl_params.is_ml); +k_σ = 1.f0 +# custom_activation = CustomSigmoid(k_σ) +# custom_activation = sigmoid_3 +mlBaseline = denseNN(n_features, n_neurons, n_params; extra_hlayers=nlayers, seed=batch_seed); + +# Initialize params and grads +params_sites = mlBaseline(xfeatures); +@info "params_sites: [$(minimum(params_sites)), $(maximum(params_sites))]" + +grads_batch = zeros(Float32, n_params, length(sites_training))[:,1:batch_size]; +sites_batch = sites_training;#[1:n_sites_train]; +params_batch = params_sites(; site=sites_batch); +@info "params_batch: [$(minimum(params_batch)), $(maximum(params_batch))]" +scaled_params_batch = getParamsAct(params_batch, tbl_params); +@info "scaled_params_batch: [$(minimum(scaled_params_batch)), $(maximum(scaled_params_batch))]" + +forward_args = ( + selected_models, + space_forcing, + space_spinup_forcing, + loc_forcing_t, + space_output, + land_init, + tem_info, + param_to_index, + parameter_scaling_type, + space_observations, + space_cost_options, + constraint_method + ); + + +input_args = ( + scaled_params_batch, + forward_args..., + indices_sites_batch, + sites_batch +); + +# grads_lib = ForwardDiffGrad(); +# grads_lib = FiniteDifferencesGrad(); +# grads_lib = FiniteDiffGrad(); +# grads_lib = ZygoteGrad(); +# grads_lib = EnzymeGrad(); +# backend = AD.ZygoteBackend(); + +grads_lib = PolyesterForwardDiffGrad(); +loc_params, inner_args = getInnerArgs(1, grads_lib, input_args...); + +loss_tmp(x) = lossSite(x, grads_lib, inner_args...) + +# AD.gradient(backend, loss_tmp, collect(loc_params)) + +@time gg = gradientSite(grads_lib, loc_params, 2, lossSite, inner_args...) + +gradientBatch!(grads_lib, grads_batch, 2, lossSite, getInnerArgs,input_args...; showprog=true) + + +# ? training arguments +chunk_size = 2 +metadata_global = info.output.file_info.global_metadata + +in_gargs=(; + train_refs = (; sites_training, indices_sites_training, xfeatures, tbl_params, batch_size, chunk_size, metadata_global), + test_val_refs = (; sites_validation, indices_sites_validation, sites_testing, indices_sites_testing), + total_constraints = length(info.optimization.cost_options.variable), + forward_args, + loss_fargs = (lossSite, getInnerArgs) +); + +# checkpoint_path = "$(info.output.dirs.data)/HyALL_ALL_kσ_$(k_σ)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(n_epochs)epochs_batch_size_$(batch_size)/" +remote_raven = "/ptmp/lalonso/HybridOutputALL/HyALL_ALL_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_batch_size_$(batch_size)/" +# remote_raven = "/ptmp/lalonso/HybridOutput/HyALL_ALL_kσ_$(k_σ)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(n_epochs)epochs_batch_size_$(batch_size)/" +mkpath(remote_raven) +checkpoint_path = remote_raven + +@info checkpoint_path +mixedGradientTraining(grads_lib, mlBaseline, in_gargs.train_refs, in_gargs.test_val_refs, in_gargs.total_constraints, in_gargs.loss_fargs, in_gargs.forward_args; n_epochs=n_epochs, path_experiment=checkpoint_path) \ No newline at end of file diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh new file mode 100644 index 000000000..70a69b014 --- /dev/null +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh @@ -0,0 +1,38 @@ +#!/bin/bash +#SBATCH --job-name=HyALL_ALL_la +#SBATCH --ntasks=8 +#SBATCH --cpus-per-task=17 +#SBATCH --mem-per-cpu=2GB +#SBATCH --time=23:50:00 +#SBATCH --array=0-9 # 10 jobs +#SBATCH -o /ptmp/lalonso/slurmOutput/HyALL_ALL_la-%A_%a.out + +module load julia/1.10 +export JULIA_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +# Define parameter ranges +nfolds=(1 2 3 4 5) +nlayers=(3 2) +neurons=(32) +batchsizes=(32) + +# Calculate which combination to use based on SLURM_ARRAY_TASK_ID +id=$SLURM_ARRAY_TASK_ID +# Change the order to prioritize folds first +n_fold=$((id % ${#nfolds[@]})) +id=$((id / ${#nfolds[@]})) +n_layer=$((id % ${#nlayers[@]})) +id=$((id / ${#nlayers[@]})) +n_neuron=$((id % ${#neurons[@]})) +id=$((id / ${#neurons[@]})) +n_batch=$((id % ${#batchsizes[@]})) + +# Get the actual parameter values +nfold=${nfolds[$n_fold]} +nlayer=${nlayers[$n_layer]} +neuron=${neurons[$n_neuron]} +batchsize=${batchsizes[$n_batch]} + +echo "Running with: id=$id nfold=$nfold nlayer=$nlayer neuron=$neuron batchsize=$batchsize" +# Run the program with calculated parameters +julia --project --heap-size-hint=16G exp_fluxnet_hybrid_clean_la.jl $nfold $nlayer $neuron $batchsize $id \ No newline at end of file diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl new file mode 100644 index 000000000..3435b0aa0 --- /dev/null +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl @@ -0,0 +1,86 @@ +using Revise +using SindbadML +using SindbadTEM +using SindbadUtils +using SindbadSetup +using SindbadData +using SindbadML.JLD2 +using SindbadML.Random +using SindbadML.Flux +using ProgressMeter + +# TODO: update these in replace_info! +# TODO: proper attribution of trainML file! +# load folds # $nfold $nlayer $neuron $batchsize +_nfold = 5 # Base.parse(Int, ARGS[1]) # 5 +nlayers = 3 # Base.parse(Int, ARGS[2]) # 3 +n_neurons = 32 # Base.parse(Int, ARGS[3]) # 32 +batch_size = 32 # Base.parse(Int, ARGS[4]) # 32 +id_fold = 1 # Base.parse(Int, ARGS[5]) # 1 + + +path_experiment_json = "../exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json" +# path_input = "$(getSindbadDataDepot())/FLUXNET_v2023_12_1D.zarr" +path_input = "/Net/Groups/BGI/work_4/scratch/lalonso/FLUXNET_v2023_12_1D.zarr" +path_observation = path_input + +# path_covariates = "$(getSindbadDataDepot())/CovariatesFLUXNET_3.zarr" + +replace_info = Dict( + "forcing.default_forcing.data_path" => path_input, + "optimization.observations.default_observation.data_path" => path_observation, + "optimization.optimization_cost_threaded" => false, + "hybrid.ml_training.options.batch_size" => batch_size, + "hybrid.ml_training.which_fold" => _nfold, + # "hybrid.ml_training.fold_path" => "blablabla", + "hybrid.ml_model.options.n_layers" => nlayers, + "hybrid.ml_model.options.n_neurons" => n_neurons, +) + +info = getExperimentInfo(path_experiment_json; replace_info=replace_info); + +forcing = getForcing(info); +observations = getObservation(info, forcing.helpers); +sites_forcing = forcing.data[1].site; + +# disturbance years +# get disturbance years from forcing helpers +# apply getSequence to each site +# ds = open_dataset(path_input) +# disturbances_per_site = Pair.(ds.properties["SITE_ID"], ds.properties["last_disturbance_on"]) +# allSequences = getSequence.(ds.properties["last_disturbance_on"], Ref(info.helpers.dates)); + +hybrid_helpers = prepHybrid(forcing, observations, info, info.hybrid.ml_training.method); +selected_models = info.models.forward; +run_helpers = hybrid_helpers.run_helpers; + +space_forcing = run_helpers.space_forcing; +space_observations = run_helpers.space_observation; +space_output = run_helpers.space_output; +space_spinup_forcing = run_helpers.space_spinup_forcing; +space_spinup_sequence = run_helpers.space_spinup_sequence; +space_ind = run_helpers.space_ind; +land_init = run_helpers.loc_land; +loc_forcing_t = run_helpers.loc_forcing_t; + +space_cost_options = [prepCostOptions(loc_obs, info.optimization.cost_options) for loc_obs in space_observations]; +constraint_method = info.optimization.run_options.multi_constraint_method; + +tem_info = run_helpers.tem_info; +## do example site +## + +site_example_1 = space_ind[7][1]; +@time SindbadTEM.coreTEM!(selected_models, space_forcing[site_example_1], space_spinup_forcing[site_example_1], + space_spinup_sequence[site_example_1], + loc_forcing_t, space_output[site_example_1], land_init, tem_info) + +trainML(hybrid_helpers, info.hybrid.ml_training.method) + + +# ds = open_dataset(path_input) +# d_arr = replace(ds.properties["last_disturbance_on"], "undisturbed" => "9999-01-01") +# disturbance_yax = YAXArray((ds.site, ), year.(Date.(d_arr))) +# ds2 = Dataset(disturbance_year = disturbance_yax) +# savedataset(ds2, path=path_input, backend=:zarr, append=true) + diff --git a/examples/exp_fluxnet_hybrid/kfolds/5fold_forcing.jl b/examples/exp_fluxnet_hybrid/sampling/5fold_forcing.jl similarity index 100% rename from examples/exp_fluxnet_hybrid/kfolds/5fold_forcing.jl rename to examples/exp_fluxnet_hybrid/sampling/5fold_forcing.jl diff --git a/examples/exp_fluxnet_hybrid/kfolds/5fold_observations.jl b/examples/exp_fluxnet_hybrid/sampling/5fold_observations.jl similarity index 100% rename from examples/exp_fluxnet_hybrid/kfolds/5fold_observations.jl rename to examples/exp_fluxnet_hybrid/sampling/5fold_observations.jl diff --git a/examples/exp_fluxnet_hybrid/sampling/5fold_plots.jl b/examples/exp_fluxnet_hybrid/sampling/5fold_plots.jl new file mode 100644 index 000000000..abd8b6143 --- /dev/null +++ b/examples/exp_fluxnet_hybrid/sampling/5fold_plots.jl @@ -0,0 +1,130 @@ + +# activate project's environment and develop the package +using Pkg +Pkg.activate("examples/exp_fluxnet_hybrid/sampling") +Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2", "GLMakie"]) +Pkg.instantiate() + +using MLUtils +using StatsBase +using YAXArrays +using Zarr +using JLD2 +using GLMakie +GLMakie.activate!() +#! load folds +file_folds = load(joinpath(@__DIR__, "nfolds_sites_indices.jld2")) + +function getPFTsite(set_names, set_pfts) + return set_pfts[findfirst(site_name, set_names)] +end + +function countmapPFTs(x) + x_counts = countmap(x) + x_keys = collect(keys(x_counts)) + x_vals = collect(values(x_counts)) + return x_counts, x_keys, x_vals +end + +# get all PFTs from dataset +ds = open_dataset(joinpath(@__DIR__, "../../data/FLUXNET_v2023_12_1D.zarr")) +ds.properties["SITE_ID"][[98, 99, 100, 137, 138]] +# ! update PFTs categories, original ones are not up to date! +ds.properties["PFT"][[98, 99, 100, 137, 138]] .= ["WET", "WET", "GRA", "WET", "SNO"] +updatePFTs = ds.properties["PFT"] + + +# ? site names +site_names = ds.site.val + +function get_fold_names(x_fold_set, site_names, updatePFTs; color=:black) + set_pfts = updatePFTs[x_fold_set] + fold_names = site_names[x_fold_set] + + x_counts, x_keys, x_vals = countmapPFTs(set_pfts) + px = sortperm(x_keys) + u_pfts = unique(set_pfts) + indx_names = [findall(x->x==p, set_pfts) for p in u_pfts] + + dict_names = Dict(u_pfts .=> indx_names) + + box_colors = repeat([color], length(u_pfts)) + + return x_vals, px, x_keys, fold_names, dict_names, box_colors, x_counts +end + +using CairoMakie +CairoMakie.activate!() +# load fold +for _nfold in 1:5 + xtrain, xval, xtest = file_folds["unfold_training"][_nfold], file_folds["unfold_validation"][_nfold], file_folds["unfold_tests"][_nfold] + with_theme(theme_latexfonts()) do + fig = Figure(; size=(1200, 1400), fontsize=24) + + x_vals, px, x_keys, fold_names, dict_names, box_colors, x_counts = get_fold_names(xtest, site_names, updatePFTs) + x_vals_test = x_vals + + ax = Axis(fig[1,1]; xgridstyle=:dash, ygridstyle=:dash) + # ax_gl = GridLayout(fig[2,1]; xgridstyle=:dash, ygridstyle=:dash) + + barplot!(ax, x_vals[px]; color=:transparent, strokewidth=0.65, strokecolor=:dodgerblue) + text!(ax, Point2f.(1:length(x_keys), x_vals[px]), text=string.(x_vals[px]), + align = (:center, :bottom), fontsize=24) + + for (i, k) in enumerate(x_keys[px]) + text!(ax, [i], [x_vals[px][i]]; text= join(fold_names[dict_names[k]], "\n"), color=:grey25, + align=(:center, 1.1), fontsize = 16) + end + ax.xticks = (1:length(x_keys), x_keys[px]) + hidedecorations!(ax, grid=false) + ylims!(ax, -0.15, 12) + hidespines!(ax) + + # validation + x_vals, px, x_keys, fold_names, dict_names, box_colors, x_counts = get_fold_names(xval, site_names, updatePFTs; color=:tomato) + x_vals_val = x_vals + + ax = Axis(fig[2,1]; xgridstyle=:dash, ygridstyle=:dash) + # ax_gl = GridLayout(fig[2,1]; xgridstyle=:dash, ygridstyle=:dash) + + barplot!(ax, x_vals[px]; color=:transparent, strokewidth=0.65, strokecolor=:tomato) + text!(ax, Point2f.(1:length(x_keys), x_vals[px]), text=string.(x_vals[px]), + align = (:center, :bottom), fontsize=24) + + for (i, k) in enumerate(x_keys[px]) + text!(ax, [i], [x_vals[px][i]]; text= join(fold_names[dict_names[k]], "\n"), color=:grey25, + align=(:center, 1.1), fontsize = 16) + end + ax.xticks = (1:length(x_keys), x_keys[px]) + hidedecorations!(ax, grid=false) + ylims!(ax, -0.15, 5) + hidespines!(ax) + + # trainining + x_vals, px, x_keys, fold_names, dict_names, box_colors, x_counts = get_fold_names(xtrain, site_names, updatePFTs; color=:tomato) + x_vals_train = x_vals + + ax = Axis(fig[3,1]; xgridstyle=:dash, ygridstyle=:dash) + + barplot!(ax, x_vals[px]; color=:transparent, strokewidth=0.65, strokecolor=:black) + text!(ax, Point2f.(1:length(x_keys), x_vals[px]), text=string.(x_vals[px]), + align = (:center, :bottom), fontsize=24) + + for (i, k) in enumerate(x_keys[px]) + text!(ax, [i], [x_vals[px][i]]; text= join(fold_names[dict_names[k]], "\n"), color=:grey25, + align=(:center, 1.05), fontsize = 14) + end + ax.xticks = (1:length(x_keys), x_keys[px]) + hideydecorations!(ax, grid=false) + ylims!(ax, -1, 36) + hidespines!(ax) + rowsize!(fig.layout, 2, Auto(0.5)) + rowsize!(fig.layout, 3, Auto(1.5)) + Label(fig[1,1], "Fold $(_nfold)", tellwidth=false, tellheight=false, halign=0.98, valign=1, color=:grey10, font=:bold) + Label(fig[1,1], "Test ($(sum(x_vals_test)))", tellwidth=false, tellheight=false, halign=0.0, valign=0.9, color=:dodgerblue, font=:bold) + Label(fig[2,1], "Validation ($(sum(x_vals_val)))", tellwidth=false, tellheight=false, halign=0.0, valign=0.9, color=:tomato, font=:bold) + Label(fig[3,1], "Training ($(sum(x_vals_train)))", tellwidth=false, tellheight=false, halign=0.0, valign=0.9, color=:grey25, font=:bold) + save(joinpath(@__DIR__, "../../../../fluxnet_hybrid_plots/fold_$(_nfold)_names_counts.pdf"), fig) + fig + end +end diff --git a/examples/exp_fluxnet_hybrid/sampling/Project.toml b/examples/exp_fluxnet_hybrid/sampling/Project.toml new file mode 100644 index 000000000..64ebca66a --- /dev/null +++ b/examples/exp_fluxnet_hybrid/sampling/Project.toml @@ -0,0 +1,6 @@ +[deps] +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" +Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99" diff --git a/examples/exp_fluxnet_hybrid/kfold_stratify.jl b/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl similarity index 52% rename from examples/exp_fluxnet_hybrid/kfold_stratify.jl rename to examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl index 0b80142da..b2657130c 100644 --- a/examples/exp_fluxnet_hybrid/kfold_stratify.jl +++ b/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl @@ -1,11 +1,37 @@ +# ENV["JULIA_NUM_PRECOMPILE_TASKS"] = "1" # ! due to raven's threads restrictions, this should NOT be used in production! +# # activate project's environment and develop the package +# using Pkg +# Pkg.activate("examples/exp_fluxnet_hybrid/sampling") +# Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2"]) +# # Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2", "GLMakie", "CairoMakie"]) +# Pkg.instantiate() + +# start using packages using MLUtils -using SindbadData.YAXArrays -using SindbadData.Zarr +using StatsBase +using YAXArrays +using Zarr +using JLD2 +# using GLMakie + +# get data +# $examples/data> scp -r lalonso@ruthenia:/Net/Groups/BGI/work_5/scratch/lalonso/CovariatesFLUXNET_3.zarr . +# @examples/data> scp -r lalonso@ruthenia:/Net/Groups/BGI/work_4/scratch/lalonso/FLUXNET_v2023_12_1D.zarr . -c_read = Cube("examples/data/CovariatesFLUXNET_3.zarr"); +# create a new folder for plots -ds = open_dataset(joinpath(@__DIR__, "../data/FLUXNET_v2023_12_1D.zarr")) -ds.properties["PFT"][[98, 99, 100, 137, 138]] .= ["WET", "WET", "GRA", "WET", "SNO"] +# mkpath(joinpath(@__DIR__, "../../../../fluxnet_hybrid_plots/")) + +# c_read = Cube(joinpath(@__DIR__, "../../data/CovariatesFLUXNET_3.zarr")); +# c_read = Cube("/raven/u/lalonso/sindbad.jl/examples/data/CovariatesFLUXNET_3.zarr") +c_read = Cube("/Net/Groups/BGI/work_5/scratch/lalonso/CovariatesFLUXNET_3.zarr") +# ds = open_dataset(joinpath(@__DIR__, "../../data/FLUXNET_v2023_12_1D.zarr")) +# ds = open_dataset("/raven/u/lalonso/sindbad.jl/examples/data/FLUXNET_v2023_12_1D.zarr") +ds = open_dataset("/Net/Groups/BGI/work_4/scratch/lalonso/FLUXNET_v2023_12_1D.zarr") + +ds.properties["SITE_ID"][[98, 99, 100, 137, 138]] +# ! update PFTs categories, original ones are not up to date! +ds.properties["PFT"][[98, 99, 100, 137, 138]] .= ["WET", "WET", "GRA", "WET", "SNO"] updatePFTs = ds.properties["PFT"] # ? kfolds @@ -42,8 +68,8 @@ to_remove = [ # "US-GLE", "US-Tw1", # "US-Tw2" - ] -not_these = ["RU-Tks", "US-Atq", "US-UMd"] + ] # duplicates ! +not_these = ["RU-Tks", "US-Atq", "US-UMd"] # with NaNs not_these = vcat(not_these, to_remove) new_sites = setdiff(c_read.site, not_these) @@ -54,7 +80,7 @@ setPFT = updatePFTs[new_sites_index] x, y, z = splitobs_stratified(at=(0.1,0.82), y=setPFT) # map back to original site names and indices -using StatsBase + function countmapPFTs(x) x_counts = countmap(x) x_keys = collect(keys(x_counts)) @@ -70,39 +96,50 @@ px = sortperm(x_keys) pz = sortperm(z_keys) py = sortperm(y_keys) -with_theme(theme_light()) do - fig = Figure(; size=(600, 600)) - ax1 = Axis(fig[1,1]; title = "training") - ax2 = Axis(fig[3,1]; title = "test") - ax3 = Axis(fig[2,1]; title = "validation") +# with_theme(theme_light()) do +# fig = Figure(; size=(600, 600)) +# ax1 = Axis(fig[1,1]; title = "training") +# ax2 = Axis(fig[3,1]; title = "test") +# ax3 = Axis(fig[2,1]; title = "validation") - barplot!(ax1, x_vals[px]; color = 1:13, colormap = :Hiroshige) - barplot!(ax2, z_vals[pz]; color = 1:length(z_vals), colorrange=(1,13), colormap = :Hiroshige) - barplot!(ax3, y_vals[py]; color = 1:length(y_vals), colorrange=(1,13), colormap = :Hiroshige) +# barplot!(ax1, x_vals[px]; color = 1:13, colormap = :Hiroshige) +# barplot!(ax2, z_vals[pz]; color = 1:length(z_vals), colorrange=(1,13), colormap = :Hiroshige) +# barplot!(ax3, y_vals[py]; color = 1:length(y_vals), colorrange=(1,13), colormap = :Hiroshige) - ax1.xticks = (1:length(x_keys), x_keys[px]) - ax2.xticks = (1:length(z_keys), z_keys[pz]) - ax3.xticks = (1:length(y_keys), y_keys[py]) - fig -end +# ax1.xticks = (1:length(x_keys), x_keys[px]) +# ax2.xticks = (1:length(z_keys), z_keys[pz]) +# ax3.xticks = (1:length(y_keys), y_keys[py]) +# fig +# end all_counts, all_keys, all_vals = countmapPFTs(setPFT) -with_theme(theme_light()) do - px = sortperm(all_keys) - fig = Figure(; size=(600, 600)) - ax = Axis(fig[1,1]; title = "all") - barplot!(ax, all_vals[px]; color = 1:13, colormap = :Hiroshige) - hlines!(ax, 5) - ax.xticks = (1:length(all_keys), all_keys[px]) - # ax.yticks = 1:10 - fig -end +# using CairoMakie +# CairoMakie.activate!() +# # GLMakie.activate!() + +# with_theme(theme_latexfonts()) do +# px = sortperm(all_keys) +# fig = Figure(; size=(1200, 400), fontsize= 24) +# ax = Axis(fig[1,1]; title = "Total number of sites ($(sum(all_vals))) per Plant Functional Type (PFT)", titlefont=:regular) +# # barplot!(ax, all_vals[px]; color = 1:13, colormap = (:mk_12, 0.35), strokewidth=0.65, width=0.85) +# barplot!(ax, all_vals[px]; color = (:grey25, 0.05), strokewidth=0.65, width=0.85) + +# # hlines!(ax, 5; color =:black, linewidth=0.85) +# ax.xticks = (1:length(all_keys), all_keys[px]) +# ylims!(ax, 0, 50) +# ax.yticks = (0:15:50, ["", "", "", ""]) +# text!(ax, Point2f.(1:length(all_keys), all_vals[px]), text=string.(all_vals[px]), align = (:center, :bottom), fontsize=24) +# hidespines!(ax) +# hideydecorations!(ax, grid=false) +# fig +# save(joinpath(@__DIR__, "../../../../fluxnet_hybrid_plots/PFTs_sites_counts_bw.pdf"), fig) +# end # custom rules # validation samples (get them from the training split), the validation split coming from kfolds goes to test! - +# some times we have additional training samples and depending on the PFT we can use them for validation or not function split_to_validation(k) if k == "GRA" || k == "ENF" return 3 @@ -161,8 +198,10 @@ unfold_training = [vcat(_fold_t_i[:, f]...) for f in 1:5] _fold_v_i = [validation_folds[i][f] for i in 1:10, f in 1:5] unfold_validation = [vcat(_fold_v_i[:, f]...) for f in 1:5] -using JLD2 -jldsave("nfolds_sites_indices.jld2"; unfold_training=unfold_training, unfold_validation=unfold_validation, unfold_tests=unfold_tests) +for id in 0:10 + jldsave(joinpath(@__DIR__, "nfolds_sites_indices_$(id).jld2"); + unfold_training=unfold_training, unfold_validation=unfold_validation, unfold_tests=unfold_tests) +end _nfold = 1 xtrain, xval, xtest = unfold_training[_nfold], unfold_validation[_nfold], unfold_tests[_nfold] diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/forcing.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/forcing.json index e816e59f2..b45a6b943 100644 --- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/forcing.json +++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/forcing.json @@ -49,6 +49,16 @@ "source_unit": null, "source_variable": "dist_frac_sb2018" }, + "f_disturbance_year": { + "bounds": [0, 10000], + "is_categorical": false, + "standard_name": "year of last disturbance", + "sindbad_unit": null, + "source_product": "Besnard2018", + "source_unit": null, + "source_variable": "disturbance_year", + "space_time_type": "spatial" + }, "f_burnt_area": { "bounds": [0.0, 1.0], "standard_name": "fire fraction area per area pixel", diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/optimization.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/optimization.json index 1e5035086..9deeaeff2 100644 --- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/optimization.json +++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/optimization.json @@ -9,7 +9,6 @@ "autoRespiration,RMN": null, "autoRespiration,YG": null, "autoRespirationAirT,Q10": null, - "cCycleBase,c_remain": null, "cCycleBase,ηH": null, "cFlow,f_τ": null, "cFlow,k_shedding": null, diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json index 74b0e96d1..bc0d6e8a5 100644 --- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json +++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json @@ -20,15 +20,15 @@ "n_folds": 10 }, "package": "", - "fold_path": "./nfolds_sites_indices.jld2", - "which_fold": 5 + "fold_path": "/User/homes/lalonso/SINDBAD/examples/exp_fluxnet_hybrid/sampling/nfolds_sites_indices_0.jld2", + "which_fold": 2 }, "ml_gradient": { "method": "Grad", "options": { "chunk_size": 2 }, - "package": "FiniteDiff" + "package": "PolyesterForwardDiff" }, "ml_optimizer": { "method": "Adam", @@ -42,7 +42,7 @@ "replace_value_for_gradient": 0.0, "save_checkpoint": true, "covariates": { - "path": "../data/CovariatesFLUXNET_3.zarr", + "path": "/Net/Groups/BGI/work_5/scratch/lalonso/CovariatesFLUXNET_3.zarr", "variables": "all" }, diff --git a/lib/SindbadML/src/SindbadML.jl b/lib/SindbadML/src/SindbadML.jl index ac6661b02..e9fe9d5d3 100644 --- a/lib/SindbadML/src/SindbadML.jl +++ b/lib/SindbadML/src/SindbadML.jl @@ -81,7 +81,7 @@ module SindbadML include("loss.jl") include("prepHybrid.jl") include("mlGradient.jl") - include("mlTrain.jl") + include("trainML.jl") include("neuralNetwork.jl") include("siteLosses.jl") include("oneHots.jl") diff --git a/lib/SindbadML/src/loss.jl b/lib/SindbadML/src/loss.jl index b911b552b..286f4dd1b 100644 --- a/lib/SindbadML/src/loss.jl +++ b/lib/SindbadML/src/loss.jl @@ -40,13 +40,14 @@ This function runs the core TEM model with the provided parameters, forcing data loss_vec, loss_idx = lossVector(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib, LossModelObsML()) ``` """ -function lossVector(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,::LossModelObsML) +function lossVector(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,::LossModelObsML) loc_output_from_cache = getOutputFromCache(loc_output, params, gradient_lib) models = updateModels(params, parameter_to_index, parameter_scaling_type, models) coreTEM!( models, loc_forcing, loc_spinup_forcing, + loc_spinup_sequence, loc_forcing_t, loc_output_from_cache, land_init, @@ -92,14 +93,14 @@ This function computes the loss value for a given site by first calling `lossVec t_loss = loss(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib, LossModelObsML()) ``` """ -function loss(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,loss_type::LossModelObsML) - loss_vector, _ = lossVector(params, models,parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib, loss_type) +function loss(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,loss_type::LossModelObsML) + loss_vector, _ = lossVector(params, models,parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib, loss_type) t_loss = combineMetric(loss_vector, constraint_method) return t_loss end -function lossComponents(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,loss_type::LossModelObsML) - loss_vector, loss_indices = lossVector(params, models,parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib, loss_type) +function lossComponents(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,loss_type::LossModelObsML) + loss_vector, loss_indices = lossVector(params, models,parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib, loss_type) t_loss = combineMetric(loss_vector, constraint_method) return t_loss, loss_vector, loss_indices end diff --git a/lib/SindbadML/src/mlGradient.jl b/lib/SindbadML/src/mlGradient.jl index 7390bfa2c..f097c7ba1 100644 --- a/lib/SindbadML/src/mlGradient.jl +++ b/lib/SindbadML/src/mlGradient.jl @@ -150,7 +150,7 @@ function gradientSite(::PolyesterForwardDiffGrad, x_vals, gradient_options::Name # cfg = ForwardDiff.GradientConfig(loss_tmp, x_vals, Chunk{chunk_size}()); ForwardDiff.gradient!(∇x, loss_f, x_vals) # ?, add `cfg` at the end if further control is needed. else - PolyesterForwardDiff.threaded_gradient!(loss_f, ∇x, x_vals, ForwardDiff.Chunk(chunk_size)); + PolyesterForwardDiff.threaded_gradient!(loss_f, ∇x, x_vals, ForwardDiff.Chunk(gradient_options.chunk_size)); end return ∇x end diff --git a/lib/SindbadML/src/prepHybrid.jl b/lib/SindbadML/src/prepHybrid.jl index 11fa80008..c7679c4b8 100644 --- a/lib/SindbadML/src/prepHybrid.jl +++ b/lib/SindbadML/src/prepHybrid.jl @@ -152,10 +152,11 @@ function getLossFunctionHandles(info, run_helpers, sites) loc_obs = run_helpers.space_observation[site_location] loc_output = getCacheFromOutput(run_helpers.space_output[site_location], info.hybrid.ml_gradient.method) loc_spinup_forcing = run_helpers.space_spinup_forcing[site_location] + loc_spinup_sequence = run_helpers.space_spinup_sequence[site_location] loc_cost_option = prepCostOptions(loc_obs, info.optimization.cost_options) - loss_tmp(x) = loss(x, info.models.forward, parameter_to_index, info.optimization.run_options.parameter_scaling, loc_forcing, loc_spinup_forcing, run_helpers.loc_forcing_t, loc_output, deepcopy(run_helpers.loc_land), run_helpers.tem_info, loc_obs, loc_cost_option, info.optimization.run_options.multi_constraint_method, info.hybrid.ml_gradient.method, info.hybrid.ml_training.options.loss_function) + loss_tmp(x) = loss(x, info.models.forward, parameter_to_index, info.optimization.run_options.parameter_scaling, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, run_helpers.loc_forcing_t, loc_output, deepcopy(run_helpers.loc_land), run_helpers.tem_info, loc_obs, loc_cost_option, info.optimization.run_options.multi_constraint_method, info.hybrid.ml_gradient.method, info.hybrid.ml_training.options.loss_function) - loss_vector_tmp(x) = lossComponents(x, info.models.forward, parameter_to_index, info.optimization.run_options.parameter_scaling, loc_forcing, loc_spinup_forcing, run_helpers.loc_forcing_t, loc_output, deepcopy(run_helpers.loc_land), run_helpers.tem_info, loc_obs, loc_cost_option, info.optimization.run_options.multi_constraint_method, info.hybrid.ml_gradient.method, info.hybrid.ml_training.options.loss_function) + loss_vector_tmp(x) = lossComponents(x, info.models.forward, parameter_to_index, info.optimization.run_options.parameter_scaling, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, run_helpers.loc_forcing_t, loc_output, deepcopy(run_helpers.loc_land), run_helpers.tem_info, loc_obs, loc_cost_option, info.optimization.run_options.multi_constraint_method, info.hybrid.ml_gradient.method, info.hybrid.ml_training.options.loss_function) push!(loss_functions, loss_tmp) push!(loss_component_functions, loss_vector_tmp) diff --git a/lib/SindbadML/src/mlTrain.jl b/lib/SindbadML/src/trainML.jl similarity index 99% rename from lib/SindbadML/src/mlTrain.jl rename to lib/SindbadML/src/trainML.jl index b45c38139..662138fb4 100644 --- a/lib/SindbadML/src/mlTrain.jl +++ b/lib/SindbadML/src/trainML.jl @@ -101,4 +101,4 @@ function trainML(hybrid_helpers, ::MixedGradient) end -end +end \ No newline at end of file diff --git a/lib/SindbadSetup/src/setupHybridML.jl b/lib/SindbadSetup/src/setupHybridML.jl index 13f85c6f4..fe6f8afc2 100644 --- a/lib/SindbadSetup/src/setupHybridML.jl +++ b/lib/SindbadSetup/src/setupHybridML.jl @@ -151,7 +151,6 @@ function setHybridInfo(info::NamedTuple) info = setTupleSubfield(info, :hybrid, (:replace_value_for_gradient, info.temp.helpers.numbers.num_type(replace_value_for_gradient))) - covariates_path = getAbsDataPath(info.temp, info.settings.hybrid.covariates.path) covariates = (; path=covariates_path, variables=info.settings.hybrid.covariates.variables) info = setTupleSubfield(info, :hybrid, (:covariates, covariates)) diff --git a/lib/SindbadSetup/src/setupOutput.jl b/lib/SindbadSetup/src/setupOutput.jl index b0553986f..c9c9cf034 100644 --- a/lib/SindbadSetup/src/setupOutput.jl +++ b/lib/SindbadSetup/src/setupOutput.jl @@ -255,6 +255,7 @@ Saves a copy of the experiment settings to the output folder. function saveExperimentSettings(info) sindbad_experiment = info.temp.experiment.dirs.sindbad_experiment showInfo(saveExperimentSettings, @__FILE__, @__LINE__, "saving Experiment JSON Settings to : $(info.output.dirs.settings)") + mkpath(dirname(joinpath(info.output.dirs.settings, split(sindbad_experiment, path_separator)[end]))) # create output directory if it does not exist cp(sindbad_experiment, joinpath(info.output.dirs.settings, split(sindbad_experiment, path_separator)[end]); force=true) diff --git a/lib/SindbadSetup/src/setupParameters.jl b/lib/SindbadSetup/src/setupParameters.jl index d817a4f66..bf80ea2e7 100644 --- a/lib/SindbadSetup/src/setupParameters.jl +++ b/lib/SindbadSetup/src/setupParameters.jl @@ -73,7 +73,7 @@ function getParameters(selected_models::Tuple, num_type, model_timestep; return_ nbounds = length(constrains) lower = [constrains[i][1] for i in 1:nbounds] upper = [constrains[i][2] for i in 1:nbounds] - + model = [Symbol(supertype(getproperty(Models, m))) for m in model_approach] model_str = string.(model) name_full = [join((last(split(model_str[i], ".")), name[i]), ".") for i in 1:nbounds] @@ -176,7 +176,8 @@ function getOptimizationParametersTable(parameter_table_all::Table, model_parame parameter_keys = optimization_parameters end parameter_list = replaceCommaSeparatedParams(parameter_keys) - missing_parameters = filter(x -> !(x in parameter_table_all.name_full), parameter_list) + # [print("$p"*"\n") for p in parameter_table_all.name] + missing_parameters = filter(x -> !(x in parameter_table_all.name_full), parameter_list) # doing `occursin` here could also work, so that we don't update the funtion getParameters if !isempty(missing_parameters) error("Model Inconsistency: $([missing_parameters...]) parameter(s) not found in the selected model structure. Check the model structure in model_structure.json to include the parameter(s) or change model_parameters_to_optimize in optimization.json to exclude the parameter(s).") end diff --git a/lib/SindbadTEM/src/prepTEM.jl b/lib/SindbadTEM/src/prepTEM.jl index 184e6bb33..dde1935d6 100644 --- a/lib/SindbadTEM/src/prepTEM.jl +++ b/lib/SindbadTEM/src/prepTEM.jl @@ -433,6 +433,10 @@ function helpPrepTEM(selected_models, info, forcing::NamedTuple, observations::N getAllSpinupForcing(loc_forcing, info.spinup.sequence, tem_info); end + # f_disturbance_years + fd_index = findall(forcing.variables.==:f_disturbance_year)[1] + space_spinup_sequence = getSequence.(forcing.data[fd_index], Ref(info.helpers.dates)); + space_output = map([space_ind...]) do lsi getLocData(output_array, lsi) end @@ -441,7 +445,7 @@ function helpPrepTEM(selected_models, info, forcing::NamedTuple, observations::N forcing_nt_array = nothing - run_helpers = (; space_selected_models, space_forcing, space_observation, space_ind, space_spinup_forcing, loc_forcing_t, space_output, loc_land, output_vars=output.variables, tem_info) + run_helpers = (; space_selected_models, space_forcing, space_observation, space_ind, space_spinup_forcing, space_spinup_sequence, loc_forcing_t, space_output, loc_land, output_vars=output.variables, tem_info) return run_helpers end @@ -573,7 +577,7 @@ function prepTEM(forcing::NamedTuple, info::NamedTuple) end function prepTEM(selected_models, forcing::NamedTuple, info::NamedTuple) - showInfo(prepTEM, @__FILE__, @__LINE__, "preparing to run terrestrial ecosystem model (TEM)", n_f=1) + showInfo(prepTEM, @__FILE__, @__LINE__, "preparing to run terrestrial ecosystem model (TEM) for $(nameof(typeof(info.helpers.run.land_output_type)))", n_f=1) output = prepTEMOut(info, forcing.helpers) showInfo(prepTEM, @__FILE__, @__LINE__, " preparing helpers for running model experiment", n_f=4) run_helpers = helpPrepTEM(selected_models, info, forcing, output, info.helpers.run.land_output_type) @@ -583,7 +587,7 @@ function prepTEM(selected_models, forcing::NamedTuple, info::NamedTuple) end function prepTEM(selected_models, forcing::NamedTuple, observations::NamedTuple, info::NamedTuple) - showInfo(prepTEM, @__FILE__, @__LINE__, "preparing to run terrestrial ecosystem model (TEM)", n_f=1) + showInfo(prepTEM, @__FILE__, @__LINE__, "preparing to run terrestrial ecosystem model (TEM) for $(nameof(typeof(info.helpers.run.land_output_type)))", n_f=1) output = prepTEMOut(info, forcing.helpers) run_helpers = helpPrepTEM(selected_models, info, forcing, observations, output, info.helpers.run.land_output_type) showInfoSeparator() diff --git a/lib/SindbadTEM/src/runTEMSpace.jl b/lib/SindbadTEM/src/runTEMSpace.jl index 84d889b32..2c464b27b 100644 --- a/lib/SindbadTEM/src/runTEMSpace.jl +++ b/lib/SindbadTEM/src/runTEMSpace.jl @@ -36,6 +36,18 @@ function coreTEM!(selected_models, loc_forcing, loc_spinup_forcing, loc_forcing_ return nothing end +function coreTEM!(selected_models, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, loc_land, tem_info) + # update the loc_forcing with the actual location + loc_forcing_t = getForcingForTimeStep(loc_forcing, loc_forcing_t, 1, tem_info.vals.forcing_types) + # run precompute + land_prec = precomputeTEM(selected_models, loc_forcing_t, loc_land, tem_info.model_helpers) + # run spinup + land_spin = spinupTEM(selected_models, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, land_prec, tem_info, tem_info.run.spinup_TEM) + + timeLoopTEM!(selected_models, loc_forcing, loc_forcing_t, loc_output, land_spin, tem_info.vals.forcing_types, tem_info.model_helpers, tem_info.vals.output_vars, tem_info.n_timesteps, tem_info.run.debug_model) + return nothing +end + """ parallelizeTEM!(space_selected_models, space_forcing, space_spinup_forcing, loc_forcing_t, space_output, space_land, tem_info, parallelization_mode::SindbadParallelizationMethod) diff --git a/lib/SindbadTEM/src/spinupSequence.jl b/lib/SindbadTEM/src/spinupSequence.jl index 90b498bed..ccb55a629 100644 --- a/lib/SindbadTEM/src/spinupSequence.jl +++ b/lib/SindbadTEM/src/spinupSequence.jl @@ -37,25 +37,36 @@ end - new spinup sequence object """ function getSequence(year_disturbance, info_helpers_dates; nrepeat_base=200, year_start = 1979) - nrepeat_age = nrepeatYearsAge(year_disturbance; year_start) - sequence = [ - Dict("spinup_mode" => "sel_spinup_models", "forcing" => "all_years", "n_repeat" => 1), - Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat_base), - Dict("spinup_mode" => "eta_scale_AH", "forcing" => "day_MSC", "n_repeat" => 1), - ] - if nrepeat_age == 0 + nrepeat_d = nrepeatYearsAge(year_disturbance; year_start) + sequence = nothing + nrepeat = nrepeat_base + if isnothing(nrepeat_d) sequence = [ Dict("spinup_mode" => "sel_spinup_models", "forcing" => "all_years", "n_repeat" => 1), - Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat_base), + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat), + Dict("spinup_mode" => "eta_scale_AH", "forcing" => "day_MSC", "n_repeat" => 1), + ] + elseif nrepeat_d < 0 + sequence = [ + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "all_years", "n_repeat" => 1), + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat), + Dict("spinup_mode" => "eta_scale_AH", "forcing" => "day_MSC", "n_repeat" => 1), + ] + elseif nrepeat_d == 0 + sequence = [ + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "all_years", "n_repeat" => 1), + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat), Dict("spinup_mode" => "eta_scale_A0H", "forcing" => "day_MSC", "n_repeat" => 1), ] - elseif nrepeat_age > 0 + elseif nrepeat_d > 0 sequence = [ Dict("spinup_mode" => "sel_spinup_models", "forcing" => "all_years", "n_repeat" => 1), - Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat_base), + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat), Dict("spinup_mode" => "eta_scale_A0H", "forcing" => "day_MSC", "n_repeat" => 1), - Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat_age), + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat_d), ] + else + error("cannot determine the repeat for disturbance") end new_sequence = getSpinupTemLite(getSpinupSequenceWithTypes(sequence, info_helpers_dates)) return new_sequence diff --git a/lib/SindbadTEM/src/spinupTEM.jl b/lib/SindbadTEM/src/spinupTEM.jl index 3d5151483..e88c2634d 100644 --- a/lib/SindbadTEM/src/spinupTEM.jl +++ b/lib/SindbadTEM/src/spinupTEM.jl @@ -623,6 +623,22 @@ function spinupTEM(selected_models, spinup_forcings, loc_forcing_t, land, tem_in return land end +function spinupTEM(selected_models, spinup_forcings, spinup_sequence, loc_forcing_t, land, tem_info, ::DoSpinupTEM) + land = setSpinupLog(land, 1, tem_info.run.store_spinup) + log_index = 2 + for spin_seq ∈ spinup_sequence + forc_name = spin_seq.forcing + n_timesteps = spin_seq.n_timesteps + n_repeat = spin_seq.n_repeat + spinup_mode = spin_seq.spinup_mode + @debug "Spinup: \n spinup_mode: $(nameof(typeof(spinup_mode))), forcing: $(forc_name)" + sel_forcing = sequenceForcing(spinup_forcings, forc_name) + land = spinupSequence(selected_models, sel_forcing, loc_forcing_t, land, tem_info, n_timesteps, log_index, n_repeat, spinup_mode) + log_index += n_repeat + end + return land +end + function spinupTEM(selected_models, spinup_forcings, loc_forcing_t, land, tem_info, ::DoNotSpinupTEM) return land end diff --git a/src/Models/cCycleBase/cCycleBase_GSITOOPFT.jl b/src/Models/cCycleBase/cCycleBase_GSITOOPFT.jl new file mode 100644 index 000000000..b2aecca28 --- /dev/null +++ b/src/Models/cCycleBase/cCycleBase_GSITOOPFT.jl @@ -0,0 +1,192 @@ +export cCycleBase_GSITOOPFT, adjustPackPoolComponents + +struct CCycleBaseGSITOOPFT end +#! format: off +@bounds @describe @units @with_kw struct cCycleBase_GSITOOPFT{T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13} <: cCycleBase + c_τ_Root::T1 = 1.0 | (0.05, 3.3) | "turnover rate of root carbon pool" | "yr-1" + c_τ_Wood::T2 = 0.03 | (0.001, 10.0) | "turnover rate of wood carbon pool" | "yr-1" + c_τ_Leaf::T3 = 1.0 | (0.05, 10.0) | "turnover rate of leaf carbon pool" | "yr-1" + c_τ_Reserve::T4 = 1.0e-11 | (1.0e-12, 1.0) | "Reserve does not respire, but has a small value to avoid numerical error" | "yr-1" + c_τ_LitFast::T5 = 14.8 | (0.5, 148.0) | "turnover rate of fast litter (leaf litter) carbon pool" | "yr-1" + c_τ_LitSlow::T6 = 3.9 | (0.39, 39.0) | "turnover rate of slow litter carbon (wood litter) pool" | "yr-1" + c_τ_SoilSlow::T7 = 0.2 | (0.02, 2.0) | "turnover rate of slow soil carbon pool" | "yr-1" + c_τ_SoilOld::T8 = 0.0045 | (0.00045, 0.045) | "turnover rate of old soil carbon pool" | "yr-1" + c_flow_A_array::T9 = Float64[ + -1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 + 0.0 -1.0 0.0 0.0 0 0.0 0.0 0.0 + 0.0 0.0 -1.0 1.0 0.0 0 0.0 0.0 + 1.0 0.0 1.0 -1.0 0.0 0.0 0.0 0.0 + 1.0 0.0 1.0 0.0 -1.0 0.0 0.0 0.0 + 0.0 1.0 0.0 0.0 0 -1.0 0.0 0.0 + 0.0 0.0 0 0.0 1.0 1.0 -1.0 0.0 + 0.0 0.0 0 0.0 0.0 0.0 1.0 -1.0 + ] | (-Inf, Inf) | "Transfer matrix for carbon at ecosystem level" | "" + p_C_to_N_cVeg::T10 = Float64[25.0, 260.0, 260.0, 10.0] | (-Inf, Inf) | "carbon to nitrogen ratio in vegetation pools" | "gC/gN" + ηH::T11 = 1.0 | (0.01, 100.0) | "scaling factor for heterotrophic pools after spinup" | "" + ηA::T12 = 1.0 | (0.01, 100.0) | "scaling factor for vegetation pools after spinup" | "" + c_remain::T13 = 10.0 | (0.1, 100.0) | "remaining carbon after disturbance" | "" +end +#! format: on + +function define(params::cCycleBase_GSITOOPFT, forcing, land, helpers) + @unpack_cCycleBase_GSITOOPFT params + @unpack_nt begin + cEco ⇐ land.pools + (z_zero, o_one) ⇐ land.constants + end + ## instantiate variables + C_to_N_cVeg = zero(cEco) #sujan + # C_to_N_cVeg[getZix(land.pools.cVeg, helpers.pools.zix.cVeg)] .= p_C_to_N_cVeg + c_eco_k_base = zero(cEco) + c_eco_τ = zero(cEco) + + # if there is flux order check that is consistent + c_flow_order = Tuple(collect(1:length(findall(>(z_zero), c_flow_A_array)))) + c_taker = Tuple([ind[1] for ind ∈ findall(>(z_zero), c_flow_A_array)]) + c_giver = Tuple([ind[2] for ind ∈ findall(>(z_zero), c_flow_A_array)]) + + c_model = CCycleBaseGSITOOPFT() + + + ## pack land variables + @pack_nt begin + c_flow_A_array ⇒ land.diagnostics + (c_flow_order, c_taker, c_giver) ⇒ land.constants + (C_to_N_cVeg, c_eco_τ, c_eco_k_base) ⇒ land.diagnostics + c_model ⇒ land.models + end + return land +end + +function precompute(params::cCycleBase_GSITOOPFT, forcing, land, helpers) + @unpack_cCycleBase_GSITOOPFT params + @unpack_nt begin + (C_to_N_cVeg, c_eco_k_base, c_eco_τ) ⇐ land.diagnostics + (z_zero, o_one) ⇐ land.constants + end + @unpack_nt f_pft ⇐ forcing + # update turnover rates according to plant functional type lookup table + # c_τ_Wood, c_τ_Leaf, c_τ_Root = turnover_rates(only(f_pft)) + c_τ_Wood, c_τ_Leaf, c_τ_Root = turnover_rates(f_pft[1]) + + ## replace values + @rep_elem c_τ_Root ⇒ (c_eco_τ, 1, :cEco) + @rep_elem c_τ_Wood ⇒ (c_eco_τ, 2, :cEco) + @rep_elem c_τ_Leaf ⇒ (c_eco_τ, 3, :cEco) + @rep_elem c_τ_Reserve ⇒ (c_eco_τ, 4, :cEco) + @rep_elem c_τ_LitFast ⇒ (c_eco_τ, 5, :cEco) + @rep_elem c_τ_LitSlow ⇒ (c_eco_τ, 6, :cEco) + @rep_elem c_τ_SoilSlow ⇒ (c_eco_τ, 7, :cEco) + @rep_elem c_τ_SoilOld ⇒ (c_eco_τ, 8, :cEco) + + vegZix = getZix(land.pools.cVeg, helpers.pools.zix.cVeg) + for ix ∈ eachindex(vegZix) + @rep_elem p_C_to_N_cVeg[ix] ⇒ (C_to_N_cVeg, vegZix[ix], :cEco) + end + c_one = one(eltype(c_eco_k_base)) + for i ∈ eachindex(c_eco_k_base) + tmp = c_one - (exp(-c_eco_τ[i])^(c_one / helpers.dates.timesteps_in_year)) + @rep_elem tmp ⇒ (c_eco_k_base, i, :cEco) + end + + ## pack land variables + @pack_nt begin + (C_to_N_cVeg, c_eco_τ, c_eco_k_base, ηA, ηH) ⇒ land.diagnostics + c_remain ⇒ land.states + end + return land +end + +function adjustPackPoolComponents(land, helpers, ::CCycleBaseGSITOOPFT) + @unpack_nt (cVeg, + cLit, + cSoil, + cVegRoot, + cVegWood, + cVegLeaf, + cVegReserve, + cLitFast, + cLitSlow, + cSoilSlow, + cSoilOld, + cEco) ⇐ land.pools + + zix = helpers.pools.zix + for (lc, l) in enumerate(zix.cVeg) + @rep_elem cEco[l] ⇒ (cVeg, lc, :cVeg) + end + + for (lc, l) in enumerate(zix.cVegRoot) + @rep_elem cEco[l] ⇒ (cVegRoot, lc, :cVegRoot) + end + + for (lc, l) in enumerate(zix.cVegWood) + @rep_elem cEco[l] ⇒ (cVegWood, lc, :cVegWood) + end + + for (lc, l) in enumerate(zix.cVegLeaf) + @rep_elem cEco[l] ⇒ (cVegLeaf, lc, :cVegLeaf) + end + + for (lc, l) in enumerate(zix.cVegReserve) + @rep_elem cEco[l] ⇒ (cVegReserve, lc, :cVegReserve) + end + + for (lc, l) in enumerate(zix.cLit) + @rep_elem cEco[l] ⇒ (cLit, lc, :cLit) + end + + for (lc, l) in enumerate(zix.cLitFast) + @rep_elem cEco[l] ⇒ (cLitFast, lc, :cLitFast) + end + + for (lc, l) in enumerate(zix.cLitSlow) + @rep_elem cEco[l] ⇒ (cLitSlow, lc, :cLitSlow) + end + + for (lc, l) in enumerate(zix.cSoil) + @rep_elem cEco[l] ⇒ (cSoil, lc, :cSoil) + end + + for (lc, l) in enumerate(zix.cSoilSlow) + @rep_elem cEco[l] ⇒ (cSoilSlow, lc, :cSoilSlow) + end + + for (lc, l) in enumerate(zix.cSoilOld) + @rep_elem cEco[l] ⇒ (cSoilOld, lc, :cSoilOld) + end + @pack_nt (cVeg, + cLit, + cSoil, + cVegRoot, + cVegWood, + cVegLeaf, + cVegReserve, + cLitFast, + cLitSlow, + cSoilSlow, + cSoilOld, + cEco) ⇒ land.pools + return land +end + +purpose(::Type{cCycleBase_GSITOOPFT}) = "Implements the carbon cycle base model with GSI parameterization for multiple PFTs. Defines turnover rates and carbon to nitrogen ratios for different vegetation and soil pools. " + +@doc """ + +$(getModelDocString(cCycleBase_GSITOOPFT)) + +--- + +# Extended help + +*References* + - Potter; C. S.; J. T. Randerson; C. B. Field; P. A. Matson; P. M. Vitousek; H. A. Mooney; & S. A. Klooster. 1993. Terrestrial ecosystem production: A process model based on global satellite & surface data. Global Biogeochemical Cycles. 7: 811-841. + +*Versions* + - 1.0 on 28.02.2020 [skoirala] + +*Created by:* + - ncarvalhais +""" +cCycleBase_GSITOOPFT