From 6fe0fbef901af1f02cace767a14cbb3f47ec54cc Mon Sep 17 00:00:00 2001 From: lalonso Date: Mon, 7 Jul 2025 17:58:31 +0100 Subject: [PATCH 01/21] kfolds runs, plus some comments --- examples/exp_fluxnet_hybrid/Project.toml | 4 +- .../exp_fluxnet_hybrid/exp_fluxnet_hybrid.jl | 13 +++++- examples/exp_fluxnet_hybrid/kfold_stratify.jl | 40 +++++++++++++------ 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/examples/exp_fluxnet_hybrid/Project.toml b/examples/exp_fluxnet_hybrid/Project.toml index 0f8924d69..2502d7ab7 100644 --- a/examples/exp_fluxnet_hybrid/Project.toml +++ b/examples/exp_fluxnet_hybrid/Project.toml @@ -1,8 +1,10 @@ [deps] CMAEvolutionStrategy = "8d3b24bd-414e-49e0-94fb-163cc3a3e411" +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GCMAES = "4aa9d100-eb0f-11e8-15f1-25748831eb3b" GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" +MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Sindbad = "6686e6de-2010-4bf8-9178-b2bcc470766e" SindbadData = "772b809f-5329-4bfd-a2e9-cc4714ce84b1" @@ -15,7 +17,7 @@ SindbadUtils = "a2ad09c8-73f9-4387-ada0-5c5a8c58f8ab" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [sources] -Sindbad = {path = "../.."} +Sindbad = {path = "C:\\Users\\lalonso\\Documents\\SINDBAD"} SindbadData = {path = "../../lib/SindbadData"} SindbadML = {path = "../../lib/SindbadML"} SindbadMetrics = {path = "../../lib/SindbadMetrics"} diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid.jl index 281eab616..ab58b0fbb 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid.jl @@ -1,3 +1,14 @@ +# activate project's environment and develop the package +using Pkg +Pkg.activate("examples/exp_fluxnet_hybrid") +Pkg.develop(path=pwd()) +Pkg.instantiate() + +# get data +# $examples/data> scp -r lalonso@ruthenia:/Net/Groups/BGI/work_5/scratch/lalonso/CovariatesFLUXNET_3.zarr . +# @examples/data> scp -r lalonso@ruthenia:/Net/Groups/BGI/work_4/scratch/lalonso/FLUXNET_v2023_12_1D.zarr . + +# start using the package using SindbadData using SindbadData.DimensionalData using SindbadData.AxisKeys @@ -7,7 +18,7 @@ using SindbadML using SindbadML.JLD2 using ProgressMeter using SindbadOptimization -include("load_covariates.jl") +# include("load_covariates.jl") # load folds # $nfold $nlayer $neuron $batchsize _nfold = 5 #Base.parse(Int, ARGS[1]) diff --git a/examples/exp_fluxnet_hybrid/kfold_stratify.jl b/examples/exp_fluxnet_hybrid/kfold_stratify.jl index 0b80142da..7cbbc44dc 100644 --- a/examples/exp_fluxnet_hybrid/kfold_stratify.jl +++ b/examples/exp_fluxnet_hybrid/kfold_stratify.jl @@ -1,11 +1,18 @@ using MLUtils using SindbadData.YAXArrays using SindbadData.Zarr +using SindbadML.JLD2 +using GLMakie + +# create a new folder for plots +mkpath(joinpath(@__DIR__, "../../../fluxnet_hybrid_plots/")) c_read = Cube("examples/data/CovariatesFLUXNET_3.zarr"); ds = open_dataset(joinpath(@__DIR__, "../data/FLUXNET_v2023_12_1D.zarr")) -ds.properties["PFT"][[98, 99, 100, 137, 138]] .= ["WET", "WET", "GRA", "WET", "SNO"] +ds.properties["SITE_ID"][[98, 99, 100, 137, 138]] +# ! update PFTs categories, original ones are not up to date! +ds.properties["PFT"][[98, 99, 100, 137, 138]] .= ["WET", "WET", "GRA", "WET", "SNO"] updatePFTs = ds.properties["PFT"] # ? 
kfolds
@@ -42,8 +49,8 @@ to_remove = [
     # "US-GLE",
     # "US-Tw1",
     # "US-Tw2"
-    ]
-not_these = ["RU-Tks", "US-Atq", "US-UMd"]
+    ] # duplicates !
+not_these = ["RU-Tks", "US-Atq", "US-UMd"] # with NaNs
 not_these = vcat(not_these, to_remove)
 
 new_sites = setdiff(c_read.site, not_these)
@@ -88,21 +95,30 @@ end
 
 all_counts, all_keys, all_vals = countmapPFTs(setPFT)
 
-with_theme(theme_light()) do
+using CairoMakie
+CairoMakie.activate!()
+# GLMakie.activate!()
+
+with_theme(theme_latexfonts()) do
     px = sortperm(all_keys)
-    fig = Figure(; size=(600, 600))
-    ax = Axis(fig[1,1]; title = "all")
-    barplot!(ax, all_vals[px]; color = 1:13, colormap = :Hiroshige)
-    hlines!(ax, 5)
+    fig = Figure(; size=(1200, 400), fontsize= 24)
+    ax = Axis(fig[1,1]; title = "Total number of sites ($(sum(all_vals))) per Plant Functional Type (PFT)", titlefont=:regular)
+    barplot!(ax, all_vals[px]; color = 1:13, colormap = (:mk_12, 0.35), strokewidth=0.65, width=0.85)
+    # hlines!(ax, 5; color =:black, linewidth=0.85)
     ax.xticks = (1:length(all_keys), all_keys[px])
-    # ax.yticks = 1:10
+    ylims!(ax, 0, 50)
+    ax.yticks = (0:15:50, ["", "", "", ""])
+    text!(ax, Point2f.(1:length(all_keys), all_vals[px]), text=string.(all_vals[px]), align = (:center, :bottom), fontsize=24)
+    hidespines!(ax)
+    hideydecorations!(ax, grid=false)
     fig
+    save(joinpath(@__DIR__, "../../../fluxnet_hybrid_plots/PFTs_sites_counts.pdf"), fig)
 end
 
 # custom rules
 # validation samples (get them from the training split), the validation split coming from kfolds goes to test!
-
+# sometimes we have additional training samples, and depending on the PFT we can use them for validation or not
 function split_to_validation(k)
     if k == "GRA" || k == "ENF"
         return 3
@@ -161,8 +177,8 @@ unfold_training = [vcat(_fold_t_i[:, f]...) for f in 1:5]
 _fold_v_i = [validation_folds[i][f] for i in 1:10, f in 1:5]
 unfold_validation = [vcat(_fold_v_i[:, f]...) for f in 1:5]
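 # ? a quick disjointness sketch (assumes the unfold_* vectors hold per-fold site indices; not used below):
 # for f in 1:5
 #     @assert isempty(intersect(unfold_training[f], unfold_validation[f]))
 #     @assert isempty(intersect(unfold_training[f], unfold_tests[f]))
 # end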
 
-using JLD2
-jldsave("nfolds_sites_indices.jld2"; unfold_training=unfold_training, unfold_validation=unfold_validation, unfold_tests=unfold_tests)
+jldsave(joinpath(@__DIR__, "nfolds_sites_indices.jld2");
+    unfold_training=unfold_training, unfold_validation=unfold_validation, unfold_tests=unfold_tests)
 
 _nfold = 1
 xtrain, xval, xtest = unfold_training[_nfold], unfold_validation[_nfold], unfold_tests[_nfold]

From 204fcbe7898ab967c4fbf8ff7a59fb016388b1bc Mon Sep 17 00:00:00 2001
From: lalonso
Date: Tue, 8 Jul 2025 13:37:16 +0100
Subject: [PATCH 02/21] see sites in each fold

---
 .../{kfolds => sampling}/5fold_forcing.jl     |   0
 .../5fold_observations.jl                     |   0
 .../sampling/5fold_plots.jl                   | 130 ++++++++++++++++++
 .../exp_fluxnet_hybrid/sampling/Project.toml  |   8 ++
 .../{ => sampling}/kfold_stratify.jl          |  32 +++--
 5 files changed, 161 insertions(+), 9 deletions(-)
 rename examples/exp_fluxnet_hybrid/{kfolds => sampling}/5fold_forcing.jl (100%)
 rename examples/exp_fluxnet_hybrid/{kfolds => sampling}/5fold_observations.jl (100%)
 create mode 100644 examples/exp_fluxnet_hybrid/sampling/5fold_plots.jl
 rename examples/exp_fluxnet_hybrid/{ => sampling}/kfold_stratify.jl (84%)

diff --git a/examples/exp_fluxnet_hybrid/kfolds/5fold_forcing.jl b/examples/exp_fluxnet_hybrid/sampling/5fold_forcing.jl
similarity index 100%
rename from examples/exp_fluxnet_hybrid/kfolds/5fold_forcing.jl
rename to examples/exp_fluxnet_hybrid/sampling/5fold_forcing.jl
diff --git a/examples/exp_fluxnet_hybrid/kfolds/5fold_observations.jl b/examples/exp_fluxnet_hybrid/sampling/5fold_observations.jl
similarity index 100%
rename from examples/exp_fluxnet_hybrid/kfolds/5fold_observations.jl
rename to examples/exp_fluxnet_hybrid/sampling/5fold_observations.jl
diff --git a/examples/exp_fluxnet_hybrid/sampling/5fold_plots.jl b/examples/exp_fluxnet_hybrid/sampling/5fold_plots.jl
new file mode 100644
index 000000000..abd8b6143
--- /dev/null
+++ b/examples/exp_fluxnet_hybrid/sampling/5fold_plots.jl
@@ -0,0 +1,130 @@
+
+# activate project's environment and develop the package
+using Pkg
+Pkg.activate("examples/exp_fluxnet_hybrid/sampling")
+Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2", "GLMakie"])
+Pkg.instantiate()
+
+using MLUtils
+using StatsBase
+using YAXArrays
+using Zarr
+using JLD2
+using GLMakie
+GLMakie.activate!()
+#! load folds
+file_folds = load(joinpath(@__DIR__, "nfolds_sites_indices.jld2"))
+
+function getPFTsite(site_name, set_names, set_pfts)
+    return set_pfts[findfirst(==(site_name), set_names)]
+end
+
+function countmapPFTs(x)
+    x_counts = countmap(x)
+    x_keys = collect(keys(x_counts))
+    x_vals = collect(values(x_counts))
+    return x_counts, x_keys, x_vals
+end
+
+# get all PFTs from dataset
+ds = open_dataset(joinpath(@__DIR__, "../../data/FLUXNET_v2023_12_1D.zarr"))
+ds.properties["SITE_ID"][[98, 99, 100, 137, 138]]
+# ! update PFTs categories, original ones are not up to date!
+ds.properties["PFT"][[98, 99, 100, 137, 138]] .= ["WET", "WET", "GRA", "WET", "SNO"]
+updatePFTs = ds.properties["PFT"]
+
+
+# ? site names
+site_names = ds.site.val
+
+function get_fold_names(x_fold_set, site_names, updatePFTs; color=:black)
+    set_pfts = updatePFTs[x_fold_set]
+    fold_names = site_names[x_fold_set]
+
+    x_counts, x_keys, x_vals = countmapPFTs(set_pfts)
+    px = sortperm(x_keys)
+    u_pfts = unique(set_pfts)
+    indx_names = [findall(x->x==p, set_pfts) for p in u_pfts]
+
+    dict_names = Dict(u_pfts .=> indx_names)
+
+    box_colors = repeat([color], length(u_pfts))
+
+    return x_vals, px, x_keys, fold_names, dict_names, box_colors, x_counts
+end
+
+using CairoMakie
+CairoMakie.activate!()
+# load fold
+for _nfold in 1:5
+    xtrain, xval, xtest = file_folds["unfold_training"][_nfold], file_folds["unfold_validation"][_nfold], file_folds["unfold_tests"][_nfold]
+    with_theme(theme_latexfonts()) do
+        fig = Figure(; size=(1200, 1400), fontsize=24)
+
+        x_vals, px, x_keys, fold_names, dict_names, box_colors, x_counts = get_fold_names(xtest, site_names, updatePFTs)
+        x_vals_test = x_vals
+
+        ax = Axis(fig[1,1]; xgridstyle=:dash, ygridstyle=:dash)
+        # ax_gl = GridLayout(fig[2,1]; xgridstyle=:dash, ygridstyle=:dash)
+
+        barplot!(ax, x_vals[px]; color=:transparent, strokewidth=0.65, strokecolor=:dodgerblue)
+        text!(ax, Point2f.(1:length(x_keys), x_vals[px]), text=string.(x_vals[px]),
+            align = (:center, :bottom), fontsize=24)
+
+        for (i, k) in enumerate(x_keys[px])
+            text!(ax, [i], [x_vals[px][i]]; text= join(fold_names[dict_names[k]], "\n"), color=:grey25,
+                align=(:center, 1.1), fontsize = 16)
+        end
+        ax.xticks = (1:length(x_keys), x_keys[px])
+        hidedecorations!(ax, grid=false)
+        ylims!(ax, -0.15, 12)
+        hidespines!(ax)
+
+        # validation
+        x_vals, px, x_keys, fold_names, dict_names, box_colors, x_counts = get_fold_names(xval, site_names, updatePFTs; color=:tomato)
+        x_vals_val = x_vals
+
+        ax = Axis(fig[2,1]; xgridstyle=:dash, ygridstyle=:dash)
+        # ax_gl = GridLayout(fig[2,1]; xgridstyle=:dash, ygridstyle=:dash)
+
+        barplot!(ax, x_vals[px]; color=:transparent, strokewidth=0.65, strokecolor=:tomato)
+        text!(ax, Point2f.(1:length(x_keys), x_vals[px]), text=string.(x_vals[px]),
+            align = (:center, :bottom), fontsize=24)
+
+        for (i, k) in enumerate(x_keys[px])
+            text!(ax, [i], [x_vals[px][i]]; text= join(fold_names[dict_names[k]], "\n"), color=:grey25,
+                align=(:center, 1.1), fontsize = 16)
+        end
+        ax.xticks = (1:length(x_keys), x_keys[px])
+        hidedecorations!(ax, grid=false)
+        ylims!(ax, -0.15, 5)
+        hidespines!(ax)
+
+        # training
+        x_vals, px, x_keys, fold_names, dict_names, box_colors, x_counts = get_fold_names(xtrain, site_names, updatePFTs; color=:tomato)
+        x_vals_train = x_vals
+
+        ax = Axis(fig[3,1]; xgridstyle=:dash, ygridstyle=:dash)
+
+        barplot!(ax, x_vals[px]; color=:transparent, strokewidth=0.65, strokecolor=:black)
+        text!(ax, Point2f.(1:length(x_keys), x_vals[px]), text=string.(x_vals[px]),
+            align = (:center, :bottom), fontsize=24)
+
+        for (i, k) in enumerate(x_keys[px])
+            text!(ax, [i], [x_vals[px][i]]; text= join(fold_names[dict_names[k]], "\n"), color=:grey25,
+                align=(:center, 1.05), fontsize = 14)
+        end
+        ax.xticks = (1:length(x_keys), x_keys[px])
+        hideydecorations!(ax, grid=false)
+        ylims!(ax, -1, 36)
+        hidespines!(ax)
+        rowsize!(fig.layout, 2, Auto(0.5))
+        rowsize!(fig.layout, 3, Auto(1.5))
+        Label(fig[1,1], "Fold $(_nfold)", tellwidth=false, tellheight=false, halign=0.98, valign=1, color=:grey10, font=:bold)
+        Label(fig[1,1], "Test ($(sum(x_vals_test)))", tellwidth=false, tellheight=false, halign=0.0, valign=0.9, color=:dodgerblue, font=:bold)
+        Label(fig[2,1], "Validation ($(sum(x_vals_val)))", 
tellwidth=false, tellheight=false, halign=0.0, valign=0.9, color=:tomato, font=:bold) + Label(fig[3,1], "Training ($(sum(x_vals_train)))", tellwidth=false, tellheight=false, halign=0.0, valign=0.9, color=:grey25, font=:bold) + save(joinpath(@__DIR__, "../../../../fluxnet_hybrid_plots/fold_$(_nfold)_names_counts.pdf"), fig) + fig + end +end diff --git a/examples/exp_fluxnet_hybrid/sampling/Project.toml b/examples/exp_fluxnet_hybrid/sampling/Project.toml new file mode 100644 index 000000000..d3705788f --- /dev/null +++ b/examples/exp_fluxnet_hybrid/sampling/Project.toml @@ -0,0 +1,8 @@ +[deps] +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" +Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99" diff --git a/examples/exp_fluxnet_hybrid/kfold_stratify.jl b/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl similarity index 84% rename from examples/exp_fluxnet_hybrid/kfold_stratify.jl rename to examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl index 7cbbc44dc..20bdb8408 100644 --- a/examples/exp_fluxnet_hybrid/kfold_stratify.jl +++ b/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl @@ -1,15 +1,27 @@ +# activate project's environment and develop the package +using Pkg +Pkg.activate("examples/exp_fluxnet_hybrid/sampling") +Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2", "GLMakie", "CairoMakie"]) +Pkg.instantiate() + +# start using packages using MLUtils -using SindbadData.YAXArrays -using SindbadData.Zarr -using SindbadML.JLD2 +using StatsBase +using YAXArrays +using Zarr +using JLD2 using GLMakie +# get data +# $examples/data> scp -r lalonso@ruthenia:/Net/Groups/BGI/work_5/scratch/lalonso/CovariatesFLUXNET_3.zarr . +# @examples/data> scp -r lalonso@ruthenia:/Net/Groups/BGI/work_4/scratch/lalonso/FLUXNET_v2023_12_1D.zarr . + # create a new folder for plots -mkpath(joinpath(@__DIR__, "../../../fluxnet_hybrid_plots/")) +mkpath(joinpath(@__DIR__, "../../../../fluxnet_hybrid_plots/")) -c_read = Cube("examples/data/CovariatesFLUXNET_3.zarr"); +c_read = Cube(joinpath(@__DIR__, "../../data/CovariatesFLUXNET_3.zarr")); -ds = open_dataset(joinpath(@__DIR__, "../data/FLUXNET_v2023_12_1D.zarr")) +ds = open_dataset(joinpath(@__DIR__, "../../data/FLUXNET_v2023_12_1D.zarr")) ds.properties["SITE_ID"][[98, 99, 100, 137, 138]] # ! update PFTs categories, original ones are not up to date! 
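 # a quick sketch to eyeball the five relabeled sites (assumes SITE_ID and PFT share indexing):
 # foreach(println, zip(ds.properties["SITE_ID"][[98, 99, 100, 137, 138]], ds.properties["PFT"][[98, 99, 100, 137, 138]]))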
ds.properties["PFT"][[98, 99, 100, 137, 138]] .= ["WET", "WET", "GRA", "WET", "SNO"] @@ -61,7 +73,7 @@ setPFT = updatePFTs[new_sites_index] x, y, z = splitobs_stratified(at=(0.1,0.82), y=setPFT) # map back to original site names and indices -using StatsBase + function countmapPFTs(x) x_counts = countmap(x) x_keys = collect(keys(x_counts)) @@ -103,7 +115,9 @@ with_theme(theme_latexfonts()) do px = sortperm(all_keys) fig = Figure(; size=(1200, 400), fontsize= 24) ax = Axis(fig[1,1]; title = "Total number of sites ($(sum(all_vals))) per Plant Functional Type (PFT)", titlefont=:regular) - barplot!(ax, all_vals[px]; color = 1:13, colormap = (:mk_12, 0.35), strokewidth=0.65, width=0.85) + # barplot!(ax, all_vals[px]; color = 1:13, colormap = (:mk_12, 0.35), strokewidth=0.65, width=0.85) + barplot!(ax, all_vals[px]; color = (:grey25, 0.05), strokewidth=0.65, width=0.85) + # hlines!(ax, 5; color =:black, linewidth=0.85) ax.xticks = (1:length(all_keys), all_keys[px]) ylims!(ax, 0, 50) @@ -112,7 +126,7 @@ with_theme(theme_latexfonts()) do hidespines!(ax) hideydecorations!(ax, grid=false) fig - save(joinpath(@__DIR__, "../../../fluxnet_hybrid_plots/PFTs_sites_counts.pdf"), fig) + save(joinpath(@__DIR__, "../../../../fluxnet_hybrid_plots/PFTs_sites_counts_bw.pdf"), fig) end From abf8affd8c7bb11b4082f21259733d1fa63ee8bd Mon Sep 17 00:00:00 2001 From: lalonso Date: Tue, 8 Jul 2025 14:55:02 +0100 Subject: [PATCH 03/21] mkpath windows fix, but now it errors with model inconsistency --- .../exp_fluxnet_hybrid_clean_la.jl | 189 ++++++++++++++++++ lib/SindbadSetup/src/setupOutput.jl | 1 + 2 files changed, 190 insertions(+) create mode 100644 examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl new file mode 100644 index 000000000..e83dc889c --- /dev/null +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl @@ -0,0 +1,189 @@ +# activate project's environment and develop the package +using Pkg +Pkg.activate("examples/exp_fluxnet_hybrid") +Pkg.develop(path=pwd()) +Pkg.instantiate() +# start using the package +using SindbadUtils +using SindbadTEM +using SindbadSetup +using SindbadData +using SindbadData.DimensionalData +using SindbadData.AxisKeys +using SindbadData.YAXArrays +# using SindbadTEM +using SindbadML +using SindbadML.JLD2 +using ProgressMeter +# using SindbadOptimization +using SindbadML.Zygote +# import AbstractDifferentiation as AD, Zygote + + +# extra includes for covariate and activation functions +# extra includes for covariate and activation functions +# include(joinpath(@__DIR__, "../../examples/exp_fluxnet_hybrid/load_covariates.jl")) +# include(joinpath(@__DIR__, "../../examples/exp_fluxnet_hybrid/test_activation_functions.jl")) + + +## paths +file_folds = load(joinpath(@__DIR__, "./sampling/nfolds_sites_indices.jld2")) +experiment_json = "../exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment.json" + +# for remote node +path_input = "$(getSindbadDataDepot())/FLUXNET_v2023_12_1D.zarr" + +path_covariates = "$(getSindbadDataDepot())/CovariatesFLUXNET_3.zarr" +replace_info = Dict() + +replace_info = Dict( + "forcing.default_forcing.data_path" => path_input, + "optimization.observations.default_observation.data_path" => path_input, + ); + +info = getExperimentInfo(experiment_json; replace_info=replace_info); + + +selected_models = info.models.forward; +parameter_scaling_type = info.optimization.run_options.parameter_scaling + +## parameters 
+tbl_params = info.optimization.parameter_table; +param_to_index = getParameterIndices(selected_models, tbl_params); + +## forcing and obs +forcing = getForcing(info); +observations = getObservation(info, forcing.helpers); + +## helpers +run_helpers = prepTEM(selected_models, forcing, observations, info); + +space_forcing = run_helpers.space_forcing; +space_observations = run_helpers.space_observation; +space_output = run_helpers.space_output; +space_spinup_forcing = run_helpers.space_spinup_forcing; +space_ind = run_helpers.space_ind; +land_init = run_helpers.loc_land; +loc_forcing_t = run_helpers.loc_forcing_t; + +space_cost_options = [prepCostOptions(loc_obs, info.optimization.cost_options) for loc_obs in space_observations]; +constraint_method = info.optimization.run_options.multi_constraint_method; + +tem_info = run_helpers.tem_info; +## do example site +## + +site_example_1 = space_ind[1][1]; +@time coreTEM!(selected_models, space_forcing[site_example_1], space_spinup_forcing[site_example_1], loc_forcing_t, space_output[site_example_1], land_init, tem_info) + +## + +## features +sites_forcing = forcing.data[1].site; # sites names + + +# ! selection and batching +_nfold = 5 #Base.parse(Int, ARGS[1]) # select the fold +xtrain, xval, xtest = file_folds["unfold_training"][_nfold], file_folds["unfold_validation"][_nfold], file_folds["unfold_tests"][_nfold] + +# ? training +sites_training = sites_forcing[xtrain]; +indices_sites_training = siteNameToID.(sites_training, Ref(sites_forcing)); +# # ? validation +sites_validation = sites_forcing[xval]; +indices_sites_validation = siteNameToID.(sites_validation, Ref(sites_forcing)); +# # ? test +sites_testing = sites_forcing[xtest]; +indices_sites_testing = siteNameToID.(sites_testing, Ref(sites_forcing)); + +indices_sites_batch = indices_sites_training; + +xfeatures = loadCovariates(sites_forcing; kind="all", cube_path=path_covariates); +@info "xfeatures: [$(minimum(xfeatures)), $(maximum(xfeatures))]" + +nor_names_order = xfeatures.features; +n_features = length(nor_names_order) + +## Build ML method +n_params = sum(tbl_params.is_ml); +nlayers = 3 # Base.parse(Int, ARGS[2]) +n_neurons = 32 # Base.parse(Int, ARGS[3]) +batch_size = 32 # Base.parse(Int, ARGS[4]) +batch_seed = 123 * batch_size * 2 +n_epochs = 2 +k_σ = 1.f0 +# custom_activation = CustomSigmoid(k_σ) +# custom_activation = sigmoid_3 +mlBaseline = denseNN(n_features, n_neurons, n_params; extra_hlayers=nlayers, seed=batch_seed); + +# Initialize params and grads +params_sites = mlBaseline(xfeatures); +@info "params_sites: [$(minimum(params_sites)), $(maximum(params_sites))]" + +grads_batch = zeros(Float32, n_params, length(sites_training))[:,1:batch_size]; +sites_batch = sites_training;#[1:n_sites_train]; +params_batch = params_sites(; site=sites_batch); +@info "params_batch: [$(minimum(params_batch)), $(maximum(params_batch))]" +scaled_params_batch = getParamsAct(params_batch, tbl_params); +@info "scaled_params_batch: [$(minimum(scaled_params_batch)), $(maximum(scaled_params_batch))]" + +forward_args = ( + selected_models, + space_forcing, + space_spinup_forcing, + loc_forcing_t, + space_output, + land_init, + tem_info, + param_to_index, + parameter_scaling_type, + space_observations, + space_cost_options, + constraint_method + ); + + +input_args = ( + scaled_params_batch, + forward_args..., + indices_sites_batch, + sites_batch +); + +grads_lib = ForwardDiffGrad(); +grads_lib = FiniteDifferencesGrad(); +grads_lib = FiniteDiffGrad(); +grads_lib = PolyesterForwardDiffGrad(); +# grads_lib 
= ZygoteGrad(); +# grads_lib = EnzymeGrad(); +# backend = AD.ZygoteBackend(); + +loc_params, inner_args = getInnerArgs(1, grads_lib, input_args...); + +loss_tmp(x) = lossSite(x, grads_lib, inner_args...) + +# AD.gradient(backend, loss_tmp, collect(loc_params)) + +@time gg = gradientSite(grads_lib, loc_params, 2, lossSite, inner_args...) + +gradientBatch!(grads_lib, grads_batch, 2, lossSite, getInnerArgs,input_args...; showprog=true) + + +# ? training arguments +chunk_size = 2 +metadata_global = info.output.file_info.global_metadata + +in_gargs=(; + train_refs = (; sites_training, indices_sites_training, xfeatures, tbl_params, batch_size, chunk_size, metadata_global), + test_val_refs = (; sites_validation, indices_sites_validation, sites_testing, indices_sites_testing), + total_constraints = length(info.optimization.cost_options.variable), + forward_args, + loss_fargs = (lossSite, getInnerArgs) +); + +checkpoint_path = "$(info.output.dirs.data)/HyALL_ALL_kσ_$(k_σ)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(n_epochs)epochs_batch_size_$(batch_size)/" + +mkpath(checkpoint_path) + +@info checkpoint_path +mixedGradientTraining(grads_lib, mlBaseline, in_gargs.train_refs, in_gargs.test_val_refs, in_gargs.total_constraints, in_gargs.loss_fargs, in_gargs.forward_args; n_epochs=n_epochs, path_experiment=checkpoint_path) diff --git a/lib/SindbadSetup/src/setupOutput.jl b/lib/SindbadSetup/src/setupOutput.jl index b0553986f..c9c9cf034 100644 --- a/lib/SindbadSetup/src/setupOutput.jl +++ b/lib/SindbadSetup/src/setupOutput.jl @@ -255,6 +255,7 @@ Saves a copy of the experiment settings to the output folder. function saveExperimentSettings(info) sindbad_experiment = info.temp.experiment.dirs.sindbad_experiment showInfo(saveExperimentSettings, @__FILE__, @__LINE__, "saving Experiment JSON Settings to : $(info.output.dirs.settings)") + mkpath(dirname(joinpath(info.output.dirs.settings, split(sindbad_experiment, path_separator)[end]))) # create output directory if it does not exist cp(sindbad_experiment, joinpath(info.output.dirs.settings, split(sindbad_experiment, path_separator)[end]); force=true) From e1acb3f141e6a6e7993ba70af82716b1f1b9a547 Mon Sep 17 00:00:00 2001 From: lalonso Date: Tue, 8 Jul 2025 18:13:59 +0100 Subject: [PATCH 04/21] fixed legacy issues, paths, tbl parameters, info.jld2 error --- .../exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl | 4 ++-- .../settings_fluxnet_hybrid/parameter_learning.json | 2 +- lib/SindbadSetup/src/setupExperimentInfo.jl | 3 +-- lib/SindbadSetup/src/setupHybridML.jl | 3 +-- lib/SindbadSetup/src/setupParameters.jl | 9 ++++++--- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl index e83dc889c..a496c2e21 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl @@ -28,7 +28,7 @@ using SindbadML.Zygote ## paths file_folds = load(joinpath(@__DIR__, "./sampling/nfolds_sites_indices.jld2")) -experiment_json = "../exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment.json" +path_experiment_json = "../exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json" # for remote node path_input = "$(getSindbadDataDepot())/FLUXNET_v2023_12_1D.zarr" @@ -41,7 +41,7 @@ replace_info = Dict( "optimization.observations.default_observation.data_path" => path_input, ); -info = getExperimentInfo(experiment_json; replace_info=replace_info); +info = 
getExperimentInfo(path_experiment_json; replace_info=replace_info);
 
 
 selected_models = info.models.forward;
diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json
index 74b0e96d1..609e2683b 100644
--- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json
+++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json
@@ -20,7 +20,7 @@
             "n_folds": 10
         },
         "package": "",
-        "fold_path": "./nfolds_sites_indices.jld2",
+        "fold_path": "./sampling/nfolds_sites_indices.jld2",
         "which_fold": 5
     },
     "ml_gradient": {
diff --git a/lib/SindbadSetup/src/setupExperimentInfo.jl b/lib/SindbadSetup/src/setupExperimentInfo.jl
index 6015da3b3..340e13ede 100644
--- a/lib/SindbadSetup/src/setupExperimentInfo.jl
+++ b/lib/SindbadSetup/src/setupExperimentInfo.jl
@@ -104,8 +104,7 @@ Saves or skips saving the experiment configuration to a file.
 function saveInfo end
 
 function saveInfo(info, ::DoSaveInfo)
-    info_path = joinpath(info.output.dirs.settings, "info.jld
-2")
+    info_path = joinpath(info.output.dirs.settings, "info.jld2")
     showInfo(saveInfo, @__FILE__, @__LINE__, "saving info to `$(info_path)`")
     @save info_path info
     return nothing
diff --git a/lib/SindbadSetup/src/setupHybridML.jl b/lib/SindbadSetup/src/setupHybridML.jl
index d9c1b02c2..fe6f8afc2 100644
--- a/lib/SindbadSetup/src/setupHybridML.jl
+++ b/lib/SindbadSetup/src/setupHybridML.jl
@@ -151,8 +151,7 @@ function setHybridInfo(info::NamedTuple)
 
     info = setTupleSubfield(info, :hybrid, (:replace_value_for_gradient, info.temp.helpers.numbers.num_type(replace_value_for_gradient)))
 
-
-    covariates_path = getAbsDataPath(info, info.settings.hybrid.covariates.path)
+    covariates_path = getAbsDataPath(info.temp, info.settings.hybrid.covariates.path)
     covariates = (; path=covariates_path, variables=info.settings.hybrid.covariates.variables)
     info = setTupleSubfield(info, :hybrid, (:covariates, covariates))
     info = setTupleSubfield(info, :hybrid, (:random_seed, info.settings.hybrid.random_seed))
diff --git a/lib/SindbadSetup/src/setupParameters.jl b/lib/SindbadSetup/src/setupParameters.jl
index b4edfc7f3..10839d06f 100644
--- a/lib/SindbadSetup/src/setupParameters.jl
+++ b/lib/SindbadSetup/src/setupParameters.jl
@@ -73,9 +73,11 @@ function getParameters(selected_models::Tuple, num_type, model_timestep; return_
     nbounds = length(constrains)
     lower = [constrains[i][1] for i in 1:nbounds]
     upper = [constrains[i][2] for i in 1:nbounds]
-
+    
     model = [Symbol(supertype(getproperty(Models, m))) for m in model_approach]
-    name_full = [join((model[i], name[i]), ".") for i in 1:nbounds]
+    model_str = string.(model)
+    # name_full = [join((model[i], name[i]), ".") for i in 1:nbounds]
+    name_full = [join((last(split(model_str[i], ".")), name[i]), ".") for i in 1:nbounds]
     approach_func = [getfield(Models, m) for m in model_approach]
     model_prev = model_approach[1]
     m_id = findall(x-> x==model_prev, model_names_list)[1]
@@ -175,7 +177,8 @@ function getOptimizationParametersTable(parameter_table_all::Table, model_parame
         parameter_keys = optimization_parameters
     end
     parameter_list = replaceCommaSeparatedParams(parameter_keys)
-    missing_parameters = filter(x -> !(x in parameter_table_all.name_full), parameter_list)
+    # [print("$p"*"\n") for p in parameter_table_all.name]
+    missing_parameters = filter(x -> !(x in parameter_table_all.name_full), parameter_list) # doing `occursin` here could also work, so that we don't update the function getParameters
     if 
!isempty(missing_parameters) error("Model Inconsistency: $([missing_parameters...]) parameter(s) not found in the selected model structure. Check the model structure in model_structure.json to include the parameter(s) or change model_parameters_to_optimize in optimization.json to exclude the parameter(s).") end From 1e38bea7a17f7caaf3545e2fb3dfd6ef14638dec Mon Sep 17 00:00:00 2001 From: lalonso Date: Wed, 9 Jul 2025 16:23:26 +0100 Subject: [PATCH 05/21] fix paths, and add missing import --- examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl index a496c2e21..b20937684 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl @@ -98,7 +98,7 @@ indices_sites_testing = siteNameToID.(sites_testing, Ref(sites_forcing)); indices_sites_batch = indices_sites_training; -xfeatures = loadCovariates(sites_forcing; kind="all", cube_path=path_covariates); +xfeatures = loadCovariates(sites_forcing; kind="all", cube_path=joinpath(@__DIR__, path_covariates)); @info "xfeatures: [$(minimum(xfeatures)), $(maximum(xfeatures))]" nor_names_order = xfeatures.features; @@ -150,6 +150,7 @@ input_args = ( sites_batch ); +using SindbadML.Sindbad grads_lib = ForwardDiffGrad(); grads_lib = FiniteDifferencesGrad(); grads_lib = FiniteDiffGrad(); From 7df8e028d90b6ff4a2e2edcb88d0f7ae8b1f9442 Mon Sep 17 00:00:00 2001 From: lazarusA Date: Thu, 10 Jul 2025 10:54:08 +0200 Subject: [PATCH 06/21] fixes support for lts 1.10 --- Project.toml | 4 ++-- examples/exp_fluxnet_hybrid/Project.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 99c0c43f7..d94d68016 100644 --- a/Project.toml +++ b/Project.toml @@ -25,12 +25,12 @@ TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" [compat] Accessors = "0.1.42" Crayons = "4.1.1" -Dates = "1.11.0" +Dates = "1.10, 1.11.0" NaNStatistics = "0.6.50" Pkg = "1.10.0" Reexport = "1.2.2" TypedTables = "1.4.6" -julia = "1.9" +julia = "1.10" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/examples/exp_fluxnet_hybrid/Project.toml b/examples/exp_fluxnet_hybrid/Project.toml index 2502d7ab7..386bd862e 100644 --- a/examples/exp_fluxnet_hybrid/Project.toml +++ b/examples/exp_fluxnet_hybrid/Project.toml @@ -17,7 +17,7 @@ SindbadUtils = "a2ad09c8-73f9-4387-ada0-5c5a8c58f8ab" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [sources] -Sindbad = {path = "C:\\Users\\lalonso\\Documents\\SINDBAD"} +Sindbad = {path = "../../"} SindbadData = {path = "../../lib/SindbadData"} SindbadML = {path = "../../lib/SindbadML"} SindbadMetrics = {path = "../../lib/SindbadMetrics"} From 4a6bd5b9845c18d26539539ee7fefc01c2c0f425 Mon Sep 17 00:00:00 2001 From: lazarusA Date: Thu, 10 Jul 2025 12:06:21 +0200 Subject: [PATCH 07/21] explicitly set PRECOMPILE_TASKS, and don't include ForwardDiff and SindbadOptmization in the same Project.toml --- examples/exp_fluxnet_hybrid/Project.toml | 32 ++++++++++++------- .../exp_fluxnet_hybrid_clean_la.jl | 7 ++-- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/examples/exp_fluxnet_hybrid/Project.toml b/examples/exp_fluxnet_hybrid/Project.toml index 386bd862e..0c4f980d5 100644 --- a/examples/exp_fluxnet_hybrid/Project.toml +++ b/examples/exp_fluxnet_hybrid/Project.toml @@ -1,27 +1,35 @@ [deps] CMAEvolutionStrategy = 
"8d3b24bd-414e-49e0-94fb-163cc3a3e411" -CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GCMAES = "4aa9d100-eb0f-11e8-15f1-25748831eb3b" -GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Sindbad = "6686e6de-2010-4bf8-9178-b2bcc470766e" SindbadData = "772b809f-5329-4bfd-a2e9-cc4714ce84b1" SindbadML = "eb1d8004-2d42-4eef-b369-ed0645c101e8" SindbadMetrics = "7a8077d0-ecf5-44b0-95d4-89eadbafc35c" -SindbadOptimization = "64fff8bd-0c13-402c-aa96-b97d0bc3655c" SindbadSetup = "2b7f2987-8a1e-48b4-8a02-cc9e7c4eeb1c" SindbadTEM = "f6108451-10cb-42fa-b0c1-67671cf08f15" SindbadUtils = "a2ad09c8-73f9-4387-ada0-5c5a8c58f8ab" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -[sources] -Sindbad = {path = "../../"} -SindbadData = {path = "../../lib/SindbadData"} -SindbadML = {path = "../../lib/SindbadML"} -SindbadMetrics = {path = "../../lib/SindbadMetrics"} -SindbadOptimization = {path = "../../lib/SindbadOptimization"} -SindbadSetup = {path = "../../lib/SindbadSetup"} -SindbadTEM = {path = "../../lib/SindbadTEM"} -SindbadUtils = {path = "../../lib/SindbadUtils"} +[sources.Sindbad] +path = "../../" + +[sources.SindbadData] +path = "../../lib/SindbadData" + +[sources.SindbadML] +path = "../../lib/SindbadML" + +[sources.SindbadMetrics] +path = "../../lib/SindbadMetrics" + +[sources.SindbadSetup] +path = "../../lib/SindbadSetup" + +[sources.SindbadTEM] +path = "../../lib/SindbadTEM" + +[sources.SindbadUtils] +path = "../../lib/SindbadUtils" diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl index b20937684..4fb42553f 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl @@ -1,7 +1,8 @@ # activate project's environment and develop the package +ENV["JULIA_NUM_PRECOMPILE_TASKS"] = "1" # ! due to raven's threads restrictions, this should NOT be used in production! 
using Pkg Pkg.activate("examples/exp_fluxnet_hybrid") -Pkg.develop(path=pwd()) +# Pkg.develop(path=pwd()) Pkg.instantiate() # start using the package using SindbadUtils @@ -11,16 +12,12 @@ using SindbadData using SindbadData.DimensionalData using SindbadData.AxisKeys using SindbadData.YAXArrays -# using SindbadTEM using SindbadML using SindbadML.JLD2 using ProgressMeter -# using SindbadOptimization using SindbadML.Zygote # import AbstractDifferentiation as AD, Zygote - -# extra includes for covariate and activation functions # extra includes for covariate and activation functions # include(joinpath(@__DIR__, "../../examples/exp_fluxnet_hybrid/load_covariates.jl")) # include(joinpath(@__DIR__, "../../examples/exp_fluxnet_hybrid/test_activation_functions.jl")) From 50a2e79ed09e877e889c7439bd4d6f6f0707a092 Mon Sep 17 00:00:00 2001 From: lazarusA Date: Thu, 10 Jul 2025 17:01:58 +0200 Subject: [PATCH 08/21] setup slurm jobs --- examples/exp_fluxnet_hybrid/Project.toml | 2 + .../exp_fluxnet_hybrid_clean_la.jl | 92 ++++++++++++++----- .../exp_fluxnet_hybrid_clean_la.sh | 38 ++++++++ 3 files changed, 108 insertions(+), 24 deletions(-) create mode 100644 examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh diff --git a/examples/exp_fluxnet_hybrid/Project.toml b/examples/exp_fluxnet_hybrid/Project.toml index 0c4f980d5..19e186b36 100644 --- a/examples/exp_fluxnet_hybrid/Project.toml +++ b/examples/exp_fluxnet_hybrid/Project.toml @@ -1,5 +1,6 @@ [deps] CMAEvolutionStrategy = "8d3b24bd-414e-49e0-94fb-163cc3a3e411" +Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GCMAES = "4aa9d100-eb0f-11e8-15f1-25748831eb3b" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" @@ -11,6 +12,7 @@ SindbadMetrics = "7a8077d0-ecf5-44b0-95d4-89eadbafc35c" SindbadSetup = "2b7f2987-8a1e-48b4-8a02-cc9e7c4eeb1c" SindbadTEM = "f6108451-10cb-42fa-b0c1-67671cf08f15" SindbadUtils = "a2ad09c8-73f9-4387-ada0-5c5a8c58f8ab" +SlurmClusterManager = "c82cd089-7bf7-41d7-976b-6b5d413cbe0a" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [sources.Sindbad] diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl index 4fb42553f..39da82fe3 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl @@ -1,10 +1,28 @@ -# activate project's environment and develop the package -ENV["JULIA_NUM_PRECOMPILE_TASKS"] = "1" # ! due to raven's threads restrictions, this should NOT be used in production! -using Pkg -Pkg.activate("examples/exp_fluxnet_hybrid") -# Pkg.develop(path=pwd()) -Pkg.instantiate() -# start using the package +# ENV["JULIA_NUM_PRECOMPILE_TASKS"] = "1" # ! due to raven's threads restrictions, this should NOT be used in production! 
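+# the Distributed block below assumes this script runs inside a Slurm allocation
+# (SlurmManager reads the job layout from the SLURM_* environment); a local fallback sketch:
+#   using Distributed; addprocs(4)  # hypothetical, in place of addprocs(SlurmManager())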
+import Pkg +Pkg.activate(@__DIR__) + +using Distributed +using SlurmClusterManager # pkg> add https://github.com/lazarusA/SlurmClusterManager.jl.git#la/asynclaunch +addprocs(SlurmManager()) + +@everywhere begin + import Pkg + Pkg.activate(@__DIR__) + using SindbadUtils + using SindbadTEM + using SindbadSetup + using SindbadData + using SindbadData.DimensionalData + using SindbadData.AxisKeys + using SindbadData.YAXArrays + using SindbadML + using SindbadML.JLD2 + using ProgressMeter + using SindbadML.Zygote + using SindbadML.Sindbad +end + using SindbadUtils using SindbadTEM using SindbadSetup @@ -16,21 +34,52 @@ using SindbadML using SindbadML.JLD2 using ProgressMeter using SindbadML.Zygote +using SindbadML.Sindbad + +# # activate project's environment and develop the package +# using Pkg +# Pkg.activate("examples/exp_fluxnet_hybrid") +# # Pkg.develop(path=pwd()) +# Pkg.instantiate() +# # start using the package +# using SindbadUtils +# using SindbadTEM +# using SindbadSetup +# using SindbadData +# using SindbadData.DimensionalData +# using SindbadData.AxisKeys +# using SindbadData.YAXArrays +# using SindbadML +# using SindbadML.JLD2 +# using ProgressMeter +# using SindbadML.Zygote + + # import AbstractDifferentiation as AD, Zygote # extra includes for covariate and activation functions # include(joinpath(@__DIR__, "../../examples/exp_fluxnet_hybrid/load_covariates.jl")) # include(joinpath(@__DIR__, "../../examples/exp_fluxnet_hybrid/test_activation_functions.jl")) +# load folds # $nfold $nlayer $neuron $batchsize +_nfold = Base.parse(Int, ARGS[1]) # 5 +nlayers = Base.parse(Int, ARGS[2]) # 3 +n_neurons = Base.parse(Int, ARGS[3]) # 32 +batch_size = Base.parse(Int, ARGS[4]) # 32 + +batch_seed = 123 * batch_size * 2 +n_epochs = 500 ## paths file_folds = load(joinpath(@__DIR__, "./sampling/nfolds_sites_indices.jld2")) path_experiment_json = "../exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json" # for remote node -path_input = "$(getSindbadDataDepot())/FLUXNET_v2023_12_1D.zarr" +# path_input = "$(getSindbadDataDepot())/FLUXNET_v2023_12_1D.zarr" +path_input = "/raven/u/lalonso/sindbad.jl/examples/data/FLUXNET_v2023_12_1D.zarr" +# path_covariates = "$(getSindbadDataDepot())/CovariatesFLUXNET_3.zarr" +path_covariates = "/raven/u/lalonso/sindbad.jl/examples/data/CovariatesFLUXNET_3.zarr" -path_covariates = "$(getSindbadDataDepot())/CovariatesFLUXNET_3.zarr" replace_info = Dict() replace_info = Dict( @@ -80,7 +129,7 @@ sites_forcing = forcing.data[1].site; # sites names # ! selection and batching -_nfold = 5 #Base.parse(Int, ARGS[1]) # select the fold +# _nfold = 5 #Base.parse(Int, ARGS[1]) # select the fold xtrain, xval, xtest = file_folds["unfold_training"][_nfold], file_folds["unfold_validation"][_nfold], file_folds["unfold_tests"][_nfold] # ? 
training @@ -103,11 +152,6 @@ n_features = length(nor_names_order) ## Build ML method n_params = sum(tbl_params.is_ml); -nlayers = 3 # Base.parse(Int, ARGS[2]) -n_neurons = 32 # Base.parse(Int, ARGS[3]) -batch_size = 32 # Base.parse(Int, ARGS[4]) -batch_seed = 123 * batch_size * 2 -n_epochs = 2 k_σ = 1.f0 # custom_activation = CustomSigmoid(k_σ) # custom_activation = sigmoid_3 @@ -147,15 +191,14 @@ input_args = ( sites_batch ); -using SindbadML.Sindbad -grads_lib = ForwardDiffGrad(); -grads_lib = FiniteDifferencesGrad(); -grads_lib = FiniteDiffGrad(); -grads_lib = PolyesterForwardDiffGrad(); +# grads_lib = ForwardDiffGrad(); +# grads_lib = FiniteDifferencesGrad(); +# grads_lib = FiniteDiffGrad(); # grads_lib = ZygoteGrad(); # grads_lib = EnzymeGrad(); # backend = AD.ZygoteBackend(); +grads_lib = PolyesterForwardDiffGrad(); loc_params, inner_args = getInnerArgs(1, grads_lib, input_args...); loss_tmp(x) = lossSite(x, grads_lib, inner_args...) @@ -179,9 +222,10 @@ in_gargs=(; loss_fargs = (lossSite, getInnerArgs) ); -checkpoint_path = "$(info.output.dirs.data)/HyALL_ALL_kσ_$(k_σ)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(n_epochs)epochs_batch_size_$(batch_size)/" - -mkpath(checkpoint_path) +# checkpoint_path = "$(info.output.dirs.data)/HyALL_ALL_kσ_$(k_σ)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(n_epochs)epochs_batch_size_$(batch_size)/" +remote_raven = "/ptmp/lalonso/HybridOutput/HyALL_ALL_kσ_$(k_σ)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(n_epochs)epochs_batch_size_$(batch_size)/" +mkpath(remote_raven) +checkpoint_path = remote_raven @info checkpoint_path -mixedGradientTraining(grads_lib, mlBaseline, in_gargs.train_refs, in_gargs.test_val_refs, in_gargs.total_constraints, in_gargs.loss_fargs, in_gargs.forward_args; n_epochs=n_epochs, path_experiment=checkpoint_path) +mixedGradientTraining(grads_lib, mlBaseline, in_gargs.train_refs, in_gargs.test_val_refs, in_gargs.total_constraints, in_gargs.loss_fargs, in_gargs.forward_args; n_epochs=n_epochs, path_experiment=checkpoint_path) \ No newline at end of file diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh new file mode 100644 index 000000000..4d4ad92f6 --- /dev/null +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh @@ -0,0 +1,38 @@ +#!/bin/bash +#SBATCH --job-name=HyALL_ALL_la +#SBATCH --ntasks=8 +#SBATCH --cpus-per-task=17 +#SBATCH --mem-per-cpu=500 +#SBATCH --time=14:50:00 +#SBATCH --array=0-9 # 10 jobs +#SBATCH -o /ptmp/lalonso/slurmOutput/HyALL_ALL_la-%A_%a.out + +module load julia/1.10 +export JULIA_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +# Define parameter ranges +nfolds=(1 2 3 4 5) +nlayers=(3 2) +neurons=(32) +batchsizes=(32) + +# Calculate which combination to use based on SLURM_ARRAY_TASK_ID +id=$SLURM_ARRAY_TASK_ID +# Change the order to prioritize folds first +n_fold=$((id % ${#nfolds[@]})) +id=$((id / ${#nfolds[@]})) +n_layer=$((id % ${#nlayers[@]})) +id=$((id / ${#nlayers[@]})) +n_neuron=$((id % ${#neurons[@]})) +id=$((id / ${#neurons[@]})) +n_batch=$((id % ${#batchsizes[@]})) + +# Get the actual parameter values +nfold=${nfolds[$n_fold]} +nlayer=${nlayers[$n_layer]} +neuron=${neurons[$n_neuron]} +batchsize=${batchsizes[$n_batch]} + +echo "Running with: nfold=$nfold nlayer=$nlayer neuron=$neuron batchsize=$batchsize" +# Run the program with calculated parameters +julia --project --heap-size-hint=16G exp_fluxnet_hybrid_clean_la.jl $nfold $nlayer $neuron $batchsize \ No newline at end 
of file From ace9f5287b51e75fa52def426a750d3761f578ad Mon Sep 17 00:00:00 2001 From: lazarusA Date: Thu, 10 Jul 2025 18:24:06 +0200 Subject: [PATCH 09/21] more juice and don't save info.jld2 for every fold, TODO, needs a better solution --- .../exp_fluxnet_hybrid_clean_la.jl | 3 +- .../exp_fluxnet_hybrid_clean_la.sh | 6 +- .../exp_fluxnet_hybrid/sampling/Project.toml | 2 - .../sampling/kfold_stratify.jl | 90 ++++++++++--------- .../experiment_hybrid.json | 2 +- 5 files changed, 55 insertions(+), 48 deletions(-) diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl index 39da82fe3..03011e322 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl @@ -66,12 +66,13 @@ _nfold = Base.parse(Int, ARGS[1]) # 5 nlayers = Base.parse(Int, ARGS[2]) # 3 n_neurons = Base.parse(Int, ARGS[3]) # 32 batch_size = Base.parse(Int, ARGS[4]) # 32 +id_fold = Base.parse(Int, ARGS[5]) # 1 batch_seed = 123 * batch_size * 2 n_epochs = 500 ## paths -file_folds = load(joinpath(@__DIR__, "./sampling/nfolds_sites_indices.jld2")) +file_folds = load(joinpath(@__DIR__, "./sampling/nfolds_sites_indices_$(id_fold).jld2")) path_experiment_json = "../exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json" # for remote node diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh index 4d4ad92f6..a22932177 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh @@ -2,7 +2,7 @@ #SBATCH --job-name=HyALL_ALL_la #SBATCH --ntasks=8 #SBATCH --cpus-per-task=17 -#SBATCH --mem-per-cpu=500 +#SBATCH --mem-per-cpu=2GB #SBATCH --time=14:50:00 #SBATCH --array=0-9 # 10 jobs #SBATCH -o /ptmp/lalonso/slurmOutput/HyALL_ALL_la-%A_%a.out @@ -33,6 +33,6 @@ nlayer=${nlayers[$n_layer]} neuron=${neurons[$n_neuron]} batchsize=${batchsizes[$n_batch]} -echo "Running with: nfold=$nfold nlayer=$nlayer neuron=$neuron batchsize=$batchsize" +echo "Running with: id=$id nfold=$nfold nlayer=$nlayer neuron=$neuron batchsize=$batchsize" # Run the program with calculated parameters -julia --project --heap-size-hint=16G exp_fluxnet_hybrid_clean_la.jl $nfold $nlayer $neuron $batchsize \ No newline at end of file +julia --project --heap-size-hint=16G exp_fluxnet_hybrid_clean_la.jl $nfold $nlayer $neuron $batchsize $id \ No newline at end of file diff --git a/examples/exp_fluxnet_hybrid/sampling/Project.toml b/examples/exp_fluxnet_hybrid/sampling/Project.toml index d3705788f..64ebca66a 100644 --- a/examples/exp_fluxnet_hybrid/sampling/Project.toml +++ b/examples/exp_fluxnet_hybrid/sampling/Project.toml @@ -1,6 +1,4 @@ [deps] -CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" -GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl b/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl index 20bdb8408..9c9f3f1f5 100644 --- a/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl +++ b/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl @@ -1,7 +1,9 @@ +ENV["JULIA_NUM_PRECOMPILE_TASKS"] = "1" # ! due to raven's threads restrictions, this should NOT be used in production! 
# activate project's environment and develop the package using Pkg Pkg.activate("examples/exp_fluxnet_hybrid/sampling") -Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2", "GLMakie", "CairoMakie"]) +Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2"]) +# Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2", "GLMakie", "CairoMakie"]) Pkg.instantiate() # start using packages @@ -10,18 +12,22 @@ using StatsBase using YAXArrays using Zarr using JLD2 -using GLMakie +# using GLMakie # get data # $examples/data> scp -r lalonso@ruthenia:/Net/Groups/BGI/work_5/scratch/lalonso/CovariatesFLUXNET_3.zarr . # @examples/data> scp -r lalonso@ruthenia:/Net/Groups/BGI/work_4/scratch/lalonso/FLUXNET_v2023_12_1D.zarr . # create a new folder for plots -mkpath(joinpath(@__DIR__, "../../../../fluxnet_hybrid_plots/")) -c_read = Cube(joinpath(@__DIR__, "../../data/CovariatesFLUXNET_3.zarr")); +# mkpath(joinpath(@__DIR__, "../../../../fluxnet_hybrid_plots/")) + +# c_read = Cube(joinpath(@__DIR__, "../../data/CovariatesFLUXNET_3.zarr")); +c_read = Cube("/raven/u/lalonso/sindbad.jl/examples/data/CovariatesFLUXNET_3.zarr") + +# ds = open_dataset(joinpath(@__DIR__, "../../data/FLUXNET_v2023_12_1D.zarr")) +ds = open_dataset("/raven/u/lalonso/sindbad.jl/examples/data/FLUXNET_v2023_12_1D.zarr") -ds = open_dataset(joinpath(@__DIR__, "../../data/FLUXNET_v2023_12_1D.zarr")) ds.properties["SITE_ID"][[98, 99, 100, 137, 138]] # ! update PFTs categories, original ones are not up to date! ds.properties["PFT"][[98, 99, 100, 137, 138]] .= ["WET", "WET", "GRA", "WET", "SNO"] @@ -89,45 +95,45 @@ px = sortperm(x_keys) pz = sortperm(z_keys) py = sortperm(y_keys) -with_theme(theme_light()) do - fig = Figure(; size=(600, 600)) - ax1 = Axis(fig[1,1]; title = "training") - ax2 = Axis(fig[3,1]; title = "test") - ax3 = Axis(fig[2,1]; title = "validation") +# with_theme(theme_light()) do +# fig = Figure(; size=(600, 600)) +# ax1 = Axis(fig[1,1]; title = "training") +# ax2 = Axis(fig[3,1]; title = "test") +# ax3 = Axis(fig[2,1]; title = "validation") - barplot!(ax1, x_vals[px]; color = 1:13, colormap = :Hiroshige) - barplot!(ax2, z_vals[pz]; color = 1:length(z_vals), colorrange=(1,13), colormap = :Hiroshige) - barplot!(ax3, y_vals[py]; color = 1:length(y_vals), colorrange=(1,13), colormap = :Hiroshige) +# barplot!(ax1, x_vals[px]; color = 1:13, colormap = :Hiroshige) +# barplot!(ax2, z_vals[pz]; color = 1:length(z_vals), colorrange=(1,13), colormap = :Hiroshige) +# barplot!(ax3, y_vals[py]; color = 1:length(y_vals), colorrange=(1,13), colormap = :Hiroshige) - ax1.xticks = (1:length(x_keys), x_keys[px]) - ax2.xticks = (1:length(z_keys), z_keys[pz]) - ax3.xticks = (1:length(y_keys), y_keys[py]) - fig -end +# ax1.xticks = (1:length(x_keys), x_keys[px]) +# ax2.xticks = (1:length(z_keys), z_keys[pz]) +# ax3.xticks = (1:length(y_keys), y_keys[py]) +# fig +# end all_counts, all_keys, all_vals = countmapPFTs(setPFT) -using CairoMakie -CairoMakie.activate!() -# GLMakie.activate!() - -with_theme(theme_latexfonts()) do - px = sortperm(all_keys) - fig = Figure(; size=(1200, 400), fontsize= 24) - ax = Axis(fig[1,1]; title = "Total number of sites ($(sum(all_vals))) per Plant Functional Type (PFT)", titlefont=:regular) - # barplot!(ax, all_vals[px]; color = 1:13, colormap = (:mk_12, 0.35), strokewidth=0.65, width=0.85) - barplot!(ax, all_vals[px]; color = (:grey25, 0.05), strokewidth=0.65, width=0.85) - - # hlines!(ax, 5; color =:black, linewidth=0.85) - ax.xticks = (1:length(all_keys), all_keys[px]) - ylims!(ax, 0, 50) - 
ax.yticks = (0:15:50, ["", "", "", ""]) - text!(ax, Point2f.(1:length(all_keys), all_vals[px]), text=string.(all_vals[px]), align = (:center, :bottom), fontsize=24) - hidespines!(ax) - hideydecorations!(ax, grid=false) - fig - save(joinpath(@__DIR__, "../../../../fluxnet_hybrid_plots/PFTs_sites_counts_bw.pdf"), fig) -end +# using CairoMakie +# CairoMakie.activate!() +# # GLMakie.activate!() + +# with_theme(theme_latexfonts()) do +# px = sortperm(all_keys) +# fig = Figure(; size=(1200, 400), fontsize= 24) +# ax = Axis(fig[1,1]; title = "Total number of sites ($(sum(all_vals))) per Plant Functional Type (PFT)", titlefont=:regular) +# # barplot!(ax, all_vals[px]; color = 1:13, colormap = (:mk_12, 0.35), strokewidth=0.65, width=0.85) +# barplot!(ax, all_vals[px]; color = (:grey25, 0.05), strokewidth=0.65, width=0.85) + +# # hlines!(ax, 5; color =:black, linewidth=0.85) +# ax.xticks = (1:length(all_keys), all_keys[px]) +# ylims!(ax, 0, 50) +# ax.yticks = (0:15:50, ["", "", "", ""]) +# text!(ax, Point2f.(1:length(all_keys), all_vals[px]), text=string.(all_vals[px]), align = (:center, :bottom), fontsize=24) +# hidespines!(ax) +# hideydecorations!(ax, grid=false) +# fig +# save(joinpath(@__DIR__, "../../../../fluxnet_hybrid_plots/PFTs_sites_counts_bw.pdf"), fig) +# end # custom rules @@ -191,8 +197,10 @@ unfold_training = [vcat(_fold_t_i[:, f]...) for f in 1:5] _fold_v_i = [validation_folds[i][f] for i in 1:10, f in 1:5] unfold_validation = [vcat(_fold_v_i[:, f]...) for f in 1:5] -jldsave(joinpath(@__DIR__, "nfolds_sites_indices.jld2"); - unfold_training=unfold_training, unfold_validation=unfold_validation, unfold_tests=unfold_tests) +for id in 0:10 + jldsave(joinpath(@__DIR__, "nfolds_sites_indices_$(id).jld2"); + unfold_training=unfold_training, unfold_validation=unfold_validation, unfold_tests=unfold_tests) +end _nfold = 1 xtrain, xval, xtest = unfold_training[_nfold], unfold_validation[_nfold], unfold_tests[_nfold] diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json index fd91ffad4..0525d60cf 100644 --- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json +++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json @@ -33,7 +33,7 @@ "inline_update": false, "run_forward": true, "run_optimization": true, - "save_info": true, + "save_info": false, "spinup_TEM": true, "store_spinup": false, "use_forward_diff": false From 4bba851855c68b1b1ce68d21aaa6c4f0a7caa929 Mon Sep 17 00:00:00 2001 From: lazarusA Date: Fri, 18 Jul 2025 09:51:01 +0200 Subject: [PATCH 10/21] analysis --- .../exp_fluxnet_hybrid/analysis/Project.toml | 3 +- .../analysis/fold_analysis.jl | 143 ++++++++++++------ .../exp_fluxnet_hybrid_clean_la.jl | 3 +- .../exp_fluxnet_hybrid_clean_la.sh | 2 +- .../experiment_hybrid.json | 2 +- lib/SindbadSetup/src/setupExperimentInfo.jl | 2 +- 6 files changed, 105 insertions(+), 50 deletions(-) diff --git a/examples/exp_fluxnet_hybrid/analysis/Project.toml b/examples/exp_fluxnet_hybrid/analysis/Project.toml index 744fd3cf6..cf7aed186 100644 --- a/examples/exp_fluxnet_hybrid/analysis/Project.toml +++ b/examples/exp_fluxnet_hybrid/analysis/Project.toml @@ -3,7 +3,6 @@ ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" -GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" GeoMakie = 
"db073c08-6b98-4ee5-b6a4-5efafb3259c6" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" @@ -13,5 +12,7 @@ NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9" Proj = "c94c279d-25a6-4763-9509-64d165bea63e" Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +SwarmMakie = "0b1c068e-6a84-4e66-8136-5c95cafa83ed" +TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99" diff --git a/examples/exp_fluxnet_hybrid/analysis/fold_analysis.jl b/examples/exp_fluxnet_hybrid/analysis/fold_analysis.jl index 2505a2659..320939b9b 100644 --- a/examples/exp_fluxnet_hybrid/analysis/fold_analysis.jl +++ b/examples/exp_fluxnet_hybrid/analysis/fold_analysis.jl @@ -1,3 +1,4 @@ +ENV["JULIA_NUM_PRECOMPILE_TASKS"] = "1" using CairoMakie using SwarmMakie using Flux @@ -5,28 +6,37 @@ using Statistics using TypedTables using JLD2 -path_ptmp = "/ptmp/lalonso/HybridOutput/" - +path_ptmp = "/ptmp/lalonso/HybridOutputALL/" +exp_name = "HyALL_ALL" +_nfold = 5 +nlayers = 3 +n_neurons = 32 +bs= 32 +nepochs=500 experiment = "$(exp_name)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_batch_size_$(bs)/checkpoint" +# experiment = "$(exp_name)_kσ_1.0_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(nepochs)epochs_batch_size_$(bs)/checkpoint" checkpoint_path = joinpath(path_ptmp, experiment) -losses = JLD2.load(joinpath(checkpoint_path, "checkpoint_epoch_500.jld2")) +losses = JLD2.load(joinpath(checkpoint_path, "checkpoint_epoch_100.jld2")) +# "HyALL_ALL_kσ_1.0_fold_5_nlayers_3_n_neurons_32_500epochs_batch_size_32" +# "HyALL_ALL_kσ_1.0_fold_5_nlayers_3_n_neurons_32_batch_size_32_500epochs" losses["loss_split_testing"] # Load the training history -function load_losses(exp_name, _nfold, nlayers, n_neurons, bs, nepochs) - path_ptmp = "/ptmp/lalonso/HybridOutput/" +function load_losses(exp_name, _nfold, nlayers, n_neurons, bs, nepochs, load_nepochs) + path_ptmp = "/ptmp/lalonso/HybridOutputALL/" if exp_name == "HyFixK_PFT" bs = bs * 123 end experiment = "$(exp_name)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_batch_size_$(bs)/checkpoint" + # experiment = "$(exp_name)_kσ_1.0_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(nepochs)epochs_batch_size_$(bs)/checkpoint" checkpoint_path = joinpath(path_ptmp, experiment) μtrain = Float32[] μval = Float32[] μtest = Float32[] - for epoch in 1:nepochs + for epoch in 1:load_nepochs losses = JLD2.load(joinpath(checkpoint_path, "checkpoint_epoch_$epoch.jld2")) push!(μtrain, mean(losses["loss_training"])) push!(μval, mean(losses["loss_validation"])) @@ -36,6 +46,8 @@ function load_losses(exp_name, _nfold, nlayers, n_neurons, bs, nepochs) return μtrain, μval, μtest end +mkpath(joinpath(@__DIR__, "figs")) + function plot_training_history(μtrain, μval, μtest) with_theme(theme_light()) do fig = Figure(; size = (600, 400)) @@ -45,62 +57,72 @@ function plot_training_history(μtrain, μval, μtest) lines!(ax, μtest, color = :olive, linewidth = 1.25, label = "test") # ylims!(ax, 3, 4) axislegend(ax, position = :rt) - save("history_losses_5.png", fig) + save(joinpath(@__DIR__, "figs/history_losses_5.png"), fig) end end # ? 
load n-fold history nepochs=500 _nfold = 5 -nlayers= 2 +nlayers= 3 n_neurons = 32 bs = 32 -exp_name = "HyFixK_ALL" +# exp_name = "HyFixK_ALL" +exp_name = "HyALL_ALL" -μtrain, μval, μtest = load_losses(exp_name, _nfold, nlayers, n_neurons, bs, 500) +μtrain, μval, μtest = load_losses(exp_name, _nfold, nlayers, n_neurons, bs, 500, 210) plot_training_history(μtrain, μval, μtest) -function plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) - with_theme(theme_light()) do - fig = Figure(; size = (400, 900)) - axs = [Axis(fig[row, 1]) for row in 1:5] - for (_nfold, ax) in enumerate(axs) - μtrain, μval, μtest = load_losses(exp_name, _nfold, nlayers, n_neurons, bs, nepochs) +function plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs; xpos = 185) + with_theme(theme_latexfonts()) do + fig = Figure(; size = (1200, 400), fontsize=24) + axs = [Axis(fig[row, col]; xgridstyle=:dash, ygridstyle=:dash, xlabel = "epoch", ylabel="loss") + for row in 1:2 for col in 1:3] + for (_nfold, ax) in enumerate(axs[1:5]) + μtrain, μval, μtest = load_losses(exp_name, _nfold, nlayers, n_neurons, bs, 500, nepochs) lines!(ax, μtrain, color = :dodgerblue, linewidth = 1.25, label = "training") lines!(ax, μval, color = :orangered, linewidth = 1.25, label = "validation") lines!(ax, μtest, color = :olive, linewidth = 1.25, label = "test") - ax.title = "fold $(_nfold)" + # ax.title = "fold $(_nfold)" + text!(ax, [xpos], [5.5], text="fold $(_nfold)", color = :grey25) end - axislegend(axs[1], position = :rt, nbanks=3) - hidexdecorations!.(axs[1:4], ticks=false, grid=false) + Legend(fig[2, 3], axs[1], tellwidth=false, tellheight=false, halign=0, + framewidth=0.25, patchcolor = (:white, 0.25) ) + # axislegend(axs[], position = :ct, nbanks=3, framewidth=0.25, patchcolor = (:white, 0.25)) + hidexdecorations!.(axs[1:3], ticks=false, grid=false) + hidespines!(axs[end]) + hidedecorations!(axs[end]) linkaxes!.(axs) - limits!.(axs, 0, nepochs, 2.7, 4.5) + limits!.(axs, 0, nepochs, 2.7, 6) rowgap!(fig.layout, 0) - save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_history.png", fig) + hidespines!.(axs) + save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_history.pdf", fig) end end # ? 
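# A usage sketch for the checkpoint helpers above, assuming checkpoint files
# exist under `checkpoint_path` with the epoch naming used in this script:
#
#   using JLD2, Statistics
#   ck = JLD2.load(joinpath(checkpoint_path, "checkpoint_epoch_1.jld2"))
#   mean(ck["loss_training"]), mean(ck["loss_validation"])   # per-split means
#
# `load_losses` repeats exactly this per epoch and collects the means for the
# training, validation, and test splits, which `plot_training_history` then
# draws as the three curves.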
load n-fold history -nepochs=500 -_nfold = 5 -nlayers= 2 -n_neurons = 32 -bs = 32 -exp_name = "HyFixK_ALL" # HyFixK_PFT -plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) +# nepochs=500 +# _nfold = 5 +# nlayers= 2 +# n_neurons = 32 +# bs = 32 +# exp_name = "HyFixK_ALL" # HyFixK_PFT +# plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) -exp_name = "HyFixK_PFT" # -plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) +# exp_name = "HyFixK_PFT" # +# plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) exp_name = "HyALL_ALL" -nepochs = 150 -plot_training_history_folds(exp_name, nlayers, n_neurons, bs, nepochs) +nepochs = 210 +plot_training_history_folds(exp_name, 2, n_neurons, bs, nepochs; xpos = 170) +plot_training_history_folds(exp_name, 3, n_neurons, bs, 180; xpos = 170) -exp_name = "HyALL_PFT" -nepochs = 100 -plot_training_history_folds(exp_name, 3, n_neurons, bs, nepochs) + +# exp_name = "HyALL_PFT" +# nepochs = 100 +# plot_training_history_folds(exp_name, 3, n_neurons, bs, nepochs) # do per variable @@ -147,8 +169,8 @@ split_tested = collect_folds() function plot_box_split(split_m) _constraints = ["gpp", "nee", "reco", "transpiration", "evapotranspiration", "agb", "ndvi"] _colors = ["#4CAF50", "#1565C0", "#D32F2F", "#00ACC1", "#00897B", "#8D6E63", "#CDDC39"] - with_theme() do - fig = Figure(; size = (1200, 300)) + with_theme(theme_latexfonts()) do + fig = Figure(; size = (1200, 300), fontsize=24) axs = [Axis(fig[1, i]) for i in 1:7] for (i, ax) in enumerate(axs) tmp_vec = filter(!isnan, split_m[:,i]) @@ -165,7 +187,7 @@ function plot_box_split(split_m) # hidexdecorations!.(axs, grid=false, label=false, ticklabels=false, ticks=true) hidexdecorations!.(axs, grid=false, label=false, ticklabels=true) hidespines!.(axs) - save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_box_test.png", fig) + save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_box_test.pdf", fig) end end @@ -175,8 +197,8 @@ plot_box_split(split_tested) function plot_violin_split(split_m) _constraints = ["gpp", "nee", "reco", "transpiration", "evapotranspiration", "agb", "ndvi"] _colors = ["#4CAF50", "#1565C0", "#D32F2F", "#00ACC1", "#00897B", "#8D6E63", "#CDDC39"] - with_theme() do - fig = Figure(; size = (1200, 300)) + with_theme(theme_latexfonts()) do + fig = Figure(; size = (1200, 300), fontsize=24) axs = [Axis(fig[1, i]) for i in 1:7] for (i, ax) in enumerate(axs) tmp_vec = filter(!isnan, split_m[:,i]) @@ -195,19 +217,50 @@ function plot_violin_split(split_m) # hidexdecorations!.(axs, grid=false, label=false, ticklabels=false, ticks=true) hidexdecorations!.(axs, grid=false, label=false, ticklabels=true) hidespines!.(axs) - save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_violin_test.png", fig) + save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_violin_test.pdf", fig) end end plot_violin_split(split_tested) +function plot_beeswarm_split(split_m) + _constraints = ["gpp", "nee", "reco", "transpiration", "evapotranspiration", "agb", "ndvi"] + _colors = ["#4CAF50", "#1565C0", "#D32F2F", "#00ACC1", "#00897B", "#8D6E63", "#CDDC39"] + with_theme(theme_latexfonts()) do + fig = Figure(; size = (1200, 300), fontsize=24) + axs = [Axis(fig[1, i]) for i in 1:7] + for (i, ax) in enumerate(axs) + tmp_vec = filter(!isnan, split_m[:,i]) + + beeswarm!(ax, fill(1, length(tmp_vec)), tmp_vec; color = (_colors[i], 0.65), markersize = 6) + # boxplot!(ax, fill(1, length(tmp_vec)), tmp_vec; 
width=0.35, strokecolor = :white, + # strokewidth=1.5, whiskercolor=:white, mediancolor=:white, + # color = :transparent) + ax.title= rich("$(_constraints[i])", color=:grey15, font=:bold) + ax.yticks=[0, 0.25, 0.5, 0.75, 1.0] + end + axs[1].ylabel = "Loss" + ylims!.(axs, 0, 1) + xlims!.(axs, 0.5, 1.5) + hideydecorations!.(axs[2:end], ticks=true, grid=false) + # hidexdecorations!.(axs, grid=false, label=false, ticklabels=false, ticks=true) + hidexdecorations!.(axs, grid=false, label=false, ticklabels=true) + hidespines!.(axs) + save("$(exp_name)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_bs_$(bs)_beeswarm_test.pdf", fig) + end +end # ! Collect all experiments # "HyFixK_ALL", "HyFixK_PFT", "HyALL_ALL", "HyALL_PFT" -split_tested_HyALL_ALL = collect_folds("HyALL_ALL", 400, 32, 2, 32) -split_tested_HyALL_PFT = collect_folds("HyALL_PFT", 300, 32, 2, 32) -split_tested_HyFixK_ALL = collect_folds("HyFixK_ALL", 500, 32, 2, 32) -split_tested_HyFixK_PFT = collect_folds("HyFixK_PFT", 500, 32, 2, 32) +split_tested_HyALL_ALL = collect_folds("HyALL_ALL", 200, 32, 2, 32) +# split_tested_HyALL_PFT = collect_folds("HyALL_PFT", 300, 32, 2, 32) +# split_tested_HyFixK_ALL = collect_folds("HyFixK_ALL", 500, 32, 2, 32) +# split_tested_HyFixK_PFT = collect_folds("HyFixK_PFT", 500, 32, 2, 32) +plot_box_split(split_tested_HyALL_ALL) +plot_violin_split(split_tested_HyALL_ALL) +plot_beeswarm_split(split_tested_HyALL_ALL) + + function plot_box_split_all(split_m1, split_m2, split_m3, split_m4) _constraints = ["gpp", "nee", "reco", "transpiration", "evapotranspiration", "agb", "ndvi"] diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl index 03011e322..448f087f9 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl @@ -224,7 +224,8 @@ in_gargs=(; ); # checkpoint_path = "$(info.output.dirs.data)/HyALL_ALL_kσ_$(k_σ)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(n_epochs)epochs_batch_size_$(batch_size)/" -remote_raven = "/ptmp/lalonso/HybridOutput/HyALL_ALL_kσ_$(k_σ)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(n_epochs)epochs_batch_size_$(batch_size)/" +remote_raven = "/ptmp/lalonso/HybridOutputALL/HyALL_ALL_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_batch_size_$(batch_size)/" +# remote_raven = "/ptmp/lalonso/HybridOutput/HyALL_ALL_kσ_$(k_σ)_fold_$(_nfold)_nlayers_$(nlayers)_n_neurons_$(n_neurons)_$(n_epochs)epochs_batch_size_$(batch_size)/" mkpath(remote_raven) checkpoint_path = remote_raven diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh index a22932177..70a69b014 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.sh @@ -3,7 +3,7 @@ #SBATCH --ntasks=8 #SBATCH --cpus-per-task=17 #SBATCH --mem-per-cpu=2GB -#SBATCH --time=14:50:00 +#SBATCH --time=23:50:00 #SBATCH --array=0-9 # 10 jobs #SBATCH -o /ptmp/lalonso/slurmOutput/HyALL_ALL_la-%A_%a.out diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json index 0525d60cf..fd91ffad4 100644 --- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json +++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json @@ -33,7 +33,7 @@ "inline_update": false, "run_forward": true, 
"run_optimization": true, - "save_info": false, + "save_info": true, "spinup_TEM": true, "store_spinup": false, "use_forward_diff": false diff --git a/lib/SindbadSetup/src/setupExperimentInfo.jl b/lib/SindbadSetup/src/setupExperimentInfo.jl index 340e13ede..86aa799fe 100644 --- a/lib/SindbadSetup/src/setupExperimentInfo.jl +++ b/lib/SindbadSetup/src/setupExperimentInfo.jl @@ -104,7 +104,7 @@ Saves or skips saving the experiment configuration to a file. function saveInfo end function saveInfo(info, ::DoSaveInfo) - info_path = joinpath(info.output.dirs.settings, "info.jld2") + info_path = joinpath(info.output.dirs.settings, "info_rand_id_$(rand(1:10000)).jld2") showInfo(saveInfo, @__FILE__, @__LINE__, "saving info to `$(info_path)`") @save info_path info return nothing From bac91a18c999021d78c47e5ba4cff70828af061f Mon Sep 17 00:00:00 2001 From: Lazaro Date: Wed, 24 Sep 2025 17:24:38 +0200 Subject: [PATCH 11/21] setup hybrid --- examples/exp_fluxnet_hybrid/Project.toml | 1 + .../exp_fluxnet_hybrid_clean_la.jl | 2 + .../exp_fluxnet_hybrid_replace_r.jl | 82 +++++++++++++++++++ .../sampling/kfold_stratify.jl | 21 ++--- .../parameter_learning.json | 6 +- 5 files changed, 99 insertions(+), 13 deletions(-) create mode 100644 examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl diff --git a/examples/exp_fluxnet_hybrid/Project.toml b/examples/exp_fluxnet_hybrid/Project.toml index 19e186b36..565a2a3c1 100644 --- a/examples/exp_fluxnet_hybrid/Project.toml +++ b/examples/exp_fluxnet_hybrid/Project.toml @@ -9,6 +9,7 @@ Sindbad = "6686e6de-2010-4bf8-9178-b2bcc470766e" SindbadData = "772b809f-5329-4bfd-a2e9-cc4714ce84b1" SindbadML = "eb1d8004-2d42-4eef-b369-ed0645c101e8" SindbadMetrics = "7a8077d0-ecf5-44b0-95d4-89eadbafc35c" +SindbadOptimization = "64fff8bd-0c13-402c-aa96-b97d0bc3655c" SindbadSetup = "2b7f2987-8a1e-48b4-8a02-cc9e7c4eeb1c" SindbadTEM = "f6108451-10cb-42fa-b0c1-67671cf08f15" SindbadUtils = "a2ad09c8-73f9-4387-ada0-5c5a8c58f8ab" diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl index 448f087f9..1c0d79359 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_clean_la.jl @@ -86,6 +86,8 @@ replace_info = Dict() replace_info = Dict( "forcing.default_forcing.data_path" => path_input, "optimization.observations.default_observation.data_path" => path_input, + # TODO: + ); info = getExperimentInfo(path_experiment_json; replace_info=replace_info); diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl new file mode 100644 index 000000000..04e8d9bc1 --- /dev/null +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl @@ -0,0 +1,82 @@ +using SindbadML +using SindbadUtils +using SindbadSetup +using SindbadData +using SindbadML.JLD2 +using SindbadML.Random +using SindbadML.Flux +using ProgressMeter + +# TODO: update these in replace_info! 
+# load folds # $nfold $nlayer $neuron $batchsize +# _nfold = Base.parse(Int, ARGS[1]) # 5 +# nlayers = Base.parse(Int, ARGS[2]) # 3 +# n_neurons = Base.parse(Int, ARGS[3]) # 32 +# batch_size = Base.parse(Int, ARGS[4]) # 32 +# id_fold = Base.parse(Int, ARGS[5]) # 1 + + +path_experiment_json = "../exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json" +# path_input = "$(getSindbadDataDepot())/FLUXNET_v2023_12_1D.zarr" +path_input = "/Net/Groups/BGI/work_4/scratch/lalonso/FLUXNET_v2023_12_1D.zarr" +path_observation = path_input + +path_covariates = "$(getSindbadDataDepot())/CovariatesFLUXNET_3.zarr" + +replace_info = Dict( + "forcing.default_forcing.data_path" => path_input, + "optimization.observations.default_observation.data_path" => path_observation, + "optimization.optimization_cost_threaded" => false, + # "experiment.hybrid.fold.fold_path" => "/", + # "experiment.hybrid.fold.which_fold" => 10, + # "experiment.model_output.path" => path_output, +) + +info = getExperimentInfo(path_experiment_json; replace_info=replace_info); + +forcing = getForcing(info); +observations = getObservation(info, forcing.helpers); +sites_forcing = forcing.data[1].site; + +hybrid_helpers = prepHybrid(forcing, observations, info, info.hybrid.ml_training.method); + +trainML(hybrid_helpers, info.hybrid.ml_training.method) + +# mixedGradientTraining(grads_lib, ml_model, sites_training, sites_validation, sites_testing, xfeatures, info.optimization.parameter_table, batch_size, chunk_size, num_constraints, metadata_global, loss_functions; n_epochs=2, path_experiment=checkpoint_path) + + +## play around with gradient for sites and batch to understand internal workings +ml_model = hybrid_helpers.ml_model; +xfeatures = hybrid_helpers.features.data; +loss_functions = hybrid_helpers.loss_functions; +loss_component_functions = hybrid_helpers.loss_component_functions; + +params_sites = ml_model(xfeatures) +@info "params_sites: [$(minimum(params_sites)), $(maximum(params_sites))]" + +scaled_params_sites = getParamsAct(params_sites, info.optimization.parameter_table) +@info "scaled_params_sites: [$(minimum(scaled_params_sites)), $(maximum(scaled_params_sites))]" + + +## try for a site +site_index = 1 +site_name = sites_forcing[site_index] + +loc_params = scaled_params_sites(site=site_name).data.data +loss_f_site = loss_functions(site=site_name); +loss_vector_f_site = loss_component_functions(site=site_name); +@time loss_f_site(loc_params) +loss_vector_f_site(loc_params) + +@time g_site = gradientSite(info.hybrid.ml_gradient.method, loc_params, info.hybrid.ml_gradient.options, loss_functions(site=site_name)) + +## try for a batch +sites_batch = hybrid_helpers.sites.training[1:info.hybrid.ml_training.options.batch_size] +scaled_params_batch = scaled_params_sites(; site=sites_batch) +grads_batch = zeros(Float32, size(scaled_params_batch, 1), length(sites_batch)); + +g_batch = gradientBatch!(info.hybrid.ml_gradient.method, grads_batch, info.hybrid.ml_gradient.options, loss_functions, scaled_params_batch, sites_batch; showprog=true) + + + + diff --git a/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl b/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl index 9c9f3f1f5..b2657130c 100644 --- a/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl +++ b/examples/exp_fluxnet_hybrid/sampling/kfold_stratify.jl @@ -1,10 +1,10 @@ -ENV["JULIA_NUM_PRECOMPILE_TASKS"] = "1" # ! due to raven's threads restrictions, this should NOT be used in production! 
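# For context on the idea behind the stratification in this script:
# `MLUtils.kfolds` can be applied within each PFT class so that every fold
# keeps the class balance. A toy sketch with invented labels (not the
# FLUXNET sites):
#
#   using MLUtils
#   pfts = repeat(["ENF", "GRA", "WET", "SNO"], 5)      # 20 fake sites
#   idx_enf = findall(==("ENF"), pfts)                  # one stratum
#   train_ids, val_ids = kfolds(length(idx_enf), 5)     # 5-fold index split
#   fold1_train = idx_enf[train_ids[1]]                 # back to site indices
#   fold1_val   = idx_enf[val_ids[1]]
#
# Concatenating the per-stratum folds yields folds whose PFT distribution
# tracks the overall one.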
-# activate project's environment and develop the package -using Pkg -Pkg.activate("examples/exp_fluxnet_hybrid/sampling") -Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2"]) -# Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2", "GLMakie", "CairoMakie"]) -Pkg.instantiate() +# ENV["JULIA_NUM_PRECOMPILE_TASKS"] = "1" # ! due to raven's threads restrictions, this should NOT be used in production! +# # activate project's environment and develop the package +# using Pkg +# Pkg.activate("examples/exp_fluxnet_hybrid/sampling") +# Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2"]) +# # Pkg.add(["MLUtils", "YAXArrays", "StatsBase", "Zarr", "JLD2", "GLMakie", "CairoMakie"]) +# Pkg.instantiate() # start using packages using MLUtils @@ -23,10 +23,11 @@ using JLD2 # mkpath(joinpath(@__DIR__, "../../../../fluxnet_hybrid_plots/")) # c_read = Cube(joinpath(@__DIR__, "../../data/CovariatesFLUXNET_3.zarr")); -c_read = Cube("/raven/u/lalonso/sindbad.jl/examples/data/CovariatesFLUXNET_3.zarr") - +# c_read = Cube("/raven/u/lalonso/sindbad.jl/examples/data/CovariatesFLUXNET_3.zarr") +c_read = Cube("/Net/Groups/BGI/work_5/scratch/lalonso/CovariatesFLUXNET_3.zarr") # ds = open_dataset(joinpath(@__DIR__, "../../data/FLUXNET_v2023_12_1D.zarr")) -ds = open_dataset("/raven/u/lalonso/sindbad.jl/examples/data/FLUXNET_v2023_12_1D.zarr") +# ds = open_dataset("/raven/u/lalonso/sindbad.jl/examples/data/FLUXNET_v2023_12_1D.zarr") +ds = open_dataset("/Net/Groups/BGI/work_4/scratch/lalonso/FLUXNET_v2023_12_1D.zarr") ds.properties["SITE_ID"][[98, 99, 100, 137, 138]] # ! update PFTs categories, original ones are not up to date! diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json index 609e2683b..74934d047 100644 --- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json +++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json @@ -20,8 +20,8 @@ "n_folds": 10 }, "package": "", - "fold_path": "./sampling/nfolds_sites_indices.jld2", - "which_fold": 5 + "fold_path": "/User/homes/lalonso/SINDBAD/examples/exp_fluxnet_hybrid/sampling/nfolds_sites_indices_0.jld2", + "which_fold": 2 }, "ml_gradient": { "method": "Grad", @@ -42,7 +42,7 @@ "replace_value_for_gradient": 0.0, "save_checkpoint": true, "covariates": { - "path": "../data/CovariatesFLUXNET_3.zarr", + "path": "/Net/Groups/BGI/work_5/scratch/lalonso/CovariatesFLUXNET_3.zarr", "variables": "all" }, From a20586e011839d9829a387a16b6712f2ee22b963 Mon Sep 17 00:00:00 2001 From: Lazaro Date: Wed, 24 Sep 2025 17:44:33 +0200 Subject: [PATCH 12/21] rename file --- lib/SindbadML/src/SindbadML.jl | 2 +- lib/SindbadML/src/{mlTrain.jl => mlTraining.jl} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename lib/SindbadML/src/{mlTrain.jl => mlTraining.jl} (100%) diff --git a/lib/SindbadML/src/SindbadML.jl b/lib/SindbadML/src/SindbadML.jl index ac6661b02..c0da89146 100644 --- a/lib/SindbadML/src/SindbadML.jl +++ b/lib/SindbadML/src/SindbadML.jl @@ -81,7 +81,7 @@ module SindbadML include("loss.jl") include("prepHybrid.jl") include("mlGradient.jl") - include("mlTrain.jl") + include("mlTraining.jl") include("neuralNetwork.jl") include("siteLosses.jl") include("oneHots.jl") diff --git a/lib/SindbadML/src/mlTrain.jl b/lib/SindbadML/src/mlTraining.jl similarity index 100% rename from lib/SindbadML/src/mlTrain.jl rename to lib/SindbadML/src/mlTraining.jl From 07a919fe712c2b0abf72819cd6964bf96c36cb93 Mon 
Sep 17 00:00:00 2001 From: Lazaro Date: Wed, 24 Sep 2025 17:48:18 +0200 Subject: [PATCH 13/21] new train file --- lib/SindbadML/src/SindbadML.jl | 2 +- lib/SindbadML/src/{mlTraining.jl => trainML.jl} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename lib/SindbadML/src/{mlTraining.jl => trainML.jl} (99%) diff --git a/lib/SindbadML/src/SindbadML.jl b/lib/SindbadML/src/SindbadML.jl index c0da89146..e9fe9d5d3 100644 --- a/lib/SindbadML/src/SindbadML.jl +++ b/lib/SindbadML/src/SindbadML.jl @@ -81,7 +81,7 @@ module SindbadML include("loss.jl") include("prepHybrid.jl") include("mlGradient.jl") - include("mlTraining.jl") + include("trainML.jl") include("neuralNetwork.jl") include("siteLosses.jl") include("oneHots.jl") diff --git a/lib/SindbadML/src/mlTraining.jl b/lib/SindbadML/src/trainML.jl similarity index 99% rename from lib/SindbadML/src/mlTraining.jl rename to lib/SindbadML/src/trainML.jl index b45c38139..662138fb4 100644 --- a/lib/SindbadML/src/mlTraining.jl +++ b/lib/SindbadML/src/trainML.jl @@ -101,4 +101,4 @@ function trainML(hybrid_helpers, ::MixedGradient) end -end +end \ No newline at end of file From 565dc08be315ed50a1ce5b7cc0d7746cbf32adc9 Mon Sep 17 00:00:00 2001 From: Lazaro Date: Wed, 24 Sep 2025 17:50:17 +0200 Subject: [PATCH 14/21] todo --- examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl index 04e8d9bc1..a59be0281 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl @@ -8,6 +8,7 @@ using SindbadML.Flux using ProgressMeter # TODO: update these in replace_info! +# TODO: proper attribution of trainML file! # load folds # $nfold $nlayer $neuron $batchsize # _nfold = Base.parse(Int, ARGS[1]) # 5 # nlayers = Base.parse(Int, ARGS[2]) # 3 From 58eb9013d77291d9abe90843f51167898795d1cd Mon Sep 17 00:00:00 2001 From: Lazaro Date: Thu, 25 Sep 2025 12:27:40 +0200 Subject: [PATCH 15/21] fold options --- .../exp_fluxnet_hybrid_replace_r.jl | 18 ++++++++++-------- .../parameter_learning.json | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl index a59be0281..41c0b45d4 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl @@ -10,11 +10,11 @@ using ProgressMeter # TODO: update these in replace_info! # TODO: proper attribution of trainML file! 
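# A sketch of how the hard-coded fold options below could stay overridable
# from the command line (helper name and defaults are illustrative only,
# not project API):
#
#   arg_or(i, default) = length(ARGS) >= i ? Base.parse(Int, ARGS[i]) : default
#   _nfold     = arg_or(1, 5)
#   nlayers    = arg_or(2, 3)
#   n_neurons  = arg_or(3, 32)
#   batch_size = arg_or(4, 32)
#
# This keeps both `julia script.jl 5 3 32 32` (e.g. from the SLURM array job)
# and an interactive `include` working without editing the file.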
# load folds # $nfold $nlayer $neuron $batchsize -# _nfold = Base.parse(Int, ARGS[1]) # 5 -# nlayers = Base.parse(Int, ARGS[2]) # 3 -# n_neurons = Base.parse(Int, ARGS[3]) # 32 -# batch_size = Base.parse(Int, ARGS[4]) # 32 -# id_fold = Base.parse(Int, ARGS[5]) # 1 +_nfold = 5 # Base.parse(Int, ARGS[1]) # 5 +nlayers = 3 # Base.parse(Int, ARGS[2]) # 3 +n_neurons = 32 # Base.parse(Int, ARGS[3]) # 32 +batch_size = 32 # Base.parse(Int, ARGS[4]) # 32 +id_fold = 1 # Base.parse(Int, ARGS[5]) # 1 path_experiment_json = "../exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment_hybrid.json" @@ -28,9 +28,11 @@ replace_info = Dict( "forcing.default_forcing.data_path" => path_input, "optimization.observations.default_observation.data_path" => path_observation, "optimization.optimization_cost_threaded" => false, - # "experiment.hybrid.fold.fold_path" => "/", - # "experiment.hybrid.fold.which_fold" => 10, - # "experiment.model_output.path" => path_output, + "hybrid.ml_training.options.batch_size" => batch_size, + "hybrid.ml_training.which_fold" => _nfold, + # "hybrid.ml_training.fold_path" => "blablabla", + "hybrid.ml_model.options.n_layers" => nlayers, + "hybrid.ml_model.options.n_neurons" => n_neurons, ) info = getExperimentInfo(path_experiment_json; replace_info=replace_info); diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json index 74934d047..bc0d6e8a5 100644 --- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json +++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json @@ -28,7 +28,7 @@ "options": { "chunk_size": 2 }, - "package": "FiniteDiff" + "package": "PolyesterForwardDiff" }, "ml_optimizer": { "method": "Adam", From b701b3f9bdcb4feef3cf7259b0d3fb8bbfcedaf3 Mon Sep 17 00:00:00 2001 From: Lazaro Date: Thu, 25 Sep 2025 12:55:25 +0200 Subject: [PATCH 16/21] get chunk size --- examples/exp_fluxnet_hybrid/Project.toml | 1 + .../exp_fluxnet_hybrid_replace_r.jl | 36 +------------------ lib/SindbadML/src/mlGradient.jl | 2 +- 3 files changed, 3 insertions(+), 36 deletions(-) diff --git a/examples/exp_fluxnet_hybrid/Project.toml b/examples/exp_fluxnet_hybrid/Project.toml index 565a2a3c1..2d8857c70 100644 --- a/examples/exp_fluxnet_hybrid/Project.toml +++ b/examples/exp_fluxnet_hybrid/Project.toml @@ -5,6 +5,7 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GCMAES = "4aa9d100-eb0f-11e8-15f1-25748831eb3b" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" Sindbad = "6686e6de-2010-4bf8-9178-b2bcc470766e" SindbadData = "772b809f-5329-4bfd-a2e9-cc4714ce84b1" SindbadML = "eb1d8004-2d42-4eef-b369-ed0645c101e8" diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl index 41c0b45d4..1c7f1fb84 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl @@ -1,3 +1,4 @@ +using Revise using SindbadML using SindbadUtils using SindbadSetup @@ -45,41 +46,6 @@ hybrid_helpers = prepHybrid(forcing, observations, info, info.hybrid.ml_training trainML(hybrid_helpers, info.hybrid.ml_training.method) -# mixedGradientTraining(grads_lib, ml_model, sites_training, sites_validation, sites_testing, xfeatures, info.optimization.parameter_table, batch_size, chunk_size, num_constraints, metadata_global, 
loss_functions; n_epochs=2, path_experiment=checkpoint_path) - - -## play around with gradient for sites and batch to understand internal workings -ml_model = hybrid_helpers.ml_model; -xfeatures = hybrid_helpers.features.data; -loss_functions = hybrid_helpers.loss_functions; -loss_component_functions = hybrid_helpers.loss_component_functions; - -params_sites = ml_model(xfeatures) -@info "params_sites: [$(minimum(params_sites)), $(maximum(params_sites))]" - -scaled_params_sites = getParamsAct(params_sites, info.optimization.parameter_table) -@info "scaled_params_sites: [$(minimum(scaled_params_sites)), $(maximum(scaled_params_sites))]" - - -## try for a site -site_index = 1 -site_name = sites_forcing[site_index] - -loc_params = scaled_params_sites(site=site_name).data.data -loss_f_site = loss_functions(site=site_name); -loss_vector_f_site = loss_component_functions(site=site_name); -@time loss_f_site(loc_params) -loss_vector_f_site(loc_params) - -@time g_site = gradientSite(info.hybrid.ml_gradient.method, loc_params, info.hybrid.ml_gradient.options, loss_functions(site=site_name)) - -## try for a batch -sites_batch = hybrid_helpers.sites.training[1:info.hybrid.ml_training.options.batch_size] -scaled_params_batch = scaled_params_sites(; site=sites_batch) -grads_batch = zeros(Float32, size(scaled_params_batch, 1), length(sites_batch)); - -g_batch = gradientBatch!(info.hybrid.ml_gradient.method, grads_batch, info.hybrid.ml_gradient.options, loss_functions, scaled_params_batch, sites_batch; showprog=true) - diff --git a/lib/SindbadML/src/mlGradient.jl b/lib/SindbadML/src/mlGradient.jl index 7390bfa2c..f097c7ba1 100644 --- a/lib/SindbadML/src/mlGradient.jl +++ b/lib/SindbadML/src/mlGradient.jl @@ -150,7 +150,7 @@ function gradientSite(::PolyesterForwardDiffGrad, x_vals, gradient_options::Name # cfg = ForwardDiff.GradientConfig(loss_tmp, x_vals, Chunk{chunk_size}()); ForwardDiff.gradient!(∇x, loss_f, x_vals) # ?, add `cfg` at the end if further control is needed. 
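    # Note on the chunked call below: `ForwardDiff.Chunk(c)` fixes how many
    # partial derivatives are propagated per forward sweep, so the loss is
    # re-evaluated about ceil(length(x_vals)/c) times; larger chunks use more
    # memory per dual number but need fewer sweeps. The chunk length therefore
    # has to come from the options NamedTuple (`gradient_options.chunk_size`).
    # A minimal standalone sketch of the same call on a toy loss:
    #   f(x) = sum(abs2, x); x = rand(Float32, 4); dx = similar(x)
    #   PolyesterForwardDiff.threaded_gradient!(f, dx, x, ForwardDiff.Chunk(4))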
else - PolyesterForwardDiff.threaded_gradient!(loss_f, ∇x, x_vals, ForwardDiff.Chunk(chunk_size)); + PolyesterForwardDiff.threaded_gradient!(loss_f, ∇x, x_vals, ForwardDiff.Chunk(gradient_options.chunk_size)); end return ∇x end From e65b0461285c3609cf237aa6f806c898ad5bea9e Mon Sep 17 00:00:00 2001 From: dr-ko Date: Fri, 17 Oct 2025 12:20:15 +0200 Subject: [PATCH 17/21] prep tem information --- .../settings_fluxnet_hybrid/parameter_learning.json | 4 ++-- lib/SindbadTEM/src/prepTEM.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json index bc0d6e8a5..7319cbc09 100644 --- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json +++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json @@ -20,7 +20,7 @@ "n_folds": 10 }, "package": "", - "fold_path": "/User/homes/lalonso/SINDBAD/examples/exp_fluxnet_hybrid/sampling/nfolds_sites_indices_0.jld2", + "fold_path": "", "which_fold": 2 }, "ml_gradient": { @@ -42,7 +42,7 @@ "replace_value_for_gradient": 0.0, "save_checkpoint": true, "covariates": { - "path": "/Net/Groups/BGI/work_5/scratch/lalonso/CovariatesFLUXNET_3.zarr", + "path": "CovariatesFLUXNET_3.zarr", "variables": "all" }, diff --git a/lib/SindbadTEM/src/prepTEM.jl b/lib/SindbadTEM/src/prepTEM.jl index 184e6bb33..413ed3dfa 100644 --- a/lib/SindbadTEM/src/prepTEM.jl +++ b/lib/SindbadTEM/src/prepTEM.jl @@ -573,7 +573,7 @@ function prepTEM(forcing::NamedTuple, info::NamedTuple) end function prepTEM(selected_models, forcing::NamedTuple, info::NamedTuple) - showInfo(prepTEM, @__FILE__, @__LINE__, "preparing to run terrestrial ecosystem model (TEM)", n_f=1) + showInfo(prepTEM, @__FILE__, @__LINE__, "preparing to run terrestrial ecosystem model (TEM) for $(nameof(typeof(info.helpers.run.land_output_type)))", n_f=1) output = prepTEMOut(info, forcing.helpers) showInfo(prepTEM, @__FILE__, @__LINE__, " preparing helpers for running model experiment", n_f=4) run_helpers = helpPrepTEM(selected_models, info, forcing, output, info.helpers.run.land_output_type) @@ -583,7 +583,7 @@ function prepTEM(selected_models, forcing::NamedTuple, info::NamedTuple) end function prepTEM(selected_models, forcing::NamedTuple, observations::NamedTuple, info::NamedTuple) - showInfo(prepTEM, @__FILE__, @__LINE__, "preparing to run terrestrial ecosystem model (TEM)", n_f=1) + showInfo(prepTEM, @__FILE__, @__LINE__, "preparing to run terrestrial ecosystem model (TEM) for $(nameof(typeof(info.helpers.run.land_output_type)))", n_f=1) output = prepTEMOut(info, forcing.helpers) run_helpers = helpPrepTEM(selected_models, info, forcing, observations, output, info.helpers.run.land_output_type) showInfoSeparator() From 972dff3b40a9a54d1252986697cbdd958c8a5122 Mon Sep 17 00:00:00 2001 From: dr-ko Date: Wed, 5 Nov 2025 09:19:56 +0100 Subject: [PATCH 18/21] revert settings --- .../settings_fluxnet_hybrid/parameter_learning.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json index 7319cbc09..bc0d6e8a5 100644 --- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json +++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/parameter_learning.json @@ -20,7 +20,7 @@ "n_folds": 10 }, "package": "", - "fold_path": "", + "fold_path": 
"/User/homes/lalonso/SINDBAD/examples/exp_fluxnet_hybrid/sampling/nfolds_sites_indices_0.jld2", "which_fold": 2 }, "ml_gradient": { @@ -42,7 +42,7 @@ "replace_value_for_gradient": 0.0, "save_checkpoint": true, "covariates": { - "path": "CovariatesFLUXNET_3.zarr", + "path": "/Net/Groups/BGI/work_5/scratch/lalonso/CovariatesFLUXNET_3.zarr", "variables": "all" }, From f014c00838a2c2e959006018a89165552959bf5e Mon Sep 17 00:00:00 2001 From: Lazaro Date: Wed, 5 Nov 2025 13:31:59 +0100 Subject: [PATCH 19/21] space spinup sequence --- .../exp_fluxnet_hybrid_replace_r.jl | 38 ++++++++++++++++++- .../settings_fluxnet_hybrid/forcing.json | 10 +++++ .../settings_fluxnet_hybrid/optimization.json | 1 - lib/SindbadML/src/loss.jl | 11 +++--- lib/SindbadML/src/prepHybrid.jl | 5 ++- lib/SindbadTEM/src/prepTEM.jl | 6 ++- lib/SindbadTEM/src/runTEMSpace.jl | 12 ++++++ lib/SindbadTEM/src/spinupSequence.jl | 33 ++++++++++------ lib/SindbadTEM/src/spinupTEM.jl | 16 ++++++++ 9 files changed, 110 insertions(+), 22 deletions(-) diff --git a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl index 1c7f1fb84..00e4171cd 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl @@ -23,7 +23,7 @@ path_experiment_json = "../exp_fluxnet_hybrid/settings_fluxnet_hybrid/experiment path_input = "/Net/Groups/BGI/work_4/scratch/lalonso/FLUXNET_v2023_12_1D.zarr" path_observation = path_input -path_covariates = "$(getSindbadDataDepot())/CovariatesFLUXNET_3.zarr" +# path_covariates = "$(getSindbadDataDepot())/CovariatesFLUXNET_3.zarr" replace_info = Dict( "forcing.default_forcing.data_path" => path_input, @@ -42,10 +42,44 @@ forcing = getForcing(info); observations = getObservation(info, forcing.helpers); sites_forcing = forcing.data[1].site; +# disturbance years +# get disturbance years from forcing helpers +# apply getSequence to each site +# ds = open_dataset(path_input) +# disturbances_per_site = Pair.(ds.properties["SITE_ID"], ds.properties["last_disturbance_on"]) +# allSequences = getSequence.(ds.properties["last_disturbance_on"], Ref(info.helpers.dates)); + hybrid_helpers = prepHybrid(forcing, observations, info, info.hybrid.ml_training.method); +selected_models = info.models.forward; +run_helpers = hybrid_helpers.run_helpers; -trainML(hybrid_helpers, info.hybrid.ml_training.method) +space_forcing = run_helpers.space_forcing; +space_observations = run_helpers.space_observation; +space_output = run_helpers.space_output; +space_spinup_forcing = run_helpers.space_spinup_forcing; +space_spinup_sequence = run_helpers.space_spinup_sequence; +space_ind = run_helpers.space_ind; +land_init = run_helpers.loc_land; +loc_forcing_t = run_helpers.loc_forcing_t; + +space_cost_options = [prepCostOptions(loc_obs, info.optimization.cost_options) for loc_obs in space_observations]; +constraint_method = info.optimization.run_options.multi_constraint_method; +tem_info = run_helpers.tem_info; +## do example site +## + +site_example_1 = space_ind[7][1]; +@time SindbadTEM.coreTEM!(selected_models, space_forcing[site_example_1], space_spinup_forcing[site_example_1], + space_spinup_sequence[site_example_1], + loc_forcing_t, space_output[site_example_1], land_init, tem_info) + +trainML(hybrid_helpers, info.hybrid.ml_training.method) +# ds = open_dataset(path_input) +# d_arr = replace(ds.properties["last_disturbance_on"], "undisturbed" => "9999-01-01") +# disturbance_yax = YAXArray((ds.site, ), 
year.(Date.(d_arr))) +# ds2 = Dataset(disturbance_year = disturbance_yax) +# savedataset(ds2, path=path_input, backend=:zarr, append=true) diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/forcing.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/forcing.json index e816e59f2..b45a6b943 100644 --- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/forcing.json +++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/forcing.json @@ -49,6 +49,16 @@ "source_unit": null, "source_variable": "dist_frac_sb2018" }, + "f_disturbance_year": { + "bounds": [0, 10000], + "is_categorical": false, + "standard_name": "year of last disturbance", + "sindbad_unit": null, + "source_product": "Besnard2018", + "source_unit": null, + "source_variable": "disturbance_year", + "space_time_type": "spatial" + }, "f_burnt_area": { "bounds": [0.0, 1.0], "standard_name": "fire fraction area per area pixel", diff --git a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/optimization.json b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/optimization.json index 1e5035086..9deeaeff2 100644 --- a/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/optimization.json +++ b/examples/exp_fluxnet_hybrid/settings_fluxnet_hybrid/optimization.json @@ -9,7 +9,6 @@ "autoRespiration,RMN": null, "autoRespiration,YG": null, "autoRespirationAirT,Q10": null, - "cCycleBase,c_remain": null, "cCycleBase,ηH": null, "cFlow,f_τ": null, "cFlow,k_shedding": null, diff --git a/lib/SindbadML/src/loss.jl b/lib/SindbadML/src/loss.jl index b911b552b..286f4dd1b 100644 --- a/lib/SindbadML/src/loss.jl +++ b/lib/SindbadML/src/loss.jl @@ -40,13 +40,14 @@ This function runs the core TEM model with the provided parameters, forcing data loss_vec, loss_idx = lossVector(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib, LossModelObsML()) ``` """ -function lossVector(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,::LossModelObsML) +function lossVector(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,::LossModelObsML) loc_output_from_cache = getOutputFromCache(loc_output, params, gradient_lib) models = updateModels(params, parameter_to_index, parameter_scaling_type, models) coreTEM!( models, loc_forcing, loc_spinup_forcing, + loc_spinup_sequence, loc_forcing_t, loc_output_from_cache, land_init, @@ -92,14 +93,14 @@ This function computes the loss value for a given site by first calling `lossVec t_loss = loss(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib, LossModelObsML()) ``` """ -function loss(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,loss_type::LossModelObsML) - loss_vector, _ = lossVector(params, models,parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, 
gradient_lib, loss_type) +function loss(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,loss_type::LossModelObsML) + loss_vector, _ = lossVector(params, models,parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib, loss_type) t_loss = combineMetric(loss_vector, constraint_method) return t_loss end -function lossComponents(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,loss_type::LossModelObsML) - loss_vector, loss_indices = lossVector(params, models,parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib, loss_type) +function lossComponents(params, models, parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib,loss_type::LossModelObsML) + loss_vector, loss_indices = lossVector(params, models,parameter_to_index, parameter_scaling_type, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, land_init, tem_info, loc_obs, cost_options, constraint_method, gradient_lib, loss_type) t_loss = combineMetric(loss_vector, constraint_method) return t_loss, loss_vector, loss_indices end diff --git a/lib/SindbadML/src/prepHybrid.jl b/lib/SindbadML/src/prepHybrid.jl index 11fa80008..c7679c4b8 100644 --- a/lib/SindbadML/src/prepHybrid.jl +++ b/lib/SindbadML/src/prepHybrid.jl @@ -152,10 +152,11 @@ function getLossFunctionHandles(info, run_helpers, sites) loc_obs = run_helpers.space_observation[site_location] loc_output = getCacheFromOutput(run_helpers.space_output[site_location], info.hybrid.ml_gradient.method) loc_spinup_forcing = run_helpers.space_spinup_forcing[site_location] + loc_spinup_sequence = run_helpers.space_spinup_sequence[site_location] loc_cost_option = prepCostOptions(loc_obs, info.optimization.cost_options) - loss_tmp(x) = loss(x, info.models.forward, parameter_to_index, info.optimization.run_options.parameter_scaling, loc_forcing, loc_spinup_forcing, run_helpers.loc_forcing_t, loc_output, deepcopy(run_helpers.loc_land), run_helpers.tem_info, loc_obs, loc_cost_option, info.optimization.run_options.multi_constraint_method, info.hybrid.ml_gradient.method, info.hybrid.ml_training.options.loss_function) + loss_tmp(x) = loss(x, info.models.forward, parameter_to_index, info.optimization.run_options.parameter_scaling, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, run_helpers.loc_forcing_t, loc_output, deepcopy(run_helpers.loc_land), run_helpers.tem_info, loc_obs, loc_cost_option, info.optimization.run_options.multi_constraint_method, info.hybrid.ml_gradient.method, info.hybrid.ml_training.options.loss_function) - loss_vector_tmp(x) = lossComponents(x, info.models.forward, parameter_to_index, info.optimization.run_options.parameter_scaling, loc_forcing, loc_spinup_forcing, run_helpers.loc_forcing_t, loc_output, deepcopy(run_helpers.loc_land), run_helpers.tem_info, loc_obs, loc_cost_option, info.optimization.run_options.multi_constraint_method, 
info.hybrid.ml_gradient.method, info.hybrid.ml_training.options.loss_function) + loss_vector_tmp(x) = lossComponents(x, info.models.forward, parameter_to_index, info.optimization.run_options.parameter_scaling, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, run_helpers.loc_forcing_t, loc_output, deepcopy(run_helpers.loc_land), run_helpers.tem_info, loc_obs, loc_cost_option, info.optimization.run_options.multi_constraint_method, info.hybrid.ml_gradient.method, info.hybrid.ml_training.options.loss_function) push!(loss_functions, loss_tmp) push!(loss_component_functions, loss_vector_tmp) diff --git a/lib/SindbadTEM/src/prepTEM.jl b/lib/SindbadTEM/src/prepTEM.jl index 413ed3dfa..dde1935d6 100644 --- a/lib/SindbadTEM/src/prepTEM.jl +++ b/lib/SindbadTEM/src/prepTEM.jl @@ -433,6 +433,10 @@ function helpPrepTEM(selected_models, info, forcing::NamedTuple, observations::N getAllSpinupForcing(loc_forcing, info.spinup.sequence, tem_info); end + # f_disturbance_years + fd_index = findall(forcing.variables.==:f_disturbance_year)[1] + space_spinup_sequence = getSequence.(forcing.data[fd_index], Ref(info.helpers.dates)); + space_output = map([space_ind...]) do lsi getLocData(output_array, lsi) end @@ -441,7 +445,7 @@ function helpPrepTEM(selected_models, info, forcing::NamedTuple, observations::N forcing_nt_array = nothing - run_helpers = (; space_selected_models, space_forcing, space_observation, space_ind, space_spinup_forcing, loc_forcing_t, space_output, loc_land, output_vars=output.variables, tem_info) + run_helpers = (; space_selected_models, space_forcing, space_observation, space_ind, space_spinup_forcing, space_spinup_sequence, loc_forcing_t, space_output, loc_land, output_vars=output.variables, tem_info) return run_helpers end diff --git a/lib/SindbadTEM/src/runTEMSpace.jl b/lib/SindbadTEM/src/runTEMSpace.jl index 84d889b32..2c464b27b 100644 --- a/lib/SindbadTEM/src/runTEMSpace.jl +++ b/lib/SindbadTEM/src/runTEMSpace.jl @@ -36,6 +36,18 @@ function coreTEM!(selected_models, loc_forcing, loc_spinup_forcing, loc_forcing_ return nothing end +function coreTEM!(selected_models, loc_forcing, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, loc_output, loc_land, tem_info) + # update the loc_forcing with the actual location + loc_forcing_t = getForcingForTimeStep(loc_forcing, loc_forcing_t, 1, tem_info.vals.forcing_types) + # run precompute + land_prec = precomputeTEM(selected_models, loc_forcing_t, loc_land, tem_info.model_helpers) + # run spinup + land_spin = spinupTEM(selected_models, loc_spinup_forcing, loc_spinup_sequence, loc_forcing_t, land_prec, tem_info, tem_info.run.spinup_TEM) + + timeLoopTEM!(selected_models, loc_forcing, loc_forcing_t, loc_output, land_spin, tem_info.vals.forcing_types, tem_info.model_helpers, tem_info.vals.output_vars, tem_info.n_timesteps, tem_info.run.debug_model) + return nothing +end + """ parallelizeTEM!(space_selected_models, space_forcing, space_spinup_forcing, loc_forcing_t, space_output, space_land, tem_info, parallelization_mode::SindbadParallelizationMethod) diff --git a/lib/SindbadTEM/src/spinupSequence.jl b/lib/SindbadTEM/src/spinupSequence.jl index 90b498bed..ccb55a629 100644 --- a/lib/SindbadTEM/src/spinupSequence.jl +++ b/lib/SindbadTEM/src/spinupSequence.jl @@ -37,25 +37,36 @@ end - new spinup sequence object """ function getSequence(year_disturbance, info_helpers_dates; nrepeat_base=200, year_start = 1979) - nrepeat_age = nrepeatYearsAge(year_disturbance; year_start) - sequence = [ - Dict("spinup_mode" => "sel_spinup_models", "forcing" => 
"all_years", "n_repeat" => 1), - Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat_base), - Dict("spinup_mode" => "eta_scale_AH", "forcing" => "day_MSC", "n_repeat" => 1), - ] - if nrepeat_age == 0 + nrepeat_d = nrepeatYearsAge(year_disturbance; year_start) + sequence = nothing + nrepeat = nrepeat_base + if isnothing(nrepeat_d) sequence = [ Dict("spinup_mode" => "sel_spinup_models", "forcing" => "all_years", "n_repeat" => 1), - Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat_base), + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat), + Dict("spinup_mode" => "eta_scale_AH", "forcing" => "day_MSC", "n_repeat" => 1), + ] + elseif nrepeat_d < 0 + sequence = [ + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "all_years", "n_repeat" => 1), + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat), + Dict("spinup_mode" => "eta_scale_AH", "forcing" => "day_MSC", "n_repeat" => 1), + ] + elseif nrepeat_d == 0 + sequence = [ + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "all_years", "n_repeat" => 1), + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat), Dict("spinup_mode" => "eta_scale_A0H", "forcing" => "day_MSC", "n_repeat" => 1), ] - elseif nrepeat_age > 0 + elseif nrepeat_d > 0 sequence = [ Dict("spinup_mode" => "sel_spinup_models", "forcing" => "all_years", "n_repeat" => 1), - Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat_base), + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat), Dict("spinup_mode" => "eta_scale_A0H", "forcing" => "day_MSC", "n_repeat" => 1), - Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat_age), + Dict("spinup_mode" => "sel_spinup_models", "forcing" => "day_MSC", "n_repeat" => nrepeat_d), ] + else + error("cannot determine the repeat for disturbance") end new_sequence = getSpinupTemLite(getSpinupSequenceWithTypes(sequence, info_helpers_dates)) return new_sequence diff --git a/lib/SindbadTEM/src/spinupTEM.jl b/lib/SindbadTEM/src/spinupTEM.jl index 3d5151483..e88c2634d 100644 --- a/lib/SindbadTEM/src/spinupTEM.jl +++ b/lib/SindbadTEM/src/spinupTEM.jl @@ -623,6 +623,22 @@ function spinupTEM(selected_models, spinup_forcings, loc_forcing_t, land, tem_in return land end +function spinupTEM(selected_models, spinup_forcings, spinup_sequence, loc_forcing_t, land, tem_info, ::DoSpinupTEM) + land = setSpinupLog(land, 1, tem_info.run.store_spinup) + log_index = 2 + for spin_seq ∈ spinup_sequence + forc_name = spin_seq.forcing + n_timesteps = spin_seq.n_timesteps + n_repeat = spin_seq.n_repeat + spinup_mode = spin_seq.spinup_mode + @debug "Spinup: \n spinup_mode: $(nameof(typeof(spinup_mode))), forcing: $(forc_name)" + sel_forcing = sequenceForcing(spinup_forcings, forc_name) + land = spinupSequence(selected_models, sel_forcing, loc_forcing_t, land, tem_info, n_timesteps, log_index, n_repeat, spinup_mode) + log_index += n_repeat + end + return land +end + function spinupTEM(selected_models, spinup_forcings, loc_forcing_t, land, tem_info, ::DoNotSpinupTEM) return land end From b86edc9366bf671aa34bbfa7af60b864369000da Mon Sep 17 00:00:00 2001 From: Lazaro Date: Wed, 5 Nov 2025 14:37:38 +0100 Subject: [PATCH 20/21] import --- examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl | 1 + 1 file changed, 1 insertion(+) diff --git 
a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl index 00e4171cd..3435b0aa0 100644 --- a/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl +++ b/examples/exp_fluxnet_hybrid/exp_fluxnet_hybrid_replace_r.jl @@ -1,5 +1,6 @@ using Revise using SindbadML +using SindbadTEM using SindbadUtils using SindbadSetup using SindbadData From 66eff846673dc8c11d5d16c800b4da77e11c57c3 Mon Sep 17 00:00:00 2001 From: Lazaro Date: Wed, 5 Nov 2025 17:07:30 +0100 Subject: [PATCH 21/21] GSITOOPFT --- src/Models/cCycleBase/cCycleBase_GSITOOPFT.jl | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 src/Models/cCycleBase/cCycleBase_GSITOOPFT.jl diff --git a/src/Models/cCycleBase/cCycleBase_GSITOOPFT.jl b/src/Models/cCycleBase/cCycleBase_GSITOOPFT.jl new file mode 100644 index 000000000..b2aecca28 --- /dev/null +++ b/src/Models/cCycleBase/cCycleBase_GSITOOPFT.jl @@ -0,0 +1,192 @@ +export cCycleBase_GSITOOPFT, adjustPackPoolComponents + +struct CCycleBaseGSITOOPFT end +#! format: off +@bounds @describe @units @with_kw struct cCycleBase_GSITOOPFT{T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13} <: cCycleBase + c_τ_Root::T1 = 1.0 | (0.05, 3.3) | "turnover rate of root carbon pool" | "yr-1" + c_τ_Wood::T2 = 0.03 | (0.001, 10.0) | "turnover rate of wood carbon pool" | "yr-1" + c_τ_Leaf::T3 = 1.0 | (0.05, 10.0) | "turnover rate of leaf carbon pool" | "yr-1" + c_τ_Reserve::T4 = 1.0e-11 | (1.0e-12, 1.0) | "Reserve does not respire, but has a small value to avoid numerical error" | "yr-1" + c_τ_LitFast::T5 = 14.8 | (0.5, 148.0) | "turnover rate of fast litter (leaf litter) carbon pool" | "yr-1" + c_τ_LitSlow::T6 = 3.9 | (0.39, 39.0) | "turnover rate of slow litter carbon (wood litter) pool" | "yr-1" + c_τ_SoilSlow::T7 = 0.2 | (0.02, 2.0) | "turnover rate of slow soil carbon pool" | "yr-1" + c_τ_SoilOld::T8 = 0.0045 | (0.00045, 0.045) | "turnover rate of old soil carbon pool" | "yr-1" + c_flow_A_array::T9 = Float64[ + -1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 + 0.0 -1.0 0.0 0.0 0 0.0 0.0 0.0 + 0.0 0.0 -1.0 1.0 0.0 0 0.0 0.0 + 1.0 0.0 1.0 -1.0 0.0 0.0 0.0 0.0 + 1.0 0.0 1.0 0.0 -1.0 0.0 0.0 0.0 + 0.0 1.0 0.0 0.0 0 -1.0 0.0 0.0 + 0.0 0.0 0 0.0 1.0 1.0 -1.0 0.0 + 0.0 0.0 0 0.0 0.0 0.0 1.0 -1.0 + ] | (-Inf, Inf) | "Transfer matrix for carbon at ecosystem level" | "" + p_C_to_N_cVeg::T10 = Float64[25.0, 260.0, 260.0, 10.0] | (-Inf, Inf) | "carbon to nitrogen ratio in vegetation pools" | "gC/gN" + ηH::T11 = 1.0 | (0.01, 100.0) | "scaling factor for heterotrophic pools after spinup" | "" + ηA::T12 = 1.0 | (0.01, 100.0) | "scaling factor for vegetation pools after spinup" | "" + c_remain::T13 = 10.0 | (0.1, 100.0) | "remaining carbon after disturbance" | "" +end +#! 
format: on + +function define(params::cCycleBase_GSITOOPFT, forcing, land, helpers) + @unpack_cCycleBase_GSITOOPFT params + @unpack_nt begin + cEco ⇐ land.pools + (z_zero, o_one) ⇐ land.constants + end + ## instantiate variables + C_to_N_cVeg = zero(cEco) #sujan + # C_to_N_cVeg[getZix(land.pools.cVeg, helpers.pools.zix.cVeg)] .= p_C_to_N_cVeg + c_eco_k_base = zero(cEco) + c_eco_τ = zero(cEco) + + # if there is flux order check that is consistent + c_flow_order = Tuple(collect(1:length(findall(>(z_zero), c_flow_A_array)))) + c_taker = Tuple([ind[1] for ind ∈ findall(>(z_zero), c_flow_A_array)]) + c_giver = Tuple([ind[2] for ind ∈ findall(>(z_zero), c_flow_A_array)]) + + c_model = CCycleBaseGSITOOPFT() + + + ## pack land variables + @pack_nt begin + c_flow_A_array ⇒ land.diagnostics + (c_flow_order, c_taker, c_giver) ⇒ land.constants + (C_to_N_cVeg, c_eco_τ, c_eco_k_base) ⇒ land.diagnostics + c_model ⇒ land.models + end + return land +end + +function precompute(params::cCycleBase_GSITOOPFT, forcing, land, helpers) + @unpack_cCycleBase_GSITOOPFT params + @unpack_nt begin + (C_to_N_cVeg, c_eco_k_base, c_eco_τ) ⇐ land.diagnostics + (z_zero, o_one) ⇐ land.constants + end + @unpack_nt f_pft ⇐ forcing + # update turnover rates according to plant functional type lookup table + # c_τ_Wood, c_τ_Leaf, c_τ_Root = turnover_rates(only(f_pft)) + c_τ_Wood, c_τ_Leaf, c_τ_Root = turnover_rates(f_pft[1]) + + ## replace values + @rep_elem c_τ_Root ⇒ (c_eco_τ, 1, :cEco) + @rep_elem c_τ_Wood ⇒ (c_eco_τ, 2, :cEco) + @rep_elem c_τ_Leaf ⇒ (c_eco_τ, 3, :cEco) + @rep_elem c_τ_Reserve ⇒ (c_eco_τ, 4, :cEco) + @rep_elem c_τ_LitFast ⇒ (c_eco_τ, 5, :cEco) + @rep_elem c_τ_LitSlow ⇒ (c_eco_τ, 6, :cEco) + @rep_elem c_τ_SoilSlow ⇒ (c_eco_τ, 7, :cEco) + @rep_elem c_τ_SoilOld ⇒ (c_eco_τ, 8, :cEco) + + vegZix = getZix(land.pools.cVeg, helpers.pools.zix.cVeg) + for ix ∈ eachindex(vegZix) + @rep_elem p_C_to_N_cVeg[ix] ⇒ (C_to_N_cVeg, vegZix[ix], :cEco) + end + c_one = one(eltype(c_eco_k_base)) + for i ∈ eachindex(c_eco_k_base) + tmp = c_one - (exp(-c_eco_τ[i])^(c_one / helpers.dates.timesteps_in_year)) + @rep_elem tmp ⇒ (c_eco_k_base, i, :cEco) + end + + ## pack land variables + @pack_nt begin + (C_to_N_cVeg, c_eco_τ, c_eco_k_base, ηA, ηH) ⇒ land.diagnostics + c_remain ⇒ land.states + end + return land +end + +function adjustPackPoolComponents(land, helpers, ::CCycleBaseGSITOOPFT) + @unpack_nt (cVeg, + cLit, + cSoil, + cVegRoot, + cVegWood, + cVegLeaf, + cVegReserve, + cLitFast, + cLitSlow, + cSoilSlow, + cSoilOld, + cEco) ⇐ land.pools + + zix = helpers.pools.zix + for (lc, l) in enumerate(zix.cVeg) + @rep_elem cEco[l] ⇒ (cVeg, lc, :cVeg) + end + + for (lc, l) in enumerate(zix.cVegRoot) + @rep_elem cEco[l] ⇒ (cVegRoot, lc, :cVegRoot) + end + + for (lc, l) in enumerate(zix.cVegWood) + @rep_elem cEco[l] ⇒ (cVegWood, lc, :cVegWood) + end + + for (lc, l) in enumerate(zix.cVegLeaf) + @rep_elem cEco[l] ⇒ (cVegLeaf, lc, :cVegLeaf) + end + + for (lc, l) in enumerate(zix.cVegReserve) + @rep_elem cEco[l] ⇒ (cVegReserve, lc, :cVegReserve) + end + + for (lc, l) in enumerate(zix.cLit) + @rep_elem cEco[l] ⇒ (cLit, lc, :cLit) + end + + for (lc, l) in enumerate(zix.cLitFast) + @rep_elem cEco[l] ⇒ (cLitFast, lc, :cLitFast) + end + + for (lc, l) in enumerate(zix.cLitSlow) + @rep_elem cEco[l] ⇒ (cLitSlow, lc, :cLitSlow) + end + + for (lc, l) in enumerate(zix.cSoil) + @rep_elem cEco[l] ⇒ (cSoil, lc, :cSoil) + end + + for (lc, l) in enumerate(zix.cSoilSlow) + @rep_elem cEco[l] ⇒ (cSoilSlow, lc, :cSoilSlow) + end + + for (lc, l) in 
enumerate(zix.cSoilOld) + @rep_elem cEco[l] ⇒ (cSoilOld, lc, :cSoilOld) + end + @pack_nt (cVeg, + cLit, + cSoil, + cVegRoot, + cVegWood, + cVegLeaf, + cVegReserve, + cLitFast, + cLitSlow, + cSoilSlow, + cSoilOld, + cEco) ⇒ land.pools + return land +end + +purpose(::Type{cCycleBase_GSITOOPFT}) = "Implements the carbon cycle base model with GSI parameterization for multiple PFTs. Defines turnover rates and carbon to nitrogen ratios for different vegetation and soil pools. " + +@doc """ + +$(getModelDocString(cCycleBase_GSITOOPFT)) + +--- + +# Extended help + +*References* + - Potter; C. S.; J. T. Randerson; C. B. Field; P. A. Matson; P. M. Vitousek; H. A. Mooney; & S. A. Klooster. 1993. Terrestrial ecosystem production: A process model based on global satellite & surface data. Global Biogeochemical Cycles. 7: 811-841. + +*Versions* + - 1.0 on 28.02.2020 [skoirala] + +*Created by:* + - ncarvalhais +""" +cCycleBase_GSITOOPFT
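A quick numeric check of the turnover-to-rate conversion used in `precompute` above, assuming daily time steps; a minimal sketch, not part of the patch:

τ = 1.0                                   # annual turnover rate, yr-1 (e.g. c_τ_Leaf)
timesteps_in_year = 365                   # assumes a daily model time step
k = 1 - exp(-τ)^(1 / timesteps_in_year)   # per-timestep base rate, ≈ 0.00274
(1 - k)^timesteps_in_year ≈ exp(-τ)       # true: one year of steps recovers exp(-τ)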