diff --git a/generated-usage-examples/go/atlas-sdk-go/main.snippet.disaster-recovery.go b/generated-usage-examples/go/atlas-sdk-go/main.snippet.disaster-recovery.go new file mode 100644 index 0000000..07e2a2f --- /dev/null +++ b/generated-usage-examples/go/atlas-sdk-go/main.snippet.disaster-recovery.go @@ -0,0 +1,116 @@ +// See entire project at https://github.com/mongodb/atlas-architecture-go-sdk +package main + +import ( + "context" + "fmt" + "log" + "time" + + "atlas-sdk-go/internal/auth" + "atlas-sdk-go/internal/config" + "atlas-sdk-go/internal/data/recovery" + "atlas-sdk-go/internal/typeutils" + + "github.com/joho/godotenv" + "go.mongodb.org/atlas-sdk/v20250219001/admin" +) + +const ( + scenarioRegionalOutage = "regional-outage" + scenarioDataDeletion = "data-deletion" +) + +func main() { + envFile := ".env.production" + if err := godotenv.Load(envFile); err != nil { + log.Printf("Warning: could not load %s file: %v", envFile, err) + } + + secrets, cfg, err := config.LoadAllFromEnv() + if err != nil { + log.Fatalf("Failed to load configuration %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) + defer cancel() + client, err := auth.NewClient(ctx, cfg, secrets) + if err != nil { + log.Fatalf("Failed to initialize authentication client: %v", err) + } + + opts, err := recovery.LoadDROptionsFromEnv(cfg.ProjectID) + if err != nil { + log.Fatalf("Configuration error: %v", err) + } + + fmt.Printf("Starting disaster recovery scenario: %s\nProject: %s\nCluster: %s\n", opts.Scenario, opts.ProjectID, opts.ClusterName) + + if opts.DryRun { + fmt.Println("DRY RUN: no write operations will be performed") + } + + var summary string + var opErr error + + switch opts.Scenario { + case scenarioRegionalOutage: + summary, opErr = simulateRegionalOutage(ctx, client, opts) + case scenarioDataDeletion: + summary, opErr = executeDataDeletionRestore(ctx, client, opts) + default: + opErr = fmt.Errorf("unsupported DR_SCENARIO '%s'", opts.Scenario) 
+ } + + if opErr != nil { + log.Fatalf("Scenario failed: %v", opErr) + } + + fmt.Println("\n=== Summary ===") + fmt.Println(summary) + fmt.Println("Disaster recovery procedure completed.") +} + +// executeDataDeletionRestore initiates a restore job for a specified snapshot in a MongoDB Atlas cluster. +func executeDataDeletionRestore(ctx context.Context, client *admin.APIClient, o recovery.DrOptions) (string, error) { + job := admin.DiskBackupSnapshotRestoreJob{SnapshotId: &o.SnapshotID, TargetClusterName: &o.ClusterName} + if o.DryRun { + return fmt.Sprintf("(dry-run) Would submit restore job for snapshot %s", o.SnapshotID), nil + } + _, _, err := client.CloudBackupsApi.CreateBackupRestoreJob(ctx, o.ProjectID, o.ClusterName, &job).Execute() + if err != nil { + return "", fmt.Errorf("create restore job: %w", err) + } + return fmt.Sprintf("Restore job submitted for snapshot %s", o.SnapshotID), nil +} + +// simulateRegionalOutage modifies the electable node count in a target region for a MongoDB Atlas cluster. 
+func simulateRegionalOutage(ctx context.Context, client *admin.APIClient, o recovery.DrOptions) (string, error) { + cluster, _, err := client.ClustersApi.GetCluster(ctx, o.ProjectID, o.ClusterName).Execute() + if err != nil { + return "", fmt.Errorf("get cluster: %w", err) + } + if !cluster.HasReplicationSpecs() { + return "", fmt.Errorf("cluster has no replication specs") + } + repl := cluster.GetReplicationSpecs() + addedNodes, foundTarget := recovery.AddElectableNodesToRegion(repl, o.TargetRegion, o.AddNodes) + if !foundTarget { + return "", fmt.Errorf("target region '%s' not found in replication specs", o.TargetRegion) + } + zeroedRegions := 0 + if o.OutageRegion != "" { + zeroedRegions = recovery.ZeroElectableNodesInRegion(repl, o.OutageRegion) + } + payload := admin.NewClusterDescription20240805() + payload.SetReplicationSpecs(repl) + if o.DryRun { + return fmt.Sprintf("(dry-run) Would add %d electable nodes to %s%s", addedNodes, o.TargetRegion, typeutils.SuffixZeroed(zeroedRegions, o.OutageRegion)), nil + } + _, _, err = client.ClustersApi.UpdateCluster(ctx, o.ProjectID, o.ClusterName, payload).Execute() + if err != nil { + return "", fmt.Errorf("update cluster: %w", err) + } + return fmt.Sprintf("Added %d electable nodes to %s%s", addedNodes, o.TargetRegion, typeutils.SuffixZeroed(zeroedRegions, o.OutageRegion)), nil +} + diff --git a/generated-usage-examples/go/atlas-sdk-go/project-copy/README.md b/generated-usage-examples/go/atlas-sdk-go/project-copy/README.md index 96b9817..3420349 100644 --- a/generated-usage-examples/go/atlas-sdk-go/project-copy/README.md +++ b/generated-usage-examples/go/atlas-sdk-go/project-copy/README.md @@ -18,6 +18,7 @@ Currently, the repository includes examples that demonstrate the following: - Return all linked organizations from a specific billing organization - Get historical invoices for an organization - Programmatically archive Atlas cluster data +- Perform disaster recovery operations (e.g. 
restore from snapshot) As the Architecture Center documentation evolves, this repository will be updated with new examples and improvements to existing code. @@ -29,7 +30,8 @@ and improvements to existing code. ├── examples # Runnable examples by category │ ├── billing/ │ ├── monitoring/ -│ └── performance/ +│ ├── performance/ +│ └── recovery/ ├── configs # Atlas configuration template │ └── config.example.json ├── internal # Shared utilities and helpers @@ -42,7 +44,8 @@ and improvements to existing code. │ ├── errors/ │ ├── fileutils/ │ ├── logs/ -│ └── metrics/ +│ ├── metrics/ +│ └── typeutils/ ├── go.mod ├── go.sum ├── CHANGELOG.md # List of major changes to the project @@ -61,10 +64,10 @@ and improvements to existing code. 1. Create a `.env.` file in the root directory with your MongoDB Atlas service account credentials. For example, create a `.env.development` file for your dev environment: ```dotenv - MONGODB_ATLAS_SERVICE_ACCOUNT_ID= - MONGODB_ATLAS_SERVICE_ACCOUNT_SECRET= - ATLAS_DOWNLOADS_DIR="tmp/atlas_downloads" # optional download directory - CONFIG_PATH="configs/config.development.json" # optional path to Atlas config file + MONGODB_ATLAS_SERVICE_ACCOUNT_ID= + MONGODB_ATLAS_SERVICE_ACCOUNT_SECRET= + ATLAS_DOWNLOADS_DIR="tmp/atlas_downloads" # optional download directory + CONFIG_PATH="configs/config.development.json" # optional path to Atlas config file ``` > **NOTE:** For production, use a secrets manager (e.g. HashiCorp Vault, AWS Secrets Manager) > instead of environment variables. @@ -133,6 +136,13 @@ go run examples/monitoring/metrics_process/main.go go run examples/performance/archiving/main.go ``` +### Recovery + +#### Perform Disaster Recovery Operations +```bash +go run examples/recovery/main.go +``` + ## Changelog For list of major changes to this project, see [CHANGELOG](CHANGELOG.md). 
diff --git a/generated-usage-examples/go/atlas-sdk-go/project-copy/examples/recovery/main.go b/generated-usage-examples/go/atlas-sdk-go/project-copy/examples/recovery/main.go new file mode 100644 index 0000000..7e3ee9f --- /dev/null +++ b/generated-usage-examples/go/atlas-sdk-go/project-copy/examples/recovery/main.go @@ -0,0 +1,115 @@ +package main + +import ( + "context" + "fmt" + "log" + "time" + + "atlas-sdk-go/internal/auth" + "atlas-sdk-go/internal/config" + "atlas-sdk-go/internal/data/recovery" + "atlas-sdk-go/internal/typeutils" + + "github.com/joho/godotenv" + "go.mongodb.org/atlas-sdk/v20250219001/admin" +) + +const ( + scenarioRegionalOutage = "regional-outage" + scenarioDataDeletion = "data-deletion" +) + +func main() { + envFile := ".env.production" + if err := godotenv.Load(envFile); err != nil { + log.Printf("Warning: could not load %s file: %v", envFile, err) + } + + secrets, cfg, err := config.LoadAllFromEnv() + if err != nil { + log.Fatalf("Failed to load configuration %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) + defer cancel() + client, err := auth.NewClient(ctx, cfg, secrets) + if err != nil { + log.Fatalf("Failed to initialize authentication client: %v", err) + } + + opts, err := recovery.LoadDROptionsFromEnv(cfg.ProjectID) + if err != nil { + log.Fatalf("Configuration error: %v", err) + } + + fmt.Printf("Starting disaster recovery scenario: %s\nProject: %s\nCluster: %s\n", opts.Scenario, opts.ProjectID, opts.ClusterName) + + if opts.DryRun { + fmt.Println("DRY RUN: no write operations will be performed") + } + + var summary string + var opErr error + + switch opts.Scenario { + case scenarioRegionalOutage: + summary, opErr = simulateRegionalOutage(ctx, client, opts) + case scenarioDataDeletion: + summary, opErr = executeDataDeletionRestore(ctx, client, opts) + default: + opErr = fmt.Errorf("unsupported DR_SCENARIO '%s'", opts.Scenario) + } + + if opErr != nil { + log.Fatalf("Scenario failed: %v", 
opErr) + } + + fmt.Println("\n=== Summary ===") + fmt.Println(summary) + fmt.Println("Disaster recovery procedure completed.") +} + +// executeDataDeletionRestore initiates a restore job for a specified snapshot in a MongoDB Atlas cluster. +func executeDataDeletionRestore(ctx context.Context, client *admin.APIClient, o recovery.DrOptions) (string, error) { + job := admin.DiskBackupSnapshotRestoreJob{SnapshotId: &o.SnapshotID, TargetClusterName: &o.ClusterName} + if o.DryRun { + return fmt.Sprintf("(dry-run) Would submit restore job for snapshot %s", o.SnapshotID), nil + } + _, _, err := client.CloudBackupsApi.CreateBackupRestoreJob(ctx, o.ProjectID, o.ClusterName, &job).Execute() + if err != nil { + return "", fmt.Errorf("create restore job: %w", err) + } + return fmt.Sprintf("Restore job submitted for snapshot %s", o.SnapshotID), nil +} + +// simulateRegionalOutage modifies the electable node count in a target region for a MongoDB Atlas cluster. +func simulateRegionalOutage(ctx context.Context, client *admin.APIClient, o recovery.DrOptions) (string, error) { + cluster, _, err := client.ClustersApi.GetCluster(ctx, o.ProjectID, o.ClusterName).Execute() + if err != nil { + return "", fmt.Errorf("get cluster: %w", err) + } + if !cluster.HasReplicationSpecs() { + return "", fmt.Errorf("cluster has no replication specs") + } + repl := cluster.GetReplicationSpecs() + addedNodes, foundTarget := recovery.AddElectableNodesToRegion(repl, o.TargetRegion, o.AddNodes) + if !foundTarget { + return "", fmt.Errorf("target region '%s' not found in replication specs", o.TargetRegion) + } + zeroedRegions := 0 + if o.OutageRegion != "" { + zeroedRegions = recovery.ZeroElectableNodesInRegion(repl, o.OutageRegion) + } + payload := admin.NewClusterDescription20240805() + payload.SetReplicationSpecs(repl) + if o.DryRun { + return fmt.Sprintf("(dry-run) Would add %d electable nodes to %s%s", addedNodes, o.TargetRegion, typeutils.SuffixZeroed(zeroedRegions, o.OutageRegion)), nil + } + _, _, 
err = client.ClustersApi.UpdateCluster(ctx, o.ProjectID, o.ClusterName, payload).Execute() + if err != nil { + return "", fmt.Errorf("update cluster: %w", err) + } + return fmt.Sprintf("Added %d electable nodes to %s%s", addedNodes, o.TargetRegion, typeutils.SuffixZeroed(zeroedRegions, o.OutageRegion)), nil +} + diff --git a/generated-usage-examples/go/atlas-sdk-go/project-copy/internal/data/recovery/options.go b/generated-usage-examples/go/atlas-sdk-go/project-copy/internal/data/recovery/options.go new file mode 100644 index 0000000..47ed284 --- /dev/null +++ b/generated-usage-examples/go/atlas-sdk-go/project-copy/internal/data/recovery/options.go @@ -0,0 +1,119 @@ +package recovery + +import ( + "fmt" + "os" + "strconv" + "strings" + + "atlas-sdk-go/internal/typeutils" +) + +const ( + defaultAddNodes = 1 + scenarioRegionalOutage = "regional-outage" + scenarioDataDeletion = "data-deletion" +) + +// DrOptions holds the scenario and configuration parameters used by the +// disaster recovery example. Values are typically loaded from environment +// variables. Only the fields relevant to the chosen Scenario are required. +// +// Scenario values: +// - "regional-outage" : simulate adding capacity to a healthy region +// - "data-deletion" : submit a snapshot restore job +// Required per scenario: +// regional-outage: ProjectID, ClusterName, TargetRegion +// data-deletion: ProjectID, ClusterName, SnapshotID +// Optional: +// OutageRegion (regional-outage) region to zero electable nodes +// AddNodes (regional-outage) number of electable nodes to add (default 1) +// DryRun when true prints intended actions only. +type DrOptions struct { + Scenario string + ProjectID string + ClusterName string + TargetRegion string + OutageRegion string + AddNodes int + SnapshotID string + DryRun bool +} + +// LoadDROptionsFromEnv reads environment variables and validates scenario-specific requirements. 
+// Defaults are applied first, then overridden if env vars are present: +// +// DR_SCENARIO (req) regional-outage | data-deletion +// ATLAS_PROJECT_ID (req unless provided via config loader) +// ATLAS_CLUSTER_NAME (req) target cluster name +// DR_TARGET_REGION (regional-outage req) region receiving added capacity +// DR_OUTAGE_REGION (regional-outage opt) region considered impaired (its electable nodes set to 0) +// DR_ADD_NODES (regional-outage opt) number of electable nodes to add (default: 1) +// DR_SNAPSHOT_ID (data-deletion req) snapshot ID to restore +// DR_DRY_RUN (opt bool) if true, only log intended actions (default: false) +func LoadDROptionsFromEnv(fallbackProjectID string) (DrOptions, error) { + o := DrOptions{ + AddNodes: defaultAddNodes, + } + + o.Scenario = strings.ToLower(strings.TrimSpace(os.Getenv("DR_SCENARIO"))) + o.ProjectID = typeutils.FirstNonEmpty(os.Getenv("ATLAS_PROJECT_ID"), fallbackProjectID) + o.ClusterName = strings.TrimSpace(os.Getenv("ATLAS_CLUSTER_NAME")) + o.TargetRegion = strings.TrimSpace(os.Getenv("DR_TARGET_REGION")) + o.OutageRegion = strings.TrimSpace(os.Getenv("DR_OUTAGE_REGION")) + o.SnapshotID = strings.TrimSpace(os.Getenv("DR_SNAPSHOT_ID")) + + if v, ok := os.LookupEnv("DR_ADD_NODES"); ok { + n, err := strconv.Atoi(strings.TrimSpace(v)) + if err != nil { + return o, fmt.Errorf("invalid DR_ADD_NODES value '%s': must be a positive integer", v) + } + if n <= 0 { + return o, fmt.Errorf("DR_ADD_NODES must be a positive integer, got %d", n) + } + o.AddNodes = n + } + + if v, ok := os.LookupEnv("DR_DRY_RUN"); ok { + o.DryRun = typeutils.ParseBool(v) + } + if err := validateRequiredFields(o); err != nil { + return o, err + } + if err := validateScenarioRequirements(o); err != nil { + return o, err + } + + return o, nil +} + +func validateRequiredFields(o DrOptions) error { + if o.Scenario == "" { + return fmt.Errorf("DR_SCENARIO is required") + } + if o.ProjectID == "" { + return fmt.Errorf("ATLAS_PROJECT_ID is required") + } + 
if o.ClusterName == "" { + return fmt.Errorf("ATLAS_CLUSTER_NAME is required") + } + return nil +} + +// validateScenarioRequirements checks that scenario-specific required fields are set. +func validateScenarioRequirements(o DrOptions) error { + switch o.Scenario { + case scenarioRegionalOutage: + if o.TargetRegion == "" { + return fmt.Errorf("DR_TARGET_REGION is required for %s scenario", scenarioRegionalOutage) + } + case scenarioDataDeletion: + if o.SnapshotID == "" { + return fmt.Errorf("DR_SNAPSHOT_ID is required for %s scenario", scenarioDataDeletion) + } + default: + return fmt.Errorf("unsupported DR_SCENARIO '%s': valid options are %s, %s", + o.Scenario, scenarioRegionalOutage, scenarioDataDeletion) + } + return nil +} diff --git a/generated-usage-examples/go/atlas-sdk-go/project-copy/internal/data/recovery/restore.go b/generated-usage-examples/go/atlas-sdk-go/project-copy/internal/data/recovery/restore.go new file mode 100644 index 0000000..a9f46ba --- /dev/null +++ b/generated-usage-examples/go/atlas-sdk-go/project-copy/internal/data/recovery/restore.go @@ -0,0 +1,57 @@ +package recovery + +import ( + "go.mongodb.org/atlas-sdk/v20250219001/admin" +) + +// AddElectableNodesToRegion increases electable node count in the specified target region. 
+func AddElectableNodesToRegion(repl []admin.ReplicationSpec20240805, targetRegion string, addNodes int) (int, bool) { + added := 0 + found := false + for i := range repl { + rcs := repl[i].GetRegionConfigs() + for j := range rcs { + regionName := "" + if rcs[j].HasRegionName() { + regionName = rcs[j].GetRegionName() + } + if regionName == targetRegion && rcs[j].HasElectableSpecs() { + es := rcs[j].GetElectableSpecs() + before := 0 + if es.HasNodeCount() { + before = es.GetNodeCount() + } + es.SetNodeCount(before + addNodes) + rcs[j].SetElectableSpecs(es) + added += addNodes + found = true + } + } + repl[i].SetRegionConfigs(rcs) + } + return added, found +} + +// ZeroElectableNodesInRegion sets electable node count to zero in the outage region, returning count of regions modified. +func ZeroElectableNodesInRegion(repl []admin.ReplicationSpec20240805, outageRegion string) int { + zeroed := 0 + for i := range repl { + rcs := repl[i].GetRegionConfigs() + for j := range rcs { + regionName := "" + if rcs[j].HasRegionName() { + regionName = rcs[j].GetRegionName() + } + if regionName == outageRegion && rcs[j].HasElectableSpecs() { + es := rcs[j].GetElectableSpecs() + if es.HasNodeCount() && es.GetNodeCount() > 0 { + es.SetNodeCount(0) + rcs[j].SetElectableSpecs(es) + zeroed++ + } + } + } + repl[i].SetRegionConfigs(rcs) + } + return zeroed +} diff --git a/generated-usage-examples/go/atlas-sdk-go/project-copy/internal/typeutils/load.go b/generated-usage-examples/go/atlas-sdk-go/project-copy/internal/typeutils/load.go new file mode 100644 index 0000000..e5747f1 --- /dev/null +++ b/generated-usage-examples/go/atlas-sdk-go/project-copy/internal/typeutils/load.go @@ -0,0 +1,38 @@ +package typeutils + +import ( + "fmt" + "strings" +) + +// FirstNonEmpty returns the first non-empty, non-whitespace string from values, or an empty string if none found. 
+func FirstNonEmpty(values ...string) string { + for _, v := range values { + if strings.TrimSpace(v) != "" { + return v + } + } + return "" +} + +// ParseBool interprets a string as a boolean. It returns true for "true", "1", "yes", "y" (case insensitive, trimmed), and false otherwise. +func ParseBool(v string) bool { + v = strings.ToLower(strings.TrimSpace(v)) + return v == "true" || v == "1" || v == "yes" || v == "y" +} + +// SuffixZeroed returns a formatted string if zeroed > 0, otherwise an empty string. +func SuffixZeroed(zeroed int, region string) string { + if zeroed == 0 { + return "" + } + return fmt.Sprintf(", zeroed electable nodes in region %s", region) +} + +// DefaultIfBlank returns d if v is an empty string, otherwise returns v. +func DefaultIfBlank(v, d string) string { + if v == "" { + return d + } + return v +} diff --git a/usage-examples/go/atlas-sdk-go/README.md b/usage-examples/go/atlas-sdk-go/README.md index 96b9817..3420349 100644 --- a/usage-examples/go/atlas-sdk-go/README.md +++ b/usage-examples/go/atlas-sdk-go/README.md @@ -18,6 +18,7 @@ Currently, the repository includes examples that demonstrate the following: - Return all linked organizations from a specific billing organization - Get historical invoices for an organization - Programmatically archive Atlas cluster data +- Perform disaster recovery operations (e.g. restore from snapshot) As the Architecture Center documentation evolves, this repository will be updated with new examples and improvements to existing code. @@ -29,7 +30,8 @@ and improvements to existing code. ├── examples # Runnable examples by category │ ├── billing/ │ ├── monitoring/ -│ └── performance/ +│ ├── performance/ +│ └── recovery/ ├── configs # Atlas configuration template │ └── config.example.json ├── internal # Shared utilities and helpers @@ -42,7 +44,8 @@ and improvements to existing code. 
│ ├── errors/ │ ├── fileutils/ │ ├── logs/ -│ └── metrics/ +│ ├── metrics/ +│ └── typeutils/ ├── go.mod ├── go.sum ├── CHANGELOG.md # List of major changes to the project @@ -61,10 +64,10 @@ and improvements to existing code. 1. Create a `.env.` file in the root directory with your MongoDB Atlas service account credentials. For example, create a `.env.development` file for your dev environment: ```dotenv - MONGODB_ATLAS_SERVICE_ACCOUNT_ID= - MONGODB_ATLAS_SERVICE_ACCOUNT_SECRET= - ATLAS_DOWNLOADS_DIR="tmp/atlas_downloads" # optional download directory - CONFIG_PATH="configs/config.development.json" # optional path to Atlas config file + MONGODB_ATLAS_SERVICE_ACCOUNT_ID= + MONGODB_ATLAS_SERVICE_ACCOUNT_SECRET= + ATLAS_DOWNLOADS_DIR="tmp/atlas_downloads" # optional download directory + CONFIG_PATH="configs/config.development.json" # optional path to Atlas config file ``` > **NOTE:** For production, use a secrets manager (e.g. HashiCorp Vault, AWS Secrets Manager) > instead of environment variables. @@ -133,6 +136,13 @@ go run examples/monitoring/metrics_process/main.go go run examples/performance/archiving/main.go ``` +### Recovery + +#### Perform Disaster Recovery Operations +```bash +go run examples/recovery/main.go +``` + ## Changelog For list of major changes to this project, see [CHANGELOG](CHANGELOG.md). 
diff --git a/usage-examples/go/atlas-sdk-go/disaster_recovery.md b/usage-examples/go/atlas-sdk-go/disaster_recovery.md new file mode 100644 index 0000000..e69de29 diff --git a/usage-examples/go/atlas-sdk-go/examples/recovery/main.go b/usage-examples/go/atlas-sdk-go/examples/recovery/main.go new file mode 100644 index 0000000..5f549b5 --- /dev/null +++ b/usage-examples/go/atlas-sdk-go/examples/recovery/main.go @@ -0,0 +1,120 @@ +// :snippet-start: disaster-recovery +// :state-remove-start: copy +// See entire project at https://github.com/mongodb/atlas-architecture-go-sdk +// :state-remove-end: [copy] +package main + +import ( + "context" + "fmt" + "log" + "time" + + "atlas-sdk-go/internal/auth" + "atlas-sdk-go/internal/config" + "atlas-sdk-go/internal/data/recovery" + "atlas-sdk-go/internal/typeutils" + + "github.com/joho/godotenv" + "go.mongodb.org/atlas-sdk/v20250219001/admin" +) + +const ( + scenarioRegionalOutage = "regional-outage" + scenarioDataDeletion = "data-deletion" +) + +func main() { + envFile := ".env.production" + if err := godotenv.Load(envFile); err != nil { + log.Printf("Warning: could not load %s file: %v", envFile, err) + } + + secrets, cfg, err := config.LoadAllFromEnv() + if err != nil { + log.Fatalf("Failed to load configuration %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) + defer cancel() + client, err := auth.NewClient(ctx, cfg, secrets) + if err != nil { + log.Fatalf("Failed to initialize authentication client: %v", err) + } + + opts, err := recovery.LoadDROptionsFromEnv(cfg.ProjectID) + if err != nil { + log.Fatalf("Configuration error: %v", err) + } + + fmt.Printf("Starting disaster recovery scenario: %s\nProject: %s\nCluster: %s\n", opts.Scenario, opts.ProjectID, opts.ClusterName) + + if opts.DryRun { + fmt.Println("DRY RUN: no write operations will be performed") + } + + var summary string + var opErr error + + switch opts.Scenario { + case scenarioRegionalOutage: + summary, opErr = 
simulateRegionalOutage(ctx, client, opts) + case scenarioDataDeletion: + summary, opErr = executeDataDeletionRestore(ctx, client, opts) + default: + opErr = fmt.Errorf("unsupported DR_SCENARIO '%s'", opts.Scenario) + } + + if opErr != nil { + log.Fatalf("Scenario failed: %v", opErr) + } + + fmt.Println("\n=== Summary ===") + fmt.Println(summary) + fmt.Println("Disaster recovery procedure completed.") +} + +// executeDataDeletionRestore initiates a restore job for a specified snapshot in a MongoDB Atlas cluster. +func executeDataDeletionRestore(ctx context.Context, client *admin.APIClient, o recovery.DrOptions) (string, error) { + job := admin.DiskBackupSnapshotRestoreJob{SnapshotId: &o.SnapshotID, TargetClusterName: &o.ClusterName} + if o.DryRun { + return fmt.Sprintf("(dry-run) Would submit restore job for snapshot %s", o.SnapshotID), nil + } + _, _, err := client.CloudBackupsApi.CreateBackupRestoreJob(ctx, o.ProjectID, o.ClusterName, &job).Execute() + if err != nil { + return "", fmt.Errorf("create restore job: %w", err) + } + return fmt.Sprintf("Restore job submitted for snapshot %s", o.SnapshotID), nil +} + +// simulateRegionalOutage modifies the electable node count in a target region for a MongoDB Atlas cluster. 
+func simulateRegionalOutage(ctx context.Context, client *admin.APIClient, o recovery.DrOptions) (string, error) { + cluster, _, err := client.ClustersApi.GetCluster(ctx, o.ProjectID, o.ClusterName).Execute() + if err != nil { + return "", fmt.Errorf("get cluster: %w", err) + } + if !cluster.HasReplicationSpecs() { + return "", fmt.Errorf("cluster has no replication specs") + } + repl := cluster.GetReplicationSpecs() + addedNodes, foundTarget := recovery.AddElectableNodesToRegion(repl, o.TargetRegion, o.AddNodes) + if !foundTarget { + return "", fmt.Errorf("target region '%s' not found in replication specs", o.TargetRegion) + } + zeroedRegions := 0 + if o.OutageRegion != "" { + zeroedRegions = recovery.ZeroElectableNodesInRegion(repl, o.OutageRegion) + } + payload := admin.NewClusterDescription20240805() + payload.SetReplicationSpecs(repl) + if o.DryRun { + return fmt.Sprintf("(dry-run) Would add %d electable nodes to %s%s", addedNodes, o.TargetRegion, typeutils.SuffixZeroed(zeroedRegions, o.OutageRegion)), nil + } + _, _, err = client.ClustersApi.UpdateCluster(ctx, o.ProjectID, o.ClusterName, payload).Execute() + if err != nil { + return "", fmt.Errorf("update cluster: %w", err) + } + return fmt.Sprintf("Added %d electable nodes to %s%s", addedNodes, o.TargetRegion, typeutils.SuffixZeroed(zeroedRegions, o.OutageRegion)), nil +} + +// :snippet-end: [disaster-recovery] diff --git a/usage-examples/go/atlas-sdk-go/examples/recovery/main_test.go b/usage-examples/go/atlas-sdk-go/examples/recovery/main_test.go new file mode 100644 index 0000000..ddd274f --- /dev/null +++ b/usage-examples/go/atlas-sdk-go/examples/recovery/main_test.go @@ -0,0 +1,171 @@ +package main + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.mongodb.org/atlas-sdk/v20250219001/admin" + + "atlas-sdk-go/internal/data/recovery" +) + +// testClient helper replicates 
pattern from internal tests. +func testClient(t *testing.T, handler http.HandlerFunc) *admin.APIClient { + server := httptest.NewServer(handler) + t.Cleanup(server.Close) + client, err := admin.NewClient(admin.UseBaseURL(server.URL)) + require.NoError(t, err) + return client +} + +func TestExecuteDataDeletionRestore_Seam(t *testing.T) { + ctx := context.Background() + opts := recovery.DrOptions{ProjectID: "proj", ClusterName: "ClusterA", SnapshotID: "snap1"} + + // Dry-run path (should not call API) + { + var called atomic.Bool + client := testClient(t, func(w http.ResponseWriter, r *http.Request) { + called.Store(true) + w.WriteHeader(http.StatusInternalServerError) + }) + msg, err := executeDataDeletionRestore(ctx, client, recovery.DrOptions{ProjectID: opts.ProjectID, ClusterName: opts.ClusterName, SnapshotID: opts.SnapshotID, DryRun: true}) + require.NoError(t, err) + assert.Contains(t, msg, "(dry-run)") + assert.False(t, called.Load(), "API must not be invoked for dry-run") + } + + // Success path + { + client := testClient(t, func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodPost && strings.Contains(r.URL.Path, "/backup/restoreJobs") { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + // Provide minimal fields typical for restore job object + _, _ = w.Write([]byte(`{"id":"job1","snapshotId":"snap1","deliveryType":"automated"}`)) + return + } + w.WriteHeader(http.StatusNotFound) + }) + msg, err := executeDataDeletionRestore(ctx, client, opts) + require.NoError(t, err) + assert.Contains(t, msg, "Restore job submitted") + } + + // Error path + { + client := testClient(t, func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + }) + msg, err := executeDataDeletionRestore(ctx, client, opts) + require.Error(t, err) + assert.Empty(t, msg) + assert.Contains(t, err.Error(), "create restore job") + } +} + +func TestSimulateRegionalOutage_Seam(t *testing.T) { + ctx 
:= context.Background() + projectID := "proj" + clusterName := "ClusterA" + baseClusterJSON := `{"replicationSpecs":[{"regionConfigs":[{"regionName":"us-east-1","electableSpecs":{"nodeCount":3}},{"regionName":"us-west-2","electableSpecs":{"nodeCount":3}}]}]}` + noReplJSON := `{}` + // Dry-run add nodes only + { + var updateCalled atomic.Bool + client := testClient(t, func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodGet && strings.HasSuffix(r.URL.Path, "/clusters/"+clusterName) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(baseClusterJSON)) + return + } + if r.Method != http.MethodGet { + updateCalled.Store(true) + } + w.WriteHeader(http.StatusNotFound) + }) + msg, err := simulateRegionalOutage(ctx, client, recovery.DrOptions{ProjectID: projectID, ClusterName: clusterName, TargetRegion: "us-east-1", AddNodes: 2, DryRun: true}) + require.NoError(t, err) + assert.Contains(t, msg, "(dry-run)") + assert.Contains(t, msg, "add 2 electable nodes") + assert.False(t, updateCalled.Load()) + } + // Dry run with outage region zeroing + { + client := testClient(t, func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodGet && strings.HasSuffix(r.URL.Path, "/clusters/"+clusterName) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(baseClusterJSON)) + return + } + w.WriteHeader(http.StatusNotFound) + }) + msg, err := simulateRegionalOutage(ctx, client, recovery.DrOptions{ProjectID: projectID, ClusterName: clusterName, TargetRegion: "us-east-1", OutageRegion: "us-west-2", AddNodes: 1, DryRun: true}) + require.NoError(t, err) + assert.Contains(t, msg, "zeroed electable nodes") + } + // Target region not found + { + client := testClient(t, func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodGet && strings.HasSuffix(r.URL.Path, "/clusters/"+clusterName) { + w.Header().Set("Content-Type", 
"application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(baseClusterJSON)) + return + } + w.WriteHeader(http.StatusNotFound) + }) + msg, err := simulateRegionalOutage(ctx, client, recovery.DrOptions{ProjectID: projectID, ClusterName: clusterName, TargetRegion: "eu-central-1", AddNodes: 1, DryRun: true}) + require.Error(t, err) + assert.Empty(t, msg) + assert.Contains(t, err.Error(), "target region") + } + // No replication specs + { + client := testClient(t, func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodGet && strings.HasSuffix(r.URL.Path, "/clusters/"+clusterName) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(noReplJSON)) + return + } + w.WriteHeader(http.StatusNotFound) + }) + msg, err := simulateRegionalOutage(ctx, client, recovery.DrOptions{ProjectID: projectID, ClusterName: clusterName, TargetRegion: "us-east-1", AddNodes: 1, DryRun: true}) + require.Error(t, err) + assert.Empty(t, msg) + assert.Contains(t, err.Error(), "no replication specs") + } + // Update cluster error (non dry-run) + { + var getCount, updateCount int32 + client := testClient(t, func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodGet && strings.HasSuffix(r.URL.Path, "/clusters/"+clusterName) { + atomic.AddInt32(&getCount, 1) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(baseClusterJSON)) + return + } + if strings.HasSuffix(r.URL.Path, "/clusters/"+clusterName) { + atomic.AddInt32(&updateCount, 1) + w.WriteHeader(http.StatusInternalServerError) + return + } + w.WriteHeader(http.StatusNotFound) + }) + msg, err := simulateRegionalOutage(ctx, client, recovery.DrOptions{ProjectID: projectID, ClusterName: clusterName, TargetRegion: "us-east-1", AddNodes: 1}) + require.Error(t, err) + assert.Empty(t, msg) + assert.Equal(t, int32(1), getCount) + assert.Equal(t, int32(1), updateCount) + } +} diff --git 
a/usage-examples/go/atlas-sdk-go/internal/data/recovery/options.go b/usage-examples/go/atlas-sdk-go/internal/data/recovery/options.go new file mode 100644 index 0000000..47ed284 --- /dev/null +++ b/usage-examples/go/atlas-sdk-go/internal/data/recovery/options.go @@ -0,0 +1,119 @@ +package recovery + +import ( + "fmt" + "os" + "strconv" + "strings" + + "atlas-sdk-go/internal/typeutils" +) + +const ( + defaultAddNodes = 1 + scenarioRegionalOutage = "regional-outage" + scenarioDataDeletion = "data-deletion" +) + +// DrOptions holds the scenario and configuration parameters used by the +// disaster recovery example. Values are typically loaded from environment +// variables. Only the fields relevant to the chosen Scenario are required. +// +// Scenario values: +// - "regional-outage" : simulate adding capacity to a healthy region +// - "data-deletion" : submit a snapshot restore job +// Required per scenario: +// regional-outage: ProjectID, ClusterName, TargetRegion +// data-deletion: ProjectID, ClusterName, SnapshotID +// Optional: +// OutageRegion (regional-outage) region to zero electable nodes +// AddNodes (regional-outage) number of electable nodes to add (default 1) +// DryRun when true prints intended actions only. +type DrOptions struct { + Scenario string + ProjectID string + ClusterName string + TargetRegion string + OutageRegion string + AddNodes int + SnapshotID string + DryRun bool +} + +// LoadDROptionsFromEnv reads environment variables and validates scenario-specific requirements. 
+// Defaults are applied first, then overridden if env vars are present: +// +// DR_SCENARIO (req) regional-outage | data-deletion +// ATLAS_PROJECT_ID (req unless provided via config loader) project ID; fallbackProjectID is used when this is unset or blank +// ATLAS_CLUSTER_NAME (req) target cluster name +// DR_TARGET_REGION (regional-outage req) region receiving added capacity +// DR_OUTAGE_REGION (regional-outage opt) region considered impaired (its electable nodes set to 0) +// DR_ADD_NODES (regional-outage opt) number of electable nodes to add (default: 1) +// DR_SNAPSHOT_ID (data-deletion req) snapshot ID to restore +// DR_DRY_RUN (opt bool) if true, only log intended actions (default: false) +func LoadDROptionsFromEnv(fallbackProjectID string) (DrOptions, error) { + o := DrOptions{ + AddNodes: defaultAddNodes, + } + + o.Scenario = strings.ToLower(strings.TrimSpace(os.Getenv("DR_SCENARIO"))) + o.ProjectID = typeutils.FirstNonEmpty(os.Getenv("ATLAS_PROJECT_ID"), fallbackProjectID) + o.ClusterName = strings.TrimSpace(os.Getenv("ATLAS_CLUSTER_NAME")) + o.TargetRegion = strings.TrimSpace(os.Getenv("DR_TARGET_REGION")) + o.OutageRegion = strings.TrimSpace(os.Getenv("DR_OUTAGE_REGION")) + o.SnapshotID = strings.TrimSpace(os.Getenv("DR_SNAPSHOT_ID")) + + if v, ok := os.LookupEnv("DR_ADD_NODES"); ok { + n, err := strconv.Atoi(strings.TrimSpace(v)) + if err != nil { + return o, fmt.Errorf("invalid DR_ADD_NODES value '%s': must be a positive integer", v) + } + if n <= 0 { + return o, fmt.Errorf("DR_ADD_NODES must be a positive integer, got %d", n) + } + o.AddNodes = n + } + + if v, ok := os.LookupEnv("DR_DRY_RUN"); ok { + o.DryRun = typeutils.ParseBool(v) + } + if err := validateRequiredFields(o); err != nil { + return o, err + } + if err := validateScenarioRequirements(o); err != nil { + return o, err + } + + return o, nil +} + +func validateRequiredFields(o DrOptions) error { + if o.Scenario == "" { + return fmt.Errorf("DR_SCENARIO is required") + } + if o.ProjectID == "" { + return fmt.Errorf("ATLAS_PROJECT_ID is required") + } + 
if o.ClusterName == "" { + return fmt.Errorf("ATLAS_CLUSTER_NAME is required") + } + return nil +} + +// validateScenarioRequirements checks that the fields required by the selected scenario are set and returns an error for any scenario other than regional-outage or data-deletion. +func validateScenarioRequirements(o DrOptions) error { + switch o.Scenario { + case scenarioRegionalOutage: + if o.TargetRegion == "" { + return fmt.Errorf("DR_TARGET_REGION is required for %s scenario", scenarioRegionalOutage) + } + case scenarioDataDeletion: + if o.SnapshotID == "" { + return fmt.Errorf("DR_SNAPSHOT_ID is required for %s scenario", scenarioDataDeletion) + } + default: + return fmt.Errorf("unsupported DR_SCENARIO '%s': valid options are %s, %s", + o.Scenario, scenarioRegionalOutage, scenarioDataDeletion) + } + return nil +} diff --git a/usage-examples/go/atlas-sdk-go/internal/data/recovery/restore.go b/usage-examples/go/atlas-sdk-go/internal/data/recovery/restore.go new file mode 100644 index 0000000..a9f46ba --- /dev/null +++ b/usage-examples/go/atlas-sdk-go/internal/data/recovery/restore.go @@ -0,0 +1,57 @@ +package recovery + +import ( + "go.mongodb.org/atlas-sdk/v20250219001/admin" +) + +// AddElectableNodesToRegion adds addNodes to the electable node count of every region config in repl that is named targetRegion and has electable specs. It returns the total number of nodes added and whether any such region config was found. +func AddElectableNodesToRegion(repl []admin.ReplicationSpec20240805, targetRegion string, addNodes int) (int, bool) { + added := 0 + found := false + for i := range repl { + rcs := repl[i].GetRegionConfigs() + for j := range rcs { + regionName := "" + if rcs[j].HasRegionName() { + regionName = rcs[j].GetRegionName() + } + if regionName == targetRegion && rcs[j].HasElectableSpecs() { + es := rcs[j].GetElectableSpecs() + before := 0 + if es.HasNodeCount() { + before = es.GetNodeCount() + } + es.SetNodeCount(before + addNodes) + rcs[j].SetElectableSpecs(es) + added += addNodes + found = true + } + } + repl[i].SetRegionConfigs(rcs) + } + return added, found +} + +// ZeroElectableNodesInRegion sets the electable node count to zero for every region config named outageRegion whose count is currently positive, and returns the number of region configs it changed. 
+func ZeroElectableNodesInRegion(repl []admin.ReplicationSpec20240805, outageRegion string) int { + zeroed := 0 + for i := range repl { + rcs := repl[i].GetRegionConfigs() + for j := range rcs { + regionName := "" + if rcs[j].HasRegionName() { + regionName = rcs[j].GetRegionName() + } + if regionName == outageRegion && rcs[j].HasElectableSpecs() { + es := rcs[j].GetElectableSpecs() + if es.HasNodeCount() && es.GetNodeCount() > 0 { + es.SetNodeCount(0) + rcs[j].SetElectableSpecs(es) + zeroed++ + } + } + } + repl[i].SetRegionConfigs(rcs) + } + return zeroed +} diff --git a/usage-examples/go/atlas-sdk-go/internal/data/recovery/restore_test.go b/usage-examples/go/atlas-sdk-go/internal/data/recovery/restore_test.go new file mode 100644 index 0000000..b2e36e6 --- /dev/null +++ b/usage-examples/go/atlas-sdk-go/internal/data/recovery/restore_test.go @@ -0,0 +1,6 @@ +package recovery + +// Tests for regional outage and data deletion recovery flows have moved to +// examples/recovery/main_test.go where the executable seams live. This package +// now only exposes pure helper functions which are indirectly covered via the +// example seam tests. diff --git a/usage-examples/go/atlas-sdk-go/internal/typeutils/load.go b/usage-examples/go/atlas-sdk-go/internal/typeutils/load.go new file mode 100644 index 0000000..e5747f1 --- /dev/null +++ b/usage-examples/go/atlas-sdk-go/internal/typeutils/load.go @@ -0,0 +1,38 @@ +package typeutils + +import ( + "fmt" + "strings" +) + +// FirstNonEmpty returns the first value that is neither empty nor whitespace-only, exactly as given (it is not trimmed), or an empty string if there is none. +func FirstNonEmpty(values ...string) string { + for _, v := range values { + if strings.TrimSpace(v) != "" { + return v + } + } + return "" +} + +// ParseBool interprets a string as a boolean. It returns true for "true", "1", "yes", or "y" (case-insensitive, surrounding whitespace ignored) and false for anything else. 
func ParseBool(v string) bool {
	// Normalize once so the comparison ignores case and surrounding whitespace.
	switch strings.ToLower(strings.TrimSpace(v)) {
	case "true", "1", "yes", "y":
		return true
	default:
		return false
	}
}

// SuffixZeroed returns ", zeroed electable nodes in region <region>" when
// zeroed is positive, and an empty string otherwise.
func SuffixZeroed(zeroed int, region string) string {
	// A non-positive count means nothing was zeroed. Checking only for == 0
	// would let a negative count produce a misleading suffix.
	if zeroed <= 0 {
		return ""
	}
	return fmt.Sprintf(", zeroed electable nodes in region %s", region)
}

// DefaultIfBlank returns d when v is the empty string and v otherwise.
// Only "" counts as blank: a whitespace-only v is returned unchanged.
func DefaultIfBlank(v, d string) string {
	if v == "" {
		return d
	}
	return v
}