@@ -3,6 +3,7 @@ package lndclient
33import (
44 "context"
55 "fmt"
6+ "strings"
67 "sync"
78 "time"
89
@@ -11,6 +12,8 @@ import (
1112 "github.com/lightningnetwork/lnd/chainntnfs"
1213 "github.com/lightningnetwork/lnd/lnrpc/chainrpc"
1314 "google.golang.org/grpc"
15+ "google.golang.org/grpc/codes"
16+ "google.golang.org/grpc/status"
1417)
1518
1619// NotifierOptions is a set of functional options that allow callers to further
@@ -38,6 +41,19 @@ func DefaultNotifierOptions() *NotifierOptions {
3841// events received from the notifier.
3942type NotifierOption func (* NotifierOptions )
4043
44+ const (
45+ // chainNotifierStartupMessage matches the error string returned by lnd
46+ // v0.20.0-rc3+ when a ChainNotifier RPC is invoked before the
47+ // sub-server finishes initialization.
48+ chainNotifierStartupMessage = "chain notifier RPC is still in the " +
49+ "process of starting"
50+
51+ // chainNotifierRetryBackoff defines the delay between successive
52+ // subscription attempts while waiting for the ChainNotifier sub-server
53+ // to become operational.
54+ chainNotifierRetryBackoff = 500 * time .Millisecond
55+ )
56+
4157// WithIncludeBlock is an optional argument that allows the caller to specify
4258// that the block that mined a transaction should be included in the response.
4359func WithIncludeBlock () NotifierOption {
@@ -133,11 +149,23 @@ func (s *chainNotifierClient) RegisterSpendNtfn(ctx context.Context,
133149 }
134150 }
135151
136- macaroonAuth := s .chainMac .WithMacaroonAuth (ctx )
137- resp , err := s .client .RegisterSpendNtfn (macaroonAuth , & chainrpc.SpendRequest {
138- HeightHint : uint32 (heightHint ),
139- Outpoint : rpcOutpoint ,
140- Script : pkScript ,
152+ var (
153+ resp chainrpc.ChainNotifier_RegisterSpendNtfnClient
154+ err error
155+ )
156+
157+ // lnd v0.20.0-rc3 changed the startup ordering so the ChainNotifier
158+ // sub-server can report "still starting" for a short window. Retry the
159+ // registration in that case to avoid aborting clients that subscribe
160+ // immediately at startup.
161+ err = s .retryChainNotifierCall (ctx , func () error {
162+ macaroonAuth := s .chainMac .WithMacaroonAuth (ctx )
163+ resp , err = s .client .RegisterSpendNtfn (macaroonAuth , & chainrpc.SpendRequest {
164+ HeightHint : uint32 (heightHint ),
165+ Outpoint : rpcOutpoint ,
166+ Script : pkScript ,
167+ })
168+ return err
141169 })
142170 if err != nil {
143171 return nil , nil , err
@@ -251,15 +279,25 @@ func (s *chainNotifierClient) RegisterConfirmationsNtfn(ctx context.Context,
251279 if txid != nil {
252280 txidSlice = txid [:]
253281 }
254- confStream , err := s .client .RegisterConfirmationsNtfn (
255- s .chainMac .WithMacaroonAuth (ctx ), & chainrpc.ConfRequest {
256- Script : pkScript ,
257- NumConfs : uint32 (numConfs ),
258- HeightHint : uint32 (heightHint ),
259- Txid : txidSlice ,
260- IncludeBlock : opts .IncludeBlock ,
261- },
282+ var (
283+ confStream chainrpc.ChainNotifier_RegisterConfirmationsNtfnClient
284+ err error
262285 )
286+ // The confirmation RPC is also subject to the post-v0.20.0-rc3 startup
287+ // ordering change, so we retry here until lnd reports the sub-server
288+ // ready.
289+ err = s .retryChainNotifierCall (ctx , func () error {
290+ confStream , err = s .client .RegisterConfirmationsNtfn (
291+ s .chainMac .WithMacaroonAuth (ctx ), & chainrpc.ConfRequest {
292+ Script : pkScript ,
293+ NumConfs : uint32 (numConfs ),
294+ HeightHint : uint32 (heightHint ),
295+ Txid : txidSlice ,
296+ IncludeBlock : opts .IncludeBlock ,
297+ },
298+ )
299+ return err
300+ })
263301 if err != nil {
264302 return nil , nil , err
265303 }
@@ -362,9 +400,18 @@ func (s *chainNotifierClient) RegisterConfirmationsNtfn(ctx context.Context,
362400func (s * chainNotifierClient ) RegisterBlockEpochNtfn (ctx context.Context ) (
363401 chan int32 , chan error , error ) {
364402
365- blockEpochClient , err := s .client .RegisterBlockEpochNtfn (
366- s .chainMac .WithMacaroonAuth (ctx ), & chainrpc.BlockEpoch {},
403+ var (
404+ blockEpochClient chainrpc.ChainNotifier_RegisterBlockEpochNtfnClient
405+ err error
367406 )
407+ // Block epoch subscriptions similarly need to survive the "still
408+ // starting" period introduced in lnd v0.20.0-rc3.
409+ err = s .retryChainNotifierCall (ctx , func () error {
410+ blockEpochClient , err = s .client .RegisterBlockEpochNtfn (
411+ s .chainMac .WithMacaroonAuth (ctx ), & chainrpc.BlockEpoch {},
412+ )
413+ return err
414+ })
368415 if err != nil {
369416 return nil , nil , err
370417 }
@@ -393,3 +440,71 @@ func (s *chainNotifierClient) RegisterBlockEpochNtfn(ctx context.Context) (
393440
394441 return blockEpochChan , blockErrorChan , nil
395442}
443+
444+ // retryChainNotifierCall executes the passed RPC invocation, retrying while
445+ // lnd reports that the ChainNotifier sub-server is still initialising.
446+ //
447+ // Prior to v0.20.0-rc3 the ChainNotifier sub-server finished initialization
448+ // before dependent services started, so a single RPC attempt succeeded. From
449+ // rc3 (LND commit c6f458e478f9ef2cf1d394972bfbc512862c6707) onwards lnd starts
450+ // the notifier later in the daemon lifecycle to avoid rescans from stale
451+ // heights. During the brief gap between client connection and notifier
452+ // readiness lnd returns the string "chain notifier RPC is still in the process
453+ // of starting" wrapped in an Unknown gRPC status. Clients that interact with
454+ // lnd immediately after it connects - such as Loop during integration testing -
455+ // would previously treat that error as fatal and abort startup, even though
456+ // retrying shortly after would succeed.
457+ //
458+ // This helper centralises the retry policy: when the specific "still starting"
459+ // error is encountered we back off briefly and reissue the RPC. Non-startup
460+ // errors are returned to the caller unchanged, and the caller's context
461+ // controls the overall deadline so shutdown conditions are respected.
462+ func (s * chainNotifierClient ) retryChainNotifierCall (ctx context.Context ,
463+ call func () error ) error {
464+
465+ for {
466+ err := call ()
467+ if err == nil {
468+ return nil
469+ }
470+
471+ if ! isChainNotifierStartingErr (err ) {
472+ return err
473+ }
474+
475+ log .Warnf ("Chain notifier RPC not ready yet, retrying: %v" , err )
476+
477+ select {
478+ case <- time .After (chainNotifierRetryBackoff ):
479+ continue
480+
481+ case <- ctx .Done ():
482+ return ctx .Err ()
483+ }
484+ }
485+ }
486+
487+ // detectChainNotifierStartupError reports whether err is due to the lnd
488+ // ChainNotifier sub-server still starting up. Starting with lnd v0.20.0-rc3
489+ // the notifier is initialised later in the daemon lifecycle, and the RPC layer
490+ // surfaces this as an Unknown gRPC status that contains the message defined in
491+ // chainNotifierStartupMessage. There is a PR in LND to return code Unavailable
492+ // instead of Unknown: https://github.com/lightningnetwork/lnd/pull/10352
493+ func isChainNotifierStartingErr (err error ) bool {
494+ if err == nil {
495+ return false
496+ }
497+
498+ // gRPC code Unavailable means "the server can't handle this request
499+ // now, retry later". LND's chain notifier returns this error when
500+ // the server is starting.
501+ // See https://github.com/lightningnetwork/lnd/pull/10352
502+ st , ok := status .FromError (err )
503+ if ok && st .Code () == codes .Unavailable {
504+ return true
505+ }
506+
507+ // TODO(ln-v0.20.0) remove the string fallback once lndclient depends on
508+ // a version of lnd that returns codes.Unavailable for this condition.
509+ return strings .Contains (err .Error (), chainNotifierStartupMessage )
510+ }
0 commit comments