Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
b5197a1
feat: add more spans to opentelemetry plugin
nic-6443 Oct 19, 2025
f6414fd
add todo
nic-6443 Oct 19, 2025
0317cf5
f
nic-6443 Oct 19, 2025
7944e9f
return span on newspan
Revolyssup Oct 23, 2025
ec7adef
fix lint
Revolyssup Oct 23, 2025
bb28639
fix CI
Revolyssup Oct 23, 2025
1b29310
fix opentelemetry3
Revolyssup Oct 23, 2025
57cac44
add test
Revolyssup Oct 23, 2025
f8d5974
add test
Revolyssup Oct 23, 2025
0c38fc7
revert
Revolyssup Oct 23, 2025
ce4277c
revert
Revolyssup Oct 23, 2025
1d6c4e2
revert
Revolyssup Oct 23, 2025
1f0cedb
f
Revolyssup Oct 23, 2025
e6c31c0
add test
Revolyssup Oct 24, 2025
234f9f7
add plugin phase test
Revolyssup Oct 24, 2025
c3f9ae9
fix test
Revolyssup Oct 24, 2025
03f3906
add test
Revolyssup Oct 24, 2025
12d2513
f
Revolyssup Oct 24, 2025
f6e92b8
f
Revolyssup Oct 24, 2025
0577ab2
update docs
Revolyssup Oct 24, 2025
b1aaba2
fix lint
Revolyssup Oct 24, 2025
c3f37eb
fix otel3
Revolyssup Oct 24, 2025
cab6620
fix tests
Revolyssup Oct 24, 2025
9d195e5
remove todo
Revolyssup Oct 24, 2025
b505630
rename
Revolyssup Oct 25, 2025
9fc46b0
Update opentelemetry6.t
Revolyssup Oct 26, 2025
c5be5f9
apply suggestions
Revolyssup Oct 27, 2025
05eda81
fix lint
Revolyssup Oct 27, 2025
956935a
fix
Revolyssup Oct 27, 2025
d2bd719
f
Revolyssup Oct 27, 2025
119f9d3
apply suggestions
Revolyssup Oct 27, 2025
91e37be
fix test
Revolyssup Oct 27, 2025
fe16d9e
fix tests
Revolyssup Oct 27, 2025
14b4101
f
Revolyssup Oct 27, 2025
079484d
apply suggestions
Revolyssup Oct 28, 2025
c310ade
fix lint
Revolyssup Oct 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions apisix/core/response.lua
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
--
-- @module core.response

local tracer = require("apisix.utils.tracer")
local encode_json = require("cjson.safe").encode
local ngx = ngx
local arg = ngx.arg
Expand Down Expand Up @@ -62,6 +63,7 @@ function resp_exit(code, ...)
ngx.status = code
end

local message
for i = 1, select('#', ...) do
local v = select(i, ...)
if type(v) == "table" then
Expand All @@ -73,6 +75,7 @@ function resp_exit(code, ...)
t[idx] = body
idx = idx + 1
t[idx] = "\n"
message = body
end

elseif v ~= nil then
Expand All @@ -86,6 +89,9 @@ function resp_exit(code, ...)
end

if code then
if code >= 400 then
tracer.finish_current_span(tracer.status.ERROR, message or ("response code " .. code))
end
return ngx_exit(code)
end
end
Expand Down
16 changes: 16 additions & 0 deletions apisix/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ local debug = require("apisix.debug")
local pubsub_kafka = require("apisix.pubsub.kafka")
local resource = require("apisix.resource")
local trusted_addresses_util = require("apisix.utils.trusted-addresses")
local tracer = require("apisix.utils.tracer")
local ngx = ngx
local get_method = ngx.req.get_method
local ngx_exit = ngx.exit
Expand Down Expand Up @@ -203,6 +204,8 @@ function _M.ssl_client_hello_phase()
local api_ctx = core.tablepool.fetch("api_ctx", 0, 32)
ngx_ctx.api_ctx = api_ctx

local span = tracer.new_span("ssl_client_hello_phase", tracer.kind.server)

local ok, err = router.router_ssl.match_and_set(api_ctx, true, sni)

ngx_ctx.matched_ssl = api_ctx.matched_ssl
Expand All @@ -215,18 +218,23 @@ function _M.ssl_client_hello_phase()
core.log.error("failed to fetch ssl config: ", err)
end
core.log.error("failed to match any SSL certificate by SNI: ", sni)
span:set_status(tracer.status.ERROR, "failed match SNI")
tracer.finish_current_span()
ngx_exit(-1)
end

ok, err = apisix_ssl.set_protocols_by_clienthello(ngx_ctx.matched_ssl.value.ssl_protocols)
if not ok then
core.log.error("failed to set ssl protocols: ", err)
span:set_status(tracer.status.ERROR, "failed set protocols")
tracer.finish_current_span()
ngx_exit(-1)
end

-- in stream subsystem, ngx.ssl.server_name() return hostname of ssl session in preread phase,
-- so that we can't get real SNI without recording it in ngx.ctx during client_hello phase
ngx.ctx.client_hello_sni = sni
tracer.finish_current_span()
end


Expand Down Expand Up @@ -666,6 +674,7 @@ end


function _M.http_access_phase()
tracer.new_span("http_access_phase", tracer.kind.server)
-- from HTTP/3 to HTTP/1.1 we need to convert :authority pseudo-header
-- to Host header, so we set upstream_host variable here.
if ngx.req.http_version() == 3 then
Expand Down Expand Up @@ -716,19 +725,26 @@ function _M.http_access_phase()

handle_x_forwarded_headers(api_ctx)

local router_match_span = tracer.new_span("http_router_match", tracer.kind.internal)
router.router_http.match(api_ctx)

local route = api_ctx.matched_route
if not route then
tracer.new_span("run_global_rules", tracer.kind.internal)
-- run global rule when there is no matching route
local global_rules = apisix_global_rules.global_rules()
plugin.run_global_rules(api_ctx, global_rules, nil)
tracer.finish_current_span()

core.log.info("not find any matched route")
router_match_span:set_status(tracer.status.ERROR, "no matched route")
tracer.finish_current_span()
return core.response.exit(404,
{error_msg = "404 Route Not Found"})
end

tracer.finish_current_span()

core.log.info("matched route: ",
core.json.delay_encode(api_ctx.matched_route, true))

Expand Down
45 changes: 45 additions & 0 deletions apisix/plugins/opentelemetry.lua
Original file line number Diff line number Diff line change
Expand Up @@ -376,13 +376,54 @@ function _M.rewrite(conf, api_ctx)
ngx_var.opentelemetry_span_id = span_context.span_id
end

if not ctx:span():is_recording() then
ngx.ctx._apisix_skip_tracing = true
end

api_ctx.otel_context_token = ctx:attach()

-- inject trace context into the headers of upstream HTTP request
trace_context_propagator:inject(ctx, ngx.req)
end


--- Replay one recorded core span, and recursively its children, as a real
-- OpenTelemetry span under the given parent context.
-- @param tracer           tracer object; only `tracer:start(ctx, name, opts)` is used
-- @param parent_span_ctx  context the new span is started under
-- @param span             recorded span table with `name`, `kind`, `attributes`,
--                         `start_time`, `end_time` and optional `children` / `status`
local function create_child_span(tracer, parent_span_ctx, span)
    local new_span_ctx, new_span = tracer:start(parent_span_ctx, span.name,
                                    {
                                        kind = span.kind,
                                        attributes = span.attributes,
                                    })
    -- the span was recorded earlier, so overwrite the start time picked by start()
    new_span.start_time = span.start_time

    -- children are created before the parent is finished
    local children = span.children
    if children then
        for _, child in ipairs(children) do
            create_child_span(tracer, new_span_ctx, child)
        end
    end

    -- the recorded status is a {code = ..., message = ...} table; forward its
    -- fields instead of the table itself, and skip spans that never had a
    -- status recorded
    local status = span.status
    if status then
        new_span:set_status(status.code, status.message)
    end
    new_span:finish(span.end_time)
end


--- Export the spans stored in `ngx.ctx._apisix_spans` as children of the
-- root span of the current request.
-- Does nothing when the root span is not recording, when no spans were
-- stored, or when the plugin metadata / tracer object is unavailable.
-- @param root_span_ctx  object exposing `:span()`; used as the parent context
--                       NOTE(review): one caller in this file passes a variable
--                       named `span` here - confirm it is a context, not a span
-- @param api_ctx        request context, forwarded to the tracer lrucache
-- @param conf           plugin configuration, forwarded to create_tracer_obj
local function inject_core_spans(root_span_ctx, api_ctx, conf)
    -- cheapest check first: an unsampled root span has nothing to export
    if not root_span_ctx:span():is_recording() then
        return
    end

    local recorded_spans = ngx.ctx._apisix_spans
    if not recorded_spans then
        return
    end

    local metadata = plugin.plugin_metadata(plugin_name)
    if not metadata then
        -- without metadata there is no tracer configuration to build from
        core.log.warn("plugin_metadata is required for opentelemetry plugin ",
                      "to inject core spans")
        return
    end
    local plugin_info = metadata.value

    -- TODO: we should create another tracer object with always_on sampler in here,
    -- because the root span already decided to sample, all child spans should be sampled too.
    local tracer, err = core.lrucache.plugin_ctx(lrucache, api_ctx, nil,
                                                 create_tracer_obj, conf, plugin_info)
    if not tracer then
        core.log.error("failed to fetch tracer object: ", err)
        return
    end

    for _, sp in ipairs(recorded_spans) do
        create_child_span(tracer, root_span_ctx, sp)
    end
end


function _M.delayed_body_filter(conf, api_ctx)
if api_ctx.otel_context_token and ngx.arg[2] then
local ctx = context:current()
Expand All @@ -399,6 +440,8 @@ function _M.delayed_body_filter(conf, api_ctx)

span:set_attributes(attr.int("http.status_code", upstream_status))

inject_core_spans(ctx, api_ctx, conf)

span:finish()
end
end
Expand All @@ -418,6 +461,8 @@ function _M.log(conf, api_ctx)
"upstream response status: " .. upstream_status)
end

inject_core_spans(span, api_ctx, conf)

span:finish()
end
end
Expand Down
5 changes: 5 additions & 0 deletions apisix/secret.lua
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
local require = require
local core = require("apisix.core")
local string = require("apisix.core.string")
local tracer = require("apisix.utils.tracer")

local local_conf = require("apisix.core.config_local").local_conf()

Expand Down Expand Up @@ -148,16 +149,20 @@ local function fetch_by_uri(secret_uri)
return nil, "no secret conf, secret_uri: " .. secret_uri
end

local span = tracer.new_span("fetch_secret", tracer.kind.client)
local ok, sm = pcall(require, "apisix.secret." .. opts.manager)
if not ok then
return nil, "no secret manager: " .. opts.manager
end

local value, err = sm.get(conf, opts.key)
if err then
span:set_status(tracer.status.ERROR, err)
tracer.finish_current_span()
return nil, err
end

tracer.finish_current_span()
return value
end

Expand Down
9 changes: 9 additions & 0 deletions apisix/ssl/router/radixtree_sni.lua
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ local apisix_ssl = require("apisix.ssl")
local secret = require("apisix.secret")
local ngx_ssl = require("ngx.ssl")
local config_util = require("apisix.core.config_util")
local tracer = require("apisix.utils.tracer")
local ngx = ngx
local ipairs = ipairs
local type = type
Expand Down Expand Up @@ -149,11 +150,15 @@ function _M.match_and_set(api_ctx, match_only, alt_sni)
local err
if not radixtree_router or
radixtree_router_ver ~= ssl_certificates.conf_version then
local span = tracer.new_span("create_router", tracer.kind.internal)
radixtree_router, err = create_router(ssl_certificates.values)
if not radixtree_router then
span:set_status(tracer.status.ERROR, "failed create router")
tracer.finish_current_span()
return false, "failed to create radixtree router: " .. err
end
radixtree_router_ver = ssl_certificates.conf_version
tracer.finish_current_span()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Frankly, this API is confusing. The call to end a span should be something like span:finish(). Currently, using tracer.finish_current_span() implies that spans are managed by a context record within the tracer. This creates confusion for me—could it lead to misuse or conflicts when handling multiple requests in parallel or when a single request contains yield operations? Especially since we might optimize it in the future to use some kind of table pooling mechanism.

This is a concern. While I initially suspect it might not be an issue in single-threaded Nginx, we should avoid this confusion in the API design. It requires programmers to be thoroughly familiar with the OpenResty programming model, understanding the extent to which data is shared and how conflicts might arise. This imposes an additional explanation burden on us.

This is from a DX perspective. Technically, we may need to rethink how the stack is used to properly connect all spans.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had the same concern when I first reviewed this code.

APISIX will encounter an error if there are concurrent requests.

Image

New way (should be the same as @bzp2010's suggestion)

Image

end

local sni = alt_sni
Expand All @@ -170,15 +175,19 @@ function _M.match_and_set(api_ctx, match_only, alt_sni)
core.log.debug("sni: ", sni)

local sni_rev = sni:reverse()
local span = tracer.new_span("sni_radixtree_match", tracer.kind.internal)
local ok = radixtree_router:dispatch(sni_rev, nil, api_ctx)
if not ok then
if not alt_sni then
-- it is expected that alternative SNI doesn't have a SSL certificate associated
-- with it sometimes
core.log.error("failed to find any SSL certificate by SNI: ", sni)
end
span:set_status(tracer.status.ERROR, "failed match SNI")
tracer.finish_current_span()
return false
end
tracer.finish_current_span()


if type(api_ctx.matched_sni) == "table" then
Expand Down
73 changes: 73 additions & 0 deletions apisix/utils/span.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
local util = require("opentelemetry.util")
local span_status = require("opentelemetry.trace.span_status")


local _M = {}


local mt = {
__index = _M
}


--- Create a new recorded span.
-- The start time is taken immediately; `end_time` stays 0 until it is set.
-- @param name  span name
-- @param kind  span kind
-- @return the new span object
function _M.new(name, kind)
    local span = setmetatable({}, mt)

    span.name = name
    span.kind = kind
    span.start_time = util.time_nano()
    span.end_time = 0
    span.attributes = {}
    span.children = {}

    return span
end


--- Attach `span` as the last child of this span.
-- @param span  child span to append
function _M.append_child(self, span)
    local children = self.children
    children[#children + 1] = span
end


--- Record the status of this span.
-- The code is first passed through span_status.validate(). A new status table
-- is built on every call and stored in self.status, replacing any previous one.
-- @param code     status code, normalized by span_status.validate
-- @param message  status description; kept only when the validated code
--                 equals span_status.ERROR, otherwise the message is ""
function _M.set_status(self, code, message)
code = span_status.validate(code)
local status = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

    local status = self.status
    if not status then
        status = {
            code = code,
            message = ""
        }
        self.status = status
    else
        status.code = code
    end

    if code == span_status.ERROR then
        status.message = message
    end

code = code,
message = ""
}
-- only an ERROR status carries the caller's message
if code == span_status.ERROR then
status.message = message
end

self.status = status
end


--- Append every attribute passed as a vararg to self.attributes, in order.
-- The varargs are packed into a temporary table and walked with ipairs, so
-- iteration stops at the first nil argument.
-- @param ...  attributes to append
function _M.set_attributes(self, ...)
for _, attr in ipairs({ ... }) do
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

current way is slow, make a try with new way:

for ... select('#' )

table.insert(self.attributes, attr)
end
end


--- Mark this span as finished by stamping `end_time` with the current time.
function _M.finish(self)
    local now = util.time_nano()
    self.end_time = now
end


return _M
Loading
Loading