Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
fc99547
commit
zhoutianzi666 Nov 6, 2025
79969da
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 6, 2025
c2e37b9
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 6, 2025
467b566
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 6, 2025
0cde853
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 6, 2025
731d36a
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 7, 2025
bf06e75
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 7, 2025
53c7909
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 7, 2025
11816ce
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 7, 2025
449685f
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 9, 2025
28acf10
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 10, 2025
8ba0727
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 11, 2025
811ee97
commit
zhoutianzi666 Nov 11, 2025
62d7399
commit
zhoutianzi666 Nov 12, 2025
4071adf
commit
zhoutianzi666 Nov 12, 2025
59bda5a
Merge branch 'develop' into attn_test
zhoutianzi666 Nov 13, 2025
172c237
Merge branch 'develop' into attn_test
zhoutianzi666 Nov 13, 2025
0f0a598
Merge branch 'develop' into attn_test
zhoutianzi666 Nov 13, 2025
03568a3
add
zhoutianzi666 Nov 14, 2025
60d7a33
add
zhoutianzi666 Nov 14, 2025
d973719
add
zhoutianzi666 Nov 14, 2025
0cf5b89
add
zhoutianzi666 Nov 14, 2025
b497b69
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 14, 2025
6085956
add
zhoutianzi666 Nov 14, 2025
a31f034
Merge branch 'develop' into attn_test
zhoutianzi666 Nov 14, 2025
66248e8
Merge remote-tracking branch 'myfd/attn_test' into move_batch_id_per_…
zhoutianzi666 Nov 14, 2025
58b52cf
Merge remote-tracking branch 'origin/develop' into move_batch_id_per_…
zhoutianzi666 Nov 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions fastdeploy/model_executor/models/deepseek_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,17 +347,17 @@ def forward(

query = self.q_a_layernorm(query)[0]
query = self.q_b_proj(query)
query = query.reshape([-1, self.num_attention_heads_tp, self.qk_head_dim])
query.reshape_([-1, self.num_attention_heads_tp, self.qk_head_dim])
query_nope, query_pe = query.split([self.qk_nope_head_dim, self.qk_rope_head_dim], axis=-1)

key_pe = key_pe.reshape([-1, 1, self.qk_rope_head_dim])
compressed_kv = self.kv_a_layernorm(compressed_kv)[0]

key_pe.reshape_([-1, 1, self.qk_rope_head_dim])
query_pe, key_pe = self.rotary_emb(position_ids, query_pe, key_pe)

compressed_kv = self.kv_a_layernorm(compressed_kv)[0]

if forward_meta.max_len_tensor_cpu[1]: # max_enc_len_this_time
key_value = self.kv_b_proj(compressed_kv)
key_value = key_value.reshape(
key_value.reshape_(
[
-1,
self.num_attention_heads_tp,
Expand All @@ -382,9 +382,9 @@ def forward(
forward_meta=forward_meta,
)

fmha_out_prefill = fmha_out_prefill.reshape([-1, self.num_attention_heads_tp, self.qk_head_dim])
fmha_out_prefill.reshape_([-1, self.num_attention_heads_tp, self.qk_head_dim])
fmha_out_prefill = fmha_out_prefill[:, :, : self.v_head_dim]
fmha_out_prefill = fmha_out_prefill.reshape([-1, self.num_attention_heads_tp * self.v_head_dim])
fmha_out_prefill.reshape_([-1, self.num_attention_heads_tp * self.v_head_dim])
fmha_out_prefill = fmha_out_prefill * mask_encoder_batch.cast(fmha_out_prefill.dtype)

fmha_out = fmha_out_prefill
Expand All @@ -393,7 +393,7 @@ def forward(
q_nope_out = self.kv_b_proj_bmm(query_nope.transpose([1, 0, 2]), proj_type="k").transpose([1, 0, 2])

q_input = paddle.concat([q_nope_out, query_pe], axis=-1)
q_input = q_input.reshape(
q_input.reshape_(
[
-1,
self.num_attention_heads_tp * (self.kv_lora_rank + self.qk_rope_head_dim),
Expand Down
2 changes: 1 addition & 1 deletion fastdeploy/worker/gpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1266,6 +1266,7 @@ def _prepare_inputs(self) -> None:
self.share_inputs["ids_remove_padding"].copy_(ids_remove_padding, False)
# NOTE: (changwenbin) Initialized to max_num_seq '-1' before copying, marking illegal positions
self.share_inputs["batch_id_per_token"][:] = -1
self.share_inputs["batch_id_per_token"].copy_(batch_id_per_token, False)
self.share_inputs["cu_seqlens_q"].copy_(cu_seqlens_q, False)
self.share_inputs["cu_seqlens_k"].copy_(cu_seqlens_k, False)

Expand All @@ -1279,7 +1280,6 @@ def _prepare_inputs(self) -> None:

# Initialize forward meta data
self.initialize_forward_meta()
self.forward_meta.batch_id_per_token.copy_(batch_id_per_token, False)

# Get sampling metadata
self.sampling_metadata = SamplingMetadata(
Expand Down
Loading