@@ -131,7 +131,9 @@ def __init__(self, params: dict = None):
131131 self ._model_usage_timestamps = {}
132132 self ._detector_cleanup_task = None
133133 self .prep_manual = params .get ('prep_manual' , None )
134-
134+ self .context_size = params .get ('context_size' , 0 )
135+ self .all_page_translations = []
136+
135137 def parse_init_params (self , params : dict ):
136138 self .verbose = params .get ('verbose' , False )
137139 self .use_mtpe = params .get ('use_mtpe' , False )
@@ -600,6 +602,85 @@ async def _run_textline_merge(self, config: Config, ctx: Context):
600602
601603 return text_regions
602604
def _build_prev_context(self):
    """
    Build the "previous translations" prompt block from earlier pages.

    Pages without any non-blank sentence are ignored. Of the remaining
    pages, the most recent ``self.context_size`` are used and their
    sentences are numbered consecutively across pages:

        <|1|>sentence
        <|2|>sentence
        ...

    Returns:
        The reference header followed by one numbered sentence per line,
        or an empty string when context is disabled
        (``context_size <= 0``), no page has been recorded yet, or every
        recorded page is empty.
    """
    if self.context_size <= 0 or not self.all_page_translations:
        return ""

    # Reduce every page to its stripped, non-blank sentences in one pass.
    # Non-string values (e.g. a translation left as None) are skipped
    # instead of raising AttributeError on .strip().
    pages_with_text = []
    for page in self.all_page_translations:
        sentences = [
            sent.strip()
            for sent in page.values()
            if isinstance(sent, str) and sent.strip()
        ]
        if sentences:
            pages_with_text.append(sentences)

    if not pages_with_text:
        return ""

    # A negative slice longer than the list simply yields the whole list,
    # so no explicit min() against the number of available pages is needed.
    recent_pages = pages_with_text[-self.context_size:]

    # Emit exactly the documented "<|n|>sentence" form: no padding inside
    # the marker and no leading/trailing spaces on the line.
    numbered = [
        f"<|{index}|>{sentence}"
        for index, sentence in enumerate(
            (sent for page in recent_pages for sent in page), start=1
        )
    ]
    return "Here are the previous translation results for reference:\n" + "\n".join(numbered)
633+
async def _dispatch_with_context(self, config: Config, texts: list[str], ctx: Context):
    """
    Translate ``texts``, giving the ChatGPT translator previous pages as context.

    For every translator other than ``Translator.chatgpt`` this is a plain
    pass-through to ``dispatch_translation`` with no context work and no
    extra logging. For ChatGPT, the context string from
    ``self._build_prev_context()`` is injected through
    ``set_prev_context`` before translating.

    Args:
        config: Run configuration; ``config.translator`` supplies the
            translator selection, ``translator_gen`` and ``target_lang``.
        texts: Source strings of the current page, one per text region.
        ctx: Page context; ``ctx.from_lang`` is read on the ChatGPT path
            and ``ctx`` is forwarded to ``dispatch_translation`` otherwise.

    Returns:
        Whatever the selected translation call returns (presumably the
        translated strings in the same order as ``texts`` - confirm
        against ``dispatch_translation``).
    """
    # Other translators: behave exactly like the pre-context code path.
    if config.translator.translator != Translator.chatgpt:
        return await dispatch_translation(
            config.translator.translator_gen,
            texts,
            config.translator,
            self.use_mtpe,
            ctx,
            'cpu' if self._gpu_limited_memory else self.device
        )

    # Walk back from the newest page until `context_size` non-empty pages
    # are found, counting the empty pages passed over on the way. This
    # mirrors the page selection in _build_prev_context and yields an
    # accurate "skipped" figure for the log below.
    pages_used = skipped = 0
    if self.context_size > 0:
        logger.info(f"Context-aware translation enabled with {self.context_size} pages of history")
        for page in reversed(self.all_page_translations):
            if pages_used >= self.context_size:
                break
            if any(isinstance(sent, str) and sent.strip() for sent in page.values()):
                pages_used += 1
            else:
                skipped += 1

    # Build the context string (empty when context is disabled or unavailable).
    prev_ctx = self._build_prev_context()

    # NOTE(review): this path instantiates OpenAITranslator directly and
    # calls its private _translate(); self.use_mtpe and the device argument
    # that dispatch_translation receives are not passed here. Confirm that
    # bypassing dispatch_translation is intended.
    from .translators.chatgpt import OpenAITranslator
    translator = OpenAITranslator()
    translator.set_prev_context(prev_ctx)

    if pages_used > 0:
        # Every context sentence is prefixed with a "<|n|>" marker.
        context_count = prev_ctx.count("<|")
        logger.info(f"Carrying {pages_used} pages of context, {context_count} sentences as translation reference")
    if skipped > 0:
        logger.warning(f"Skipped {skipped} pages with no sentences")

    return await translator._translate(
        ctx.from_lang,
        config.translator.target_lang,
        texts
    )
683+
603684 async def _run_text_translation (self , config : Config , ctx : Context ):
604685 # 如果设置了prep_manual则将translator设置为none,防止token浪费
605686 # Set translator to none to prevent token waste if prep_manual is True
@@ -631,12 +712,11 @@ async def _run_text_translation(self, config: Config, ctx: Context):
631712 # 如果是none翻译器,不需要调用翻译服务,文本已经设置为空
632713 # If using none translator, no need to call translation service, text is already set to empty
633714 if config .translator .translator != Translator .none :
715+ # 自动给 ChatGPT 加上下文,其他翻译器不改变
716+ # Automatically add context to ChatGPT, no change for other translators
717+ texts = [region .text for region in ctx .text_regions ]
634718 translated_sentences = \
635- await dispatch_translation (config .translator .translator_gen ,
636- [region .text for region in ctx .text_regions ],
637- config .translator ,
638- self .use_mtpe ,
639- ctx , 'cpu' if self ._gpu_limited_memory else self .device )
719+ await self ._dispatch_with_context (config , texts , ctx )
640720 else :
641721 # 对于none翻译器,创建一个空翻译列表
642722 # For none translator, create an empty translation list
@@ -743,6 +823,12 @@ async def _run_text_translation(self, config: Config, ctx: Context):
743823 for v in replace_items :
744824 region .translation = region .translation .replace (v [1 ], v [0 ])
745825
826+ # 汇总本页翻译,供下一页做上文
827+ # Collect translations for the current page to use as "previous context" for the next page
828+ page_translations = {r .text_raw if hasattr (r , "text_raw" ) else r .text : r .translation
829+ for r in ctx .text_regions }
830+ self .all_page_translations .append (page_translations )
831+
746832 # Apply post dictionary after translating
747833 post_dict = load_dictionary (self .post_dict )
748834 post_replacements = []
0 commit comments