Merge pull request #960 from popcion/main

zyddnys · web-flow · commit 84a24d8708e5 · 2025-06-02T11:16:42.000-04:00
feat: add context-aware translation for openai;  Fix: Font blurring
diff --git a/README.md b/README.md
@@ -396,6 +396,7 @@ Colorizer: **mc2**
 --pre-dict PRE_DICT            Path to the pre-translation replacement dictionary file
 --post-dict POST_DICT          Path to the post-translation replacement dictionary file
 --kernel-size KERNEL_SIZE      Set the kernel size for the convolution of text erasure area to completely clear residual text
+--context-size                 Number of previous pages used as context when translating the current page. Currently, this only applies to openaitranslator.
 ```
 #### Additional Options
 ##### Local Mode Options
diff --git a/README_CN.md b/README_CN.md
@@ -396,6 +396,7 @@ OCR：
 --pre-dict PRE_DICT            翻译前替换字典文件路径
 --post-dict POST_DICT          翻译后替换字典文件路径
 --kernel-size KERNEL_SIZE      设置文本擦除区域的卷积内核大小以完全清除文本残留
+--context-size                 上文页数（暂时仅对openaitranslator有效）
 ```
 #### 附加参数
 ##### 本地模式参数
diff --git a/manga_translator/args.py b/manga_translator/args.py
@@ -93,7 +93,7 @@ def general_parser(g_parser):
                         help='Path to the post-translation dictionary file')
     g_parser.add_argument('--kernel-size', default=3, type=int,
                         help='Set the convolution kernel size of the text erasure area to completely clean up text residues')
-
+    g_parser.add_argument('--context-size', default=0, type=int, help='Pages of context are needed for translating the current page')
 
 
 def reparse(arr: list):
diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py
@@ -131,7 +131,9 @@ def __init__(self, params: dict = None):
         self._model_usage_timestamps = {}
         self._detector_cleanup_task = None
         self.prep_manual = params.get('prep_manual', None)
-        
+        self.context_size = params.get('context_size', 0)
+        self.all_page_translations = []
+
     def parse_init_params(self, params: dict):
         self.verbose = params.get('verbose', False)
         self.use_mtpe = params.get('use_mtpe', False)
@@ -600,6 +602,85 @@ async def _run_textline_merge(self, config: Config, ctx: Context):
         
         return text_regions
 
+    def _build_prev_context(self):   
+        """
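+        Skip pages with no sentences, take the latest context_size non-empty pages and join them as:
+        <|1|>sentence
+        <|2|>sentence
+        ...
+        (after a one-line header); returns an empty string when there is no non-empty page.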
+        """
+        if self.context_size <= 0 or not self.all_page_translations:
+            return ""
+        # Keep only the pages that contain at least one non-blank sentence
+        non_empty_pages = [
+            page for page in self.all_page_translations
+            if any(sent.strip() for sent in page.values())
+        ]
+        # Number of pages actually used
+        pages_used = min(self.context_size, len(non_empty_pages))
+        if pages_used == 0:
+            return ""
+        tail = non_empty_pages[-pages_used:]
+        # Concatenate the sentences
+        lines = []
+        for page in tail:
+            for sent in page.values():
+                if sent.strip():
+                    lines.append(sent.strip())
+        numbered = [f"<|{i+1}|>{s}" for i, s in enumerate(lines)]
+        return "Here are the previous translation results for reference:\n" + "\n".join(numbered)
+
+    async def _dispatch_with_context(self, config: Config, texts: list[str], ctx: Context):
+        # 计算实际要使用的上下文页数和跳过的空页数
+        # Calculate the actual number of context pages to use and empty pages to skip
+        done_pages = self.all_page_translations
+        if self.context_size > 0 and done_pages:
+            pages_expected = min(self.context_size, len(done_pages))
+            non_empty_pages = [
+                page for page in done_pages
+                if any(sent.strip() for sent in page.values())
+            ]
+            pages_used = min(self.context_size, len(non_empty_pages))
+            skipped = pages_expected - pages_used
+        else:
+            pages_used = skipped = 0
+
+        if self.context_size > 0:
+            logger.info(f"Context-aware translation enabled with {self.context_size} pages of history")
+
+        # 构建上下文字符串
+        # Build the context string
+        prev_ctx = self._build_prev_context()
+
+        # 如果是 ChatGPT 翻译器，则专门处理上下文注入
+        # Special handling for ChatGPT translator: inject context
+        if config.translator.translator == Translator.chatgpt:
+            from .translators.chatgpt import OpenAITranslator
+            translator = OpenAITranslator()
+            translator.set_prev_context(prev_ctx)
+
+            if pages_used > 0:
+                context_count = prev_ctx.count("<|")
+                logger.info(f"Carrying {pages_used} pages of context, {context_count} sentences as translation reference")
+            if skipped > 0:
+                logger.warning(f"Skipped {skipped} pages with no sentences")
+                
+            return await translator._translate(
+                ctx.from_lang,          
+                config.translator.target_lang, 
+                texts
+            )
+
+        return await dispatch_translation(
+            config.translator.translator_gen,
+            texts,
+            config.translator,
+            self.use_mtpe,
+            ctx,
+            'cpu' if self._gpu_limited_memory else self.device
+        )
+
     async def _run_text_translation(self, config: Config, ctx: Context):
         # 如果设置了prep_manual则将translator设置为none，防止token浪费
         # Set translator to none to provent token waste if prep_manual is True  
@@ -631,12 +712,11 @@ async def _run_text_translation(self, config: Config, ctx: Context):
             # 如果是none翻译器，不需要调用翻译服务，文本已经设置为空  
             # If using none translator, no need to call translation service, text is already set to empty  
             if config.translator.translator != Translator.none:  
+                # 自动给 ChatGPT 加上下文，其他翻译器不改变
+                # Automatically add context to ChatGPT, no change for other translators
+                texts = [region.text for region in ctx.text_regions]
                 translated_sentences = \
-                    await dispatch_translation(config.translator.translator_gen,  
-                                              [region.text for region in ctx.text_regions],  
-                                              config.translator,  
-                                              self.use_mtpe,  
-                                              ctx, 'cpu' if self._gpu_limited_memory else self.device)  
+                    await self._dispatch_with_context(config, texts, ctx)
             else:  
                 # 对于none翻译器，创建一个空翻译列表  
                 # For none translator, create an empty translation list  
@@ -743,6 +823,12 @@ async def _run_text_translation(self, config: Config, ctx: Context):
                 for v in replace_items:
                     region.translation = region.translation.replace(v[1], v[0])
 
+        # 汇总本页翻译，供下一页做上文
+        # Collect translations for the current page to use as "previous context" for the next page
+        page_translations = {r.text_raw if hasattr(r, "text_raw") else r.text: r.translation
+                             for r in ctx.text_regions}
+        self.all_page_translations.append(page_translations)
+
         # Apply post dictionary after translating
         post_dict = load_dictionary(self.post_dict)
         post_replacements = []  
diff --git a/manga_translator/rendering/__init__.py b/manga_translator/rendering/__init__.py
@@ -1,7 +1,7 @@
 import os
 import cv2
 import numpy as np
-from typing import List
+from typing import List, Optional
 from shapely import affinity
 from shapely.geometry import Polygon
 from tqdm import tqdm
@@ -171,7 +171,8 @@ async def dispatch(
     hyphenate: bool = True,
     render_mask: np.ndarray = None,
     line_spacing: int = None,
-    disable_font_border: bool = False
+    disable_font_border: bool = False,
+    upscale_ratio: Optional[int] = None
     ) -> np.ndarray:
 
     text_render.set_font(font_path)
@@ -187,7 +188,7 @@ async def dispatch(
         if render_mask is not None:
             # set render_mask to 1 for the region that is inside dst_points
             cv2.fillConvexPoly(render_mask, dst_points.astype(np.int32), 1)
-        img = render(img, region, dst_points, hyphenate, line_spacing, disable_font_border)
+        img = render(img, region, dst_points, hyphenate, line_spacing, disable_font_border, upscale_ratio)
     return img
 
 def render(
@@ -196,7 +197,8 @@ def render(
     dst_points,
     hyphenate,
     line_spacing,
-    disable_font_border
+    disable_font_border,
+    upscale_ratio: Optional[int] = None
 ):
     fg, bg = region.get_font_colors()
     fg, bg = fg_bg_compare(fg, bg)
@@ -337,7 +339,9 @@ def render(
     #src_pts[:, 1] = np.clip(np.round(src_pts[:, 1]), 0, enlarged_h * 2)
 
     M, _ = cv2.findHomography(src_points, dst_points, cv2.RANSAC, 5.0)
-    rgba_region = cv2.warpPerspective(box, M, (img.shape[1], img.shape[0]), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=0)
+    # Use linear interpolation when the upscaler is enabled and upscale_ratio is not None; otherwise nearest-neighbor
+    interpolation = cv2.INTER_LINEAR if upscale_ratio is not None else cv2.INTER_NEAREST
+    rgba_region = cv2.warpPerspective(box, M, (img.shape[1], img.shape[0]), flags=interpolation, borderMode=cv2.BORDER_CONSTANT, borderValue=0)
     x, y, w, h = cv2.boundingRect(dst_points.astype(np.int32))
     canvas_region = rgba_region[y:y+h, x:x+w, :3]
     mask_region = rgba_region[y:y+h, x:x+w, 3:4].astype(np.float32) / 255.0
diff --git a/manga_translator/rendering/text_render.py b/manga_translator/rendering/text_render.py
@@ -91,7 +91,7 @@ def compact_special_symbols(text: str) -> str:
     text = re.sub(pattern, r'\1', text) 
     return text
     
-def rotate_image(image, angle):
+def rotate_image(image, angle, upscale_ratio=None):
     if angle == 0:
         return image, (0, 0)
     image_exp = np.zeros((round(image.shape[0] * 1.5), round(image.shape[1] * 1.5), image.shape[2]), dtype = np.uint8)
@@ -101,7 +101,8 @@ def rotate_image(image, angle):
     # from https://stackoverflow.com/questions/9041681/opencv-python-rotate-image-by-x-degrees-around-specific-point
     image_center = tuple(np.array(image_exp.shape[1::-1]) / 2)
     rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
-    result = cv2.warpAffine(image_exp, rot_mat, image_exp.shape[1::-1], flags=cv2.INTER_LINEAR)
+    interpolation = cv2.INTER_LINEAR if upscale_ratio else cv2.INTER_NEAREST
+    result = cv2.warpAffine(image_exp, rot_mat, image_exp.shape[1::-1], flags=interpolation)
     if angle == 90:
         return result, (0, 0)
     return result, (diff_i, diff_j)
diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py
@@ -80,6 +80,10 @@ def __init__(self, check_openai_key=True):
 
         # 添加 rich 的 Console 对象  
         self.console = Console()  
+        self.prev_context = ""
+
+    def set_prev_context(self, text: str = ""):
+        self.prev_context = text or ""     
 
     def parse_args(self, args: CommonTranslator):
         """如果你有外部参数要解析，可在此对 self.config 做更新"""
@@ -116,6 +120,9 @@ def _assemble_prompts(self, from_lang: str, to_lang: str, queries: List[str]):
           - 根据字符长度 roughly 判断
           - 也可以用更准确的 tokens 估算
         """
+
+        lang_name = self._LANGUAGE_CODE_MAP.get(to_lang, to_lang) if to_lang in self._LANGUAGE_CODE_MAP else to_lang
+        
         MAX_CHAR_PER_PROMPT = self._MAX_TOKENS * 4  # 粗略: 1 token ~ 4 chars
         chunk_queries = []
         current_length = 0
@@ -137,7 +144,7 @@ def _assemble_prompts(self, from_lang: str, to_lang: str, queries: List[str]):
         for this_batch in chunk_queries:
             prompt = ""
             if self.include_template:
-                prompt = self.prompt_template.format(to_lang=to_lang)
+                prompt = self.prompt_template.format(to_lang=lang_name)
             # 加上分行内容
             for i, query in enumerate(this_batch):
                 prompt += f"\n<|{i+1}|>{query}"
@@ -524,9 +531,11 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str:
         The actual request part that calls openai.ChatCompletion.
         Incorporate the glossary function.
         """        
+        lang_name = self._LANGUAGE_CODE_MAP.get(to_lang, to_lang) if to_lang in self._LANGUAGE_CODE_MAP else to_lang
+                
         # 构建 messages / Construct messages
         messages = [  
-            {'role': 'system', 'content': self.chat_system_template.format(to_lang=to_lang)},  
+            {'role': 'system', 'content': self.chat_system_template.format(to_lang=lang_name)},  
         ]  
 
         # 提取相关术语并添加到系统消息中  / Extract relevant terms and add them to the system message
@@ -539,7 +548,11 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str:
             system_message = self.glossary_system_template.format(glossary_text=glossary_text)  
             messages.append({'role': 'system', 'content': system_message})  
             self.logger.info(f"Loaded {len(relevant_terms)} relevant terms from the glossary.")  
-            
+        
+        # 如果有上文，添加到系统消息中 / If there is a previous context, add it to the system message        
+        if self.prev_context:
+            messages.append({'role': 'system', 'content': self.prev_context})            
+        
         # 如果需要先给出示例对话
         # Add chat samples if available
         lang_chat_samples = self.get_chat_sample(to_lang)
diff --git a/manga_translator/translators/config_gpt.py b/manga_translator/translators/config_gpt.py
@@ -153,8 +153,8 @@ class ConfigGPT:
 
     _JSON_MODE=False
 
-    _PROMPT_TEMPLATE = ('Please help me to translate the following text from a manga to {to_lang}.'
-                        'If it\'s already in {to_lang} or looks like gibberish'
+    _PROMPT_TEMPLATE = ('Please help me to translate the following text from a manga to {to_lang}. '
+                        'If it\'s already in {to_lang} or looks like gibberish '
                         'you have to output it as it is instead. Keep prefix format.\n'
                     )
                     
@@ -253,9 +253,12 @@ def _closest_sample_match(self, all_samples: Dict, to_lang: str, max_distance=5)
         if self.langSamples is not None:
             return self.langSamples
         
-        self.langSamples=[]
+        self.langSamples = []
 
         try:
+            if to_lang in self._LANGUAGE_CODE_MAP:
+                to_lang = self._LANGUAGE_CODE_MAP[to_lang]
+
             foundLang = closest_supported_match(
                                 Language.find(to_lang), 
                                 [
diff --git a/manga_translator/translators/deepseek.py b/manga_translator/translators/deepseek.py
@@ -55,16 +55,13 @@ def __init__(self, check_openai_key=True):
 
         self.client = openai.AsyncOpenAI(api_key=openai.api_key or DEEPSEEK_API_KEY)
         if not self.client.api_key and check_openai_key:
-            raise MissingAPIKeyException(
-                        'Please set the DEEPSEEK_API_KEY environment variable '
-                        'before using the DeepSeek translator.'
-                    )
+            raise MissingAPIKeyException('DEEPSEEK_API_KEY environment variable required')
+            
         self.client.base_url = DEEPSEEK_API_BASE
         self.token_count = 0
         self.token_count_last = 0
         self.config = None
 
-
     def count_tokens(self, text: str):
         """
         通过字符估计标记很困难，并且因语言而异:
@@ -85,6 +82,7 @@ def count_tokens(self, text: str):
 
 
     def _format_prompt_log(self, to_lang: str, prompt: str) -> str:
+        prompt = prompt.strip()  
         if to_lang in self.chat_sample:
             return '\n'.join([
                 'System:',
@@ -228,22 +226,15 @@ async def translate_batch(prompt_queries, prompt_query_indices, split_level=0):
         return translations
 
     async def _request_translation(self, to_lang: str, prompt: str) -> str:
-        # 构建 messages
-        # Build messages
-        messages = [
-            {'role': 'system', 'content': self.chat_system_template.format(to_lang=to_lang)},
-        ]
-
-        # 如果需要先给出示例对话
-        # Add chat samples if available
+        system_message = self._CHAT_SYSTEM_TEMPLATE.format(to_lang=to_lang) 
+        messages = [  
+            {'role': 'system', 'content': system_message},  
+        ]  
         lang_chat_samples = self.get_chat_sample(to_lang)
         if lang_chat_samples:
             messages.append({'role': 'user', 'content': lang_chat_samples[0]})
             messages.append({'role': 'assistant', 'content': lang_chat_samples[1]})
-
-        # 最终用户请求
-        # User request
-        messages.append({'role': 'user', 'content': prompt})
+        messages.append({"role": "user", "content": prompt})
 
         kwargs = {
             'model': DEEPSEEK_MODEL,
@@ -280,11 +271,6 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str:
                                 "\n------------------\n"
                             )
                 
-            self.logger.debug("-- GPT Response --\n" +
-                                response.choices[0].message.content +
-                                "\n------------------\n"
-                            )
-
             # If no response with text is found, return the first response's content (which may be empty)
             # 如果没有找到包含文本的响应，则返回第一个响应的内容（可能为空）
             return response.choices[0].message.content