Skip to content

Commit 84a24d8

Browse files
authored
Merge pull request #960 from popcion/main
feat: add context-aware translation for openai; Fix: Font blurring
2 parents ba830ff + fafcb59 commit 84a24d8

File tree

9 files changed

+137
-42
lines changed

9 files changed

+137
-42
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ Colorizer: **mc2**
396396
--pre-dict PRE_DICT Path to the pre-translation replacement dictionary file
397397
--post-dict POST_DICT Path to the post-translation replacement dictionary file
398398
--kernel-size KERNEL_SIZE Set the kernel size for the convolution of text erasure area to completely clear residual text
399+
--context-size Number of previous pages to use as context when translating the current page. Currently, this only applies to openaitranslator.
399400
```
400401
#### Additional Options
401402
##### Local Mode Options

README_CN.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ OCR:
396396
--pre-dict PRE_DICT 翻译前替换字典文件路径
397397
--post-dict POST_DICT 翻译后替换字典文件路径
398398
--kernel-size KERNEL_SIZE 设置文本擦除区域的卷积内核大小以完全清除文本残留
399+
--context-size 上下文页数(暂时仅对openaitranslator有效)
399400
```
400401
#### 附加参数
401402
##### 本地模式参数

manga_translator/args.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def general_parser(g_parser):
9393
help='Path to the post-translation dictionary file')
9494
g_parser.add_argument('--kernel-size', default=3, type=int,
9595
help='Set the convolution kernel size of the text erasure area to completely clean up text residues')
96-
96+
g_parser.add_argument('--context-size', default=0, type=int, help='Pages of context are needed for translating the current page')
9797

9898

9999
def reparse(arr: list):

manga_translator/manga_translator.py

Lines changed: 92 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,9 @@ def __init__(self, params: dict = None):
131131
self._model_usage_timestamps = {}
132132
self._detector_cleanup_task = None
133133
self.prep_manual = params.get('prep_manual', None)
134-
134+
self.context_size = params.get('context_size', 0)
135+
self.all_page_translations = []
136+
135137
def parse_init_params(self, params: dict):
136138
self.verbose = params.get('verbose', False)
137139
self.use_mtpe = params.get('use_mtpe', False)
@@ -600,6 +602,85 @@ async def _run_textline_merge(self, config: Config, ctx: Context):
600602

601603
return text_regions
602604

605+
def _build_prev_context(self):
    """
    Build the "previous translations" reference text from recent pages.

    Pages whose values contain no non-blank sentence are skipped. The most
    recent ``self.context_size`` non-empty pages are used, in chronological
    order, and their sentences are numbered consecutively across pages:

        <|1|>sentence
        <|2|>sentence
        ...

    Reads ``self.context_size`` (int) and ``self.all_page_translations``
    (a list of mappings whose values are the translated sentences).

    Returns:
        The header line followed by the numbered sentences, or an empty
        string when context is disabled, no page has been recorded yet, or
        every recorded page is empty.
    """
    if self.context_size <= 0 or not self.all_page_translations:
        return ""

    # Walk the history newest-first and stop as soon as enough non-empty pages
    # have been collected, instead of re-filtering the whole history per call.
    recent_pages = []
    for page in reversed(self.all_page_translations):
        # Non-string values (e.g. a region that never received a translation)
        # are ignored rather than crashing on ``.strip()``.
        sentences = [
            sent.strip()
            for sent in page.values()
            if isinstance(sent, str) and sent.strip()
        ]
        if not sentences:
            continue
        recent_pages.append(sentences)
        if len(recent_pages) == self.context_size:
            break

    if not recent_pages:
        return ""

    # Restore chronological order before numbering.
    recent_pages.reverse()
    numbered = [
        f"<|{index}|>{sentence}"
        for index, sentence in enumerate(
            (sentence for page in recent_pages for sentence in page), start=1
        )
    ]
    return "Here are the previous translation results for reference:\n" + "\n".join(numbered)
633+
634+
async def _dispatch_with_context(self, config: Config, texts: list[str], ctx: Context):
    """
    Translate ``texts``, injecting previous-page context for the ChatGPT translator.

    When ``config.translator.translator`` is ``Translator.chatgpt`` and there
    is context to inject, a fresh ``OpenAITranslator`` receives the text from
    ``self._build_prev_context()`` and translates directly. In every other
    case (another translator, context disabled, or no usable history yet) the
    request goes through ``dispatch_translation`` unchanged, so the regular
    pipeline is only bypassed when context is really being used.

    Args:
        config: Run configuration; ``config.translator`` supplies the
            translator selection, target language and translator chain.
        texts: Source strings of the current page, one per text region.
        ctx: Page context. ``ctx.from_lang`` is read on the ChatGPT path.
            NOTE(review): assumes ``from_lang`` has been set on ``ctx``
            before this call - confirm against the caller.

    Returns:
        Whatever the selected translation call returns for ``texts``.
    """
    pages_used = skipped = 0
    if self.context_size > 0:
        logger.info(f"Context-aware translation enabled with {self.context_size} pages of history")
        # Walk the history newest-first, counting the non-empty pages that form
        # the context window and the empty pages passed over while filling it.
        for page in reversed(self.all_page_translations):
            if pages_used == self.context_size:
                break
            if any(isinstance(sent, str) and sent.strip() for sent in page.values()):
                pages_used += 1
            else:
                skipped += 1

    # Build the context string
    prev_ctx = self._build_prev_context()

    # Special handling for the ChatGPT translator: inject context, but only
    # when there is context to inject.
    if prev_ctx and config.translator.translator == Translator.chatgpt:
        from .translators.chatgpt import OpenAITranslator
        translator = OpenAITranslator()
        translator.set_prev_context(prev_ctx)

        context_count = prev_ctx.count("<|")
        logger.info(f"Carrying {pages_used} pages of context, {context_count} sentences as translation reference")
        if skipped > 0:
            logger.warning(f"Skipped {skipped} pages with no sentences")

        return await translator._translate(
            ctx.from_lang,
            config.translator.target_lang,
            texts
        )

    return await dispatch_translation(
        config.translator.translator_gen,
        texts,
        config.translator,
        self.use_mtpe,
        ctx,
        'cpu' if self._gpu_limited_memory else self.device
    )
683+
603684
async def _run_text_translation(self, config: Config, ctx: Context):
604685
# 如果设置了prep_manual则将translator设置为none,防止token浪费
605686
# Set translator to none to prevent token waste if prep_manual is True
@@ -631,12 +712,11 @@ async def _run_text_translation(self, config: Config, ctx: Context):
631712
# 如果是none翻译器,不需要调用翻译服务,文本已经设置为空
632713
# If using none translator, no need to call translation service, text is already set to empty
633714
if config.translator.translator != Translator.none:
715+
# 自动给 ChatGPT 加上下文,其他翻译器不改变
716+
# Automatically add context to ChatGPT, no change for other translators
717+
texts = [region.text for region in ctx.text_regions]
634718
translated_sentences = \
635-
await dispatch_translation(config.translator.translator_gen,
636-
[region.text for region in ctx.text_regions],
637-
config.translator,
638-
self.use_mtpe,
639-
ctx, 'cpu' if self._gpu_limited_memory else self.device)
719+
await self._dispatch_with_context(config, texts, ctx)
640720
else:
641721
# 对于none翻译器,创建一个空翻译列表
642722
# For none translator, create an empty translation list
@@ -743,6 +823,12 @@ async def _run_text_translation(self, config: Config, ctx: Context):
743823
for v in replace_items:
744824
region.translation = region.translation.replace(v[1], v[0])
745825

826+
# 汇总本页翻译,供下一页做上文
827+
# Collect translations for the current page to use as "previous context" for the next page
828+
page_translations = {r.text_raw if hasattr(r, "text_raw") else r.text: r.translation
829+
for r in ctx.text_regions}
830+
self.all_page_translations.append(page_translations)
831+
746832
# Apply post dictionary after translating
747833
post_dict = load_dictionary(self.post_dict)
748834
post_replacements = []

manga_translator/rendering/__init__.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os
22
import cv2
33
import numpy as np
4-
from typing import List
4+
from typing import List, Optional
55
from shapely import affinity
66
from shapely.geometry import Polygon
77
from tqdm import tqdm
@@ -171,7 +171,8 @@ async def dispatch(
171171
hyphenate: bool = True,
172172
render_mask: np.ndarray = None,
173173
line_spacing: int = None,
174-
disable_font_border: bool = False
174+
disable_font_border: bool = False,
175+
upscale_ratio: Optional[int] = None
175176
) -> np.ndarray:
176177

177178
text_render.set_font(font_path)
@@ -187,7 +188,7 @@ async def dispatch(
187188
if render_mask is not None:
188189
# set render_mask to 1 for the region that is inside dst_points
189190
cv2.fillConvexPoly(render_mask, dst_points.astype(np.int32), 1)
190-
img = render(img, region, dst_points, hyphenate, line_spacing, disable_font_border)
191+
img = render(img, region, dst_points, hyphenate, line_spacing, disable_font_border, upscale_ratio)
191192
return img
192193

193194
def render(
@@ -196,7 +197,8 @@ def render(
196197
dst_points,
197198
hyphenate,
198199
line_spacing,
199-
disable_font_border
200+
disable_font_border,
201+
upscale_ratio: Optional[int] = None
200202
):
201203
fg, bg = region.get_font_colors()
202204
fg, bg = fg_bg_compare(fg, bg)
@@ -337,7 +339,9 @@ def render(
337339
#src_pts[:, 1] = np.clip(np.round(src_pts[:, 1]), 0, enlarged_h * 2)
338340

339341
M, _ = cv2.findHomography(src_points, dst_points, cv2.RANSAC, 5.0)
340-
rgba_region = cv2.warpPerspective(box, M, (img.shape[1], img.shape[0]), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=0)
342+
# 当开启了upscaler且upscale_ratio不为空时使用线性插值
343+
interpolation = cv2.INTER_LINEAR if upscale_ratio is not None else cv2.INTER_NEAREST
344+
rgba_region = cv2.warpPerspective(box, M, (img.shape[1], img.shape[0]), flags=interpolation, borderMode=cv2.BORDER_CONSTANT, borderValue=0)
341345
x, y, w, h = cv2.boundingRect(dst_points.astype(np.int32))
342346
canvas_region = rgba_region[y:y+h, x:x+w, :3]
343347
mask_region = rgba_region[y:y+h, x:x+w, 3:4].astype(np.float32) / 255.0

manga_translator/rendering/text_render.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def compact_special_symbols(text: str) -> str:
9191
text = re.sub(pattern, r'\1', text)
9292
return text
9393

94-
def rotate_image(image, angle):
94+
def rotate_image(image, angle, upscale_ratio=None):
9595
if angle == 0:
9696
return image, (0, 0)
9797
image_exp = np.zeros((round(image.shape[0] * 1.5), round(image.shape[1] * 1.5), image.shape[2]), dtype = np.uint8)
@@ -101,7 +101,8 @@ def rotate_image(image, angle):
101101
# from https://stackoverflow.com/questions/9041681/opencv-python-rotate-image-by-x-degrees-around-specific-point
102102
image_center = tuple(np.array(image_exp.shape[1::-1]) / 2)
103103
rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
104-
result = cv2.warpAffine(image_exp, rot_mat, image_exp.shape[1::-1], flags=cv2.INTER_LINEAR)
104+
interpolation = cv2.INTER_LINEAR if upscale_ratio else cv2.INTER_NEAREST
105+
result = cv2.warpAffine(image_exp, rot_mat, image_exp.shape[1::-1], flags=interpolation)
105106
if angle == 90:
106107
return result, (0, 0)
107108
return result, (diff_i, diff_j)

manga_translator/translators/chatgpt.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ def __init__(self, check_openai_key=True):
8080

8181
# 添加 rich 的 Console 对象
8282
self.console = Console()
83+
self.prev_context = ""
84+
85+
def set_prev_context(self, text: str = ""):
    """
    Store the previous-page context text on ``self.prev_context``.

    Any falsy value (``None``, empty string) is normalised to ``""``.
    """
    self.prev_context = text if text else ""
8387

8488
def parse_args(self, args: CommonTranslator):
8589
"""如果你有外部参数要解析,可在此对 self.config 做更新"""
@@ -116,6 +120,9 @@ def _assemble_prompts(self, from_lang: str, to_lang: str, queries: List[str]):
116120
- 根据字符长度 roughly 判断
117121
- 也可以用更准确的 tokens 估算
118122
"""
123+
124+
lang_name = self._LANGUAGE_CODE_MAP.get(to_lang, to_lang) if to_lang in self._LANGUAGE_CODE_MAP else to_lang
125+
119126
MAX_CHAR_PER_PROMPT = self._MAX_TOKENS * 4 # 粗略: 1 token ~ 4 chars
120127
chunk_queries = []
121128
current_length = 0
@@ -137,7 +144,7 @@ def _assemble_prompts(self, from_lang: str, to_lang: str, queries: List[str]):
137144
for this_batch in chunk_queries:
138145
prompt = ""
139146
if self.include_template:
140-
prompt = self.prompt_template.format(to_lang=to_lang)
147+
prompt = self.prompt_template.format(to_lang=lang_name)
141148
# 加上分行内容
142149
for i, query in enumerate(this_batch):
143150
prompt += f"\n<|{i+1}|>{query}"
@@ -524,9 +531,11 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str:
524531
The actual request part that calls openai.ChatCompletion.
525532
Incorporate the glossary function.
526533
"""
534+
lang_name = self._LANGUAGE_CODE_MAP.get(to_lang, to_lang) if to_lang in self._LANGUAGE_CODE_MAP else to_lang
535+
527536
# 构建 messages / Construct messages
528537
messages = [
529-
{'role': 'system', 'content': self.chat_system_template.format(to_lang=to_lang)},
538+
{'role': 'system', 'content': self.chat_system_template.format(to_lang=lang_name)},
530539
]
531540

532541
# 提取相关术语并添加到系统消息中 / Extract relevant terms and add them to the system message
@@ -539,7 +548,11 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str:
539548
system_message = self.glossary_system_template.format(glossary_text=glossary_text)
540549
messages.append({'role': 'system', 'content': system_message})
541550
self.logger.info(f"Loaded {len(relevant_terms)} relevant terms from the glossary.")
542-
551+
552+
# 如果有上文,添加到系统消息中 / If there is a previous context, add it to the system message
553+
if self.prev_context:
554+
messages.append({'role': 'system', 'content': self.prev_context})
555+
543556
# 如果需要先给出示例对话
544557
# Add chat samples if available
545558
lang_chat_samples = self.get_chat_sample(to_lang)

manga_translator/translators/config_gpt.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,8 @@ class ConfigGPT:
153153

154154
_JSON_MODE=False
155155

156-
_PROMPT_TEMPLATE = ('Please help me to translate the following text from a manga to {to_lang}.'
157-
'If it\'s already in {to_lang} or looks like gibberish'
156+
_PROMPT_TEMPLATE = ('Please help me to translate the following text from a manga to {to_lang}. '
157+
'If it\'s already in {to_lang} or looks like gibberish '
158158
'you have to output it as it is instead. Keep prefix format.\n'
159159
)
160160

@@ -253,9 +253,12 @@ def _closest_sample_match(self, all_samples: Dict, to_lang: str, max_distance=5)
253253
if self.langSamples is not None:
254254
return self.langSamples
255255

256-
self.langSamples=[]
256+
self.langSamples = []
257257

258258
try:
259+
if to_lang in self._LANGUAGE_CODE_MAP:
260+
to_lang = self._LANGUAGE_CODE_MAP[to_lang]
261+
259262
foundLang = closest_supported_match(
260263
Language.find(to_lang),
261264
[

manga_translator/translators/deepseek.py

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -55,16 +55,13 @@ def __init__(self, check_openai_key=True):
5555

5656
self.client = openai.AsyncOpenAI(api_key=openai.api_key or DEEPSEEK_API_KEY)
5757
if not self.client.api_key and check_openai_key:
58-
raise MissingAPIKeyException(
59-
'Please set the DEEPSEEK_API_KEY environment variable '
60-
'before using the DeepSeek translator.'
61-
)
58+
raise MissingAPIKeyException('DEEPSEEK_API_KEY environment variable required')
59+
6260
self.client.base_url = DEEPSEEK_API_BASE
6361
self.token_count = 0
6462
self.token_count_last = 0
6563
self.config = None
6664

67-
6865
def count_tokens(self, text: str):
6966
"""
7067
通过字符估计标记很困难,并且因语言而异:
@@ -85,6 +82,7 @@ def count_tokens(self, text: str):
8582

8683

8784
def _format_prompt_log(self, to_lang: str, prompt: str) -> str:
85+
prompt = prompt.strip()
8886
if to_lang in self.chat_sample:
8987
return '\n'.join([
9088
'System:',
@@ -228,22 +226,15 @@ async def translate_batch(prompt_queries, prompt_query_indices, split_level=0):
228226
return translations
229227

230228
async def _request_translation(self, to_lang: str, prompt: str) -> str:
231-
# 构建 messages
232-
# Build messages
233-
messages = [
234-
{'role': 'system', 'content': self.chat_system_template.format(to_lang=to_lang)},
235-
]
236-
237-
# 如果需要先给出示例对话
238-
# Add chat samples if available
229+
system_message = self._CHAT_SYSTEM_TEMPLATE.format(to_lang=to_lang)
230+
messages = [
231+
{'role': 'system', 'content': system_message},
232+
]
239233
lang_chat_samples = self.get_chat_sample(to_lang)
240234
if lang_chat_samples:
241235
messages.append({'role': 'user', 'content': lang_chat_samples[0]})
242236
messages.append({'role': 'assistant', 'content': lang_chat_samples[1]})
243-
244-
# 最终用户请求
245-
# User request
246-
messages.append({'role': 'user', 'content': prompt})
237+
messages.append({"role": "user", "content": prompt})
247238

248239
kwargs = {
249240
'model': DEEPSEEK_MODEL,
@@ -280,11 +271,6 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str:
280271
"\n------------------\n"
281272
)
282273

283-
self.logger.debug("-- GPT Response --\n" +
284-
response.choices[0].message.content +
285-
"\n------------------\n"
286-
)
287-
288274
# If no response with text is found, return the first response's content (which may be empty)
289275
# 如果没有找到包含文本的响应,则返回第一个响应的内容(可能为空)
290276
return response.choices[0].message.content

0 commit comments

Comments
 (0)