Skip to content

Commit

Permalink
*
Browse files Browse the repository at this point in the history
  • Loading branch information
lemisky committed Aug 4, 2021
2 parents dbc45d6 + 74a61cc commit 2f52a92
Show file tree
Hide file tree
Showing 17 changed files with 31,803 additions and 11,766 deletions.
37 changes: 37 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,43 @@ document translate, read & translate & write



------

- **v0.0.7** 开始, 无需 **xlwings** 和 **Excel**, 废弃 **xls** 文件, 改用无限制的 **xlf**

- 代码结构也调整了, 从 **WordsFilter** 转移到 **Doc**

```python
from docts import *

xlf_path='xlf_path.xlf'

# 使用示例方法
todo(xlf_path)

# 运行完毕后, 会在xlf_path同目录下生成一个翻译好的文件, 则直接导入 Sisulizer

# 自定义, 无需再使用 parse_xl* 方法解析字符串, Doc中自动处理
doc = Doc(xlf_path)
# WordsFilter对象的add_filter等方法直接转移到Doc对象中
doc.add_filter(xxx_filter)

# 保存文件也无需使用 write_xl*
doc.save_words()

# 需要导出忽略文件时
doc.save_ignores()

```



看着迷迷糊糊, 不妨动手试一试

--------



## 概述

此项目我暂时只用来结合 **Sisulizer** 翻译 **CHM帮助文档** , 但实际不仅于此, 支持配合 **Sisulizer** 翻译它所支持的所有格式, 理论上也是支持与其他本地化程序配合使用的.
Expand Down
1 change: 1 addition & 0 deletions SisulizerDemo/Sisulizer.sds
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
<path>
<item>All</item>
<item>Sisulizer.chm</item>
<item>AndroidSettingsDialog.htm</item>
</path>
<export format="efXliff">
<xliff escape="seNone" items="[]" translation="teWriteEmpty" usefullids="0"/>
Expand Down
23,122 changes: 11,567 additions & 11,555 deletions SisulizerDemo/Sisulizer.slp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion SisulizerDemo/Sisulizer.~slp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<document created="20210709114110" version="4.0.374" date="20210709114121" scan="1" scanned="20210709114110">
<document created="20210709114110" version="4.0.374" date="20210804122731" scan="1" scanned="20210709114110">
<lang id="zh"/>
<source class="TChmSource" name="Sisulizer.chm" original="en-US" date="20181130124438" usebom="1" options="[coKeepTextFormat,coKeepTextFormatInCodeTags,coScanContents,coScanIndex,coScanSequence,coCompileChm,coUseCompatibleLangId,coUseCompatibleIndex,coIncludeImages]" sourcecodecontext="sccValue">
<node class="TChmNode" name="AboutDialog.htm">
Expand Down
Binary file modified SisulizerDemo/Sisulizer_0~4929.xls
Binary file not shown.
20,020 changes: 20,020 additions & 0 deletions SisulizerDemo/Sisulizer_words.xlf

Large diffs are not rendered by default.

Binary file modified SisulizerDemo/zh/Sisulizer.chm
Binary file not shown.
168 changes: 168 additions & 0 deletions docts/Doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import html
import re
from typing import Callable, Pattern, AnyStr, List

from pygtrans import Translate, Null


def parse_xlf(xlf_path: str) -> List[str]:
    """
    Parse an .xlf (XLIFF) file and return its unique, non-empty source strings.

    :param xlf_path: path to the .xlf file
    :return: de-duplicated, HTML-unescaped ``<source>`` texts
    :raises ValueError: if *xlf_path* does not end with '.xlf'
    """
    if not xlf_path.endswith('.xlf'):
        print(f'不是xlf文件: {xlf_path}')
        # Fix: a bare ``raise`` with no active exception would itself fail
        # with "RuntimeError: No active exception to re-raise".
        raise ValueError(f'不是xlf文件: {xlf_path}')

    # newline='' reads line endings verbatim so '\r\n' inside texts survives.
    with open(xlf_path, encoding='utf-8', newline='') as f:
        txt = f.read()
    origin_words = re.findall(r'<source[^>]*>(.*?)</source>', txt, re.DOTALL)
    del txt  # the raw document may be large; release it early

    # Drop duplicates and empty strings, then unescape HTML entities.
    words = [html.unescape(w) for w in set(origin_words) if w != '']

    print(f'过滤重复或空文本 parse_xlf: {len(origin_words) - len(words)}')

    return words


def write_xlf(xlf_path: str, origins: List[str], trans: List[str] = None, step=60000):
    """
    Write origin/translation pairs to *xlf_path* as an XLIFF 1.2 document.

    :param xlf_path: destination file path (overwritten)
    :param origins: source strings
    :param trans: translations; when None they are fetched via pygtrans
        (Google Translate) — requires network access
    :param step: unused; kept only for backward compatibility
        # NOTE(review): dead parameter — presumably a planned batch size
    :raises RuntimeError: when the translation service returns an error
    """
    # Fetch machine translations when the caller did not supply any.
    if trans is None:
        client = Translate()
        trans = client.translate(origins)
        if isinstance(trans, Null):
            print(trans.msg)
            # Fix: a bare ``raise`` here had no active exception and would
            # itself fail; raise one carrying the service's error message.
            raise RuntimeError(trans.msg)
        trans = [i.translatedText for i in trans]

    # newline='' writes '\r\n' inside texts verbatim (no translation to os.linesep).
    with open(xlf_path, 'w', encoding='utf-8', newline='') as f:
        f.write("""<?xml version="1.0" encoding="utf-8"?>
<xliff version="1.2" xmlns="urn:oasis:names:tc:xliff:document:1.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2-strict.xsd">
<file original="Sisulizer" datatype="unknown" source-language="en-US" target-language="zh-CN">
<body>
<group id="root" datatype="unknown">
""")
        # zip silently truncates to the shorter list; origins and trans are
        # expected to be the same length.
        for origin, target in zip(origins, trans):
            f.write(f"""<trans-unit>
<source>{html.escape(origin)}</source>
<target>{html.escape(target)}</target>
</trans-unit>
""")

        f.write("""
</group>
</body>
</file>
</xliff>""")


class Doc:
    """Chainable filter/map pipeline over the source strings of an .xlf file.

    Parses the file on construction; filters move words into ``ignores``,
    maps rewrite words in place, and ``save_*`` methods write new .xlf files
    next to the source file.
    """

    def __init__(self, xlf_path: str):
        """Parse *xlf_path* and initialise the word/ignore lists.

        :param xlf_path: path to the source .xlf file (validated by parse_xlf)
        """
        self.xlf_path = xlf_path
        # Strings still scheduled for translation/export.
        self.words = parse_xlf(xlf_path)
        # Strings excluded by filters; exportable via save_ignores().
        self.ignores = []

    def _partition(self, pred: Callable[[str], bool], label: str,
                   prep: Callable[[str], str] = None):
        """Move every word matching *pred* into ``self.ignores``.

        :param pred: predicate deciding whether a word is ignored
        :param label: name shown in the printed summary line
        :param prep: optional transform applied before testing; note the
            transformed word is what gets stored (preserves the original
            lstrip/rstrip behaviour of the start/end filters)
        :return: self, to allow chaining
        """
        kept = []
        for word in self.words:
            if prep is not None:
                word = prep(word)
            if pred(word):
                self.ignores.append(word)
            else:
                kept.append(word)
        print(f'过滤文本 {label}: {len(self.words) - len(kept)}')
        self.words = kept
        return self

    def add_filter(self, _filter: Callable[[str], bool]):
        """
        Add a filter; words for which *_filter* returns True are excluded
        from export.

        :param _filter: predicate taking a word, True means ignore
        :return: self, to allow chaining
        """
        return self._partition(_filter, _filter.__name__)

    def add_contain_filter(self, contain: Pattern[AnyStr]):
        """
        Ignore words matching the regular expression *contain*.

        :param contain: pattern searched (not anchored) in each word
        :return: self, to allow chaining
        """
        return self._partition(lambda w: re.search(contain, w) is not None,
                               f'add_contain_filter({contain})')

    def add_start_filter(self, start: str, strip: str = None):
        """Ignore words that start with *start*; when *strip* is given, the
        word is first left-stripped of those characters (and the stripped
        form is what gets stored)."""
        prep = (lambda w: w.lstrip(strip)) if strip else None
        return self._partition(lambda w: w.startswith(start),
                               f'add_start_filter({start})', prep)

    def add_end_filter(self, end: str, strip: str = None):
        """Ignore words that end with *end*; when *strip* is given, the word
        is first right-stripped of those characters (and the stripped form is
        what gets stored)."""
        prep = (lambda w: w.rstrip(strip)) if strip else None
        return self._partition(lambda w: w.endswith(end),
                               f'add_end_filter({end})', prep)

    def add_map(self, _map: Callable[[str], str]):
        """
        Apply *_map* to every remaining word (see docts/map/M1.py for an
        example mapper).

        :param _map: function rewriting a single word
        :return: self, to allow chaining
        """
        self.words = [_map(w) for w in self.words]
        return self

    def add_replace(self, old, new):
        """
        Replace every occurrence of *old* with *new* in all remaining words;
        a shorthand for add_map that needs no function.

        :param old: substring to replace
        :param new: replacement substring
        :return: self, to allow chaining
        """
        self.words = [w.replace(old, new) for w in self.words]
        return self

    def reset(self):
        """Return all ignored words to the active list.

        Fix: also clears ``self.ignores`` — previously a second reset()
        call duplicated every previously-ignored word.
        """
        self.words.extend(self.ignores)
        self.ignores = []
        return self

    def save_words(self):
        """Translate the remaining words and write them as
        ``<name>_words.xlf`` beside the source file.

        :return: path of the written file
        """
        # [:-4] strips the '.xlf' suffix, which parse_xlf validated in __init__.
        xlf_path = self.xlf_path[:-4] + '_words.xlf'
        write_xlf(xlf_path, self.words)
        return xlf_path

    def save_ignores(self):
        """Write the ignored words as ``<name>_ignores.xlf``, using each word
        as its own "translation" so no service call is made.

        :return: path of the written file
        """
        xlf_path = self.xlf_path[:-4] + '_ignores.xlf'
        write_xlf(xlf_path, self.ignores, self.ignores)
        return xlf_path
21 changes: 7 additions & 14 deletions docts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from .filter import WordsFilter
from .filter.F1 import *
from .map.M1 import *
from .parser.XLFParser import parse_xlf
from .parser.XLSParser import parse_xls
from .writer.XLSWriter import write_xls
from .filter import *
from .map import *
from .Doc import Doc

__title__ = 'docts'

Expand All @@ -23,11 +20,7 @@ def todo(path: str):
:param path:
:return:
"""
if path.endswith('.xls'):
words = parse_xls(path)
else:
words = parse_xlf(path)
wf = WordsFilter(words)
wf.add_filter(filter_not_str)
wf.add_filter(filter_eq_symbol)
write_xls(path, wf.words)
doc = Doc(path)
doc.add_filter(filter_eq_symbol)
doc.add_filter(filter_not_str)
doc.save_words()
89 changes: 1 addition & 88 deletions docts/filter/__init__.py
Original file line number Diff line number Diff line change
@@ -1,88 +1 @@
import re
from typing import Callable, List, Pattern, AnyStr


class WordsFilter:
    """Chainable filter/map pipeline over a list of words.

    NOTE(review): superseded by ``docts.Doc`` in this commit — this class is
    being deleted; documented here only for reference.
    """

    def __init__(self, words: List[str]):
        # Strings still scheduled for export.
        self.words = words
        # Strings excluded by filters.
        self.ignores = []

    def add_filter(self, _filter: Callable[[str], bool]):
        """
        Add a filter; words for which *_filter* returns True are excluded
        from export.

        :param _filter: predicate taking a word, True means ignore
        :return: self, to allow chaining
        """
        words = []
        for word in self.words:
            if _filter(word):
                self.ignores.append(word)
                continue
            words.append(word)
        print(f'过滤文本 {_filter.__name__}: {len(self.words) - len(words)}')
        self.words = words
        return self

    def add_contain_filter(self, contain: Pattern[AnyStr]):
        """
        Ignore words matching the regular expression *contain*.

        :param contain: pattern searched (not anchored) in each word
        :return: self, to allow chaining
        """
        words = []
        for word in self.words:
            if re.search(contain, word):
                self.ignores.append(word)
                continue
            words.append(word)
        print(f'过滤文本 add_contain_filter({contain}): {len(self.words) - len(words)}')
        self.words = words
        return self

    def add_start_filter(self, start: str, strip: str = None):
        """Ignore words that start with *start*; when *strip* is given, each
        word is first left-stripped of those characters (note: the stripped
        form is what gets stored in either list)."""
        words = []
        word: str
        for word in self.words:
            if strip:
                word = word.lstrip(strip)
            if word.startswith(start):
                self.ignores.append(word)
                continue
            words.append(word)
        print(f'过滤文本 add_start_filter({start}): {len(self.words) - len(words)}')
        self.words = words
        return self

    def add_end_filter(self, end: str, strip: str = None):
        """Ignore words that end with *end*; when *strip* is given, each word
        is first right-stripped of those characters (note: the stripped form
        is what gets stored in either list)."""
        words = []
        word: str
        for word in self.words:
            if strip:
                word = word.rstrip(strip)
            if word.endswith(end):
                self.ignores.append(word)
                continue
            words.append(word)
        print(f'过滤文本 add_end_filter({end}): {len(self.words) - len(words)}')
        self.words = words
        return self

    def add_map(self, _map: Callable[[str], str]):
        """
        Apply *_map* to every remaining word (see docts/map/M1.py for an
        example mapper).

        :param _map: function rewriting a single word
        :return: self, to allow chaining
        """
        self.words = [_map(i) for i in self.words]
        return self

    def add_replace(self, old, new):
        """
        Replace every occurrence of *old* with *new* in all remaining words;
        a shorthand for add_map that needs no function.

        :param old: substring to replace
        :param new: replacement substring
        :return: self, to allow chaining
        """
        i: str
        self.words = [i.replace(old, new) for i in self.words]
        return self
from .F1 import filter_eq_symbol, filter_not_str
1 change: 1 addition & 0 deletions docts/map/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .M1 import map_symbol_dot
28 changes: 0 additions & 28 deletions docts/parser/XLFParser.py

This file was deleted.

Loading

0 comments on commit 2f52a92

Please sign in to comment.