-
Notifications
You must be signed in to change notification settings - Fork 35
/
xliff2_serializer.py
246 lines (212 loc) · 9.08 KB
/
xliff2_serializer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from django.core.serializers import base
from ..cms.models import Page, PageTranslation
from . import base_serializer
if TYPE_CHECKING:
from typing import Any
from xml.dom.minidom import Element
from django.db.models.fields import CharField, TextField
logger = logging.getLogger(__name__)
class Serializer(base_serializer.Serializer):
    """
    Serializer which produces XLIFF 2.0 documents.

    Turns :class:`~integreat_cms.cms.models.pages.page_translation.PageTranslation` objects into translatable XLIFF files.
    """

    #: The language which is translated from (determined in :meth:`serialize`)
    source_language = None
    #: The language which is translated into (determined in :meth:`serialize`)
    target_language = None

    def serialize(
        self, queryset: list[PageTranslation], *args: Any, **kwargs: Any
    ) -> str:
        r"""
        Validate the given page translations, derive the source and target language
        from them and delegate the actual serialization to the base class.

        All translations have to share one language and one region, and that language
        must not be the default language of the region.

        :param queryset: The :class:`~integreat_cms.cms.models.pages.page_translation.PageTranslation` objects which
                         should be serialized
        :param \*args: The remaining arguments
        :param \**kwargs: The supplied keyword arguments
        :raises ~django.core.serializers.base.SerializationError: If the serialization fails
        :return: The serialized XLIFF string
        """
        # Collect the distinct languages of the given page translations
        languages = {translation.language for translation in queryset}
        logger.debug("XLIFF 2.0 serialization for languages %r", languages)
        if not languages:
            raise base.SerializationError("No page translations given to serialize.")
        # One XLIFF 2.0 file can only carry a single target language
        if len(languages) > 1:
            raise base.SerializationError(
                "The page translations have different languages, but in XLIFF 2.0 "
                "all objects of one file need to have the same language."
            )

        # Collect the distinct regions of the given page translations
        regions = {translation.page.region for translation in queryset}
        logger.debug("XLIFF 2.0 serialization for regions %r", regions)
        # Exactly one region is required to determine the source language
        if len(regions) != 1:
            raise base.SerializationError(
                "The page translations are from different regions."
            )

        # Both sets contain exactly one element at this point
        (region,) = regions
        (target_language,) = languages
        if target_language == region.default_language:
            raise base.SerializationError(
                "The page translation is in the region's default language."
            )

        self.target_language = target_language
        self.source_language = region.get_source_language(target_language.slug)
        logger.debug(
            "Starting XLIFF 2.0 serialization for translation from %r to %r",
            self.source_language,
            target_language,
        )
        return super().serialize(queryset, *args, **kwargs)

    def start_serialization(self) -> None:
        """
        Begin the document: let the base class start the serialization and then open
        the ``<xliff>`` root element with the language information.
        """
        if TYPE_CHECKING:
            assert self.xml
            assert self.source_language
            assert self.target_language

        logger.debug(
            "XLIFF 2.0 starting serialization",
        )
        super().start_serialization()

        # Tag and text direction of both languages are stored on the root element
        source, target = self.source_language, self.target_language
        root_attributes = {
            "version": "2.0",
            "xmlns": "urn:oasis:names:tc:xliff:document:2.0",
            "srcLang": source.bcp47_tag,
            "srcDir": source.text_direction,
            "trgLang": target.bcp47_tag,
            "trgDir": target.text_direction,
        }
        self.xml.startElement("xliff", root_attributes)

    def start_object(self, obj: PageTranslation) -> None:
        """
        Called when the serialization of an object begins.
        Opens an XLIFF ``<file>`` element which references the id of the object's page.

        :param obj: The page translation object which is started
        """
        if TYPE_CHECKING:
            assert self.xml

        logger.debug("XLIFF 2.0 serialization starting object %r", obj)
        file_attributes = {"original": str(obj.page.id)}
        self.xml.startElement("file", file_attributes)

    def handle_field(self, obj: PageTranslation, field: CharField | TextField) -> None:
        """
        Called for each field of an object (except for ForeignKeys and ManyToManyFields).
        Writes a ``<unit>`` whose ``<source>`` holds the field value of the source
        translation and whose ``<target>`` holds the field value of the given object.

        :param obj: The page translation object which is handled
        :param field: The model field
        :raises ~django.core.serializers.base.SerializationError: If the serialization fails
        """
        if TYPE_CHECKING:
            assert self.xml

        logger.debug(
            "XLIFF 2.0 serialization handling field %r of object %r", field, obj
        )
        unit_attributes = {
            "id": field.name,
            "resname": field.name,
            "restype": "string",
            "datatype": "html",
        }
        # Open <unit><segment><source> before the source translation is looked up
        for tag, tag_attributes in (
            ("unit", unit_attributes),
            ("segment", {}),
            ("source", {}),
        ):
            self.xml.startElement(tag, tag_attributes)

        # Depending on the serializer mode, drafts may serve as source as well
        if self.only_public:
            source_translation = obj.public_source_translation
        else:
            source_translation = obj.public_or_draft_source_translation
        if not source_translation:
            raise base.SerializationError(
                f"Page translation {obj!r} does not have a source translation in "
                f"{self.source_language!r} and therefore cannot be serialized to XLIFF."
            )
        logger.debug("XLIFF 2.0 source translation %r", source_translation)
        self.xml.cdata(field.value_to_string(source_translation))
        self.xml.endElement("source")

        # The target is pre-filled with the current value of the translation
        self.xml.startElement("target", {})
        self.xml.cdata(field.value_to_string(obj))
        for tag in ("target", "segment", "unit"):
            self.xml.endElement(tag)

    def end_object(self, obj: PageTranslation) -> None:
        """
        Called once all fields of an object have been handled.
        Closes the ``<file>`` element of the object.

        :param obj: The page translation object which is finished
        """
        if TYPE_CHECKING:
            assert self.xml

        logger.debug("XLIFF 2.0 serialization ending object %r", obj)
        self.xml.endElement("file")
class Deserializer(base_serializer.Deserializer):
    """
    XLIFF deserializer class for XLIFF version 2.0

    Deserializes XLIFF files into :class:`~integreat_cms.cms.models.pages.page_translation.PageTranslation` objects.
    """

    #: The node name of serialized fields
    unit_node = "unit"

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        r"""
        Initialize XLIFF 2.0 deserializer

        Consumes the event stream up to the ``<xliff>`` element and reads the source
        and target language from its ``srcLang`` and ``trgLang`` attributes.

        :param \*args: The supplied arguments
        :param \**kwargs: The supplied keyword arguments
        :raises ~django.core.serializers.base.DeserializationError: If the deserialization fails
        """
        # Initialize base deserializer
        super().__init__(*args, **kwargs)
        # Get language objects from <xliff>-node
        for event, node in self.event_stream:
            if event == "START_ELEMENT" and node.nodeName == "xliff":
                # Get source language stored in the xliff node
                self.source_language = self.get_language(
                    self.require_attribute(node, "srcLang")
                )
                # Get target language stored in the xliff node
                self.target_language = self.get_language(
                    self.require_attribute(node, "trgLang")
                )
                logger.debug(
                    "Starting XLIFF 2.0 deserialization for translation from %r to %r",
                    self.source_language,
                    self.target_language,
                )
                return
        # The stream was exhausted without encountering an <xliff> element
        raise base.DeserializationError(
            "The XLIFF file does not contain an <xliff>-block."
        )

    def get_object(self, node: Element) -> PageTranslation:
        """
        Retrieve the page translation which is referenced by the ``original`` attribute
        of the given node.

        If the referenced page already has a translation in the target language, this
        translation is returned. Otherwise, a new translation object is constructed
        (without being saved here), which takes over the status of the page's
        translation in the source language if there is one.

        :param node: The current xml node of the object
        :return: The existing or newly constructed page translation in the target language
        """
        # Get the page to which this serialized object belongs to
        page_id = self.require_attribute(node, "original")
        # NOTE(review): a failing lookup is not handled here and propagates to the caller
        page = Page.objects.get(id=page_id)
        logger.debug(
            "Referenced original page: %r",
            page,
        )
        # Retrieve an existing target translation or create a new one
        if page_translation := page.get_translation(self.target_language.slug):
            return page_translation
        # Initial attributes passed to model constructor
        attrs = {
            "page": page,
            "language": self.target_language,
        }
        # Take over the status of the source translation if there is one
        if source_translation := page.get_translation(self.source_language.slug):
            attrs["status"] = source_translation.status
        return PageTranslation(**attrs)