blob: 51f81de1facb0ceb0783e0f6e2fcb7c4b93b547d [file] [log] [blame]
The Android Open Source Project9066cfe2009-03-03 19:31:44 -08001#include "XLIFFFile.h"
2
3#include <algorithm>
4#include <sys/time.h>
5#include <time.h>
6
7const char* const XLIFF_XMLNS = "urn:oasis:names:tc:xliff:document:1.2";
8
9const char *const NS_MAP[] = {
10 "", XLIFF_XMLNS,
11 "xml", XMLNS_XMLNS,
12 NULL, NULL
13};
14
15const XMLNamespaceMap XLIFF_NAMESPACES(NS_MAP);
16
17int
18XLIFFFile::File::Compare(const XLIFFFile::File& that) const
19{
20 if (filename != that.filename) {
21 return filename < that.filename ? -1 : 1;
22 }
23 return 0;
24}
25
26// =====================================================================================
27XLIFFFile::XLIFFFile()
28{
29}
30
31XLIFFFile::~XLIFFFile()
32{
33}
34
35static XMLNode*
36get_unique_node(const XMLNode* parent, const string& ns, const string& name, bool required)
37{
38 size_t count = parent->CountElementsByName(ns, name);
39 if (count == 1) {
40 return parent->GetElementByNameAt(ns, name, 0);
41 } else {
42 if (required) {
43 SourcePos pos = count == 0
44 ? parent->Position()
45 : parent->GetElementByNameAt(XLIFF_XMLNS, name, 1)->Position();
46 pos.Error("<%s> elements must contain exactly one <%s> element",
47 parent->Name().c_str(), name.c_str());
48 }
49 return NULL;
50 }
51}
52
53XLIFFFile*
54XLIFFFile::Parse(const string& filename)
55{
56 XLIFFFile* result = new XLIFFFile();
57
58 XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
59 if (root == NULL) {
60 return NULL;
61 }
62
63 // <file>
64 vector<XMLNode*> files = root->GetElementsByName(XLIFF_XMLNS, "file");
65 for (size_t i=0; i<files.size(); i++) {
66 XMLNode* file = files[i];
67
68 string datatype = file->GetAttribute("", "datatype", "");
69 string originalFile = file->GetAttribute("", "original", "");
70
71 Configuration sourceConfig;
72 sourceConfig.locale = file->GetAttribute("", "source-language", "");
73 result->m_sourceConfig = sourceConfig;
74
75 Configuration targetConfig;
76 targetConfig.locale = file->GetAttribute("", "target-language", "");
77 result->m_targetConfig = targetConfig;
78
79 result->m_currentVersion = file->GetAttribute("", "build-num", "");
80 result->m_oldVersion = "old";
81
82 // <body>
83 XMLNode* body = get_unique_node(file, XLIFF_XMLNS, "body", true);
84 if (body == NULL) continue;
85
86 // <trans-unit>
87 vector<XMLNode*> transUnits = body->GetElementsByName(XLIFF_XMLNS, "trans-unit");
88 for (size_t j=0; j<transUnits.size(); j++) {
89 XMLNode* transUnit = transUnits[j];
90
91 string rawID = transUnit->GetAttribute("", "id", "");
92 if (rawID == "") {
93 transUnit->Position().Error("<trans-unit> tag requires an id");
94 continue;
95 }
96 string id;
97 int index;
98
99 if (!StringResource::ParseTypedID(rawID, &id, &index)) {
100 transUnit->Position().Error("<trans-unit> has invalid id '%s'\n", rawID.c_str());
101 continue;
102 }
103
104 // <source>
105 XMLNode* source = get_unique_node(transUnit, XLIFF_XMLNS, "source", false);
106 if (source != NULL) {
107 XMLNode* node = source->Clone();
108 node->SetPrettyRecursive(XMLNode::EXACT);
109 result->AddStringResource(StringResource(source->Position(), originalFile,
110 sourceConfig, id, index, node, CURRENT_VERSION,
111 result->m_currentVersion));
112 }
113
114 // <target>
115 XMLNode* target = get_unique_node(transUnit, XLIFF_XMLNS, "target", false);
116 if (target != NULL) {
117 XMLNode* node = target->Clone();
118 node->SetPrettyRecursive(XMLNode::EXACT);
119 result->AddStringResource(StringResource(target->Position(), originalFile,
120 targetConfig, id, index, node, CURRENT_VERSION,
121 result->m_currentVersion));
122 }
123
124 // <alt-trans>
125 XMLNode* altTrans = get_unique_node(transUnit, XLIFF_XMLNS, "alt-trans", false);
126 if (altTrans != NULL) {
127 // <source>
128 XMLNode* altSource = get_unique_node(altTrans, XLIFF_XMLNS, "source", false);
129 if (altSource != NULL) {
130 XMLNode* node = altSource->Clone();
131 node->SetPrettyRecursive(XMLNode::EXACT);
132 result->AddStringResource(StringResource(altSource->Position(),
133 originalFile, sourceConfig, id, index, node, OLD_VERSION,
134 result->m_oldVersion));
135 }
136
137 // <target>
138 XMLNode* altTarget = get_unique_node(altTrans, XLIFF_XMLNS, "target", false);
139 if (altTarget != NULL) {
140 XMLNode* node = altTarget->Clone();
141 node->SetPrettyRecursive(XMLNode::EXACT);
142 result->AddStringResource(StringResource(altTarget->Position(),
143 originalFile, targetConfig, id, index, node, OLD_VERSION,
144 result->m_oldVersion));
145 }
146 }
147 }
148 }
149 delete root;
150 return result;
151}
152
153XLIFFFile*
154XLIFFFile::Create(const Configuration& sourceConfig, const Configuration& targetConfig,
155 const string& currentVersion)
156{
157 XLIFFFile* result = new XLIFFFile();
158 result->m_sourceConfig = sourceConfig;
159 result->m_targetConfig = targetConfig;
160 result->m_currentVersion = currentVersion;
161 return result;
162}
163
164set<string>
165XLIFFFile::Files() const
166{
167 set<string> result;
168 for (vector<File>::const_iterator f = m_files.begin(); f != m_files.end(); f++) {
169 result.insert(f->filename);
170 }
171 return result;
172}
173
174void
175XLIFFFile::AddStringResource(const StringResource& str)
176{
177 string id = str.TypedID();
178
179 File* f = NULL;
180 const size_t I = m_files.size();
181 for (size_t i=0; i<I; i++) {
182 if (m_files[i].filename == str.file) {
183 f = &m_files[i];
184 break;
185 }
186 }
187 if (f == NULL) {
188 File file;
189 file.filename = str.file;
190 m_files.push_back(file);
191 f = &m_files[I];
192 }
193
194 const size_t J = f->transUnits.size();
195 TransUnit* g = NULL;
196 for (size_t j=0; j<J; j++) {
197 if (f->transUnits[j].id == id) {
198 g = &f->transUnits[j];
199 }
200 }
201 if (g == NULL) {
202 TransUnit group;
203 group.id = id;
204 f->transUnits.push_back(group);
205 g = &f->transUnits[J];
206 }
207
208 StringResource* res = find_string_res(*g, str);
209 if (res == NULL) {
210 return ;
211 }
212 if (res->id != "") {
213 str.pos.Error("Duplicate string resource: %s", res->id.c_str());
214 res->pos.Error("Previous definition here");
215 return ;
216 }
217 *res = str;
218
219 m_strings.insert(str);
220}
221
222void
223XLIFFFile::Filter(bool (*func)(const string&,const TransUnit&,void*), void* cookie)
224{
225 const size_t I = m_files.size();
226 for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
227 File& file = m_files[i];
228
229 const size_t J = file.transUnits.size();
230 for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
231 TransUnit& tu = file.transUnits[j];
232
233 bool keep = func(file.filename, tu, cookie);
234 if (!keep) {
235 if (tu.source.id != "") {
236 m_strings.erase(tu.source);
237 }
238 if (tu.target.id != "") {
239 m_strings.erase(tu.target);
240 }
241 if (tu.altSource.id != "") {
242 m_strings.erase(tu.altSource);
243 }
244 if (tu.altTarget.id != "") {
245 m_strings.erase(tu.altTarget);
246 }
247 file.transUnits.erase(file.transUnits.begin()+j);
248 }
249 }
250 if (file.transUnits.size() == 0) {
251 m_files.erase(m_files.begin()+i);
252 }
253 }
254}
255
256void
257XLIFFFile::Map(void (*func)(const string&,TransUnit*,void*), void* cookie)
258{
259 const size_t I = m_files.size();
260 for (size_t i=0; i<I; i++) {
261 File& file = m_files[i];
262
263 const size_t J = file.transUnits.size();
264 for (size_t j=0; j<J; j++) {
265 func(file.filename, &(file.transUnits[j]), cookie);
266 }
267 }
268}
269
270TransUnit*
271XLIFFFile::EditTransUnit(const string& filename, const string& id)
272{
273 const size_t I = m_files.size();
274 for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
275 File& file = m_files[i];
276 if (file.filename == filename) {
277 const size_t J = file.transUnits.size();
278 for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
279 TransUnit& tu = file.transUnits[j];
280 if (tu.id == id) {
281 return &tu;
282 }
283 }
284 }
285 }
286 return NULL;
287}
288
289StringResource*
290XLIFFFile::find_string_res(TransUnit& g, const StringResource& str)
291{
292 int index;
293 if (str.version == CURRENT_VERSION) {
294 index = 0;
295 }
296 else if (str.version == OLD_VERSION) {
297 index = 2;
298 }
299 else {
300 str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
301 return NULL;
302 }
303 if (str.config == m_sourceConfig) {
304 // index += 0;
305 }
306 else if (str.config == m_targetConfig) {
307 index += 1;
308 }
309 else {
310 str.pos.Error("unknown config for string %s: %s", str.id.c_str(),
311 str.config.ToString().c_str());
312 return NULL;
313 }
314 switch (index) {
315 case 0:
316 return &g.source;
317 case 1:
318 return &g.target;
319 case 2:
320 return &g.altSource;
321 case 3:
322 return &g.altTarget;
323 }
324 str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
325 return NULL;
326}
327
328int
329convert_html_to_xliff(const XMLNode* original, const string& name, XMLNode* addTo, int* phID)
330{
331 int err = 0;
332 if (original->Type() == XMLNode::TEXT) {
333 addTo->EditChildren().push_back(original->Clone());
334 return 0;
335 } else {
336 string ctype;
337 if (original->Namespace() == "") {
338 if (original->Name() == "b") {
339 ctype = "bold";
340 }
341 else if (original->Name() == "i") {
342 ctype = "italic";
343 }
344 else if (original->Name() == "u") {
345 ctype = "underline";
346 }
347 }
348 if (ctype != "") {
349 vector<XMLAttribute> attrs;
350 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "ctype", ctype));
351 XMLNode* copy = XMLNode::NewElement(original->Position(), XLIFF_XMLNS, "g",
352 attrs, XMLNode::EXACT);
353
354 const vector<XMLNode*>& children = original->Children();
355 size_t I = children.size();
356 for (size_t i=0; i<I; i++) {
357 err |= convert_html_to_xliff(children[i], name, copy, phID);
358 }
359 return err;
360 }
361 else {
362 if (original->Namespace() == XLIFF_XMLNS) {
363 addTo->EditChildren().push_back(original->Clone());
364 return 0;
365 } else {
366 if (original->Namespace() == "") {
367 // flatten out the tag into ph tags -- but only if there is no namespace
368 // that's still unsupported because propagating the xmlns attribute is hard.
369 vector<XMLAttribute> attrs;
370 char idStr[30];
371 (*phID)++;
372 sprintf(idStr, "id-%d", *phID);
373 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", idStr));
374
375 if (original->Children().size() == 0) {
376 XMLNode* ph = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
377 "ph", attrs, XMLNode::EXACT);
378 ph->EditChildren().push_back(
379 XMLNode::NewText(original->Position(),
380 original->ToString(XLIFF_NAMESPACES),
381 XMLNode::EXACT));
382 addTo->EditChildren().push_back(ph);
383 } else {
384 XMLNode* begin = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
385 "bpt", attrs, XMLNode::EXACT);
386 begin->EditChildren().push_back(
387 XMLNode::NewText(original->Position(),
388 original->OpenTagToString(XLIFF_NAMESPACES, XMLNode::EXACT),
389 XMLNode::EXACT));
390 XMLNode* end = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
391 "ept", attrs, XMLNode::EXACT);
392 string endText = "</";
393 endText += original->Name();
394 endText += ">";
395 end->EditChildren().push_back(XMLNode::NewText(original->Position(),
396 endText, XMLNode::EXACT));
397
398 addTo->EditChildren().push_back(begin);
399
400 const vector<XMLNode*>& children = original->Children();
401 size_t I = children.size();
402 for (size_t i=0; i<I; i++) {
403 err |= convert_html_to_xliff(children[i], name, addTo, phID);
404 }
405
406 addTo->EditChildren().push_back(end);
407 }
408 return err;
409 } else {
410 original->Position().Error("invalid <%s> element in <%s> tag\n",
411 original->Name().c_str(), name.c_str());
412 return 1;
413 }
414 }
415 }
416 }
417}
418
419XMLNode*
420create_string_node(const StringResource& str, const string& name)
421{
422 vector<XMLAttribute> attrs;
423 attrs.push_back(XMLAttribute(XMLNS_XMLNS, "space", "preserve"));
424 XMLNode* node = XMLNode::NewElement(str.pos, XLIFF_XMLNS, name, attrs, XMLNode::EXACT);
425
426 const vector<XMLNode*>& children = str.value->Children();
427 size_t I = children.size();
428 int err = 0;
429 for (size_t i=0; i<I; i++) {
430 int phID = 0;
431 err |= convert_html_to_xliff(children[i], name, node, &phID);
432 }
433
434 if (err != 0) {
435 delete node;
436 }
437 return node;
438}
439
440static bool
441compare_id(const TransUnit& lhs, const TransUnit& rhs)
442{
443 string lid, rid;
444 int lindex, rindex;
445 StringResource::ParseTypedID(lhs.id, &lid, &lindex);
446 StringResource::ParseTypedID(rhs.id, &rid, &rindex);
447 if (lid < rid) return true;
448 if (lid == rid && lindex < rindex) return true;
449 return false;
450}
451
452XMLNode*
453XLIFFFile::ToXMLNode() const
454{
455 XMLNode* root;
456 size_t N;
457
458 // <xliff>
459 {
460 vector<XMLAttribute> attrs;
461 XLIFF_NAMESPACES.AddToAttributes(&attrs);
462 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "version", "1.2"));
463 root = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "xliff", attrs, XMLNode::PRETTY);
464 }
465
466 vector<TransUnit> groups;
467
468 // <file>
469 vector<File> files = m_files;
470 sort(files.begin(), files.end());
471 const size_t I = files.size();
472 for (size_t i=0; i<I; i++) {
473 const File& file = files[i];
474
475 vector<XMLAttribute> fileAttrs;
476 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "datatype", "x-android-res"));
477 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "original", file.filename));
478
479 struct timeval tv;
480 struct timezone tz;
481 gettimeofday(&tv, &tz);
482 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "date", trim_string(ctime(&tv.tv_sec))));
483
484 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "source-language", m_sourceConfig.locale));
485 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "target-language", m_targetConfig.locale));
486 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "build-num", m_currentVersion));
487
488 XMLNode* fileNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "file", fileAttrs,
489 XMLNode::PRETTY);
490 root->EditChildren().push_back(fileNode);
491
492 // <body>
493 XMLNode* bodyNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "body",
494 vector<XMLAttribute>(), XMLNode::PRETTY);
495 fileNode->EditChildren().push_back(bodyNode);
496
497 // <trans-unit>
498 vector<TransUnit> transUnits = file.transUnits;
499 sort(transUnits.begin(), transUnits.end(), compare_id);
500 const size_t J = transUnits.size();
501 for (size_t j=0; j<J; j++) {
502 const TransUnit& transUnit = transUnits[j];
503
504 vector<XMLAttribute> tuAttrs;
505
506 // strings start with string:
507 tuAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", transUnit.id));
508 XMLNode* transUnitNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "trans-unit",
509 tuAttrs, XMLNode::PRETTY);
510 bodyNode->EditChildren().push_back(transUnitNode);
511
512 // <extradata>
513 if (transUnit.source.comment != "") {
514 vector<XMLAttribute> extradataAttrs;
515 XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "extradata",
516 extradataAttrs, XMLNode::EXACT);
517 transUnitNode->EditChildren().push_back(extraNode);
518 extraNode->EditChildren().push_back(
519 XMLNode::NewText(GENERATED_POS, transUnit.source.comment,
520 XMLNode::PRETTY));
521 }
522
523 // <source>
524 if (transUnit.source.id != "") {
525 transUnitNode->EditChildren().push_back(
526 create_string_node(transUnit.source, "source"));
527 }
528
529 // <target>
530 if (transUnit.target.id != "") {
531 transUnitNode->EditChildren().push_back(
532 create_string_node(transUnit.target, "target"));
533 }
534
535 // <alt-trans>
536 if (transUnit.altSource.id != "" || transUnit.altTarget.id != ""
537 || transUnit.rejectComment != "") {
538 vector<XMLAttribute> altTransAttrs;
539 XMLNode* altTransNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "alt-trans",
540 altTransAttrs, XMLNode::PRETTY);
541 transUnitNode->EditChildren().push_back(altTransNode);
542
543 // <extradata>
544 if (transUnit.rejectComment != "") {
545 vector<XMLAttribute> extradataAttrs;
546 XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS,
547 "extradata", extradataAttrs,
548 XMLNode::EXACT);
549 altTransNode->EditChildren().push_back(extraNode);
550 extraNode->EditChildren().push_back(
551 XMLNode::NewText(GENERATED_POS, transUnit.rejectComment,
552 XMLNode::PRETTY));
553 }
554
555 // <source>
556 if (transUnit.altSource.id != "") {
557 altTransNode->EditChildren().push_back(
558 create_string_node(transUnit.altSource, "source"));
559 }
560
561 // <target>
562 if (transUnit.altTarget.id != "") {
563 altTransNode->EditChildren().push_back(
564 create_string_node(transUnit.altTarget, "target"));
565 }
566 }
567
568 }
569 }
570
571 return root;
572}
573
574
575string
576XLIFFFile::ToString() const
577{
578 XMLNode* xml = ToXMLNode();
579 string s = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
580 s += xml->ToString(XLIFF_NAMESPACES);
581 delete xml;
582 s += '\n';
583 return s;
584}
585
586Stats
587XLIFFFile::GetStats(const string& config) const
588{
589 Stats stat;
590 stat.config = config;
591 stat.files = m_files.size();
592 stat.toBeTranslated = 0;
593 stat.noComments = 0;
594
595 for (vector<File>::const_iterator file=m_files.begin(); file!=m_files.end(); file++) {
596 stat.toBeTranslated += file->transUnits.size();
597
598 for (vector<TransUnit>::const_iterator tu=file->transUnits.begin();
599 tu!=file->transUnits.end(); tu++) {
600 if (tu->source.comment == "") {
601 stat.noComments++;
602 }
603 }
604 }
605
606 stat.totalStrings = stat.toBeTranslated;
607
608 return stat;
609}