blob: 5da05a27bd4779035f98df2ebfe5cf849de89af1 [file] [log] [blame]
The Android Open Source Project54b6cfa2008-10-21 07:00:00 -07001#include "XLIFFFile.h"
2
3#include <sys/time.h>
4#include <time.h>
5
6const char* const XLIFF_XMLNS = "urn:oasis:names:tc:xliff:document:1.2";
7
8const char *const NS_MAP[] = {
9 "", XLIFF_XMLNS,
10 "xml", XMLNS_XMLNS,
11 NULL, NULL
12};
13
14const XMLNamespaceMap XLIFF_NAMESPACES(NS_MAP);
15
16int
17XLIFFFile::File::Compare(const XLIFFFile::File& that) const
18{
19 if (filename != that.filename) {
20 return filename < that.filename ? -1 : 1;
21 }
22 return 0;
23}
24
25// =====================================================================================
26XLIFFFile::XLIFFFile()
27{
28}
29
30XLIFFFile::~XLIFFFile()
31{
32}
33
34static XMLNode*
35get_unique_node(const XMLNode* parent, const string& ns, const string& name, bool required)
36{
37 size_t count = parent->CountElementsByName(ns, name);
38 if (count == 1) {
39 return parent->GetElementByNameAt(ns, name, 0);
40 } else {
41 if (required) {
42 SourcePos pos = count == 0
43 ? parent->Position()
44 : parent->GetElementByNameAt(XLIFF_XMLNS, name, 1)->Position();
45 pos.Error("<%s> elements must contain exactly one <%s> element",
46 parent->Name().c_str(), name.c_str());
47 }
48 return NULL;
49 }
50}
51
52XLIFFFile*
53XLIFFFile::Parse(const string& filename)
54{
55 XLIFFFile* result = new XLIFFFile();
56
57 XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
58 if (root == NULL) {
59 return NULL;
60 }
61
62 // <file>
63 vector<XMLNode*> files = root->GetElementsByName(XLIFF_XMLNS, "file");
64 for (size_t i=0; i<files.size(); i++) {
65 XMLNode* file = files[i];
66
67 string datatype = file->GetAttribute("", "datatype", "");
68 string originalFile = file->GetAttribute("", "original", "");
69
70 Configuration sourceConfig;
71 sourceConfig.locale = file->GetAttribute("", "source-language", "");
72 result->m_sourceConfig = sourceConfig;
73
74 Configuration targetConfig;
75 targetConfig.locale = file->GetAttribute("", "target-language", "");
76 result->m_targetConfig = targetConfig;
77
78 result->m_currentVersion = file->GetAttribute("", "build-num", "");
79 result->m_oldVersion = "old";
80
81 // <body>
82 XMLNode* body = get_unique_node(file, XLIFF_XMLNS, "body", true);
83 if (body == NULL) continue;
84
85 // <trans-unit>
86 vector<XMLNode*> transUnits = body->GetElementsByName(XLIFF_XMLNS, "trans-unit");
87 for (size_t j=0; j<transUnits.size(); j++) {
88 XMLNode* transUnit = transUnits[j];
89
90 string rawID = transUnit->GetAttribute("", "id", "");
91 if (rawID == "") {
92 transUnit->Position().Error("<trans-unit> tag requires an id");
93 continue;
94 }
95 string id;
96 int index;
97
98 if (!StringResource::ParseTypedID(rawID, &id, &index)) {
99 transUnit->Position().Error("<trans-unit> has invalid id '%s'\n", rawID.c_str());
100 continue;
101 }
102
103 // <source>
104 XMLNode* source = get_unique_node(transUnit, XLIFF_XMLNS, "source", false);
105 if (source != NULL) {
106 XMLNode* node = source->Clone();
107 node->SetPrettyRecursive(XMLNode::EXACT);
108 result->AddStringResource(StringResource(source->Position(), originalFile,
109 sourceConfig, id, index, node, CURRENT_VERSION,
110 result->m_currentVersion));
111 }
112
113 // <target>
114 XMLNode* target = get_unique_node(transUnit, XLIFF_XMLNS, "target", false);
115 if (target != NULL) {
116 XMLNode* node = target->Clone();
117 node->SetPrettyRecursive(XMLNode::EXACT);
118 result->AddStringResource(StringResource(target->Position(), originalFile,
119 targetConfig, id, index, node, CURRENT_VERSION,
120 result->m_currentVersion));
121 }
122
123 // <alt-trans>
124 XMLNode* altTrans = get_unique_node(transUnit, XLIFF_XMLNS, "alt-trans", false);
125 if (altTrans != NULL) {
126 // <source>
127 XMLNode* altSource = get_unique_node(altTrans, XLIFF_XMLNS, "source", false);
128 if (altSource != NULL) {
129 XMLNode* node = altSource->Clone();
130 node->SetPrettyRecursive(XMLNode::EXACT);
131 result->AddStringResource(StringResource(altSource->Position(),
132 originalFile, sourceConfig, id, index, node, OLD_VERSION,
133 result->m_oldVersion));
134 }
135
136 // <target>
137 XMLNode* altTarget = get_unique_node(altTrans, XLIFF_XMLNS, "target", false);
138 if (altTarget != NULL) {
139 XMLNode* node = altTarget->Clone();
140 node->SetPrettyRecursive(XMLNode::EXACT);
141 result->AddStringResource(StringResource(altTarget->Position(),
142 originalFile, targetConfig, id, index, node, OLD_VERSION,
143 result->m_oldVersion));
144 }
145 }
146 }
147 }
148 delete root;
149 return result;
150}
151
152XLIFFFile*
153XLIFFFile::Create(const Configuration& sourceConfig, const Configuration& targetConfig,
154 const string& currentVersion)
155{
156 XLIFFFile* result = new XLIFFFile();
157 result->m_sourceConfig = sourceConfig;
158 result->m_targetConfig = targetConfig;
159 result->m_currentVersion = currentVersion;
160 return result;
161}
162
163set<string>
164XLIFFFile::Files() const
165{
166 set<string> result;
167 for (vector<File>::const_iterator f = m_files.begin(); f != m_files.end(); f++) {
168 result.insert(f->filename);
169 }
170 return result;
171}
172
173void
174XLIFFFile::AddStringResource(const StringResource& str)
175{
176 string id = str.TypedID();
177
178 File* f = NULL;
179 const size_t I = m_files.size();
180 for (size_t i=0; i<I; i++) {
181 if (m_files[i].filename == str.file) {
182 f = &m_files[i];
183 break;
184 }
185 }
186 if (f == NULL) {
187 File file;
188 file.filename = str.file;
189 m_files.push_back(file);
190 f = &m_files[I];
191 }
192
193 const size_t J = f->transUnits.size();
194 TransUnit* g = NULL;
195 for (size_t j=0; j<J; j++) {
196 if (f->transUnits[j].id == id) {
197 g = &f->transUnits[j];
198 }
199 }
200 if (g == NULL) {
201 TransUnit group;
202 group.id = id;
203 f->transUnits.push_back(group);
204 g = &f->transUnits[J];
205 }
206
207 StringResource* res = find_string_res(*g, str);
208 if (res == NULL) {
209 return ;
210 }
211 if (res->id != "") {
212 str.pos.Error("Duplicate string resource: %s", res->id.c_str());
213 res->pos.Error("Previous definition here");
214 return ;
215 }
216 *res = str;
217
218 m_strings.insert(str);
219}
220
221void
222XLIFFFile::Filter(bool (*func)(const string&,const TransUnit&,void*), void* cookie)
223{
224 const size_t I = m_files.size();
225 for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
226 File& file = m_files[i];
227
228 const size_t J = file.transUnits.size();
229 for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
230 TransUnit& tu = file.transUnits[j];
231
232 bool keep = func(file.filename, tu, cookie);
233 if (!keep) {
234 if (tu.source.id != "") {
235 m_strings.erase(tu.source);
236 }
237 if (tu.target.id != "") {
238 m_strings.erase(tu.target);
239 }
240 if (tu.altSource.id != "") {
241 m_strings.erase(tu.altSource);
242 }
243 if (tu.altTarget.id != "") {
244 m_strings.erase(tu.altTarget);
245 }
246 file.transUnits.erase(file.transUnits.begin()+j);
247 }
248 }
249 if (file.transUnits.size() == 0) {
250 m_files.erase(m_files.begin()+i);
251 }
252 }
253}
254
255void
256XLIFFFile::Map(void (*func)(const string&,TransUnit*,void*), void* cookie)
257{
258 const size_t I = m_files.size();
259 for (size_t i=0; i<I; i++) {
260 File& file = m_files[i];
261
262 const size_t J = file.transUnits.size();
263 for (size_t j=0; j<J; j++) {
264 func(file.filename, &(file.transUnits[j]), cookie);
265 }
266 }
267}
268
269TransUnit*
270XLIFFFile::EditTransUnit(const string& filename, const string& id)
271{
272 const size_t I = m_files.size();
273 for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
274 File& file = m_files[i];
275 if (file.filename == filename) {
276 const size_t J = file.transUnits.size();
277 for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
278 TransUnit& tu = file.transUnits[j];
279 if (tu.id == id) {
280 return &tu;
281 }
282 }
283 }
284 }
285 return NULL;
286}
287
288StringResource*
289XLIFFFile::find_string_res(TransUnit& g, const StringResource& str)
290{
291 int index;
292 if (str.version == CURRENT_VERSION) {
293 index = 0;
294 }
295 else if (str.version == OLD_VERSION) {
296 index = 2;
297 }
298 else {
299 str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
300 return NULL;
301 }
302 if (str.config == m_sourceConfig) {
303 // index += 0;
304 }
305 else if (str.config == m_targetConfig) {
306 index += 1;
307 }
308 else {
309 str.pos.Error("unknown config for string %s: %s", str.id.c_str(),
310 str.config.ToString().c_str());
311 return NULL;
312 }
313 switch (index) {
314 case 0:
315 return &g.source;
316 case 1:
317 return &g.target;
318 case 2:
319 return &g.altSource;
320 case 3:
321 return &g.altTarget;
322 }
323 str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
324 return NULL;
325}
326
327int
328convert_html_to_xliff(const XMLNode* original, const string& name, XMLNode* addTo, int* phID)
329{
330 int err = 0;
331 if (original->Type() == XMLNode::TEXT) {
332 addTo->EditChildren().push_back(original->Clone());
333 return 0;
334 } else {
335 string ctype;
336 if (original->Namespace() == "") {
337 if (original->Name() == "b") {
338 ctype = "bold";
339 }
340 else if (original->Name() == "i") {
341 ctype = "italic";
342 }
343 else if (original->Name() == "u") {
344 ctype = "underline";
345 }
346 }
347 if (ctype != "") {
348 vector<XMLAttribute> attrs;
349 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "ctype", ctype));
350 XMLNode* copy = XMLNode::NewElement(original->Position(), XLIFF_XMLNS, "g",
351 attrs, XMLNode::EXACT);
352
353 const vector<XMLNode*>& children = original->Children();
354 size_t I = children.size();
355 for (size_t i=0; i<I; i++) {
356 err |= convert_html_to_xliff(children[i], name, copy, phID);
357 }
358 return err;
359 }
360 else {
361 if (original->Namespace() == XLIFF_XMLNS) {
362 addTo->EditChildren().push_back(original->Clone());
363 return 0;
364 } else {
365 if (original->Namespace() == "") {
366 // flatten out the tag into ph tags -- but only if there is no namespace
367 // that's still unsupported because propagating the xmlns attribute is hard.
368 vector<XMLAttribute> attrs;
369 char idStr[30];
370 (*phID)++;
371 sprintf(idStr, "id-%d", *phID);
372 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", idStr));
373
374 if (original->Children().size() == 0) {
375 XMLNode* ph = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
376 "ph", attrs, XMLNode::EXACT);
377 ph->EditChildren().push_back(
378 XMLNode::NewText(original->Position(),
379 original->ToString(XLIFF_NAMESPACES),
380 XMLNode::EXACT));
381 addTo->EditChildren().push_back(ph);
382 } else {
383 XMLNode* begin = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
384 "bpt", attrs, XMLNode::EXACT);
385 begin->EditChildren().push_back(
386 XMLNode::NewText(original->Position(),
387 original->OpenTagToString(XLIFF_NAMESPACES, XMLNode::EXACT),
388 XMLNode::EXACT));
389 XMLNode* end = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
390 "ept", attrs, XMLNode::EXACT);
391 string endText = "</";
392 endText += original->Name();
393 endText += ">";
394 end->EditChildren().push_back(XMLNode::NewText(original->Position(),
395 endText, XMLNode::EXACT));
396
397 addTo->EditChildren().push_back(begin);
398
399 const vector<XMLNode*>& children = original->Children();
400 size_t I = children.size();
401 for (size_t i=0; i<I; i++) {
402 err |= convert_html_to_xliff(children[i], name, addTo, phID);
403 }
404
405 addTo->EditChildren().push_back(end);
406 }
407 return err;
408 } else {
409 original->Position().Error("invalid <%s> element in <%s> tag\n",
410 original->Name().c_str(), name.c_str());
411 return 1;
412 }
413 }
414 }
415 }
416}
417
418XMLNode*
419create_string_node(const StringResource& str, const string& name)
420{
421 vector<XMLAttribute> attrs;
422 attrs.push_back(XMLAttribute(XMLNS_XMLNS, "space", "preserve"));
423 XMLNode* node = XMLNode::NewElement(str.pos, XLIFF_XMLNS, name, attrs, XMLNode::EXACT);
424
425 const vector<XMLNode*>& children = str.value->Children();
426 size_t I = children.size();
427 int err = 0;
428 for (size_t i=0; i<I; i++) {
429 int phID = 0;
430 err |= convert_html_to_xliff(children[i], name, node, &phID);
431 }
432
433 if (err != 0) {
434 delete node;
435 }
436 return node;
437}
438
439static bool
440compare_id(const TransUnit& lhs, const TransUnit& rhs)
441{
442 string lid, rid;
443 int lindex, rindex;
444 StringResource::ParseTypedID(lhs.id, &lid, &lindex);
445 StringResource::ParseTypedID(rhs.id, &rid, &rindex);
446 if (lid < rid) return true;
447 if (lid == rid && lindex < rindex) return true;
448 return false;
449}
450
451XMLNode*
452XLIFFFile::ToXMLNode() const
453{
454 XMLNode* root;
455 size_t N;
456
457 // <xliff>
458 {
459 vector<XMLAttribute> attrs;
460 XLIFF_NAMESPACES.AddToAttributes(&attrs);
461 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "version", "1.2"));
462 root = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "xliff", attrs, XMLNode::PRETTY);
463 }
464
465 vector<TransUnit> groups;
466
467 // <file>
468 vector<File> files = m_files;
469 sort(files.begin(), files.end());
470 const size_t I = files.size();
471 for (size_t i=0; i<I; i++) {
472 const File& file = files[i];
473
474 vector<XMLAttribute> fileAttrs;
475 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "datatype", "x-android-res"));
476 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "original", file.filename));
477
478 struct timeval tv;
479 struct timezone tz;
480 gettimeofday(&tv, &tz);
481 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "date", trim_string(ctime(&tv.tv_sec))));
482
483 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "source-language", m_sourceConfig.locale));
484 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "target-language", m_targetConfig.locale));
485 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "build-num", m_currentVersion));
486
487 XMLNode* fileNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "file", fileAttrs,
488 XMLNode::PRETTY);
489 root->EditChildren().push_back(fileNode);
490
491 // <body>
492 XMLNode* bodyNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "body",
493 vector<XMLAttribute>(), XMLNode::PRETTY);
494 fileNode->EditChildren().push_back(bodyNode);
495
496 // <trans-unit>
497 vector<TransUnit> transUnits = file.transUnits;
498 sort(transUnits.begin(), transUnits.end(), compare_id);
499 const size_t J = transUnits.size();
500 for (size_t j=0; j<J; j++) {
501 const TransUnit& transUnit = transUnits[j];
502
503 vector<XMLAttribute> tuAttrs;
504
505 // strings start with string:
506 tuAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", transUnit.id));
507 XMLNode* transUnitNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "trans-unit",
508 tuAttrs, XMLNode::PRETTY);
509 bodyNode->EditChildren().push_back(transUnitNode);
510
511 // <extradata>
512 if (transUnit.source.comment != "") {
513 vector<XMLAttribute> extradataAttrs;
514 XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "extradata",
515 extradataAttrs, XMLNode::EXACT);
516 transUnitNode->EditChildren().push_back(extraNode);
517 extraNode->EditChildren().push_back(
518 XMLNode::NewText(GENERATED_POS, transUnit.source.comment,
519 XMLNode::PRETTY));
520 }
521
522 // <source>
523 if (transUnit.source.id != "") {
524 transUnitNode->EditChildren().push_back(
525 create_string_node(transUnit.source, "source"));
526 }
527
528 // <target>
529 if (transUnit.target.id != "") {
530 transUnitNode->EditChildren().push_back(
531 create_string_node(transUnit.target, "target"));
532 }
533
534 // <alt-trans>
535 if (transUnit.altSource.id != "" || transUnit.altTarget.id != ""
536 || transUnit.rejectComment != "") {
537 vector<XMLAttribute> altTransAttrs;
538 XMLNode* altTransNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "alt-trans",
539 altTransAttrs, XMLNode::PRETTY);
540 transUnitNode->EditChildren().push_back(altTransNode);
541
542 // <extradata>
543 if (transUnit.rejectComment != "") {
544 vector<XMLAttribute> extradataAttrs;
545 XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS,
546 "extradata", extradataAttrs,
547 XMLNode::EXACT);
548 altTransNode->EditChildren().push_back(extraNode);
549 extraNode->EditChildren().push_back(
550 XMLNode::NewText(GENERATED_POS, transUnit.rejectComment,
551 XMLNode::PRETTY));
552 }
553
554 // <source>
555 if (transUnit.altSource.id != "") {
556 altTransNode->EditChildren().push_back(
557 create_string_node(transUnit.altSource, "source"));
558 }
559
560 // <target>
561 if (transUnit.altTarget.id != "") {
562 altTransNode->EditChildren().push_back(
563 create_string_node(transUnit.altTarget, "target"));
564 }
565 }
566
567 }
568 }
569
570 return root;
571}
572
573
574string
575XLIFFFile::ToString() const
576{
577 XMLNode* xml = ToXMLNode();
578 string s = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
579 s += xml->ToString(XLIFF_NAMESPACES);
580 delete xml;
581 s += '\n';
582 return s;
583}
584
585Stats
586XLIFFFile::GetStats(const string& config) const
587{
588 Stats stat;
589 stat.config = config;
590 stat.files = m_files.size();
591 stat.toBeTranslated = 0;
592 stat.noComments = 0;
593
594 for (vector<File>::const_iterator file=m_files.begin(); file!=m_files.end(); file++) {
595 stat.toBeTranslated += file->transUnits.size();
596
597 for (vector<TransUnit>::const_iterator tu=file->transUnits.begin();
598 tu!=file->transUnits.end(); tu++) {
599 if (tu->source.comment == "") {
600 stat.noComments++;
601 }
602 }
603 }
604
605 stat.totalStrings = stat.toBeTranslated;
606
607 return stat;
608}