blob: 68c03b625cc171443d8b0742eb4b21b0b93d2f46 [file] [log] [blame]
The Android Open Source Project54b6cfa2008-10-21 07:00:00 -07001#include "SourcePos.h"
2#include "ValuesFile.h"
3#include "XLIFFFile.h"
4#include "Perforce.h"
5#include "merge_res_and_xliff.h"
6#include "localize.h"
7#include "file_utils.h"
8#include "res_check.h"
9#include "xmb.h"
10
11#include <host/pseudolocalize.h>
12
The Android Open Source Projectb7986892009-01-09 17:51:23 -080013#include <stdlib.h>
The Android Open Source Project54b6cfa2008-10-21 07:00:00 -070014#include <stdarg.h>
15#include <sstream>
16#include <stdio.h>
17#include <string.h>
Alexey Zaytsev862bfdb2008-10-22 02:05:55 +040018#include <stdlib.h>
The Android Open Source Project54b6cfa2008-10-21 07:00:00 -070019
20using namespace std;
21
// Stream that log_printf() writes to. While it is NULL, log_printf() does nothing;
// open_log_file() stores the result of fopen() here.
FILE* g_logFile = NULL;

// Self-test entry point invoked by `localize --test`; only declared here.
int test();
25
26int
27read_settings(const string& filename, map<string,Settings>* result, const string& rootDir)
28{
29 XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
30 if (root == NULL) {
31 SourcePos(filename, -1).Error("Error reading file.");
32 return 1;
33 }
34
35 // <configuration>
36 vector<XMLNode*> configNodes = root->GetElementsByName("", "configuration");
37 const size_t I = configNodes.size();
38 for (size_t i=0; i<I; i++) {
39 const XMLNode* configNode = configNodes[i];
40
41 Settings settings;
42 settings.id = configNode->GetAttribute("", "id", "");
43 if (settings.id == "") {
44 configNode->Position().Error("<configuration> needs an id attribute.");
45 delete root;
46 return 1;
47 }
48
49 settings.oldVersion = configNode->GetAttribute("", "old-cl", "");
50
51 settings.currentVersion = configNode->GetAttribute("", "new-cl", "");
52 if (settings.currentVersion == "") {
53 configNode->Position().Error("<configuration> needs a new-cl attribute.");
54 delete root;
55 return 1;
56 }
57
58 // <app>
59 vector<XMLNode*> appNodes = configNode->GetElementsByName("", "app");
60
61 const size_t J = appNodes.size();
62 for (size_t j=0; j<J; j++) {
63 const XMLNode* appNode = appNodes[j];
64
65 string dir = appNode->GetAttribute("", "dir", "");
66 if (dir == "") {
67 appNode->Position().Error("<app> needs a dir attribute.");
68 delete root;
69 return 1;
70 }
71
72 settings.apps.push_back(dir);
73 }
74
75 // <reject>
76 vector<XMLNode*> rejectNodes = configNode->GetElementsByName("", "reject");
77
78 const size_t K = rejectNodes.size();
79 for (size_t k=0; k<K; k++) {
80 const XMLNode* rejectNode = rejectNodes[k];
81
82 Reject reject;
83
84 reject.file = rejectNode->GetAttribute("", "file", "");
85 if (reject.file == "") {
86 rejectNode->Position().Error("<reject> needs a file attribute.");
87 delete root;
88 return 1;
89 }
90 string f = reject.file;
91 reject.file = rootDir;
92 reject.file += '/';
93 reject.file += f;
94
95 reject.name = rejectNode->GetAttribute("", "name", "");
96 if (reject.name == "") {
97 rejectNode->Position().Error("<reject> needs a name attribute.");
98 delete root;
99 return 1;
100 }
101
102 reject.comment = trim_string(rejectNode->CollapseTextContents());
103
104 settings.reject.push_back(reject);
105 }
106
107 (*result)[settings.id] = settings;
108 }
109
110 delete root;
111 return 0;
112}
113
114
115static void
116ValuesFile_to_XLIFFFile(const ValuesFile* values, XLIFFFile* xliff, const string& englishFilename)
117{
118 const set<StringResource>& strings = values->GetStrings();
119 for (set<StringResource>::const_iterator it=strings.begin(); it!=strings.end(); it++) {
120 StringResource res = *it;
121 res.file = englishFilename;
122 xliff->AddStringResource(res);
123 }
124}
125
126static bool
127contains_reject(const Settings& settings, const string& file, const TransUnit& tu)
128{
129 const string name = tu.id;
130 const vector<Reject>& reject = settings.reject;
131 const size_t I = reject.size();
132 for (size_t i=0; i<I; i++) {
133 const Reject& r = reject[i];
134 if (r.file == file && r.name == name) {
135 return true;
136 }
137 }
138 return false;
139}
140
141/**
142 * If it's been rejected, then we keep whatever info we have.
143 *
144 * Implements this truth table:
145 *
146 * S AT AS Keep
147 * -----------------------
148 * 0 0 0 0 (this case can't happen)
149 * 0 0 1 0 (it was there, never translated, and removed)
150 * 0 1 0 0 (somehow it got translated, but it was removed)
151 * 0 1 1 0 (it was removed after having been translated)
152 *
153 * 1 0 0 1 (it was just added)
154 * 1 0 1 1 (it was added, has been changed, but it never got translated)
155 * 1 1 0 1 (somehow it got translated, but we don't know based on what)
156 * 1 1 1 0/1 (it's in both. 0 if S=AS b/c there's no need to retranslate if they're
157 * the same. 1 if S!=AS because S changed, so it should be retranslated)
158 *
159 * The first four are cases where, whatever happened in the past, the string isn't there
160 * now, so it shouldn't be in the XLIFF file.
161 *
162 * For cases 4 and 5, the string has never been translated, so get it translated.
163 *
164 * For case 6, it's unclear where the translated version came from, so we're conservative
165 * and send it back for them to have another shot at.
166 *
167 * For case 7, we have some data. We have two choices. We could rely on the translator's
168 * translation memory or tools to notice that the strings haven't changed, and populate the
169 * <target> field themselves. Or if the string hasn't changed since last time, we can just
170 * not even tell them about it. As the project nears the end, it will be convenient to see
171 * the xliff files reducing in size, so we pick the latter. Obviously, if the string has
172 * changed, then we need to get it retranslated.
173 */
174bool
175keep_this_trans_unit(const string& file, const TransUnit& unit, void* cookie)
176{
177 const Settings* settings = reinterpret_cast<const Settings*>(cookie);
178
179 if (contains_reject(*settings, file, unit)) {
180 return true;
181 }
182
183 if (unit.source.id == "") {
184 return false;
185 }
186 if (unit.altTarget.id == "" || unit.altSource.id == "") {
187 return true;
188 }
189 return unit.source.value->ContentsToString(XLIFF_NAMESPACES)
190 != unit.altSource.value->ContentsToString(XLIFF_NAMESPACES);
191}
192
193int
194validate_config(const string& settingsFile, const map<string,Settings>& settings,
195 const string& config)
196{
197 if (settings.find(config) == settings.end()) {
198 SourcePos(settingsFile, -1).Error("settings file does not contain setting: %s\n",
199 config.c_str());
200 return 1;
201 }
202 return 0;
203}
204
205int
206validate_configs(const string& settingsFile, const map<string,Settings>& settings,
207 const vector<string>& configs)
208{
209 int err = 0;
210 for (size_t i=0; i<configs.size(); i++) {
211 string config = configs[i];
212 err |= validate_config(settingsFile, settings, config);
213 }
214 return err;
215}
216
217int
218select_files(vector<string> *resFiles, const string& config,
219 const map<string,Settings>& settings, const string& rootDir)
220{
221 int err;
222 vector<vector<string> > allResFiles;
223 vector<string> configs;
224 configs.push_back(config);
225 err = select_files(&allResFiles, configs, settings, rootDir);
226 if (err == 0) {
227 *resFiles = allResFiles[0];
228 }
229 return err;
230}
231
232int
233select_files(vector<vector<string> > *allResFiles, const vector<string>& configs,
234 const map<string,Settings>& settings, const string& rootDir)
235{
236 int err;
237 printf("Selecting files...");
238 fflush(stdout);
239
240 for (size_t i=0; i<configs.size(); i++) {
241 const string& config = configs[i];
242 const Settings& setting = settings.find(config)->second;
243
244 vector<string> resFiles;
245 err = Perforce::GetResourceFileNames(setting.currentVersion, rootDir,
246 setting.apps, &resFiles, true);
247 if (err != 0) {
248 fprintf(stderr, "error with perforce. bailing\n");
249 return err;
250 }
251
252 allResFiles->push_back(resFiles);
253 }
254 return 0;
255}
256
257static int
258do_export(const string& settingsFile, const string& rootDir, const string& outDir,
259 const string& targetLocale, const vector<string>& configs)
260{
261 bool success = true;
262 int err;
263
264 if (false) {
265 printf("settingsFile=%s\n", settingsFile.c_str());
266 printf("rootDir=%s\n", rootDir.c_str());
267 printf("outDir=%s\n", outDir.c_str());
268 for (size_t i=0; i<configs.size(); i++) {
269 printf("config[%zd]=%s\n", i, configs[i].c_str());
270 }
271 }
272
273 map<string,Settings> settings;
274 err = read_settings(settingsFile, &settings, rootDir);
275 if (err != 0) {
276 return err;
277 }
278
279 err = validate_configs(settingsFile, settings, configs);
280 if (err != 0) {
281 return err;
282 }
283
284 vector<vector<string> > allResFiles;
285 err = select_files(&allResFiles, configs, settings, rootDir);
286 if (err != 0) {
287 return err;
288 }
289
290 size_t totalFileCount = 0;
291 for (size_t i=0; i<allResFiles.size(); i++) {
292 totalFileCount += allResFiles[i].size();
293 }
294 totalFileCount *= 3; // we try all 3 versions of the file
295
296 size_t fileProgress = 0;
297 vector<Stats> stats;
298 vector<pair<string,XLIFFFile*> > xliffs;
299
300 for (size_t i=0; i<configs.size(); i++) {
301 const string& config = configs[i];
302 const Settings& setting = settings[config];
303
304 if (false) {
305 fprintf(stderr, "Configuration: %s (%zd of %zd)\n", config.c_str(), i+1,
306 configs.size());
307 fprintf(stderr, " Old CL: %s\n", setting.oldVersion.c_str());
308 fprintf(stderr, " Current CL: %s\n", setting.currentVersion.c_str());
309 }
310
311 Configuration english;
312 english.locale = "en_US";
313 Configuration translated;
314 translated.locale = targetLocale;
315 XLIFFFile* xliff = XLIFFFile::Create(english, translated, setting.currentVersion);
316
317 const vector<string>& resFiles = allResFiles[i];
318 const size_t J = resFiles.size();
319 for (size_t j=0; j<J; j++) {
320 string resFile = resFiles[j];
321
322 // parse the files into a ValuesFile
323 // pull out the strings and add them to the XLIFFFile
324
325 // current file
326 print_file_status(++fileProgress, totalFileCount);
327 ValuesFile* currentFile = get_values_file(resFile, english, CURRENT_VERSION,
328 setting.currentVersion, true);
329 if (currentFile != NULL) {
330 ValuesFile_to_XLIFFFile(currentFile, xliff, resFile);
331 //printf("currentFile=[%s]\n", currentFile->ToString().c_str());
332 } else {
333 fprintf(stderr, "error reading file %s@%s\n", resFile.c_str(),
334 setting.currentVersion.c_str());
335 success = false;
336 }
337
338 // old file
339 print_file_status(++fileProgress, totalFileCount);
340 ValuesFile* oldFile = get_values_file(resFile, english, OLD_VERSION,
341 setting.oldVersion, false);
342 if (oldFile != NULL) {
343 ValuesFile_to_XLIFFFile(oldFile, xliff, resFile);
344 //printf("oldFile=[%s]\n", oldFile->ToString().c_str());
345 }
346
347 // translated version
348 // (get the head of the tree for the most recent translation, but it's considered
349 // the old one because the "current" one hasn't been made yet, and this goes into
350 // the <alt-trans> tag if necessary
351 print_file_status(++fileProgress, totalFileCount);
352 string transFilename = translated_file_name(resFile, targetLocale);
353 ValuesFile* transFile = get_values_file(transFilename, translated, OLD_VERSION,
354 setting.currentVersion, false);
355 if (transFile != NULL) {
356 ValuesFile_to_XLIFFFile(transFile, xliff, resFile);
357 }
358
359 delete currentFile;
360 delete oldFile;
361 delete transFile;
362 }
363
364 Stats beforeFilterStats = xliff->GetStats(config);
365
366 // run through the XLIFFFile and strip out TransUnits that have identical
367 // old and current source values and are not in the reject list, or just
368 // old values and no source values
369 xliff->Filter(keep_this_trans_unit, (void*)&setting);
370
371 Stats afterFilterStats = xliff->GetStats(config);
372 afterFilterStats.totalStrings = beforeFilterStats.totalStrings;
373
374 // add the reject comments
375 for (vector<Reject>::const_iterator reject = setting.reject.begin();
376 reject != setting.reject.end(); reject++) {
377 TransUnit* tu = xliff->EditTransUnit(reject->file, reject->name);
378 tu->rejectComment = reject->comment;
379 }
380
381 // config-locale-current_cl.xliff
382 stringstream filename;
383 if (outDir != "") {
384 filename << outDir << '/';
385 }
386 filename << config << '-' << targetLocale << '-' << setting.currentVersion << ".xliff";
387 xliffs.push_back(pair<string,XLIFFFile*>(filename.str(), xliff));
388
389 stats.push_back(afterFilterStats);
390 }
391
392 // today is a good day to die
393 if (!success || SourcePos::HasErrors()) {
394 return 1;
395 }
396
397 // write the XLIFF files
398 printf("\nWriting %zd file%s...\n", xliffs.size(), xliffs.size() == 1 ? "" : "s");
399 for (vector<pair<string,XLIFFFile*> >::iterator it = xliffs.begin(); it != xliffs.end(); it++) {
400 const string& filename = it->first;
401 XLIFFFile* xliff = it->second;
402 string text = xliff->ToString();
403 write_to_file(filename, text);
404 }
405
406 // the stats
407 printf("\n"
408 " to without total\n"
409 " config files translate comments strings\n"
410 "-----------------------------------------------------------------------\n");
411 Stats totals;
412 totals.config = "total";
413 totals.files = 0;
414 totals.toBeTranslated = 0;
415 totals.noComments = 0;
416 totals.totalStrings = 0;
417 for (vector<Stats>::iterator it=stats.begin(); it!=stats.end(); it++) {
418 string cfg = it->config;
419 if (cfg.length() > 20) {
420 cfg.resize(20);
421 }
422 printf(" %-20s %-9zd %-9zd %-9zd %-19zd\n", cfg.c_str(), it->files,
423 it->toBeTranslated, it->noComments, it->totalStrings);
424 totals.files += it->files;
425 totals.toBeTranslated += it->toBeTranslated;
426 totals.noComments += it->noComments;
427 totals.totalStrings += it->totalStrings;
428 }
429 if (stats.size() > 1) {
430 printf("-----------------------------------------------------------------------\n"
431 " %-20s %-9zd %-9zd %-9zd %-19zd\n", totals.config.c_str(), totals.files,
432 totals.toBeTranslated, totals.noComments, totals.totalStrings);
433 }
434 printf("\n");
435 return 0;
436}
437
438struct PseudolocalizeSettings {
439 XLIFFFile* xliff;
440 bool expand;
441};
442
443
444string
445pseudolocalize_string(const string& source, const PseudolocalizeSettings* settings)
446{
447 return pseudolocalize_string(source);
448}
449
450static XMLNode*
451pseudolocalize_xml_node(const XMLNode* source, const PseudolocalizeSettings* settings)
452{
453 if (source->Type() == XMLNode::TEXT) {
454 return XMLNode::NewText(source->Position(), pseudolocalize_string(source->Text(), settings),
455 source->Pretty());
456 } else {
457 XMLNode* target;
458 if (source->Namespace() == XLIFF_XMLNS && source->Name() == "g") {
459 // XXX don't translate these
460 target = XMLNode::NewElement(source->Position(), source->Namespace(),
461 source->Name(), source->Attributes(), source->Pretty());
462 } else {
463 target = XMLNode::NewElement(source->Position(), source->Namespace(),
464 source->Name(), source->Attributes(), source->Pretty());
465 }
466
467 const vector<XMLNode*>& children = source->Children();
468 const size_t I = children.size();
469 for (size_t i=0; i<I; i++) {
470 target->EditChildren().push_back(pseudolocalize_xml_node(children[i], settings));
471 }
472
473 return target;
474 }
475}
476
477void
478pseudolocalize_trans_unit(const string&file, TransUnit* unit, void* cookie)
479{
480 const PseudolocalizeSettings* settings = (PseudolocalizeSettings*)cookie;
481
482 const StringResource& source = unit->source;
483 StringResource* target = &unit->target;
484 *target = source;
485
486 target->config = settings->xliff->TargetConfig();
487
488 delete target->value;
489 target->value = pseudolocalize_xml_node(source.value, settings);
490}
491
492int
493pseudolocalize_xliff(XLIFFFile* xliff, bool expand)
494{
495 PseudolocalizeSettings settings;
496
497 settings.xliff = xliff;
498 settings.expand = expand;
499 xliff->Map(pseudolocalize_trans_unit, &settings);
500 return 0;
501}
502
503static int
504do_pseudo(const string& infile, const string& outfile, bool expand)
505{
506 int err;
507
508 XLIFFFile* xliff = XLIFFFile::Parse(infile);
509 if (xliff == NULL) {
510 return 1;
511 }
512
513 pseudolocalize_xliff(xliff, expand);
514
515 err = write_to_file(outfile, xliff->ToString());
516
517 delete xliff;
518
519 return err;
520}
521
522void
523log_printf(const char *fmt, ...)
524{
525 int ret;
526 va_list ap;
527
528 if (g_logFile != NULL) {
529 va_start(ap, fmt);
530 ret = vfprintf(g_logFile, fmt, ap);
531 va_end(ap);
532 fflush(g_logFile);
533 }
534}
535
536void
537close_log_file()
538{
539 if (g_logFile != NULL) {
540 fclose(g_logFile);
541 }
542}
543
544void
545open_log_file(const char* file)
546{
547 g_logFile = fopen(file, "w");
548 printf("log file: %s -- %p\n", file, g_logFile);
549 atexit(close_log_file);
550}
551
/**
 * Writes the help text for every sub-command to stderr.
 *
 * Always returns 1 so callers can `return usage();` to exit with a failure status.
 */
static int
usage()
{
    // The text contains no conversion specifiers, so it is emitted verbatim.
    static const char* const text =
            "usage: localize export OPTIONS CONFIGS...\n"
            " REQUIRED OPTIONS\n"
            " --settings SETTINGS The settings file to use. See CONFIGS below.\n"
            " --root TREE_ROOT The location in Perforce of the files. e.g. //device\n"
            " --target LOCALE The target locale. See LOCALES below.\n"
            "\n"
            " OPTIONAL OPTIONS\n"
            " --out DIR Directory to put the output files. Defaults to the\n"
            " current directory if not supplied. Files are\n"
            " named as follows:\n"
            " CONFIG-LOCALE-CURRENT_CL.xliff\n"
            "\n"
            "\n"
            "usage: localize import XLIFF_FILE...\n"
            "\n"
            "Import a translated XLIFF file back into the tree.\n"
            "\n"
            "\n"
            "usage: localize xlb XMB_FILE VALUES_FILES...\n"
            "\n"
            "Read resource files from the tree file and write the corresponding XLB file\n"
            "\n"
            "Supply all of the android resource files (values files) to export after that.\n"
            "\n"
            "\n"
            "\n"
            "CONFIGS\n"
            "\n"
            "LOCALES\n"
            "Locales are specified in the form en_US They will be processed correctly\n"
            "to locate the resouce files in the tree.\n"
            "\n"
            "\n"
            "usage: localize pseudo OPTIONS INFILE [OUTFILE]\n"
            " OPTIONAL OPTIONS\n"
            " --big Pad strings so they get longer.\n"
            "\n"
            "Read INFILE, an XLIFF file, and output a pseudotranslated version of that file. If\n"
            "OUTFILE is specified, the results are written there; otherwise, the results are\n"
            "written back to INFILE.\n"
            "\n"
            "\n"
            "usage: localize rescheck FILES...\n"
            "\n"
            "Reads the base strings and prints warnings about bad resources from the given files.\n"
            "\n";

    fputs(text, stderr);
    return 1;
}
604
605int
606main(int argc, const char** argv)
607{
608 //open_log_file("log.txt");
609 //g_logFile = stdout;
610
611 if (argc == 2 && 0 == strcmp(argv[1], "--test")) {
612 return test();
613 }
614
615 if (argc < 2) {
616 return usage();
617 }
618
619 int index = 1;
620
621 if (0 == strcmp("export", argv[index])) {
622 string settingsFile;
623 string rootDir;
624 string outDir;
625 string baseLocale = "en";
626 string targetLocale;
627 string language, region;
628 vector<string> configs;
629
630 index++;
631 while (index < argc) {
632 if (0 == strcmp("--settings", argv[index])) {
633 settingsFile = argv[index+1];
634 index += 2;
635 }
636 else if (0 == strcmp("--root", argv[index])) {
637 rootDir = argv[index+1];
638 index += 2;
639 }
640 else if (0 == strcmp("--out", argv[index])) {
641 outDir = argv[index+1];
642 index += 2;
643 }
644 else if (0 == strcmp("--target", argv[index])) {
645 targetLocale = argv[index+1];
646 index += 2;
647 }
648 else if (argv[index][0] == '-') {
649 fprintf(stderr, "unknown argument %s\n", argv[index]);
650 return usage();
651 }
652 else {
653 break;
654 }
655 }
656 for (; index<argc; index++) {
657 configs.push_back(argv[index]);
658 }
659
660 if (settingsFile == "" || rootDir == "" || configs.size() == 0 || targetLocale == "") {
661 return usage();
662 }
663 if (!split_locale(targetLocale, &language, &region)) {
664 fprintf(stderr, "illegal --target locale: '%s'\n", targetLocale.c_str());
665 return usage();
666 }
667
668
669 return do_export(settingsFile, rootDir, outDir, targetLocale, configs);
670 }
671 else if (0 == strcmp("import", argv[index])) {
672 vector<string> xliffFilenames;
673
674 index++;
675 for (; index<argc; index++) {
676 xliffFilenames.push_back(argv[index]);
677 }
678
679 return do_merge(xliffFilenames);
680 }
681 else if (0 == strcmp("xlb", argv[index])) {
682 string outfile;
683 vector<string> resFiles;
684
685 index++;
686 if (argc < index+1) {
687 return usage();
688 }
689
690 outfile = argv[index];
691
692 index++;
693 for (; index<argc; index++) {
694 resFiles.push_back(argv[index]);
695 }
696
697 return do_xlb_export(outfile, resFiles);
698 }
699 else if (0 == strcmp("pseudo", argv[index])) {
700 string infile;
701 string outfile;
702 bool big = false;
703
704 index++;
705 while (index < argc) {
706 if (0 == strcmp("--big", argv[index])) {
707 big = true;
708 index += 1;
709 }
710 else if (argv[index][0] == '-') {
711 fprintf(stderr, "unknown argument %s\n", argv[index]);
712 return usage();
713 }
714 else {
715 break;
716 }
717 }
718
719 if (index == argc-1) {
720 infile = argv[index];
721 outfile = argv[index];
722 }
723 else if (index == argc-2) {
724 infile = argv[index];
725 outfile = argv[index+1];
726 }
727 else {
728 fprintf(stderr, "unknown argument %s\n", argv[index]);
729 return usage();
730 }
731
732 return do_pseudo(infile, outfile, big);
733 }
734 else if (0 == strcmp("rescheck", argv[index])) {
735 vector<string> files;
736
737 index++;
738 while (index < argc) {
739 if (argv[index][0] == '-') {
740 fprintf(stderr, "unknown argument %s\n", argv[index]);
741 return usage();
742 }
743 else {
744 break;
745 }
746 }
747 for (; index<argc; index++) {
748 files.push_back(argv[index]);
749 }
750
751 if (files.size() == 0) {
752 return usage();
753 }
754
755 return do_rescheck(files);
756 }
757 else {
758 return usage();
759 }
760
761 if (SourcePos::HasErrors()) {
762 SourcePos::PrintErrors(stderr);
763 return 1;
764 }
765
766 return 0;
767}
768