blob: e22c928a582646d3f60e46ac1c2ce9b04bf11a90 [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001#!/usr/bin/perl
2
3#
4#//===----------------------------------------------------------------------===//
5#//
6#// The LLVM Compiler Infrastructure
7#//
8#// This file is dual licensed under the MIT and the University of Illinois Open
9#// Source Licenses. See LICENSE.txt for details.
10#//
11#//===----------------------------------------------------------------------===//
12#
13
14use strict;
15use warnings;
16
17use File::Glob ":glob";
18use Encode qw{ encode };
19
20use FindBin;
21use lib "$FindBin::Bin/lib";
22
23use tools;
Jim Cownie5e8470a2013-09-27 10:38:44 +000024
our $VERSION     = "0.04";
my  $escape      = qr{%};                    # Character which starts a placeholder.
my  $placeholder = qr{(\d)\$(s|l?[du])};     # Argument number, "$", and argument type.
my  $target_os;                              # Target OS name, filled in by "--os" option.

# Names of catalog sections, in the order they are numbered and generated.
my @sections = qw{ meta strings formats messages hints };

# Per-section properties. Short names are used to build C identifiers and facility names.
my $sections =
    {
        meta     => { short => "prp" }, # "prp" stands for "property".
        strings  => { short => "str" },
        formats  => { short => "fmt" },
        messages => { short => "msg" },
        hints    => { short => "hnt" },
    };

# Complete section properties: long name, set number (1-based position of the section), and
# base number (set number shifted into the upper 16 bits).
foreach my $index ( 0 .. $#sections ) {
    my $props = $sections->{ $sections[ $index ] };
    $props->{ long } = $sections[ $index ];
    $props->{ set  } = $index + 1;
    $props->{ base } = ( $index + 1 ) << 16;
}; # foreach $index

# Properties of Meta section.
my @properties = qw{ Language Country LangId Version Revision };
46
47
sub _generate_comment($$$) {

    # Build the two-line "do not edit" banner placed at the top of generated files. Every banner
    # line is enclosed into $open and $close comment delimiters and terminated with a newline.
    # The second line names the source file (taken from $data->{ "%meta" }->{ source }), the
    # tool, and the current local time.

    my ( $data, $open, $close ) = @_;
    my $source = get_file( $data->{ "%meta" }->{ source } );
    my $stamp  = scalar( localtime() );
    my $first  = " Do not edit this file! ";
    my $second = " The file was generated from " . $source . " by " . $tool . " on " . $stamp . ". ";
    return join( "", $open, $first, $close, "\n", $open, $second, $close, "\n" );

}; # sub _generate_comment
58
59
sub msg2sgn($) {

    # Convert message string to signature: the list of placeholders found in the message, ordered
    # by argument number and joined with spaces. Argument numbers which are skipped in the message
    # are represented with "-" type. For example, signature of "%1$s value \"%2$s\" is invalid."
    # is "%1$s %2$s", and signature of "%2$s" is "%1$- %2$s".

    my ( $msg ) = @_;
    my @slots;    # $slots[ N - 1 ] is the placeholder text of argument number N.
    pos( $msg ) = 0;
    while ( $msg =~ m{\G.*?$escape$placeholder}g ) {
        my ( $number, $type ) = ( $1, $2 );
        $slots[ $number - 1 ] = "%" . $number . "\$" . $type;
    }; # while
    my @signature;
    foreach my $index ( 0 .. $#slots ) {
        if ( defined( $slots[ $index ] ) ) {
            push( @signature, $slots[ $index ] );
        } else {
            # The message does not refer to this argument.
            push( @signature, "%" . ( $index + 1 ) . "\$-" );
        }; # if
    }; # foreach $index
    return join( " ", @signature );

}; # sub msg2sgn
79
80
sub msg2src($) {

    # Convert message string to the form used in generated sources for the target OS. For "win"
    # target every GNU-style placeholder ("%1$s") is rewritten to MS style ("%1!s!"); for any
    # other target the message is returned as is.

    my ( $msg ) = @_;
    return $msg if $target_os ne "win";
    $msg =~ s{$escape$placeholder}{\%$1!$2!}g;
    return $msg;

}; # sub msg2src
92
93
# Escaped characters which stand for a control character.
my $special =
    {
        "n" => "\n",
        "t" => "\t",
    };

sub msg2mc($) {

    # Convert message string to the text written into message compiler (.mc) file: placeholders
    # are converted by msg2src(), then every backslash escape is resolved -- "\n" and "\t" become
    # real newline and tab, any other escaped character is kept without the backslash.

    my ( $msg ) = @_;
    my $text = msg2src( $msg ); # Get windows style placeholders.
    $text =~ s{\\(.)}{
        my $char = $1;
        exists( $special->{ $char } ) ? $special->{ $char } : $char;
    }ge;
    return $text;

}; # sub msg2mc
106
107
108
sub parse_message($) {

    # Check %-sequences of a message. Every "%" must be immediately followed by a valid
    # placeholder. Returns undef if the message is ok, or error text mentioning up to 7
    # characters following the offending "%".

    my ( $msg ) = @_;
    pos( $msg ) = 0;
    # Hop from one "%" to the next one; /c keeps the position when a match fails, so pos() still
    # points right after the "%" when the placeholder check fails.
    while ( $msg =~ m{\G.*?$escape}gc ) {
        next if $msg =~ m{\G$placeholder}gc;
        return "Bad %-sequence near \"%" . substr( $msg, pos( $msg ), 7 ) . "\"";
    }; # while
    return undef;

}; # sub parse_message
124
125
sub parse_source($) {

    # Parse message catalog source file $name and return a reference to a hash with its content:
    #     $data->{ "%meta" }  -- hash of meta properties (Language, Country, LangId, Version,
    #                            Revision) plus "source", the name of the parsed file;
    #     $data->{ $section } -- for each known section, array of [ identifier, message ] pairs
    #                            in order of appearance; "meta" section is built from the
    #                            properties after parsing.
    # All the problems (syntax errors, unknown sections or properties, redefined identifiers, bad
    # %-sequences, missed required properties) are reported by runtime_error().

    my ( $name ) = @_;

    my @bulk = read_file( $name, -layer => ":utf8" );
    my $data = {};

    my $n        = 0; # Line number.
    my $obsolete = 0; # Counter of obsolete entries.
    my $last_idx;     # Index of the message which may be continued by the next line, or undef.
    my %idents;       # Message identifiers already defined in the current section.
    my $section;      # Name of the current section, undef before the first section header.

    my $error =
        sub {
            my ( $n, $line, $msg ) = @_;
            runtime_error( "Error parsing $name line $n: " . "$msg:\n" . " $line" );
        }; # sub

    foreach my $line ( @bulk ) {
        ++ $n;
        # Skip empty lines and comments.
        if ( $line =~ m{\A\s*(\n|#)} ) {
            $last_idx = undef;
            next;
        }; # if
        # Parse section header.
        if ( $line =~ m{\A-\*-\s*([A-Z_]*)\s*-\*-\s*\n\z}i ) {
            $section = ( lc( $1 ) );
            if ( not grep( $section eq $_, @sections ) ) {
                $error->( $n, $line, "Unknown section \"$section\" specified" );
            }; # if
            # Note: section data appears only when the first message is saved (and "meta" data is
            # built after the loop), so this check catches repeated non-empty message sections
            # only.
            if ( exists( $data->{ $section } ) ) {
                $error->( $n, $line, "Multiple sections of the same type specified" );
            }; # if
            %idents = (); # Clean list of known message identifiers.
            # A message can not be continued across section boundary. Without this reset a
            # continuation line right after the header would be stored into the new section at
            # the index remembered from the previous one.
            $last_idx = undef;
            next;
        }; # if
        if ( not defined( $section ) ) {
            $error->( $n, $line, "Section heading expected" );
        }; # if
        # Parse section body.
        if ( $section eq "meta" ) {
            if ( $line =~ m{\A([A-Z_][A-Z_0-9]*)\s+"(.*)"\s*?\n?\z}i ) {
                # Parse meta properties (such as Language, Country, and LangId).
                my ( $property, $value ) = ( $1, $2 );
                if ( not grep( $_ eq $property , @properties ) ) {
                    $error->( $n, $line, "Unknown property \"$property\" specified" );
                }; # if
                if ( exists( $data->{ "%meta" }->{ $property } ) ) {
                    $error->( $n, $line, "Property \"$property\" has already been specified" );
                }; # if
                $data->{ "%meta" }->{ $property } = $value;
                $last_idx = undef;
                next;
            }; # if
            $error->( $n, $line, "Property line expected" );
        }; # if
        # Parse message.
        if ( $line =~ m{\A([A-Z_][A-Z_0-9]*)\s+"(.*)"\s*?\n?\z}i ) {
            my ( $ident, $message ) = ( $1, $2 );
            if ( $ident eq "OBSOLETE" ) {
                # If id is "OBSOLETE", add a unique suffix. It provides convenient way to mark
                # obsolete messages.
                ++ $obsolete;
                $ident .= $obsolete;
            }; # if
            if ( exists( $idents{ $ident } ) ) {
                $error->( $n, $line, "Identifier \"$ident\" is redefined" );
            }; # if
            # Check %-sequences.
            my $err = parse_message( $message );
            if ( $err ) {
                $error->( $n, $line, $err );
            }; # if
            # Save message.
            push( @{ $data->{ $section } }, [ $ident, $message ] );
            $idents{ $ident } = 1;
            $last_idx = @{ $data->{ $section } } - 1;
            next;
        }; # if
        # Parse continuation line.
        if ( $line =~ m{\A\s*"(.*)"\s*\z} ) {
            my $message = $1;
            if ( not defined( $last_idx ) ) {
                $error->( $n, $line, "Unexpected continuation line" );
            }; # if
            # Check %-sequences.
            my $err = parse_message( $message );
            if ( $err ) {
                $error->( $n, $line, $err );
            }; # if
            # Save continuation.
            $data->{ $section }->[ $last_idx ]->[ 1 ] .= $message;
            next;
        }; # if
        $error->( $n, $line, "Message definition expected" );
    }; # foreach $line
    $data->{ "%meta" }->{ source } = $name;
    # Make sure every known section exists, even if it is absent in the source.
    foreach my $section ( @sections ) {
        if ( not exists( $data->{ $section } ) ) {
            $data->{ $section } = [];
        }; # if
    }; # foreach $section

    # All the properties are required. Copy them into "meta" section in canonical order.
    foreach my $property ( @properties ) {
        if ( not defined( $data->{ "%meta" }->{ $property } ) ) {
            runtime_error(
                "Error parsing $name: " .
                    "Required \"$property\" property is not specified"
            );
        }; # if
        push( @{ $data->{ meta } }, [ $property, $data->{ "%meta" }->{ $property } ] );
    }; # foreach $property

    return $data;

}; # sub parse_source
245
246
sub generate_enum($$$) {

    # Generate enum file $file: a C enum "${prefix}_id" (with "${prefix}_id_t" typedef) which
    # contains one enumerator per message. Enumerators of every section are enclosed between
    # "..._first" (equal to the section base number) and "..._last" enumerators.

    my ( $data, $file, $prefix ) = @_;

    my @lines =
        (
            _generate_comment( $data, "//", "//" ),
            "\n",
            "enum " . $prefix . "_id {\n\n",
            " // A special id for absence of message.\n",
            " " . $prefix . "_null = 0,\n\n",
        );

    foreach my $name ( @sections ) {
        my $info = $sections->{ $name };              # Section properties.
        my $stem = $prefix . "_" . $info->{ short };  # Common part of enumerator names.
        push( @lines, " // Set #" . $info->{ set } . ", " . $info->{ long } . ".\n" );
        push( @lines, " " . $stem . "_first = " . $info->{ base } . ",\n" );
        foreach my $entry ( @{ $data->{ $name } } ) {
            push( @lines, " " . $stem . "_" . $entry->[ 0 ] . ",\n" );
        }; # foreach $entry
        push( @lines, " " . $stem . "_last,\n\n" );
    }; # foreach $name

    push(
        @lines,
        " " . $prefix . "_xxx_lastest\n\n",
        "}; // enum " . $prefix . "_id\n",
        "\n",
        "typedef enum " . $prefix . "_id " . $prefix . "_id_t;\n",
        "\n",
        "\n",
        "// end of file //\n",
    );

    my $bulk = join( "", @lines );
    write_file( $file, \$bulk );

}; # sub generate_enum
286
287
sub generate_signature($$) {

    # Generate signature file $file. For every section it contains a section header followed by
    # one line per message: message identifier (left-justified in 40 columns) and message
    # signature as returned by msg2sgn().

    my ( $data, $file ) = @_;

    my $text = "// message catalog signature file //\n\n";
    foreach my $name ( @sections ) {
        $text .= "-*- " . uc( $sections->{ $name }->{ long } ) . "-*-\n\n";
        foreach my $entry ( @{ $data->{ $name } } ) {
            my ( $ident, $msg ) = @$entry;
            $text .= sprintf( "%-40s %s\n", $ident, msg2sgn( $msg ) );
        }; # foreach $entry
        $text .= "\n";
    }; # foreach $name
    $text .= "// end of file //\n";

    write_file( $file, \$text );

}; # sub generate_signature
311
312
sub generate_default($$$) {

    # Generate default messages file $file, a C include file which contains:
    #     * one NULL-terminated array of strings per section, "__${prefix}_default_<section>",
    #       where element 0 is NULL and messages start at index 1;
    #     * array of section descriptors "__${prefix}_sections" (message count and strings);
    #     * table descriptor "__${prefix}_default_table" (section count and sections).
    # Messages are converted with msg2src(), so placeholders match the target OS style.
    #
    # Names of the sections array and of the table are built from $prefix, as all the other
    # generated variables. They used to be hard-coded as "__kmp_i18n_sections" (in the table
    # initializer) and "__kmp_i18n_default_table", so generated code referred to an undefined
    # variable unless prefix was "kmp_i18n". For "kmp_i18n" prefix the output is not changed.

    my ( $data, $file, $prefix ) = @_;
    my $bulk = "";

    $bulk .=
        _generate_comment( $data, "//", "//" ) .
        "\n";

    foreach my $section ( @sections ) {
        $bulk .=
            "static char const *\n" .
            "__${prefix}_default_${section}" . "[] =\n" .
            " {\n" .
            " NULL,\n";
        foreach my $item ( @{ $data->{ $section } } ) {
            my ( undef, $msg ) = @$item;
            $bulk .= " \"" . msg2src( $msg ) . "\",\n";
        }; # foreach $item
        $bulk .=
            " NULL\n" .
            " };\n" .
            "\n";
    }; # foreach $section

    $bulk .=
        "struct kmp_i18n_section {\n" .
        " int size;\n" .
        " char const ** str;\n" .
        "}; // struct kmp_i18n_section\n" .
        "typedef struct kmp_i18n_section kmp_i18n_section_t;\n" .
        "\n" .
        "static kmp_i18n_section_t\n" .
        "__${prefix}_sections[] =\n" .
        " {\n" .
        " { 0, NULL },\n";
    foreach my $section ( @sections ) {
        # Size of a section is the number of its messages, NULL elements are not counted.
        $bulk .=
            " { " . scalar( @{ $data->{ $section } } ) . ", __${prefix}_default_${section} },\n";
    }; # foreach $section
    $bulk .=
        " { 0, NULL }\n" .
        " };\n" .
        "\n";

    $bulk .=
        "struct kmp_i18n_table {\n" .
        " int size;\n" .
        " kmp_i18n_section_t * sect;\n" .
        "}; // struct kmp_i18n_table\n" .
        "typedef struct kmp_i18n_table kmp_i18n_table_t;\n" .
        "\n" .
        "static kmp_i18n_table_t __${prefix}_default_table =\n" .
        " {\n" .
        " " . scalar( @sections ) . ",\n" .
        " __${prefix}_sections\n" .
        " };\n" .
        "\n" .
        "// end of file //\n";

    write_file( $file, \$bulk );

}; # sub generate_default
376
377
sub generate_message_unix($$) {

    # Generate message file $file in the format with "$quote" and "$set" directives: every
    # section becomes a set (numbered with section's set number), messages within a set are
    # numbered starting from 1 and enclosed in double quotes. The file is written through
    # ":utf8" layer.

    my ( $data, $file ) = @_;

    my $rule = "\$ " . ( "-" x 78 ) . "\n"; # Comment line used to frame section name.
    my $text =
        _generate_comment( $data, "\$", "\$" ) .
        "\n" .
        "\$quote \"\n\n";

    foreach my $name ( @sections ) {
        $text .= $rule . "\$ " . $name . "\n" . $rule . "\n";
        $text .= "\$set " . $sections->{ $name }->{ set } . "\n" . "\n";
        my $number = 0; # Message number within the set.
        foreach my $entry ( @{ $data->{ $name } } ) {
            $number += 1;
            $text .= $number . " \"" . msg2src( $entry->[ 1 ] ) . "\"\n";
        }; # foreach $entry
        $text .= "\n";
    }; # foreach $name

    $text .= "\n" . "\$ end of file \$\n";

    write_file( $file, \$text, -layer => ":utf8" );

}; # sub generate_message_unix
409
410
sub generate_message_windows($$) {

    # Generate message file $file in the message compiler text format: "LanguageNames" and
    # "FacilityNames" declarations followed by one "MessageId/Facility/Language" record per
    # message. Every section is a facility (named with section's short name), messages within
    # a facility are numbered starting from 1. Message text is converted with msg2mc().
    # The whole text is encoded to UTF-16LE and written in binary mode.

    my ( $data, $file ) = @_;
    my $meta     = $data->{ "%meta" };
    my $language = $meta->{ Language };
    my $langid   = $meta->{ LangId };

    my @parts =
        (
            _generate_comment( $data, ";", ";" ),
            "\n",
            "LanguageNames = (" . $language . "=" . $langid . ":msg_" . $langid . ")\n",
            "\n",
        );

    # Declare facilities.
    push( @parts, "FacilityNames=(\n" );
    foreach my $name ( @sections ) {
        my $info = $sections->{ $name }; # Section properties.
        push( @parts, " " . $info->{ short } . "=" . $info->{ set } . "\n" );
    }; # foreach $name
    push( @parts, ")\n\n" );

    # Messages.
    foreach my $name ( @sections ) {
        my $facility = $sections->{ $name }->{ short };
        my $number   = 0; # Message number within the facility.
        foreach my $entry ( @{ $data->{ $name } } ) {
            $number += 1;
            push(
                @parts,
                "MessageId=" . $number . "\n",
                "Facility=" . $facility . "\n",
                "Language=" . $language . "\n",
                msg2mc( $entry->[ 1 ] ) . "\n.\n\n",
            );
        }; # foreach $entry
    }; # foreach $name

    push( @parts, "\n", "; end of file ;\n" );

    # Convert text to UTF-16LE used in Windows* OS.
    my $bulk = encode( "UTF-16LE", join( "", @parts ) );
    write_file( $file, \$bulk, -binary => 1 );

}; # sub generate_message_windows
456
457
458#
459# Parse command line.
460#
461
my $input_file;
my $enum_file;
my $signature_file;
my $default_file;
my $message_file;
my $id;
my $prefix = "";
get_options(
    "os=s"             => \$target_os,
    "enum-file=s"      => \$enum_file,
    "signature-file=s" => \$signature_file,
    "default-file=s"   => \$default_file,
    "message-file=s"   => \$message_file,
    "id|lang-id"       => \$id,
    "prefix=s"         => \$prefix,
);
if ( @ARGV == 0 ) {
    cmdline_error( "No source file specified -- nothing to do" );
}; # if
if ( @ARGV > 1 ) {
    cmdline_error( "Too many source files specified" );
}; # if
$input_file = $ARGV[ 0 ];

# If target OS is not specified explicitly, use the value of LIBOMP_OS environment variable; if
# it is not defined either, detect host OS (this is what the description of "--os" option
# promises). Without these defaults $target_os would stay undefined, causing "uninitialized
# value" warnings and a confusing 'OS "" is not supported' error.
if ( not defined( $target_os ) ) {
    $target_os = $ENV{ LIBOMP_OS };
}; # if
if ( not defined( $target_os ) ) {
    # Map Perl's name of the host OS ($^O) to OS names known to this script.
    my %host_os =
        (
            linux   => "lin",
            darwin  => "mac",
            MSWin32 => "win",
            cygwin  => "win",
        );
    $target_os = $host_os{ $^O };
}; # if
if ( not defined( $target_os ) ) {
    runtime_error( "Cannot detect target OS (host OS is \"$^O\"); specify it with --os option" );
}; # if

# Select generator of message file: "lin" and "mac" share the same format.
my $generate_message;
if ( $target_os =~ m{\A(?:lin|mac)\z} ) {
    $generate_message = \&generate_message_unix;
} elsif ( $target_os eq "win" ) {
    $generate_message = \&generate_message_windows;
} else {
    runtime_error( "OS \"$target_os\" is not supported" );
}; # if
495
496
497#
498# Do the work.
499#
500
# Parse the source once, then produce every output requested on the command line.
my $data = parse_source( $input_file );

print( $data->{ "%meta" }->{ LangId }, "\n" )        if defined( $id );
generate_enum( $data, $enum_file, $prefix )           if defined( $enum_file );
generate_signature( $data, $signature_file )          if defined( $signature_file );
generate_default( $data, $default_file, $prefix )     if defined( $default_file );
$generate_message->( $data, $message_file )           if defined( $message_file );

exit( 0 );
519
520__END__
521
522=pod
523
524=head1 NAME
525
B<message-converter.pl> -- Convert a message catalog source file into other text forms.
527
528=head1 SYNOPSIS
529
530B<message-converter.pl> I<option>... <file>
531
532=head1 OPTIONS
533
534=over
535
536=item B<--enum-file=>I<file>
537
538Generate enum file named I<file>.
539
540=item B<--default-file=>I<file>
541
542Generate default messages file named I<file>.
543
544=item B<--lang-id>
545
546Print language identifier of the message catalog source file.
547
548=item B<--message-file=>I<file>
549
550Generate message file.
551
552=item B<--signature-file=>I<file>
553
554Generate signature file.
555
556Signatures are used for checking compatibility. For example, to check a primary
557catalog and its translation to another language, signatures of both catalogs should be generated
558and compared. If signatures are identical, catalogs are compatible.
559
560=item B<--prefix=>I<prefix>
561
562Prefix to be used for all C identifiers (type and variable names) in enum and default messages
563files.
564
565=item B<--os=>I<str>
566
Specify the name of the OS the message formats are to be converted for. If not specified
explicitly, the value of the LIBOMP_OS environment variable is used. If LIBOMP_OS is not defined,
the host OS is detected.
569
570Depending on OS, B<message-converter.pl> converts message formats to GNU style or MS style.
571
572=item Standard Options
573
574=over
575
576=item B<--doc>
577
578=item B<--manual>
579
580Print full documentation and exit.
581
582=item B<--help>
583
584Print short help message and exit.
585
586=item B<--version>
587
588Print version string and exit.
589
590=back
591
592=back
593
594=head1 ARGUMENTS
595
596=over
597
598=item I<file>
599
600A name of input file.
601
602=back
603
604=head1 DESCRIPTION
605
606=head2 Message Catalog File Format
607
It is a plain text file in UTF-8 encoding. Empty lines and lines beginning with a sharp sign
(C<#>) are ignored. EBNF syntax of content:
610
611 catalog = { section };
612 section = header body;
613 header = "-*- " section-id " -*-" "\n";
614 body = { message };
615 message = message-id string "\n" { string "\n" };
616 section-id = identifier;
617 message-id = "OBSOLETE" | identifier;
618 identifier = letter { letter | digit | "_" };
619 string = """ { character } """;
620
An identifier starts with a letter, followed by letters, digits, and underscores. Identifiers are
case-sensitive. Section identifiers are fixed: C<META>, C<STRINGS>, C<FORMATS>, C<MESSAGES> and
C<HINTS>. Message identifiers must be unique within a section. The special C<OBSOLETE>
pseudo-identifier may be used many times.
625
626String is a C string literal which must not cross line boundaries.
627Long messages may occupy multiple lines, a string per line.
628
629Message may include printf-like GNU-style placeholders for arguments: C<%I<n>$I<t>>,
630where I<n> is argument number (C<1>, C<2>, ...),
631I<t> -- argument type, C<s> (string) or C<d> (32-bit integer).
632
633See also comments in F<i18n/en_US.txt>.
634
635=head2 Output Files
636
This script can generate 4 different text files from a single source:
638
639=over
640
641=item Enum file.
642
643Enum file is a C include file, containing definitions of message identifiers, e. g.:
644
645 enum kmp_i18n_id {
646
647 // Set #1, meta.
648 kmp_i18n_prp_first = 65536,
649 kmp_i18n_prp_Language,
650 kmp_i18n_prp_Country,
651 kmp_i18n_prp_LangId,
652 kmp_i18n_prp_Version,
653 kmp_i18n_prp_Revision,
654 kmp_i18n_prp_last,
655
656 // Set #2, strings.
657 kmp_i18n_str_first = 131072,
658 kmp_i18n_str_Error,
659 kmp_i18n_str_UnknownFile,
660 kmp_i18n_str_NotANumber,
661 ...
662
    // Set #3, formats.
664 ...
665
666 kmp_i18n_xxx_lastest
667
668 }; // enum kmp_i18n_id
669
670 typedef enum kmp_i18n_id kmp_i18n_id_t;
671
672=item Default messages file.
673
674Default messages file is a C include file containing default messages to be embedded into
675application (and used if external message catalog does not exist or could not be open):
676
677 static char const *
678 __kmp_i18n_default_meta[] =
679 {
680 NULL,
681 "English",
682 "USA",
683 "1033",
684 "2",
685 "20090806",
686 NULL
687 };
688
689 static char const *
690 __kmp_i18n_default_strings[] =
691 {
692 "Error",
693 "(unknown file)",
694 "not a number",
695 ...
696 NULL
697 };
698
699 ...
700
701=item Message file.
702
703Message file is an input for message compiler, F<gencat> on Linux* OS and OS X*, or F<mc.exe> on
704Windows* OS.
705
706Here is the example of Linux* OS message file:
707
708 $quote "
709 1 "Japanese"
710 2 "Japan"
711 3 "1041"
712 4 "2"
    5 "Based on English message catalog revision 20090806"
714 ...
715
716Example of Windows* OS message file:
717
    LanguageNames = (Japanese=1041:msg_1041)
719
720 FacilityNames = (
721 prp=1
722 str=2
723 fmt=3
724 ...
725 )
726
727 MessageId=1
728 Facility=prp
729 Language=Japanese
730 Japanese
731 .
732
733 ...
734
735=item Signature.
736
737Signature is a processed source file: comments stripped, strings deleted, but placeholders kept and
738sorted.
739
740 -*- FORMATS-*-
741
742 Info %1$d %2$s
743 Warning %1$d %2$s
744 Fatal %1$d %2$s
745 SysErr %1$d %2$s
746 Hint %1$- %2$s
747 Pragma %1$s %2$s %3$s %4$s
748
749The purpose of signatures -- compare two message source files for compatibility. If signatures of
750two message sources are the same, binary message catalogs will be compatible.
751
752=back
753
754=head1 EXAMPLES
755
756Generate include file containing message identifiers:
757
758 $ message-converter.pl --enum-file=kmp_i18n_id.inc en_US.txt
759
Generate include file containing default messages:
761
762 $ message-converter.pl --default-file=kmp_i18n_default.inc en_US.txt
763
764Generate input file for message compiler, Linux* OS example:
765
766 $ message-converter.pl --message-file=ru_RU.UTF-8.msg ru_RU.txt
767
768Generate input file for message compiler, Windows* OS example:
769
770 > message-converter.pl --message-file=ru_RU.UTF-8.mc ru_RU.txt
771
772=cut
773
774# end of file #
775