#!/usr/bin/perl
# Copyright 2008 The Go Authors.  All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# Modified version of RE2's make_perl_groups.pl.

# Generate table entries giving character ranges
# for POSIX/Perl character classes.  Rather than
# figure out what the definition is, it is easier to ask
# Perl about each character code from 0 to 128 and write
# down its answer.

# POSIX character classes, spelled the way they appear inside a
# bracket expression.
@posixclasses = qw(
    [:alnum:]
    [:alpha:]
    [:ascii:]
    [:blank:]
    [:cntrl:]
    [:digit:]
    [:graph:]
    [:lower:]
    [:print:]
    [:punct:]
    [:space:]
    [:upper:]
    [:word:]
    [:xdigit:]
);

# Perl shorthand classes; each element is a backslash followed by a letter.
@perlclasses = qw( \d \s \w );

# ComputeClass(class) asks Perl which character codes match a class.
#
# $class is wrapped in brackets to form the pattern, so "\d" is tested
# as [\d] and "[:alpha:]" as [[:alpha:]].  Codes 0 through 128 are each
# tested against that pattern; the extra value 256 never matches and
# only serves to close a run that is still open after 128.
#
# Returns a list of [lo, hi] array refs, one per maximal run of
# consecutive matching codes, in ascending order.
sub ComputeClass($) {
    my ($class) = @_;
    my $bracketed = "[$class]";
    my $matcher   = qr/$bracketed/;

    my @ranges;
    my $run_start;    # undef while we are not inside a matching run
    foreach my $code (0 .. 128, 256) {
        if ($code <= 128 && chr($code) =~ $matcher) {
            # Open a new run only when one is not already in progress.
            $run_start = $code unless defined $run_start;
            next;
        }
        # A non-matching code ends the current run, if any.
        push @ranges, [$run_start, $code - 1] if defined $run_start;
        undef $run_start;
    }
    return @ranges;
}

# PrintClass(cname, name, ranges...) emits the Go rune table for one
# character class and returns the map entries that refer to it.
#
#   $cname  - suffix appended to "code" to form the Go variable name
#   $name   - the class as written in a regexp, e.g. "\d" or "[:alpha:]"
#   @ranges - list of [lo, hi] array refs giving inclusive ranges
#
# Prints "var code<cname> = []rune{ ... }" to the currently selected
# output handle, one "lo, hi" pair per line in hexadecimal.
#
# Returns a string holding two charGroup map entries that share the
# table: one for the class itself (sign +1) and one for its negated
# spelling (sign -1).
sub PrintClass($$@) {
    my ($cname, $name, @ranges) = @_;

    print "var code$cname = []rune{ /* $name */\n";
    foreach my $range (@ranges) {
        my ($lo, $hi) = @{$range};
        printf "\t0x%x, 0x%x,\n", $lo, $hi;
    }
    print "}\n\n";

    # Build the negated spelling.  A name containing a colon is a POSIX
    # class and gets a caret after its first colon ([:alpha:] becomes
    # [:^alpha:]); anything else is upper-cased (\d becomes \D).
    # Declared with "my" so the value does not leak into a package global.
    my $negname = $name;
    if ($negname =~ /:/) {
        $negname =~ s/:/:^/;
    } else {
        $negname =~ y/a-z/A-Z/;
    }

    return "\t`$name`: {+1, code$cname},\n" .
           "\t`$negname`: {-1, code$cname},\n";
}

# Running counter; each generated Go table is named "code" followed by
# the next value of this counter.
my $gen = 0;

# PrintClasses(cname, classes...) emits every table for one group of
# classes, followed by the Go map that indexes them.
#
#   $cname   - group name; the map variable is named "<cname>Group"
#   @classes - class spellings handed to ComputeClass and PrintClass
#
# The rune tables are printed first (PrintClass prints them as it is
# called), then the map literal built from the entries it returned.
# Returns the number of entry strings collected, one per class.
sub PrintClasses($@) {
    my ($cname, @classes) = @_;

    my @entries;
    for my $class (@classes) {
        push @entries, PrintClass(++$gen, $class, ComputeClass($class));
    }

    print "var ${cname}Group = map[string]charGroup{\n";
    print for @entries;
    print "}\n";

    return scalar @entries;
}

# Fixed header of the generated Go file.  The text contains nothing to
# interpolate, so it is quoted literally.
my $go_header = <<'EOF';
// Copyright 2013 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.go

package syntax

EOF
print $go_header;

# Emit the Perl group first and the POSIX group second; table numbering
# follows this order.
for my $group (["perl", \@perlclasses], ["posix", \@posixclasses]) {
    my ($group_name, $class_list) = @{$group};
    PrintClasses($group_name, @{$class_list});
}