luci-app-statistics: convert plugin definitions to JSON
[project/luci.git] / build / i18n-scan.pl
index f47fc3553c7a7b7600150fc76f006d61e2b4ded3..879cbe6a9004bd27f8ce852b2e4fcc53f6213d21 100755 (executable)
 
 use strict;
 use warnings;
-use Text::Balanced qw(extract_bracketed extract_delimited extract_tagged);
+use IPC::Open2;
 use POSIX;
 
-POSIX::setlocale(POSIX::LC_ALL, "C");
+$ENV{'LC_ALL'} = 'C';
+POSIX::setlocale(POSIX::LC_ALL, 'C');
 
 @ARGV >= 1 || die "Usage: $0 <source directory>\n";
 
 
-my %stringtable;
+# Default gettext keyword specifications, keyed by source file extension
+# (including the leading dot). xgettext() passes every entry verbatim as a
+# "--keyword=<spec>" argument.
+# NOTE(review): per the GNU xgettext manual "name:N" selects the msgid
+# argument, "N,M" a singular/plural pair and a trailing "c" marks the
+# message context argument - confirm against the installed gettext version.
+my %keywords = (
+       '.js' => [ '_:1', '_:1,2c', 'N_:2,3', 'N_:2,3,4c' ],
+       '.lua' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
+       '.htm' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
+       '.json' => [ '_:1', '_:1,2c' ]
+);
 
-sub dec_lua_str
-{
-       my $s = shift;
-       $s =~ s/[\s\n]+/ /g;
-       $s =~ s/\\n/\n/g;
-       $s =~ s/\\t/\t/g;
-       $s =~ s/\\(.)/$1/g;
-       $s =~ s/^ //;
-       $s =~ s/ $//;
-       return $s;
+# Build the xgettext command line (as an argument list) used to extract
+# translatable strings from the file at $path.
+#
+# Arguments:
+#   $path     - file path; only its trailing ".ext" is inspected
+#   @keywords - extra "--keyword" specs appended after the per-extension
+#               defaults from %keywords
+#
+# Returns the command as a list. Output is directed to stdout ("-o -");
+# the input file argument is NOT included and must be appended by the caller.
+sub xgettext($@) {
+       my $path = shift;
+       my @keywords = @_;
+       my ($ext) = $path =~ m!(\.\w+)$!;
+       my @cmd = qw(xgettext --from-code=UTF-8 --no-wrap);
+
+       # A path without a recognizable extension must neither trip
+       # "uninitialized" warnings in the comparisons below nor dereference
+       # a missing %keywords entry; treat it as "no defaults, no language".
+       $ext = '' unless defined $ext;
+
+       if ($ext eq '.htm' || $ext eq '.lua') {
+               push @cmd, '--language=Lua';
+       }
+       elsif ($ext eq '.js' || $ext eq '.json') {
+               push @cmd, '--language=JavaScript';
+       }
+
+       push @cmd, map { "--keyword=$_" } (@{ $keywords{$ext} || [] }, @keywords);
+       push @cmd, '-o', '-';
+
+       return @cmd;
 }
 
-sub dec_tpl_str
-{
+# Normalize the whitespace of a (still escaped) PO string value.
+#
+# Takes one string and returns the processed copy: the two-character escape
+# sequences backslash-n and backslash-t become a plain space, every other
+# backslash sequence is kept as is, runs of spaces are squeezed to one and
+# a single leading and trailing space is removed.
+sub whitespace_collapse($) {
        my $s = shift;
-       $s =~ s/-$//;
-       $s =~ s/[\s\n]+/ /g;
+       my %r = ('n' => ' ', 't' => ' ');
+
+       # Translate \t and \n to plain spaces, leave all other escape
+       # sequences alone. Finally replace all consecutive spaces by
+       # single ones and trim leading and trailing space.
+       # Escapes without an entry in %r fall through to the "\\$1" branch,
+       # which re-emits the backslash and the character unchanged.
+       $s =~ s/\\(.)/$r{$1} || "\\$1"/eg;
+       $s =~ s/ {2,}/ /g;
        $s =~ s/^ //;
        $s =~ s/ $//;
-       $s =~ s/\\/\\\\/g;
+
        return $s;
 }
 
+# Clean up the POT text produced by xgettext for a single source file.
+#
+# Arguments:
+#   $path   - path written into the rewritten "#: " location comments
+#   $source - POT text to process
+#
+# Returns '' when nothing is left after filtering; otherwise the remaining
+# lines joined by newlines, with one leading and one trailing newline.
+sub postprocess_pot($$) {
+       my ($path, $source) = @_;
+       # NOTE(review): $msgid and $skip are declared but never used in this sub.
+       my (@res, $msgid);
+       my $skip = 1;
 
-if( open F, "find @ARGV -type f '(' -name '*.htm' -o -name '*.lua' -o -name '*.js' ')' | sort |" )
-{
-       while( defined( my $file = readline F ) )
-       {
-               chomp $file;
+       # Rewrite every "#: ..." location line: each ":<digits>" found on it
+       # becomes its own "#: $path:<digits>" line, so the reported file name
+       # is replaced by $path and there is one location per line.
+       $source =~ s/^#: (.+?)\n/join("\n", map { "#: $path:$_" } $1 =~ m!:(\d+)!g) . "\n"/emg;
 
-               if( open S, "< $file" )
-               {
-                       local $/ = undef;
-                       my $raw = <S>;
-                       close S;
+       my @lines = split /\n/, $source;
 
+       # Remove all header lines up to the first location comment
+       while (@lines > 0 && $lines[0] !~ m!^#: !) {
+               shift @lines;
+       }
 
-                       my $text = $raw;
-                       my $line = 1;
+       while (@lines > 0) {
+               my $line = shift @lines;
 
-                       while( $text =~ s/ ^ (.*?) (?:translate|translatef|i18n|_) ([\n\s]*) \( /(/sgx )
-                       {
-                               my ($prefix, $suffix) = ($1, $2);
+               # Concat multiline msgids and collapse whitespaces
+               if ($line =~ m!^(msg\w+) "(.*)"$!) {
+                       my $kw = $1;
+                       my $kv = $2;
 
-                               ( my $code, $text ) = extract_bracketed($text, q{('")});
+                       # Continuation lines consist solely of a quoted string; they
+                       # are appended to the value separated by a single space.
+                       while (@lines > 0 && $lines[0] =~ m!^"(.*)"$!) {
+                               $kv .= ' '. $1;
+                               shift @lines;
+                       }
 
-                               $line += () = $prefix =~ /\n/g;
+                       $kv = whitespace_collapse($kv);
 
-                               my $position = "$file:$line";
+                       # Filter invalid empty msgids by popping all lines in @res
+                       # leading to this point and skip all subsequent lines in
+                       # @lines belonging to this faulty id.
+                       if ($kw ne 'msgstr' && $kv eq '') {
+                               # Unwind back to the previous empty separator line.
+                               while (@res > 0 && $res[-1] !~ m!^$!) {
+                                       pop @res;
+                               }
 
-                               $line += () = $suffix =~ /\n/g;
-                               $line += () = $code   =~ /\n/g;
+                               while (@lines > 0 && $lines[0] =~ m!^(?:msg\w+ )?"(.*)"$!) {
+                                       shift @lines;
+                               }
 
-                               $code =~ s/\\\n/ /g;
-                               $code =~ s/^\([\n\s]*//;
-                               $code =~ s/[\n\s]*\)$//;
+                               next;
+                       }
 
-                               my $res = "";
-                               my $sub = "";
+                       push @res, sprintf '%s "%s"', $kw, $kv;
+               }
 
-                               if( $code =~ /^['"]/ )
-                               {
-                                       while( defined $sub )
-                                       {
-                                               ( $sub, $code ) = extract_delimited($code, q{'"}, q{\s*(?:\.\.\s*)?});
+               # Ignore any flags added by xgettext
+               elsif ($line =~ m!^#, !) {
+                       next;
+               }
 
-                                               if( defined $sub && length($sub) > 2 )
-                                               {
-                                                       $res .= substr $sub, 1, length($sub) - 2;
-                                               }
-                                               else
-                                               {
-                                                       undef $sub;
-                                               }
-                                       }
-                               }
-                               elsif( $code =~ /^(\[=*\[)/ )
-                               {
-                                       my $stag = quotemeta $1;
-                                       my $etag = $stag;
-                                          $etag =~ s/\[/]/g;
+               # Pass through other lines unmodified
+               else {
+                       push @res, $line;
+               }
+       }
 
-                                       ( $res ) = extract_tagged($code, $stag, $etag);
+       # The empty strings at both ends of the join yield the leading and
+       # trailing newline around the block.
+       return @res ? join("\n", '', @res, '') : '';
+}
 
-                                       $res =~ s/^$stag//;
-                                       $res =~ s/$etag$//;
-                               }
+# Return the distinct values of the argument list, sorted in ascending
+# string order.
+sub uniq(@) {
+       my %seen;
+
+       # Assigning through a hash slice registers every argument as a key;
+       # duplicates simply collapse onto the same key.
+       @seen{@_} = ();
+
+       return sort keys %seen;
+}
 
-                               $res = dec_lua_str($res);
+# Convert the source of an .htm template into text that xgettext can parse
+# with its Lua parser.
+#
+# Arguments:
+#   $path   - template path (not used in the body)
+#   $source - template contents
+#
+# Returns a list: first the rewritten source wrapped in "[==[" ... "]==]",
+# followed by extra keyword specs ("<name>:1" and "<name>:1,2c") for every
+# distinct dotted call name ending in translate/translatef that is found in
+# the rewritten source.
+sub preprocess_htm($$) {
+       my ($path, $source) = @_;
+       # sprintf templates selected by the character following "<%"; the
+       # empty key covers plain "<% ... %>" blocks, whose content is emitted
+       # unchanged.
+       my $sub = {
+               '=' => '(%s)',
+               '_' => 'translate([==[%s]==])',
+               ':' => 'translate([==[%s]==])',
+               '+' => 'include([==[%s]==])',
+               '#' => '--[==[%s]==]',
+               ''  => '%s'
+       };
+
+       # Translate the .htm source into a valid Lua source using bracket quotes
+       # to avoid the need for complex escaping.
+       $source =~ s!<%-?([=_:+#]?)(.*?)-?%>!
+               my $t = $1;
+               my $s = $2;
+
+               # Split translation expressions on first non-escaped pipe.
+               if ($t eq ':' || $t eq '_') {
+                       $s =~ s/^((?:[^\|\\]|\\.)*)\|(.*)$/$1]==],[==[$2/;
+               }
 
-                               if ($res) {
-                                       $stringtable{$res} ||= [ ];
-                                       push @{$stringtable{$res}}, $position;
-                               }
-                       }
+               sprintf "]==]; $sub->{$t}; [==[", $s
+       !sge;
 
+       # Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
+       # and return them as extra keyword so that xgettext recognizes such expressions
+       # as translate(...) calls.
+       my @extra_function_keywords =
+               map { ("$_:1", "$_:1,2c") }
+               uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);
 
-                       $text = $raw;
-                       $line = 1;
+       return ("[==[$source]==]", @extra_function_keywords);
+}
 
-                       while( $text =~ s/ ^ (.*?) <% -? [:_] /<%/sgx )
-                       {
-                               $line += () = $1 =~ /\n/g;
+# Collect extra xgettext keywords for a Lua source file.
+#
+# Arguments:
+#   $path   - file path (not used in the body)
+#   $source - file contents
+#
+# Returns a list: the unmodified $source followed by keyword specs
+# ("<name>:1" and "<name>:1,2c") for every distinct dotted call name ending
+# in translate/translatef.
+sub preprocess_lua($$) {
+       my ($path, $source) = @_;
 
-                               ( my $code, $text ) = extract_tagged($text, '<%', '%>');
+       # Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
+       # and return them as extra keyword so that xgettext recognizes such expressions
+       # as translate(...) calls.
+       my @extra_function_keywords =
+               map { ("$_:1", "$_:1,2c") }
+               uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);
 
-                               if( defined $code )
-                               {
-                                       my $position = "$file:$line";
+       return ($source, @extra_function_keywords);
+}
 
-                                       $line += () = $code =~ /\n/g;
+# Mark translatable values in a JSON file so that xgettext can pick them up.
+#
+# Arguments:
+#   $path   - file path
+#   $source - file contents
+#
+# Returns a one-element list holding the rewritten source, in which the
+# string value of every "title" key is wrapped as _("...").
+sub preprocess_json($$) {
+       my ($path, $source) = @_;
+       # NOTE(review): $file (the basename of $path) is computed but not used
+       # anywhere in this sub.
+       my ($file) = $path =~ m!([^/]+)$!;
 
-                                       $code = dec_tpl_str(substr $code, 2, length($code) - 4);
+       # The value pattern accepts backslash-escaped characters, so escaped
+       # quotes inside the title do not end the match early.
+       $source =~ s/("(?:title)")\s*:\s*("(?:[^"\\]|\\.)*")/$1: _($2)/sg;
 
-                                       $stringtable{$code} ||= [];
-                                       push @{$stringtable{$code}}, $position;
-                               }
+       return ($source);
+}
+
+
+# Main driver: every scanned file is run through xgettext and the
+# post-processed result is streamed into one long-running "msguniq -s"
+# process whose output is consumed after all files have been handled.
+my ($msguniq_in, $msguniq_out);
+my $msguniq_pid = open2($msguniq_out, $msguniq_in, 'msguniq', '-s');
+
+print $msguniq_in "msgid \"\"\nmsgstr \"Content-Type: text/plain; charset=UTF-8\"\n";
+
+# Run find(1) through the list form of a piped open so that no shell is
+# involved: directories given on the command line may contain spaces or
+# shell metacharacters without being re-split or interpreted.
+if (open F, '-|', 'find', @ARGV, '-type', 'f', '(', '-name', '*.htm', '-o', '-name', '*.lua', '-o', '-name', '*.js', '-o', '-path', '*/menu.d/*.json', '-o', '-path', '*/statistics/plugins/*.json', ')')
 {
+       while (defined( my $file = readline F))
+       {
+               chomp $file;
+
+               # Unreadable files are skipped silently (best effort).
+               if (open S, '<', $file)
+               {
+                       # Slurp mode; stays in effect until the end of this block and
+                       # therefore also applies to the read from xgettext below.
+                       local $/ = undef;
+                       my $source = <S>;
+                       close S;
+                       my @extra_function_keywords;
+
+                       if ($file =~ m!\.htm$!)
+                       {
+                               ($source, @extra_function_keywords) = preprocess_htm($file, $source);
+                       }
+                       elsif ($file =~ m!\.lua$!)
+                       {
+                               ($source, @extra_function_keywords) = preprocess_lua($file, $source);
+                       }
+                       elsif ($file =~ m!\.json$!)
+                       {
+                               ($source, @extra_function_keywords) = preprocess_json($file, $source);
                        }
+
+                       # Feed the (possibly rewritten) source to xgettext on stdin;
+                       # the trailing '-' is the input file argument.
+                       my ($xgettext_in, $xgettext_out);
+                       my $pid = open2($xgettext_out, $xgettext_in, xgettext($file, @extra_function_keywords), '-');
+
+                       print $xgettext_in $source;
+                       close $xgettext_in;
+
+                       my $pot = readline $xgettext_out;
+                       close $xgettext_out;
+
+                       # Reap the child so that no zombie processes accumulate.
+                       waitpid $pid, 0;
+
+                       print $msguniq_in postprocess_pot($file, $pot);
                }
        }
 
        close F;
 }
 
+close $msguniq_in;
 
-if( open C, "| msgcat -" )
-{
-       printf C "msgid \"\"\nmsgstr \"Content-Type: text/plain; charset=UTF-8\"\n\n";
+# Read the complete msguniq output (one array element per line, newlines
+# kept), reap the child process and print the result to stdout.
+my @pot = <$msguniq_out>;
 
-       foreach my $key ( sort keys %stringtable )
-       {
-               if( length $key )
-               {
-                       my @positions = @{$stringtable{$key}};
+close $msguniq_out;
+waitpid $msguniq_pid, 0;
+
+while (@pot > 0) {
+       my $line = shift @pot;
 
-                       $key =~ s/\\/\\\\/g;
-                       $key =~ s/\n/\\n/g;
-                       $key =~ s/\t/\\t/g;
-                       $key =~ s/"/\\"/g;
+       # Reorder the location comments in a deterministic way to
+       # reduce SCM noise when frequently updating templates.
+       if ($line =~ m!^#: !) {
+               my @locs = ($line);
 
-                       printf C "#: %s\nmsgid \"%s\"\nmsgstr \"\"\n\n",
-                               join(' ', @positions), $key;
+               while (@pot > 0 && $pot[0] =~ m!^#: !) {
+                       push @locs, shift @pot;
                }
+
+               # Each line is split into [text before the last colon, line number]
+               # (the first part keeps its "#: " prefix), sorted by text and then
+               # numerically by line number, and re-joined with ':'.
+               # NOTE(review): a location line not matching /^(.+):(\d+)$/ yields an
+               # empty pair and would be printed as an empty line - confirm that
+               # msguniq always emits a trailing ":<number>" on these lines.
+               print
+                       map { join(':', @$_) . "\n" }
+                       sort { ($a->[0] cmp $b->[0]) || ($a->[1] <=> $b->[1]) }
+                       map { [ /^(.+):(\d+)$/ ] }
+                       @locs
+               ;
+
+               next;
        }
 
-       close C;
+       print $line;
 }