ddec094caab48c25c9d7a2affdc2157a44246aad
[project/luci.git] / build / i18n-scan.pl
1 #!/usr/bin/perl
2
3 use strict;
4 use warnings;
5 use IPC::Open2;
6 use POSIX;
7
8 $ENV{'LC_ALL'} = 'C';
9 POSIX::setlocale(POSIX::LC_ALL, 'C');
10
11 @ARGV >= 1 || die "Usage: $0 <source directory>\n";
12
13
14 my %keywords = (
15 '.js' => [ '_:1', '_:1,2c', 'N_:2,3', 'N_:2,3,4c' ],
16 '.lua' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
17 '.htm' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
18 '.json' => [ '_:1', '_:1,2c' ]
19 );
20
21 sub xgettext($@) {
22 my $path = shift;
23 my @keywords = @_;
24 my ($ext) = $path =~ m!(\.\w+)$!;
25 my @cmd = qw(xgettext --from-code=UTF-8 --no-wrap);
26
27 if ($ext eq '.htm' || $ext eq '.lua') {
28 push @cmd, '--language=Lua';
29 }
30 elsif ($ext eq '.js' || $ext eq '.json') {
31 push @cmd, '--language=JavaScript';
32 }
33
34 push @cmd, map { "--keyword=$_" } (@{$keywords{$ext}}, @keywords);
35 push @cmd, '-o', '-';
36
37 return @cmd;
38 }
39
40 sub whitespace_collapse($) {
41 my $s = shift;
42 my %r = ('n' => ' ', 't' => ' ');
43
44 # Translate \t and \n to plain spaces, leave all other escape
45 # sequences alone. Finally replace all consecutive spaces by
46 # single ones and trim leading and trailing space.
47 $s =~ s/\\(.)/$r{$1} || "\\$1"/eg;
48 $s =~ s/ {2,}/ /g;
49 $s =~ s/^ //;
50 $s =~ s/ $//;
51
52 return $s;
53 }
54
55 sub postprocess_pot($$) {
56 my ($path, $source) = @_;
57 my (@res, $msgid);
58 my $skip = 1;
59
60 $source =~ s/^#: (.+?)\n/join("\n", map { "#: $path:$_" } $1 =~ m!:(\d+)!g) . "\n"/emg;
61
62 my @lines = split /\n/, $source;
63
64 # Remove all header lines up to the first location comment
65 while (@lines > 0 && $lines[0] !~ m!^#: !) {
66 shift @lines;
67 }
68
69 while (@lines > 0) {
70 my $line = shift @lines;
71
72 # Concat multiline msgids and collapse whitespaces
73 if ($line =~ m!^(msg\w+) "(.*)"$!) {
74 my $kw = $1;
75 my $kv = $2;
76
77 while (@lines > 0 && $lines[0] =~ m!^"(.*)"$!) {
78 $kv .= ' '. $1;
79 shift @lines;
80 }
81
82 $kv = whitespace_collapse($kv);
83
84 # Filter invalid empty msgids by popping all lines in @res
85 # leading to this point and skip all subsequent lines in
86 # @lines belonging to this faulty id.
87 if ($kw ne 'msgstr' && $kv eq '') {
88 while (@res > 0 && $res[-1] !~ m!^$!) {
89 pop @res;
90 }
91
92 while (@lines > 0 && $lines[0] =~ m!^(?:msg\w+ )?"(.*)"$!) {
93 shift @lines;
94 }
95
96 next;
97 }
98
99 push @res, sprintf '%s "%s"', $kw, $kv;
100 }
101
102 # Ignore any flags added by xgettext
103 elsif ($line =~ m!^#, !) {
104 next;
105 }
106
107 # Pass through other lines unmodified
108 else {
109 push @res, $line;
110 }
111 }
112
113 return @res ? join("\n", '', @res, '') : '';
114 }
115
116 sub uniq(@) {
117 my %h = map { $_, 1 } @_;
118 return sort keys %h;
119 }
120
121 sub preprocess_htm($$) {
122 my ($path, $source) = @_;
123 my $sub = {
124 '=' => '(%s)',
125 '_' => 'translate([==[%s]==])',
126 ':' => 'translate([==[%s]==])',
127 '+' => 'include([==[%s]==)',
128 '#' => '--[==[%s]==]',
129 '' => '%s'
130 };
131
132 # Translate the .htm source into a valid Lua source using bracket quotes
133 # to avoid the need for complex escaping.
134 $source =~ s|<%-?([=_:+#]?)(.*?)-?%>|sprintf "]==]; $sub->{$1}; [==[", $2|sge;
135
136 # Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
137 # and return them as extra keyword so that xgettext recognizes such expressions
138 # as translate(...) calls.
139 my @extra_function_keywords =
140 map { ("$_:1", "$_:1,2c") }
141 uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);
142
143 return ("[==[$source]==]", @extra_function_keywords);
144 }
145
146 sub preprocess_lua($$) {
147 my ($path, $source) = @_;
148
149 # Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
150 # and return them as extra keyword so that xgettext recognizes such expressions
151 # as translate(...) calls.
152 my @extra_function_keywords =
153 map { ("$_:1", "$_:1,2c") }
154 uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);
155
156 return ($source, @extra_function_keywords);
157 }
158
159 sub preprocess_json($$) {
160 my ($path, $source) = @_;
161 my ($file) = $path =~ m!([^/]+)$!;
162
163 $source =~ s/("(?:title)")\s*:\s*("(?:[^"\\]|\\.)*")/$1: _($2)/sg;
164
165 return ($source);
166 }
167
168
169 my ($msguniq_in, $msguniq_out);
170 my $msguniq_pid = open2($msguniq_out, $msguniq_in, 'msguniq', '-s');
171
172 print $msguniq_in "msgid \"\"\nmsgstr \"Content-Type: text/plain; charset=UTF-8\"\n";
173
174 if (open F, "find @ARGV -type f '(' -name '*.htm' -o -name '*.lua' -o -name '*.js' -o -path '*/menu.d/*.json' ')' |")
175 {
176 while (defined( my $file = readline F))
177 {
178 chomp $file;
179
180 if (open S, '<', $file)
181 {
182 local $/ = undef;
183 my $source = <S>;
184 my @extra_function_keywords;
185
186 if ($file =~ m!\.htm$!)
187 {
188 ($source, @extra_function_keywords) = preprocess_htm($file, $source);
189 }
190 elsif ($file =~ m!\.lua$!)
191 {
192 ($source, @extra_function_keywords) = preprocess_lua($file, $source);
193 }
194 elsif ($file =~ m!\.json$!)
195 {
196 ($source, @extra_function_keywords) = preprocess_json($file, $source);
197 }
198
199 my ($xgettext_in, $xgettext_out);
200 my $pid = open2($xgettext_out, $xgettext_in, xgettext($file, @extra_function_keywords), '-');
201
202 print $xgettext_in $source;
203 close $xgettext_in;
204
205 my $pot = readline $xgettext_out;
206 close $xgettext_out;
207
208 waitpid $pid, 0;
209
210 print $msguniq_in postprocess_pot($file, $pot);
211 }
212 }
213
214 close F;
215 }
216
217 close $msguniq_in;
218
219 my @pot = <$msguniq_out>;
220
221 close $msguniq_out;
222 waitpid $msguniq_pid, 0;
223
224 while (@pot > 0) {
225 my $line = shift @pot;
226
227 # Reorder the location comments in a detemrinistic way to
228 # reduce SCM noise when frequently updating templates.
229 if ($line =~ m!^#: !) {
230 my @locs = ($line);
231
232 while (@pot > 0 && $pot[0] =~ m!^#: !) {
233 push @locs, shift @pot;
234 }
235
236 print
237 map { join(':', @$_) . "\n" }
238 sort { ($a->[0] cmp $b->[0]) || ($a->[1] <=> $b->[1]) }
239 map { [ /^(.+):(\d+)$/ ] }
240 @locs
241 ;
242
243 next;
244 }
245
246 print $line;
247 }