Merge pull request #3444 from swg0101/subjectvalidation
[project/luci.git] / build / i18n-scan.pl
1 #!/usr/bin/perl
2
3 use utf8;
4 use strict;
5 use warnings;
6 use Text::Balanced qw(extract_tagged gen_delimited_pat);
7 use POSIX;
8
9 POSIX::setlocale(POSIX::LC_ALL, "C");
10
11 @ARGV >= 1 || die "Usage: $0 <source directory>\n";
12
13
14 my %stringtable;
15
16 sub dec_lua_str
17 {
18 my $s = shift;
19 my %rep = (
20 'a' => "\x07",
21 'b' => "\x08",
22 'f' => "\x0c",
23 'n' => "\n",
24 'r' => "\r",
25 't' => "\t",
26 'v' => "\x76"
27 );
28
29 $s =~ s!\\(?:([0-9]{1,2})|(.))!
30 $1 ? chr(int($1)) : ($rep{$2} || $2)
31 !segx;
32
33 $s =~ s/[\s\n]+/ /g;
34 $s =~ s/^ //;
35 $s =~ s/ $//;
36
37 return $s;
38 }
39
40 sub dec_json_str
41 {
42 my $s = shift;
43 my %rep = (
44 '"' => '"',
45 '/' => '/',
46 'b' => "\x08",
47 'f' => "\x0c",
48 'n' => "\n",
49 'r' => "\r",
50 't' => "\t",
51 '\\' => '\\'
52 );
53
54 $s =~ s!\\([\\/"bfnrt]|u([0-9a-fA-F]{4}))!
55 $2 ? chr(hex($2)) : $rep{$1}
56 !egx;
57
58 $s =~ s/[\s\n]+/ /g;
59 $s =~ s/^ //;
60 $s =~ s/ $//;
61
62 return $s;
63 }
64
65 sub dec_tpl_str
66 {
67 my $s = shift;
68 $s =~ s/-$//;
69 $s =~ s/[\s\n]+/ /g;
70 $s =~ s/^ //;
71 $s =~ s/ $//;
72 $s =~ s/\\/\\\\/g;
73 return $s;
74 }
75
76 if( open F, "find @ARGV -type f '(' -name '*.htm' -o -name '*.lua' -o -name '*.js' ')' | sort |" )
77 {
78 while( defined( my $file = readline F ) )
79 {
80 chomp $file;
81
82 if( open S, "< $file" )
83 {
84 binmode S, ':utf8';
85
86 local $/ = undef;
87 my $raw = <S>;
88 close S;
89
90 my $text = $raw;
91 my $line = 1;
92
93 while ($text =~ s/ ^ (.*?) (?:translate|translatef|i18n|_) ([\n\s]*) \( //sgx)
94 {
95 my ($prefix, $suffix) = ($1, $2);
96 my $code;
97 my $res = "";
98 my $sub = "";
99
100 $line += () = $prefix =~ /\n/g;
101
102 my $position = "$file:$line";
103
104 $line += () = $suffix =~ /\n/g;
105
106 while (defined $sub)
107 {
108 undef $sub;
109
110 if ($text =~ /^ ([\n\s]*(?:\.\.[\n\s]*)?) (\[=*\[) /sx)
111 {
112 my $ws = $1;
113 my $stag = quotemeta $2;
114 (my $etag = $stag) =~ y/[/]/;
115
116 ($sub, $text) = extract_tagged($text, $stag, $etag, q{\s*(?:\.\.\s*)?});
117
118 $line += () = $ws =~ /\n/g;
119
120 if (defined($sub) && length($sub)) {
121 $line += () = $sub =~ /\n/g;
122
123 $sub =~ s/^$stag//;
124 $sub =~ s/$etag$//;
125 $res .= $sub;
126 }
127 }
128 elsif ($text =~ /^ ([\n\s]*(?:\.\.[\n\s]*)?) (['"]) /sx)
129 {
130 my $ws = $1;
131 my $quote = $2;
132 my $re = gen_delimited_pat($quote, '\\');
133
134 if ($text =~ m/\G\s*(?:\.\.\s*)?($re)/gcs)
135 {
136 $sub = $1;
137 $text = substr $text, pos $text;
138 }
139
140 $line += () = $ws =~ /\n/g;
141
142 if (defined($sub) && length($sub)) {
143 $line += () = $sub =~ /\n/g;
144
145 $sub =~ s/^$quote//;
146 $sub =~ s/$quote$//;
147 $res .= $sub;
148 }
149 }
150 }
151
152 if (defined($res))
153 {
154 $res = dec_lua_str($res);
155
156 if ($res) {
157 $stringtable{$res} ||= [ ];
158 push @{$stringtable{$res}}, $position;
159 }
160 }
161 }
162
163
164 $text = $raw;
165 $line = 1;
166
167 while( $text =~ s/ ^ (.*?) <% -? [:_] /<%/sgx )
168 {
169 $line += () = $1 =~ /\n/g;
170
171 ( my $code, $text ) = extract_tagged($text, '<%', '%>');
172
173 if( defined $code )
174 {
175 my $position = "$file:$line";
176
177 $line += () = $code =~ /\n/g;
178
179 $code = dec_tpl_str(substr $code, 2, length($code) - 4);
180
181 $stringtable{$code} ||= [];
182 push @{$stringtable{$code}}, $position;
183 }
184 }
185 }
186 }
187
188 close F;
189 }
190
191 if( open F, "find @ARGV -type f -path '*/menu.d/*.json' | sort |" )
192 {
193 while( defined( my $file = readline F ) )
194 {
195 chomp $file;
196
197 if( open S, "< $file" )
198 {
199 binmode S, ':utf8';
200
201 local $/ = undef;
202 my $raw = <S>;
203 close S;
204
205 my $text = $raw;
206 my $line = 1;
207
208 while ($text =~ s/ ^ (.*?) "title" ([\n\s]*) : //sgx)
209 {
210 my ($prefix, $suffix) = ($1, $2);
211 my $code;
212 my $res = "";
213 my $sub = "";
214
215 $line += () = $prefix =~ /\n/g;
216
217 my $position = "$file:$line";
218
219 $line += () = $suffix =~ /\n/g;
220
221 while (defined $sub)
222 {
223 undef $sub;
224
225 if ($text =~ /^ ([\n\s]*) " /sx)
226 {
227 my $ws = $1;
228 my $re = gen_delimited_pat('"', '\\');
229
230 if ($text =~ m/\G\s*($re)/gcs)
231 {
232 $sub = $1;
233 $text = substr $text, pos $text;
234 }
235
236 $line += () = $ws =~ /\n/g;
237
238 if (defined($sub) && length($sub)) {
239 $line += () = $sub =~ /\n/g;
240
241 $sub =~ s/^"//;
242 $sub =~ s/"$//;
243 $res .= $sub;
244 }
245 }
246 }
247
248 if (defined($res))
249 {
250 $res = dec_json_str($res);
251
252 if ($res) {
253 $stringtable{$res} ||= [ ];
254 push @{$stringtable{$res}}, $position;
255 }
256 }
257 }
258 }
259 }
260
261 close F;
262 }
263
264
265 if( open C, "| msgcat -" )
266 {
267 binmode C, ':utf8';
268
269 printf C "msgid \"\"\nmsgstr \"Content-Type: text/plain; charset=UTF-8\"\n\n";
270
271 foreach my $key ( sort keys %stringtable )
272 {
273 if( length $key )
274 {
275 my @positions =
276 map { join ':', @$_ }
277 sort { ($a->[0] cmp $b->[0]) || ($a->[1] <=> $b->[1]) }
278 map { [ /^(.+):(\d+)$/ ] }
279 @{$stringtable{$key}};
280
281 $key =~ s/\\/\\\\/g;
282 $key =~ s/\n/\\n/g;
283 $key =~ s/\t/\\t/g;
284 $key =~ s/"/\\"/g;
285
286 printf C "#: %s\nmsgid \"%s\"\nmsgstr \"\"\n\n",
287 join(' ', @positions), $key;
288 }
289 }
290
291 close C;
292 }