Merge remote-tracking branch 'weblate/master'
[oweals/luci.git] / build / i18n-scan.pl
1 #!/usr/bin/perl
2
3 use utf8;
4 use strict;
5 use warnings;
6 use Text::Balanced qw(extract_tagged gen_delimited_pat);
7 use POSIX;
8
9 POSIX::setlocale(POSIX::LC_ALL, "C");
10
11 @ARGV >= 1 || die "Usage: $0 <source directory>\n";
12
13
14 my %stringtable;
15
16 sub dec_lua_str
17 {
18         my $s = shift;
19         my %rep = (
20                 'a' => "\x07",
21                 'b' => "\x08",
22                 'f' => "\x0c",
23                 'n' => "\n",
24                 'r' => "\r",
25                 't' => "\t",
26                 'v' => "\x76"
27         );
28
29         $s =~ s!\\(?:([0-9]{1,2})|(.))!
30                 $1 ? chr(int($1)) : ($rep{$2} || $2)
31         !segx;
32
33         $s =~ s/[\s\n]+/ /g;
34         $s =~ s/^ //;
35         $s =~ s/ $//;
36
37         return $s;
38 }
39
40 sub dec_json_str
41 {
42         my $s = shift;
43         my %rep = (
44                 '"' => '"',
45                 '/' => '/',
46                 'b' => "\x08",
47                 'f' => "\x0c",
48                 'n' => "\n",
49                 'r' => "\r",
50                 't' => "\t",
51                 '\\' => '\\'
52         );
53
54         $s =~ s!\\([\\/"bfnrt]|u([0-9a-fA-F]{4}))!
55                 $2 ? chr(hex($2)) : $rep{$1}
56         !egx;
57
58         $s =~ s/[\s\n]+/ /g;
59         $s =~ s/^ //;
60         $s =~ s/ $//;
61
62         return $s;
63 }
64
65 sub dec_tpl_str
66 {
67         my $s = shift;
68         $s =~ s/-$//;
69         $s =~ s/[\s\n]+/ /g;
70         $s =~ s/^ //;
71         $s =~ s/ $//;
72         $s =~ s/\\/\\\\/g;
73         return $s;
74 }
75
76 if( open F, "find @ARGV -type f '(' -name '*.htm' -o -name '*.lua' -o -name '*.js' ')' | sort |" )
77 {
78         while( defined( my $file = readline F ) )
79         {
80                 chomp $file;
81
82                 if( open S, "< $file" )
83                 {
84                         binmode S, ':utf8';
85
86                         local $/ = undef;
87                         my $raw = <S>;
88                         close S;
89
90                         my $text = $raw;
91                         my $line = 1;
92
93                         while ($text =~ s/ ^ (.*?) (?:translate|translatef|i18n|_) ([\n\s]*) \( //sgx)
94                         {
95                                 my ($prefix, $suffix) = ($1, $2);
96                                 my $code;
97                                 my $res = "";
98                                 my $sub = "";
99
100                                 $line += () = $prefix =~ /\n/g;
101
102                                 my $position = "$file:$line";
103
104                                 $line += () = $suffix =~ /\n/g;
105
106                                 while (defined $sub)
107                                 {
108                                         undef $sub;
109
110                                         if ($text =~ /^ ([\n\s]*(?:\.\.[\n\s]*)?) (\[=*\[) /sx)
111                                         {
112                                                 my $ws = $1;
113                                                 my $stag = quotemeta $2;
114                                                 (my $etag = $stag) =~ y/[/]/;
115
116                                                 ($sub, $text) = extract_tagged($text, $stag, $etag, q{\s*(?:\.\.\s*)?});
117
118                                                 $line += () = $ws =~ /\n/g;
119
120                                                 if (defined($sub) && length($sub)) {
121                                                         $line += () = $sub =~ /\n/g;
122
123                                                         $sub =~ s/^$stag//;
124                                                         $sub =~ s/$etag$//;
125                                                         $res .= $sub;
126                                                 }
127                                         }
128                                         elsif ($text =~ /^ ([\n\s]*(?:\.\.[\n\s]*)?) (['"]) /sx)
129                                         {
130                                                 my $ws = $1;
131                                                 my $quote = $2;
132                                                 my $re = gen_delimited_pat($quote, '\\');
133
134                                                 if ($text =~ m/\G\s*(?:\.\.\s*)?($re)/gcs)
135                                                 {
136                                                         $sub = $1;
137                                                         $text = substr $text, pos $text;
138                                                 }
139
140                                                 $line += () = $ws =~ /\n/g;
141
142                                                 if (defined($sub) && length($sub)) {
143                                                         $line += () = $sub =~ /\n/g;
144
145                                                         $sub =~ s/^$quote//;
146                                                         $sub =~ s/$quote$//;
147                                                         $res .= $sub;
148                                                 }
149                                         }
150                                 }
151
152                                 if (defined($res))
153                                 {
154                                         $res = dec_lua_str($res);
155
156                                         if ($res) {
157                                                 $stringtable{$res} ||= [ ];
158                                                 push @{$stringtable{$res}}, $position;
159                                         }
160                                 }
161                         }
162
163
164                         $text = $raw;
165                         $line = 1;
166
167                         while( $text =~ s/ ^ (.*?) <% -? [:_] /<%/sgx )
168                         {
169                                 $line += () = $1 =~ /\n/g;
170
171                                 ( my $code, $text ) = extract_tagged($text, '<%', '%>');
172
173                                 if( defined $code )
174                                 {
175                                         my $position = "$file:$line";
176
177                                         $line += () = $code =~ /\n/g;
178
179                                         $code = dec_tpl_str(substr $code, 2, length($code) - 4);
180
181                                         $stringtable{$code} ||= [];
182                                         push @{$stringtable{$code}}, $position;
183                                 }
184                         }
185                 }
186         }
187
188         close F;
189 }
190
191 if( open F, "find @ARGV -type f -path '*/menu.d/*.json' | sort |" )
192 {
193         while( defined( my $file = readline F ) )
194         {
195                 chomp $file;
196
197                 if( open S, "< $file" )
198                 {
199                         binmode S, ':utf8';
200
201                         local $/ = undef;
202                         my $raw = <S>;
203                         close S;
204
205                         my $text = $raw;
206                         my $line = 1;
207
208                         while ($text =~ s/ ^ (.*?) "title" ([\n\s]*) : //sgx)
209                         {
210                                 my ($prefix, $suffix) = ($1, $2);
211                                 my $code;
212                                 my $res = "";
213                                 my $sub = "";
214
215                                 $line += () = $prefix =~ /\n/g;
216
217                                 my $position = "$file:$line";
218
219                                 $line += () = $suffix =~ /\n/g;
220
221                                 while (defined $sub)
222                                 {
223                                         undef $sub;
224
225                                         if ($text =~ /^ ([\n\s]*) " /sx)
226                                         {
227                                                 my $ws = $1;
228                                                 my $re = gen_delimited_pat('"', '\\');
229
230                                                 if ($text =~ m/\G\s*($re)/gcs)
231                                                 {
232                                                         $sub = $1;
233                                                         $text = substr $text, pos $text;
234                                                 }
235
236                                                 $line += () = $ws =~ /\n/g;
237
238                                                 if (defined($sub) && length($sub)) {
239                                                         $line += () = $sub =~ /\n/g;
240
241                                                         $sub =~ s/^"//;
242                                                         $sub =~ s/"$//;
243                                                         $res .= $sub;
244                                                 }
245                                         }
246                                 }
247
248                                 if (defined($res))
249                                 {
250                                         $res = dec_json_str($res);
251
252                                         if ($res) {
253                                                 $stringtable{$res} ||= [ ];
254                                                 push @{$stringtable{$res}}, $position;
255                                         }
256                                 }
257                         }
258                 }
259         }
260
261         close F;
262 }
263
264
265 if( open C, "| msgcat -" )
266 {
267         binmode C, ':utf8';
268
269         printf C "msgid \"\"\nmsgstr \"Content-Type: text/plain; charset=UTF-8\"\n\n";
270
271         foreach my $key ( sort keys %stringtable )
272         {
273                 if( length $key )
274                 {
275                         my @positions = @{$stringtable{$key}};
276
277                         $key =~ s/\\/\\\\/g;
278                         $key =~ s/\n/\\n/g;
279                         $key =~ s/\t/\\t/g;
280                         $key =~ s/"/\\"/g;
281
282                         printf C "#: %s\nmsgid \"%s\"\nmsgstr \"\"\n\n",
283                                 join(' ', @positions), $key;
284                 }
285         }
286
287         close C;
288 }