squid: disable krb5
[feed/packages.git] / net / adblock / files / adblock-update.sh
1 #!/bin/sh
2 #######################################################
3 # ad/abuse domain blocking script for dnsmasq/openwrt #
4 # written by Dirk Brenken (openwrt@brenken.org) #
5 #######################################################
6
7 # LICENSE
8 # ========
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22 ###############
23 # environment #
24 ###############
25
# set the C locale, characters are single bytes, the charset is ASCII
# speeds up things like sort, grep etc.
# (the variable has to be exported, otherwise it is only a plain shell
# variable and the child processes never see it)
#
export LC_ALL=C

# set script version
#
adb_version="0.60.1"

# get current pid, script directory and openwrt version,
# if the script was called without any path component (e.g. "sh adblock-update.sh")
# the current directory is used as script directory
#
pid=${$}
case "${0}" in
    */*)
        adb_scriptdir="${0%/*}"
        ;;
    *)
        adb_scriptdir="."
        ;;
esac
openwrt_version="$(cat /etc/openwrt_version 2>/dev/null)"

# source in adblock function library,
# without a readable library the script stops immediately with rc 100
#
if [ -r "${adb_scriptdir}/adblock-helper.sh" ]
then
    . "${adb_scriptdir}/adblock-helper.sh" 2>/dev/null
else
    rc=100
    /usr/bin/logger -s -t "adblock[${pid}] error" "adblock function library not found, rc: ${rc}"
    exit ${rc}
fi
51
52 ################
53 # main program #
54 ################
55
# call exit function on trap signals (HUP, INT, QUIT, BUS, SEGV, TERM),
# the trap action is single-quoted on purpose: ${rc} has to be expanded
# when the signal arrives (rc=255), not when the trap is installed
# NOTE(review): signal numbers are platform dependent, 10 is not BUS
# on every architecture - confirm for the supported targets
#
trap 'rc=255; f_log "error signal received/trapped" "${rc}"; f_exit' 1 2 3 10 11 15
59
# announce the start of this run,
# the message carries script version, openwrt version and the current timestamp
#
log_time="$(/bin/date "+%d.%m.%Y %H:%M:%S")"
f_log "domain adblock processing started (${adb_version}, ${openwrt_version}, ${log_time})"

# prepare the runtime environment,
# the functions are called in exactly this order: load, parse, check
# (none of them is defined in this file, presumably they come from the sourced function library)
#
f_envload
f_envparse
f_envcheck
75
# start shallalist (pre-)processing,
# this only takes place if a shallalist archive url is set
#
if [ -n "${adb_arc_shalla}" ]
then
    # start shallalist processing
    #
    shalla_archive="${adb_tmpdir}/shallalist.tar.gz"
    shalla_file="${adb_tmpdir}/shallalist.txt"
    src_name="shalla"
    adb_dnsfile="${adb_dnsdir}/${adb_dnsprefix}.${src_name}"
    list_time="$(grep -F "# last modified: " "${adb_dnsfile}" 2>/dev/null)"
    list_time="${list_time/*: /}"
    f_log "=> (pre-)processing adblock source '${src_name}'"

    # only process shallalist archive with updated timestamp,
    # extract and merge only domains of selected shallalist categories
    # (${wget_parm} is left unquoted on purpose, it may hold several options)
    #
    shalla_time="$(wget ${wget_parm} --timeout=5 --server-response --spider "${adb_arc_shalla}" 2>&1 | grep -F "Last-Modified: " 2>/dev/null | tr -d '\r' 2>/dev/null)"
    shalla_time="${shalla_time/*: /}"
    if [ -z "${shalla_time}" ]
    then
        shalla_time="$(date)"
        f_log " no online timestamp received, current date will be used"
    fi
    if [ -z "${list_time}" ] || [ "${list_time}" != "${shalla_time}" ]
    then
        wget ${wget_parm} --timeout="${adb_maxtime}" --output-document="${shalla_archive}" "${adb_arc_shalla}" 2>/dev/null
        rc=${?}
        if [ $((rc)) -eq 0 ]
        then
            > "${shalla_file}"
            for category in ${adb_cat_shalla}
            do
                tar -xOzf "${shalla_archive}" "BL/${category}/domains" 2>/dev/null >> "${shalla_file}"
                rc=${?}
                if [ $((rc)) -ne 0 ]
                then
                    f_log " archive extraction failed (${category})"
                    break
                fi
            done

            # remove temporary files
            #
            rm -f "${shalla_archive}" >/dev/null 2>&1
            rm -rf "${adb_tmpdir}/BL" >/dev/null 2>&1
            if [ $((rc)) -eq 0 ]
            then
                adb_sources="${adb_sources} ${shalla_file}&ruleset=rset_shalla"
                f_log " source archive (pre-)processing finished"
            else
                # extraction failed: exclude the existing list from the later
                # removal of old lists, a failed update must not delete it
                #
                adb_srclist="! -name ${adb_dnsprefix}.${src_name}"
                rc=0
            fi
        else
            # download failed: exclude the existing list from the later
            # removal of old lists, a failed update must not delete it
            #
            adb_srclist="! -name ${adb_dnsprefix}.${src_name}"
            f_log " source archive download failed"
            rc=0
        fi
    else
        adb_srclist="! -name ${adb_dnsprefix}.${src_name}"
        f_log " source archive doesn't change, no update required"
    fi
fi
138
# the local blacklist acts as an additional adblock domain source,
# it is only appended if the file exists and is not empty
#
if test -s "${adb_blacklist}"
then
    adb_sources="${adb_sources} ${adb_blacklist}&ruleset=rset_blacklist"
fi
145
# helper: write the domains of file ${1} sorted and free of duplicates to stdout,
# all entries found in the whitelist file ${2} are dropped (only if that file is not empty),
# a domain which occurs more than once is kept exactly once
#
f_uniq_domains()
{
    if [ -s "${2}" ]
    then
        grep -Fvxf "${2}" "${1}" 2>/dev/null | sort -u 2>/dev/null
    else
        sort -u "${1}" 2>/dev/null
    fi
}

# loop through active adblock domain sources,
# download sources, prepare output and store all extracted domains in temp file
#
for src in ${adb_sources}
do
    # every source is given in the form "<url>&ruleset=rset_<name>"
    #
    url="${src/\&ruleset=*/}"
    src_name="${src/*\&ruleset=rset_/}"
    adb_dnsfile="${adb_dnsdir}/${adb_dnsprefix}.${src_name}"
    list_time="$(grep -F "# last modified: " "${adb_dnsfile}" 2>/dev/null)"
    list_time="${list_time/*: /}"
    f_log "=> processing adblock source '${src_name}'"

    # prepare find statement with active adblock list sources
    #
    if [ -z "${adb_srclist}" ]
    then
        adb_srclist="! -name ${adb_dnsprefix}.${src_name}"
    else
        adb_srclist="${adb_srclist} -a ! -name ${adb_dnsprefix}.${src_name}"
    fi

    # only download adblock list with newer/updated timestamp
    # (${wget_parm} is left unquoted on purpose, it may hold several options)
    #
    if [ "${src_name}" = "blacklist" ]
    then
        url_time="$(date -r "${adb_blacklist}" 2>/dev/null)"
    elif [ "${src_name}" = "shalla" ]
    then
        url_time="${shalla_time}"
    else
        url_time="$(wget ${wget_parm} --timeout=5 --server-response --spider "${url}" 2>&1 | grep -F "Last-Modified: " 2>/dev/null | tr -d '\r' 2>/dev/null)"
        url_time="${url_time/*: /}"
    fi
    if [ -z "${url_time}" ]
    then
        url_time="$(date)"
        f_log " no online timestamp received, current date will be used"
    fi
    if [ -z "${list_time}" ] || [ "${list_time}" != "${url_time}" ]
    then
        if [ "${src_name}" = "blacklist" ]
        then
            tmp_domains="$(cat "${adb_blacklist}" 2>/dev/null)"
            rc=${?}
        elif [ "${src_name}" = "shalla" ]
        then
            tmp_domains="$(cat "${shalla_file}" 2>/dev/null)"
            rc=${?}
        else
            tmp_domains="$(wget ${wget_parm} --timeout="${adb_maxtime}" --output-document=- "${url}" 2>/dev/null)"
            rc=${?}
        fi
    else
        f_log " source doesn't change, no update required"
        continue
    fi

    # check download result and prepare domain output by regex patterns,
    # the source string is passed as printf argument (never as format string),
    # a '%' within the url must not be interpreted by printf
    #
    if [ $((rc)) -eq 0 ] && [ -n "${tmp_domains}" ]
    then
        # turns "<url>&ruleset=rset_<name>" into "ruleset=$rset_<name>",
        # i.e. ${ruleset} receives the content of the variable rset_<name>
        #
        eval "$(printf "%s" "${src}" | sed 's/\(.*\&ruleset=\)/ruleset=\$/g')"
        count="$(printf "%s\n" "${tmp_domains}" | tr '[A-Z]' '[a-z]' | eval "${ruleset}" | tee "${adb_tmpfile}" | wc -l)"
        f_log " source download finished (${count} entries)"
        if [ "${src_name}" = "shalla" ]
        then
            rm -f "${shalla_file}" >/dev/null 2>&1
        fi
        unset tmp_domains
    elif [ $((rc)) -eq 0 ] && [ -z "${tmp_domains}" ]
    then
        f_log " empty source download finished"
        continue
    else
        rc=0
        f_log " source download failed"
        continue
    fi

    # remove whitelist domains, sort domains and make them unique,
    # finally rewrite ad/abuse domain information to separate dnsmasq files,
    # rc reflects the result of the last pipeline stage (${adb_dnsformat})
    #
    if [ $((count)) -gt 0 ] && [ -n "${adb_tmpfile}" ]
    then
        f_uniq_domains "${adb_tmpfile}" "${adb_whitelist}" | eval "${adb_dnsformat}" 2>/dev/null > "${adb_dnsfile}"
        rc=${?}

        # prepare find statement with revised adblock list sources
        #
        if [ -z "${adb_revsrclist}" ]
        then
            adb_revsrclist="-name ${adb_dnsprefix}.${src_name}"
        else
            adb_revsrclist="${adb_revsrclist} -o -name ${adb_dnsprefix}.${src_name}"
        fi

        # write preliminary adblock list footer,
        # the line count is halved if both wan interface variables are set
        # (presumably the list holds two lines per domain in that case - confirm against ${adb_dnsformat})
        #
        if [ $((rc)) -eq 0 ]
        then
            if [ -n "${adb_wanif4}" ] && [ -n "${adb_wanif6}" ]
            then
                count="$(($(wc -l < "${adb_dnsfile}") / 2))"
            else
                count="$(wc -l < "${adb_dnsfile}")"
            fi
            printf "%s\n" "#------------------------------------------------------------------" >> "${adb_dnsfile}"
            printf "%s\n" "# ${0##*/} (${adb_version}) - ${count} ad/abuse domains blocked" >> "${adb_dnsfile}"
            printf "%s\n" "# source: ${url}" >> "${adb_dnsfile}"
            printf "%s\n" "# last modified: ${url_time}" >> "${adb_dnsfile}"
            f_log " domain merging finished"
        else
            f_log " domain merging failed" "${rc}"
            f_restore
        fi
    else
        f_log " empty domain input received"
        continue
    fi
done
272
# remove old adblock lists and their backups,
# i.e. all list files which are not part of the prepared find statement
#
if [ -n "${adb_srclist}" ]
then
    # ${adb_srclist} is left unquoted on purpose, it holds a prepared chain
    # of find expressions ("! -name <list> -a ! -name <list> ..."),
    # the find result has to be saved in rc, otherwise the checks below
    # would only see the leftover rc of an earlier step
    #
    adb_rmfind="$(find "${adb_dnsdir}" -maxdepth 1 -type f -name "${adb_dnsprefix}.*" \( ${adb_srclist} \) -print -exec rm -f "{}" \; 2>/dev/null)"
    rc=${?}
    if [ $((rc)) -eq 0 ] && [ -n "${adb_rmfind}" ]
    then
        f_log "no longer used adblock lists removed" "${rc}"
    elif [ $((rc)) -ne 0 ]
    then
        f_log "error during removal of old adblock lists" "${rc}"
        f_exit
    fi
    if [ "${backup_ok}" = "true" ]
    then
        find "${adb_backupdir}" -maxdepth 1 -type f -name "${adb_dnsprefix}.*" \( ${adb_srclist} \) -exec rm -f "{}" \; 2>/dev/null
        rc=${?}
        if [ $((rc)) -ne 0 ]
        then
            f_log "error during removal of old backups" "${rc}"
            f_exit
        fi
    fi
elif [ -n "${adb_dnsdir}" ] && [ -n "${adb_dnsprefix}" ]
then
    # no active source at all: remove every adblock list,
    # the check above makes sure that the glob never expands to "/."*
    #
    rm -f "${adb_dnsdir}/${adb_dnsprefix}."* >/dev/null 2>&1
    if [ "${backup_ok}" = "true" ]
    then
        rm -f "${adb_backupdir}/${adb_dnsprefix}."* >/dev/null 2>&1
        f_log "all available adblock lists and backups removed"
    else
        f_log "all available adblock lists removed"
    fi
fi
305
# make the separate adblock lists unique against each other,
# this only takes place if the feature is enabled and at least one list has been revised
#
if [ $((adb_unique)) -eq 1 ] && [ -n "${adb_revsrclist}" ]
then
    f_log "remove duplicates in separate adblock lists"
    overall_file="${adb_dnsdir}/tmp.overall"

    # build a temporary overall list with all domains,
    # the last four lines (footer) of every list are skipped
    #
    head -qn -4 "${adb_dnsdir}/${adb_dnsprefix}."* 2>/dev/null | sort -u 2>/dev/null > "${overall_file}"

    # walk through the separate lists, smallest list first
    #
    for list in $(ls -Sr "${adb_dnsdir}/${adb_dnsprefix}."* 2>/dev/null)
    do
        # reduce the path to the list suffix (source name)
        #
        list="${list/*./}"
        list_file="${adb_dnsdir}/${adb_dnsprefix}.${list}"
        work_file="${adb_dnsdir}/tmp.${list}"

        # keep only entries which are found in the overall list as well as in the separate list
        #
        sort "${overall_file}" "${list_file}" 2>/dev/null | uniq -d 2>/dev/null > "${work_file}"

        # take these entries out of the overall list,
        # later lists can not claim them again
        #
        tmp_unique="$(sort "${overall_file}" "${work_file}" 2>/dev/null | uniq -u 2>/dev/null)"
        printf "%s\n" "${tmp_unique}" > "${overall_file}"

        # write final adblocklist footer,
        # the last two lines of the old list are appended unchanged
        #
        count="$(wc -l < "${work_file}")"
        if [ -n "${adb_wanif4}" ] && [ -n "${adb_wanif6}" ]
        then
            count="$((count / 2))"
        fi
        printf "%s\n" "#------------------------------------------------------------------" "# ${0##*/} (${adb_version}) - ${count} ad/abuse domains blocked" >> "${work_file}"
        tail -qn -2 "${list_file}" 2>/dev/null >> "${work_file}"
        mv -f "${work_file}" "${list_file}" >/dev/null 2>&1
    done
    rm -f "${overall_file}" >/dev/null 2>&1
fi
350
# determine the overall count of all adblock lists (the four footer lines of each list are skipped),
# the line count is halved if both wan interface variables are set
#
adb_count="$(head -qn -4 "${adb_dnsdir}/${adb_dnsprefix}."* 2>/dev/null | wc -l)"
if [ -n "${adb_wanif4}" ] && [ -n "${adb_wanif6}" ]
then
    adb_count="$((adb_count / 2))"
fi

# an overall count of null means that nothing usable is left (i.e. all downloads failed),
# in this case the adblock lists are restored, provided that backups are available
#
if [ $((adb_count)) -eq 0 ] && [ "${backup_ok}" = "true" ]
then
    f_restore
fi
366
# dnsmasq only needs a restart if lists have been revised or removed,
# after the restart the process list is checked for a running dnsmasq
#
if [ -z "${adb_revsrclist}" ] && [ -z "${adb_rmfind}" ]
then
    f_log "adblock lists with overall ${adb_count} domains are still valid, no dnsmasq restart required"
else
    /etc/init.d/dnsmasq restart >/dev/null 2>&1
    sleep 2

    # the pattern "[d]nsmasq" keeps the grep process itself out of the result
    #
    dns_status="$(ps 2>/dev/null | grep "[d]nsmasq" 2>/dev/null)"
    if [ -z "${dns_status}" ]
    then
        rc=105
        f_log "dnsmasq restart failed, please check 'logread' output" "${rc}"
        f_restore
    else
        f_log "adblock lists with overall ${adb_count} domains loaded"
    fi
fi
386
# create adblock list backups,
# the glob check makes sure that at least one adblock list exists
# (without any match the pattern stays unexpanded),
# the path is passed as printf argument, never as format string
#
if [ "${backup_ok}" = "true" ] && [ "$(printf "%s" "${adb_dnsdir}/${adb_dnsprefix}."*)" != "${adb_dnsdir}/${adb_dnsprefix}.*" ]
then
    for file in "${adb_dnsdir}/${adb_dnsprefix}."*
    do
        # only copy lists without backup or lists which are newer than their backup
        #
        filename="${file##*/}"
        if [ ! -f "${adb_backupdir}/${filename}" ] || [ "${file}" -nt "${adb_backupdir}/${filename}" ]
        then
            cp -pf "${file}" "${adb_backupdir}" 2>/dev/null
            rc=${?}
            if [ $((rc)) -ne 0 ]
            then
                f_log "error during backup of adblock list (${filename})" "${rc}"
                f_exit
            fi
            backup_done="true"
        fi
    done
    if [ "${backup_done}" = "true" ]
    then
        f_log "new adblock list backups generated"
    else
        f_log "adblock list backups are still valid, no new backups required"
    fi
fi
413
414 # remove temporary files and exit
415 #
416 f_exit