222e41247bed745dafc517e0460f9095f169d42c
[feed/packages.git] / net / adblock / files / adblock-update.sh
1 #!/bin/sh
2 # dns based ad/abuse domain blocking script
3 # written by Dirk Brenken (dev@brenken.org)
4
5 # This is free software, licensed under the GNU General Public License v3.
6 # You should have received a copy of the GNU General Public License
7 # along with this program. If not, see <http://www.gnu.org/licenses/>.
8
# set the C locale and export it, so that all external tools started by
# this script (sort, grep, awk, ...) inherit it as well
#
export LC_ALL=C

# script debug switch (disabled by default)
# set 'DEBUG=1' to enable script debugging
#
DEBUG=0
if [ $((DEBUG)) -eq 0 ]
then
    # discard stderr of the script and of every child process
    exec 2>/dev/null
fi
21
# set pid & logger
#
adb_pid="${$}"
adb_pidfile="/var/run/adblock.pid"
adb_log="$(command -v logger)"

# a readable pid file is treated as "another instance is running":
# report the pid stored in that file and stop with rc 255,
# otherwise write the own pid to the pid file
# NOTE(review): a stale pid file left by a crashed run blocks every further
# start until it is removed by hand - confirm whether that is intended
#
if [ -r "${adb_pidfile}" ]
then
    rc=255
    "${adb_log}" -s -t "adblock[${adb_pid}] error" "adblock service already running ($(cat "${adb_pidfile}"))"
    exit ${rc}
else
    # fixed format string, the pid is passed as data only
    printf '%s' "${adb_pid}" > "${adb_pidfile}"
fi
36
# get current directory and set script/config version
#
case "${0}" in
    */*)
        adb_scriptdir="${0%/*}"
        ;;
    *)
        # called without any path component (e.g. 'sh adblock-update.sh'):
        # '${0%/*}' would return the script name itself, use the
        # current directory instead
        adb_scriptdir="."
        ;;
esac
adb_scriptver="1.1.15"
adb_mincfgver="2.0"

# source in adblock function library,
# without the library the script cannot continue: log the error,
# remove the pid file written before and stop with rc 254
#
if [ -r "${adb_scriptdir}/adblock-helper.sh" ]
then
    . "${adb_scriptdir}/adblock-helper.sh"
else
    rc=254
    "${adb_log}" -s -t "adblock[${adb_pid}] error" "adblock function library not found"
    rm -f "${adb_pidfile}"
    exit ${rc}
fi
54
# call trap function on error signals (HUP, INT, QUIT, BUS, SEGV, TERM)
# - the trap action is single-quoted, so that '${rc}' gets expanded when the
#   signal arrives (after 'rc=250') and not when the trap is installed
# - signal names are used instead of numbers, because the numeric value
#   of some signals (e.g. BUS) depends on the cpu architecture
#
trap 'rc=250; f_log "error signal received/trapped" "${rc}"; f_exit' HUP INT QUIT BUS SEGV TERM

# load environment
#
f_envload

# start logging
#
f_log "domain adblock processing started (${adb_scriptver}, ${adb_sysver}, $(/bin/date "+%d.%m.%Y %H:%M:%S"))"

# check environment
#
f_envcheck
70
# loop through active adblock domain sources,
# download sources, prepare output and store all extracted domains in temp file
#
# per source the loop
# - reads url and awk ruleset from 'adb_src_<name>' / 'adb_src_rset_<name>'
# - extends the find expressions 'adb_srclist' (active sources),
#   'adb_errsrclist' (failed downloads) and 'adb_revsrclist' (revised lists),
#   which are used by the sections after the loop
# - stores the result of the run in the 'adb_src_timestamp' option of the source
#
for src_name in ${adb_sources}
do
    # eval is needed for the indirect variable lookup in a POSIX shell
    #
    eval "url=\"\${adb_src_${src_name}}\""
    eval "src_rset=\"\${adb_src_rset_${src_name}}\""
    adb_dnsfile="${adb_dnsdir}/${adb_dnsprefix}.${src_name}"
    list_time="$("${adb_uci}" -q get "adblock.${src_name}.adb_src_timestamp")"
    f_log "=> processing adblock source '${src_name}'"

    # check 'url' and 'src_rset' values
    #
    if [ -z "${url}" ] || [ -z "${src_rset}" ]
    then
        "${adb_uci}" -q set "adblock.${src_name}.adb_src_timestamp=broken config"
        f_log " broken source configuration, check 'adb_src' and 'adb_src_rset' in config"
        continue
    fi

    # prepare find statement with active adblock list sources
    # (the trailing '*' stays literal here, it is a pattern for find)
    #
    if [ -z "${adb_srclist}" ]
    then
        adb_srclist="! -name ${adb_dnsprefix}.${src_name}*"
    else
        adb_srclist="${adb_srclist} -a ! -name ${adb_dnsprefix}.${src_name}*"
    fi

    # only download adblock list with newer/updated timestamp,
    # the blacklist is a local file, all other sources are queried online
    #
    if [ "${src_name}" = "blacklist" ]
    then
        url_time="$(date -r "${url}")"
    else
        # print the header value with a fixed format string, a '%' within
        # the received data must not be interpreted by printf
        url_time="$("${adb_fetch}" ${fetch_parm} --server-response --spider "${url}" 2>&1 | awk '$0 ~ /Last-Modified/ {printf "%s", substr($0,18)}')"
    fi
    if [ -z "${url_time}" ]
    then
        url_time="$(date)"
        f_log " no online timestamp received, current date will be used"
    fi
    if [ -z "${list_time}" ] || [ "${list_time}" != "${url_time}" ] || [ ! -r "${adb_dnsfile}" ] ||
        { [ "${backup_ok}" = "true" ] && [ ! -r "${adb_dir_backup}/${adb_dnsprefix}.${src_name}.gz" ]; }
    then
        if [ "${src_name}" = "blacklist" ]
        then
            tmp_domains="$(cat "${url}")"
            rc=${?}
        elif [ "${src_name}" = "shalla" ]
        then
            # shalla is delivered as tar archive, only the domain files
            # of the configured categories are extracted
            #
            shalla_archive="${adb_tmpdir}/shallalist.tar.gz"
            shalla_file="${adb_tmpdir}/shallalist.txt"
            "${adb_fetch}" ${fetch_parm} --output-document="${shalla_archive}" "${url}"
            rc=${?}
            if [ $((rc)) -eq 0 ]
            then
                : > "${shalla_file}"
                for category in ${adb_src_cat_shalla}
                do
                    tar -xOzf "${shalla_archive}" "BL/${category}/domains" >> "${shalla_file}"
                    rc=${?}
                    if [ $((rc)) -ne 0 ]
                    then
                        f_log " archive extraction failed (${category})"
                        break
                    fi
                done
                rm -f "${shalla_archive}"
                rm -rf "${adb_tmpdir}/BL"

                # keep the return code of a failed extraction, so that an
                # incomplete list is handled as failed download below
                #
                if [ $((rc)) -eq 0 ]
                then
                    tmp_domains="$(cat "${shalla_file}")"
                    rc=${?}
                fi
                rm -f "${shalla_file}"
            fi
        else
            tmp_domains="$("${adb_fetch}" ${fetch_parm} --output-document=- "${url}")"
            rc=${?}
        fi
    else
        f_log " source doesn't change, no update required"
        continue
    fi

    # check download result and prepare domain output by regex patterns
    #
    if [ $((rc)) -eq 0 ] && [ -n "${tmp_domains}" ]
    then
        count="$(printf "%s\n" "${tmp_domains}" | awk "${src_rset}" | tee "${adb_tmpfile}" | wc -l)"
        f_log " source download finished (${count} entries)"
        unset tmp_domains
    elif [ $((rc)) -eq 0 ]
    then
        "${adb_uci}" -q set "adblock.${src_name}.adb_src_timestamp=empty download"
        f_log " empty source download finished"
        continue
    else
        # remember the failed source for the partial restore after the loop
        #
        rc=0
        if [ -z "${adb_errsrclist}" ]
        then
            adb_errsrclist="-name ${adb_dnsprefix}.${src_name}.gz"
        else
            adb_errsrclist="${adb_errsrclist} -o -name ${adb_dnsprefix}.${src_name}.gz"
        fi
        "${adb_uci}" -q set "adblock.${src_name}.adb_src_timestamp=download failed"
        f_log " source download failed"
        continue
    fi

    # remove whitelist domains, sort domains and make them unique,
    # finally rewrite ad/abuse domain information to separate dnsmasq files
    #
    if [ $((count)) -gt 0 ] && [ -n "${adb_tmpfile}" ]
    then
        if [ -s "${adb_tmpdir}/tmp.whitelist" ]
        then
            grep -vf "${adb_tmpdir}/tmp.whitelist" "${adb_tmpfile}" | sort -u | eval "${adb_dnsformat}" > "${adb_dnsfile}"
            rc=${?}
        else
            sort -u "${adb_tmpfile}" | eval "${adb_dnsformat}" > "${adb_dnsfile}"
            rc=${?}
        fi

        # prepare find statement with revised adblock list sources
        #
        if [ -z "${adb_revsrclist}" ]
        then
            adb_revsrclist="-name ${adb_dnsprefix}.${src_name}"
        else
            adb_revsrclist="${adb_revsrclist} -o -name ${adb_dnsprefix}.${src_name}"
        fi

        # store source timestamp in config
        #
        if [ $((rc)) -eq 0 ]
        then
            "${adb_uci}" -q set "adblock.${src_name}.adb_src_timestamp=${url_time}"
            f_log " domain merging finished"
        else
            f_log " domain merging failed" "${rc}"
            f_restore
        fi
    else
        "${adb_uci}" -q set "adblock.${src_name}.adb_src_timestamp=empty domain input"
        f_log " empty domain input received"
        continue
    fi
done
221
# remove disabled adblock lists and their backups
#
# 'adb_srclist' holds a find expression that excludes all active sources,
# an empty expression means that no source is active at all and
# every adblock list gets removed
#
if [ -n "${adb_srclist}" ]
then
    # the expression has to be word-split by the shell, but its '*' patterns
    # are meant for find: disable pathname expansion, otherwise files in the
    # current directory could change the expression
    #
    set -f
    rm_done="$(find "${adb_dnsdir}" -maxdepth 1 -type f \( ${adb_srclist} \) -print -exec rm -f "{}" \;)"
    rc=${?}
    if [ "${backup_ok}" = "true" ] && [ -n "${rm_done}" ]
    then
        find "${adb_dir_backup}" -maxdepth 1 -type f \( ${adb_srclist} \) -exec rm -f "{}" \;
    fi
    set +f
else
    rm_done="$(find "${adb_dnsdir}" -maxdepth 1 -type f -name "${adb_dnsprefix}*" -print -exec rm -f "{}" \;)"
    rc=${?}
    if [ "${backup_ok}" = "true" ]
    then
        find "${adb_dir_backup}" -maxdepth 1 -type f -name "${adb_dnsprefix}*" -exec rm -f "{}" \;
    fi
fi

# 'rm_done' contains the names of the removed list files (find -print),
# it is handed over to f_rmconfig and checked again before the dnsmasq restart
#
if [ $((rc)) -eq 0 ] && [ -n "${rm_done}" ]
then
    f_rmconfig "${rm_done}"
    f_log "remove disabled adblock lists"
elif [ $((rc)) -ne 0 ] && [ -n "${rm_done}" ]
then
    f_log "error during removal of disabled adblock lists" "${rc}"
    f_exit
fi
249
# partial restore of adblock lists in case of download errors:
# copy the gzipped backups of all failed sources ('adb_errsrclist') back to
# the dnsmasq directory and unpack them there
#
if [ "${backup_ok}" = "true" ] && [ -n "${adb_errsrclist}" ]; then
    restore_done="$(find "${adb_dir_backup}" -maxdepth 1 -type f \( ${adb_errsrclist} \) -print -exec cp -pf "{}" "${adb_dnsdir}" \;)"
    rc=${?}

    if [ $((rc)) -ne 0 ]; then
        # find/cp reported a problem, give up
        f_log "error during partial restore" "${rc}"
        f_exit
    elif [ -n "${restore_done}" ]; then
        # at least one backup was copied, unpack it in place
        find "${adb_dnsdir}" -maxdepth 1 -type f -name "${adb_dnsprefix}*.gz" -exec gunzip -f "{}" \;
        f_rmconfig "${restore_done}" "true"
        f_log "partial restore done"
    fi
fi
267
# make separate adblock lists entries unique
#
if [ "${mem_ok}" = "true" ] && [ -n "${adb_revsrclist}" ]
then
    f_log "remove duplicates in separate adblock lists"

    # generate a unique overall block list
    #
    sort -u "${adb_dnsdir}/${adb_dnsprefix}."* > "${adb_tmpdir}/blocklist.overall"

    # loop through all separate lists, ordered by size (ascending),
    # 'ls' is used on purpose here to get the size based ordering
    #
    for list in $(ls -ASr "${adb_dnsdir}/${adb_dnsprefix}"*)
    do
        # reduce the file name to the part behind the last dot (POSIX
        # replacement for the bash-only expansion '${list/*./}')
        #
        list="${list##*.}"

        # check overall block list vs. separate block list,
        # write all duplicate entries to separate list
        # (the list keeps only entries that are still part of the overall list)
        #
        sort "${adb_tmpdir}/blocklist.overall" "${adb_dnsdir}/${adb_dnsprefix}.${list}" | uniq -d > "${adb_tmpdir}/tmp.${list}"
        mv -f "${adb_tmpdir}/tmp.${list}" "${adb_dnsdir}/${adb_dnsprefix}.${list}"

        # write all unique entries back to overall block list
        # (entries kept by this list are no longer available for later lists)
        #
        sort "${adb_tmpdir}/blocklist.overall" "${adb_dnsdir}/${adb_dnsprefix}.${list}" | uniq -u > "${adb_tmpdir}/tmp.overall"
        mv -f "${adb_tmpdir}/tmp.overall" "${adb_tmpdir}/blocklist.overall"
    done
    rm -f "${adb_tmpdir}/blocklist.overall"
fi
296
# restart & check dnsmasq with newly generated set of adblock lists,
# the restart is skipped if no list was revised, removed, restored or moved
#
f_cntconfig
adb_count="$(${adb_uci} -q get "adblock.global.adb_overall_count")"
if [ -z "${adb_revsrclist}" ] && [ -z "${rm_done}" ] && [ -z "${restore_done}" ] && [ -z "${mv_done}" ]; then
    f_log "adblock lists with overall ${adb_count} domains are still valid, no update required"
else
    "${adb_uci}" -q set "adblock.global.adb_dnstoggle=on"
    /etc/init.d/dnsmasq restart
    sleep 1

    # look for a dnsmasq entry in the process list after the restart
    if ps | grep -q "[d]nsmasq"; then
        rc=0
        f_log "adblock lists with overall ${adb_count} domains loaded"
    else
        rc=100
        f_log "dnsmasq restart failed, please check 'logread' output" "${rc}"
        f_restore
    fi
fi
318
# create adblock list backups:
# copy all revised lists ('adb_revsrclist') to the backup directory
# and compress every uncompressed list found there
#
if [ "${backup_ok}" = "true" ] && [ -n "${adb_revsrclist}" ]; then
    backup_done="$(find "${adb_dnsdir}" -maxdepth 1 -type f \( ${adb_revsrclist} \) -print -exec cp -pf "{}" "${adb_dir_backup}" \;)"
    rc=${?}

    # nothing to do if find did not copy any list
    if [ -n "${backup_done}" ]; then
        if [ $((rc)) -eq 0 ]; then
            find "${adb_dir_backup}" -maxdepth 1 -type f \( -name "${adb_dnsprefix}*" -a ! -name "${adb_dnsprefix}*.gz" \) -exec gzip -f "{}" \;
            f_log "new adblock list backups generated"
        else
            f_log "error during backup of adblock lists" "${rc}"
            f_exit
        fi
    fi
fi

# remove temporary files and exit
#
f_exit