adblock: release 1.2.0
[feed/packages.git] / net / adblock / files / adblock-update.sh
1 #!/bin/sh
2 # dns based ad/abuse domain blocking script
3 # written by Dirk Brenken (dev@brenken.org)
4
5 # This is free software, licensed under the GNU General Public License v3.
6 # You should have received a copy of the GNU General Public License
7 # along with this program. If not, see <http://www.gnu.org/licenses/>.
8
# set the C locale and export it, so that every child process
# (sort, grep, awk, ...) uses the same byte-wise rules as this script
#
export LC_ALL=C
12
# set pid & logger
#
adb_pid="${$}"
adb_pidfile="/var/run/adblock.pid"
adb_log="$(command -v logger)"

# a readable pid file means that another instance is (or was) running:
# report the pid found in the file and stop with rc 255,
# otherwise claim the pid file with the pid of this process
#
if [ -r "${adb_pidfile}" ]
then
    rc=255
    "${adb_log}" -s -t "adblock[${adb_pid}] error" "adblock service already running ($(cat "${adb_pidfile}"))"
    exit ${rc}
else
    # the pid is passed as data, never as printf format string
    #
    printf '%s' "${adb_pid}" > "${adb_pidfile}"
fi
27
# get the directory of this script and set script/config version,
# a script name without any slash (e.g. 'sh adblock-update.sh') lives in
# the current directory, a script directly below '/' lives in the root directory
#
case "${0}" in
    */*)
        adb_scriptdir="${0%/*}"
        ;;
    *)
        adb_scriptdir="."
        ;;
esac
adb_scriptdir="${adb_scriptdir:-/}"
adb_scriptver="1.2.0"
adb_mincfgver="2.2"
33
# the adblock function library is expected next to this script,
# without a readable library: log the problem, release the pid file
# and stop with rc 254
#
if [ ! -r "${adb_scriptdir}/adblock-helper.sh" ]
then
    rc=254
    "${adb_log}" -s -t "adblock[${adb_pid}] error" "adblock function library not found"
    rm -f "${adb_pidfile}"
    exit ${rc}
fi

# source in adblock function library
#
. "${adb_scriptdir}/adblock-helper.sh"
45
# call trap function on error signals (HUP, INT, QUIT, BUS, SEGV, TERM)
#
# the handler is single-quoted, so that '${rc}' is expanded when the signal
# arrives (after rc=250 has been set) and not when the trap is installed.
# signal names are used instead of numbers, because the numbers
# of BUS/SEGV differ between cpu architectures
#
trap 'rc=250; f_log "error signal received/trapped" "${rc}"; f_exit' HUP INT QUIT BUS SEGV TERM
49
# load environment
#
f_envload

# start logging: the start message reports the values of
# adb_scriptver and adb_sysver plus the current date and time
#
adb_startstamp="$(/bin/date "+%d.%m.%Y %H:%M:%S")"
f_log "domain adblock processing started (${adb_scriptver}, ${adb_sysver}, ${adb_startstamp})"

# check environment
#
f_envcheck
61
# f_srcfetch: download the raw content of the current adblock source
#
# reads:   src_name, url, adb_fetch, fetch_parm, adb_tmpdir, adb_src_cat_shalla
# writes:  tmp_domains (raw source content, reset on every call)
#          rc (0 on success, non-zero if the download or the extraction failed)
# returns: the value of rc
#
f_srcfetch()
{
    # never reuse the content or the status of a previously processed source
    #
    tmp_domains=""
    rc=0

    if [ "${src_name}" = "blacklist" ]
    then
        # the blacklist 'url' is read with cat, i.e. it is used as a file path
        #
        tmp_domains="$(cat "${url}")"
        rc=${?}
    elif [ "${src_name}" = "shalla" ]
    then
        # shalla is delivered as tar archive, the domains of all
        # categories in adb_src_cat_shalla are collected in one file
        #
        shalla_archive="${adb_tmpdir}/shallalist.tar.gz"
        shalla_file="${adb_tmpdir}/shallalist.txt"
        "${adb_fetch}" ${fetch_parm} --output-document="${shalla_archive}" "${url}"
        rc=${?}
        if [ $((rc)) -eq 0 ]
        then
            : > "${shalla_file}"
            for category in ${adb_src_cat_shalla}
            do
                tar -xOzf "${shalla_archive}" "BL/${category}/domains" >> "${shalla_file}"
                rc=${?}
                if [ $((rc)) -ne 0 ]
                then
                    f_log " archive extraction failed (${category})"
                    break
                fi
            done
            tmp_domains="$(cat "${shalla_file}")"
            rm -rf "${adb_tmpdir}/BL"
            rm -f "${shalla_file}"
        fi

        # the status of the download/extraction is kept in rc and must not be
        # overwritten by the cleanup, a failed download may leave a partial archive
        #
        rm -f "${shalla_archive}"
    else
        tmp_domains="$("${adb_fetch}" ${fetch_parm} --output-document=- "${url}")"
        rc=${?}
    fi
    return ${rc}
}

# loop through active adblock domain sources,
# download sources, prepare output and store all extracted domains in temp file
#
for src_name in ${adb_sources}
do
    # NOTE(review): 'eval' builds the variable names from src_name,
    # src_name must therefore be a plain identifier taken from the own config
    #
    eval "url=\"\${adb_src_${src_name}}\""
    eval "src_rset=\"\${adb_src_rset_${src_name}}\""
    adb_dnsfile="${adb_dnsdir}/${adb_dnsprefix}.${src_name}"
    list_time="$("${adb_uci}" -q get "adblock.${src_name}.adb_src_timestamp")"
    f_log "=> processing adblock source '${src_name}'"

    # check 'url' and 'src_rset' values
    #
    if [ -z "${url}" ] || [ -z "${src_rset}" ]
    then
        "${adb_uci}" -q set "adblock.${src_name}.adb_src_timestamp=broken config"
        f_log " broken source configuration, check 'adb_src' and 'adb_src_rset' in config"
        continue
    fi

    # prepare find statement with active adblock list sources
    #
    if [ -z "${adb_srclist}" ]
    then
        adb_srclist="! -name ${adb_dnsprefix}.${src_name}*"
    else
        adb_srclist="${adb_srclist} -a ! -name ${adb_dnsprefix}.${src_name}*"
    fi

    # download only block list with newer/updated timestamp,
    # the header value is printed via '%s' and never used as awk format string
    #
    if [ "${src_name}" = "blacklist" ]
    then
        url_time="$(date -r "${url}")"
    else
        url_time="$("${adb_fetch}" ${fetch_parm} --server-response --spider "${url}" 2>&1 | awk '/Last-Modified/ {printf "%s", substr($0,18)}')"
    fi
    if [ -z "${url_time}" ]
    then
        url_time="$(date)"
        f_log " online timestamp not received, current date will be used"
    fi

    # a download is required if no/another timestamp is stored,
    # the list file is missing or an expected backup is missing
    #
    if [ -z "${list_time}" ] || [ "${list_time}" != "${url_time}" ] || [ ! -r "${adb_dnsfile}" ] ||
        { [ "${backup_ok}" = "true" ] && [ ! -r "${adb_dir_backup}/${adb_dnsprefix}.${src_name}.gz" ]; }
    then
        f_srcfetch
    else
        f_log " source doesn't change, no update required"
        continue
    fi

    # check download result and prepare domain output, backup/restore if needed
    #
    if [ $((rc)) -eq 0 ] && [ -n "${tmp_domains}" ]
    then
        count="$(printf "%s\n" "${tmp_domains}" | awk "${src_rset}" | tee "${adb_tmpfile}" | wc -l)"
        "${adb_uci}" -q set "adblock.${src_name}.adb_src_timestamp=${url_time}"
        if [ "${backup_ok}" = "true" ]
        then
            gzip -cf "${adb_tmpfile}" > "${adb_dir_backup}/${adb_dnsprefix}.${src_name}.gz"
        fi
        f_log " source download finished (${count} entries)"
        unset tmp_domains
    elif [ $((rc)) -eq 0 ] && [ -z "${tmp_domains}" ]
    then
        "${adb_uci}" -q set "adblock.${src_name}.adb_src_timestamp=empty download"
        f_log " empty source download finished"
        continue
    else
        # download failed: fall back to the backup of this source, if available
        #
        rc=0
        if [ "${backup_ok}" = "true" ] && [ -r "${adb_dir_backup}/${adb_dnsprefix}.${src_name}.gz" ]
        then
            gunzip -cf "${adb_dir_backup}/${adb_dnsprefix}.${src_name}.gz" > "${adb_tmpfile}"
            count="$(wc -l < "${adb_tmpfile}")"
            "${adb_uci}" -q set "adblock.${src_name}.adb_src_timestamp=list restored"
            f_log " source download failed, list restored (${count} entries)"
        else
            "${adb_uci}" -q set "adblock.${src_name}.adb_src_timestamp=download failed"
            f_log " source download failed"
            continue
        fi
    fi

    # remove whitelist domains, sort domains and make them unique,
    # rewrite ad/abuse domain information to separate dnsmasq files
    #
    if [ $((count)) -gt 0 ] && [ -n "${adb_tmpfile}" ]
    then
        if [ -s "${adb_tmpdir}/tmp.whitelist" ]
        then
            grep -vf "${adb_tmpdir}/tmp.whitelist" "${adb_tmpfile}" | sort -u | eval "${adb_dnsformat}" > "${adb_dnsfile}"
        else
            sort -u "${adb_tmpfile}" | eval "${adb_dnsformat}" > "${adb_dnsfile}"
        fi
        rc=${?}

        # finish domain processing, prepare find statement with revised adblock list source
        #
        if [ $((rc)) -eq 0 ]
        then
            if [ -z "${adb_revsrclist}" ]
            then
                adb_revsrclist="-name ${adb_dnsprefix}.${src_name}"
            else
                adb_revsrclist="${adb_revsrclist} -o -name ${adb_dnsprefix}.${src_name}"
            fi
            f_log " domain merging finished"
        else
            rc=0
            rm -f "${adb_dnsfile}"
            if [ "${backup_ok}" = "true" ] && [ -r "${adb_dir_backup}/${adb_dnsprefix}.${src_name}.gz" ]
            then
                rm -f "${adb_dir_backup}/${adb_dnsprefix}.${src_name}.gz"
            fi
            f_log " domain merging failed, list removed"
            continue
        fi
    else
        rm -f "${adb_dnsfile}"
        if [ "${backup_ok}" = "true" ] && [ -r "${adb_dir_backup}/${adb_dnsprefix}.${src_name}.gz" ]
        then
            rm -f "${adb_dir_backup}/${adb_dnsprefix}.${src_name}.gz"
        fi
        "${adb_uci}" -q set "adblock.${src_name}.adb_src_timestamp=empty domain input"
        f_log " empty domain input received, list removed"
        continue
    fi
done
220
# f_rmdisabled: remove disabled adblock lists and their backups
#
# reads:  adb_srclist (find primaries excluding all active sources),
#         adb_dnsdir, adb_dir_backup, adb_dnsprefix, backup_ok
# writes: rm_done (names of the removed list files, empty if nothing was removed)
#
f_rmdisabled()
{
    if [ -n "${adb_srclist}" ]
    then
        # adb_srclist contains wildcard patterns for find: it needs word splitting,
        # but pathname expansion against the current directory must be suppressed.
        # the '-name <prefix>*' test limits the removal to adblock list files,
        # like in the branch without any active source
        #
        set -f
        rm_done="$(find "${adb_dnsdir}" -maxdepth 1 -type f -name "${adb_dnsprefix}*" \( ${adb_srclist} \) -print -exec rm -f "{}" \;)"
        if [ "${backup_ok}" = "true" ] && [ -n "${rm_done}" ]
        then
            find "${adb_dir_backup}" -maxdepth 1 -type f -name "${adb_dnsprefix}*" \( ${adb_srclist} \) -exec rm -f "{}" \;
        fi
        set +f
    else
        # no active source at all: remove every adblock list and backup
        #
        rm_done="$(find "${adb_dnsdir}" -maxdepth 1 -type f -name "${adb_dnsprefix}*" -print -exec rm -f "{}" \;)"
        if [ "${backup_ok}" = "true" ]
        then
            find "${adb_dir_backup}" -maxdepth 1 -type f -name "${adb_dnsprefix}*" -exec rm -f "{}" \;
        fi
    fi
    if [ -n "${rm_done}" ]
    then
        f_rmconfig "${rm_done}"
        f_log "disabled adblock lists removed"
    fi
}

# remove disabled adblock lists and their backups
#
f_rmdisabled
242
# f_uniqlists: make the entries of the separate adblock lists unique,
# an entry that is part of several lists is kept in the smallest list only
#
# reads: adb_dnsdir, adb_dnsprefix, adb_tmpdir
#
f_uniqlists()
{
    # generate a unique overall block list
    #
    sort -u "${adb_dnsdir}/${adb_dnsprefix}."* > "${adb_tmpdir}/blocklist.overall"

    # loop through all separate lists, ordered by size (ascending),
    # 'ls' is used for the size ordering, list paths must not contain whitespace
    #
    for list in $(ls -ASr "${adb_dnsdir}/${adb_dnsprefix}."*)
    do
        # reduce the path to the source name behind the last dot (POSIX expansion),
        # check overall block list vs. separate block list,
        # write all duplicate entries to separate list
        #
        list="${list##*.}"
        sort "${adb_tmpdir}/blocklist.overall" "${adb_dnsdir}/${adb_dnsprefix}.${list}" | uniq -d > "${adb_tmpdir}/tmp.${list}"
        mv -f "${adb_tmpdir}/tmp.${list}" "${adb_dnsdir}/${adb_dnsprefix}.${list}"

        # write all unique entries back to overall block list
        #
        sort "${adb_tmpdir}/blocklist.overall" "${adb_dnsdir}/${adb_dnsprefix}.${list}" | uniq -u > "${adb_tmpdir}/tmp.overall"
        mv -f "${adb_tmpdir}/tmp.overall" "${adb_tmpdir}/blocklist.overall"
    done
    rm -f "${adb_tmpdir}/blocklist.overall"
}

# make separate adblock lists entries unique
#
if [ "${mem_ok}" = "true" ] && [ -n "${adb_revsrclist}" ]
then
    f_log "remove duplicates in separate adblock lists"
    f_uniqlists
fi
271
# restart & check dnsmasq with newly generated set of adblock lists
#
# a restart is only required if lists were (re-)generated in this run
# (adb_revsrclist) or disabled lists were removed (rm_done)
#
f_cntconfig
adb_count="$("${adb_uci}" -q get "adblock.global.adb_overall_count")"
if [ -n "${adb_revsrclist}" ] || [ -n "${rm_done}" ]
then
    "${adb_uci}" -q set "adblock.global.adb_dnstoggle=on"
    /etc/init.d/dnsmasq restart
    sleep 1
    check="$(pgrep -f "dnsmasq")"
    if [ -n "${check}" ]
    then
        f_log "adblock lists with overall ${adb_count} domains loaded"
    else
        f_log "dnsmasq restart failed, retry without newly generated block lists"

        # only the lists generated in this run are removed for the retry,
        # without such lists find must not be called with an empty expression
        #
        rm_done=""
        if [ -n "${adb_revsrclist}" ]
        then
            rm_done="$(find "${adb_dnsdir}" -maxdepth 1 -type f \( ${adb_revsrclist} \) -print -exec rm -f "{}" \;)"
        fi
        if [ -n "${rm_done}" ]
        then
            f_log "bogus adblock lists removed"
            f_rmconfig "${rm_done}"
            /etc/init.d/dnsmasq restart
            sleep 1
            check="$(pgrep -f "dnsmasq")"
        fi
        if [ -n "${check}" ]
        then
            # re-read the overall count, it has been recalculated by f_cntconfig
            #
            f_cntconfig
            adb_count="$("${adb_uci}" -q get "adblock.global.adb_overall_count")"
            f_log "adblock lists with overall ${adb_count} domains loaded"
        else
            # dnsmasq is still down (retry failed or nothing could be removed)
            #
            rc=100
            f_log "dnsmasq restart failed, please check 'logread' output" "${rc}"
            f_exit
        fi
    fi
else
    f_log "adblock lists with overall ${adb_count} domains are still valid, no update required"
fi

# remove temporary files and exit
#
f_exit