remove pfring patches
[openwrt/openwrt.git] / openwrt / package / libpcap / patches / 110-pf_ring.patch
1 diff -urN libpcap.old/pcap-int.h libpcap.dev/pcap-int.h
2 --- libpcap.old/pcap-int.h 2003-12-15 02:42:24.000000000 +0100
3 +++ libpcap.dev/pcap-int.h 2005-10-22 23:20:12.220060500 +0200
4 @@ -30,7 +30,7 @@
5 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
6 * SUCH DAMAGE.
7 *
8 - * @(#) $Header: /tcpdump/master/libpcap/pcap-int.h,v 1.55.2.4 2003/12/15 01:42:24 guy Exp $ (LBL)
9 + * @(#) $Header: /export/home/ntop/PF_RING/userland/libpcap-0.8.1-ring/pcap-int.h,v 1.2 2004/11/25 09:58:00 deri Exp $ (LBL)
10 */
11
12 #ifndef pcap_int_h
13 @@ -46,6 +46,8 @@
14 #include <packet32.h>
15 #endif /* WIN32 */
16
17 +#define RING /* L.Deri */
18 +
19 /*
20 * Savefile
21 */
22 @@ -93,6 +95,57 @@
23 #endif
24 };
25
26 +/* **************************** */
27 +
28 +#ifdef RING
29 +
30 +#include <unistd.h>
31 +#include <sys/mman.h>
32 +#include <errno.h>
33 +#include <sys/poll.h>
34 +
35 +#define PAGE_SIZE 4096
36 +
37 +#define HAVE_PCAP
38 +#include <linux/ring.h>
39 +#endif
40 +
41 +#ifdef RING
42 +
43 +#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */
44 +
45 +struct e1000_rx_desc { /* Receive Descriptor; the field sizes below sum to 16 bytes */
46 + u_int64_t buffer_addr; /* Address of the descriptor's data buffer */
47 + u_int16_t length; /* Length of data DMAed into data buffer */
48 + u_int16_t csum; /* Packet checksum */
49 + u_int8_t status; /* Descriptor status; presumably tested against E1000_RXD_STAT_DD -- TODO confirm */
50 + u_int8_t errors; /* Descriptor Errors */
51 + u_int16_t special; /* NOTE(review): purpose not documented in this patch -- confirm against the e1000 driver */
52 +};
53 +
54 +/* Transmit Descriptor: a 64-bit buffer address followed by two 32-bit words ("lower" and "upper") */
55 +struct e1000_tx_desc {
56 + u_int64_t buffer_addr; /* Address of the descriptor's data buffer */
57 + union { /* "lower" word: one 32-bit value, or the same bytes viewed as separate fields */
58 + u_int32_t data; /* Whole word at once */
59 + struct {
60 + u_int16_t length; /* Data buffer length */
61 + u_int8_t cso; /* Checksum offset */
62 + u_int8_t cmd; /* Descriptor control */
63 + } flags;
64 + } lower;
65 + union { /* "upper" word: one 32-bit value, or the same bytes viewed as separate fields */
66 + u_int32_t data; /* Whole word at once */
67 + struct {
68 + u_int8_t status; /* Descriptor status */
69 + u_int8_t css; /* Checksum start */
70 + u_int16_t special; /* NOTE(review): purpose not documented in this patch -- confirm against the e1000 driver */
71 + } fields;
72 + } upper;
73 +};
74 +
75 +#endif
76 +
77 struct pcap {
78 #ifdef WIN32
79 ADAPTER *adapter;
80 @@ -121,6 +174,14 @@
81 u_char *bp;
82 int cc;
83
84 +#ifdef RING
85 + /* PF_RING */
86 + char *ring_buffer, *ring_slots;
87 + int ring_fd;
88 + FlowSlotInfo *slots_info;
89 + u_int page_id, slot_id, pkts_per_page;
90 + u_int poll_sleep;
91 +#endif
92 /*
93 * Place holder for pcap_next().
94 */
95 diff -urN libpcap.old/pcap-linux.c libpcap.dev/pcap-linux.c
96 --- libpcap.old/pcap-linux.c 2003-11-21 11:20:46.000000000 +0100
97 +++ libpcap.dev/pcap-linux.c 2005-10-22 23:43:59.726120250 +0200
98 @@ -27,7 +27,7 @@
99
100 #ifndef lint
101 static const char rcsid[] _U_ =
102 - "@(#) $Header: /tcpdump/master/libpcap/pcap-linux.c,v 1.98.2.4 2003/11/21 10:20:46 guy Exp $ (LBL)";
103 + "@(#) $Header: /export/home/ntop/PF_RING/userland/libpcap-0.8.1-ring/pcap-linux.c,v 1.2 2004/11/25 09:58:00 deri Exp $ (LBL)";
104 #endif
105
106 /*
107 @@ -83,7 +83,7 @@
108 #ifdef HAVE_DAG_API
109 #include "pcap-dag.h"
110 #endif /* HAVE_DAG_API */
111 -
112 +
113 #include <errno.h>
114 #include <stdlib.h>
115 #include <unistd.h>
116 @@ -217,6 +217,83 @@
117 = { 1, &total_insn };
118 #endif
119
120 +#define RING /* L.Deri; the pcap-int.h hunk of this patch defines RING as well */
121 +#define SAFE_RING_MODE /*
122 + Copy the bucket in order to avoid kernel
123 + crash if the application faults
124 + */
125 +
126 +#ifdef RING
127 +unsigned char *write_register; /* NOTE(review): not referenced in the hunks visible in this patch */
128 +static struct pcap_stat ringStats; /* tot_read/tot_lost baseline taken when the ring is opened; the stats hunk subtracts it */
129 +u_long numPollCalls = 0, numReadCalls = 0; /* numPollCalls is bumped after every poll(); numReadCalls is never updated in the visible hunks */
130 +
131 +#define POLL_SLEEP_STEP 10 /* ns = 0.1 ms -- NOTE(review): 10 ns is not 0.1 ms; confirm the intended unit */
132 +#define POLL_SLEEP_MIN POLL_SLEEP_STEP
133 +#define POLL_SLEEP_MAX 1000 /* ns -- NOTE(review): same unit caveat as POLL_SLEEP_STEP */
134 +#define POLL_QUEUE_MIN_LEN 500 /* # packets */
135 +
136 +#ifdef SAFE_RING_MODE
137 +static char staticBucket[2048]; /* Copy target for packet data; the read hunk clamps the copy length to sizeof(staticBucket) */
138 +#endif
139 +
140 +
141 +/* Issue SO_ADD_TO_CLUSTER with clusterId on the handle's ring socket; returns setsockopt()'s result, or -1 when ring_fd is 0 */
142 +
143 +int pcap_set_cluster(pcap_t *handle, u_int clusterId) {
144 + if(!handle->ring_fd) return(-1);
145 + return(setsockopt(handle->ring_fd, 0, SO_ADD_TO_CLUSTER, &clusterId, sizeof(clusterId)));
146 +}
147 +
148 +/* Issue SO_REMOVE_FROM_CLUSTER (no option value) on the handle's ring socket; returns setsockopt()'s result, or -1 when ring_fd is 0 */
149 +
150 +int pcap_remove_from_cluster(pcap_t *handle) {
151 + if(!handle->ring_fd) return(-1);
152 + return(setsockopt(handle->ring_fd, 0, SO_REMOVE_FROM_CLUSTER, NULL, 0));
153 +}
154 +
155 +/* Issue SO_SET_REFLECTOR on the ring socket with the device name as option value; returns setsockopt()'s result, or -1 when ring_fd is 0 or reflectorDevice is NULL */
156 +
157 +int pcap_set_reflector(pcap_t *handle, char *reflectorDevice) {
158 + if(!handle->ring_fd || (reflectorDevice == NULL)) return(-1); /* strlen(NULL) below would be undefined */
159 + /* Pass the name itself: &reflectorDevice would hand the kernel the bytes of the local pointer variable */
160 + return(setsockopt(handle->ring_fd, 0, SO_SET_REFLECTOR, reflectorDevice, strlen(reflectorDevice)));
161 +}
162 +
163 +/* Turn IFF_PROMISC on (set_promisc != 0) or off for 'device'. Returns 0 on success, -3 for a NULL device, -2 if the flags cannot be read, -1 if the socket cannot be opened or the flags cannot be written. */
164 +
165 +static int set_if_promisc(const char *device, int set_promisc) {
166 + int sock_fd, rc;
167 + struct ifreq ifr;
168 +
169 + if(device == NULL) return(-3);
170 +
171 + sock_fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
172 + if(sock_fd < 0) return(-1); /* 0 is a valid descriptor; only a negative value signals failure */
173 +
174 + memset(&ifr, 0, sizeof(ifr));
175 + strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)-1); /* the memset above keeps the final byte NUL */
176 + if(ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) {
177 + close(sock_fd);
178 + return(-2);
179 + }
180 +
181 + if(set_promisc) {
182 + ifr.ifr_flags |= IFF_PROMISC;
183 + } else {
184 + /* Remove promisc */
185 + ifr.ifr_flags &= ~IFF_PROMISC;
186 + }
187 +
188 + /* Record the outcome first so the socket is closed on the failure path too */
189 + rc = (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) ? -1 : 0;
190 +
191 + close(sock_fd);
192 + return(rc);
193 +}
194 +
195 +#endif
196 +
197 /*
198 * Get a handle for a live capture from the given device. You can
199 * pass NULL as device to get all packages (without link level
200 @@ -258,6 +335,138 @@
201 handle->snapshot = snaplen;
202 handle->md.timeout = to_ms;
203
204 +#ifdef RING
205 + handle->ring_fd = handle->fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));
206 +
207 + printf("Open RING [fd=%d]\n", handle->ring_fd);
208 +
209 + if(handle->ring_fd > 0) {
210 + struct sockaddr sa;
211 + int rc;
212 + u_int memSlotsLen;
213 +
214 + err = 0;
215 + sa.sa_family = PF_RING;
216 + snprintf(sa.sa_data, sizeof(sa.sa_data), "%s", device);
217 + rc = bind(handle->ring_fd, (struct sockaddr *)&sa, sizeof(sa));
218 +
219 + if(rc == 0) {
220 +
221 +
222 + handle->md.device = strdup(device);
223 + handle->ring_buffer = (char *)mmap(NULL, PAGE_SIZE,
224 + PROT_READ|PROT_WRITE,
225 + MAP_SHARED,
226 + handle->ring_fd, 0);
227 +
228 + if(handle->ring_buffer == MAP_FAILED) {
229 + sprintf(ebuf, "mmap() failed");
230 + return (NULL);
231 + }
232 +
233 + handle->slots_info = (FlowSlotInfo *)handle->ring_buffer;
234 + if(handle->slots_info->version != RING_FLOWSLOT_VERSION) {
235 + snprintf(ebuf, PCAP_ERRBUF_SIZE, "Wrong RING version: "
236 + "kernel is %i, libpcap was compiled with %i\n",
237 + handle->slots_info->version, RING_FLOWSLOT_VERSION);
238 + return (NULL);
239 + }
240 + memSlotsLen = handle->slots_info->tot_mem;
241 + munmap(handle->ring_buffer, PAGE_SIZE);
242 +
243 + handle->ring_buffer = (char *)mmap(NULL, memSlotsLen,
244 + PROT_READ|PROT_WRITE,
245 + MAP_SHARED, handle->ring_fd, 0);
246 +
247 + if(handle->ring_buffer == MAP_FAILED) {
248 + sprintf(ebuf, "mmap() failed");
249 + return (NULL);
250 + }
251 +
252 + handle->slots_info = (FlowSlotInfo *)handle->ring_buffer;
253 + handle->ring_slots = (char *)(handle->ring_buffer+sizeof(FlowSlotInfo));
254 +
255 + /* Safety check */
256 + if(handle->slots_info->remove_idx >= handle->slots_info->tot_slots)
257 + handle->slots_info->remove_idx = 0;
258 +
259 + handle->page_id = PAGE_SIZE, handle->slot_id = 0,
260 + handle->pkts_per_page = 0;
261 +
262 + if(0) {
263 + int i;
264 +
265 + for(i=0; i<handle->slots_info->tot_slots; i++) {
266 + unsigned long idx = i*handle->slots_info->slot_len;
267 + FlowSlot *slot = (FlowSlot*)&handle->ring_slots[idx];
268 +
269 + printf("RING: Setting RING_MAGIC_VALUE into slot %d [displacement=%lu]\n", i, idx);
270 + slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0;
271 + printf("RING: slot[%d]: magic=%d, slot_state=%d\n",
272 + slot->magic, slot->slot_state);
273 + }
274 + }
275 +
276 +
277 + /* Set defaults */
278 + handle->linktype = DLT_EN10MB;
279 + handle->offset = 2;
280 +
281 + printf("RING (%s): tot_slots=%d/slot_len=%d/"
282 + "insertIdx=%d/remove_idx=%d/dropped=%d\n",
283 + device,
284 + handle->slots_info->tot_slots,
285 + handle->slots_info->slot_len,
286 + handle->slots_info->insert_idx,
287 + handle->slots_info->remove_idx,
288 + handle->slots_info->tot_lost);
289 +
290 + ringStats.ps_recv = handle->slots_info->tot_read;
291 + ringStats.ps_drop = handle->slots_info->tot_lost;
292 +
293 + if(promisc) {
294 + struct ifreq ifr;
295 +
296 + err = 0;
297 + memset(&ifr, 0, sizeof(ifr));
298 + strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
299 + if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) {
300 + snprintf(ebuf, PCAP_ERRBUF_SIZE,
301 + "ioctl: %s", pcap_strerror(errno));
302 + err = 1;
303 + }
304 +
305 + if(err == 0) {
306 + if ((ifr.ifr_flags & IFF_PROMISC) == 0) {
307 + /*
308 + * Promiscuous mode isn't currently on,
309 + * so turn it on, and remember that
310 + * we should turn it off when the
311 + * pcap_t is closed.
312 + */
313 +
314 + ifr.ifr_flags |= IFF_PROMISC;
315 + if (ioctl(handle->fd, SIOCSIFFLAGS, &ifr) == -1) {
316 + snprintf(ebuf, PCAP_ERRBUF_SIZE,
317 + "ioctl: %s", pcap_strerror(errno));
318 + err = 1;
319 + }
320 + }
321 +
322 + if(err == 0)
323 + handle->md.clear_promisc = 1;
324 + }
325 + }
326 +
327 + if(err == 0)
328 + goto open_open_live_final;
329 + }
330 +
331 + /* Don't put 'else' above... */
332 + close(handle->ring_fd);
333 + /* Continue without ring support */
334 + }
335 +#endif
336 /*
337 * NULL and "any" are special devices which give us the hint to
338 * monitor all devices.
339 @@ -397,6 +606,9 @@
340 return NULL;
341 }
342
343 +#ifdef RING
344 + open_open_live_final:
345 +#endif
346 /*
347 * "handle->fd" is a socket, so "select()" and "poll()"
348 * should work on it.
349 @@ -449,6 +661,120 @@
350 int packet_len, caplen;
351 struct pcap_pkthdr pcap_header;
352
353 +#ifdef RING
354 + if(handle->ring_buffer != NULL) {
355 + u_int idx, numRuns = 0, ptrAddr;
356 + FlowSlot *slot;
357 +
358 + slot = (FlowSlot*)&handle->ring_slots[handle->slots_info->remove_idx*handle->slots_info->slot_len];
359 +
360 + while(1) {
361 + u_int32_t queuedPkts;
362 +
363 + if(handle->slots_info->tot_insert >= handle->slots_info->tot_read)
364 + queuedPkts = handle->slots_info->tot_insert - handle->slots_info->tot_read;
365 + else
366 + queuedPkts = handle->slots_info->tot_slots + handle->slots_info->tot_insert - handle->slots_info->tot_read;
367 +
368 + if(queuedPkts && (slot->slot_state == 1)) {
369 + char *bucket = &slot->bucket;
370 +
371 +#ifdef RING_MAGIC
372 + if(slot->magic != RING_MAGIC_VALUE) {
373 + printf("==>> Bad Magic [remove_idx=%u][insert_idx=%u][ptrAddr=%u]\n",
374 + handle->slots_info->remove_idx,
375 + handle->slots_info->insert_idx,
376 + ptrAddr);
377 + slot->magic = RING_MAGIC_VALUE;
378 + }
379 +#endif
380 +
381 +
382 + handle->md.stat.ps_recv++;
383 +
384 +#ifdef SAFE_RING_MODE
385 + {
386 + struct pcap_pkthdr *hdr = (struct pcap_pkthdr*)bucket;
387 + int bktLen = hdr->caplen;
388 +
389 + if(bktLen > sizeof(staticBucket))
390 + bktLen = sizeof(staticBucket);
391 +
392 + memcpy(staticBucket, &bucket[sizeof(struct pcap_pkthdr)], bktLen);
393 +
394 +#ifdef RING_DEBUG
395 + printf("==>> [remove_idx=%u][insert_idx=%u][ptrAddr=%u]\n",
396 + handle->slots_info->remove_idx,
397 + handle->slots_info->insert_idx,
398 + ptrAddr);
399 +#endif
400 +
401 + callback(userdata, hdr, staticBucket);
402 + }
403 +#else
404 + callback(userdata,
405 + (const struct pcap_pkthdr*)bucket,
406 + (const u_char*)&bucket[sizeof(struct pcap_pkthdr)]);
407 +#endif
408 +
409 + if(handle->slots_info->remove_idx >= (handle->slots_info->tot_slots-1)) {
410 + handle->slots_info->remove_idx = 0;
411 + handle->page_id = PAGE_SIZE, handle->slot_id = 0, handle->pkts_per_page = 0;
412 + } else {
413 + handle->slots_info->remove_idx++;
414 + handle->pkts_per_page++, handle->slot_id += handle->slots_info->slot_len;
415 + }
416 +
417 + handle->slots_info->tot_read++;
418 + slot->slot_state = 0;
419 +
420 + return(1);
421 + } else {
422 + struct pollfd pfd;
423 + int rc;
424 +
425 + /* Sleep when nothing is happening */
426 + pfd.fd = handle->ring_fd;
427 + pfd.events = POLLIN|POLLERR;
428 + pfd.revents = 0;
429 +
430 +#ifdef RING_DEBUG
431 + printf("==>> poll [remove_idx=%u][insert_idx=%u][loss=%d][queuedPkts=%u]"
432 + "[slot_state=%d][tot_insert=%u][tot_read=%u]\n",
433 + handle->slots_info->remove_idx,
434 + handle->slots_info->insert_idx,
435 + handle->slots_info->tot_lost,
436 + queuedPkts, slot->slot_state,
437 + handle->slots_info->tot_insert,
438 + handle->slots_info->tot_read);
439 + #endif
440 +
441 +#ifdef RING_DEBUG
442 + printf("==>> poll @ [remove_idx=%u][slot_id=%u]\n", handle->slots_info->remove_idx, handle->slot_id);
443 +#endif
444 + errno = 0;
445 + rc = poll(&pfd, 1, -1);
446 +#ifdef RING_DEBUG
447 + printf("==>> poll returned %d [%s][errno=%d][break_loop=%d]\n",
448 + rc, strerror(errno), errno, handle->break_loop);
449 +#endif
450 + numPollCalls++;
451 +
452 + if(rc == -1) {
453 + if(errno == EINTR) {
454 + if(handle->break_loop) {
455 + handle->break_loop = 0;
456 + return(-2);
457 + } else
458 + return(0);
459 + } else
460 + return(-1);
461 + }
462 + }
463 + } /* while() */
464 + }
465 +#endif
466 +
467 #ifdef HAVE_PF_PACKET_SOCKETS
468 /*
469 * If this is a cooked device, leave extra room for a
470 @@ -688,6 +1014,22 @@
471 socklen_t len = sizeof (struct tpacket_stats);
472 #endif
473
474 +#ifdef RING
475 + if(handle->ring_fd > 0) {
476 + stats->ps_recv = handle->slots_info->tot_read-ringStats.ps_recv;
477 + stats->ps_drop = handle->slots_info->tot_lost-ringStats.ps_drop;
478 +
479 + printf("RING: numPollCalls=%d [%.1f packets/call]\n",
480 + numPollCalls, (float)stats->ps_recv/(float)numPollCalls);
481 + printf("RING: [tot_pkts=%u][tot_read=%u][tot_lost=%u]\n",
482 + handle->slots_info->tot_pkts,
483 + handle->slots_info->tot_read,
484 + handle->slots_info->tot_lost);
485 +
486 + return(0);
487 + }
488 +#endif
489 +
490 #ifdef HAVE_TPACKET_STATS
491 /*
492 * Try to get the packet counts from the kernel.
493 @@ -879,6 +1221,11 @@
494 }
495 }
496
497 +
498 +#ifdef RING
499 + if(handle->ring_fd <= 0) can_filter_in_kernel = 0;
500 +#endif
501 +
502 if (can_filter_in_kernel) {
503 if ((err = set_kernel_filter(handle, &fcode)) == 0)
504 {
505 @@ -1348,7 +1695,7 @@
506 memset(&mr, 0, sizeof(mr));
507 mr.mr_ifindex = device_id;
508 mr.mr_type = PACKET_MR_PROMISC;
509 - if (setsockopt(sock_fd, SOL_PACKET,
510 + if (setsockopt(sock_fd, 0 /* SOL_PACKET */,
511 PACKET_ADD_MEMBERSHIP, &mr, sizeof(mr)) == -1)
512 {
513 snprintf(ebuf, PCAP_ERRBUF_SIZE,
514 @@ -1425,10 +1772,11 @@
515
516 /* Any pending errors, e.g., network is down? */
517
518 - if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
519 - snprintf(ebuf, PCAP_ERRBUF_SIZE,
520 - "getsockopt: %s", pcap_strerror(errno));
521 - return -2;
522 + if ((getsockopt(fd, PF_RING, SO_ERROR, &err, &errlen) == -1)
523 + && (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1)) {
524 + snprintf(ebuf, PCAP_ERRBUF_SIZE,
525 + "getsockopt: %s", pcap_strerror(errno));
526 + return -2;
527 }
528
529 if (err > 0) {
530 @@ -1482,6 +1830,13 @@
531 struct pcap *p, *prevp;
532 struct ifreq ifr;
533
534 +#ifdef RING
535 + if(handle->ring_buffer != NULL) {
536 + munmap(handle->ring_buffer, handle->slots_info->tot_mem);
537 + handle->ring_buffer = NULL;
538 + }
539 +#endif
540 +
541 if (handle->md.clear_promisc) {
542 /*
543 * We put the interface into promiscuous mode; take
544 @@ -1698,11 +2053,11 @@
545 }
546
547 /* Any pending errors, e.g., network is down? */
548 -
549 - if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
550 - snprintf(ebuf, PCAP_ERRBUF_SIZE,
551 - "getsockopt: %s", pcap_strerror(errno));
552 - return -1;
553 + if((getsockopt(fd, PF_RING, SO_ERROR, &err, &errlen) == -1)
554 + && (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1)) {
555 + snprintf(ebuf, PCAP_ERRBUF_SIZE,
556 + "getsockopt: %s", pcap_strerror(errno));
557 + return -1;
558 }
559
560 if (err > 0) {
561 @@ -1924,8 +2279,11 @@
562 * the filtering done in userland even if it could have been
563 * done in the kernel.
564 */
565 - if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
566 - &total_fcode, sizeof(total_fcode)) == 0) {
567 + printf("pcap[setsockopt(%d)]\n", 0);
568 + if (setsockopt(handle->fd, 0 /* SOL_SOCKET */,
569 + SO_ATTACH_FILTER,
570 + &total_fcode,
571 + sizeof(total_fcode)) == 0) {
572 char drain[1];
573
574 /*
575 @@ -1933,6 +2291,9 @@
576 */
577 total_filter_on = 1;
578
579 +#ifdef RING
580 + if(!handle->ring_fd) {
581 +#endif
582 /*
583 * Save the socket's current mode, and put it in
584 * non-blocking mode; we drain it by reading packets
585 @@ -1955,12 +2316,15 @@
586 return -2;
587 }
588 }
589 - }
590 +#ifdef RING
591 + }
592 +#endif
593 +}
594
595 /*
596 * Now attach the new filter.
597 */
598 - ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
599 + ret = setsockopt(handle->fd, 0 /* SOL_SOCKET */, SO_ATTACH_FILTER,
600 fcode, sizeof(*fcode));
601 if (ret == -1 && total_filter_on) {
602 /*
603 @@ -1993,7 +2357,8 @@
604 /* setsockopt() barfs unless it get a dummy parameter */
605 int dummy;
606
607 - return setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER,
608 - &dummy, sizeof(dummy));
609 + return setsockopt(handle->fd, handle->ring_fd > 0 ? PF_RING : SOL_SOCKET,
610 + SO_DETACH_FILTER,
611 + &dummy, sizeof(dummy));
612 }
613 #endif