[quagga-dev 5305] [PATCH] zebra: ignore unwanted netlink events with socket filter

Stephen Hemminger stephen.hemminger at vyatta.com
Thu Apr 17 02:21:47 BST 2008


The following patch solves the problem with the current netlink listener that
causes it to get overrun. There are various configuration options that try to
duct-tape over the problem by using a bigger socket buffer, but this should work
better.

What happens is that a link event causes zebra to change a lot of route entries.
Each route change is done by sending a request down the netlink command socket, but
two responses are generated: one is the reply on the netlink command socket, and
the other is a notification on the netlink listener socket. Zebra doesn't care about
the changes it made itself, but they stack up on the listener socket and the socket
gets overrun. Part of the problem is that Quagga doesn't use real threading and
locking, so it doesn't process the listener socket until it gets back to being idle.

The alternative we implemented in VC4.0 was to use the socket filter to drop the
unwanted notifications in the kernel. Unfortunately, socket filtering didn't
work (did nothing) on older kernels, so for this to take effect it requires
either a kernel patch or the upcoming 2.6.26 kernel. Since the patch is harmless
on older kernels, I would like to see it put into the general Quagga code without
yet more kernel version checks.


--- a/lib/zebra.h	2008-04-16 18:05:41.000000000 -0700
+++ b/lib/zebra.h	2008-04-16 18:06:22.000000000 -0700
@@ -162,6 +162,8 @@ typedef int socklen_t;
 #ifdef HAVE_NETLINK
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
+#include <linux/filter.h>
+#include <stddef.h>
 #else
 #define RT_TABLE_MAIN		0
 #endif /* HAVE_NETLINK */
--- a/zebra/rt_netlink.c	2008-04-16 18:00:48.000000000 -0700
+++ b/zebra/rt_netlink.c	2008-04-16 18:03:22.000000000 -0700
@@ -1938,6 +1938,56 @@ kernel_read (struct thread *thread)
   return 0;
 }
 
+/* Attach a kernel socket filter to SOCK (the netlink listener) that drops the
+   route notifications described below; on failure only a warning is logged. */
+static void netlink_install_filter (int sock)
+{
+  /* Filter is equivalent to the checks in netlink_route_change:
+   * if (h->nlmsg_type == RTM_DELROUTE || h->nlmsg_type == RTM_NEWROUTE) {
+   *    if (rtm->rtm_type != RTN_UNICAST)
+   *        return 0;
+   *    if (rtm->rtm_flags & RTM_F_CLONED)
+   *        return 0;
+   *    if (rtm->rtm_protocol == RTPROT_REDIRECT
+   *        || rtm->rtm_protocol == RTPROT_KERNEL)
+   *        return 0;
+   *    if (rtm->rtm_protocol == RTPROT_ZEBRA && h->nlmsg_type == RTM_NEWROUTE)
+   *        return 0;
+   * }
+   * return 0xffff;
+   * BPF absolute loads convert from network byte order, while netlink data is
+   * in host order, so multi-byte constants are wrapped in htons()/htonl().
+   */
+  struct sock_filter filter[] = {
+    /* 0*/ BPF_STMT(BPF_LD|BPF_ABS|BPF_H, offsetof(struct nlmsghdr, nlmsg_type)),
+    /* 1*/ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_DELROUTE), 1, 0),
+    /* 2*/ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_NEWROUTE), 0, 11),
+    /* 3*/ BPF_STMT(BPF_LD|BPF_ABS|BPF_B,
+		    sizeof(struct nlmsghdr) + offsetof(struct rtmsg, rtm_type)),
+    /* 4*/ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, RTN_UNICAST, 0, 8),
+    /* 5*/ BPF_STMT(BPF_LD|BPF_ABS|BPF_W,	/* rtm_flags is 32 bits wide */
+		    sizeof(struct nlmsghdr) + offsetof(struct rtmsg, rtm_flags)),
+    /* 6*/ BPF_JUMP(BPF_JMP|BPF_JSET|BPF_K, htonl(RTM_F_CLONED), 6, 0),
+    /* 7*/ BPF_STMT(BPF_LD|BPF_ABS|BPF_B,
+		    sizeof(struct nlmsghdr) + offsetof(struct rtmsg, rtm_protocol)),
+    /* 8*/ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, RTPROT_REDIRECT, 4, 0),
+    /* 9*/ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, RTPROT_KERNEL, 3, 0),
+    /*10*/ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, RTPROT_ZEBRA, 0, 3),
+    /*11*/ BPF_STMT(BPF_LD|BPF_ABS|BPF_H, offsetof(struct nlmsghdr, nlmsg_type)),
+    /*12*/ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_NEWROUTE), 0, 1),
+    /*13*/ BPF_STMT(BPF_RET|BPF_K, 0),		/* drop */
+    /*14*/ BPF_STMT(BPF_RET|BPF_K, 0xffff),	/* keep */
+  };
+
+  struct sock_fprog prog = {
+    .len = sizeof(filter) / sizeof(filter[0]),
+    .filter = filter,
+  };
+
+  if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0)
+    zlog_warn ("Can't install socket filter: %s\n", safe_strerror(errno));
+}
+
 /* Exported interface function.  This function simply calls
    netlink_socket (). */
 void
@@ -1954,5 +2004,8 @@ kernel_init (void)
 
   /* Register kernel socket. */
   if (netlink.sock > 0)
-    thread_add_read (zebrad.master, kernel_read, NULL, netlink.sock);
+    {
+      netlink_install_filter (netlink.sock);
+      thread_add_read (zebrad.master, kernel_read, NULL, netlink.sock);
+    }
 }



More information about the Quagga-dev mailing list