[quagga-users 7396] Re: Linux 2.6 kernel patch for TCP MD5 for BGP (RFC 2385)

Jonathan McDowell noodles at earth.li
Tue Aug 8 18:08:06 IST 2006


On Fri, Jul 28, 2006 at 12:24:09PM +0100, Jonathan McDowell wrote:
> On Tue, Jul 25, 2006 at 01:37:47PM +0100, Jonathan McDowell wrote:
> > I've forward ported the patch to 2.6.17. So far I've just compile
> > tested it, but I'll do some actual functionality testing and
> > assuming that goes well post it here, with the aim of eventually
> > waving it at the netdev list for them to tear apart.
> I've had a few queries about this off list. I have TCP MD5 working
> with 2.6.17, but I'm hitting a BUG() which I think has actually caused
> issues with older versions of the patch but is now more visible. I
> have some ideas about how to fix it and hope to get time at the start
> of next week.

Ok. The attached is what I currently have. It's against a clean 2.6.17
kernel. I don't think it's ready for submission anywhere yet, but as I'm
still getting queries about it and I've had some success with my limited
testing I thought I'd throw it out for others to try.

Things to note:

* The TCP socket option is now 14, instead of 13 as in previous versions
  of the patch. This means you'll need to alter your Quagga build to use
  this instead (change the 13 after TCP_MD5_AUTH in bgpd/bgp_network.h
  to 14).
* I have not actually tested usage of TCP MD5 with BGP with this patch;
  I have confirmed that the expected behaviour is observed with both
  incoming and outgoing TCP connections to a box running the old
  2.6.12.5 version of the patch though.
* No load testing has been done; only single connections at a time.

In short, don't run this anywhere important yet, but let me know if it
works for you or dies in a ball of fire.

J.

-- 
     noodles is known for its      |  .''`.  Debian GNU/Linux Developer
                                   | : :' :  Happy to accept PGP signed
                                   | `. `'   or encrypted mail - RSA +
                                   |   `-    DSA keys on the keyservers.
-------------- next part --------------
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/include/linux/tcp.h linux-2.6.17-md5/include/linux/tcp.h
--- linux-2.6.17/include/linux/tcp.h	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/include/linux/tcp.h	2006-07-25 10:48:33.000000000 +0100
@@ -94,6 +94,7 @@ enum { 
 #define TCP_INFO		11	/* Information about this connection. */
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
 #define TCP_CONGESTION		13	/* Congestion control algorithm */
+#define TCP_RFC2385		14	/* TCP MD5 signatures */
 
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
@@ -189,6 +190,9 @@ struct tcp_options_received {
 	__u8	num_sacks;	/* Number of SACK blocks		*/
 	__u16	user_mss;  	/* mss requested by user in ioctl */
 	__u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
+#ifdef CONFIG_TCP_RFC2385
+	struct tcp_rfc2385_db *md5_db;
+#endif
 };
 
 struct tcp_request_sock {
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/include/linux/tcp_rfc2385.h linux-2.6.17-md5/include/linux/tcp_rfc2385.h
--- linux-2.6.17/include/linux/tcp_rfc2385.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.17-md5/include/linux/tcp_rfc2385.h	2006-08-04 22:00:03.000000000 +0100
@@ -0,0 +1,65 @@
+
+/* Copyright 2001 AYR Networks, Inc.
+ *
+ * Author: Rick Payne
+ *
+ * This is a free document; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation:
+ *     http://www.gnu.org/copyleft/gpl.html
+ */
+
+/*
+ * Structures and function calls used for the RFC2385 code
+ *
+ * This is #included in both kernel and userland code,
+ * so use __KERNEL__ appropriately.
+ */
+
+#ifndef __LINUX__TCP_RFC2385_H__
+#define __LINUX__TCP_RFC2385_H__
+
+/* Commands (used in the structure passed from userland) */
+#define TCP_RFC2385_ADD 1
+#define TCP_RFC2385_DEL 2
+
+struct tcp_rfc2385_cmd {
+	u_int8_t     command;    /* TCP_RFC2385_ADD or TCP_RFC2385_DEL */
+	u_int32_t    address;    /* IPv4 address the key is associated with */
+	u_int8_t     keylen;     /* MD5 key length in bytes (key is NOT 0 terminated ascii) */
+	void         *key;       /* Userland pointer to the MD5 key bytes */
+};
+
+#ifdef __KERNEL__
+#include <asm/atomic.h>
+#include <linux/spinlock.h>
+
+struct tcp_rfc2385_tfm;
+struct tcp_rfc2385 *tcp_v4_md5_lookup (struct sock *, __u32);
+int tcp_v4_md5_do_add (struct sock *, __u32, char *, __u8);
+int tcp_v4_parse_md5_keys (struct sock *, char *, int);
+int tcp_v4_calc_md5_hash (char *, struct tcp_rfc2385 *,
+						  struct tcp_rfc2385_tfm *,
+						  __u32, __u32,
+						  struct tcphdr *, int, int);
+
+struct tcp_rfc2385 {
+	__u32   addr;      /* The IPv4 address this key is filed under */
+	__u8    keylen;    /* Length of key, in bytes */
+	__u8    *key;      /* kmalloc'd key bytes - not null terminated */
+};
+
+struct tcp_rfc2385_tfm {
+	struct crypto_tfm	*tfm;		/* MD5 tfm from crypto_alloc_tfm("md5", 0) */
+	spinlock_t		lock;		/* Held around each digest computation */
+	atomic_t		refcount;	/* Holders of this struct; freed at zero */
+};
+
+struct tcp_rfc2385_db {
+	__u32			db_entries;	/* Number of elements in entries */
+	struct tcp_rfc2385_tfm	*tfm;		/* MD5 tfm object; NULL until allocated */
+	struct tcp_rfc2385	*entries;	/* kmalloc'd array of per-address keys */
+};
+#endif
+
+#endif /* __LINUX__TCP_RFC2385_H__ */
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/include/net/inet_timewait_sock.h linux-2.6.17-md5/include/net/inet_timewait_sock.h
--- linux-2.6.17/include/net/inet_timewait_sock.h	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/include/net/inet_timewait_sock.h	2006-08-08 13:57:53.000000000 +0100
@@ -30,6 +30,11 @@
 
 #include <asm/atomic.h>
 
+#ifdef CONFIG_TCP_RFC2385
+#include <linux/crypto.h>
+#include <linux/tcp_rfc2385.h>
+#endif
+
 struct inet_hashinfo;
 
 #define INET_TWDR_RECYCLE_SLOTS_LOG	5
@@ -134,6 +139,11 @@ struct inet_timewait_sock {
 	unsigned long		tw_ttd;
 	struct inet_bind_bucket	*tw_tb;
 	struct hlist_node	tw_death_node;
+#ifdef CONFIG_TCP_RFC2385
+	__u8			*md5_key;
+	__u8			md5_keylen;
+	struct tcp_rfc2385_tfm	*md5_tfm;
+#endif
 };
 
 static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
@@ -201,6 +211,19 @@ static inline void inet_twsk_put(struct 
 		printk(KERN_DEBUG "%s timewait_sock %p released\n",
 		       tw->tw_prot->name, tw);
 #endif
+#ifdef CONFIG_TCP_RFC2385
+		/* Free the memory used for any md5 key */
+		if (tw->md5_key) {
+			kfree(tw->md5_key);
+			tw->md5_key = NULL;
+			tw->md5_keylen = 0;
+			if (tw->md5_tfm &&
+				atomic_dec_and_test(&tw->md5_tfm->refcount)) {
+				crypto_free_tfm(tw->md5_tfm->tfm);
+				kfree(tw->md5_tfm);
+			}
+		}
+#endif
 		kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
 		module_put(owner);
 	}
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/include/net/request_sock.h linux-2.6.17-md5/include/net/request_sock.h
--- linux-2.6.17/include/net/request_sock.h	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/include/net/request_sock.h	2006-07-25 11:58:28.000000000 +0100
@@ -35,7 +35,7 @@ struct request_sock_ops {
 				       struct dst_entry *dst);
 	void		(*send_ack)(struct sk_buff *skb,
 				    struct request_sock *req);
-	void		(*send_reset)(struct sk_buff *skb);
+	void		(*send_reset)(struct sock *sk, struct sk_buff *skb);
 	void		(*destructor)(struct request_sock *req);
 };
 
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/include/net/tcp.h linux-2.6.17-md5/include/net/tcp.h
--- linux-2.6.17/include/net/tcp.h	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/include/net/tcp.h	2006-07-25 11:45:37.000000000 +0100
@@ -41,6 +41,11 @@
 
 #include <linux/seq_file.h>
 
+#ifdef CONFIG_TCP_RFC2385
+#include <linux/tcp_rfc2385.h>
+#include <linux/crypto.h>
+#endif
+
 extern struct inet_hashinfo tcp_hashinfo;
 
 extern atomic_t tcp_orphan_count;
@@ -162,6 +167,7 @@ extern void tcp_time_wait(struct sock *s
 #define TCPOPT_SACK_PERM        4       /* SACK Permitted */
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
+#define TCPOPT_RFC2385		19	/* TCP MD5 signature */
 
 /*
  *     TCP option lengths
@@ -171,6 +177,7 @@ extern void tcp_time_wait(struct sock *s
 #define TCPOLEN_WINDOW         3
 #define TCPOLEN_SACK_PERM      2
 #define TCPOLEN_TIMESTAMP      10
+#define TCPOLEN_RFC2385        18
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -179,6 +186,7 @@ extern void tcp_time_wait(struct sock *s
 #define TCPOLEN_SACK_BASE		2
 #define TCPOLEN_SACK_BASE_ALIGNED	4
 #define TCPOLEN_SACK_PERBLOCK		8
+#define TCPOLEN_RFC2385_ALIGNED		20
 
 /* Flags in tp->nonagle */
 #define TCP_NAGLE_OFF		1	/* Nagle's algo is disabled */
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/net/dccp/ipv4.c linux-2.6.17-md5/net/dccp/ipv4.c
--- linux-2.6.17/net/dccp/ipv4.c	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/net/dccp/ipv4.c	2006-08-04 23:43:05.000000000 +0100
@@ -688,7 +688,7 @@ static struct dst_entry* dccp_v4_route_s
 	return &rt->u.dst;
 }
 
-static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
+static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 {
 	int err;
 	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
@@ -810,7 +810,7 @@ int dccp_v4_do_rcv(struct sock *sk, stru
 	return 0;
 
 reset:
-	dccp_v4_ctl_send_reset(skb);
+	dccp_v4_ctl_send_reset(sk, skb);
 discard:
 	kfree_skb(skb);
 	return 0;
@@ -968,7 +968,7 @@ no_dccp_socket:
 	if (dh->dccph_type != DCCP_PKT_RESET) {
 		DCCP_SKB_CB(skb)->dccpd_reset_code =
 					DCCP_RESET_CODE_NO_CONNECTION;
-		dccp_v4_ctl_send_reset(skb);
+		dccp_v4_ctl_send_reset(NULL, skb);
 	}
 
 discard_it:
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/net/dccp/ipv6.c linux-2.6.17-md5/net/dccp/ipv6.c
--- linux-2.6.17/net/dccp/ipv6.c	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/net/dccp/ipv6.c	2006-08-04 22:03:32.000000000 +0100
@@ -36,7 +36,7 @@
 /* Socket used for sending RSTs and ACKs */
 static struct socket *dccp_v6_ctl_socket;
 
-static void dccp_v6_ctl_send_reset(struct sk_buff *skb);
+static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *skb);
 static void dccp_v6_reqsk_send_ack(struct sk_buff *skb,
 				   struct request_sock *req);
 static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb);
@@ -511,7 +511,7 @@ static void dccp_v6_send_check(struct so
 							  skb->csum));
 }
 
-static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
+static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 {
 	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
 	const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
@@ -1016,7 +1016,7 @@ static int dccp_v6_do_rcv(struct sock *s
 	return 0;
 
 reset:
-	dccp_v6_ctl_send_reset(skb);
+	dccp_v6_ctl_send_reset(sk, skb);
 discard:
 	if (opt_skb != NULL)
 		__kfree_skb(opt_skb);
@@ -1085,7 +1085,7 @@ no_dccp_socket:
 	if (dh->dccph_type != DCCP_PKT_RESET) {
 		DCCP_SKB_CB(skb)->dccpd_reset_code =
 					DCCP_RESET_CODE_NO_CONNECTION;
-		dccp_v6_ctl_send_reset(skb);
+		dccp_v6_ctl_send_reset(NULL, skb);
 	}
 discard_it:
 
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/net/dccp/minisocks.c linux-2.6.17-md5/net/dccp/minisocks.c
--- linux-2.6.17/net/dccp/minisocks.c	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/net/dccp/minisocks.c	2006-07-25 15:17:19.000000000 +0100
@@ -244,7 +244,7 @@ listen_overflow:
 	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
 drop:
 	if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
-		req->rsk_ops->send_reset(skb);
+		req->rsk_ops->send_reset(sk, skb);
 
 	inet_csk_reqsk_queue_drop(sk, req, prev);
 	goto out;
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/net/ipv4/Kconfig linux-2.6.17-md5/net/ipv4/Kconfig
--- linux-2.6.17/net/ipv4/Kconfig	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/net/ipv4/Kconfig	2006-07-25 11:00:58.000000000 +0100
@@ -368,6 +368,20 @@ config SYN_COOKIES
 
 	  If unsure, say N.
 
+config TCP_RFC2385
+	bool "IP: MD5 protection of TCP segments (RFC2385)"
+	select CRYPTO
+	select CRYPTO_MD5
+	---help---
+	RFC2385 specifies a method of giving MD5 protection to TCP sessions.
+	Its main (only?) use is to protect BGP sessions between core routers
+	on the Internet.
+
+	Currently only IPv4 TCP sessions support this option.
+
+	If you will be running something like Zebra or Quagga to talk BGP to
+	Cisco/Juniper/etc routers, you may want to say 'Y' here.
+
 config INET_AH
 	tristate "IP: AH transformation"
 	select XFRM
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/net/ipv4/tcp.c linux-2.6.17-md5/net/ipv4/tcp.c
--- linux-2.6.17/net/ipv4/tcp.c	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/net/ipv4/tcp.c	2006-07-25 10:57:53.000000000 +0100
@@ -1865,6 +1865,13 @@ static int do_tcp_setsockopt(struct sock
 		}
 		break;
 
+#ifdef CONFIG_TCP_RFC2385
+	case TCP_RFC2385:
+		/* Read the IP->Key mappings from usermode */
+		err = tcp_v4_parse_md5_keys(sk, optval, optlen);
+		break;
+#endif
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/net/ipv4/tcp_input.c linux-2.6.17-md5/net/ipv4/tcp_input.c
--- linux-2.6.17/net/ipv4/tcp_input.c	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/net/ipv4/tcp_input.c	2006-07-25 10:51:29.000000000 +0100
@@ -2661,6 +2661,15 @@ void tcp_parse_options(struct sk_buff *s
 					   opt_rx->sack_ok) {
 						TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
 					}
+					break;
+#ifdef CONFIG_TCP_RFC2385
+				case TCPOPT_RFC2385:
+					/*
+					 * The MD5 hash has already been
+					 * checked (see tcp_v4_do_rcv)
+					 */
+					break;
+#endif
 	  			};
 	  			ptr+=opsize-2;
 	  			length-=opsize;
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/net/ipv4/tcp_ipv4.c linux-2.6.17-md5/net/ipv4/tcp_ipv4.c
--- linux-2.6.17/net/ipv4/tcp_ipv4.c	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/net/ipv4/tcp_ipv4.c	2006-08-08 15:10:14.000000000 +0100
@@ -78,6 +78,11 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
+#ifdef CONFIG_TCP_RFC2385
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+#endif
+
 int sysctl_tcp_tw_reuse;
 int sysctl_tcp_low_latency;
 
@@ -95,6 +100,12 @@ struct inet_hashinfo __cacheline_aligned
 	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
 };
 
+#ifdef CONFIG_TCP_RFC2385
+/* #define MD5_DEBUG 1 */
+static void tcp_v4_clear_md5_list(struct sock *sk);
+static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb);
+#endif
+
 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
 {
 	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
@@ -508,11 +519,18 @@ void tcp_v4_send_check(struct sock *sk, 
  *	Exception: precedence violation. We do not implement it in any case.
  */
 
-static void tcp_v4_send_reset(struct sk_buff *skb)
+static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = skb->h.th;
-	struct tcphdr rth;
+	struct {
+		struct tcphdr th;
+		/* Make room for the timestamp option and possible MD5 hash */
+		u32 tsopt[(TCPOLEN_RFC2385_ALIGNED >> 2)];
+	} rep;
 	struct ip_reply_arg arg;
+#ifdef CONFIG_TCP_RFC2385
+	struct tcp_rfc2385 *key;
+#endif
 
 	/* Never send a reset in response to a reset. */
 	if (th->rst)
@@ -522,29 +540,57 @@ static void tcp_v4_send_reset(struct sk_
 		return;
 
 	/* Swap the send and the receive. */
-	memset(&rth, 0, sizeof(struct tcphdr));
-	rth.dest   = th->source;
-	rth.source = th->dest;
-	rth.doff   = sizeof(struct tcphdr) / 4;
-	rth.rst    = 1;
+	memset(&rep, 0, sizeof(rep));
+	rep.th.dest   = th->source;
+	rep.th.source = th->dest;
+	rep.th.doff   = sizeof(struct tcphdr) / 4;
+	rep.th.rst    = 1;
 
 	if (th->ack) {
-		rth.seq = th->ack_seq;
+		rep.th.seq = th->ack_seq;
 	} else {
-		rth.ack = 1;
-		rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
+		rep.th.ack = 1;
+		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 				    skb->len - (th->doff << 2));
 	}
 
 	memset(&arg, 0, sizeof arg);
-	arg.iov[0].iov_base = (unsigned char *)&rth;
-	arg.iov[0].iov_len  = sizeof rth;
+	arg.iov[0].iov_base = (unsigned char *)&rep;
+	arg.iov[0].iov_len  = sizeof(struct tcphdr);
+
+#ifdef CONFIG_TCP_RFC2385
+	/*
+	 * Keys are filed under the peer's address, which is the source of
+	 * the segment being answered (same lookup as the inbound check).
+	 */
+	key = sk ? tcp_v4_md5_lookup(sk, skb->nh.iph->saddr) : NULL;
+	if (key) {
+		int offset = 0;
+		struct tcp_rfc2385_db *db = tcp_sk(sk)->rx_opt.md5_db;
+
+		rep.tsopt[offset++] = __constant_htonl((TCPOPT_NOP << 24)
+						| (TCPOPT_NOP << 16)
+						| (TCPOPT_RFC2385 << 8)
+						| TCPOLEN_RFC2385);
+		/* Update length and the length the header thinks exists. */
+		arg.iov[0].iov_len += TCPOLEN_RFC2385_ALIGNED;
+		rep.th.doff = arg.iov[0].iov_len/4;
+
+		tcp_v4_calc_md5_hash((__u8 *) &rep.tsopt[offset],
+					key, db->tfm,
+					skb->nh.iph->daddr,
+					skb->nh.iph->saddr,
+					&rep.th, IPPROTO_TCP,
+					arg.iov[0].iov_len);
+	}
+#endif
+
 	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
 				      skb->nh.iph->saddr, /*XXX*/
 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 
-	ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
+	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
 
 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
 	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
@@ -554,15 +600,20 @@ static void tcp_v4_send_reset(struct sk_
    outside socket context is ugly, certainly. What can I do?
  */
 
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
-			    u32 win, u32 ts)
+static void tcp_v4_send_ack(struct inet_timewait_sock *tw, struct sk_buff *skb,
+			    u32 seq, u32 ack, u32 win, u32 ts)
 {
 	struct tcphdr *th = skb->h.th;
 	struct {
 		struct tcphdr th;
-		u32 tsopt[3];
+		u32 tsopt[3 + (TCPOLEN_RFC2385_ALIGNED >> 2)];
 	} rep;
 	struct ip_reply_arg arg;
+#ifdef CONFIG_TCP_RFC2385
+	struct tcp_rfc2385 *key;
+	struct tcp_rfc2385 tw_key;
+	struct tcp_rfc2385_tfm *tfm;
+#endif
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof arg);
@@ -575,7 +626,7 @@ static void tcp_v4_send_ack(struct sk_bu
 				     TCPOLEN_TIMESTAMP);
 		rep.tsopt[1] = htonl(tcp_time_stamp);
 		rep.tsopt[2] = htonl(ts);
-		arg.iov[0].iov_len = sizeof(rep);
+		arg.iov[0].iov_len = sizeof(rep.th) + TCPOLEN_TSTAMP_ALIGNED;
 	}
 
 	/* Swap the send and the receive. */
@@ -587,6 +638,49 @@ static void tcp_v4_send_ack(struct sk_bu
 	rep.th.ack     = 1;
 	rep.th.window  = htons(win);
 
+#ifdef CONFIG_TCP_RFC2385
+	/*
+	 * The SKB holds an incoming packet, but may not have a valid ->sk
+	 * pointer. This is especially the case when we're dealing with a
+	 * TIME_WAIT ack, because the sk structure is long gone, and only the
+	 * inet_timewait_sock remains. So the md5 key and tfm stashed in that
+	 * structure are used in preference. Socket keys are looked up by the
+	 * peer address, i.e. the source of the packet being acked.
+	 */
+	if (!tw && skb->sk) {
+		struct tcp_rfc2385_db *db = tcp_sk(skb->sk)->rx_opt.md5_db;
+
+		key = tcp_v4_md5_lookup(skb->sk, skb->nh.iph->saddr);
+		tfm = db ? db->tfm : NULL;	/* no keys set: no db at all */
+	} else if (tw && tw->md5_key) {
+		tw_key.key = tw->md5_key;
+		tw_key.keylen = tw->md5_keylen;
+		key = &tw_key;
+		tfm = tw->md5_tfm;	/* NULL makes the hash code allocate one */
+	} else {
+		key = NULL;
+		tfm = NULL;
+	}
+
+	if (key) {
+		int offset = (ts) ? 3 : 0;
+
+		rep.tsopt[offset++] = __constant_htonl((TCPOPT_NOP << 24)
+					| (TCPOPT_NOP << 16)
+					| (TCPOPT_RFC2385 << 8)
+					| TCPOLEN_RFC2385);
+		arg.iov[0].iov_len += TCPOLEN_RFC2385_ALIGNED;
+		rep.th.doff = arg.iov[0].iov_len / 4;
+
+		tcp_v4_calc_md5_hash((__u8 *) &rep.tsopt[offset],
+					key, tfm,
+					skb->nh.iph->daddr,
+					skb->nh.iph->saddr,
+					&rep.th, IPPROTO_TCP,
+					arg.iov[0].iov_len);
+	}
+#endif
+
 	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
 				      skb->nh.iph->saddr, /*XXX*/
 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
@@ -602,7 +696,7 @@ static void tcp_v4_timewait_ack(struct s
 	struct inet_timewait_sock *tw = inet_twsk(sk);
 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
-	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+	tcp_v4_send_ack(tw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
 
 	inet_twsk_put(tw);
@@ -610,7 +704,7 @@ static void tcp_v4_timewait_ack(struct s
 
 static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
 {
-	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
+	tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 			req->ts_recent);
 }
 
@@ -760,6 +854,10 @@ int tcp_v4_conn_request(struct sock *sk,
 	tmp_opt.mss_clamp = 536;
 	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
 
+#ifdef CONFIG_TCP_RFC2385
+	tmp_opt.md5_db = NULL;
+#endif
+
 	tcp_parse_options(skb, &tmp_opt, 0);
 
 	if (want_cookie) {
@@ -905,6 +1003,30 @@ struct sock *tcp_v4_syn_recv_sock(struct
 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
 	tcp_initialize_rcv_mss(newsk);
 
+#ifdef CONFIG_TCP_RFC2385
+	/* Copy over the MD5 key from the original socket. */
+	{
+		struct tcp_rfc2385 *key;
+
+		if ((key = tcp_v4_md5_lookup(sk, newinet->daddr))) {
+			struct tcp_rfc2385_db *db = tcp_sk(sk)->rx_opt.md5_db;
+			/* Give newsk its own copy, filed under the peer
+			 * address. On allocation failure nothing is copied. */
+			char *newkey = kmalloc(key->keylen, GFP_ATOMIC);
+			if (newkey) {
+				memcpy(newkey, key->key, key->keylen);
+				if (tcp_v4_md5_do_add(newsk, newinet->daddr,
+						newkey, key->keylen)) {
+					kfree(newkey);
+				} else if (db->tfm) {
+					atomic_inc(&db->tfm->refcount);
+					tcp_sk(newsk)->rx_opt.md5_db->tfm = db->tfm;
+				}
+			}
+		}
+	}
+#endif
+
 	__inet_hash(&tcp_hashinfo, newsk, 0);
 	__inet_inherit_port(&tcp_hashinfo, sk, newsk);
 
@@ -980,10 +1102,24 @@ static int tcp_v4_checksum_init(struct s
  */
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
+	struct sock *rsk;
+
+#ifdef CONFIG_TCP_RFC2385
+	/*
+	 * We really want to reject the packet as early as possible if:
+	 * o We're expecting an MD5'd packet and there is no MD5 tcp option.
+	 * o There is an MD5 option and we're not expecting one.
+	 */
+	if (tcp_v4_inbound_md5_hash(sk, skb))
+		goto discard;
+#endif
+
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		TCP_CHECK_TIMER(sk);
-		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
+		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
+			rsk = sk;
 			goto reset;
+		}
 		TCP_CHECK_TIMER(sk);
 		return 0;
 	}
@@ -997,20 +1133,24 @@ int tcp_v4_do_rcv(struct sock *sk, struc
 			goto discard;
 
 		if (nsk != sk) {
-			if (tcp_child_process(sk, nsk, skb))
+			if (tcp_child_process(sk, nsk, skb)) {
+				rsk = nsk;
 				goto reset;
+			}
 			return 0;
 		}
 	}
 
 	TCP_CHECK_TIMER(sk);
-	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
+		rsk = sk;
 		goto reset;
+	}
 	TCP_CHECK_TIMER(sk);
 	return 0;
 
 reset:
-	tcp_v4_send_reset(skb);
+	tcp_v4_send_reset(rsk, skb);
 discard:
 	kfree_skb(skb);
 	/* Be careful here. If this function gets more complicated and
@@ -1109,7 +1249,7 @@ no_tcp_socket:
 bad_packet:
 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
 	} else {
-		tcp_v4_send_reset(skb);
+		tcp_v4_send_reset(NULL, skb);
 	}
 
 discard_it:
@@ -1277,6 +1417,10 @@ static int tcp_v4_init_sock(struct sock 
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
+#ifdef CONFIG_TCP_RFC2385
+	tp->rx_opt.md5_db = NULL;
+#endif
+
 	atomic_inc(&tcp_sockets_allocated);
 
 	return 0;
@@ -1291,10 +1435,15 @@ int tcp_v4_destroy_sock(struct sock *sk)
 	tcp_cleanup_congestion_control(sk);
 
 	/* Cleanup up the write buffer. */
-  	sk_stream_writequeue_purge(sk);
+	sk_stream_writequeue_purge(sk);
+
+#ifdef CONFIG_TCP_RFC2385
+	/* Clean up the MD5 key list. */
+	tcp_v4_clear_md5_list(sk);
+#endif
 
 	/* Cleans up our, hopefully empty, out_of_order_queue. */
-  	__skb_queue_purge(&tp->out_of_order_queue);
+	__skb_queue_purge(&tp->out_of_order_queue);
 
 	/* Clean prequeue, it must be empty really */
 	__skb_queue_purge(&tp->ucopy.prequeue);
@@ -1318,6 +1467,468 @@ int tcp_v4_destroy_sock(struct sock *sk)
 
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
 
+#ifdef CONFIG_TCP_RFC2385
+/*
+ * RFC2385 MD5 checksumming requires a mapping of IP address->MD5 Key.
+ * We need to maintain these in the sk structure.
+ */
+
+struct tcp_rfc2385 *tcp_v4_md5_lookup(struct sock *sk, __u32 addr)
+{
+	/* Return the key entry filed under addr, or NULL if there is none. */
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_rfc2385_db *db;
+	struct tcp_rfc2385 *cur, *end;
+
+	if (tp == NULL) {
+		printk(KERN_ERR "%s: No tcp_opt!", __FUNCTION__);
+		return NULL;
+	}
+
+	/* No database, or an empty one, means no key for anybody. */
+	db = tp->rx_opt.md5_db;
+	if (db == NULL || db->db_entries == 0)
+		return NULL;
+
+	end = db->entries + db->db_entries;
+	for (cur = db->entries; cur != end; cur++)
+		if (cur->addr == addr)
+			return cur;
+	return NULL;
+}
+
+/* Handle TCP_RFC2385_ADD: install or replace the key for cmd->address. */
+static int tcp_v4_md5_add(struct sock *sk, struct tcp_rfc2385_cmd *cmd)
+{
+	struct tcp_rfc2385_db *db;
+	unsigned char *newkey;
+	int ret;
+
+	/*
+	 * Copy the key in from userland; cmd->key is a user pointer. If a
+	 * key is already defined for this address it is replaced.
+	 * Note, GFP_KERNEL is acceptable here.
+	 */
+	newkey = kmalloc(cmd->keylen, GFP_KERNEL);
+	if (!newkey)
+		return -ENOMEM;
+
+	if (copy_from_user(newkey, cmd->key, cmd->keylen)) {
+		/* Nothing owns the buffer yet, so release it here. */
+		kfree(newkey);
+		return -EFAULT;
+	}
+
+	ret = tcp_v4_md5_do_add(sk, cmd->address, newkey, cmd->keylen);
+	if (ret) {
+		/* A failed add never stores the key pointer. */
+		kfree(newkey);
+		return ret;
+	}
+
+	/*
+	 * If the TFM hasn't been allocated then we do so here; we can't do
+	 * it in tcp_v4_md5_do_add as that can be called in an interrupt
+	 * context.
+	 */
+	db = tcp_sk(sk)->rx_opt.md5_db;
+	if (!db->tfm) {
+		db->tfm = kmalloc(sizeof(struct tcp_rfc2385_tfm), GFP_KERNEL);
+		if (db->tfm) {
+			db->tfm->tfm = crypto_alloc_tfm("md5", 0);
+			spin_lock_init(&db->tfm->lock);
+			atomic_set(&db->tfm->refcount, 1);
+		}
+	}
+
+	return 0;
+}
+
+/* This can be called on a newly created socket, from other files */
+int tcp_v4_md5_do_add(struct sock *sk, __u32 addr, char *newkey, __u8 newkeylen)
+{
+	/* Install newkey for addr; 0 on success, -ENOMEM if we can't grow. */
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_rfc2385_db *db = tp->rx_opt.md5_db;
+	struct tcp_rfc2385 *existing, *grown, *slot;
+
+	existing = tcp_v4_md5_lookup(sk, addr);
+	if (existing) {
+		/* Known address: swap the key in place. */
+		kfree(existing->key);
+		existing->key = newkey;
+		existing->keylen = newkeylen;
+		return 0;
+	}
+
+	/* First key on this socket: create an empty database for it. */
+	if (!db) {
+		db = kmalloc(sizeof (struct tcp_rfc2385_db), GFP_ATOMIC);
+		if (!db)
+			return -ENOMEM;
+
+		db->db_entries = 0;
+		db->entries = NULL;
+		db->tfm = NULL;
+
+		tp->rx_opt.md5_db = db;
+	}
+
+	/* Build a table one entry larger and carry the old entries over. */
+	grown = kmalloc(sizeof(struct tcp_rfc2385) * (db->db_entries + 1),
+			GFP_ATOMIC);
+	if (!grown)
+		return -ENOMEM;
+
+	if (db->db_entries)
+		memcpy(grown, db->entries,
+			sizeof (struct tcp_rfc2385) * db->db_entries);
+
+	/* Free old key list, and reference new one */
+	if (db->entries)
+		kfree(db->entries);
+	db->entries = grown;
+	slot = &db->entries[db->db_entries++];
+	slot->addr = addr;
+	slot->key = newkey;
+	slot->keylen = newkeylen;
+
+	return 0;
+}
+
+static int tcp_v4_md5_del(struct sock *sk, struct tcp_rfc2385_cmd *cmd)
+{
+	/* Handle TCP_RFC2385_DEL: returns 0, or -ENOENT if no key matches. */
+	__u32 i;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_rfc2385_db *db = tp->rx_opt.md5_db;
+
+	/* No key was ever added on this socket. */
+	if (!db)
+		return -ENOENT;
+
+	for (i = 0; i < db->db_entries; i++) {
+		if (db->entries[i].addr != cmd->address)
+			continue;
+
+		/* Free the key */
+		kfree(db->entries[i].key);
+		db->db_entries--;
+
+		if (db->db_entries == 0) {
+			/* Last key gone: free the entire structure. */
+			kfree(db->entries);
+
+			/* The tfm may be shared (or missing); drop our ref. */
+			if (db->tfm && atomic_dec_and_test(&db->tfm->refcount)) {
+				crypto_free_tfm(db->tfm->tfm);
+				kfree(db->tfm);
+			}
+			kfree(db);
+			tp->rx_opt.md5_db = NULL;
+		} else if (i != db->db_entries) {
+			/* Close the gap; the ranges overlap, so memmove. */
+			memmove(&db->entries[i], &db->entries[i + 1],
+				(db->db_entries - i) * sizeof(struct tcp_rfc2385));
+		}
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+int tcp_v4_parse_md5_keys(struct sock *sk, char *optval, int optlen)
+{
+	/*
+	 * setsockopt(TCP_RFC2385): optval is one userland tcp_rfc2385_cmd.
+	 */
+	struct tcp_rfc2385_cmd request;
+	int err;
+
+	if (optlen != sizeof(request))
+		return -ENOSPC;
+
+	if (copy_from_user(&request, optval, sizeof(request)))
+		return -EFAULT;
+
+	/* Dispatch on the command; anything unknown is -ENOENT. */
+	if (request.command == TCP_RFC2385_ADD)
+		err = tcp_v4_md5_add(sk, &request);
+	else if (request.command == TCP_RFC2385_DEL)
+		err = tcp_v4_md5_del(sk, &request);
+	else
+		err = -ENOENT;
+
+	return err;
+}
+
+int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_rfc2385 *key,
+				  struct tcp_rfc2385_tfm *tfm,
+				  __u32 saddr, __u32 daddr,
+				  struct tcphdr *th, int protocol,
+				  int tcplen)
+{
+	struct scatterlist sg[4];
+	__u16 data_len;
+	int block = 0;
+	int allocated_tfm = !tfm;
+	struct tcp_rfc2385_tfm atfm;
+#ifdef MD5_DEBUG
+	int i;
+#endif
+	__u16 old_checksum;
+	/* A structure to simplify the md5-ing */
+	struct {
+		__u32 saddr;
+		__u32 daddr;
+		__u8 pad;
+		__u8 protocol;
+		__u16 len;
+	} md5_block;
+
+	/* Okay, so RFC2385 is turned on for this connection,
+	 * so we need to generate the MD5 hash for the packet now.
+	 * The 16 byte digest is written to md5_hash. The caller's tfm is
+	 * used under its spinlock; if tfm == NULL a temporary one is
+	 * allocated, as we're coming from somewhere without one.
+	 *
+	 * 1. the TCP pseudo-header (in the order: source IP address,
+	 * destination IP address, zero-padded protocol number, and
+	 * segment length)
+	 */
+	md5_block.saddr = saddr;
+	md5_block.daddr = daddr;
+	md5_block.pad = 0;
+	md5_block.protocol = protocol;
+	md5_block.len = htons(tcplen);
+	sg[block].page = virt_to_page(&md5_block);
+	sg[block].offset = ((long)(&md5_block) & ~PAGE_MASK);
+	sg[block++].length = sizeof(md5_block);
+
+#ifdef MD5_DEBUG
+	printk("Calcuating hash for: ");
+ 	for (i = 0; i < sizeof(md5_block); i++)
+		printk ("%x ", ((unsigned char *)&md5_block)[i]);
+#endif
+
+	/* 2. the TCP header, excluding options, and assuming a
+	 * checksum of zero
+	 */
+	old_checksum = th->check;
+	th->check = 0;
+	sg[block].page = virt_to_page(th);
+	sg[block].offset = ((long)(th) & ~PAGE_MASK);
+	sg[block++].length = sizeof(struct tcphdr);
+
+#ifdef MD5_DEBUG
+	for (i = 0; i < sizeof(struct tcphdr); i++)
+		printk ("%x ", ((unsigned char *)th)[i]);
+#endif
+
+	/* 3. the TCP segment data (if any) */
+	data_len = tcplen - (th->doff << 2);
+	if (data_len > 0) {
+		unsigned char *data = (unsigned char *)th + (th->doff << 2);
+
+		sg[block].page = virt_to_page(data);
+		sg[block].offset = ((long)data & ~PAGE_MASK);
+		sg[block++].length = data_len;
+	}
+
+	/* 4. an independently-specified key or password, known to both
+	 * TCPs and presumably connection-specific
+	 */
+	sg[block].page = virt_to_page(key->key);
+	sg[block].offset = ((long)key->key & ~PAGE_MASK);
+	sg[block++].length = key->keylen;
+
+#ifdef MD5_DEBUG
+	printk ("and password: ");
+	for (i = 0; i < key->keylen; i++)
+		printk ("%x ", key->key[i]);
+#endif
+
+	/* Now store the Hash into the packet */
+	if (allocated_tfm) {
+		atomic_set(&atfm.refcount, 1);
+		spin_lock_init(&atfm.lock);
+		atfm.tfm = crypto_alloc_tfm("md5", 0);
+		tfm = &atfm;
+	}
+	if (tfm && tfm->tfm) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&tfm->lock, flags);
+		crypto_digest_init(tfm->tfm);
+		crypto_digest_update(tfm->tfm, sg, block);
+		crypto_digest_final(tfm->tfm, md5_hash);
+		spin_unlock_irqrestore(&tfm->lock, flags);
+	} else
+		memset(md5_hash, 0, 16);
+
+	if (allocated_tfm && atfm.tfm && atomic_dec_and_test(&atfm.refcount))
+		crypto_free_tfm(atfm.tfm);
+
+	/* Reset header, and free up the crypto */
+	th->check = old_checksum;
+	return 0;
+}
+
+static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
+{
+	/* This gets called for each TCP segment that arrives
+	 * so we want to be efficient.
+	 * We have 3 drop cases:
+	 * o No MD5 hash and one expected.
+	 * o MD5 hash and we're not expecting one.
+	 * o MD5 hash and it's wrong.
+	 * An MD5 option of the wrong length is treated as absent.
+	 * Returns 1 if the segment should be dropped, 0 otherwise.
+	 */
+	__u8 *hash_location = NULL;
+	struct tcp_rfc2385 *hash_expected;
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *th = skb->h.th;
+	struct tcp_sock *tp = tcp_sk(sk);
+	int length = (th->doff << 2) - sizeof (struct tcphdr);
+	int genhash;
+	unsigned char *ptr;
+	unsigned char newhash[16];
+
+	hash_expected = tcp_v4_md5_lookup(sk, iph->saddr);
+
+	/* If the TCP option length is less than the TCP_RFC2385
+	 * option length, then we can shortcut
+	 */
+	if (length < TCPOLEN_RFC2385)
+		return hash_expected ? 1 : 0;
+
+	/* Okay, we can't shortcut - we have to grub through the options */
+	ptr = (unsigned char *)(th + 1);
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			goto done_opts;
+		case TCPOPT_NOP:
+			length--;
+			continue;
+		default:
+			/* Stop if the length byte itself is out of bounds */
+			if (length < 2)
+				goto done_opts;
+			opsize = *ptr++;
+			if (opsize < 2 || opsize > length)
+				goto done_opts;
+			/* A short option would make the memcmp() of
+			 * the digest below read past it */
+			if (opcode == TCPOPT_RFC2385) {
+				if (opsize == TCPOLEN_RFC2385)
+					hash_location = ptr;
+				goto done_opts;
+			}
+		}
+		ptr += opsize-2;
+		length-=opsize;
+	}
+
+done_opts:
+	/* We've parsed the options - do we have a hash? */
+	if (!hash_expected && !hash_location)
+		return 0;
+
+	if (hash_expected && !hash_location) {
+		if (net_ratelimit()) {
+			printk (KERN_INFO "MD5 Hash expected but NOT found "
+				"(%d.%d.%d.%d:%d) -> (%d.%d.%d.%d:%d)\n",
+				NIPQUAD (iph->saddr), ntohs(th->source),
+				NIPQUAD (iph->daddr), ntohs(th->dest));
+		}
+		return 1;
+	}
+
+	if (!hash_expected && hash_location) {
+		if (net_ratelimit()) {
+			printk (KERN_INFO "MD5 Hash NOT expected but found "
+				"(%d.%d.%d.%d:%d) -> (%d.%d.%d.%d:%d)\n",
+				NIPQUAD (iph->saddr), ntohs(th->source),
+				NIPQUAD (iph->daddr), ntohs(th->dest));
+		}
+		return 1;
+	}
+
+	/* Okay, so this is hash_expected and hash_location -
+	 * so we need to calculate the checksum.
+	 */
+	genhash = tcp_v4_calc_md5_hash (newhash,
+					hash_expected,
+					tp->rx_opt.md5_db->tfm,
+					iph->saddr, iph->daddr,
+					th, sk->sk_protocol,
+					skb->len);
+	if (genhash || memcmp (hash_location, newhash, 16) != 0) {
+		if (net_ratelimit()) {
+			printk (KERN_INFO "MD5 Hash failed for (%d.%d.%d.%d:%d)"
+					" -> (%d.%d.%d.%d:%d)%s\n",
+					NIPQUAD (iph->saddr), ntohs(th->source),
+					NIPQUAD (iph->daddr), ntohs(th->dest),
+					genhash ?
+					" tcp_v4_calc_md5_hash failed" : "");
+#ifdef MD5_DEBUG
+			{
+				int i;
+
+				printk("Received: ");
+				for (i = 0; i < 16; i++)
+					printk ("%x ", hash_location[i]);
+				printk("\n");
+				printk("Calculated: ");
+				for (i = 0; i < 16; i++)
+					printk ("%x ", newhash[i]);
+				printk("\n");
+			}
+#endif
+		}
+		return 1;
+	}
+
+	return 0;
+}
+
+static void tcp_v4_clear_md5_list(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_rfc2385_db *md5db = tp->rx_opt.md5_db;
+	int idx;
+
+	/* Nothing to tear down if no key database is attached. */
+	if (!md5db)
+		return;
+
+	/*
+	 * Release every stored key first, then the entry array
+	 * itself (kfree() ignores a NULL pointer).
+	 */
+	for (idx = 0; idx < md5db->db_entries; idx++)
+		kfree(md5db->entries[idx].key);
+	kfree(md5db->entries);
+
+	/*
+	 * Drop our reference on the crypto transform; whoever
+	 * drops the last reference frees it.
+	 */
+	if (md5db->tfm && atomic_dec_and_test(&md5db->tfm->refcount)) {
+		crypto_free_tfm(md5db->tfm->tfm);
+		kfree(md5db->tfm);
+	}
+
+	kfree(md5db);
+	tp->rx_opt.md5_db = NULL;
+}
+#endif
+
 #ifdef CONFIG_PROC_FS
 /* Proc filesystem TCP sock list dumping. */
 
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/net/ipv4/tcp_minisocks.c linux-2.6.17-md5/net/ipv4/tcp_minisocks.c
--- linux-2.6.17/net/ipv4/tcp_minisocks.c	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/net/ipv4/tcp_minisocks.c	2006-08-08 15:12:46.000000000 +0100
@@ -307,6 +307,39 @@ void tcp_time_wait(struct sock *sk, int 
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
+
+#ifdef CONFIG_TCP_RFC2385
+		/*
+		 * The timewait bucket does not have the key DB from the sock
+		 * structure. We just make a quick copy of the MD5 key being
+		 * used (if indeed we are using one) so the timewait ack
+		 * generating code has the key.
+		 */
+		{
+			struct tcp_rfc2385 *key;
+
+			tw->md5_key = NULL;
+			tw->md5_keylen = 0;
+			if ((key = tcp_v4_md5_lookup(sk, inet_sk(sk)->daddr))) {
+				char *newkey = kmalloc(key->keylen,
+							GFP_ATOMIC);
+				if (newkey) {
+					memcpy(newkey, key->key, key->keylen);
+					tw->md5_key = newkey;
+					tw->md5_keylen = key->keylen;
+
+					/*
+					 * Copy the crypto structure, as this
+					 * socket will lose it when it's
+					 * destroyed.
+					 */
+					tw->md5_tfm = tp->rx_opt.md5_db->tfm;
+					tp->rx_opt.md5_db->tfm = NULL;
+				}
+			}
+		}
+#endif
+
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
 
@@ -405,6 +438,10 @@ struct sock *tcp_create_openreq_child(st
 		newtp->rx_opt.num_sacks = 0;
 		newtp->urg_data = 0;
 
+#ifdef CONFIG_TCP_RFC2385
+		newtp->rx_opt.md5_db = NULL;
+#endif
+
 		if (sock_flag(newsk, SOCK_KEEPOPEN))
 			inet_csk_reset_keepalive_timer(newsk,
 						       keepalive_time_when(newtp));
@@ -436,6 +473,10 @@ struct sock *tcp_create_openreq_child(st
 			newtp->rx_opt.ts_recent_stamp = 0;
 			newtp->tcp_header_len = sizeof(struct tcphdr);
 		}
+#ifdef CONFIG_TCP_RFC2385
+		if (tcp_v4_md5_lookup(sk, inet_sk(newsk)->daddr))
+			newtp->tcp_header_len += TCPOLEN_RFC2385_ALIGNED;
+#endif
 		if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
 			newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
 		newtp->rx_opt.mss_clamp = req->mss;
@@ -619,6 +660,36 @@ struct sock *tcp_check_req(struct sock *
 		if (child == NULL)
 			goto listen_overflow;
 
+#ifdef CONFIG_TCP_RFC2385
+		/* Copy over the MD5 key from the original socket. */
+		{
+			struct tcp_rfc2385 *key;
+
+			if ((key = tcp_v4_md5_lookup(sk,
+						inet_sk(child)->daddr))) {
+				struct tcp_rfc2385_db *db =
+						tcp_sk(sk)->rx_opt.md5_db;
+				/*
+				 * We're using one, so create a matching key on
+				 * the newsk structure. If we fail to get
+				 * memory then we end up not copying the key
+				 * across. Shucks.
+				 */
+				char *newkey = kmalloc(key->keylen, GFP_ATOMIC);
+				if (newkey) {
+					memcpy(newkey, key->key, key->keylen);
+					tcp_v4_md5_do_add(child,
+							inet_sk(child)->daddr,
+							newkey,
+							key->keylen);
+					atomic_inc(&db->tfm->refcount);
+					tcp_sk(child)->rx_opt.md5_db->tfm =
+							db->tfm;
+				}
+			}
+		}
+#endif
+
 		inet_csk_reqsk_queue_unlink(sk, req, prev);
 		inet_csk_reqsk_queue_removed(sk, req);
 
@@ -634,7 +705,7 @@ struct sock *tcp_check_req(struct sock *
 	embryonic_reset:
 		NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
 		if (!(flg & TCP_FLAG_RST))
-			req->rsk_ops->send_reset(skb);
+			req->rsk_ops->send_reset(sk, skb);
 
 		inet_csk_reqsk_queue_drop(sk, req, prev);
 		return NULL;
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/net/ipv4/tcp_output.c linux-2.6.17-md5/net/ipv4/tcp_output.c
--- linux-2.6.17/net/ipv4/tcp_output.c	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/net/ipv4/tcp_output.c	2006-07-25 13:13:36.000000000 +0100
@@ -42,6 +42,10 @@
 #include <linux/module.h>
 #include <linux/smp_lock.h>
 
+#ifdef CONFIG_TCP_RFC2385
+#include <linux/tcp_rfc2385.h>
+#endif
+
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse = 1;
 
@@ -264,8 +268,13 @@ static u16 tcp_select_window(struct sock
 	return new_win;
 }
 
+#ifdef CONFIG_TCP_RFC2385
+static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp,
+					 __u32 tstamp, int md5, __u8 **md5_hash)
+#else
 static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp,
 					 __u32 tstamp)
+#endif
 {
 	if (tp->rx_opt.tstamp_ok) {
 		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
@@ -293,6 +302,20 @@ static void tcp_build_and_update_options
 			tp->rx_opt.eff_sacks--;
 		}
 	}
+
+#ifdef CONFIG_TCP_RFC2385
+	/*
+	 * If MD5 is enabled then we set the option and include the size
+	 * (always 18). The actual MD5 hash is added just before the packet
+	 * is sent.
+	 */
+	if (md5) {
+		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
+					(TCPOPT_NOP << 16) |
+					(TCPOPT_RFC2385 << 8) | 18);
+		*md5_hash = (__u8 *) ptr;
+	}
+#endif
 }
 
 /* Construct a tcp options header for a SYN or SYN_ACK packet.
@@ -300,9 +323,16 @@ static void tcp_build_and_update_options
  * MAX_SYN_SIZE to match the new maximum number of options that you
  * can generate.
  */
+#ifdef CONFIG_TCP_RFC2385
+static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
+				  int offer_wscale, int wscale, __u32 tstamp,
+				  __u32 ts_recent, int md5,
+				  __u8 **md5_hash)
+#else
 static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
 				  int offer_wscale, int wscale, __u32 tstamp,
 				  __u32 ts_recent)
+#endif
 {
 	/* We always get an MSS option.
 	 * The option bytes which will be seen in normal data
@@ -332,6 +362,20 @@ static void tcp_syn_build_options(__u32 
 					  (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
 	if (offer_wscale)
 		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
+
+#ifdef CONFIG_TCP_RFC2385
+	/*
+	 * If MD5 is enabled then we set the option and include the size
+	 * (always 18). The actual MD5 hash is added just before the packet
+	 * is sent.
+	 */
+	if (md5) {
+		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
+					(TCPOPT_NOP << 16) |
+					(TCPOPT_RFC2385 << 8) | 18);
+		*md5_hash = (__u8 *) ptr;
+	}
+#endif
 }
 
 /* This routine actually transmits TCP packets queued in by
@@ -355,6 +399,10 @@ static int tcp_transmit_skb(struct sock 
 	struct tcphdr *th;
 	int sysctl_flags;
 	int err;
+#ifdef CONFIG_TCP_RFC2385
+	struct tcp_rfc2385 *md5 = NULL;
+	__u8 *md5_hash_location;
+#endif
 
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 
@@ -410,6 +458,14 @@ static int tcp_transmit_skb(struct sock 
 	if (tcp_packets_in_flight(tp) == 0)
 		tcp_ca_event(sk, CA_EVENT_TX_START);
 
+#ifdef CONFIG_TCP_RFC2385
+	/* Are we doing MD5 on this segment? If so, make room for it. */
+	md5 = tcp_v4_md5_lookup(sk, inet->daddr);
+	if (md5) {
+		tcp_header_size += TCPOLEN_RFC2385_ALIGNED;
+	}
+#endif
+
 	th = (struct tcphdr *) skb_push(skb, tcp_header_size);
 	skb->h.th = th;
 	skb_set_owner_w(skb, sk);
@@ -447,13 +503,35 @@ static int tcp_transmit_skb(struct sock 
 				      (sysctl_flags & SYSCTL_FLAG_WSCALE),
 				      tp->rx_opt.rcv_wscale,
 				      tcb->when,
-				      tp->rx_opt.ts_recent);
+				      tp->rx_opt.ts_recent
+#ifdef CONFIG_TCP_RFC2385
+				      , md5 ? 1 : 0,
+				      &md5_hash_location
+#endif
+				      );
 	} else {
 		tcp_build_and_update_options((__u32 *)(th + 1),
-					     tp, tcb->when);
+					     tp, tcb->when
+#ifdef CONFIG_TCP_RFC2385
+				      , md5 ? 1 : 0,
+				      &md5_hash_location
+#endif
+				      );
 		TCP_ECN_send(sk, tp, skb, tcp_header_size);
 	}
 
+#ifdef CONFIG_TCP_RFC2385
+	/* Calculate the MD5 hash, as we have all the info we need now. */
+	if (md5) {
+		struct tcp_rfc2385_db *db = tp->rx_opt.md5_db;
+		tcp_v4_calc_md5_hash(md5_hash_location,
+				     md5, db->tfm,
+				     inet->saddr, inet->daddr,
+				     skb->h.th, sk->sk_protocol,
+				     skb->len);
+	}
+#endif
+
 	icsk->icsk_af_ops->send_check(sk, skb->len, skb);
 
 	if (likely(tcb->flags & TCPCB_FLAG_ACK))
@@ -850,6 +928,11 @@ unsigned int tcp_current_mss(struct sock
 	}
 	tp->xmit_size_goal = xmit_size_goal;
 
+#ifdef CONFIG_TCP_RFC2385
+	if (tcp_v4_md5_lookup(sk, inet_sk(sk)->daddr))
+		mss_now -= TCPOLEN_RFC2385_ALIGNED;
+#endif
+
 	return mss_now;
 }
 
@@ -2015,6 +2098,11 @@ struct sk_buff * tcp_make_synack(struct 
 	struct tcphdr *th;
 	int tcp_header_size;
 	struct sk_buff *skb;
+#ifdef CONFIG_TCP_RFC2385
+	struct rtable *rt = (struct rtable *) dst;
+	struct tcp_rfc2385 *md5 = NULL;
+	__u8 *md5_hash_location;
+#endif
 
 	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
 	if (skb == NULL)
@@ -2030,6 +2118,15 @@ struct sk_buff * tcp_make_synack(struct 
 			   (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
 			   /* SACK_PERM is in the place of NOP NOP of TS */
 			   ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
+
+#ifdef CONFIG_TCP_RFC2385
+	/* Are we doing MD5 on this segment? If so, make room for it. */
+	md5 = tcp_v4_md5_lookup(sk, rt->rt_dst);
+	if (md5) {
+		tcp_header_size += TCPOLEN_RFC2385_ALIGNED;
+	}
+#endif
+
 	skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
 
 	memset(th, 0, sizeof(struct tcphdr));
@@ -2068,10 +2165,27 @@ struct sk_buff * tcp_make_synack(struct 
 	tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
 			      ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
 			      TCP_SKB_CB(skb)->when,
-			      req->ts_recent);
+			      req->ts_recent
+#ifdef CONFIG_TCP_RFC2385
+			      , (md5 ? 1 : 0), &md5_hash_location
+#endif
+			      );
 
 	skb->csum = 0;
 	th->doff = (tcp_header_size >> 2);
+
+#ifdef CONFIG_TCP_RFC2385
+	/* Calculate the MD5 hash, as we have all the info we need now. */
+	if (md5) {
+		struct tcp_rfc2385_db *db = tp->rx_opt.md5_db;
+		tcp_v4_calc_md5_hash(md5_hash_location,
+				     md5, db->tfm,
+				     rt->rt_src, rt->rt_dst,
+				     skb->h.th, sk->sk_protocol,
+				     skb->len);
+	}
+#endif
+
 	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 	return skb;
 }
@@ -2091,6 +2205,11 @@ static void tcp_connect_init(struct sock
 	tp->tcp_header_len = sizeof(struct tcphdr) +
 		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
 
+#ifdef CONFIG_TCP_RFC2385
+	if (tcp_v4_md5_lookup(sk, inet_sk(sk)->daddr))
+		tp->tcp_header_len += TCPOLEN_RFC2385_ALIGNED;
+#endif
+
 	/* If user gave his TCP_MAXSEG, record it to clamp */
 	if (tp->rx_opt.user_mss)
 		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
diff -ruNp -X linux-2.6.17/Documentation/dontdiff linux-2.6.17/net/ipv6/tcp_ipv6.c linux-2.6.17-md5/net/ipv6/tcp_ipv6.c
--- linux-2.6.17/net/ipv6/tcp_ipv6.c	2006-06-18 02:49:35.000000000 +0100
+++ linux-2.6.17-md5/net/ipv6/tcp_ipv6.c	2006-07-25 13:33:11.000000000 +0100
@@ -70,7 +70,7 @@
 /* Socket used for sending RSTs and ACKs */
 static struct socket *tcp6_socket;
 
-static void	tcp_v6_send_reset(struct sk_buff *skb);
+static void	tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
 static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
 static void	tcp_v6_send_check(struct sock *sk, int len, 
 				  struct sk_buff *skb);
@@ -555,7 +555,7 @@ static void tcp_v6_send_check(struct soc
 }
 
 
-static void tcp_v6_send_reset(struct sk_buff *skb)
+static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = skb->h.th, *t1; 
 	struct sk_buff *buff;
@@ -1118,7 +1118,7 @@ static int tcp_v6_do_rcv(struct sock *sk
 	return 0;
 
 reset:
-	tcp_v6_send_reset(skb);
+	tcp_v6_send_reset(sk, skb);
 discard:
 	if (opt_skb)
 		__kfree_skb(opt_skb);
@@ -1235,7 +1235,7 @@ no_tcp_socket:
 bad_packet:
 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
 	} else {
-		tcp_v6_send_reset(skb);
+		tcp_v6_send_reset(NULL, skb);
 	}
 
 discard_it:


More information about the Quagga-users mailing list