? tcp_reass-5.2.1-20040301.patch
Index: tcp_input.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.217.2.1
diff -u -p -r1.217.2.1 tcp_input.c
--- sys/netinet/tcp_input.c	9 Jan 2004 12:32:36 -0000	1.217.2.1
+++ sys/netinet/tcp_input.c	1 Mar 2004 15:18:54 -0000
@@ -57,6 +57,8 @@
 
 #include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */
 
+#include <vm/uma.h>
+
 #include <net/if.h>
 #include <net/route.h>
 
@@ -97,8 +99,6 @@
 
 #include <machine/in_cksum.h>
 
-MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
-
 static const int tcprexmtthresh = 3;
 tcp_cc	tcp_ccgen;
 
@@ -134,6 +134,24 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3
     &tcp_do_rfc3390, 0,
     "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
 
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
+    "TCP Segment Reassembly Queue");
+
+static int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+    &tcp_reass_maxseg, 0,
+    "Global maximum number of TCP Segments in Reassembly Queue");
+
+int tcp_reass_qsize = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+    &tcp_reass_qsize, 0,
+    "Global number of TCP Segments currently in Reassembly Queue");
+
+static int tcp_reass_overflows = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
+    &tcp_reass_overflows, 0,
+    "Global number of TCP Segment Reassembly Queue Overflows");
+
 struct inpcbhead tcb;
 #define	tcb6	tcb  /* for KAME src sync over BSD*'s */
 struct inpcbinfo tcbinfo;
@@ -174,6 +192,19 @@ do { \
 	    (tp->t_flags & TF_RXWIN0SENT) == 0) &&			\
 	    (tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
 
+/* Initialize TCP reassembly queue */
+uma_zone_t	tcp_reass_zone;
+void
+tcp_reass_init()
+{
+	tcp_reass_maxseg = nmbclusters / 16;
+	TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
+	    &tcp_reass_maxseg);
+	tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+}
+
 static int
 tcp_reass(tp, th, tlenp, m)
 	register struct tcpcb *tp;
@@ -184,7 +215,7 @@ tcp_reass(tp, th, tlenp, m)
 	struct tseg_qent *q;
 	struct tseg_qent *p = NULL;
 	struct tseg_qent *nq;
-	struct tseg_qent *te;
+	struct tseg_qent *te = NULL;
 	struct socket *so = tp->t_inpcb->inp_socket;
 	int flags;
 
@@ -195,9 +226,27 @@ tcp_reass(tp, th, tlenp, m)
 	if (th == 0)
 		goto present;
 
-	/* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
-	MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
-	       M_NOWAIT);
+	/*
+	 * Limit the number of segments in the reassembly queue to prevent
+	 * holding on to too many segments (and thus running out of mbufs).
+	 * Make sure to let the missing segment through which caused this
+	 * queue.  Always keep one global queue entry spare to be able to
+	 * process the missing segment.
+	 */
+	if (th->th_seq != tp->rcv_nxt &&
+	    tcp_reass_qsize + 1 >= tcp_reass_maxseg) {
+		tcp_reass_overflows++;
+		tcpstat.tcps_rcvmemdrop++;
+		m_freem(m);
+		return (0);
+	}
+	tcp_reass_qsize++;
+
+	/*
+	 * Allocate a new queue entry. If we can't, or hit the zone limit
+	 * just drop the pkt.
+	 */
+	te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
 	if (te == NULL) {
 		tcpstat.tcps_rcvmemdrop++;
 		m_freem(m);
@@ -227,7 +276,8 @@ tcp_reass(tp, th, tlenp, m)
 				tcpstat.tcps_rcvduppack++;
 				tcpstat.tcps_rcvdupbyte += *tlenp;
 				m_freem(m);
-				FREE(te, M_TSEGQ);
+				uma_zfree(tcp_reass_zone, te);
+				tcp_reass_qsize--;
 				/*
 				 * Try to present any queued data
 				 * at the left window edge to the user.
@@ -262,7 +312,8 @@ tcp_reass(tp, th, tlenp, m)
 		nq = LIST_NEXT(q, tqe_q);
 		LIST_REMOVE(q, tqe_q);
 		m_freem(q->tqe_m);
-		FREE(q, M_TSEGQ);
+		uma_zfree(tcp_reass_zone, q);
+		tcp_reass_qsize--;
 		q = nq;
 	}
 
@@ -296,7 +347,8 @@ present:
 			m_freem(q->tqe_m);
 		else
 			sbappendstream(&so->so_rcv, q->tqe_m);
-		FREE(q, M_TSEGQ);
+		uma_zfree(tcp_reass_zone, q);
+		tcp_reass_qsize--;
 		q = nq;
 	} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
 	ND6_HINT(tp);
Index: tcp_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.169.2.3
diff -u -p -r1.169.2.3 tcp_subr.c
--- sys/netinet/tcp_subr.c	23 Feb 2004 15:32:55 -0000	1.169.2.3
+++ sys/netinet/tcp_subr.c	1 Mar 2004 15:18:54 -0000
@@ -286,6 +286,7 @@ tcp_init()
 	tcp_timer_init();
 	syncache_init();
 	tcp_hc_init();
+	tcp_reass_init();
 }
 
 /*
@@ -708,7 +709,8 @@ tcp_discardcb(tp)
 	while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
 		LIST_REMOVE(q, tqe_q);
 		m_freem(q->tqe_m);
-		FREE(q, M_TSEGQ);
+		uma_zfree(tcp_reass_zone, q);
+		tcp_reass_qsize--;
 	}
 	inp->inp_ppcb = NULL;
 	tp->t_inpcb = NULL;
@@ -769,7 +771,8 @@ tcp_drain()
 			            != NULL) {
 					LIST_REMOVE(te, tqe_q);
 					m_freem(te->tqe_m);
-					FREE(te, M_TSEGQ);
+					uma_zfree(tcp_reass_zone, te);
+					tcp_reass_qsize--;
 				}
 			}
 			INP_UNLOCK(inpb);
Index: tcp_var.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.93.2.1
diff -u -p -r1.93.2.1 tcp_var.h
--- sys/netinet/tcp_var.h	9 Jan 2004 12:32:36 -0000	1.93.2.1
+++ sys/netinet/tcp_var.h	1 Mar 2004 15:18:55 -0000
@@ -54,9 +54,8 @@ struct tseg_qent {
 	struct	mbuf	*tqe_m;		/* mbuf contains packet */
 };
 LIST_HEAD(tsegqe_head, tseg_qent);
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_TSEGQ);
-#endif
+extern int	tcp_reass_qsize;
+extern struct uma_zone	*tcp_reass_zone;
 
 struct tcptemp {
 	u_char	tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */
@@ -514,6 +513,7 @@ struct tcpcb *
 int	 tcp_output(struct tcpcb *);
 struct inpcb *
 	 tcp_quench(struct inpcb *, int);
+void	 tcp_reass_init(void);
 void	 tcp_respond(struct tcpcb *, void *,
 	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
 int	 tcp_twrespond(struct tcptw *, struct socket *, struct mbuf *, int);
