Attachment 'uto_kernel.diff'

Download

   1 --- //depot/vendor/freebsd/src/share/man/man4/tcp.4	2011-02-21 12:36:15.000000000 0000
   2 +++ //depot/projects/soc2011/cnicutar_tcputo_9/src/share/man/man4/tcp.4	2011-08-20 13:56:20.000000000 0000
   3 @@ -38,7 +38,7 @@
   4  .\"     From: @(#)tcp.4	8.1 (Berkeley) 6/5/93
   5  .\" $FreeBSD: src/share/man/man4/tcp.4,v 1.63 2011/02/21 11:56:11 lstewart Exp $
   6  .\"
   7 -.Dd February 15, 2011
   8 +.Dd August 20, 2011
   9  .Dt TCP 4
  10  .Os
  11  .Sh NAME
  12 @@ -122,7 +122,7 @@
  13  .Xr setsockopt 2
  14  and tested with
  15  .Xr getsockopt 2 :
  16 -.Bl -tag -width ".Dv TCP_CONGESTION"
  17 +.Bl -tag -width ".Dv TCP_SNDUTO_TIMEOUT"
  18  .It Dv TCP_INFO
  19  Information about a socket's underlying TCP session may be retrieved
  20  by passing the read-only option
  21 @@ -222,6 +222,52 @@
  22  will have an invalid digest option prepended, and the following error message
  23  will be visible on the system console:
  24  .Em "tcp_signature_compute: SADB lookup failed for %d.%d.%d.%d" .
  25 +.It Dv TCP_SNDUTO_TIMEOUT
  26 +By setting this option the user controls the exact time (in seconds) TCP will
  27 +wait for an ACK before tearing down the connection.
  28 +.Pp
  29 +Traditionally, a fixed number of retransmits
  30 +.Dv TCP_MAXRXTSHIFT
  31 +are performed before giving up.
  32 +Using UTO completely overrides this and allows connections with stringent or
  33 +relaxed timeouts.
  34 +.Pp
  35 +After setting the option the first subsequent segment will carry the specified
  36 +value.
  37 +This option is silently left out if (due to other options) not enough space
  38 +is available.
  39 +When specifying more than 3600 seconds of UTO,
  40 +.Dv TCP_SNDUTO_TIMEOUT
  41 +will set the granularity bit and send the value in minutes.
  42 +.It Dv TCP_RCVUTO_TIMEOUT
  43 +When setting this value to 1, the application specifies that any value (up to
  44 +.Va uto.max_timeout )
  45 +is acceptable.
  46 +If the application specifies a value larger than 1, that is considered
  47 +the maximum acceptable timeout for the connection.
  48 +The default value is 0.
  49 +By retrieving this option the user can determine the actual timeout (in
  50 +seconds) advertised by the peer.
  51 +.Pp
  52 +When using both
  53 +.Dv TCP_RCVUTO_TIMEOUT
  54 +and
  55 +.Dv TCP_SNDUTO_TIMEOUT ,
  56 +the larger value is chosen as User Timeout for the connection.
  57 +.Pp
  58 +The peer is allowed to send this option any number of times.
  59 +Each time the user calls
  60 +.Xr getsockopt 2
  61 +the most recent value received is returned.
  62 +.Pp
  63 +A possible use of this option is allowing a peer to request a large timeout
  64 +(several hours) for a connection.
  65 +Even if the peer is offline for long periods of time the connection will be
  66 +maintained.
  67 +.Pp
  68 +This option has security implications.
  69 +An application must *not* enable this option before the peer has
  70 +authenticated in order to prevent DoS attacks.
  71  .El
  72  .Pp
  73  The option level for the
  74 @@ -431,6 +477,13 @@
  75  .It Va sack.globalmaxholes
  76  Maximum number of SACK holes per system, across all connections.
  77  Defaults to 65536.
  78 +.It Va uto.enable
  79 +Globally permit or deny using UTO.
  80 +.It Va uto.max_timeout
  81 +Maximum value for TCP_SNDUTO_TIMEOUT for any connection.
  82 +Setting this to a large value has security implications.
  83 +.It Va uto.min_timeout
  84 +Minimum value for TCP_RCVUTO_TIMEOUT for any connection.
  85  .It Va maxtcptw
  86  When a TCP connection enters the
  87  .Dv TIME_WAIT
  88 --- //depot/vendor/freebsd/src/sys/netinet/tcp.h	2010-11-17 19:00:26.000000000 0000
  89 +++ //depot/projects/soc2011/cnicutar_tcputo_9/src/sys/netinet/tcp.h	2011-08-13 19:22:15.000000000 0000
  90 @@ -96,6 +96,8 @@
  91  #define    TCPOLEN_TSTAMP_APPA		(TCPOLEN_TIMESTAMP+2) /* appendix A */
  92  #define	TCPOPT_SIGNATURE	19		/* Keyed MD5: RFC 2385 */
  93  #define	   TCPOLEN_SIGNATURE		18
  94 +#define TCPOPT_UTO		28		/* User Timeout: RFC 5482 */
  95 +#define	   TCPOLEN_UTO			4
  96  
  97  /* Miscellaneous constants */
  98  #define	MAX_SACK_BLKS	6	/* Max # SACK blocks stored at receiver side */
  99 @@ -103,6 +105,16 @@
 100  
 101  
 102  /*
 103 + * The timeout ranges for TCP UTO have security implications; in particular,
 104 + * long timeouts might allow for denial-of-service attacks.
 105 + * These are only defaults for the net.inet.tcp.uto.min_timeout and
 106 + * net.inet.tcp.uto.max_timeout sysctls, respectively.
 107 + */
 108 +#define TCP_UTOMIN	100	/* Minimum user timeout in seconds. */
 109 +#define TCP_UTOMAX	600	/* Maximum user timeout in seconds. */
 110 +
 111 +
 112 +/*
 113   * The default maximum segment size (MSS) to be used for new TCP connections
 114   * when path MTU discovery is not enabled.
 115   *
 116 @@ -158,6 +170,8 @@
 117  #define TCP_MD5SIG	0x10	/* use MD5 digests (RFC2385) */
 118  #define	TCP_INFO	0x20	/* retrieve tcp_info structure */
 119  #define	TCP_CONGESTION	0x40	/* get/set congestion control algorithm */
 120 +#define TCP_SNDUTO_TIMEOUT	0x80	/* get/set sent UTO value */
 121 +#define TCP_RCVUTO_TIMEOUT	0x100	/* accept UTO suggestion */
 122  
 123  #define	TCP_CA_NAME_MAX	16	/* max congestion control name length */
 124  
 125 --- //depot/vendor/freebsd/src/sys/netinet/tcp_input.c	2011-06-04 16:35:14.000000000 0000
 126 +++ //depot/projects/soc2011/cnicutar_tcputo_9/src/sys/netinet/tcp_input.c	2011-08-20 09:40:36.000000000 0000
 127 @@ -1324,6 +1324,20 @@
 128  			    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 129  #endif
 130  		tcp_dooptions(&to, optp, optlen, TO_SYN);
 131 +
 132 +		if (to.to_flags & TOF_UTO) {
 133 +			/*
 134 +			 * Storing the value even if the user might not
 135 +			 * accept it.
 136 +			 */
 137 +			tp->rcv_uto = UTO_VALUE(to);
 138 +			/*
 139 +			 * XXX-CN Using option both for send and receive.
 140 +			 * Clear it for syncache.
 141 +			 */
 142 +			to.to_flags &= ~TOF_UTO;
 143 +		}
 144 +
 145  		syncache_add(&inc, &to, th, inp, &so, m);
 146  		/*
 147  		 * Entry added to syncache and mbuf consumed.
 148 @@ -1511,6 +1525,16 @@
 149  	    (thflags & TH_SYN) ? TO_SYN : 0);
 150  
 151  	/*
 152 +	 * Processing received UTO even if the user doesn't accept it
 153 +	 * yet. The user might want to accept it later (perhaps after
 154 +	 * authentication) but the peer need not send it again.
 155 +	 * The value is converted to seconds and not clamped (the user
 156 +	 * needs to know the real value received).
 157 +	 */
 158 +	if (to.to_flags & TOF_UTO)
 159 +		tp->rcv_uto = UTO_VALUE(to);
 160 +
 161 +	/*
 162  	 * If echoed timestamp is later than the current time,
 163  	 * fall back to non RFC1323 RTT calculation.  Normalize
 164  	 * timestamp if syncookies were used when this connection
 165 @@ -3169,6 +3193,15 @@
 166  			to->to_sacks = cp + 2;
 167  			TCPSTAT_INC(tcps_sack_rcv_blocks);
 168  			break;
 169 +		case TCPOPT_UTO:
 170 +			if (optlen != TCPOLEN_UTO)
 171 +				continue;
 172 +			to->to_flags |= TOF_UTO;
 173 +			bcopy((char *)cp + 2,
 174 +			    (char *)&to->to_uto, sizeof(to->to_uto));
 175 +			to->to_uto = htons(to->to_uto);
 176 +			/* Avoid converting to seconds: it might overflow. */
 177 +			break;
 178  		default:
 179  			continue;
 180  		}
 181 --- //depot/vendor/freebsd/src/sys/netinet/tcp_output.c	2011-07-05 18:50:18.000000000 0000
 182 +++ //depot/projects/soc2011/cnicutar_tcputo_9/src/sys/netinet/tcp_output.c	2011-08-13 19:22:15.000000000 0000
 183 @@ -705,6 +705,18 @@
 184  				to.to_sacks = (u_char *)tp->sackblks;
 185  			}
 186  		}
 187 +		/* UTO */
 188 +		if (tp->t_flags & TF_SND_UTO) {
 189 +			to.to_uto = tp->snd_uto;
 190 +			to.to_flags |= TOF_UTO;
 191 +			/*
 192 +			 * The option is sent with the SYN and with the first
 193 +			 * non-SYN segment.
 194 +			 */
 195 +			if (!(flags & TH_SYN))
 196 +				tp->t_flags &= ~TF_SND_UTO;
 197 +
 198 +		}
 199  #ifdef TCP_SIGNATURE
 200  		/* TCP-MD5 (RFC2385). */
 201  		if (tp->t_flags & TF_SIGNATURE)
 202 @@ -1491,6 +1503,38 @@
 203  			TCPSTAT_INC(tcps_sack_send_blocks);
 204  			break;
 205  			}
 206 +		case TOF_UTO:
 207 +			while (optlen % 4) {
 208 +				optlen += TCPOLEN_NOP;
 209 +				*optp++ = TCPOPT_NOP;
 210 +			}
 211 +			if (TCP_MAXOLEN - optlen < TCPOLEN_UTO)
 212 +				continue;
 213 +			optlen += TCPOLEN_UTO;
 214 +			*optp++ = TCPOPT_UTO;
 215 +			*optp++ = TCPOLEN_UTO;
 216 +
 217 +			if (to->to_uto > UTO_MINS_TH) {
 218 +				/*
 219 +				 * If the timeout is larger than UTO_MINS_TH
 220 +				 * we'll specify minutes.
 221 +				 */
 222 +				to->to_uto /= 60;
 223 +				to->to_uto |= UTO_MINS;
 224 +			}
 225 +			
 226 +			/*
 227 +			 * XXX-CN to_uto is 32b because the user is allowed
 228 +			 * to specify more than 16b of seconds (dividing the
 229 +			 * value by 60 will make it fit).
 230 +			 */
 231 +			{
 232 +				uint16_t uto = to->to_uto;
 233 +				uto = htons(uto);
 234 +				bcopy((u_char *)&uto, optp, sizeof(uto));
 235 +				optp += sizeof(uto);
 236 +			}
 237 +			break;
 238  		default:
 239  			panic("%s: unknown TCP option type", __func__);
 240  			break;
 241 --- //depot/vendor/freebsd/src/sys/netinet/tcp_subr.c	2011-07-14 13:45:25.000000000 0000
 242 +++ //depot/projects/soc2011/cnicutar_tcputo_9/src/sys/netinet/tcp_subr.c	2011-08-17 12:00:14.000000000 0000
 243 @@ -161,6 +161,24 @@
 244     "Default TCP Maximum Segment Size for IPv6");
 245  #endif /* INET6 */
 246  
 247 +SYSCTL_NODE(_net_inet_tcp, OID_AUTO, uto, CTLFLAG_RW, 0, "TCP UTO");
 248 +
 249 +VNET_DEFINE(int, uto_enable) = 1;
 250 +SYSCTL_VNET_INT(_net_inet_tcp_uto, OID_AUTO, enable, CTLFLAG_RW,
 251 +    &VNET_NAME(uto_enable), 0,
 252 +    "Enable TCP UTO for all connections");
 253 +
 254 +VNET_DEFINE(int, uto_min_timeout) = TCP_UTOMIN;
 255 +SYSCTL_VNET_INT(_net_inet_tcp_uto, OID_AUTO, min_timeout, CTLFLAG_RW,
 256 +    &VNET_NAME(uto_min_timeout), 0,
 257 +    "Minimum accepted timeout for a connection");
 258 +
 259 +VNET_DEFINE(int, uto_max_timeout) = TCP_UTOMAX;	/* default from tcp.h */
 260 +SYSCTL_VNET_INT(_net_inet_tcp_uto, OID_AUTO, max_timeout, CTLFLAG_RW,
 261 +    &VNET_NAME(uto_max_timeout), 0,
 262 +    "Maximum accepted timeout for a connection");
 263 +
 264 +
 265  /*
 266   * Minimum MSS we accept and use. This prevents DoS attacks where
 267   * we are forced to a ridiculous low MSS like 20 and send hundreds
 268 @@ -976,40 +994,66 @@
 269  	return (tp);
 270  }
 271  
 272 +/*
 273 + * Classic drain entry point: a no-op unless do_tcpdrain is set.
 274 + */
 275  void
 276  tcp_drain(void)
 277  {
 278 +	if (do_tcpdrain)
 279 +		tcp_drain_internal(TCP_DRAIN_CLASSIC);
 280 +}
 281 +
 282 +void
 283 +tcp_drain_internal(u_int flags)
 284 +{
 285  	VNET_ITERATOR_DECL(vnet_iter);
 286  
 287 -	if (!do_tcpdrain)
 288 +	if (!flags)
 289  		return;
 290  
 291  	VNET_LIST_RLOCK_NOSLEEP();
 292  	VNET_FOREACH(vnet_iter) {
 293  		CURVNET_SET(vnet_iter);
 294 -		struct inpcb *inpb;
 295 -		struct tcpcb *tcpb;
 296 +		struct inpcb *inp;
 297 +		struct tcpcb *tp;
 298 +		
 299 +		/* Walk the connections and try to free up space. */
 300 +		INP_INFO_WLOCK(&V_tcbinfo);
 301 +		LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
 302 +			if (inp->inp_flags & INP_TIMEWAIT)
 303 +				continue;
 304 +			INP_WLOCK(inp);
 305 +			if ((tp = intotcpcb(inp)) == NULL) {
 306 +				INP_WUNLOCK(inp);
 307 +				continue;
 308 +			}
 309 +
 310 +			/*
 311 +			 * Drop connections that wouldn't have survived without
 312 +			 * UTO.
 313 +			 * XXX-CN This negates the advantages of UTO for
 314 +			 * everyone instead of just dropping misbehaving
 315 +			 * connections.
 316 +			 */
 317 +			if (flags & TCP_DRAIN_UTO &&
 318 +			    tp->t_rxtshift > TCP_MAXRXTSHIFT)
 319 +				tcp_drop(tp, ETIMEDOUT);
 320  
 321 -	/*
 322 -	 * Walk the tcpbs, if existing, and flush the reassembly queue,
 323 -	 * if there is one...
 324 -	 * XXX: The "Net/3" implementation doesn't imply that the TCP
 325 -	 *      reassembly queue should be flushed, but in a situation
 326 -	 *	where we're really low on mbufs, this is potentially
 327 -	 *	usefull.
 328 -	 */
 329 -		INP_INFO_RLOCK(&V_tcbinfo);
 330 -		LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
 331 -			if (inpb->inp_flags & INP_TIMEWAIT)
 332 -				continue;
 333 -			INP_WLOCK(inpb);
 334 -			if ((tcpb = intotcpcb(inpb)) != NULL) {
 335 -				tcp_reass_flush(tcpb);
 336 -				tcp_clean_sackreport(tcpb);
 337 +			/*
 338 +			 * Flush the reassembly queue, if there is one.
 339 +			 * XXX: The "Net/3" implementation doesn't imply that
 340 +			 * the TCP reassembly queue should be flushed, but in a
 341 +			 * situation where we're really low on mbufs, this is
 342 +			 * potentially useful.
 343 +			 */
 344 +			if (flags & TCP_DRAIN_CLASSIC) {
 345 +				tcp_reass_flush(tp);
 346 +				tcp_clean_sackreport(tp);
 347  			}
 348 -			INP_WUNLOCK(inpb);
 349 +			INP_WUNLOCK(inp);
 350  		}
 351 -		INP_INFO_RUNLOCK(&V_tcbinfo);
 352 +		INP_INFO_WUNLOCK(&V_tcbinfo);
 353  		CURVNET_RESTORE();
 354  	}
 355  	VNET_LIST_RUNLOCK_NOSLEEP();
 356 --- //depot/vendor/freebsd/src/sys/netinet/tcp_syncache.c	2011-06-06 13:00:32.000000000 0000
 357 +++ //depot/projects/soc2011/cnicutar_tcputo_9/src/sys/netinet/tcp_syncache.c	2011-08-17 08:18:09.000000000 0000
 358 @@ -827,6 +827,15 @@
 359  #endif
 360  		if (sc->sc_flags & SCF_SACK)
 361  			tp->t_flags |= TF_SACK_PERMIT;
 362 +		if (sc->sc_flags & SCF_SND_UTO) {
 363 +			tp->t_flags |= TF_SND_UTO;
 364 +			tp->snd_uto = sc->sc_snd_uto;
 365 +		}
 366 +		if (sc->sc_flags & SCF_RCV_UTO)
 367 +			tp->t_flags |= TF_RCV_UTO;
 368 +		/* Regardless of SCF_RCV_UTO. */
 369 +		tp->rcv_uto = sc->sc_rcv_uto;
 370 +		tp->max_uto = sc->sc_max_uto;
 371  	}
 372  
 373  	if (sc->sc_flags & SCF_ECN)
 374 @@ -1039,6 +1048,14 @@
 375  	struct syncache scs;
 376  	struct ucred *cred;
 377  
 378 +	/*
 379 +	 * The client may have sent us an UTO suggestion; even if it hasn't,
 380 +	 * we need to inherit the current disposition (i.e. will the resulting
 381 +	 * socket accept suggestions?).
 382 +	 */
 383 +	uint16_t rcv_uto_tf;
 384 +	uint32_t rcv_uto = 0, max_uto = 0;
 385 +
 386  	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 387  	INP_WLOCK_ASSERT(inp);			/* listen socket */
 388  	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
 389 @@ -1063,6 +1080,19 @@
 390  	sb_hiwat = so->so_rcv.sb_hiwat;
 391  	ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
 392  
 393 +	/* Set User Timeout to send in SYN-ACK. */
 394 +	if (tp->t_flags & TF_SND_UTO) {
 395 +		/* Also inherited after connection is established. */
 396 +		to->to_uto = tp->snd_uto;
 397 +		to->to_flags |= TOF_UTO;
 398 +	}
 399 +
 400 +	/* Remember received UTO regardless of disposition and clear it. */
 401 +	rcv_uto_tf = (tp->t_flags & TF_RCV_UTO) ? SCF_RCV_UTO : 0;
 402 +	rcv_uto = tp->rcv_uto;
 403 +	max_uto = tp->max_uto;
 404 +	tp->rcv_uto = 0;
 405 +
 406  	/* By the time we drop the lock these should no longer be used. */
 407  	so = NULL;
 408  	tp = NULL;
 409 @@ -1271,6 +1301,15 @@
 410  		sc->sc_flags |= SCF_NOOPT;
 411  	if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn)
 412  		sc->sc_flags |= SCF_ECN;
 413 +	if (to->to_flags & TOF_UTO) {
 414 +		sc->sc_snd_uto = to->to_uto;
 415 +		sc->sc_flags |= SCF_SND_UTO;
 416 +	}
 417 +
 418 +	/* Inherit received UTO, regardless of disposition. */
 419 +	sc->sc_flags |= rcv_uto_tf;
 420 +	sc->sc_rcv_uto = rcv_uto;
 421 +	sc->sc_max_uto = max_uto;
 422  
 423  	if (V_tcp_syncookies) {
 424  		syncookie_generate(sch, sc, &flowtmp);
 425 @@ -1438,6 +1477,10 @@
 426  		}
 427  		if (sc->sc_flags & SCF_SACK)
 428  			to.to_flags |= TOF_SACKPERM;
 429 +		if (sc->sc_flags & SCF_SND_UTO) {
 430 +			to.to_uto = sc->sc_snd_uto;
 431 +			to.to_flags |= TOF_UTO;
 432 +		}
 433  #ifdef TCP_SIGNATURE
 434  		if (sc->sc_flags & SCF_SIGNATURE)
 435  			to.to_flags |= TOF_SIGNATURE;
 436 --- //depot/vendor/freebsd/src/sys/netinet/tcp_syncache.h	2011-07-17 21:20:17.000000000 0000
 437 +++ //depot/projects/soc2011/cnicutar_tcputo_9/src/sys/netinet/tcp_syncache.h	2011-08-17 08:18:09.000000000 0000
 438 @@ -82,7 +82,9 @@
 439  	struct label	*sc_label;		/* MAC label reference */
 440  	struct ucred	*sc_cred;		/* cred cache for jail checks */
 441  
 442 -	u_int32_t	sc_spare[2];		/* UTO */
 443 +	u_int32_t	sc_snd_uto;		/* Sent UTO (seconds) */
 444 +	u_int32_t	sc_rcv_uto;		/* Received UTO (seconds) */
 445 +	u_int32_t	sc_max_uto;		/* Maximum UTO (seconds) */
 446  };
 447  
 448  /*
 449 @@ -96,6 +98,8 @@
 450  #define SCF_SIGNATURE	0x20			/* send MD5 digests */
 451  #define SCF_SACK	0x80			/* send SACK option */
 452  #define SCF_ECN		0x100			/* send ECN setup packet */
 453 +#define SCF_SND_UTO	0x200			/* send UTO */
 454 +#define SCF_RCV_UTO	0x400			/* receive UTO suggestions */
 455  
 456  #define	SYNCOOKIE_SECRET_SIZE	8	/* dwords */
 457  #define	SYNCOOKIE_LIFETIME	16	/* seconds */
 458 --- //depot/vendor/freebsd/src/sys/netinet/tcp_timer.c	2011-05-30 09:45:22.000000000 0000
 459 +++ //depot/projects/soc2011/cnicutar_tcputo_9/src/sys/netinet/tcp_timer.c	2011-08-17 08:18:09.000000000 0000
 460 @@ -67,6 +67,9 @@
 461  #include <netinet/tcp_debug.h>
 462  #endif
 463  
 464 +/* XXX-CN this will have to move */
 465 +#define ticks_to_secs(t)        ((t) / hz)
 466 +
 467  int	tcp_keepinit;
 468  SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
 469      &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
 470 @@ -309,6 +312,18 @@
 471  		return;
 472  	}
 473  	callout_deactivate(&tp->t_timers->tt_keep);
 474 +	if (USING_UTO(tp)) {
 475 +		/*
 476 +		 * This connection is using UTO (either sending or has
 477 +		 * received a value). We need to stop sending keepalives
 478 +		 * (RFC 5482 4.2).
 479 +		 * Returning without resetting the timer.
 480 +		 */
 481 +		INP_WUNLOCK(inp);
 482 +		INP_INFO_WUNLOCK(&V_tcbinfo);
 483 +		CURVNET_RESTORE();
 484 +		return;
 485 +	}
 486  	/*
 487  	 * Keep-alive timer went off; send something
 488  	 * or drop connection if idle for too long.
 489 @@ -447,6 +462,7 @@
 490  	int rexmt;
 491  	int headlocked;
 492  	struct inpcb *inp;
 493 +	int uto_left = 0;
 494  #ifdef TCPDEBUG
 495  	int ostate;
 496  
 497 @@ -477,12 +493,45 @@
 498  	}
 499  	callout_deactivate(&tp->t_timers->tt_rexmt);
 500  	tcp_free_sackholes(tp);
 501 +
 502 +	if (tp->t_rxtshift == 0)
 503 +		/* UTO starting again since it's the first retransmit. */
 504 +		tp->t_suto = 0;
 505 +
 506 +	if (USING_UTO(tp)) {
 507 +		/*
 508 +		 * Since we're using UTO for this connection we need to
 509 +		 * compute how much time we've got left.
 510 +		 */
 511 +		uto_left = 0;
 512 +		if (tp->t_flags & TF_RCV_UTO && tp->rcv_uto)
 513 +			/* Clamping the received value. */
 514 +			uto_left = min(tp->max_uto, 
 515 +			    max(V_uto_min_timeout, tp->rcv_uto));
 516 +
 517 +		/* Taking the longer timeout. */
 518 +		uto_left = max(tp->snd_uto, uto_left);
 519 +
 520 +		/* Subtract time that has passed since the first retransmit. */
 521 +		if (tp->t_suto)
 522 +			uto_left -= ticks_to_secs(ticks - tp->t_suto);
 523 +
 524 +		/*
 525 +		 * The user may choose a value that's less than TCP_MAXRXTSHIFT
 526 +		 * retransmits.
 527 +		 */
 528 +		if (uto_left <= 0)
 529 +			/* Before or after the retransmits, UTO was exceeded. */
 530 +			goto timeoutdrop;
 531 +	}
 532 +
 533  	/*
 534  	 * Retransmission timer went off.  Message has not
 535  	 * been acked within retransmit interval.  Back off
 536  	 * to a longer retransmit interval and retransmit one segment.
 537  	 */
 538 -	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
 539 +	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT && uto_left <= 0) {
 540 +timeoutdrop:
 541  		tp->t_rxtshift = TCP_MAXRXTSHIFT;
 542  		TCPSTAT_INC(tcps_timeoutdrop);
 543  		in_pcbref(inp);
 544 @@ -525,13 +574,22 @@
 545  			tp->t_flags &= ~TF_WASCRECOVERY;
 546  		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
 547  		tp->t_flags |= TF_PREVVALID;
 548 +		tp->t_suto = ticks; /* Keep track of UTO start. */
 549  	} else
 550  		tp->t_flags &= ~TF_PREVVALID;
 551  	TCPSTAT_INC(tcps_rexmttimeo);
 552  	if (tp->t_state == TCPS_SYN_SENT)
 553  		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
 554 -	else
 555 -		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
 556 +	else {
 557 +		if (tp->t_rxtshift <= TCP_MAXRXTSHIFT)
 558 +			rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
 559 +		else
 560 +			rexmt = TCPTV_REXMTMAX;		
 561 +	}
 562 +	/* We might want to wait less than an entire backoff. */
 563 +	if (uto_left)
 564 +		rexmt = min(rexmt, uto_left * hz);
 565 +
 566  	TCPT_RANGESET(tp->t_rxtcur, rexmt,
 567  		      tp->t_rttmin, TCPTV_REXMTMAX);
 568  	/*
 569 --- //depot/vendor/freebsd/src/sys/netinet/tcp_usrreq.c	2011-06-02 10:25:18.000000000 0000
 570 +++ //depot/projects/soc2011/cnicutar_tcputo_9/src/sys/netinet/tcp_usrreq.c	2011-08-20 09:40:36.000000000 0000
 571 @@ -1322,6 +1322,54 @@
 572  			INP_WUNLOCK(inp);
 573  			break;
 574  #endif /* TCP_SIGNATURE */
 575 +		case TCP_SNDUTO_TIMEOUT:
 576 +			INP_WUNLOCK(inp);
 577 +			error = sooptcopyin(sopt, &optval, sizeof optval,
 578 +			    sizeof optval);
 579 +			if (error)
 580 +				return (error);
 581 +
 582 +			INP_WLOCK_RECHECK(inp);
 583 +			/* Silently turn it off if !V_uto_enable. */
 584 +			if (!V_uto_enable || optval <= 0) {
 585 +				/* Disable sending the option. */
 586 +				tp->t_flags &= ~TF_SND_UTO;
 587 +				tp->snd_uto = 0;
 588 +			} else if (optval >= V_uto_min_timeout &&
 589 +			    optval <= V_uto_max_timeout) {
 590 +				/* The timeout is acceptable. */
 591 +				tp->snd_uto = optval;
 592 +				tp->t_flags |= TF_SND_UTO;
 593 +			} else
 594 +				error = EINVAL;
 595 +
 596 +			INP_WUNLOCK(inp);
 597 +			break;
 598 +
 599 +		case TCP_RCVUTO_TIMEOUT:
 600 +			INP_WUNLOCK(inp);
 601 +			error = sooptcopyin(sopt, &optval, sizeof optval,
 602 +			    sizeof optval);
 603 +			if (error)
 604 +				return (error);
 605 +
 606 +			INP_WLOCK_RECHECK(inp);
 607 +			if (!V_uto_enable || optval <= 0)
 608 +				/* This connection will ignore suggestions. */
 609 +				tp->t_flags &= ~TF_RCV_UTO;
 610 +			else {
 611 +				tp->t_flags |= TF_RCV_UTO;
 612 +				/*
 613 +				 * If optval > 1, we'll use it as the max
 614 +				 * acceptable suggestion (or silently truncate
 615 +				 * it to V_uto_max_timeout).
 616 +				 */
 617 +				tp->max_uto = (optval > 1 &&
 618 +				    optval < V_uto_max_timeout) ?
 619 +				    optval : V_uto_max_timeout;
 620 +			}
 621 +			INP_WUNLOCK(inp);
 622 +			break;
 623  		case TCP_NODELAY:
 624  		case TCP_NOOPT:
 625  			INP_WUNLOCK(inp);
 626 @@ -1454,7 +1502,16 @@
 627  			error = sooptcopyout(sopt, &optval, sizeof optval);
 628  			break;
 629  #endif
 630 -
 631 +		case TCP_SNDUTO_TIMEOUT:
 632 +			optval = tp->snd_uto;
 633 +			INP_WUNLOCK(inp);
 634 +			error = sooptcopyout(sopt, &optval, sizeof optval);
 635 +			break;
 636 +		case TCP_RCVUTO_TIMEOUT:
 637 +			optval = tp->rcv_uto;
 638 +			INP_WUNLOCK(inp);
 639 +			error = sooptcopyout(sopt, &optval, sizeof optval);
 640 +			break;
 641  		case TCP_NODELAY:
 642  			optval = tp->t_flags & TF_NODELAY;
 643  			INP_WUNLOCK(inp);
 644 --- //depot/vendor/freebsd/src/sys/netinet/tcp_var.h	2011-07-17 21:20:17.000000000 0000
 645 +++ //depot/projects/soc2011/cnicutar_tcputo_9/src/sys/netinet/tcp_var.h	2011-08-20 09:40:36.000000000 0000
 646 @@ -203,9 +203,14 @@
 647  	struct cc_var	*ccv;		/* congestion control specific vars */
 648  	struct osd	*osd;		/* storage for Khelp module data */
 649  
 650 -	uint32_t t_ispare[12];		/* 4 keep timers, 5 UTO, 3 TBD */
 651 +	uint32_t t_ispare[8];		/* 4 keep timers, 1 UTO, 3 TBD */
 652  	void	*t_pspare2[4];		/* 4 TBD */
 653  	uint64_t _pad[6];		/* 6 TBD (1-2 CC/RTT?) */
 654 +
 655 +	uint32_t snd_uto;		/* sent timeout (seconds) */
 656 +	uint32_t rcv_uto;		/* received suggestion (seconds) */
 657 +	uint32_t max_uto;		/* max received uto (seconds) */
 658 +	int t_suto;			/* uto starting time (ticks) */
 659  };
 660  
 661  /*
 662 @@ -225,6 +230,8 @@
 663  #define	TF_NEEDFIN	0x000800	/* send FIN (implicit state) */
 664  #define	TF_NOPUSH	0x001000	/* don't push */
 665  #define	TF_PREVVALID	0x002000	/* saved values for bad rxmit valid */
 666 +#define TF_SND_UTO	0x004000	/* send UTO option */
 667 +#define TF_RCV_UTO	0x008000	/* accept UTO suggestions */
 668  #define	TF_MORETOCOME	0x010000	/* More data to be appended to sock */
 669  #define	TF_LQ_OVERFLOW	0x020000	/* listen queue overflow */
 670  #define	TF_LASTIDLE	0x040000	/* connection was previously idle */
 671 @@ -255,6 +262,12 @@
 672  
 673  #define	BYTES_THIS_ACK(tp, th)	(th->th_ack - tp->snd_una)
 674  
 675 +#define USING_UTO(tp) (V_uto_enable && ((tp)->snd_uto ||	\
 676 +    (((tp)->t_flags & TF_RCV_UTO) && (tp)->rcv_uto)))	/* UTO in effect? */
 677 +
 678 +#define UTO_VALUE(to)	(((to).to_uto & UTO_MINS) ?	\
 679 +    ((to).to_uto & ~(UTO_MINS)) * 60 : (to).to_uto)	/* in seconds */
 680 +
 681  /*
 682   * Flags for the t_oobflags field.
 683   */
 684 @@ -292,7 +305,8 @@
 685  #define	TOF_TS		0x0010		/* timestamp */
 686  #define	TOF_SIGNATURE	0x0040		/* TCP-MD5 signature option (RFC2385) */
 687  #define	TOF_SACK	0x0080		/* Peer sent SACK option */
 688 -#define	TOF_MAXOPT	0x0100
 689 +#define TOF_UTO		0x0100		/* user timeout option */
 690 +#define	TOF_MAXOPT	0x0200
 691  	u_int32_t	to_tsval;	/* new timestamp */
 692  	u_int32_t	to_tsecr;	/* reflected timestamp */
 693  	u_char		*to_sacks;	/* pointer to the first SACK blocks */
 694 @@ -300,7 +314,7 @@
 695  	u_int16_t	to_mss;		/* maximum segment size */
 696  	u_int8_t	to_wscale;	/* window scaling */
 697  	u_int8_t	to_nsacks;	/* number of SACK blocks */
 698 -	u_int32_t	to_spare;	/* UTO */
 699 +	u_int32_t	to_uto;		/* UTO */
 700  };
 701  
 702  /*
 703 @@ -308,6 +322,19 @@
 704   */
 705  #define	TO_SYN		0x01		/* parse SYN-only options */
 706  
 707 +/*
 708 + * Values for TCP UTO.
 709 + */
 710 +#define UTO_MINS	0x8000		/* Highest bit set means "minutes". */
 711 +#define UTO_MINS_TH	3600		/* Send minutes if > one hour. */
 712 +
 713 +/*
 714 + * Flags for tcp_drain_internal.
 715 + */
 716 +#define TCP_DRAIN_CLASSIC	0x1	/* Flushes the reassembly queue. */
 717 +#define TCP_DRAIN_UTO		0x2	/* Drops connections outside the
 718 +					   normal resend window. */
 719 +
 720  struct hc_metrics_lite {	/* must stay in sync with hc_metrics */
 721  	u_long	rmx_mtu;	/* MTU for this path */
 722  	u_long	rmx_ssthresh;	/* outbound gateway buffer limit */
 723 @@ -611,6 +638,10 @@
 724  VNET_DECLARE(int, ss_fltsz_local);
 725  VNET_DECLARE(int, tcp_do_rfc3465);
 726  VNET_DECLARE(int, tcp_abc_l_var);
 727 +VNET_DECLARE(int, uto_enable);
 728 +VNET_DECLARE(int, uto_min_timeout);
 729 +VNET_DECLARE(int, uto_max_timeout);
 730 +
 731  #define	V_tcb			VNET(tcb)
 732  #define	V_tcbinfo		VNET(tcbinfo)
 733  #define	V_tcpstat		VNET(tcpstat)
 734 @@ -623,6 +654,9 @@
 735  #define	V_ss_fltsz_local	VNET(ss_fltsz_local)
 736  #define	V_tcp_do_rfc3465	VNET(tcp_do_rfc3465)
 737  #define	V_tcp_abc_l_var		VNET(tcp_abc_l_var)
 738 +#define V_uto_enable		VNET(uto_enable)
 739 +#define V_uto_min_timeout	VNET(uto_min_timeout)
 740 +#define V_uto_max_timeout	VNET(uto_max_timeout)
 741  
 742  VNET_DECLARE(int, tcp_do_sack);			/* SACK enabled/disabled */
 743  VNET_DECLARE(int, tcp_sc_rst_sock_fail);	/* RST on sock alloc failure */
 744 @@ -652,6 +686,7 @@
 745  struct tcpcb *
 746  	 tcp_drop(struct tcpcb *, int);
 747  void	 tcp_drain(void);
 748 +void	 tcp_drain_internal(u_int flags);
 749  void	 tcp_init(void);
 750  #ifdef VIMAGE
 751  void	 tcp_destroy(void);

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2011-07-03T19:16:07+0000, 3.2 KB) [[attachment:600uto.pcap]]
  • [get | view] (2011-06-21T06:50:05+0000, 0.5 KB) [[attachment:Both peers send UTO]]
  • [get | view] (2011-07-03T19:15:56+0000, 2.2 KB) [[attachment:no-uto.pcap]]
  • [get | view] (2011-08-15T07:38:02+0000, 28.8 KB) [[attachment:partial.pcap]]
  • [get | view] (2011-07-30T21:20:10+0000, 9.4 KB) [[attachment:uto6.pcap]]
  • [get | view] (2011-08-20T14:26:20+0000, 23.3 KB) [[attachment:uto_kernel.diff]]
  • [get | view] (2011-08-20T14:26:57+0000, 5.6 KB) [[attachment:uto_tests.tar.bz2]]
  • [get | view] (2011-08-20T14:26:34+0000, 22.5 KB) [[attachment:uto_userland.diff]]
  • [get | view] (2011-08-16T17:43:56+0000, 1.3 KB) [[attachment:varnishd.diff]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.